From cde697dd147917a6125f40b5aceac801e84e941c Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 13:38:46 +0300 Subject: [PATCH 01/21] introduce BUILD_TINY_GPU_MODULE CMake option It adds the OPENCV_TINY_GPU_MODULE definition, which allows building a tiny version of the gpu module (with limited image format support) --- CMakeLists.txt | 2 ++ cmake/OpenCVCompilerOptions.cmake | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index cd2095db86..71d714b9b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,6 +194,7 @@ OCV_OPTION(BUILD_WITH_STATIC_CRT "Enables use of staticaly linked CRT for sta OCV_OPTION(BUILD_FAT_JAVA_LIB "Create fat java wrapper containing the whole OpenCV library" ON IF NOT BUILD_SHARED_LIBS AND CMAKE_COMPILER_IS_GNUCXX ) OCV_OPTION(BUILD_ANDROID_SERVICE "Build OpenCV Manager for Google Play" OFF IF ANDROID AND ANDROID_SOURCE_TREE ) OCV_OPTION(BUILD_ANDROID_PACKAGE "Build platform-specific package for Google Play" OFF IF ANDROID ) +OCV_OPTION(BUILD_TINY_GPU_MODULE "Build tiny gpu module with limited image format support" OFF ) # 3rd party libs OCV_OPTION(BUILD_ZLIB "Build zlib from source" WIN32 OR APPLE ) @@ -996,6 +997,7 @@ if(HAVE_CUDA) status(" NVIDIA GPU arch:" ${OPENCV_CUDA_ARCH_BIN}) status(" NVIDIA PTX archs:" ${OPENCV_CUDA_ARCH_PTX}) status(" Use fast math:" CUDA_FAST_MATH THEN YES ELSE NO) + status(" Tiny gpu module:" BUILD_TINY_GPU_MODULE THEN YES ELSE NO) endif() if(HAVE_OPENCL) diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 6a93d008ee..a69ac8c7e6 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -63,6 +63,10 @@ if(OPENCV_CAN_BREAK_BINARY_COMPATIBILITY) add_definitions(-DOPENCV_CAN_BREAK_BINARY_COMPATIBILITY) endif() +if(BUILD_TINY_GPU_MODULE) + add_definitions(-DOPENCV_TINY_GPU_MODULE) +endif() + if(CMAKE_COMPILER_IS_GNUCXX) # High level of warnings. 
add_extra_compiler_option(-W) From 00c36e88efec3f7f443d291a0f0e8572c7f589e4 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 14:18:49 +0300 Subject: [PATCH 02/21] reduce separable filter instantiates for tiny build --- modules/gpu/src/cuda/column_filter.10.cu | 4 + modules/gpu/src/cuda/column_filter.11.cu | 4 + modules/gpu/src/cuda/column_filter.12.cu | 4 + modules/gpu/src/cuda/column_filter.13.cu | 4 + modules/gpu/src/cuda/column_filter.14.cu | 4 + modules/gpu/src/cuda/column_filter.3.cu | 4 + modules/gpu/src/cuda/column_filter.4.cu | 4 + modules/gpu/src/cuda/column_filter.8.cu | 4 + modules/gpu/src/cuda/column_filter.9.cu | 4 + modules/gpu/src/cuda/column_filter.h | 187 ++++++++++++++++++++++- modules/gpu/src/cuda/row_filter.10.cu | 4 + modules/gpu/src/cuda/row_filter.11.cu | 4 + modules/gpu/src/cuda/row_filter.12.cu | 4 + modules/gpu/src/cuda/row_filter.13.cu | 4 + modules/gpu/src/cuda/row_filter.14.cu | 4 + modules/gpu/src/cuda/row_filter.3.cu | 4 + modules/gpu/src/cuda/row_filter.4.cu | 4 + modules/gpu/src/cuda/row_filter.8.cu | 4 + modules/gpu/src/cuda/row_filter.9.cu | 4 + modules/gpu/src/cuda/row_filter.h | 187 ++++++++++++++++++++++- modules/gpu/src/filtering.cpp | 26 ++++ 21 files changed, 470 insertions(+), 2 deletions(-) diff --git a/modules/gpu/src/cuda/column_filter.10.cu b/modules/gpu/src/cuda/column_filter.10.cu index b71e25207e..81e4fe7a0a 100644 --- a/modules/gpu/src/cuda/column_filter.10.cu +++ b/modules/gpu/src/cuda/column_filter.10.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.11.cu b/modules/gpu/src/cuda/column_filter.11.cu index ccfbf8e773..34a065453b 100644 --- a/modules/gpu/src/cuda/column_filter.11.cu +++ 
b/modules/gpu/src/cuda/column_filter.11.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.12.cu b/modules/gpu/src/cuda/column_filter.12.cu index a38f93b531..bc0a45bc3f 100644 --- a/modules/gpu/src/cuda/column_filter.12.cu +++ b/modules/gpu/src/cuda/column_filter.12.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.13.cu b/modules/gpu/src/cuda/column_filter.13.cu index 40eec7a83f..b7facb6c03 100644 --- a/modules/gpu/src/cuda/column_filter.13.cu +++ b/modules/gpu/src/cuda/column_filter.13.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.14.cu b/modules/gpu/src/cuda/column_filter.14.cu index 08151ac6d0..6db983786b 100644 --- a/modules/gpu/src/cuda/column_filter.14.cu +++ b/modules/gpu/src/cuda/column_filter.14.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.3.cu b/modules/gpu/src/cuda/column_filter.3.cu index 7304565b96..339bdabc68 100644 --- 
a/modules/gpu/src/cuda/column_filter.3.cu +++ b/modules/gpu/src/cuda/column_filter.3.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.4.cu b/modules/gpu/src/cuda/column_filter.4.cu index 8c9db6985b..37f9bd718e 100644 --- a/modules/gpu/src/cuda/column_filter.4.cu +++ b/modules/gpu/src/cuda/column_filter.4.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.8.cu b/modules/gpu/src/cuda/column_filter.8.cu index 0a63a1dd43..b4ad5bd02e 100644 --- a/modules/gpu/src/cuda/column_filter.8.cu +++ b/modules/gpu/src/cuda/column_filter.8.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.9.cu b/modules/gpu/src/cuda/column_filter.9.cu index 758d9289d9..da64c32225 100644 --- a/modules/gpu/src/cuda/column_filter.9.cu +++ b/modules/gpu/src/cuda/column_filter.9.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.h b/modules/gpu/src/cuda/column_filter.h index 
46e3583153..139a6ef20a 100644 --- a/modules/gpu/src/cuda/column_filter.h +++ b/modules/gpu/src/cuda/column_filter.h @@ -183,6 +183,186 @@ namespace filter { typedef void (*caller_t)(PtrStepSz src, PtrStepSz dst, int anchor, int cc, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[5][33] = + { + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColReflect101>, + 0, + column_filter::caller< 5, T, D, BrdColReflect101>, + 0, + column_filter::caller< 7, T, D, BrdColReflect101>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColReplicate>, + 0, + column_filter::caller< 5, T, D, BrdColReplicate>, + 0, + column_filter::caller< 7, T, D, BrdColReplicate>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColConstant>, + 0, + column_filter::caller< 5, T, D, BrdColConstant>, + 0, + column_filter::caller< 7, T, D, BrdColConstant>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColReflect>, + 0, + column_filter::caller< 5, T, D, BrdColReflect>, + 0, + column_filter::caller< 7, T, D, BrdColReflect>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + } + }; +#else static const caller_t callers[5][33] = { { @@ -361,12 +541,17 @@ namespace filter column_filter::caller<32, T, D, BrdColWrap> } }; +#endif + + const caller_t caller = callers[brd_type][ksize]; + if (!caller) + 
cv::gpu::error("Unsupported input parameters for column_filter", __FILE__, __LINE__, ""); if (stream == 0) cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); else cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - callers[brd_type][ksize]((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); + caller((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); } } diff --git a/modules/gpu/src/cuda/row_filter.10.cu b/modules/gpu/src/cuda/row_filter.10.cu index 7d93ee31ac..c910270a64 100644 --- a/modules/gpu/src/cuda/row_filter.10.cu +++ b/modules/gpu/src/cuda/row_filter.10.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.11.cu b/modules/gpu/src/cuda/row_filter.11.cu index 31bccc48b6..c5e1fbcd96 100644 --- a/modules/gpu/src/cuda/row_filter.11.cu +++ b/modules/gpu/src/cuda/row_filter.11.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.12.cu b/modules/gpu/src/cuda/row_filter.12.cu index 7be543f6b2..017aff8e7f 100644 --- a/modules/gpu/src/cuda/row_filter.12.cu +++ b/modules/gpu/src/cuda/row_filter.12.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff 
--git a/modules/gpu/src/cuda/row_filter.13.cu b/modules/gpu/src/cuda/row_filter.13.cu index bd700b1bb2..676f5ae826 100644 --- a/modules/gpu/src/cuda/row_filter.13.cu +++ b/modules/gpu/src/cuda/row_filter.13.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.14.cu b/modules/gpu/src/cuda/row_filter.14.cu index 97df2f128a..e8d0ec501a 100644 --- a/modules/gpu/src/cuda/row_filter.14.cu +++ b/modules/gpu/src/cuda/row_filter.14.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.3.cu b/modules/gpu/src/cuda/row_filter.3.cu index fe84666954..57013781c5 100644 --- a/modules/gpu/src/cuda/row_filter.3.cu +++ b/modules/gpu/src/cuda/row_filter.3.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.4.cu b/modules/gpu/src/cuda/row_filter.4.cu index 050f7af04e..277ab7f87d 100644 --- a/modules/gpu/src/cuda/row_filter.4.cu +++ b/modules/gpu/src/cuda/row_filter.4.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git 
a/modules/gpu/src/cuda/row_filter.8.cu b/modules/gpu/src/cuda/row_filter.8.cu index b899e87a7a..e9dfd7f4a3 100644 --- a/modules/gpu/src/cuda/row_filter.8.cu +++ b/modules/gpu/src/cuda/row_filter.8.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.9.cu b/modules/gpu/src/cuda/row_filter.9.cu index 516dd8fe7c..eaad54d343 100644 --- a/modules/gpu/src/cuda/row_filter.9.cu +++ b/modules/gpu/src/cuda/row_filter.9.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.h b/modules/gpu/src/cuda/row_filter.h index 933f900293..9bfaf7f3d8 100644 --- a/modules/gpu/src/cuda/row_filter.h +++ b/modules/gpu/src/cuda/row_filter.h @@ -182,6 +182,186 @@ namespace filter { typedef void (*caller_t)(PtrStepSz src, PtrStepSz dst, int anchor, int cc, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[5][33] = + { + { + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowReflect101>, + 0, + row_filter::caller< 5, T, D, BrdRowReflect101>, + 0, + row_filter::caller< 7, T, D, BrdRowReflect101>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowReplicate>, + 0, + row_filter::caller< 5, T, D, BrdRowReplicate>, + 0, + row_filter::caller< 7, T, D, BrdRowReplicate>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + 
{ + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowConstant>, + 0, + row_filter::caller< 5, T, D, BrdRowConstant>, + 0, + row_filter::caller< 7, T, D, BrdRowConstant>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowReflect>, + 0, + row_filter::caller< 5, T, D, BrdRowReflect>, + 0, + row_filter::caller< 7, T, D, BrdRowReflect>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + } + }; +#else static const caller_t callers[5][33] = { { @@ -360,12 +540,17 @@ namespace filter row_filter::caller<32, T, D, BrdRowWrap> } }; +#endif + + const caller_t caller = callers[brd_type][ksize]; + if (!caller) + cv::gpu::error("Unsupported input parameters for row_filter", __FILE__, __LINE__, ""); if (stream == 0) cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); else cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - callers[brd_type][ksize]((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); + caller((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); } } diff --git a/modules/gpu/src/filtering.cpp b/modules/gpu/src/filtering.cpp index 8905eaed64..8f6e780a57 100644 --- a/modules/gpu/src/filtering.cpp +++ b/modules/gpu/src/filtering.cpp @@ -893,6 +893,18 @@ namespace Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType) { +#ifdef OPENCV_TINY_GPU_MODULE + static const gpuFilter1D_t funcs[7][4] = + { + {filter::linearRow, 0, filter::linearRow, filter::linearRow}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, 
+ {0, 0, 0, 0}, + {0, 0, 0, 0}, + {filter::linearRow, 0, filter::linearRow, filter::linearRow}, + {0, 0, 0, 0} + }; +#else static const gpuFilter1D_t funcs[7][4] = { {filter::linearRow, 0, filter::linearRow, filter::linearRow}, @@ -903,6 +915,7 @@ Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, {filter::linearRow, 0, filter::linearRow, filter::linearRow}, {0, 0, 0, 0} }; +#endif static const nppFilter1D_t npp_funcs[] = { 0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R @@ -998,6 +1011,18 @@ namespace Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType) { +#ifdef OPENCV_TINY_GPU_MODULE + static const gpuFilter1D_t funcs[7][4] = + { + {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, + {0, 0, 0, 0} + }; +#else static const gpuFilter1D_t funcs[7][4] = { {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, @@ -1008,6 +1033,7 @@ Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, {0, 0, 0, 0} }; +#endif static const nppFilter1D_t npp_funcs[] = { 0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R From 8e49ab1d3bddb836e6bd4a128db8da4587c35126 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 14:24:18 +0300 Subject: [PATCH 03/21] reduce filter2d instantiates for tiny build --- modules/gpu/src/cuda/imgproc.cu | 19 ++++++++++++++++++- modules/gpu/src/filtering.cpp | 2 ++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index 067dfaf640..2a1bca4ad9 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -985,6 +985,16 @@ namespace cv { namespace gpu { namespace device int borderMode, const float* borderValue, 
cudaStream_t stream) { typedef void (*func_t)(const PtrStepSz srcWhole, int xoff, int yoff, PtrStepSz dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + Filter2DCaller::call, + Filter2DCaller::call, + Filter2DCaller::call, + Filter2DCaller::call, + 0 + }; +#else static const func_t funcs[] = { Filter2DCaller::call, @@ -993,19 +1003,26 @@ namespace cv { namespace gpu { namespace device Filter2DCaller::call, Filter2DCaller::call }; +#endif + + const func_t func = funcs[borderMode]; + if (!func) + cv::gpu::error("Unsupported input parameters for filter2D", __FILE__, __LINE__, ""); if (stream == 0) cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); else cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - funcs[borderMode](static_cast< PtrStepSz >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream); + func(static_cast< PtrStepSz >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream); } template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void 
filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); +#endif template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); } // namespace imgproc diff --git a/modules/gpu/src/filtering.cpp b/modules/gpu/src/filtering.cpp index 8f6e780a57..c7fd61a9c9 100644 --- a/modules/gpu/src/filtering.cpp +++ b/modules/gpu/src/filtering.cpp @@ -789,12 +789,14 @@ Ptr cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const case CV_8UC4: func = filter2D_gpu; break; +#ifndef OPENCV_TINY_GPU_MODULE case CV_16UC1: func = filter2D_gpu; break; case CV_16UC4: func = filter2D_gpu; break; +#endif case CV_32FC1: func = filter2D_gpu; break; From b7a9e672c52d0e2a182268f380b6696ad55fd78a Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 14:32:11 +0300 Subject: [PATCH 04/21] reduce bilateral filter instantiates for tiny build --- modules/gpu/src/cuda/bilateral_filter.cu | 20 +++++++++++++++++++- modules/gpu/src/denoising.cpp | 12 ++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/modules/gpu/src/cuda/bilateral_filter.cu b/modules/gpu/src/cuda/bilateral_filter.cu index 15e72a8b75..69f58aabd9 100644 --- a/modules/gpu/src/cuda/bilateral_filter.cu +++ b/modules/gpu/src/cuda/bilateral_filter.cu @@ -149,6 +149,16 @@ namespace cv { namespace gpu { namespace device { typedef void (*caller_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream); +#ifdef 
OPENCV_TINY_GPU_MODULE + static caller_t funcs[] = + { + bilateral_caller, + bilateral_caller, + 0, + 0, + 0, + }; +#else static caller_t funcs[] = { bilateral_caller, @@ -157,7 +167,13 @@ namespace cv { namespace gpu { namespace device bilateral_caller, bilateral_caller, }; - funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream); +#endif + + const caller_t caller = funcs[borderMode]; + if (!caller) + cv::gpu::error("Unsupported input parameters for bilateral_filter", __FILE__, __LINE__, ""); + + caller(src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream); } } }}} @@ -171,6 +187,7 @@ OCV_INSTANTIATE_BILATERAL_FILTER(uchar) OCV_INSTANTIATE_BILATERAL_FILTER(uchar3) OCV_INSTANTIATE_BILATERAL_FILTER(uchar4) +#ifndef OPENCV_TINY_GPU_MODULE //OCV_INSTANTIATE_BILATERAL_FILTER(schar) //OCV_INSTANTIATE_BILATERAL_FILTER(schar2) //OCV_INSTANTIATE_BILATERAL_FILTER(schar3) @@ -190,6 +207,7 @@ OCV_INSTANTIATE_BILATERAL_FILTER(ushort4) //OCV_INSTANTIATE_BILATERAL_FILTER(int2) //OCV_INSTANTIATE_BILATERAL_FILTER(int3) //OCV_INSTANTIATE_BILATERAL_FILTER(int4) +#endif OCV_INSTANTIATE_BILATERAL_FILTER(float) //OCV_INSTANTIATE_BILATERAL_FILTER(float2) diff --git a/modules/gpu/src/denoising.cpp b/modules/gpu/src/denoising.cpp index 50fab0cc2e..3eb74a92e6 100644 --- a/modules/gpu/src/denoising.cpp +++ b/modules/gpu/src/denoising.cpp @@ -77,6 +77,17 @@ void cv::gpu::bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, f typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t s); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {bilateral_filter_gpu , 0 /*bilateral_filter_gpu*/ , bilateral_filter_gpu , bilateral_filter_gpu }, + {0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ }, + {0 /*bilateral_filter_gpu*/, 0 /*bilateral_filter_gpu*/, 0 
/*bilateral_filter_gpu*/, 0 /*bilateral_filter_gpu*/}, + {0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ }, + {0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ }, + {bilateral_filter_gpu , 0 /*bilateral_filter_gpu*/ , bilateral_filter_gpu , bilateral_filter_gpu } + }; +#else static const func_t funcs[6][4] = { {bilateral_filter_gpu , 0 /*bilateral_filter_gpu*/ , bilateral_filter_gpu , bilateral_filter_gpu }, @@ -86,6 +97,7 @@ void cv::gpu::bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, f {0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ }, {bilateral_filter_gpu , 0 /*bilateral_filter_gpu*/ , bilateral_filter_gpu , bilateral_filter_gpu } }; +#endif sigma_color = (sigma_color <= 0 ) ? 1 : sigma_color; sigma_spatial = (sigma_spatial <= 0 ) ? 1 : sigma_spatial; From db25e44a17b1e1b1b756049c71af2334c79d9539 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 14:44:52 +0300 Subject: [PATCH 05/21] reduce brute_force_matcher instantiates for tiny build --- modules/gpu/src/brute_force_matcher.cpp | 97 +++++++++++++++++++++++++ modules/gpu/src/cuda/bf_knnmatch.cu | 12 +++ modules/gpu/src/cuda/bf_match.cu | 12 +++ modules/gpu/src/cuda/bf_radius_match.cu | 12 +++ 4 files changed, 133 insertions(+) diff --git a/modules/gpu/src/brute_force_matcher.cpp b/modules/gpu/src/brute_force_matcher.cpp index 5da22e156b..e39bce2270 100644 --- a/modules/gpu/src/brute_force_matcher.cpp +++ b/modules/gpu/src/brute_force_matcher.cpp @@ -204,6 +204,26 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[3][6] = + { + { + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, 
+ 0/*matchL1_gpu*/, 0/*matchL1_gpu*/ + }, + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }, + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + 0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/, + 0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/ + } + }; +#else static const caller_t callers[3][6] = { { @@ -222,6 +242,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const matchHamming_gpu, 0/*matchHamming_gpu*/ } }; +#endif CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.cols == query.cols && train.type() == query.type()); @@ -334,6 +355,16 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, const GpuMat& masks, Stream& stream) { +#ifdef OPENCV_TINY_GPU_MODULE + (void)query; + (void)trainCollection; + (void)trainIdx; + (void)imgIdx; + (void)distance; + (void)masks; + (void)stream; + CV_Error(CV_StsNotImplemented, "not available in tiny build"); +#else if (query.empty() || trainCollection.empty()) return; @@ -374,6 +405,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c CV_Assert(func != 0); func(query, trainCollection, masks, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); +#endif } void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, vector& matches) @@ -451,6 +483,26 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[3][6] = + { + { + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/ + }, + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 
matchL2_gpu + }, + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + 0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/, + 0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/ + } + }; +#else static const caller_t callers[3][6] = { { @@ -469,6 +521,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co matchHamming_gpu, 0/*matchHamming_gpu*/ } }; +#endif CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.type() == query.type() && train.cols == query.cols); @@ -568,6 +621,16 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, const GpuMat& maskCollection, Stream& stream) { +#ifdef OPENCV_TINY_GPU_MODULE + (void)query; + (void)trainCollection; + (void)trainIdx; + (void)imgIdx; + (void)distance; + (void)maskCollection; + (void)stream; + CV_Error(CV_StsNotImplemented, "not available in tiny build"); +#else if (query.empty() || trainCollection.empty()) return; @@ -613,6 +676,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer CV_Assert(func != 0); func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); +#endif } void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, @@ -755,6 +819,26 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[3][6] = + { + { + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/ + }, + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }, + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + 0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/, + 
0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/ + } + }; +#else static const caller_t callers[3][6] = { { @@ -773,6 +857,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query, matchHamming_gpu, 0/*matchHamming_gpu*/ } }; +#endif const int nQuery = query.rows; const int nTrain = train.rows; @@ -872,6 +957,17 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& query, const void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, const vector& masks, Stream& stream) { +#ifdef OPENCV_TINY_GPU_MODULE + (void)query; + (void)trainIdx; + (void)imgIdx; + (void)distance; + (void)nMatches; + (void)maxDistance; + (void)masks; + (void)stream; + CV_Error(CV_StsNotImplemented, "not available in tiny build"); +#else if (query.empty() || empty()) return; @@ -926,6 +1022,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu func(query, &trains_[0], static_cast(trains_.size()), maxDistance, masks_.size() == 0 ? 
0 : &masks_[0], trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream)); +#endif } void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches, diff --git a/modules/gpu/src/cuda/bf_knnmatch.cu b/modules/gpu/src/cuda/bf_knnmatch.cu index 3e5bc741ff..640dafb816 100644 --- a/modules/gpu/src/cuda/bf_knnmatch.cu +++ b/modules/gpu/src/cuda/bf_knnmatch.cu @@ -1168,12 +1168,14 @@ namespace cv { namespace gpu { namespace device matchDispatcher< L1Dist >(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), k, WithOutMask(), trainIdx, distance, allDist, stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& 
distance, const PtrStepSzf& allDist, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, @@ -1185,11 +1187,13 @@ namespace cv { namespace gpu { namespace device matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), k, WithOutMask(), trainIdx, distance, allDist, stream); } +#ifndef OPENCV_TINY_GPU_MODULE //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, @@ -1203,10 +1207,12 @@ namespace cv 
{ namespace gpu { namespace device } template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); +#endif template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, @@ -1218,12 +1224,14 @@ namespace cv { namespace gpu { namespace device match2Dispatcher< L1Dist >(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& 
trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); +#endif template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, @@ -1235,12 +1243,14 @@ namespace cv { namespace gpu { namespace device match2Dispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, stream); } +#ifndef OPENCV_TINY_GPU_MODULE //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void 
match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); +#endif template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, @@ -1252,11 +1262,13 @@ namespace cv { namespace gpu { namespace device match2Dispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const 
PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); +#endif } // namespace bf_knnmatch }}} // namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/cuda/bf_match.cu b/modules/gpu/src/cuda/bf_match.cu index c2ae48bb30..baaf979e5d 100644 --- a/modules/gpu/src/cuda/bf_match.cu +++ b/modules/gpu/src/cuda/bf_match.cu @@ -644,12 +644,14 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, @@ -669,11 +671,13 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& 
trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, @@ -695,10 +699,12 @@ namespace cv { namespace gpu { namespace device } template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& 
trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, @@ -718,12 +724,14 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, @@ -743,12 +751,14 @@ 
namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& maskCollection, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, @@ -768,11 +778,13 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& 
imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif } // namespace bf_match }}} // namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/cuda/bf_radius_match.cu b/modules/gpu/src/cuda/bf_radius_match.cu index d83f9f7f96..8493b4e065 100644 --- a/modules/gpu/src/cuda/bf_radius_match.cu +++ b/modules/gpu/src/cuda/bf_radius_match.cu @@ -356,12 +356,14 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, 
const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, @@ -381,11 +383,13 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, 
cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, @@ -407,10 +411,12 @@ namespace cv { namespace gpu { namespace device } template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#endif template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, @@ -421,12 +427,14 @@ namespace cv { namespace gpu { namespace 
device stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, @@ -437,12 +445,14 @@ namespace cv { namespace gpu { namespace device stream); } +#ifndef OPENCV_TINY_GPU_MODULE //template void matchL2_gpu(const 
PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#endif template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, @@ -453,11 +463,13 @@ namespace cv { namespace gpu { namespace device stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float 
maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#endif } // namespace bf_radius_match }}} // namespace cv { namespace gpu { namespace device From 87d655325c60c9b898f9fb4d1348264f0501b313 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 14:57:18 +0300 Subject: [PATCH 06/21] reduce add_weighted instantiates for tiny build --- modules/gpu/src/cuda/element_operations.cu | 21 +- modules/gpu/src/element_operations.cpp | 461 +++++++++++++++++++++ 2 files changed, 477 insertions(+), 5 deletions(-) diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index f606f0c8c1..e277d829a1 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -2397,6 +2397,7 @@ namespace arithm } template void 
addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2451,9 +2452,10 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif - +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2501,9 +2503,10 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif - +#ifndef OPENCV_TINY_GPU_MODULE template 
void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2543,9 +2546,10 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif - +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2577,9 +2581,10 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif - +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb 
src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2603,15 +2608,18 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif - +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2621,9 +2629,11 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, 
PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2631,6 +2641,7 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif } #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index 354d614d42..fa98520ff8 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -3007,6 +3007,466 @@ namespace arithm void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int ddepth, Stream& stream) { typedef void (*func_t)(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7][7] = + { + { + { + arithm::addWeighted, + 0/*arithm::addWeighted*/, + 
0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/ + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { 
+ 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 
0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 
0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { 
+ 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + arithm::addWeighted, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 
0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + } + }; +#else static const func_t funcs[7][7][7] = { { @@ -3465,6 +3925,7 @@ void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, } } }; +#endif int sdepth1 = src1.depth(); int sdepth2 = src2.depth(); From 9682d2afff6e0a2b61e032a28ad1412ac6ee54dd Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 16:02:54 +0300 Subject: [PATCH 07/21] reduce element_operations instantiates for tiny build --- modules/gpu/src/cuda/element_operations.cu | 134 ++ modules/gpu/src/element_operations.cpp | 1284 ++++++++++++++++++-- 2 files changed, 1310 insertions(+), 108 deletions(-) diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index e277d829a1..1f94f6a5c3 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -234,6 +234,7 @@ namespace arithm } template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -278,7 +279,9 @@ namespace arithm //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb 
dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -288,6 +291,7 @@ namespace arithm //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -329,6 +333,7 @@ namespace arithm } template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -373,7 +378,9 @@ namespace arithm //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -383,6 +390,7 @@ namespace arithm //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb 
src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -469,6 +477,7 @@ namespace arithm } template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -513,7 +522,9 @@ namespace arithm //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -523,6 +534,7 @@ namespace arithm //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -542,6 +554,7 @@ namespace arithm } template 
void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -586,7 +599,9 @@ namespace arithm //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -596,6 +611,7 @@ namespace arithm //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -700,6 +716,7 @@ namespace arithm } template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t 
stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -744,7 +761,9 @@ namespace arithm //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -754,6 +773,7 @@ namespace arithm //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -791,6 +811,7 @@ namespace arithm } template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -835,7 +856,9 @@ namespace arithm //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double 
val, PtrStepSzb dst, cudaStream_t stream); +#endif template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -845,6 +868,7 @@ namespace arithm //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -968,6 +992,7 @@ namespace arithm } template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -1012,7 +1037,9 @@ namespace arithm //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -1022,6 
+1049,7 @@ namespace arithm //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1037,6 +1065,7 @@ namespace arithm } template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1081,7 +1110,9 @@ namespace arithm //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1091,6 +1122,7 @@ namespace arithm //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1128,6 +1160,7 @@ namespace arithm } template void divInv(PtrStepSzb src1, double val, 
PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1172,7 +1205,9 @@ namespace arithm //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1182,6 +1217,7 @@ namespace arithm //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1278,12 +1314,16 @@ namespace arithm } template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t 
stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1323,12 +1363,16 @@ namespace arithm } template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1349,13 +1393,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, abs_func(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1390,13 +1438,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, Sqr(), 
WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1417,13 +1469,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, sqrt_func(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1444,13 +1500,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, log_func(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void 
logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1486,13 +1546,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, Exp(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1620,36 +1684,52 @@ namespace arithm } template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, 
PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLe(PtrStepSzb src1, PtrStepSzb 
src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1824,52 +1904,76 @@ namespace arithm } template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, 
cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void 
cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1981,19 +2085,25 @@ namespace arithm } template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); 
template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2067,12 +2177,16 @@ namespace arithm } template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream) { @@ -2080,12 +2194,16 @@ namespace arithm } template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif 
template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2159,12 +2277,16 @@ namespace arithm } template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream) { @@ -2172,12 +2294,16 @@ namespace arithm } template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } 
////////////////////////////////////////////////////////////////////////// @@ -2233,12 +2359,16 @@ namespace arithm } template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#endif template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2312,13 +2442,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, PowOp(power), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#endif template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// diff --git 
a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index fa98520ff8..bd8ca81bf1 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -275,6 +275,75 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + addMat, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + addMat, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -284,7 +353,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, }, { addMat, @@ -293,7 +362,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -302,7 +371,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -311,7 +380,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, 
}, { 0 /*addMat*/, @@ -320,7 +389,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat*/, addMat, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -329,7 +398,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat*/, 0 /*addMat*/, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -338,9 +407,10 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat*/, 0 /*addMat*/, 0 /*addMat*/, - addMat + addMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -421,6 +491,75 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + addScalar, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + addScalar, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -430,7 +569,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, 
addScalar, addScalar, - addScalar + addScalar, }, { addScalar, @@ -439,7 +578,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -448,7 +587,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -457,7 +596,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -466,7 +605,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar*/, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -475,7 +614,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar*/, 0 /*addScalar*/, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -484,9 +623,10 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar*/, 0 /*addScalar*/, 0 /*addScalar*/, - addScalar + addScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -555,6 +695,75 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + subMat, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, 
+ 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + subMat, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0/*subMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -564,7 +773,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { subMat, @@ -573,7 +782,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -582,7 +791,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -591,7 +800,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -600,7 +809,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat*/, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -609,7 +818,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat*/, 0 /*subMat*/, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -618,9 +827,10 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat*/, 0 /*subMat*/, 0 /*subMat*/, - subMat + subMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -701,6 +911,75 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + 
subScalar, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + subScalar, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -710,7 +989,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { subScalar, @@ -719,7 +998,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -728,7 +1007,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -737,7 +1016,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -746,7 +1025,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G 0 /*subScalar*/, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -755,7 +1034,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, 
const G 0 /*subScalar*/, 0 /*subScalar*/, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -764,9 +1043,10 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G 0 /*subScalar*/, 0 /*subScalar*/, 0 /*subScalar*/, - subScalar + subScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -856,6 +1136,75 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub else { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + mulMat, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + mulMat, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -865,7 +1214,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { mulMat, @@ -874,7 +1223,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -883,7 +1232,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, 
doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -892,7 +1241,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -901,7 +1250,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat*/, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -910,7 +1259,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat*/, 0 /*mulMat*/, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -919,9 +1268,10 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat*/, 0 /*mulMat*/, 0 /*mulMat*/, - mulMat + mulMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -965,6 +1315,75 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + mulScalar, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + mulScalar, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 
/*mulScalar*/, + 0/*mulScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -974,7 +1393,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { mulScalar, @@ -983,7 +1402,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -992,7 +1411,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1001,7 +1420,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1010,7 +1429,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar*/, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1019,7 +1438,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar*/, 0 /*mulScalar*/, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1028,9 +1447,10 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar*/, 0 /*mulScalar*/, 0 /*mulScalar*/, - mulScalar + mulScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -1121,6 +1541,75 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double else { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divMat, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, 
+ 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + divMat, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1130,7 +1619,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { divMat, @@ -1139,7 +1628,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1148,7 +1637,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1157,7 +1646,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1166,7 +1655,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat*/, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1175,7 +1664,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat*/, 0 /*divMat*/, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1184,9 +1673,10 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat*/, 0 /*divMat*/, 0 /*divMat*/, - divMat + divMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -1230,6 +1720,75 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc using 
namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divScalar, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + divScalar, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1239,7 +1798,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { divScalar, @@ -1248,7 +1807,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1257,7 +1816,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1266,7 +1825,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1275,7 +1834,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& 
dst, double sc 0 /*divScalar*/, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1284,7 +1843,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc 0 /*divScalar*/, 0 /*divScalar*/, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1293,9 +1852,10 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc 0 /*divScalar*/, 0 /*divScalar*/, 0 /*divScalar*/, - divScalar + divScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -1359,6 +1919,75 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divInv, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + divInv, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1368,7 +1997,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { divInv, @@ -1377,7 +2006,7 @@ void cv::gpu::divide(double scale, const 
GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1386,7 +2015,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1395,7 +2024,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1404,7 +2033,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv*/, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1413,7 +2042,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv*/, 0 /*divInv*/, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1422,9 +2051,10 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv*/, 0 /*divInv*/, 0 /*divInv*/, - divInv + divInv, } }; +#endif if (dtype < 0) dtype = src.depth(); @@ -1471,6 +2101,19 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + absDiffMat, + 0/*absDiffMat*/, + 0/*absDiffMat*/, + 0/*absDiffMat*/, + 0/*absDiffMat*/, + absDiffMat, + 0/*absDiffMat*/, + }; +#else static const func_t funcs[] = { absDiffMat, @@ -1479,8 +2122,9 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea absDiffMat, absDiffMat, absDiffMat, - absDiffMat + absDiffMat, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -1556,6 +2200,19 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 
absDiffScalar, + 0/*absDiffScalar*/, + 0/*absDiffScalar*/, + 0/*absDiffScalar*/, + 0/*absDiffScalar*/, + absDiffScalar, + 0/*absDiffScalar*/, + }; +#else static const func_t funcs[] = { absDiffScalar, @@ -1564,8 +2221,9 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea absDiffScalar, absDiffScalar, absDiffScalar, - absDiffScalar + absDiffScalar, }; +#endif const int depth = src1.depth(); @@ -1578,9 +2236,13 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src1.size(), src1.type()); - funcs[depth](src1, src2.val[0], dst, StreamAccessor::getStream(stream)); + func(src1, src2.val[0], dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1597,6 +2259,19 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*absMat*/, + 0/*absMat*/, + 0/*absMat*/, + 0/*absMat*/, + 0/*absMat*/, + absMat, + 0/*absMat*/, + }; +#else static const func_t funcs[] = { absMat, @@ -1605,8 +2280,9 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) absMat, absMat, absMat, - absMat + absMat, }; +#endif const int depth = src.depth(); @@ -1619,9 +2295,13 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, 
StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1638,6 +2318,19 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*sqrMat*/, + 0/*sqrMat*/, + 0/*sqrMat*/, + 0/*sqrMat*/, + 0/*sqrMat*/, + sqrMat, + 0/*sqrMat*/, + }; +#else static const func_t funcs[] = { sqrMat, @@ -1646,8 +2339,9 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) sqrMat, sqrMat, sqrMat, - sqrMat + sqrMat, }; +#endif const int depth = src.depth(); @@ -1660,9 +2354,13 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1679,6 +2377,19 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*sqrtMat*/, + 0/*sqrtMat*/, + 0/*sqrtMat*/, + 0/*sqrtMat*/, + 0/*sqrtMat*/, + sqrtMat, + 0/*sqrtMat*/, + }; +#else static const func_t funcs[] = { sqrtMat, @@ -1687,8 +2398,9 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) sqrtMat, sqrtMat, sqrtMat, - sqrtMat + sqrtMat, }; +#endif const int depth = src.depth(); @@ -1701,9 +2413,13 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) 
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -1720,6 +2436,19 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*logMat*/, + 0/*logMat*/, + 0/*logMat*/, + 0/*logMat*/, + 0/*logMat*/, + logMat, + 0/*logMat*/, + }; +#else static const func_t funcs[] = { logMat, @@ -1728,8 +2457,9 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) logMat, logMat, logMat, - logMat + logMat, }; +#endif const int depth = src.depth(); @@ -1742,9 +2472,13 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -1761,6 +2495,19 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*expMat*/, + 0/*expMat*/, + 0/*expMat*/, + 0/*expMat*/, + 0/*expMat*/, + expMat, + 0/*expMat*/, + }; +#else static const func_t funcs[] = { expMat, @@ -1769,8 +2516,9 @@ 
void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) expMat, expMat, expMat, - expMat + expMat, }; +#endif const int depth = src.depth(); @@ -1783,9 +2531,13 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1809,6 +2561,19 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][4] = + { + {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, + {0, 0, 0, 0}, + }; +#else static const func_t funcs[7][4] = { {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, @@ -1819,6 +2584,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe } }; +#endif typedef void (*func_v4_t)(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); static const func_v4_t funcs_v4[] = @@ -1839,10 +2605,6 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } - dst.create(src1.size(), CV_MAKE_TYPE(CV_8U, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); - static const int codes[] = { 0, 2, 3, 2, 3, 1 @@ -1857,6 +2619,15 @@ void cv::gpu::compare(const GpuMat& src1, const 
GpuMat& src2, GpuMat& dst, int c }; const int code = codes[cmpop]; + + const func_t func = funcs[depth][code]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + dst.create(src1.size(), CV_MAKE_TYPE(CV_8U, cn)); + + cudaStream_t stream = StreamAccessor::getStream(s); + PtrStepSzb src1_(src1.rows, src1.cols * cn, psrc1[cmpop]->data, psrc1[cmpop]->step); PtrStepSzb src2_(src1.rows, src1.cols * cn, psrc2[cmpop]->data, psrc2[cmpop]->step); PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); @@ -1882,8 +2653,6 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c } } - const func_t func = funcs[depth][code]; - func(src1_, src2_, dst_, stream); } @@ -1913,6 +2682,31 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre using namespace arithm; typedef void (*func_t)(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); + typedef void (*cast_func_t)(Scalar& sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][6] = + { + {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe }, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe }, + {0, 0, 0, 0, 0, 0}, + }; + + static const cast_func_t cast_func[] = + { + castScalar, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + castScalar, + 0/*castScalar*/, + }; +#else static const func_t funcs[7][6] = { {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe }, @@ -1924,11 +2718,11 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe } }; - typedef void (*cast_func_t)(Scalar& sc); static const cast_func_t cast_func[] = { castScalar, castScalar, 
castScalar, castScalar, castScalar, castScalar, castScalar }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -1943,11 +2737,15 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth][cmpop]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), CV_MAKE_TYPE(CV_8U, cn)); cast_func[depth](sc); - funcs[depth][cmpop](src, cn, sc.val, dst, StreamAccessor::getStream(stream)); + func(src, cn, sc.val, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -2391,14 +3189,56 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE static const func_t funcs[5][4] = { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarAnd >::call}, + { + BitScalar >::call, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + } }; +#else + static const func_t funcs[5][4] = + { + { + BitScalar >::call, + 0, + NppBitwiseC::call, + BitScalar4< bitScalarAnd >::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + } + }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2406,9 +3246,13 @@ void cv::gpu::bitwise_and(const 
GpuMat& src, const Scalar& sc, GpuMat& dst, Stre CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) @@ -2416,14 +3260,56 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE static const func_t funcs[5][4] = { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarOr >::call}, + { + BitScalar >::call, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + } }; +#else + static const func_t funcs[5][4] = + { + { + BitScalar >::call, + 0, + NppBitwiseC::call, + BitScalar4< bitScalarOr >::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + } + }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2431,9 +3317,13 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = 
funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) @@ -2441,14 +3331,56 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE static const func_t funcs[5][4] = { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarXor >::call}, + { + BitScalar >::call, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + } }; +#else + static const func_t funcs[5][4] = + { + { + BitScalar >::call, + 0, + NppBitwiseC::call, + BitScalar4< bitScalarXor >::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + } + }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2456,9 +3388,13 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 
1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -2578,6 +3514,19 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + minMat, + 0/*minMat*/, + 0/*minMat*/, + 0/*minMat*/, + 0/*minMat*/, + minMat, + 0/*minMat*/, + }; +#else static const func_t funcs[] = { minMat, @@ -2586,8 +3535,9 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s minMat, minMat, minMat, - minMat + minMat, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -2657,6 +3607,19 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + maxMat, + 0/*maxMat*/, + 0/*maxMat*/, + 0/*maxMat*/, + 0/*maxMat*/, + maxMat, + 0/*maxMat*/, + }; +#else static const func_t funcs[] = { maxMat, @@ -2665,8 +3628,9 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s maxMat, maxMat, maxMat, - maxMat + maxMat, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -2744,6 +3708,31 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + typedef double (*cast_func_t)(double sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + minScalar, + 0/*minScalar*/, + 0/*minScalar*/, + 0/*minScalar*/, + 0/*minScalar*/, + minScalar, + 0/*minScalar*/, + }; + + static const cast_func_t 
cast_func[] = + { + castScalar, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + castScalar, + 0/*castScalar*/, + }; +#else static const func_t funcs[] = { minScalar, @@ -2752,14 +3741,20 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) minScalar, minScalar, minScalar, - minScalar + minScalar, }; - typedef double (*cast_func_t)(double sc); static const cast_func_t cast_func[] = { - castScalar, castScalar, castScalar, castScalar, castScalar, castScalar, castScalar + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, }; +#endif const int depth = src.depth(); @@ -2772,9 +3767,13 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); + func(src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); } void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) @@ -2782,6 +3781,31 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + typedef double (*cast_func_t)(double sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + maxScalar, + 0/*maxScalar*/, + 0/*maxScalar*/, + 0/*maxScalar*/, + 0/*maxScalar*/, + maxScalar, + 0/*maxScalar*/, + }; + + static const cast_func_t cast_func[] = + { + castScalar, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + castScalar, + 0/*castScalar*/ + }; +#else static const func_t funcs[] = { maxScalar, @@ -2790,14 +3814,20 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, 
Stream& stream) maxScalar, maxScalar, maxScalar, - maxScalar + maxScalar, }; - typedef double (*cast_func_t)(double sc); static const cast_func_t cast_func[] = { - castScalar, castScalar, castScalar, castScalar, castScalar, castScalar, castScalar + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, }; +#endif const int depth = src.depth(); @@ -2810,9 +3840,13 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); + func(src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -2858,6 +3892,18 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double else { typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + arithm::threshold, + 0/*arithm::threshold*/, + 0/*arithm::threshold*/, + 0/*arithm::threshold*/, + 0/*arithm::threshold*/, + arithm::threshold, + 0/*arithm::threshold*/ + }; +#else static const func_t funcs[] = { arithm::threshold, @@ -2868,6 +3914,11 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double arithm::threshold, arithm::threshold }; +#endif + + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); if (depth != CV_32F && depth != CV_64F) { @@ -2875,7 +3926,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal = cvRound(maxVal); } - funcs[depth](src, dst, thresh, maxVal, type, stream); + func(src, dst, thresh, 
maxVal, type, stream); } return thresh; @@ -2892,6 +3943,18 @@ namespace arithm void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) { typedef void (*func_t)(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*arithm::pow*/, + 0/*arithm::pow*/, + 0/*arithm::pow*/, + 0/*arithm::pow*/, + 0/*arithm::pow*/, + arithm::pow, + 0/*arithm::pow*/, + }; +#else static const func_t funcs[] = { arithm::pow, @@ -2902,6 +3965,7 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) arithm::pow, arithm::pow }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2914,12 +3978,16 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); PtrStepSzb src_(src.rows, src.cols * cn, src.data, src.step); PtrStepSzb dst_(src.rows, src.cols * cn, dst.data, dst.step); - funcs[depth](src_, power, dst_, StreamAccessor::getStream(stream)); + func(src_, power, dst_, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// From 3004f5fa3028b3a0defbdad40240f80ab8a638c4 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 16:08:19 +0300 Subject: [PATCH 08/21] reduce stereobm instantiates for tiny build --- modules/gpu/src/cuda/stereobm.cu | 39 ++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/modules/gpu/src/cuda/stereobm.cu b/modules/gpu/src/cuda/stereobm.cu index ad256357b8..87af7e52e6 100644 --- a/modules/gpu/src/cuda/stereobm.cu +++ b/modules/gpu/src/cuda/stereobm.cu @@ -330,24 +330,55 @@ namespace cv { namespace gpu { namespace device typedef void 
(*kernel_caller_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream); +#ifdef OPENCV_TINY_GPU_MODULE + const static kernel_caller_t callers[] = + { + 0, + kernel_caller< 1>, + kernel_caller< 2>, + kernel_caller< 3>, + kernel_caller< 4>, + kernel_caller< 5>, + 0/*kernel_caller< 6>*/, + 0/*kernel_caller< 7>*/, + 0/*kernel_caller< 8>*/, + 0/*kernel_caller< 9>*/, + 0/*kernel_caller<10>*/, + 0/*kernel_caller<11>*/, + 0/*kernel_caller<12>*/, + 0/*kernel_caller<13>*/, + 0/*kernel_caller<14>*/, + kernel_caller<15>, + 0/*kernel_caller<16>*/, + 0/*kernel_caller<17>*/, + 0/*kernel_caller<18>*/, + 0/*kernel_caller<19>*/, + 0/*kernel_caller<20>*/, + 0/*kernel_caller<21>*/, + 0/*kernel_caller<22>*/, + 0/*kernel_caller<23>*/, + 0/*kernel_caller<24>*/, + 0/*kernel_caller<25>*/, + }; +#else const static kernel_caller_t callers[] = { 0, kernel_caller< 1>, kernel_caller< 2>, kernel_caller< 3>, kernel_caller< 4>, kernel_caller< 5>, kernel_caller< 6>, kernel_caller< 7>, kernel_caller< 8>, kernel_caller< 9>, kernel_caller<10>, - kernel_caller<11>, kernel_caller<12>, kernel_caller<13>, kernel_caller<15>, kernel_caller<15>, + kernel_caller<11>, kernel_caller<12>, kernel_caller<13>, kernel_caller<14>, kernel_caller<15>, kernel_caller<16>, kernel_caller<17>, kernel_caller<18>, kernel_caller<19>, kernel_caller<20>, kernel_caller<21>, kernel_caller<22>, kernel_caller<23>, kernel_caller<24>, kernel_caller<25> - - //0,0,0, 0,0,0, 0,0,kernel_caller<9> }; +#endif + const int calles_num = sizeof(callers)/sizeof(callers[0]); void stereoBM_GPU(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, int winsz, const PtrStepSz& minSSD_buf, cudaStream_t& stream) { int winsz2 = winsz >> 1; - if (winsz2 == 0 || winsz2 >= calles_num) + if (winsz2 == 0 || winsz2 >= calles_num || callers[winsz2] == 0) cv::gpu::error("Unsupported window size", __FILE__, __LINE__, "stereoBM_GPU"); //cudaSafeCall( 
cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) ); From e7b55f498224eee038717c5cfd160b7cf810274d Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 16:15:14 +0300 Subject: [PATCH 09/21] reduce warp instantiates for tiny build --- modules/gpu/src/cuda/warp.cu | 43 ++++++++++++++++++++++++++++++++---- modules/gpu/src/warp.cpp | 24 ++++++++++++++++++++ 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/modules/gpu/src/cuda/warp.cu b/modules/gpu/src/cuda/warp.cu index 49130d9405..ad867601ed 100644 --- a/modules/gpu/src/cuda/warp.cu +++ b/modules/gpu/src/cuda/warp.cu @@ -278,6 +278,7 @@ namespace cv { namespace gpu { namespace device { typedef void (*func_t)(PtrStepSz src, PtrStepSz srcWhole, int xoff, int yoff, PtrStepSz dst, const float* borderValue, cudaStream_t stream, bool cc20); +#ifdef OPENCV_TINY_GPU_MODULE static const func_t funcs[3][5] = { { @@ -285,25 +286,55 @@ namespace cv { namespace gpu { namespace device WarpDispatcher::call, WarpDispatcher::call, WarpDispatcher::call, - WarpDispatcher::call + 0/*WarpDispatcher::call*/, }, { WarpDispatcher::call, WarpDispatcher::call, WarpDispatcher::call, WarpDispatcher::call, - WarpDispatcher::call + 0/*WarpDispatcher::call*/, + }, + { + 0/*WarpDispatcher::call*/, + 0/*WarpDispatcher::call*/, + 0/*WarpDispatcher::call*/, + 0/*WarpDispatcher::call*/, + 0/*WarpDispatcher::call*/, + } + }; +#else + static const func_t funcs[3][5] = + { + { + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, + }, + { + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, }, { WarpDispatcher::call, WarpDispatcher::call, WarpDispatcher::call, WarpDispatcher::call, - WarpDispatcher::call + WarpDispatcher::call, } }; +#endif - funcs[interpolation][borderMode](static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(srcWhole), xoff, yoff, + const func_t func = 
funcs[interpolation][borderMode]; + if (!func) + cv::gpu::error("Unsupported input parameters for warp_caller", __FILE__, __LINE__, ""); + + func(static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(srcWhole), xoff, yoff, static_cast< PtrStepSz >(dst), borderValue, stream, cc20); } @@ -320,6 +351,7 @@ namespace cv { namespace gpu { namespace device template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifndef OPENCV_TINY_GPU_MODULE //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); @@ -339,6 +371,7 @@ namespace cv { namespace gpu { namespace device //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void 
warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#endif template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); @@ -358,6 +391,7 @@ namespace cv { namespace gpu { namespace device template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifndef OPENCV_TINY_GPU_MODULE //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); @@ -377,6 +411,7 @@ namespace cv { namespace gpu { namespace device //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb 
srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#endif template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); diff --git a/modules/gpu/src/warp.cpp b/modules/gpu/src/warp.cpp index 827d5219f1..c963235b72 100644 --- a/modules/gpu/src/warp.cpp +++ b/modules/gpu/src/warp.cpp @@ -277,6 +277,17 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {warpAffine_gpu , 0 /*warpAffine_gpu*/ , warpAffine_gpu , warpAffine_gpu }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {warpAffine_gpu , 0 /*warpAffine_gpu*/ , warpAffine_gpu , warpAffine_gpu } + }; +#else static const func_t funcs[6][4] = { {warpAffine_gpu , 0 /*warpAffine_gpu*/ , warpAffine_gpu , warpAffine_gpu }, @@ -286,6 +297,7 @@ void 
cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz {0 /*warpAffine_gpu*/ , 0 /*warpAffine_gpu*/ , 0 /*warpAffine_gpu*/ , 0 /*warpAffine_gpu*/ }, {warpAffine_gpu , 0 /*warpAffine_gpu*/ , warpAffine_gpu , warpAffine_gpu } }; +#endif const func_t func = funcs[src.depth()][src.channels() - 1]; CV_Assert(func != 0); @@ -415,6 +427,17 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {warpPerspective_gpu , 0 /*warpPerspective_gpu*/ , warpPerspective_gpu , warpPerspective_gpu }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {warpPerspective_gpu , 0 /*warpPerspective_gpu*/ , warpPerspective_gpu , warpPerspective_gpu } + }; +#else static const func_t funcs[6][4] = { {warpPerspective_gpu , 0 /*warpPerspective_gpu*/ , warpPerspective_gpu , warpPerspective_gpu }, @@ -424,6 +447,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size {0 /*warpPerspective_gpu*/ , 0 /*warpPerspective_gpu*/ , 0 /*warpPerspective_gpu*/ , 0 /*warpPerspective_gpu*/ }, {warpPerspective_gpu , 0 /*warpPerspective_gpu*/ , warpPerspective_gpu , warpPerspective_gpu } }; +#endif const func_t func = funcs[src.depth()][src.channels() - 1]; CV_Assert(func != 0); From 612888c0661d4410f07f93c162f8487a3c58067d Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 16:18:57 +0300 Subject: [PATCH 10/21] reduce remap instantiates for tiny build --- modules/gpu/src/cuda/remap.cu | 41 +++++++++++++++++++++++++++++++---- modules/gpu/src/remap.cpp | 12 ++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/modules/gpu/src/cuda/remap.cu b/modules/gpu/src/cuda/remap.cu index f40ada0302..77bf976140 
100644 --- a/modules/gpu/src/cuda/remap.cu +++ b/modules/gpu/src/cuda/remap.cu @@ -209,6 +209,7 @@ namespace cv { namespace gpu { namespace device typedef void (*caller_t)(PtrStepSz src, PtrStepSz srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSz dst, const float* borderValue, cudaStream_t stream, bool cc20); +#ifdef OPENCV_TINY_GPU_MODULE static const caller_t callers[3][5] = { { @@ -216,25 +217,55 @@ namespace cv { namespace gpu { namespace device RemapDispatcher::call, RemapDispatcher::call, RemapDispatcher::call, - RemapDispatcher::call + 0/*RemapDispatcher::call*/, }, { RemapDispatcher::call, RemapDispatcher::call, RemapDispatcher::call, RemapDispatcher::call, - RemapDispatcher::call + 0/*RemapDispatcher::call*/, + }, + { + 0/*RemapDispatcher::call*/, + 0/*RemapDispatcher::call*/, + 0/*RemapDispatcher::call*/, + 0/*RemapDispatcher::call*/, + 0/*RemapDispatcher::call*/, + } + }; +#else + static const caller_t callers[3][5] = + { + { + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, + }, + { + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, }, { RemapDispatcher::call, RemapDispatcher::call, RemapDispatcher::call, RemapDispatcher::call, - RemapDispatcher::call + RemapDispatcher::call, } }; +#endif - callers[interpolation][borderMode](static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(srcWhole), xoff, yoff, xmap, ymap, + const caller_t caller = callers[interpolation][borderMode]; + if (!caller) + cv::gpu::error("Unsupported input parameters for remap", __FILE__, __LINE__, ""); + + caller(static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(srcWhole), xoff, yoff, xmap, ymap, static_cast< PtrStepSz >(dst), borderValue, stream, cc20); } @@ -243,6 +274,7 @@ namespace cv { namespace gpu { namespace device template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, 
PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifndef OPENCV_TINY_GPU_MODULE //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); @@ -262,6 +294,7 @@ namespace cv { namespace gpu { namespace device //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#endif template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int 
borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); diff --git a/modules/gpu/src/remap.cpp b/modules/gpu/src/remap.cpp index 4b87286331..3e13c7285c 100644 --- a/modules/gpu/src/remap.cpp +++ b/modules/gpu/src/remap.cpp @@ -65,6 +65,17 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {remap_gpu , 0 /*remap_gpu*/ , remap_gpu , remap_gpu }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {remap_gpu , 0 /*remap_gpu*/ , remap_gpu , remap_gpu } + }; +#else static const func_t funcs[6][4] = { {remap_gpu , 0 /*remap_gpu*/ , remap_gpu , remap_gpu }, @@ -74,6 +85,7 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp {0 /*remap_gpu*/ , 0 /*remap_gpu*/ , 0 /*remap_gpu*/ , 0 /*remap_gpu*/ }, {remap_gpu , 0 /*remap_gpu*/ , remap_gpu , remap_gpu } }; +#endif CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size()); From 72685cf01cefd0f1e7ca7c35d249b94c3b659d62 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 16:23:30 +0300 Subject: [PATCH 11/21] reduce resize instantiates for tiny build --- modules/gpu/src/cuda/resize.cu | 12 ++++++++++++ modules/gpu/src/resize.cpp | 13 +++++++++++++ 2 files changed, 25 insertions(+) diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu index 1998b3b07c..ed3806be85 100644 --- 
a/modules/gpu/src/cuda/resize.cu +++ b/modules/gpu/src/cuda/resize.cu @@ -342,11 +342,13 @@ namespace cv { namespace gpu { namespace device template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; +#ifndef OPENCV_TINY_GPU_MODULE template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; +#endif template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; @@ -380,11 +382,13 @@ namespace cv { namespace gpu { namespace device template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; +#ifndef OPENCV_TINY_GPU_MODULE template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; +#endif template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; @@ -410,6 +414,7 @@ namespace cv { namespace gpu { namespace device } }; +#ifndef OPENCV_TINY_GPU_MODULE template <> struct ResizeCubicDispatcher : SelectImplForCubic {}; template <> struct ResizeCubicDispatcher : SelectImplForCubic {}; @@ -421,6 +426,7 @@ namespace cv { namespace gpu { namespace device template <> struct ResizeCubicDispatcher : SelectImplForCubic {}; template <> struct ResizeCubicDispatcher : SelectImplForCubic {}; +#endif // ResizeAreaDispatcher @@ -467,7 +473,11 @@ namespace cv { namespace gpu { namespace device { ResizeNearestDispatcher::call, ResizeLinearDispatcher::call, 
+#ifdef OPENCV_TINY_GPU_MODULE + 0, +#else ResizeCubicDispatcher::call, +#endif ResizeAreaDispatcher::call }; @@ -482,6 +492,7 @@ namespace cv { namespace gpu { namespace device template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); @@ -489,6 +500,7 @@ namespace cv { namespace gpu { namespace device template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); +#endif template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, 
float fx, int interpolation, cudaStream_t stream); diff --git a/modules/gpu/src/resize.cpp b/modules/gpu/src/resize.cpp index e1b502672a..66a771668d 100644 --- a/modules/gpu/src/resize.cpp +++ b/modules/gpu/src/resize.cpp @@ -57,6 +57,18 @@ namespace cv { namespace gpu { namespace device void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& stream) { typedef void (*func_t)(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {device::resize , 0 /*device::resize*/ , device::resize , device::resize }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {device::resize , 0 /*device::resize*/ , device::resize , device::resize } + }; +#else static const func_t funcs[6][4] = { {device::resize , 0 /*device::resize*/ , device::resize , device::resize }, @@ -66,6 +78,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub {0 /*device::resize*/ , 0 /*device::resize*/ , 0 /*device::resize*/ , 0 /*device::resize*/ }, {device::resize , 0 /*device::resize*/ , device::resize , device::resize } }; +#endif CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 ); CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA ); From 2e47a1a61be525a408629f0f19b490083773c0b4 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 16:27:20 +0300 Subject: [PATCH 12/21] reduce pyramids instantiates for tiny build --- modules/gpu/src/cuda/pyr_down.cu | 2 ++ modules/gpu/src/cuda/pyr_up.cu | 2 ++ modules/gpu/src/pyramids.cpp | 24 ++++++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/modules/gpu/src/cuda/pyr_down.cu b/modules/gpu/src/cuda/pyr_down.cu index eac7928826..af0e18d888 100644 --- 
a/modules/gpu/src/cuda/pyr_down.cu +++ b/modules/gpu/src/cuda/pyr_down.cu @@ -197,6 +197,7 @@ namespace cv { namespace gpu { namespace device template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); @@ -216,6 +217,7 @@ namespace cv { namespace gpu { namespace device //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); diff --git a/modules/gpu/src/cuda/pyr_up.cu b/modules/gpu/src/cuda/pyr_up.cu index b14d124e7c..ffb6276622 100644 --- a/modules/gpu/src/cuda/pyr_up.cu +++ b/modules/gpu/src/cuda/pyr_up.cu @@ -166,6 +166,7 @@ namespace cv { namespace gpu { namespace device template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); @@ -185,6 +186,7 @@ namespace cv { namespace gpu { namespace device //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void 
pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); diff --git a/modules/gpu/src/pyramids.cpp b/modules/gpu/src/pyramids.cpp index 85fb99040c..b4d4676587 100644 --- a/modules/gpu/src/pyramids.cpp +++ b/modules/gpu/src/pyramids.cpp @@ -68,6 +68,17 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream) typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {pyrDown_gpu , 0 /*pyrDown_gpu*/ , pyrDown_gpu , pyrDown_gpu }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {pyrDown_gpu , 0 /*pyrDown_gpu*/ , pyrDown_gpu , pyrDown_gpu } + }; +#else static const func_t funcs[6][4] = { {pyrDown_gpu , 0 /*pyrDown_gpu*/ , pyrDown_gpu , pyrDown_gpu }, @@ -77,6 +88,7 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream) {0 /*pyrDown_gpu*/ , 0 /*pyrDown_gpu*/ , 0 /*pyrDown_gpu*/ , 0 /*pyrDown_gpu*/ }, {pyrDown_gpu , 0 /*pyrDown_gpu*/ , pyrDown_gpu , pyrDown_gpu } }; +#endif CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); @@ -106,6 +118,17 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream) typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {pyrUp_gpu , 0 /*pyrUp_gpu*/ , pyrUp_gpu , pyrUp_gpu }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {pyrUp_gpu , 0 /*pyrUp_gpu*/ , pyrUp_gpu , pyrUp_gpu } + }; +#else static const func_t funcs[6][4] = { {pyrUp_gpu , 0 /*pyrUp_gpu*/ , pyrUp_gpu , pyrUp_gpu }, @@ -115,6 +138,7 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream) {0 /*pyrUp_gpu*/ , 0 /*pyrUp_gpu*/ , 0 /*pyrUp_gpu*/ , 0 /*pyrUp_gpu*/ }, {pyrUp_gpu , 0 /*pyrUp_gpu*/ , pyrUp_gpu , pyrUp_gpu } }; 
+#endif CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); From b44b1ab47b791b8e61d922412bb054302dffbbab Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 16:37:41 +0300 Subject: [PATCH 13/21] reduce matrix_reductions instantiates for tiny build --- modules/gpu/src/cuda/matrix_reductions.cu | 36 +++ modules/gpu/src/matrix_reductions.cpp | 262 ++++++++++++++++++++-- 2 files changed, 281 insertions(+), 17 deletions(-) diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu index 745daca1db..6048d41688 100644 --- a/modules/gpu/src/cuda/matrix_reductions.cu +++ b/modules/gpu/src/cuda/matrix_reductions.cu @@ -462,6 +462,7 @@ namespace sum } template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -485,8 +486,10 @@ namespace sum template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -495,6 +498,7 @@ namespace sum template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template void runAbs(PtrStepSzb src, void* buf, double* out, 
PtrStepSzb mask) @@ -504,6 +508,7 @@ namespace sum } template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -527,8 +532,10 @@ namespace sum template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -537,6 +544,7 @@ namespace sum template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template struct Sqr : unary_function { @@ -553,6 +561,7 @@ namespace sum } template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -576,8 +585,10 @@ namespace sum template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb 
mask); +#endif template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -586,6 +597,7 @@ namespace sum template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif } ///////////////////////////////////////////////////////////// @@ -773,12 +785,16 @@ namespace minMax } template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#endif template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#endif } ///////////////////////////////////////////////////////////// @@ -955,12 +971,16 @@ namespace minMaxLoc } template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, 
PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#endif template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#endif } ///////////////////////////////////////////////////////////// @@ -1079,12 +1099,16 @@ namespace countNonZero } template int run(const PtrStepSzb src, PtrStep buf); +#ifndef OPENCV_TINY_GPU_MODULE template int run(const PtrStepSzb src, PtrStep buf); template int run(const PtrStepSzb src, PtrStep buf); template int run(const PtrStepSzb src, PtrStep buf); template int run(const PtrStepSzb src, PtrStep buf); +#endif template int run(const PtrStepSzb src, PtrStep buf); +#ifndef OPENCV_TINY_GPU_MODULE template int run(const PtrStepSzb src, PtrStep buf); +#endif } ////////////////////////////////////////////////////////////////////////////// @@ -1257,6 +1281,11 @@ namespace reduce funcs[op]((PtrStepSz) src, (D*) dst, stream); } +#ifdef OPENCV_TINY_GPU_MODULE + template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); + template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); + template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); +#else template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t 
stream); template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); @@ -1280,6 +1309,7 @@ namespace reduce template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); +#endif /////////////////////////////////////////////////////////// @@ -1338,6 +1368,11 @@ namespace reduce funcs[cn][op](src, dst, stream); } +#ifdef OPENCV_TINY_GPU_MODULE + template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); + template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); + template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); +#else template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); @@ -1361,6 +1396,7 @@ namespace reduce template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); +#endif } #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp index 4e09246e78..c7a760cf5e 100644 --- a/modules/gpu/src/matrix_reductions.cpp +++ b/modules/gpu/src/matrix_reductions.cpp @@ -261,6 +261,18 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf) Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) { typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][5] = + { + {0, ::sum::run, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, ::sum::run, 0, 0, 0}, + {0, 0, 0, 0, 0}, + }; +#else static const func_t funcs[7][5] = { {0, ::sum::run, ::sum::run, ::sum::run, ::sum::run}, @@ -271,6 +283,7 @@ 
Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) {0, ::sum::run, ::sum::run, ::sum::run, ::sum::run}, {0, ::sum::run, ::sum::run, ::sum::run, ::sum::run} }; +#endif CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); @@ -286,6 +299,8 @@ Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) buf.setTo(Scalar::all(0)); const func_t func = funcs[src.depth()][src.channels()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double result[4]; func(src, buf.data, result, mask); @@ -307,6 +322,18 @@ Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf) Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) { typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][5] = + { + {0, ::sum::runAbs, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, ::sum::runAbs, 0, 0, 0}, + {0, 0, 0, 0, 0}, + }; +#else static const func_t funcs[7][5] = { {0, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs}, @@ -317,6 +344,7 @@ Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) {0, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs}, {0, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs} }; +#endif CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); @@ -332,6 +360,8 @@ Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) buf.setTo(Scalar::all(0)); const func_t func = funcs[src.depth()][src.channels()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double result[4]; func(src, buf.data, result, mask); @@ -353,6 +383,18 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf) Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) { 
typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][5] = + { + {0, ::sum::runSqr, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, ::sum::runSqr, 0, 0, 0}, + {0, 0, 0, 0, 0}, + }; +#else static const func_t funcs[7][5] = { {0, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr}, @@ -363,6 +405,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) {0, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr}, {0, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr} }; +#endif CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); @@ -378,6 +421,8 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) buf.setTo(Scalar::all(0)); const func_t func = funcs[src.depth()][src.channels()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double result[4]; func(src, buf.data, result, mask); @@ -405,6 +450,18 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf) { typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + ::minMax::run, + 0/*::minMax::run*/, + 0/*::minMax::run*/, + 0/*::minMax::run*/, + 0/*::minMax::run*/, + ::minMax::run, + 0/*::minMax::run*/, + }; +#else static const func_t funcs[] = { ::minMax::run, @@ -413,8 +470,9 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp ::minMax::run, ::minMax::run, ::minMax::run, - ::minMax::run + ::minMax::run, }; +#endif CV_Assert( src.channels() == 1 ); CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) ); @@ -430,6 +488,8 @@ 
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp ensureSizeIsEnough(buf_size, CV_8U, buf); const func_t func = funcs[src.depth()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double temp1, temp2; func(src, mask, minVal ? minVal : &temp1, maxVal ? maxVal : &temp2, buf); @@ -456,6 +516,18 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf) { typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + ::minMaxLoc::run, + 0/*::minMaxLoc::run*/, + 0/*::minMaxLoc::run*/, + 0/*::minMaxLoc::run*/, + 0/*::minMaxLoc::run*/, + ::minMaxLoc::run, + 0/*::minMaxLoc::run*/, + }; +#else static const func_t funcs[] = { ::minMaxLoc::run, @@ -464,8 +536,9 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point ::minMaxLoc::run, ::minMaxLoc::run, ::minMaxLoc::run, - ::minMaxLoc::run + ::minMaxLoc::run, }; +#endif CV_Assert( src.channels() == 1 ); CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) ); @@ -482,6 +555,8 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point ensureSizeIsEnough(locbuf_size, CV_8U, locBuf); const func_t func = funcs[src.depth()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double temp1, temp2; Point temp3, temp4; @@ -508,6 +583,18 @@ int cv::gpu::countNonZero(const GpuMat& src) int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) { typedef int (*func_t)(const PtrStepSzb src, PtrStep buf); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + ::countNonZero::run, + 0/*::countNonZero::run*/, + 0/*::countNonZero::run*/, + 0/*::countNonZero::run*/, + 
0/*::countNonZero::run*/, + ::countNonZero::run, + 0/*::countNonZero::run*/, + }; +#else static const func_t funcs[] = { ::countNonZero::run, @@ -516,8 +603,9 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) ::countNonZero::run, ::countNonZero::run, ::countNonZero::run, - ::countNonZero::run + ::countNonZero::run, }; +#endif CV_Assert(src.channels() == 1); @@ -532,6 +620,8 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) ensureSizeIsEnough(buf_size, CV_8U, buf); const func_t func = funcs[src.depth()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); return func(src, buf); } @@ -562,6 +652,74 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int if (dim == 0) { typedef void (*func_t)(PtrStepSzb src, void* dst, int op, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + ::reduce::rows, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + ::reduce::rows, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + ::reduce::rows, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 
0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -571,7 +729,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -580,7 +738,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, 0/*::reduce::rows*/, 0/*::reduce::rows*/, - 0/*::reduce::rows*/ + 0/*::reduce::rows*/, }, { 0/*::reduce::rows*/, @@ -589,7 +747,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -598,7 +756,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int ::reduce::rows, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -607,7 +765,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -616,7 +774,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, 0/*::reduce::rows*/, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -625,9 +783,10 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, 0/*::reduce::rows*/, 0/*::reduce::rows*/, - ::reduce::rows + ::reduce::rows, } }; +#endif const func_t func = funcs[src.depth()][dst.depth()]; @@ -639,6 +798,74 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int else { typedef void (*func_t)(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = 
+ { + { + ::reduce::cols, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + ::reduce::cols, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + ::reduce::cols, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -648,7 +875,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -657,7 +884,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, 0/*::reduce::cols*/, 0/*::reduce::cols*/, - 0/*::reduce::cols*/ + 0/*::reduce::cols*/, }, { 0/*::reduce::cols*/, @@ -666,7 +893,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -675,7 +902,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 
::reduce::cols, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -684,7 +911,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -693,7 +920,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, 0/*::reduce::cols*/, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -702,9 +929,10 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, 0/*::reduce::cols*/, 0/*::reduce::cols*/, - ::reduce::cols + ::reduce::cols, } }; +#endif const func_t func = funcs[src.depth()][dst.depth()]; From 6d7a61cbdc91b9c053e26be7972dd3a397258456 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 16:41:12 +0300 Subject: [PATCH 14/21] reduce copy_make_border instantiates for tiny build --- modules/gpu/src/cuda/copy_make_border.cu | 8 ++++++++ modules/gpu/src/imgproc.cpp | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/modules/gpu/src/cuda/copy_make_border.cu b/modules/gpu/src/cuda/copy_make_border.cu index 5553810ebd..3bd0c8f5e9 100644 --- a/modules/gpu/src/cuda/copy_make_border.cu +++ b/modules/gpu/src/cuda/copy_make_border.cu @@ -90,17 +90,24 @@ namespace cv { namespace gpu { namespace device CopyMakeBorderDispatcher::call, CopyMakeBorderDispatcher::call, CopyMakeBorderDispatcher::call, + #ifdef OPENCV_TINY_GPU_MODULE + 0, + #else CopyMakeBorderDispatcher::call + #endif }; callers[borderMode](PtrStepSz(src), PtrStepSz(dst), top, left, borderValue, stream); } template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int 
top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream); +#endif template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream); template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream); //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream); //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream); @@ -120,6 +127,7 @@ namespace cv { namespace gpu { namespace device //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream); //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream); //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream); +#endif template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream); //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream); diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp index 66f838f77a..a8f1864054 100644 --- a/modules/gpu/src/imgproc.cpp +++ b/modules/gpu/src/imgproc.cpp 
@@ -336,6 +336,17 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom #endif { typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[6][4] = + { + { copyMakeBorder_caller , 0/*copyMakeBorder_caller*/, copyMakeBorder_caller , copyMakeBorder_caller}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + { copyMakeBorder_caller , 0/*copyMakeBorder_caller*/ , copyMakeBorder_caller , copyMakeBorder_caller} + }; +#else static const caller_t callers[6][4] = { { copyMakeBorder_caller , copyMakeBorder_caller , copyMakeBorder_caller , copyMakeBorder_caller}, @@ -345,6 +356,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom {0/*copyMakeBorder_caller*/, 0/*copyMakeBorder_caller*/ , 0/*copyMakeBorder_caller*/, 0/*copyMakeBorder_caller*/}, { copyMakeBorder_caller , 0/*copyMakeBorder_caller*/ , copyMakeBorder_caller , copyMakeBorder_caller} }; +#endif caller_t func = callers[src.depth()][src.channels() - 1]; CV_Assert(func != 0); From ee316758ca8efc093e46449ff2114a7c720a0928 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 17:03:15 +0300 Subject: [PATCH 15/21] reduce color instantiates for tiny build --- modules/gpu/src/color.cpp | 150 ++++++++++++++++----------- modules/gpu/src/cuda/color.cu | 14 ++- modules/gpu/src/cvt_color_internal.h | 14 ++- 3 files changed, 110 insertions(+), 68 deletions(-) diff --git a/modules/gpu/src/color.cpp b/modules/gpu/src/color.cpp index 66a1ad6791..09b8be526f 100644 --- a/modules/gpu/src/color.cpp +++ b/modules/gpu/src/color.cpp @@ -71,6 +71,12 @@ namespace cv { namespace gpu { using namespace ::cv::gpu::device; +#ifdef OPENCV_TINY_GPU_MODULE + #define APPEND_16U(func) 0 +#else + #define APPEND_16U(func) func ## _16u +#endif + namespace { typedef void (*gpu_func_t)(PtrStepSzb src, 
PtrStepSzb dst, cudaStream_t stream); @@ -78,10 +84,11 @@ namespace void bgr_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgr_to_rgb_8u, 0, bgr_to_rgb_16u, 0, 0, bgr_to_rgb_32f}; + static const gpu_func_t funcs[] = {bgr_to_rgb_8u, 0, APPEND_16U(bgr_to_rgb), 0, 0, bgr_to_rgb_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); @@ -91,10 +98,11 @@ namespace void bgr_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgr_to_bgra_8u, 0, bgr_to_bgra_16u, 0, 0, bgr_to_bgra_32f}; + static const gpu_func_t funcs[] = {bgr_to_bgra_8u, 0, APPEND_16U(bgr_to_bgra), 0, 0, bgr_to_bgra_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); @@ -104,10 +112,11 @@ namespace void bgr_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgr_to_rgba_8u, 0, bgr_to_rgba_16u, 0, 0, bgr_to_rgba_32f}; + static const gpu_func_t funcs[] = {bgr_to_rgba_8u, 0, APPEND_16U(bgr_to_rgba), 0, 0, bgr_to_rgba_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); @@ -117,10 +126,11 @@ namespace void bgra_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgra_to_bgr_8u, 0, bgra_to_bgr_16u, 0, 0, bgra_to_bgr_32f}; + static const gpu_func_t funcs[] = {bgra_to_bgr_8u, 0, APPEND_16U(bgra_to_bgr), 0, 0, 
bgra_to_bgr_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 4); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); @@ -130,10 +140,11 @@ namespace void bgra_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgra_to_rgb_8u, 0, bgra_to_rgb_16u, 0, 0, bgra_to_rgb_32f}; + static const gpu_func_t funcs[] = {bgra_to_rgb_8u, 0, APPEND_16U(bgra_to_rgb), 0, 0, bgra_to_rgb_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 4); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); @@ -143,10 +154,11 @@ namespace void bgra_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgra_to_rgba_8u, 0, bgra_to_rgba_16u, 0, 0, bgra_to_rgba_32f}; + static const gpu_func_t funcs[] = {bgra_to_rgba_8u, 0, APPEND_16U(bgra_to_rgba), 0, 0, bgra_to_rgba_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 4); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); @@ -316,10 +328,11 @@ namespace void gray_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {gray_to_bgr_8u, 0, gray_to_bgr_16u, 0, 0, gray_to_bgr_32f}; + static const gpu_func_t funcs[] = {gray_to_bgr_8u, 0, APPEND_16U(gray_to_bgr), 0, 0, gray_to_bgr_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 1); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); @@ -329,10 +342,11 @@ namespace void gray_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace 
cv::gpu::device; - static const gpu_func_t funcs[] = {gray_to_bgra_8u, 0, gray_to_bgra_16u, 0, 0, gray_to_bgra_32f}; + static const gpu_func_t funcs[] = {gray_to_bgra_8u, 0, APPEND_16U(gray_to_bgra), 0, 0, gray_to_bgra_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 1); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); @@ -382,10 +396,11 @@ namespace void rgb_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {rgb_to_gray_8u, 0, rgb_to_gray_16u, 0, 0, rgb_to_gray_32f}; + static const gpu_func_t funcs[] = {rgb_to_gray_8u, 0, APPEND_16U(rgb_to_gray), 0, 0, rgb_to_gray_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); @@ -395,10 +410,11 @@ namespace void bgr_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgr_to_gray_8u, 0, bgr_to_gray_16u, 0, 0, bgr_to_gray_32f}; + static const gpu_func_t funcs[] = {bgr_to_gray_8u, 0, APPEND_16U(bgr_to_gray), 0, 0, bgr_to_gray_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); @@ -408,10 +424,11 @@ namespace void rgba_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {rgba_to_gray_8u, 0, rgba_to_gray_16u, 0, 0, rgba_to_gray_32f}; + static const gpu_func_t funcs[] = {rgba_to_gray_8u, 0, APPEND_16U(rgba_to_gray), 0, 0, rgba_to_gray_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 4); + 
CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); @@ -421,10 +438,11 @@ namespace void bgra_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgra_to_gray_8u, 0, bgra_to_gray_16u, 0, 0, bgra_to_gray_32f}; + static const gpu_func_t funcs[] = {bgra_to_gray_8u, 0, APPEND_16U(bgra_to_gray), 0, 0, bgra_to_gray_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 4); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); @@ -437,12 +455,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {rgb_to_yuv_8u, 0, rgb_to_yuv_16u, 0, 0, rgb_to_yuv_32f}, - {rgba_to_yuv_8u, 0, rgba_to_yuv_16u, 0, 0, rgba_to_yuv_32f} + {rgb_to_yuv_8u, 0, APPEND_16U(rgb_to_yuv), 0, 0, rgb_to_yuv_32f}, + {rgba_to_yuv_8u, 0, APPEND_16U(rgba_to_yuv), 0, 0, rgba_to_yuv_32f} }, { - {rgb_to_yuv4_8u, 0, rgb_to_yuv4_16u, 0, 0, rgb_to_yuv4_32f}, - {rgba_to_yuv4_8u, 0, rgba_to_yuv4_16u, 0, 0, rgba_to_yuv4_32f} + {rgb_to_yuv4_8u, 0, APPEND_16U(rgb_to_yuv4), 0, 0, rgb_to_yuv4_32f}, + {rgba_to_yuv4_8u, 0, APPEND_16U(rgba_to_yuv4), 0, 0, rgba_to_yuv4_32f} } }; @@ -451,6 +469,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -463,12 +482,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {bgr_to_yuv_8u, 0, bgr_to_yuv_16u, 0, 0, bgr_to_yuv_32f}, - {bgra_to_yuv_8u, 0, bgra_to_yuv_16u, 0, 0, bgra_to_yuv_32f} + {bgr_to_yuv_8u, 0, APPEND_16U(bgr_to_yuv), 0, 0, bgr_to_yuv_32f}, + {bgra_to_yuv_8u, 0, APPEND_16U(bgra_to_yuv), 0, 0, bgra_to_yuv_32f} }, { - {bgr_to_yuv4_8u, 0, bgr_to_yuv4_16u, 0, 0, bgr_to_yuv4_32f}, - 
{bgra_to_yuv4_8u, 0, bgra_to_yuv4_16u, 0, 0, bgra_to_yuv4_32f} + {bgr_to_yuv4_8u, 0, APPEND_16U(bgr_to_yuv4), 0, 0, bgr_to_yuv4_32f}, + {bgra_to_yuv4_8u, 0, APPEND_16U(bgra_to_yuv4), 0, 0, bgra_to_yuv4_32f} } }; @@ -477,6 +496,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -489,12 +509,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {yuv_to_rgb_8u, 0, yuv_to_rgb_16u, 0, 0, yuv_to_rgb_32f}, - {yuv4_to_rgb_8u, 0, yuv4_to_rgb_16u, 0, 0, yuv4_to_rgb_32f} + {yuv_to_rgb_8u, 0, APPEND_16U(yuv_to_rgb), 0, 0, yuv_to_rgb_32f}, + {yuv4_to_rgb_8u, 0, APPEND_16U(yuv4_to_rgb), 0, 0, yuv4_to_rgb_32f} }, { - {yuv_to_rgba_8u, 0, yuv_to_rgba_16u, 0, 0, yuv_to_rgba_32f}, - {yuv4_to_rgba_8u, 0, yuv4_to_rgba_16u, 0, 0, yuv4_to_rgba_32f} + {yuv_to_rgba_8u, 0, APPEND_16U(yuv_to_rgba), 0, 0, yuv_to_rgba_32f}, + {yuv4_to_rgba_8u, 0, APPEND_16U(yuv4_to_rgba), 0, 0, yuv4_to_rgba_32f} } }; @@ -503,6 +523,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -515,12 +536,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {yuv_to_bgr_8u, 0, yuv_to_bgr_16u, 0, 0, yuv_to_bgr_32f}, - {yuv4_to_bgr_8u, 0, yuv4_to_bgr_16u, 0, 0, yuv4_to_bgr_32f} + {yuv_to_bgr_8u, 0, APPEND_16U(yuv_to_bgr), 0, 0, yuv_to_bgr_32f}, + {yuv4_to_bgr_8u, 0, APPEND_16U(yuv4_to_bgr), 0, 0, yuv4_to_bgr_32f} }, { - {yuv_to_bgra_8u, 0, yuv_to_bgra_16u, 0, 0, yuv_to_bgra_32f}, - {yuv4_to_bgra_8u, 0, yuv4_to_bgra_16u, 0, 0, yuv4_to_bgra_32f} + {yuv_to_bgra_8u, 0, APPEND_16U(yuv_to_bgra), 
0, 0, yuv_to_bgra_32f}, + {yuv4_to_bgra_8u, 0, APPEND_16U(yuv4_to_bgra), 0, 0, yuv4_to_bgra_32f} } }; @@ -529,6 +550,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -541,12 +563,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {rgb_to_YCrCb_8u, 0, rgb_to_YCrCb_16u, 0, 0, rgb_to_YCrCb_32f}, - {rgba_to_YCrCb_8u, 0, rgba_to_YCrCb_16u, 0, 0, rgba_to_YCrCb_32f} + {rgb_to_YCrCb_8u, 0, APPEND_16U(rgb_to_YCrCb), 0, 0, rgb_to_YCrCb_32f}, + {rgba_to_YCrCb_8u, 0, APPEND_16U(rgba_to_YCrCb), 0, 0, rgba_to_YCrCb_32f} }, { - {rgb_to_YCrCb4_8u, 0, rgb_to_YCrCb4_16u, 0, 0, rgb_to_YCrCb4_32f}, - {rgba_to_YCrCb4_8u, 0, rgba_to_YCrCb4_16u, 0, 0, rgba_to_YCrCb4_32f} + {rgb_to_YCrCb4_8u, 0, APPEND_16U(rgb_to_YCrCb4), 0, 0, rgb_to_YCrCb4_32f}, + {rgba_to_YCrCb4_8u, 0, APPEND_16U(rgba_to_YCrCb4), 0, 0, rgba_to_YCrCb4_32f} } }; @@ -555,6 +577,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -567,12 +590,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {bgr_to_YCrCb_8u, 0, bgr_to_YCrCb_16u, 0, 0, bgr_to_YCrCb_32f}, - {bgra_to_YCrCb_8u, 0, bgra_to_YCrCb_16u, 0, 0, bgra_to_YCrCb_32f} + {bgr_to_YCrCb_8u, 0, APPEND_16U(bgr_to_YCrCb), 0, 0, bgr_to_YCrCb_32f}, + {bgra_to_YCrCb_8u, 0, APPEND_16U(bgra_to_YCrCb), 0, 0, bgra_to_YCrCb_32f} }, { - {bgr_to_YCrCb4_8u, 0, bgr_to_YCrCb4_16u, 0, 0, bgr_to_YCrCb4_32f}, - {bgra_to_YCrCb4_8u, 0, bgra_to_YCrCb4_16u, 0, 0, bgra_to_YCrCb4_32f} + {bgr_to_YCrCb4_8u, 0, APPEND_16U(bgr_to_YCrCb4), 0, 0, 
bgr_to_YCrCb4_32f}, + {bgra_to_YCrCb4_8u, 0, APPEND_16U(bgra_to_YCrCb4), 0, 0, bgra_to_YCrCb4_32f} } }; @@ -581,6 +604,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -593,12 +617,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {YCrCb_to_rgb_8u, 0, YCrCb_to_rgb_16u, 0, 0, YCrCb_to_rgb_32f}, - {YCrCb4_to_rgb_8u, 0, YCrCb4_to_rgb_16u, 0, 0, YCrCb4_to_rgb_32f} + {YCrCb_to_rgb_8u, 0, APPEND_16U(YCrCb_to_rgb), 0, 0, YCrCb_to_rgb_32f}, + {YCrCb4_to_rgb_8u, 0, APPEND_16U(YCrCb4_to_rgb), 0, 0, YCrCb4_to_rgb_32f} }, { - {YCrCb_to_rgba_8u, 0, YCrCb_to_rgba_16u, 0, 0, YCrCb_to_rgba_32f}, - {YCrCb4_to_rgba_8u, 0, YCrCb4_to_rgba_16u, 0, 0, YCrCb4_to_rgba_32f} + {YCrCb_to_rgba_8u, 0, APPEND_16U(YCrCb_to_rgba), 0, 0, YCrCb_to_rgba_32f}, + {YCrCb4_to_rgba_8u, 0, APPEND_16U(YCrCb4_to_rgba), 0, 0, YCrCb4_to_rgba_32f} } }; @@ -607,6 +631,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -619,12 +644,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {YCrCb_to_bgr_8u, 0, YCrCb_to_bgr_16u, 0, 0, YCrCb_to_bgr_32f}, - {YCrCb4_to_bgr_8u, 0, YCrCb4_to_bgr_16u, 0, 0, YCrCb4_to_bgr_32f} + {YCrCb_to_bgr_8u, 0, APPEND_16U(YCrCb_to_bgr), 0, 0, YCrCb_to_bgr_32f}, + {YCrCb4_to_bgr_8u, 0, APPEND_16U(YCrCb4_to_bgr), 0, 0, YCrCb4_to_bgr_32f} }, { - {YCrCb_to_bgra_8u, 0, YCrCb_to_bgra_16u, 0, 0, YCrCb_to_bgra_32f}, - {YCrCb4_to_bgra_8u, 0, YCrCb4_to_bgra_16u, 0, 0, YCrCb4_to_bgra_32f} + {YCrCb_to_bgra_8u, 0, APPEND_16U(YCrCb_to_bgra), 0, 0, 
YCrCb_to_bgra_32f}, + {YCrCb4_to_bgra_8u, 0, APPEND_16U(YCrCb4_to_bgra), 0, 0, YCrCb4_to_bgra_32f} } }; @@ -633,6 +658,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -645,12 +671,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {rgb_to_xyz_8u, 0, rgb_to_xyz_16u, 0, 0, rgb_to_xyz_32f}, - {rgba_to_xyz_8u, 0, rgba_to_xyz_16u, 0, 0, rgba_to_xyz_32f} + {rgb_to_xyz_8u, 0, APPEND_16U(rgb_to_xyz), 0, 0, rgb_to_xyz_32f}, + {rgba_to_xyz_8u, 0, APPEND_16U(rgba_to_xyz), 0, 0, rgba_to_xyz_32f} }, { - {rgb_to_xyz4_8u, 0, rgb_to_xyz4_16u, 0, 0, rgb_to_xyz4_32f}, - {rgba_to_xyz4_8u, 0, rgba_to_xyz4_16u, 0, 0, rgba_to_xyz4_32f} + {rgb_to_xyz4_8u, 0, APPEND_16U(rgb_to_xyz4), 0, 0, rgb_to_xyz4_32f}, + {rgba_to_xyz4_8u, 0, APPEND_16U(rgba_to_xyz4), 0, 0, rgba_to_xyz4_32f} } }; @@ -659,6 +685,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -671,12 +698,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {bgr_to_xyz_8u, 0, bgr_to_xyz_16u, 0, 0, bgr_to_xyz_32f}, - {bgra_to_xyz_8u, 0, bgra_to_xyz_16u, 0, 0, bgra_to_xyz_32f} + {bgr_to_xyz_8u, 0, APPEND_16U(bgr_to_xyz), 0, 0, bgr_to_xyz_32f}, + {bgra_to_xyz_8u, 0, APPEND_16U(bgra_to_xyz), 0, 0, bgra_to_xyz_32f} }, { - {bgr_to_xyz4_8u, 0, bgr_to_xyz4_16u, 0, 0, bgr_to_xyz4_32f}, - {bgra_to_xyz4_8u, 0, bgra_to_xyz4_16u, 0, 0, bgra_to_xyz4_32f} + {bgr_to_xyz4_8u, 0, APPEND_16U(bgr_to_xyz4), 0, 0, bgr_to_xyz4_32f}, + {bgra_to_xyz4_8u, 0, APPEND_16U(bgra_to_xyz4), 0, 0, bgra_to_xyz4_32f} } }; @@ 
-685,6 +712,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -697,12 +725,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {xyz_to_rgb_8u, 0, xyz_to_rgb_16u, 0, 0, xyz_to_rgb_32f}, - {xyz4_to_rgb_8u, 0, xyz4_to_rgb_16u, 0, 0, xyz4_to_rgb_32f} + {xyz_to_rgb_8u, 0, APPEND_16U(xyz_to_rgb), 0, 0, xyz_to_rgb_32f}, + {xyz4_to_rgb_8u, 0, APPEND_16U(xyz4_to_rgb), 0, 0, xyz4_to_rgb_32f} }, { - {xyz_to_rgba_8u, 0, xyz_to_rgba_16u, 0, 0, xyz_to_rgba_32f}, - {xyz4_to_rgba_8u, 0, xyz4_to_rgba_16u, 0, 0, xyz4_to_rgba_32f} + {xyz_to_rgba_8u, 0, APPEND_16U(xyz_to_rgba), 0, 0, xyz_to_rgba_32f}, + {xyz4_to_rgba_8u, 0, APPEND_16U(xyz4_to_rgba), 0, 0, xyz4_to_rgba_32f} } }; @@ -711,6 +739,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -723,12 +752,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {xyz_to_bgr_8u, 0, xyz_to_bgr_16u, 0, 0, xyz_to_bgr_32f}, - {xyz4_to_bgr_8u, 0, xyz4_to_bgr_16u, 0, 0, xyz4_to_bgr_32f} + {xyz_to_bgr_8u, 0, APPEND_16U(xyz_to_bgr), 0, 0, xyz_to_bgr_32f}, + {xyz4_to_bgr_8u, 0, APPEND_16U(xyz4_to_bgr), 0, 0, xyz4_to_bgr_32f} }, { - {xyz_to_bgra_8u, 0, xyz_to_bgra_16u, 0, 0, xyz_to_bgra_32f}, - {xyz4_to_bgra_8u, 0, xyz4_to_bgra_16u, 0, 0, xyz4_to_bgra_32f} + {xyz_to_bgra_8u, 0, APPEND_16U(xyz_to_bgra), 0, 0, xyz_to_bgra_32f}, + {xyz4_to_bgra_8u, 0, APPEND_16U(xyz4_to_bgra), 0, 0, xyz4_to_bgra_32f} } }; @@ -737,6 +766,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); 
CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpu/src/cuda/color.cu index 5d8f6cbbb5..3ac0c111c8 100644 --- a/modules/gpu/src/cuda/color.cu +++ b/modules/gpu/src/cuda/color.cu @@ -235,10 +235,16 @@ namespace cv { namespace gpu { namespace device #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \ OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, name ## _traits) -#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _16u, name ## _traits) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits) +#ifdef OPENCV_TINY_GPU_MODULE + #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \ + OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits) \ + OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits) +#else + #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \ + OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits) \ + OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _16u, name ## _traits) \ + OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits) +#endif #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(name) \ OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits) \ diff --git a/modules/gpu/src/cvt_color_internal.h b/modules/gpu/src/cvt_color_internal.h index 1b7c68f35f..f108da827f 100644 --- a/modules/gpu/src/cvt_color_internal.h +++ b/modules/gpu/src/cvt_color_internal.h @@ -48,10 +48,16 @@ namespace cv { namespace gpu { namespace device #define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \ void name(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -#define OPENCV_GPU_DECLARE_CVTCOLOR_ALL(name) \ - OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \ - OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _16u) \ - OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f) +#ifdef 
OPENCV_TINY_GPU_MODULE + #define OPENCV_GPU_DECLARE_CVTCOLOR_ALL(name) \ + OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \ + OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f) +#else + #define OPENCV_GPU_DECLARE_CVTCOLOR_ALL(name) \ + OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \ + OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _16u) \ + OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f) +#endif #define OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(name) \ OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \ From f939d80f4cbf166b5d6d72600cf6a07b60af6347 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 5 Mar 2015 18:38:36 +0300 Subject: [PATCH 16/21] fixed seg faults --- modules/gpu/src/cuda/copy_make_border.cu | 6 +++++- modules/gpu/src/cuda/resize.cu | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/modules/gpu/src/cuda/copy_make_border.cu b/modules/gpu/src/cuda/copy_make_border.cu index 3bd0c8f5e9..3ae29a38a1 100644 --- a/modules/gpu/src/cuda/copy_make_border.cu +++ b/modules/gpu/src/cuda/copy_make_border.cu @@ -97,7 +97,11 @@ namespace cv { namespace gpu { namespace device #endif }; - callers[borderMode](PtrStepSz(src), PtrStepSz(dst), top, left, borderValue, stream); + const caller_t caller = callers[borderMode]; + if (!caller) + cv::gpu::error("Unsupported input parameters for copyMakeBorder", __FILE__, __LINE__, ""); + + caller(PtrStepSz(src), PtrStepSz(dst), top, left, borderValue, stream); } template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream); diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu index ed3806be85..11a90ab243 100644 --- a/modules/gpu/src/cuda/resize.cu +++ b/modules/gpu/src/cuda/resize.cu @@ -485,7 +485,11 @@ namespace cv { namespace gpu { namespace device if (interpolation == 3 && (fx <= 1.f || fy <= 1.f)) interpolation = 1; - funcs[interpolation](static_cast< PtrStepSz >(src), static_cast< PtrStepSz 
>(srcWhole), yoff, xoff, static_cast< PtrStepSz >(dst), fy, fx, stream); + const func_t func = funcs[interpolation]; + if (!func) + cv::gpu::error("Unsupported input parameters for resize", __FILE__, __LINE__, ""); + + func(static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(srcWhole), yoff, xoff, static_cast< PtrStepSz >(dst), fy, fx, stream); } template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); From ad3123adfdaec8dd1cb95a0d84a3f729d767a201 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 6 Mar 2015 13:58:35 +0300 Subject: [PATCH 17/21] turn on some instantiates that required for other primitives --- modules/gpu/src/cuda/copy_make_border.cu | 2 -- modules/gpu/src/cuda/element_operations.cu | 8 +++++--- modules/gpu/src/cuda/matrix_reductions.cu | 2 +- modules/gpu/src/element_operations.cpp | 4 ++-- modules/gpu/src/imgproc.cpp | 4 ++-- modules/gpu/src/matrix_reductions.cpp | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/modules/gpu/src/cuda/copy_make_border.cu b/modules/gpu/src/cuda/copy_make_border.cu index 3ae29a38a1..ca5a4f779e 100644 --- a/modules/gpu/src/cuda/copy_make_border.cu +++ b/modules/gpu/src/cuda/copy_make_border.cu @@ -105,9 +105,7 @@ namespace cv { namespace gpu { namespace device } template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream); -#ifndef OPENCV_TINY_GPU_MODULE template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream); -#endif template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream); template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, 
int left, int borderMode, const uchar* borderValue, cudaStream_t stream); diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index 1f94f6a5c3..bca89ad643 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -2193,15 +2193,17 @@ namespace arithm transform((PtrStepSz) src1, (PtrStepSz) dst, cv::gpu::device::bind2nd(minimum(), src2), WithOutMask(), stream); } +#ifdef OPENCV_TINY_GPU_MODULE + template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#else template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); -#ifndef OPENCV_TINY_GPU_MODULE template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); -#endif template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); -#ifndef OPENCV_TINY_GPU_MODULE template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); #endif } diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu index 6048d41688..15d6612832 100644 --- a/modules/gpu/src/cuda/matrix_reductions.cu +++ b/modules/gpu/src/cuda/matrix_reductions.cu @@ -975,8 +975,8 @@ namespace minMaxLoc template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, 
double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); - template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); #endif + template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); #ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index bd8ca81bf1..780745d733 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -3717,7 +3717,7 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) 0/*minScalar*/, 0/*minScalar*/, 0/*minScalar*/, - 0/*minScalar*/, + minScalar, minScalar, 0/*minScalar*/, }; @@ -3728,7 +3728,7 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) 0/*castScalar*/, 0/*castScalar*/, 0/*castScalar*/, - 0/*castScalar*/, + castScalar, castScalar, 0/*castScalar*/, }; diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp index a8f1864054..c0dfc44d13 100644 --- a/modules/gpu/src/imgproc.cpp +++ b/modules/gpu/src/imgproc.cpp @@ -339,12 +339,12 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom #ifdef OPENCV_TINY_GPU_MODULE static const caller_t callers[6][4] = { - { copyMakeBorder_caller , 0/*copyMakeBorder_caller*/, copyMakeBorder_caller , copyMakeBorder_caller}, + { 
copyMakeBorder_caller , copyMakeBorder_caller , copyMakeBorder_caller , copyMakeBorder_caller}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, - { copyMakeBorder_caller , 0/*copyMakeBorder_caller*/ , copyMakeBorder_caller , copyMakeBorder_caller} + { copyMakeBorder_caller , 0/*copyMakeBorder_caller*/, copyMakeBorder_caller , copyMakeBorder_caller} }; #else static const caller_t callers[6][4] = diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp index c7a760cf5e..c22790e35d 100644 --- a/modules/gpu/src/matrix_reductions.cpp +++ b/modules/gpu/src/matrix_reductions.cpp @@ -523,7 +523,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point 0/*::minMaxLoc::run*/, 0/*::minMaxLoc::run*/, 0/*::minMaxLoc::run*/, - 0/*::minMaxLoc::run*/, + ::minMaxLoc::run, ::minMaxLoc::run, 0/*::minMaxLoc::run*/, }; From dd93d48be46ceeb1a56c3cbee6ead8e339d2deea Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 6 Mar 2015 13:58:52 +0300 Subject: [PATCH 18/21] turn on stereob instantiates that required for tests --- modules/gpu/src/cuda/stereobm.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gpu/src/cuda/stereobm.cu b/modules/gpu/src/cuda/stereobm.cu index 87af7e52e6..6082e6c3cc 100644 --- a/modules/gpu/src/cuda/stereobm.cu +++ b/modules/gpu/src/cuda/stereobm.cu @@ -342,7 +342,7 @@ namespace cv { namespace gpu { namespace device 0/*kernel_caller< 6>*/, 0/*kernel_caller< 7>*/, 0/*kernel_caller< 8>*/, - 0/*kernel_caller< 9>*/, + kernel_caller< 9>, 0/*kernel_caller<10>*/, 0/*kernel_caller<11>*/, 0/*kernel_caller<12>*/, From f1bec940b1e22ed5fa72e1c393f8ef724ac435a1 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 6 Mar 2015 13:59:08 +0300 Subject: [PATCH 19/21] fix accuracy tests in tiny mode --- modules/gpu/test/test_color.cpp | 4 ++ modules/gpu/test/test_copy_make_border.cpp | 9 +++ modules/gpu/test/test_core.cpp | 76 +++++++++++++++++++++- 
modules/gpu/test/test_features2d.cpp | 10 +++ modules/gpu/test/test_filters.cpp | 22 +++++++ modules/gpu/test/test_imgproc.cpp | 4 ++ modules/gpu/test/test_pyramids.cpp | 8 +++ modules/gpu/test/test_remap.cpp | 5 ++ modules/gpu/test/test_resize.cpp | 16 +++++ modules/gpu/test/test_threshold.cpp | 4 ++ modules/gpu/test/test_warp_affine.cpp | 16 ++++- modules/gpu/test/test_warp_perspective.cpp | 16 ++++- modules/ts/include/opencv2/ts/gpu_test.hpp | 11 ++++ 13 files changed, 198 insertions(+), 3 deletions(-) diff --git a/modules/gpu/test/test_color.cpp b/modules/gpu/test/test_color.cpp index 5720e0c9d0..49a3274c99 100644 --- a/modules/gpu/test/test_color.cpp +++ b/modules/gpu/test/test_color.cpp @@ -2288,7 +2288,11 @@ GPU_TEST_P(CvtColor, BayerGR2Gray) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), +#else testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)), +#endif WHOLE_SUBMAT)); /////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/modules/gpu/test/test_copy_make_border.cpp b/modules/gpu/test/test_copy_make_border.cpp index 24a75c0235..8159ba72a9 100644 --- a/modules/gpu/test/test_copy_make_border.cpp +++ b/modules/gpu/test/test_copy_make_border.cpp @@ -90,6 +90,14 @@ GPU_TEST_P(CopyMakeBorder, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), + MatType(CV_8UC3), + MatType(CV_8UC4), + MatType(CV_32FC1), + MatType(CV_32FC3), + MatType(CV_32FC4)), +#else testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), @@ -99,6 +107,7 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine( MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#endif testing::Values(Border(1), Border(10), Border(50)), 
ALL_BORDER_TYPES, WHOLE_SUBMAT)); diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp index b8b83ef10c..6ad36c094c 100644 --- a/modules/gpu/test/test_core.cpp +++ b/modules/gpu/test/test_core.cpp @@ -1344,7 +1344,11 @@ GPU_TEST_P(Abs, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Core, Abs, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_32F)), +#else testing::Values(MatDepth(CV_16S), MatDepth(CV_32F)), +#endif WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// @@ -1384,10 +1388,14 @@ GPU_TEST_P(Sqr, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_32F)), +#else testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), +#endif WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// @@ -1454,10 +1462,14 @@ GPU_TEST_P(Sqrt, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_32F)), +#else testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), +#endif WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// @@ -1524,10 +1536,14 @@ GPU_TEST_P(Log, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_32F)), +#else testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), +#endif WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// @@ -1604,10 +1620,14 @@ GPU_TEST_P(Exp, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + 
testing::Values(MatDepth(CV_32F)), +#else testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), +#endif WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// @@ -1778,7 +1798,11 @@ GPU_TEST_P(Compare_Scalar, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Core, Compare_Scalar, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#else TYPES(CV_8U, CV_64F, 1, 4), +#endif CmpCode::all(), WHOLE_SUBMAT)); @@ -1939,8 +1963,14 @@ GPU_TEST_P(Bitwise_Scalar, Xor) INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Scalar, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_8U)), + testing::Values(Channels(1)) +#else testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32S)), - IMAGE_CHANNELS)); + IMAGE_CHANNELS +#endif +)); ////////////////////////////////////////////////////////////////////////////// // RShift @@ -2320,7 +2350,11 @@ GPU_TEST_P(Pow, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Core, Pow, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_32F)), +#else ALL_DEPTH, +#endif WHOLE_SUBMAT)); ////////////////////////////////////////////////////////////////////////////// @@ -2380,6 +2414,23 @@ GPU_TEST_P(AddWeighted, Accuracy) } } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core_1, AddWeighted, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U)), + testing::Values(MatDepth(CV_8U)), + testing::Values(MatDepth(CV_8U)), + WHOLE_SUBMAT)); + +INSTANTIATE_TEST_CASE_P(GPU_Core_2, AddWeighted, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_32F)), + testing::Values(MatDepth(CV_32F)), + testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); +#else 
INSTANTIATE_TEST_CASE_P(GPU_Core, AddWeighted, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -2387,6 +2438,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, AddWeighted, testing::Combine( ALL_DEPTH, ALL_DEPTH, WHOLE_SUBMAT)); +#endif ////////////////////////////////////////////////////////////////////////////// // GEMM @@ -2956,12 +3008,17 @@ GPU_TEST_P(Norm, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Core, Norm, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_8U), + MatDepth(CV_32F)), +#else testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F)), +#endif testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF)), WHOLE_SUBMAT)); @@ -3139,7 +3196,11 @@ GPU_TEST_P(Sum, Sqr) INSTANTIATE_TEST_CASE_P(GPU_Core, Sum, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)), +#else TYPES(CV_8U, CV_64F, 1, 4), +#endif WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// @@ -3513,11 +3574,19 @@ PARAM_TEST_CASE(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, Reduc type = CV_MAKE_TYPE(depth, channels); if (reduceOp == CV_REDUCE_MAX || reduceOp == CV_REDUCE_MIN) + { dst_depth = depth; + } +#ifndef OPENCV_TINY_GPU_MODULE else if (reduceOp == CV_REDUCE_SUM) + { dst_depth = depth == CV_8U ? CV_32S : depth < CV_64F ? CV_32F : depth; + } +#endif else + { dst_depth = depth < CV_32F ? 
CV_32F : depth; + } dst_type = CV_MAKE_TYPE(dst_depth, channels); } @@ -3556,11 +3625,16 @@ GPU_TEST_P(Reduce, Cols) INSTANTIATE_TEST_CASE_P(GPU_Core, Reduce, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_8U), + MatDepth(CV_32F)), +#else testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F), MatDepth(CV_64F)), +#endif ALL_CHANNELS, ALL_REDUCE_CODES, WHOLE_SUBMAT)); diff --git a/modules/gpu/test/test_features2d.cpp b/modules/gpu/test/test_features2d.cpp index 697483657e..41f71d9bdf 100644 --- a/modules/gpu/test/test_features2d.cpp +++ b/modules/gpu/test/test_features2d.cpp @@ -310,6 +310,7 @@ GPU_TEST_P(BruteForceMatcher, Match_Single) ASSERT_EQ(0, badCount); } +#ifndef OPENCV_TINY_GPU_MODULE GPU_TEST_P(BruteForceMatcher, Match_Collection) { cv::gpu::BFMatcher_GPU matcher(normCode); @@ -363,6 +364,7 @@ GPU_TEST_P(BruteForceMatcher, Match_Collection) ASSERT_EQ(0, badCount); } +#endif GPU_TEST_P(BruteForceMatcher, KnnMatch_2_Single) { @@ -442,6 +444,7 @@ GPU_TEST_P(BruteForceMatcher, KnnMatch_3_Single) ASSERT_EQ(0, badCount); } +#ifndef OPENCV_TINY_GPU_MODULE GPU_TEST_P(BruteForceMatcher, KnnMatch_2_Collection) { cv::gpu::BFMatcher_GPU matcher(normCode); @@ -565,6 +568,7 @@ GPU_TEST_P(BruteForceMatcher, KnnMatch_3_Collection) ASSERT_EQ(0, badCount); } +#endif GPU_TEST_P(BruteForceMatcher, RadiusMatch_Single) { @@ -615,6 +619,7 @@ GPU_TEST_P(BruteForceMatcher, RadiusMatch_Single) } } +#ifndef OPENCV_TINY_GPU_MODULE GPU_TEST_P(BruteForceMatcher, RadiusMatch_Collection) { cv::gpu::BFMatcher_GPU matcher(normCode); @@ -693,10 +698,15 @@ GPU_TEST_P(BruteForceMatcher, RadiusMatch_Collection) ASSERT_EQ(0, badCount); } } +#endif INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine( ALL_DEVICES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(NormCode(cv::NORM_L2)), +#else testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2)), +#endif 
testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304)), testing::Values(UseMask(false), UseMask(true)))); diff --git a/modules/gpu/test/test_filters.cpp b/modules/gpu/test/test_filters.cpp index cbb6db8a2a..12aa91b602 100644 --- a/modules/gpu/test/test_filters.cpp +++ b/modules/gpu/test/test_filters.cpp @@ -167,7 +167,11 @@ GPU_TEST_P(Sobel, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Filter, Sobel, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), +#else testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), +#endif IMAGE_CHANNELS, testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7))), testing::Values(Deriv_X(0), Deriv_X(1), Deriv_X(2)), @@ -230,7 +234,11 @@ GPU_TEST_P(Scharr, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Filter, Scharr, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), +#else testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), +#endif IMAGE_CHANNELS, testing::Values(Deriv_X(0), Deriv_X(1)), testing::Values(Deriv_Y(0), Deriv_Y(1)), @@ -304,8 +312,17 @@ GPU_TEST_P(GaussianBlur, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), +#else testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), +#endif IMAGE_CHANNELS, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(KSize(cv::Size(3, 3)), + KSize(cv::Size(5, 5)), + KSize(cv::Size(7, 7))), +#else testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), @@ -321,6 +338,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine( 
KSize(cv::Size(27, 27)), KSize(cv::Size(29, 29)), KSize(cv::Size(31, 31))), +#endif testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), @@ -568,7 +586,11 @@ GPU_TEST_P(Filter2D, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)), +#else testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)), +#endif testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))), testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index aa27bfe206..d834048cfe 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -359,7 +359,11 @@ GPU_TEST_P(Canny, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine( ALL_DEVICES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(AppertureSize(3)), +#else testing::Values(AppertureSize(3), AppertureSize(5)), +#endif testing::Values(L2gradient(false), L2gradient(true)), WHOLE_SUBMAT)); diff --git a/modules/gpu/test/test_pyramids.cpp b/modules/gpu/test/test_pyramids.cpp index 6b0540fc10..64bcacc48e 100644 --- a/modules/gpu/test/test_pyramids.cpp +++ b/modules/gpu/test/test_pyramids.cpp @@ -83,7 +83,11 @@ GPU_TEST_P(PyrDown, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), 
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#else testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#endif WHOLE_SUBMAT)); //////////////////////////////////////////////////////// @@ -123,7 +127,11 @@ GPU_TEST_P(PyrUp, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#else testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#endif WHOLE_SUBMAT)); #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_remap.cpp b/modules/gpu/test/test_remap.cpp index eb4b9ece85..58635e4f3b 100644 --- a/modules/gpu/test/test_remap.cpp +++ b/modules/gpu/test/test_remap.cpp @@ -173,8 +173,13 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Remap, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), +#else testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)), +#endif WHOLE_SUBMAT)); #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpu/test/test_resize.cpp 
index 25f0f0e2bb..4adfd8d3ef 100644 --- a/modules/gpu/test/test_resize.cpp +++ b/modules/gpu/test/test_resize.cpp @@ -177,9 +177,17 @@ GPU_TEST_P(Resize, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#else testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#endif testing::Values(0.3, 0.5, 1.5, 2.0), +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), +#else testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), +#endif WHOLE_SUBMAT)); ///////////////// @@ -224,7 +232,11 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#else testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#endif testing::Values(0.3, 0.5), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_AREA)), WHOLE_SUBMAT)); @@ -232,7 +244,11 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_ImgProc2, ResizeSameAsHost, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#else 
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#endif testing::Values(0.3, 0.5, 1.5, 2.0), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), WHOLE_SUBMAT)); diff --git a/modules/gpu/test/test_threshold.cpp b/modules/gpu/test/test_threshold.cpp index 52ebd7f592..3a410f7921 100644 --- a/modules/gpu/test/test_threshold.cpp +++ b/modules/gpu/test/test_threshold.cpp @@ -86,7 +86,11 @@ GPU_TEST_P(Threshold, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Threshold, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)), +#else testing::Values(MatType(CV_8UC1), MatType(CV_16SC1), MatType(CV_32FC1)), +#endif ThreshOp::all(), WHOLE_SUBMAT)); diff --git a/modules/gpu/test/test_warp_affine.cpp b/modules/gpu/test/test_warp_affine.cpp index 43bf0f6d9e..fdcacb0c58 100644 --- a/modules/gpu/test/test_warp_affine.cpp +++ b/modules/gpu/test/test_warp_affine.cpp @@ -225,10 +225,19 @@ GPU_TEST_P(WarpAffine, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffine, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#else testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#endif DIRECT_INVERSE, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT)), +#else testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), 
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)), +#endif WHOLE_SUBMAT)); /////////////////////////////////////////////////////////////////// @@ -275,6 +284,11 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffineNPP, testing::Combine( ALL_DEVICES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), DIRECT_INVERSE, - testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)))); +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)) +#else + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)) +#endif +)); #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_warp_perspective.cpp b/modules/gpu/test/test_warp_perspective.cpp index d225e58b66..e707eb4f41 100644 --- a/modules/gpu/test/test_warp_perspective.cpp +++ b/modules/gpu/test/test_warp_perspective.cpp @@ -228,10 +228,19 @@ GPU_TEST_P(WarpPerspective, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspective, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#else testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), +#endif DIRECT_INVERSE, +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT)), +#else testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), 
Interpolation(cv::INTER_CUBIC)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)), +#endif WHOLE_SUBMAT)); /////////////////////////////////////////////////////////////////// @@ -278,6 +287,11 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspectiveNPP, testing::Combine( ALL_DEVICES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), DIRECT_INVERSE, - testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)))); +#ifdef OPENCV_TINY_GPU_MODULE + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)) +#else + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)) +#endif +)); #endif // HAVE_CUDA diff --git a/modules/ts/include/opencv2/ts/gpu_test.hpp b/modules/ts/include/opencv2/ts/gpu_test.hpp index 943a3536ca..01737bc951 100644 --- a/modules/ts/include/opencv2/ts/gpu_test.hpp +++ b/modules/ts/include/opencv2/ts/gpu_test.hpp @@ -215,6 +215,12 @@ namespace cvtest using perf::MatDepth; +#ifdef OPENCV_TINY_GPU_MODULE + #define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)) + + #define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)), \ + std::make_pair(MatDepth(CV_32F), MatDepth(CV_32F))) +#else #define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F), MatDepth(CV_64F)) #define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)), \ @@ -242,6 +248,7 @@ namespace cvtest std::make_pair(MatDepth(CV_32F), MatDepth(CV_64F)), \ \ std::make_pair(MatDepth(CV_64F), MatDepth(CV_64F))) +#endif // Type @@ -318,7 +325,11 @@ namespace cvtest CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA) 
CV_ENUM(BorderType, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP) +#ifdef OPENCV_TINY_GPU_MODULE + #define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)) +#else #define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)) +#endif CV_FLAGS(WarpFlags, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, WARP_INVERSE_MAP) From 9d294cbcf564cfc76c5055d989a0ffa99c66033c Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 6 Mar 2015 15:45:14 +0300 Subject: [PATCH 20/21] fix performance tests in tiny mode --- modules/gpu/perf/perf_core.cpp | 260 +++++++++++++++------ modules/gpu/perf/perf_features2d.cpp | 35 ++- modules/gpu/perf/perf_filters.cpp | 8 + modules/gpu/perf/perf_imgproc.cpp | 56 +++++ modules/ts/include/opencv2/ts/gpu_perf.hpp | 5 + 5 files changed, 282 insertions(+), 82 deletions(-) diff --git a/modules/gpu/perf/perf_core.cpp b/modules/gpu/perf/perf_core.cpp index ae6ed865b1..21beedce73 100644 --- a/modules/gpu/perf/perf_core.cpp +++ b/modules/gpu/perf/perf_core.cpp @@ -46,7 +46,11 @@ using namespace std; using namespace testing; using namespace perf; +#ifdef OPENCV_TINY_GPU_MODULE +#define ARITHM_MAT_DEPTH Values(CV_8U, CV_32F) +#else #define ARITHM_MAT_DEPTH Values(CV_8U, CV_16U, CV_32F, CV_64F) +#endif ////////////////////////////////////////////////////////////////////// // Merge @@ -524,9 +528,14 @@ PERF_TEST_P(Sz_Depth, Core_AbsDiffScalar, ////////////////////////////////////////////////////////////////////// // Abs -PERF_TEST_P(Sz_Depth, Core_Abs, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_16S, CV_32F))) +PERF_TEST_P(Sz_Depth, Core_Abs, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(MatDepth(CV_32F)) +#else + Values(CV_16S, CV_32F) 
+#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -552,9 +561,14 @@ PERF_TEST_P(Sz_Depth, Core_Abs, ////////////////////////////////////////////////////////////////////// // Sqr -PERF_TEST_P(Sz_Depth, Core_Sqr, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16S, CV_32F))) +PERF_TEST_P(Sz_Depth, Core_Sqr, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(MatDepth(CV_32F)) +#else + Values(CV_8U, CV_16S, CV_32F) +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -580,9 +594,14 @@ PERF_TEST_P(Sz_Depth, Core_Sqr, ////////////////////////////////////////////////////////////////////// // Sqrt -PERF_TEST_P(Sz_Depth, Core_Sqrt, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16S, CV_32F))) +PERF_TEST_P(Sz_Depth, Core_Sqrt, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(MatDepth(CV_32F)) +#else + Values(CV_8U, CV_16S, CV_32F) +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -612,9 +631,14 @@ PERF_TEST_P(Sz_Depth, Core_Sqrt, ////////////////////////////////////////////////////////////////////// // Log -PERF_TEST_P(Sz_Depth, Core_Log, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16S, CV_32F))) +PERF_TEST_P(Sz_Depth, Core_Log, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(MatDepth(CV_32F)) +#else + Values(CV_8U, CV_16S, CV_32F) +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -644,9 +668,14 @@ PERF_TEST_P(Sz_Depth, Core_Log, ////////////////////////////////////////////////////////////////////// // Exp -PERF_TEST_P(Sz_Depth, Core_Exp, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16S, CV_32F))) +PERF_TEST_P(Sz_Depth, Core_Exp, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(MatDepth(CV_32F)) +#else + Values(CV_8U, CV_16S, CV_32F) +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ 
-678,10 +707,15 @@ PERF_TEST_P(Sz_Depth, Core_Exp, DEF_PARAM_TEST(Sz_Depth_Power, cv::Size, MatDepth, double); -PERF_TEST_P(Sz_Depth_Power, Core_Pow, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16S, CV_32F), - Values(0.3, 2.0, 2.4))) +PERF_TEST_P(Sz_Depth_Power, Core_Pow, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(MatDepth(CV_32F)), +#else + Values(CV_8U, CV_16S, CV_32F), +#endif + Values(0.3, 2.0, 2.4) +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -859,10 +893,16 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseAndMat, ////////////////////////////////////////////////////////////////////// // BitwiseAndScalar -PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32S), - GPU_CHANNELS_1_3_4)) +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(MatDepth(CV_8U)), + testing::Values(MatCn(Gray)) +#else + Values(CV_8U, CV_16U, CV_32S), + GPU_CHANNELS_1_3_4 +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -935,10 +975,16 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseOrMat, ////////////////////////////////////////////////////////////////////// // BitwiseOrScalar -PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32S), - GPU_CHANNELS_1_3_4)) +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(MatDepth(CV_8U)), + testing::Values(MatCn(Gray)) +#else + Values(CV_8U, CV_16U, CV_32S), + GPU_CHANNELS_1_3_4 +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1011,10 +1057,16 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseXorMat, ////////////////////////////////////////////////////////////////////// // BitwiseXorScalar -PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, 
CV_32S), - GPU_CHANNELS_1_3_4)) +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(MatDepth(CV_8U)), + testing::Values(MatCn(Gray)) +#else + Values(CV_8U, CV_16U, CV_32S), + GPU_CHANNELS_1_3_4 +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1155,9 +1207,14 @@ PERF_TEST_P(Sz_Depth, Core_MinMat, ////////////////////////////////////////////////////////////////////// // MinScalar -PERF_TEST_P(Sz_Depth, Core_MinScalar, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F))) +PERF_TEST_P(Sz_Depth, Core_MinScalar, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F) +#else + Values(CV_8U, CV_16U, CV_32F) +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1226,9 +1283,14 @@ PERF_TEST_P(Sz_Depth, Core_MaxMat, ////////////////////////////////////////////////////////////////////// // MaxScalar -PERF_TEST_P(Sz_Depth, Core_MaxScalar, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F))) +PERF_TEST_P(Sz_Depth, Core_MaxScalar, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F) +#else + Values(CV_8U, CV_16U, CV_32F) +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1265,9 +1327,16 @@ DEF_PARAM_TEST(Sz_3Depth, cv::Size, MatDepth, MatDepth, MatDepth); PERF_TEST_P(Sz_3Depth, Core_AddWeighted, Combine(GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(MatDepth(CV_32F)), + Values(MatDepth(CV_32F)), + Values(MatDepth(CV_32F)) +#else Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F), - Values(CV_8U, CV_16U, CV_32F, CV_64F))) + Values(CV_8U, CV_16U, CV_32F, CV_64F) +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth1 = GET_PARAM(1); @@ -1784,7 +1853,11 @@ DEF_PARAM_TEST(Sz_Depth_Norm, cv::Size, MatDepth, NormType); PERF_TEST_P(Sz_Depth_Norm, Core_Norm, 
Combine(GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), +#else Values(CV_8U, CV_16U, CV_32S, CV_32F), +#endif Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)))) { const cv::Size size = GET_PARAM(0); @@ -1859,10 +1932,16 @@ PERF_TEST_P(Sz_Norm, Core_NormDiff, ////////////////////////////////////////////////////////////////////// // Sum -PERF_TEST_P(Sz_Depth_Cn, Core_Sum, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) +PERF_TEST_P(Sz_Depth_Cn, Core_Sum, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), + testing::Values(MatCn(Gray)) +#else + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4 +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1896,10 +1975,16 @@ PERF_TEST_P(Sz_Depth_Cn, Core_Sum, ////////////////////////////////////////////////////////////////////// // SumAbs -PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) +PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), + testing::Values(MatCn(Gray)) +#else + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4 +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1929,10 +2014,16 @@ PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, ////////////////////////////////////////////////////////////////////// // SumSqr -PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) +PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), + testing::Values(MatCn(Gray)) +#else + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4 +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1962,9 +2053,14 @@ 
PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, ////////////////////////////////////////////////////////////////////// // MinMax -PERF_TEST_P(Sz_Depth, Core_MinMax, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F, CV_64F))) +PERF_TEST_P(Sz_Depth, Core_MinMax, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F) +#else + Values(CV_8U, CV_16U, CV_32F, CV_64F) +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2000,9 +2096,14 @@ PERF_TEST_P(Sz_Depth, Core_MinMax, ////////////////////////////////////////////////////////////////////// // MinMaxLoc -PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F, CV_64F))) +PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F) +#else + Values(CV_8U, CV_16U, CV_32F, CV_64F) +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2040,9 +2141,14 @@ PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, ////////////////////////////////////////////////////////////////////// // CountNonZero -PERF_TEST_P(Sz_Depth, Core_CountNonZero, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F, CV_64F))) +PERF_TEST_P(Sz_Depth, Core_CountNonZero, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F) +#else + Values(CV_8U, CV_16U, CV_32F, CV_64F) +#endif +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2079,12 +2185,17 @@ CV_ENUM(ReduceDim, Rows, Cols) DEF_PARAM_TEST(Sz_Depth_Cn_Code_Dim, cv::Size, MatDepth, MatCn, ReduceCode, ReduceDim); -PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_16S, CV_32F), - Values(1, 2, 3, 4), - ReduceCode::all(), - ReduceDim::all())) +PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), +#else + 
Values(CV_8U, CV_16U, CV_16S, CV_32F), +#endif + Values(1, 2, 3, 4), + ReduceCode::all(), + ReduceDim::all() +)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2120,13 +2231,18 @@ PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, DEF_PARAM_TEST(Sz_Depth_NormType, cv::Size, MatDepth, NormType); -PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F, CV_64F), - Values(NormType(cv::NORM_INF), - NormType(cv::NORM_L1), - NormType(cv::NORM_L2), - NormType(cv::NORM_MINMAX)))) +PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, Combine( + GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), +#else + Values(CV_8U, CV_16U, CV_32F, CV_64F), +#endif + Values(NormType(cv::NORM_INF), + NormType(cv::NORM_L1), + NormType(cv::NORM_L2), + NormType(cv::NORM_MINMAX)) +)) { const cv::Size size = GET_PARAM(0); const int type = GET_PARAM(1); diff --git a/modules/gpu/perf/perf_features2d.cpp b/modules/gpu/perf/perf_features2d.cpp index 2b1ab58129..dc8ccd057a 100644 --- a/modules/gpu/perf/perf_features2d.cpp +++ b/modules/gpu/perf/perf_features2d.cpp @@ -145,9 +145,14 @@ PERF_TEST_P(Image_NFeatures, Features2D_ORB, DEF_PARAM_TEST(DescSize_Norm, int, NormType); -PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, - Combine(Values(64, 128, 256), - Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING)))) +PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine( + Values(64, 128, 256), +#ifdef OPENCV_TINY_GPU_MODULE + Values(NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING)) +#else + Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING)) +#endif +)) { declare.time(20.0); @@ -202,10 +207,15 @@ static void toOneRowMatches(const std::vector< std::vector >& src, s DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType); -PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, - Combine(Values(64, 128, 256), - Values(2, 3), - Values(NormType(cv::NORM_L1), 
NormType(cv::NORM_L2)))) +PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine( + Values(64, 128, 256), + Values(2, 3), +#ifdef OPENCV_TINY_GPU_MODULE + Values(NormType(cv::NORM_L2)) +#else + Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2)) +#endif +)) { declare.time(30.0); @@ -257,9 +267,14 @@ PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, ////////////////////////////////////////////////////////////////////// // BFRadiusMatch -PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, - Combine(Values(64, 128, 256), - Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2)))) +PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine( + Values(64, 128, 256), +#ifdef OPENCV_TINY_GPU_MODULE + Values(NormType(cv::NORM_L2)) +#else + Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2)) +#endif +)) { declare.time(30.0); diff --git a/modules/gpu/perf/perf_filters.cpp b/modules/gpu/perf/perf_filters.cpp index adfc294f6d..c317a7bcec 100644 --- a/modules/gpu/perf/perf_filters.cpp +++ b/modules/gpu/perf/perf_filters.cpp @@ -87,7 +87,11 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, ////////////////////////////////////////////////////////////////////// // Sobel +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7))) +#else PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15))) +#endif { declare.time(20.0); @@ -154,7 +158,11 @@ PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U ////////////////////////////////////////////////////////////////////// // GaussianBlur +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7))) +#else PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, 
CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15))) +#endif { declare.time(20.0); diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index c7c1022941..1181598ba0 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -93,9 +93,17 @@ DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border_Mode, cv::Size, MatDepth, MatCn, Interpo PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, Combine(GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), +#else Values(CV_8U, CV_16U, CV_32F), +#endif GPU_CHANNELS_1_3_4, +#ifdef OPENCV_TINY_GPU_MODULE + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), +#else Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), +#endif ALL_BORDER_MODES, RemapMode::all())) { @@ -145,9 +153,17 @@ DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Scale, cv::Size, MatDepth, MatCn, Interpolation PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, Combine(GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), +#else Values(CV_8U, CV_16U, CV_32F), +#endif GPU_CHANNELS_1_3_4, +#ifdef OPENCV_TINY_GPU_MODULE + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), +#else Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), +#endif Values(0.5, 0.3, 2.0))) { declare.time(20.0); @@ -189,7 +205,11 @@ DEF_PARAM_TEST(Sz_Depth_Cn_Scale, cv::Size, MatDepth, MatCn, double); PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, Combine(GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), +#else Values(CV_8U, CV_16U, CV_32F), +#endif GPU_CHANNELS_1_3_4, Values(0.2, 0.1, 0.05))) { @@ -232,9 +252,17 @@ DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border, cv::Size, MatDepth, MatCn, Interpolatio PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, Combine(GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, 
CV_32F), +#else Values(CV_8U, CV_16U, CV_32F), +#endif GPU_CHANNELS_1_3_4, +#ifdef OPENCV_TINY_GPU_MODULE + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), +#else Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), +#endif ALL_BORDER_MODES)) { declare.time(20.0); @@ -282,9 +310,17 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, Combine(GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), +#else Values(CV_8U, CV_16U, CV_32F), +#endif GPU_CHANNELS_1_3_4, +#ifdef OPENCV_TINY_GPU_MODULE + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), +#else Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), +#endif ALL_BORDER_MODES)) { declare.time(20.0); @@ -332,7 +368,11 @@ DEF_PARAM_TEST(Sz_Depth_Cn_Border, cv::Size, MatDepth, MatCn, BorderMode); PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder, Combine(GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), +#else Values(CV_8U, CV_16U, CV_32F), +#endif GPU_CHANNELS_1_3_4, ALL_BORDER_MODES)) { @@ -374,7 +414,11 @@ DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp); PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold, Combine(GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), +#else Values(CV_8U, CV_16U, CV_32F, CV_64F), +#endif ThreshOp::all())) { const cv::Size size = GET_PARAM(0); @@ -674,7 +718,11 @@ DEF_PARAM_TEST(Image_AppertureSz_L2gradient, string, int, bool); PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny, Combine(Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"), +#ifdef OPENCV_TINY_GPU_MODULE + Values(3), +#else Values(3, 5), +#endif Bool())) { const string fileName = GET_PARAM(0); @@ -1302,7 +1350,11 @@ PERF_TEST_P(Sz_Depth_Cn_Inter, ImgProc_Rotate, PERF_TEST_P(Sz_Depth_Cn, 
ImgProc_PyrDown, Combine(GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), +#else Values(CV_8U, CV_16U, CV_32F), +#endif GPU_CHANNELS_1_3_4)) { const cv::Size size = GET_PARAM(0); @@ -1338,7 +1390,11 @@ PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp, Combine(GPU_TYPICAL_MAT_SIZES, +#ifdef OPENCV_TINY_GPU_MODULE + Values(CV_8U, CV_32F), +#else Values(CV_8U, CV_16U, CV_32F), +#endif GPU_CHANNELS_1_3_4)) { const cv::Size size = GET_PARAM(0); diff --git a/modules/ts/include/opencv2/ts/gpu_perf.hpp b/modules/ts/include/opencv2/ts/gpu_perf.hpp index b7b73b7bc5..d74d7ea031 100644 --- a/modules/ts/include/opencv2/ts/gpu_perf.hpp +++ b/modules/ts/include/opencv2/ts/gpu_perf.hpp @@ -50,8 +50,13 @@ namespace perf { +#ifdef OPENCV_TINY_GPU_MODULE + #define ALL_BORDER_MODES testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT)) + #define ALL_INTERPOLATIONS testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_AREA)) +#else #define ALL_BORDER_MODES BorderMode::all() #define ALL_INTERPOLATIONS Interpolation::all() +#endif CV_ENUM(BorderMode, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP) CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA) From f10537cdd6d8e59d6ef408011c0330f667274804 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 23 Apr 2015 16:20:56 +0300 Subject: [PATCH 21/21] fix tests compilation issue on Windows --- modules/gpu/perf/perf_core.cpp | 371 ++++++++++++--------- modules/gpu/perf/perf_features2d.cpp | 48 +-- modules/gpu/perf/perf_filters.cpp | 24 +- modules/gpu/perf/perf_imgproc.cpp | 199 ++++++----- modules/gpu/test/test_color.cpp | 12 +- modules/gpu/test/test_copy_make_border.cpp | 10 +- modules/gpu/test/test_core.cpp | 110 ++++-- modules/gpu/test/test_features2d.cpp | 12 +- 
modules/gpu/test/test_filters.cpp | 72 +++- modules/gpu/test/test_imgproc.cpp | 12 +- modules/gpu/test/test_pyramids.cpp | 24 +- modules/gpu/test/test_remap.cpp | 9 +- modules/gpu/test/test_resize.cpp | 46 ++- modules/gpu/test/test_threshold.cpp | 13 +- modules/gpu/test/test_warp_affine.cpp | 25 +- modules/gpu/test/test_warp_perspective.cpp | 25 +- 16 files changed, 649 insertions(+), 363 deletions(-) diff --git a/modules/gpu/perf/perf_core.cpp b/modules/gpu/perf/perf_core.cpp index 21beedce73..87e22c4695 100644 --- a/modules/gpu/perf/perf_core.cpp +++ b/modules/gpu/perf/perf_core.cpp @@ -528,14 +528,17 @@ PERF_TEST_P(Sz_Depth, Core_AbsDiffScalar, ////////////////////////////////////////////////////////////////////// // Abs -PERF_TEST_P(Sz_Depth, Core_Abs, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(MatDepth(CV_32F)) -#else - Values(CV_16S, CV_32F) -#endif +PERF_TEST_P(Sz_Depth, Core_Abs, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)) )) +#else +PERF_TEST_P(Sz_Depth, Core_Abs, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_16S, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -561,14 +564,17 @@ PERF_TEST_P(Sz_Depth, Core_Abs, Combine( ////////////////////////////////////////////////////////////////////// // Sqr -PERF_TEST_P(Sz_Depth, Core_Sqr, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(MatDepth(CV_32F)) -#else - Values(CV_8U, CV_16S, CV_32F) -#endif +PERF_TEST_P(Sz_Depth, Core_Sqr, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)) )) +#else +PERF_TEST_P(Sz_Depth, Core_Sqr, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -594,14 +600,17 @@ PERF_TEST_P(Sz_Depth, Core_Sqr, Combine( ////////////////////////////////////////////////////////////////////// // Sqrt -PERF_TEST_P(Sz_Depth, Core_Sqrt, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef 
OPENCV_TINY_GPU_MODULE - Values(MatDepth(CV_32F)) -#else - Values(CV_8U, CV_16S, CV_32F) -#endif +PERF_TEST_P(Sz_Depth, Core_Sqrt, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)) )) +#else +PERF_TEST_P(Sz_Depth, Core_Sqrt, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -631,14 +640,17 @@ PERF_TEST_P(Sz_Depth, Core_Sqrt, Combine( ////////////////////////////////////////////////////////////////////// // Log -PERF_TEST_P(Sz_Depth, Core_Log, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(MatDepth(CV_32F)) -#else - Values(CV_8U, CV_16S, CV_32F) -#endif +PERF_TEST_P(Sz_Depth, Core_Log, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)) )) +#else +PERF_TEST_P(Sz_Depth, Core_Log, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -668,14 +680,17 @@ PERF_TEST_P(Sz_Depth, Core_Log, Combine( ////////////////////////////////////////////////////////////////////// // Exp -PERF_TEST_P(Sz_Depth, Core_Exp, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(MatDepth(CV_32F)) -#else - Values(CV_8U, CV_16S, CV_32F) -#endif +PERF_TEST_P(Sz_Depth, Core_Exp, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)) )) +#else +PERF_TEST_P(Sz_Depth, Core_Exp, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -707,15 +722,19 @@ PERF_TEST_P(Sz_Depth, Core_Exp, Combine( DEF_PARAM_TEST(Sz_Depth_Power, cv::Size, MatDepth, double); -PERF_TEST_P(Sz_Depth_Power, Core_Pow, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(MatDepth(CV_32F)), -#else - Values(CV_8U, CV_16S, CV_32F), -#endif - Values(0.3, 2.0, 2.4) +PERF_TEST_P(Sz_Depth_Power, Core_Pow, Combine( + GPU_TYPICAL_MAT_SIZES, + 
Values(MatDepth(CV_32F)), + Values(0.3, 2.0, 2.4) )) +#else +PERF_TEST_P(Sz_Depth_Power, Core_Pow, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F), + Values(0.3, 2.0, 2.4) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -893,16 +912,19 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseAndMat, ////////////////////////////////////////////////////////////////////// // BitwiseAndScalar -PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(MatDepth(CV_8U)), - testing::Values(MatCn(Gray)) -#else - Values(CV_8U, CV_16U, CV_32S), - GPU_CHANNELS_1_3_4 -#endif +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_8U)), + testing::Values(MatCn(Gray)) )) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -975,16 +997,19 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseOrMat, ////////////////////////////////////////////////////////////////////// // BitwiseOrScalar -PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(MatDepth(CV_8U)), - testing::Values(MatCn(Gray)) -#else - Values(CV_8U, CV_16U, CV_32S), - GPU_CHANNELS_1_3_4 -#endif +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_8U)), + testing::Values(MatCn(Gray)) )) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1057,16 +1082,19 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseXorMat, ////////////////////////////////////////////////////////////////////// // BitwiseXorScalar -PERF_TEST_P(Sz_Depth_Cn, 
Core_BitwiseXorScalar, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(MatDepth(CV_8U)), - testing::Values(MatCn(Gray)) -#else - Values(CV_8U, CV_16U, CV_32S), - GPU_CHANNELS_1_3_4 -#endif +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_8U)), + testing::Values(MatCn(Gray)) )) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1207,14 +1235,17 @@ PERF_TEST_P(Sz_Depth, Core_MinMat, ////////////////////////////////////////////////////////////////////// // MinScalar -PERF_TEST_P(Sz_Depth, Core_MinScalar, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F) -#else - Values(CV_8U, CV_16U, CV_32F) -#endif +PERF_TEST_P(Sz_Depth, Core_MinScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F) )) +#else +PERF_TEST_P(Sz_Depth, Core_MinScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1283,14 +1314,17 @@ PERF_TEST_P(Sz_Depth, Core_MaxMat, ////////////////////////////////////////////////////////////////////// // MaxScalar -PERF_TEST_P(Sz_Depth, Core_MaxScalar, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F) -#else - Values(CV_8U, CV_16U, CV_32F) -#endif +PERF_TEST_P(Sz_Depth, Core_MaxScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F) )) +#else +PERF_TEST_P(Sz_Depth, Core_MaxScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1325,18 +1359,21 @@ PERF_TEST_P(Sz_Depth, Core_MaxScalar, Combine( DEF_PARAM_TEST(Sz_3Depth, cv::Size, MatDepth, MatDepth, MatDepth); -PERF_TEST_P(Sz_3Depth, Core_AddWeighted, - 
Combine(GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(MatDepth(CV_32F)), - Values(MatDepth(CV_32F)), - Values(MatDepth(CV_32F)) -#else - Values(CV_8U, CV_16U, CV_32F, CV_64F), - Values(CV_8U, CV_16U, CV_32F, CV_64F), - Values(CV_8U, CV_16U, CV_32F, CV_64F) -#endif +PERF_TEST_P(Sz_3Depth, Core_AddWeighted, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)), + Values(MatDepth(CV_32F)), + Values(MatDepth(CV_32F)) )) +#else +PERF_TEST_P(Sz_3Depth, Core_AddWeighted, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(CV_8U, CV_16U, CV_32F, CV_64F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth1 = GET_PARAM(1); @@ -1851,14 +1888,19 @@ PERF_TEST_P(Sz, Core_MeanStdDev, DEF_PARAM_TEST(Sz_Depth_Norm, cv::Size, MatDepth, NormType); -PERF_TEST_P(Sz_Depth_Norm, Core_Norm, - Combine(GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), +PERF_TEST_P(Sz_Depth_Norm, Core_Norm, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)) +)) #else - Values(CV_8U, CV_16U, CV_32S, CV_32F), +PERF_TEST_P(Sz_Depth_Norm, Core_Norm, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S, CV_32F), + Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)) +)) #endif - Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)))) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1932,16 +1974,19 @@ PERF_TEST_P(Sz_Norm, Core_NormDiff, ////////////////////////////////////////////////////////////////////// // Sum -PERF_TEST_P(Sz_Depth_Cn, Core_Sum, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), - testing::Values(MatCn(Gray)) -#else - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4 -#endif +PERF_TEST_P(Sz_Depth_Cn, Core_Sum, Combine( + GPU_TYPICAL_MAT_SIZES, + 
Values(CV_8U, CV_32F), + testing::Values(MatCn(Gray)) )) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_Sum, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1975,16 +2020,19 @@ PERF_TEST_P(Sz_Depth_Cn, Core_Sum, Combine( ////////////////////////////////////////////////////////////////////// // SumAbs -PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), - testing::Values(MatCn(Gray)) -#else - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4 -#endif +PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + testing::Values(MatCn(Gray)) )) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2014,16 +2062,19 @@ PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, Combine( ////////////////////////////////////////////////////////////////////// // SumSqr -PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), - testing::Values(MatCn(Gray)) -#else - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4 -#endif +PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + testing::Values(MatCn(Gray)) )) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2053,14 +2104,17 @@ PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, Combine( ////////////////////////////////////////////////////////////////////// // MinMax -PERF_TEST_P(Sz_Depth, Core_MinMax, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F) 
-#else - Values(CV_8U, CV_16U, CV_32F, CV_64F) -#endif +PERF_TEST_P(Sz_Depth, Core_MinMax, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F) )) +#else +PERF_TEST_P(Sz_Depth, Core_MinMax, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2096,14 +2150,17 @@ PERF_TEST_P(Sz_Depth, Core_MinMax, Combine( ////////////////////////////////////////////////////////////////////// // MinMaxLoc -PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F) -#else - Values(CV_8U, CV_16U, CV_32F, CV_64F) -#endif +PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F) )) +#else +PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2141,14 +2198,17 @@ PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, Combine( ////////////////////////////////////////////////////////////////////// // CountNonZero -PERF_TEST_P(Sz_Depth, Core_CountNonZero, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F) -#else - Values(CV_8U, CV_16U, CV_32F, CV_64F) -#endif +PERF_TEST_P(Sz_Depth, Core_CountNonZero, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F) )) +#else +PERF_TEST_P(Sz_Depth, Core_CountNonZero, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2185,17 +2245,23 @@ CV_ENUM(ReduceDim, Rows, Cols) DEF_PARAM_TEST(Sz_Depth_Cn_Code_Dim, cv::Size, MatDepth, MatCn, ReduceCode, ReduceDim); -PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), -#else - Values(CV_8U, CV_16U, CV_16S, CV_32F), -#endif - Values(1, 2, 3, 4), - 
ReduceCode::all(), - ReduceDim::all() +PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + Values(1, 2, 3, 4), + ReduceCode::all(), + ReduceDim::all() )) +#else +PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_16S, CV_32F), + Values(1, 2, 3, 4), + ReduceCode::all(), + ReduceDim::all() +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2231,18 +2297,25 @@ PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, Combine( DEF_PARAM_TEST(Sz_Depth_NormType, cv::Size, MatDepth, NormType); -PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, Combine( - GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), -#else - Values(CV_8U, CV_16U, CV_32F, CV_64F), -#endif - Values(NormType(cv::NORM_INF), - NormType(cv::NORM_L1), - NormType(cv::NORM_L2), - NormType(cv::NORM_MINMAX)) +PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + Values(NormType(cv::NORM_INF), + NormType(cv::NORM_L1), + NormType(cv::NORM_L2), + NormType(cv::NORM_MINMAX)) )) +#else +PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(NormType(cv::NORM_INF), + NormType(cv::NORM_L1), + NormType(cv::NORM_L2), + NormType(cv::NORM_MINMAX)) +)) +#endif { const cv::Size size = GET_PARAM(0); const int type = GET_PARAM(1); diff --git a/modules/gpu/perf/perf_features2d.cpp b/modules/gpu/perf/perf_features2d.cpp index dc8ccd057a..5a21acdff6 100644 --- a/modules/gpu/perf/perf_features2d.cpp +++ b/modules/gpu/perf/perf_features2d.cpp @@ -145,14 +145,17 @@ PERF_TEST_P(Image_NFeatures, Features2D_ORB, DEF_PARAM_TEST(DescSize_Norm, int, NormType); -PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine( - Values(64, 128, 256), #ifdef OPENCV_TINY_GPU_MODULE - Values(NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING)) -#else - 
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING)) -#endif +PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine( + Values(64, 128, 256), + Values(NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING)) )) +#else +PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine( + Values(64, 128, 256), + Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING)) +)) +#endif { declare.time(20.0); @@ -207,15 +210,19 @@ static void toOneRowMatches(const std::vector< std::vector >& src, s DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType); -PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine( - Values(64, 128, 256), - Values(2, 3), #ifdef OPENCV_TINY_GPU_MODULE - Values(NormType(cv::NORM_L2)) -#else - Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2)) -#endif +PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine( + Values(64, 128, 256), + Values(2, 3), + Values(NormType(cv::NORM_L2)) )) +#else +PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine( + Values(64, 128, 256), + Values(2, 3), + Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2)) +)) +#endif { declare.time(30.0); @@ -267,14 +274,17 @@ PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine( ////////////////////////////////////////////////////////////////////// // BFRadiusMatch -PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine( - Values(64, 128, 256), #ifdef OPENCV_TINY_GPU_MODULE - Values(NormType(cv::NORM_L2)) -#else - Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2)) -#endif +PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine( + Values(64, 128, 256), + Values(NormType(cv::NORM_L2)) )) +#else +PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine( + Values(64, 128, 256), + Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2)) +)) +#endif { declare.time(30.0); diff --git a/modules/gpu/perf/perf_filters.cpp b/modules/gpu/perf/perf_filters.cpp index c317a7bcec..f064dd395e 100644 --- 
a/modules/gpu/perf/perf_filters.cpp +++ b/modules/gpu/perf/perf_filters.cpp @@ -88,9 +88,17 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, // Sobel #ifdef OPENCV_TINY_GPU_MODULE -PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7))) +PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC4, CV_32FC1), + Values(3, 5, 7) +)) #else -PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15))) +PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC4, CV_32FC1), + Values(3, 5, 7, 9, 11, 13, 15) +)) #endif { declare.time(20.0); @@ -159,9 +167,17 @@ PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U // GaussianBlur #ifdef OPENCV_TINY_GPU_MODULE -PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7))) +PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC4, CV_32FC1), + Values(3, 5, 7) +)) #else -PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15))) +PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC4, CV_32FC1), + Values(3, 5, 7, 9, 11, 13, 15) +)) #endif { declare.time(20.0); diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index 1181598ba0..f2762e07c0 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -91,21 +91,25 @@ void generateMap(cv::Mat& map_x, cv::Mat& map_y, int remapMode) DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border_Mode, cv::Size, MatDepth, MatCn, Interpolation, BorderMode, RemapMode); 
-PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, - Combine(GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + ALL_BORDER_MODES, + RemapMode::all() +)) #else - Values(CV_8U, CV_16U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), + ALL_BORDER_MODES, + RemapMode::all() +)) #endif - GPU_CHANNELS_1_3_4, -#ifdef OPENCV_TINY_GPU_MODULE - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), -#else - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), -#endif - ALL_BORDER_MODES, - RemapMode::all())) { declare.time(20.0); @@ -151,20 +155,23 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Scale, cv::Size, MatDepth, MatCn, Interpolation, double); -PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, - Combine(GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + Values(0.5, 0.3, 2.0) +)) #else - Values(CV_8U, CV_16U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), + Values(0.5, 0.3, 2.0) +)) #endif - GPU_CHANNELS_1_3_4, -#ifdef 
OPENCV_TINY_GPU_MODULE - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), -#else - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), -#endif - Values(0.5, 0.3, 2.0))) { declare.time(20.0); @@ -203,15 +210,21 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, DEF_PARAM_TEST(Sz_Depth_Cn_Scale, cv::Size, MatDepth, MatCn, double); -PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, - Combine(GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(0.2, 0.1, 0.05) +)) #else - Values(CV_8U, CV_16U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(0.2, 0.1, 0.05) +)) #endif - GPU_CHANNELS_1_3_4, - Values(0.2, 0.1, 0.05))) { declare.time(1.0); @@ -250,20 +263,23 @@ PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border, cv::Size, MatDepth, MatCn, Interpolation, BorderMode); -PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, - Combine(GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + ALL_BORDER_MODES) +) #else - Values(CV_8U, CV_16U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), + ALL_BORDER_MODES) +) #endif - GPU_CHANNELS_1_3_4, -#ifdef OPENCV_TINY_GPU_MODULE - Values(Interpolation(cv::INTER_NEAREST), 
Interpolation(cv::INTER_LINEAR)), -#else - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), -#endif - ALL_BORDER_MODES)) { declare.time(20.0); @@ -308,20 +324,23 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, ////////////////////////////////////////////////////////////////////// // WarpPerspective -PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, - Combine(GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + ALL_BORDER_MODES) +) #else - Values(CV_8U, CV_16U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), + ALL_BORDER_MODES) +) #endif - GPU_CHANNELS_1_3_4, -#ifdef OPENCV_TINY_GPU_MODULE - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), -#else - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), -#endif - ALL_BORDER_MODES)) { declare.time(20.0); @@ -366,15 +385,21 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, DEF_PARAM_TEST(Sz_Depth_Cn_Border, cv::Size, MatDepth, MatCn, BorderMode); -PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder, - Combine(GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + ALL_BORDER_MODES) +) #else - Values(CV_8U, CV_16U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder, Combine( + GPU_TYPICAL_MAT_SIZES, + 
Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + ALL_BORDER_MODES) +) #endif - GPU_CHANNELS_1_3_4, - ALL_BORDER_MODES)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -412,14 +437,19 @@ CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp); -PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold, - Combine(GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), +PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + ThreshOp::all() +)) #else - Values(CV_8U, CV_16U, CV_32F, CV_64F), +PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + ThreshOp::all() +)) #endif - ThreshOp::all())) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -716,14 +746,19 @@ PERF_TEST_P(Sz, ImgProc_ColumnSum, DEF_PARAM_TEST(Image_AppertureSz_L2gradient, string, int, bool); -PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny, - Combine(Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"), #ifdef OPENCV_TINY_GPU_MODULE - Values(3), +PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny, Combine( + Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"), + Values(3), + Bool() +)) #else - Values(3, 5), +PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny, Combine( + Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"), + Values(3, 5), + Bool() +)) #endif - Bool())) { const string fileName = GET_PARAM(0); const int apperture_size = GET_PARAM(1); @@ -1348,14 +1383,19 @@ PERF_TEST_P(Sz_Depth_Cn_Inter, ImgProc_Rotate, ////////////////////////////////////////////////////////////////////// // PyrDown -PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, - Combine(GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, 
Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4) +) #else - Values(CV_8U, CV_16U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4) +) #endif - GPU_CHANNELS_1_3_4)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1388,14 +1428,19 @@ PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, ////////////////////////////////////////////////////////////////////// // PyrUp -PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp, - Combine(GPU_TYPICAL_MAT_SIZES, #ifdef OPENCV_TINY_GPU_MODULE - Values(CV_8U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4) +) #else - Values(CV_8U, CV_16U, CV_32F), +PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4) +) #endif - GPU_CHANNELS_1_3_4)) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); diff --git a/modules/gpu/test/test_color.cpp b/modules/gpu/test/test_color.cpp index 49a3274c99..6d4c8c5423 100644 --- a/modules/gpu/test/test_color.cpp +++ b/modules/gpu/test/test_color.cpp @@ -2285,15 +2285,19 @@ GPU_TEST_P(CvtColor, BayerGR2Gray) EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), -#else - testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)), -#endif WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)), + WHOLE_SUBMAT)); +#endif 
/////////////////////////////////////////////////////////////////////////////////////////////////////// // Demosaicing diff --git a/modules/gpu/test/test_copy_make_border.cpp b/modules/gpu/test/test_copy_make_border.cpp index 8159ba72a9..b06f795411 100644 --- a/modules/gpu/test/test_copy_make_border.cpp +++ b/modules/gpu/test/test_copy_make_border.cpp @@ -87,17 +87,23 @@ GPU_TEST_P(CopyMakeBorder, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(Border(1), Border(10), Border(50)), + ALL_BORDER_TYPES, + WHOLE_SUBMAT)); #else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), @@ -107,9 +113,9 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine( MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#endif testing::Values(Border(1), Border(10), Border(50)), ALL_BORDER_TYPES, WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp index 6ad36c094c..dae80c72df 100644 --- a/modules/gpu/test/test_core.cpp +++ b/modules/gpu/test/test_core.cpp @@ -1341,15 +1341,19 @@ GPU_TEST_P(Abs, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Core, Abs, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_32F)), -#else - testing::Values(MatDepth(CV_16S), MatDepth(CV_32F)), -#endif WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_Core, Abs, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_16S), MatDepth(CV_32F)), + 
WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // Sqr @@ -1385,18 +1389,22 @@ GPU_TEST_P(Sqr, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); #else +INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), -#endif WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // Sqrt @@ -1459,18 +1467,22 @@ GPU_TEST_P(Sqrt, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); #else +INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), -#endif WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // Log @@ -1533,18 +1545,22 @@ GPU_TEST_P(Log, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 
1.0 : 1e-6); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); #else +INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), -#endif WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // Exp @@ -1617,18 +1633,22 @@ GPU_TEST_P(Exp, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-2); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); #else +INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), -#endif WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // Compare_Array @@ -1795,16 +1815,21 @@ GPU_TEST_P(Compare_Scalar, Accuracy) } } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Core, Compare_Scalar, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#else - TYPES(CV_8U, CV_64F, 1, 4), -#endif CmpCode::all(), WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_Core, Compare_Scalar, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + TYPES(CV_8U, CV_64F, 1, 4), + CmpCode::all(), + WHOLE_SUBMAT)); +#endif ////////////////////////////////////////////////////////////////////////////// // Bitwise_Array @@ -1960,17 +1985,19 @@ GPU_TEST_P(Bitwise_Scalar, Xor) EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } +#ifdef OPENCV_TINY_GPU_MODULE 
INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Scalar, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_8U)), - testing::Values(Channels(1)) + testing::Values(Channels(1)))); #else +INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Scalar, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32S)), - IMAGE_CHANNELS + IMAGE_CHANNELS)); #endif -)); ////////////////////////////////////////////////////////////////////////////// // RShift @@ -2347,15 +2374,19 @@ GPU_TEST_P(Pow, Accuracy) } } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Core, Pow, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_32F)), -#else - ALL_DEPTH, -#endif WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_Core, Pow, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + ALL_DEPTH, + WHOLE_SUBMAT)); +#endif ////////////////////////////////////////////////////////////////////////////// // AddWeighted @@ -3005,22 +3036,27 @@ GPU_TEST_P(Norm, Accuracy) EXPECT_NEAR(val_gold, val, depth < CV_32F ? 
0.0 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Core, Norm, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), + testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF)), + WHOLE_SUBMAT)); #else +INSTANTIATE_TEST_CASE_P(GPU_Core, Norm, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F)), -#endif testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF)), WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // normDiff @@ -3193,15 +3229,19 @@ GPU_TEST_P(Sum, Sqr) EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Core, Sum, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)), -#else - TYPES(CV_8U, CV_64F, 1, 4), -#endif WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_Core, Sum, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + TYPES(CV_8U, CV_64F, 1, 4), + WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // MinMax @@ -3622,22 +3662,28 @@ GPU_TEST_P(Reduce, Cols) EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 
0.0 : 0.02); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Core, Reduce, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), + ALL_CHANNELS, + ALL_REDUCE_CODES, + WHOLE_SUBMAT)); #else +INSTANTIATE_TEST_CASE_P(GPU_Core, Reduce, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F), MatDepth(CV_64F)), -#endif ALL_CHANNELS, ALL_REDUCE_CODES, WHOLE_SUBMAT)); +#endif ////////////////////////////////////////////////////////////////////////////// // Normalize diff --git a/modules/gpu/test/test_features2d.cpp b/modules/gpu/test/test_features2d.cpp index 41f71d9bdf..99d9b2e8f6 100644 --- a/modules/gpu/test/test_features2d.cpp +++ b/modules/gpu/test/test_features2d.cpp @@ -700,14 +700,18 @@ GPU_TEST_P(BruteForceMatcher, RadiusMatch_Collection) } #endif +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine( ALL_DEVICES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(NormCode(cv::NORM_L2)), -#else - testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2)), -#endif testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304)), testing::Values(UseMask(false), UseMask(true)))); +#else +INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine( + ALL_DEVICES, + testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2)), + testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304)), + testing::Values(UseMask(false), UseMask(true)))); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_filters.cpp b/modules/gpu/test/test_filters.cpp index 12aa91b602..cac3c70d79 100644 --- a/modules/gpu/test/test_filters.cpp +++ 
b/modules/gpu/test/test_filters.cpp @@ -164,14 +164,11 @@ GPU_TEST_P(Sobel, Accuracy) EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.1); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Filter, Sobel, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), -#else - testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), -#endif IMAGE_CHANNELS, testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7))), testing::Values(Deriv_X(0), Deriv_X(1), Deriv_X(2)), @@ -181,6 +178,21 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, Sobel, testing::Combine( BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_Filter, Sobel, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), + IMAGE_CHANNELS, + testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7))), + testing::Values(Deriv_X(0), Deriv_X(1), Deriv_X(2)), + testing::Values(Deriv_Y(0), Deriv_Y(1), Deriv_Y(2)), + testing::Values(BorderType(cv::BORDER_REFLECT101), + BorderType(cv::BORDER_REPLICATE), + BorderType(cv::BORDER_CONSTANT), + BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); +#endif ///////////////////////////////////////////////////////////////////////////////////////////////// // Scharr @@ -231,14 +243,11 @@ GPU_TEST_P(Scharr, Accuracy) EXPECT_MAT_NEAR(getInnerROI(dst_gold, cv::Size(3, 3)), getInnerROI(dst, cv::Size(3, 3)), CV_MAT_DEPTH(type) < CV_32F ? 
0.0 : 0.1); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Filter, Scharr, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), -#else - testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), -#endif IMAGE_CHANNELS, testing::Values(Deriv_X(0), Deriv_X(1)), testing::Values(Deriv_Y(0), Deriv_Y(1)), @@ -247,6 +256,20 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, Scharr, testing::Combine( BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_Filter, Scharr, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), + IMAGE_CHANNELS, + testing::Values(Deriv_X(0), Deriv_X(1)), + testing::Values(Deriv_Y(0), Deriv_Y(1)), + testing::Values(BorderType(cv::BORDER_REFLECT101), + BorderType(cv::BORDER_REPLICATE), + BorderType(cv::BORDER_CONSTANT), + BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); +#endif ///////////////////////////////////////////////////////////////////////////////////////////////// // GaussianBlur @@ -309,20 +332,26 @@ GPU_TEST_P(GaussianBlur, Accuracy) } } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), -#else - testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), -#endif IMAGE_CHANNELS, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7))), + testing::Values(BorderType(cv::BORDER_REFLECT101), + BorderType(cv::BORDER_REPLICATE), + BorderType(cv::BORDER_CONSTANT), + BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); #else +INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + 
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), + IMAGE_CHANNELS, testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), @@ -338,12 +367,12 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine( KSize(cv::Size(27, 27)), KSize(cv::Size(29, 29)), KSize(cv::Size(31, 31))), -#endif testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); +#endif ///////////////////////////////////////////////////////////////////////////////////////////////// // Laplacian @@ -583,17 +612,24 @@ GPU_TEST_P(Filter2D, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)), -#else - testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)), -#endif testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))), testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)), + testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))), + 
testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), + testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index d834048cfe..c6c0bf1868 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -357,15 +357,19 @@ GPU_TEST_P(Canny, Accuracy) } } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine( ALL_DEVICES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(AppertureSize(3)), -#else - testing::Values(AppertureSize(3), AppertureSize(5)), -#endif testing::Values(L2gradient(false), L2gradient(true)), WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine( + ALL_DEVICES, + testing::Values(AppertureSize(3), AppertureSize(5)), + testing::Values(L2gradient(false), L2gradient(true)), + WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // MeanShift diff --git a/modules/gpu/test/test_pyramids.cpp b/modules/gpu/test/test_pyramids.cpp index 64bcacc48e..5ddecf49ac 100644 --- a/modules/gpu/test/test_pyramids.cpp +++ b/modules/gpu/test/test_pyramids.cpp @@ -80,15 +80,19 @@ GPU_TEST_P(PyrDown, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 
1e-4 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#else - testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#endif WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////// // pyrUp @@ -124,14 +128,18 @@ GPU_TEST_P(PyrUp, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 
1e-4 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#else - testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#endif WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_remap.cpp b/modules/gpu/test/test_remap.cpp index 58635e4f3b..cd0520070b 100644 --- a/modules/gpu/test/test_remap.cpp +++ b/modules/gpu/test/test_remap.cpp @@ -169,17 +169,22 @@ GPU_TEST_P(Remap, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 
1e-3 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Remap, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); #else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Remap, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)), -#endif WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpu/test/test_resize.cpp index 4adfd8d3ef..99cbfec3b4 100644 --- a/modules/gpu/test/test_resize.cpp +++ b/modules/gpu/test/test_resize.cpp @@ -174,21 +174,23 @@ GPU_TEST_P(Resize, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 
1e-2 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#else - testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#endif testing::Values(0.3, 0.5, 1.5, 2.0), -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), -#else - testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), -#endif WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(0.3, 0.5, 1.5, 2.0), + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), + WHOLE_SUBMAT)); +#endif ///////////////// @@ -229,28 +231,40 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-2 : src.depth() == CV_8U ? 
4.0 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#else - testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#endif testing::Values(0.3, 0.5), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_AREA)), WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(0.3, 0.5), + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_AREA)), + WHOLE_SUBMAT)); +#endif +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc2, ResizeSameAsHost, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#else - testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#endif testing::Values(0.3, 0.5, 1.5, 2.0), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc2, ResizeSameAsHost, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), 
MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(0.3, 0.5, 1.5, 2.0), + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_threshold.cpp b/modules/gpu/test/test_threshold.cpp index 3a410f7921..cd06c17dd9 100644 --- a/modules/gpu/test/test_threshold.cpp +++ b/modules/gpu/test/test_threshold.cpp @@ -83,15 +83,20 @@ GPU_TEST_P(Threshold, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Threshold, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)), -#else - testing::Values(MatType(CV_8UC1), MatType(CV_16SC1), MatType(CV_32FC1)), -#endif ThreshOp::all(), WHOLE_SUBMAT)); +#else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Threshold, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_16SC1), MatType(CV_32FC1)), + ThreshOp::all(), + WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_warp_affine.cpp b/modules/gpu/test/test_warp_affine.cpp index fdcacb0c58..a20bbbeb4d 100644 --- a/modules/gpu/test/test_warp_affine.cpp +++ b/modules/gpu/test/test_warp_affine.cpp @@ -222,23 +222,25 @@ GPU_TEST_P(WarpAffine, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 
1e-1 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffine, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#else - testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#endif DIRECT_INVERSE, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); #else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffine, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + DIRECT_INVERSE, testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)), -#endif WHOLE_SUBMAT)); +#endif /////////////////////////////////////////////////////////////////// // Test NPP @@ -280,15 +282,18 @@ GPU_TEST_P(WarpAffineNPP, Accuracy) EXPECT_MAT_SIMILAR(dst_gold, dst, 2e-2); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffineNPP, testing::Combine( ALL_DEVICES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), DIRECT_INVERSE, -#ifdef OPENCV_TINY_GPU_MODULE - testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)) + testing::Values(Interpolation(cv::INTER_NEAREST), 
Interpolation(cv::INTER_LINEAR)))); #else - testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)) +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffineNPP, testing::Combine( + ALL_DEVICES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + DIRECT_INVERSE, + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)))); #endif -)); #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_warp_perspective.cpp b/modules/gpu/test/test_warp_perspective.cpp index e707eb4f41..892704dd30 100644 --- a/modules/gpu/test/test_warp_perspective.cpp +++ b/modules/gpu/test/test_warp_perspective.cpp @@ -225,23 +225,25 @@ GPU_TEST_P(WarpPerspective, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-1 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspective, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#else - testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), -#endif DIRECT_INVERSE, -#ifdef OPENCV_TINY_GPU_MODULE testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); #else +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspective, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + DIRECT_INVERSE, 
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)), -#endif WHOLE_SUBMAT)); +#endif /////////////////////////////////////////////////////////////////// // Test NPP @@ -283,15 +285,18 @@ GPU_TEST_P(WarpPerspectiveNPP, Accuracy) EXPECT_MAT_SIMILAR(dst_gold, dst, 2e-2); } +#ifdef OPENCV_TINY_GPU_MODULE INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspectiveNPP, testing::Combine( ALL_DEVICES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), DIRECT_INVERSE, -#ifdef OPENCV_TINY_GPU_MODULE - testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)) + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)))); #else - testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)) +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspectiveNPP, testing::Combine( + ALL_DEVICES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + DIRECT_INVERSE, + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)))); #endif -)); #endif // HAVE_CUDA