From 22f5376e82c086b9b76f5779eaf21ba0f1bfa89b Mon Sep 17 00:00:00 2001
From: Anatoly Baksheev
Date: Mon, 26 Jul 2010 15:04:56 +0000
Subject: [PATCH] refactoring and minor code improvements

added cuda_shared.hpp
header was reorganized in order to speed up compilation

---
 modules/gpu/CMakeLists.txt                    |   2 +-
 modules/gpu/include/opencv2/gpu/gpu.hpp       |   5 +-
 modules/gpu/src/cuda/cuda_shared.hpp          |  22 +-
 modules/gpu/src/cuda/matrix_operations.cu     | 358 +++++++++---------
 modules/gpu/src/cuda/safe_call.hpp            |  68 ++++
 ...aturate_cast_gpu.hpp => saturate_cast.hpp} |   6 +
 modules/gpu/src/cuda/stereobm.cu              |  11 +-
 modules/gpu/src/cudastream.cpp                |   7 +-
 modules/gpu/src/imgproc_gpu.cpp               |   2 +-
 modules/gpu/src/matrix_operations.cpp         |  24 +-
 10 files changed, 281 insertions(+), 224 deletions(-)
 create mode 100644 modules/gpu/src/cuda/safe_call.hpp
 rename modules/gpu/src/cuda/{saturate_cast_gpu.hpp => saturate_cast.hpp} (97%)

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index c13cfa6646..f6a5835ee6 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -25,7 +25,7 @@ file(GLOB lib_srcs "src/*.cpp")
 file(GLOB lib_int_hdrs "src/*.h*")
 file(GLOB lib_cuda "src/cuda/*.cu*")
 file(GLOB lib_cuda_hdrs "src/cuda/*.h*")
-source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
+source_group("Src\\Host" FILES ${lib_srcs} ${lib_int_hdrs})
 source_group("Src\\Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs})
 
 file(GLOB lib_hdrs "include/opencv2/${name}/*.h*")
diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 46f8a5ec14..2576bba602 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -65,6 +65,7 @@ namespace cv
         //////////////////////////////// GpuMat ////////////////////////////////
 
         class CudaStream;
+        class MatPL;
 
         //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar to cv::Mat.
         class CV_EXPORTS GpuMat
@@ -107,12 +108,12 @@
             //! performs blocking upload of data to GpuMat.
             void upload(const cv::Mat& m);
-            void upload(const cv::Mat& m, CudaStream & stream);
+            void upload(const MatPL& m, CudaStream& stream);
 
             //! Downloads data from device to host memory. Blocking calls.
             operator Mat() const;
             void download(cv::Mat& m) const;
-            void download(cv::Mat& m, CudaStream & stream) const;
+            void download(MatPL& m, CudaStream& stream) const;
 
             //! returns a new GpuMatrix header for the specified row
             GpuMat row(int y) const;
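For context, the stream overloads above enable asynchronous transfers through page-locked host memory. A minimal usage sketch, assuming MatPL exposes a Mat-style (rows, cols, type) constructor and CudaStream a waitForCompletion() method (both assumptions, not shown in this hunk):

    // Page-locked (pinned) host buffers allow truly asynchronous copies.
    cv::gpu::CudaStream stream;
    cv::gpu::MatPL host_src(480, 640, CV_8UC1);   // pinned source image
    cv::gpu::MatPL host_dst(480, 640, CV_8UC1);   // pinned destination
    cv::gpu::GpuMat dev(480, 640, CV_8UC1);

    dev.upload(host_src, stream);     // enqueued on the stream, returns at once
    dev.download(host_dst, stream);   // enqueued after the upload
    stream.waitForCompletion();       // block until both transfers are done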
diff --git a/modules/gpu/src/cuda/cuda_shared.hpp b/modules/gpu/src/cuda/cuda_shared.hpp
index fbec7cff69..675b8dc7b9 100644
--- a/modules/gpu/src/cuda/cuda_shared.hpp
+++ b/modules/gpu/src/cuda/cuda_shared.hpp
@@ -44,6 +44,7 @@
 #define __OPENCV_CUDA_SHARED_HPP__
 
 #include "opencv2/gpu/devmem2d.hpp"
+#include "safe_call.hpp"
 #include "cuda_runtime_api.h"
 
 namespace cv
@@ -55,32 +56,19 @@ namespace cv
         typedef unsigned short ushort;
         typedef unsigned int uint;
 
-        extern "C" void error( const char *error_string, const char *file, const int line, const char *func = "");
-
         namespace impl
         {
             static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }
 
-            extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
+            extern "C" void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
 
-            extern "C" void set_to_without_mask (const DevMem2D& mat, int depth, const double * scalar, int channels, const cudaStream_t & stream = 0);
-            extern "C" void set_to_with_mask    (const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
+            extern "C" void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);
+            extern "C" void set_to_with_mask    (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
 
-            extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream = 0);
+            extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);
         }
     }
 }
 
-#if defined(__GNUC__)
-    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__);
-#else /* defined(__CUDACC__) || defined(__MSVC__) */
-    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
-#endif
-
-    static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
-    {
-        if( cudaSuccess != err)
-            cv::gpu::error(cudaGetErrorString(err), file, line, func);
-    }
 
 #endif /* __OPENCV_CUDA_SHARED_HPP__ */
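divUp is the usual round-up integer division for sizing CUDA launch grids; a small sketch of the idiom the kernels below rely on (block shape chosen arbitrarily):

    // Enough blocks to cover every pixel even when the image size is not
    // a multiple of the block shape: divUp(100, 16) == 7, i.e. 112 threads wide.
    dim3 threadsPerBlock(16, 16, 1);
    dim3 numBlocks(divUp(cols, threadsPerBlock.x),
                   divUp(rows, threadsPerBlock.y), 1);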
diff --git a/modules/gpu/src/cuda/matrix_operations.cu b/modules/gpu/src/cuda/matrix_operations.cu
index b3cb37d6af..64ac7c2c3f 100644
--- a/modules/gpu/src/cuda/matrix_operations.cu
+++ b/modules/gpu/src/cuda/matrix_operations.cu
@@ -48,13 +48,14 @@
 using namespace cv::gpu;
 using namespace cv::gpu::impl;
 
-__constant__ double scalar_d[4];
 
 namespace mat_operators
 {
-///////////////////////////////////////////////////////////////////////////
-////////////////////////////////// CopyTo /////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
+    __constant__ double scalar_d[4];
+
+    ///////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////// CopyTo /////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////
 
     template <typename T>
     __global__ void kernel_copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
@@ -71,9 +72,9 @@
     }
 
-///////////////////////////////////////////////////////////////////////////
-////////////////////////////////// SetTo //////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////// SetTo //////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////
 
     template <typename T>
     __global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step, int channels)
@@ -103,9 +104,9 @@
     }
 
-///////////////////////////////////////////////////////////////////////////
-//////////////////////////////// ConvertTo ////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////
+    //////////////////////////////// ConvertTo ////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////
 
     template <typename T, typename DT>
     struct ScaleTraits
@@ -229,212 +230,207 @@ namespace mat_operators
 
 namespace cv
 {
-  namespace gpu
-  {
-    namespace impl
-    {
+    namespace gpu
+    {
+        namespace impl
+        {
 
-///////////////////////////////////////////////////////////////////////////
-////////////////////////////////// CopyTo /////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
+            ///////////////////////////////////////////////////////////////////////////
+            ////////////////////////////////// CopyTo /////////////////////////////////
+            ///////////////////////////////////////////////////////////////////////////
 
-      typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
+            typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
 
-      template <typename T>
-      void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream)
-      {
-        dim3 threadsPerBlock(16,16, 1);
-        dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
-        if (stream == 0)
-        {
-          ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
-            ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
-          cudaSafeCall ( cudaThreadSynchronize() );
-        }
-        else
-        {
-          ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
-            ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
-        }
-      }
+            template <typename T>
+            void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream)
+            {
+                dim3 threadsPerBlock(16,16, 1);
+                dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
+                if (stream == 0)
+                {
+                    ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
+                        ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
+                    cudaSafeCall ( cudaThreadSynchronize() );
+                }
+                else
+                {
+                    ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
+                        ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
+                }
+            }
 
-      extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
-      {
-        static CopyToFunc tab[8] =
-        {
-          copy_to_with_mask_run<unsigned char>,
-          copy_to_with_mask_run<char>,
-          copy_to_with_mask_run<unsigned short>,
-          copy_to_with_mask_run<short>,
-          copy_to_with_mask_run<int>,
-          copy_to_with_mask_run<float>,
-          copy_to_with_mask_run<double>,
-          0
-        };
-
-        CopyToFunc func = tab[depth];
-
-        if (func == 0) cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
-
-        func(mat_src, mat_dst, mask, channels, stream);
-      }
+            extern "C" void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
+            {
+                static CopyToFunc tab[8] =
+                {
+                    copy_to_with_mask_run<unsigned char>,
+                    copy_to_with_mask_run<char>,
+                    copy_to_with_mask_run<unsigned short>,
+                    copy_to_with_mask_run<short>,
+                    copy_to_with_mask_run<int>,
+                    copy_to_with_mask_run<float>,
+                    copy_to_with_mask_run<double>,
+                    0
+                };
+
+                CopyToFunc func = tab[depth];
+
+                if (func == 0) cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
+
+                func(mat_src, mat_dst, mask, channels, stream);
+            }
 
-///////////////////////////////////////////////////////////////////////////
-////////////////////////////////// SetTo //////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
+            ///////////////////////////////////////////////////////////////////////////
+            ////////////////////////////////// SetTo //////////////////////////////////
+            ///////////////////////////////////////////////////////////////////////////
 
-      typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
-      typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);
+            typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
+            typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);
 
-      template <typename T>
-      void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream)
-      {
-        dim3 threadsPerBlock(32, 8, 1);
-        dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
-        if (stream == 0)
-        {
-          ::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
-          cudaSafeCall ( cudaThreadSynchronize() );
-        }
-        else
-        {
-          ::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
-        }
-      }
+            template <typename T>
+            void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream)
+            {
+                dim3 threadsPerBlock(32, 8, 1);
+                dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
+
+                if (stream == 0)
+                {
+                    ::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
+                    cudaSafeCall ( cudaThreadSynchronize() );
+                }
+                else
+                {
+                    ::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
+                }
+            }
 
-      template <typename T>
-      void set_to_without_mask_run(const DevMem2D& mat, int channels, const cudaStream_t & stream)
-      {
-        dim3 threadsPerBlock(32, 8, 1);
-        dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
-        if (stream == 0)
-        {
-          ::mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
-          cudaSafeCall ( cudaThreadSynchronize() );
-        }
-        else
-        {
-          ::mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
-        }
-      }
+            template <typename T>
+            void set_to_without_mask_run(const DevMem2D& mat, int channels, const cudaStream_t & stream)
+            {
+                dim3 threadsPerBlock(32, 8, 1);
+                dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
+
+                if (stream == 0)
+                {
+                    mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
+                    cudaSafeCall ( cudaThreadSynchronize() );
+                }
+                else
+                {
+                    mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
+                }
+            }
 
-      extern "C" void set_to_without_mask(const DevMem2D& mat, int depth, const double * scalar, int channels, const cudaStream_t & stream)
-      {
-        double data[4];
-        data[0] = scalar[0];
-        data[1] = scalar[1];
-        data[2] = scalar[2];
-        data[3] = scalar[3];
-        cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
-
-        static SetToFunc_without_mask tab[8] =
-        {
-          set_to_without_mask_run<unsigned char>,
-          set_to_without_mask_run<char>,
-          set_to_without_mask_run<unsigned short>,
-          set_to_without_mask_run<short>,
-          set_to_without_mask_run<int>,
-          set_to_without_mask_run<float>,
-          set_to_without_mask_run<double>,
-          0
-        };
-
-        SetToFunc_without_mask func = tab[depth];
-
-        if (func == 0) cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
-
-        func(mat, channels, stream);
-      }
+            extern "C" void set_to_without_mask(DevMem2D mat, int depth, const double *scalar, int channels, const cudaStream_t & stream)
+            {
+                cudaSafeCall( cudaMemcpyToSymbol(mat_operators::scalar_d, scalar, sizeof(double) * 4));
+
+                static SetToFunc_without_mask tab[8] =
+                {
+                    set_to_without_mask_run<unsigned char>,
+                    set_to_without_mask_run<char>,
+                    set_to_without_mask_run<unsigned short>,
+                    set_to_without_mask_run<short>,
+                    set_to_without_mask_run<int>,
+                    set_to_without_mask_run<float>,
+                    set_to_without_mask_run<double>,
+                    0
+                };
+
+                SetToFunc_without_mask func = tab[depth];
+
+                if (func == 0)
+                    cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
+
+                func(mat, channels, stream);
+            }
 
-      extern "C" void set_to_with_mask(const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
-      {
-        double data[4];
-        data[0] = scalar[0];
-        data[1] = scalar[1];
-        data[2] = scalar[2];
-        data[3] = scalar[3];
-        cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
-
-        static SetToFunc_with_mask tab[8] =
-        {
-          set_to_with_mask_run<unsigned char>,
-          set_to_with_mask_run<char>,
-          set_to_with_mask_run<unsigned short>,
-          set_to_with_mask_run<short>,
-          set_to_with_mask_run<int>,
-          set_to_with_mask_run<float>,
-          set_to_with_mask_run<double>,
-          0
-        };
-
-        SetToFunc_with_mask func = tab[depth];
-
-        if (func == 0) cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
-
-        func(mat, mask, channels, stream);
-      }
+            extern "C" void set_to_with_mask(DevMem2D mat, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
+            {
+                cudaSafeCall( cudaMemcpyToSymbol(mat_operators::scalar_d, scalar, sizeof(double) * 4));
+
+                static SetToFunc_with_mask tab[8] =
+                {
+                    set_to_with_mask_run<unsigned char>,
+                    set_to_with_mask_run<char>,
+                    set_to_with_mask_run<unsigned short>,
+                    set_to_with_mask_run<short>,
+                    set_to_with_mask_run<int>,
+                    set_to_with_mask_run<float>,
+                    set_to_with_mask_run<double>,
+                    0
+                };
+
+                SetToFunc_with_mask func = tab[depth];
+
+                if (func == 0)
+                    cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
+
+                func(mat, mask, channels, stream);
+            }
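The set_to entry points above fill mat_operators::scalar_d straight from the caller's four-element array via cudaMemcpyToSymbol. A standalone sketch of the same __constant__-memory pattern (all names here are illustrative, not from this patch):

    __constant__ double scalar_d[4];             // device-side fill value, one per channel

    __global__ void fill_with_scalar(double* p)  // kernels read the symbol directly
    {
        p[threadIdx.x] = scalar_d[threadIdx.x & 3];
    }

    void set_scalar(const double* scalar)        // host side: copy 4 doubles into the symbol
    {
        cudaSafeCall( cudaMemcpyToSymbol(scalar_d, scalar, sizeof(double) * 4) );
    }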
-///////////////////////////////////////////////////////////////////////////
-//////////////////////////////// ConvertTo ////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
+            ///////////////////////////////////////////////////////////////////////////
+            //////////////////////////////// ConvertTo ////////////////////////////////
+            ///////////////////////////////////////////////////////////////////////////
 
-      typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
+            typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
 
-      template <typename T, typename DT>
-      void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
-      {
-        const int shift = ::mat_operators::ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
-
-        dim3 block(32, 8);
-        dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
-        if (stream == 0)
-        {
-          ::mat_operators::kernel_convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
-          cudaSafeCall( cudaThreadSynchronize() );
-        }
-        else
-        {
-          ::mat_operators::kernel_convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
-        }
-      }
+            template <typename T, typename DT>
+            void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
+            {
+                const int shift = ::mat_operators::ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
+
+                dim3 block(32, 8);
+                dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
+
+                if (stream == 0)
+                {
+                    mat_operators::kernel_convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
+                    cudaSafeCall( cudaThreadSynchronize() );
+                }
+                else
+                {
+                    mat_operators::kernel_convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
+                }
+            }
 
-      extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
-      {
-        static CvtFunc tab[8][8] =
-        {
-          {cvt_<unsigned char, unsigned char>, cvt_<unsigned char, char>, cvt_<unsigned char, unsigned short>, cvt_<unsigned char, short>,
-           cvt_<unsigned char, int>, cvt_<unsigned char, float>, cvt_<unsigned char, double>, 0},
-
-          {cvt_<char, unsigned char>, cvt_<char, char>, cvt_<char, unsigned short>, cvt_<char, short>,
-           cvt_<char, int>, cvt_<char, float>, cvt_<char, double>, 0},
-
-          {cvt_<unsigned short, unsigned char>, cvt_<unsigned short, char>, cvt_<unsigned short, unsigned short>, cvt_<unsigned short, short>,
-           cvt_<unsigned short, int>, cvt_<unsigned short, float>, cvt_<unsigned short, double>, 0},
-
-          {cvt_<short, unsigned char>, cvt_<short, char>, cvt_<short, unsigned short>, cvt_<short, short>,
-           cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},
-
-          {cvt_<int, unsigned char>, cvt_<int, char>, cvt_<int, unsigned short>,
-           cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},
-
-          {cvt_<float, unsigned char>, cvt_<float, char>, cvt_<float, unsigned short>,
-           cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},
-
-          {cvt_<double, unsigned char>, cvt_<double, char>, cvt_<double, unsigned short>,
-           cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},
-
-          {0,0,0,0,0,0,0,0}
-        };
-
-        CvtFunc func = tab[sdepth][ddepth];
-        if (func == 0)
-          cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
-        func(src, dst, width, height, alpha, beta, stream);
-      }
-    } // namespace impl
-  } // namespace gpu
-} // namespace cv
+            extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
+            {
+                static CvtFunc tab[8][8] =
+                {
+                    {cvt_<unsigned char, unsigned char>, cvt_<unsigned char, char>, cvt_<unsigned char, unsigned short>, cvt_<unsigned char, short>,
+                     cvt_<unsigned char, int>, cvt_<unsigned char, float>, cvt_<unsigned char, double>, 0},
+
+                    {cvt_<char, unsigned char>, cvt_<char, char>, cvt_<char, unsigned short>, cvt_<char, short>,
+                     cvt_<char, int>, cvt_<char, float>, cvt_<char, double>, 0},
+
+                    {cvt_<unsigned short, unsigned char>, cvt_<unsigned short, char>, cvt_<unsigned short, unsigned short>, cvt_<unsigned short, short>,
+                     cvt_<unsigned short, int>, cvt_<unsigned short, float>, cvt_<unsigned short, double>, 0},
+
+                    {cvt_<short, unsigned char>, cvt_<short, char>, cvt_<short, unsigned short>, cvt_<short, short>,
+                     cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},
+
+                    {cvt_<int, unsigned char>, cvt_<int, char>, cvt_<int, unsigned short>,
+                     cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},
+
+                    {cvt_<float, unsigned char>, cvt_<float, char>, cvt_<float, unsigned short>,
+                     cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},
+
+                    {cvt_<double, unsigned char>, cvt_<double, char>, cvt_<double, unsigned short>,
+                     cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},
+
+                    {0,0,0,0,0,0,0,0}
+                };
+
+                CvtFunc func = tab[sdepth][ddepth];
+                if (func == 0)
+                    cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
+                func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
+            }
+        } // namespace impl
+    } // namespace gpu
+} // namespace cv
diff --git a/modules/gpu/src/cuda/safe_call.hpp b/modules/gpu/src/cuda/safe_call.hpp
new file mode 100644
index 0000000000..b088136192
--- /dev/null
+++ b/modules/gpu/src/cuda/safe_call.hpp
@@ -0,0 +1,68 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CUDA_SAFE_CALL_HPP__
+#define __OPENCV_CUDA_SAFE_CALL_HPP__
+
+#include "cuda_runtime_api.h"
+
+#if defined(__GNUC__)
+    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__);
+#else /* defined(__CUDACC__) || defined(__MSVC__) */
+    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
+#endif
+
+namespace cv
+{
+    namespace gpu
+    {
+        extern "C" void error( const char *error_string, const char *file, const int line, const char *func = "");
+
+        static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
+        {
+            if( cudaSuccess != err)
+                cv::gpu::error(cudaGetErrorString(err), file, line, func);
+        }
+    }
+}
+
+#endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
\ No newline at end of file
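The macro above forwards the call site to ___cudaSafeCall, which raises cv::gpu::error on any non-success status. A minimal usage sketch (the buffer and its size are placeholders):

    // Wrap each CUDA runtime call; on failure cv::gpu::error() receives
    // cudaGetErrorString(err) plus __FILE__/__LINE__ (and __func__ under GCC).
    float* d_buf = 0;
    cudaSafeCall( cudaMalloc((void**)&d_buf, 1024 * sizeof(float)) );
    cudaSafeCall( cudaMemset(d_buf, 0, 1024 * sizeof(float)) );
    cudaSafeCall( cudaFree(d_buf) );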
diff --git a/modules/gpu/src/cuda/saturate_cast_gpu.hpp b/modules/gpu/src/cuda/saturate_cast.hpp
similarity index 97%
rename from modules/gpu/src/cuda/saturate_cast_gpu.hpp
rename to modules/gpu/src/cuda/saturate_cast.hpp
index f398e98c39..027ea29386 100644
--- a/modules/gpu/src/cuda/saturate_cast_gpu.hpp
+++ b/modules/gpu/src/cuda/saturate_cast.hpp
@@ -44,4 +44,10 @@
 #define __OPENCV_GPU_SATURATE_CAST_HPP__
 
+template <typename F>
+__device__ void saturate_cast(F)
+{
+
+}
+
 #endif /* __OPENCV_GPU_SATURATE_CAST_HPP__ */
\ No newline at end of file
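The template body above is only an empty placeholder; the eventual intent of saturate_cast is to clamp to the destination range before narrowing. A hypothetical sketch of one such specialization (not part of this patch):

    // Illustration only: float -> unsigned char with clamping and rounding.
    __device__ unsigned char saturate_cast_example(float v)
    {
        v = fminf(fmaxf(v, 0.f), 255.f);          // clamp to [0, 255]
        return (unsigned char)__float2int_rn(v);  // round to nearest
    }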
diff --git a/modules/gpu/src/cuda/stereobm.cu b/modules/gpu/src/cuda/stereobm.cu
index d53d81a3c4..0c9152ce6c 100644
--- a/modules/gpu/src/cuda/stereobm.cu
+++ b/modules/gpu/src/cuda/stereobm.cu
@@ -40,7 +40,12 @@
 //
 //M*/
 
-#include "cuda_shared.hpp"
+//#include "cuda_shared.hpp"
+#include "opencv2/gpu/devmem2d.hpp"
+#include "safe_call.hpp"
+
+static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }
+
 
 using namespace cv::gpu;
 
@@ -392,7 +397,7 @@ namespace cv { namespace gpu { namespace impl
 {
     extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap)
     {
-        cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar>();
+        cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
         cudaSafeCall( cudaBindTexture2D( 0, stereobm_gpu::texForSobel, input.ptr, desc, input.cols, input.rows, input.step ) );
 
         dim3 threads(16, 16, 1);
@@ -520,7 +525,7 @@
         stereobm_gpu::texForTF.addressMode[0] = cudaAddressModeWrap;
         stereobm_gpu::texForTF.addressMode[1] = cudaAddressModeWrap;
 
-        cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar>();
+        cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
         cudaSafeCall( cudaBindTexture2D( 0, stereobm_gpu::texForTF, input.ptr, desc, input.cols, input.rows, input.step ) );
 
         dim3 threads(128, 1, 1);
diff --git a/modules/gpu/src/cudastream.cpp b/modules/gpu/src/cudastream.cpp
index 44d61b37af..0c4d8ff0ba 100644
--- a/modules/gpu/src/cudastream.cpp
+++ b/modules/gpu/src/cudastream.cpp
@@ -41,7 +41,6 @@
 //M*/
 
 #include "precomp.hpp"
-#include "cuda_shared.hpp"
 
 using namespace cv;
 using namespace cv::gpu;
@@ -159,12 +158,12 @@ void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(
 
 void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val)
 {
-    cv::gpu::impl::set_to_without_mask(src, src.depth(), val.val, src.channels(), impl->stream);
+    impl::set_to_without_mask(src, src.depth(), val.val, src.channels(), impl->stream);
 }
 
 void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask)
 {
-    cv::gpu::impl::set_to_with_mask(src, src.depth(), val.val, mask, src.channels(), impl->stream);
+    impl::set_to_with_mask(src, src.depth(), val.val, mask, src.channels(), impl->stream);
 }
 
 void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
@@ -189,7 +188,7 @@ void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rty
         psrc = &(temp = src);
 
     dst.create( src.size(), rtype );
-    cv::gpu::impl::convert_to(*psrc, sdepth, dst, ddepth, psrc->cols * psrc->channels(), psrc->rows, alpha, beta, impl->stream);
+    impl::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta, impl->stream);
 }
 
diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp
index 57dcfc4fa4..4b073e1228 100644
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -47,7 +47,7 @@ using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA)
 
-cv::gpu::remap(const GpuMat& /*src*/, const GpuMat& /*xmap*/, const GpuMat& /*ymap*/, GpuMat& /*dst*/) { throw_nogpu(); }
+void cv::gpu::remap(const GpuMat& /*src*/, const GpuMat& /*xmap*/, const GpuMat& /*ymap*/, GpuMat& /*dst*/) { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
 
diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp
index f8e57ed5b7..d62435fcb0 100644
--- a/modules/gpu/src/matrix_operations.cpp
+++ b/modules/gpu/src/matrix_operations.cpp
@@ -73,10 +73,8 @@ namespace cv
 }
 
-
 #else /* !defined (HAVE_CUDA) */
 
-
 void cv::gpu::GpuMat::upload(const Mat& m)
 {
     CV_DbgAssert(!m.empty());
@@ -84,7 +82,7 @@ void cv::gpu::GpuMat::upload(const Mat& m)
 
     cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
 }
 
-void cv::gpu::GpuMat::upload(const cv::Mat& m, CudaStream & stream)
+void cv::gpu::GpuMat::upload(const MatPL& m, CudaStream& stream)
{ CV_DbgAssert(!m.empty()); stream.enqueueUpload(m, *this); @@ -97,7 +95,7 @@ void cv::gpu::GpuMat::download(cv::Mat& m) const cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) ); } -void cv::gpu::GpuMat::download(cv::Mat& m, CudaStream & stream) const +void cv::gpu::GpuMat::download(MatPL& m, CudaStream& stream) const { CV_DbgAssert(!m.empty()); stream.enqueueDownload(*this, m); @@ -115,12 +113,12 @@ void cv::gpu::GpuMat::copyTo( GpuMat& mat, const GpuMat& mask ) const { if (mask.empty()) { - this->copyTo(mat); + copyTo(mat); } else { - mat.create(this->size(), this->type()); - cv::gpu::impl::copy_to_with_mask(*this, mat, this->depth() , mask, this->channels()); + mat.create(size(), type()); + cv::gpu::impl::copy_to_with_mask(*this, mat, depth(), mask, channels()); } } @@ -146,12 +144,12 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be psrc = &(temp = *this); dst.create( size(), rtype ); - impl::convert_to(*psrc, sdepth, dst, ddepth, psrc->cols * psrc->channels(), psrc->rows, alpha, beta); + impl::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta); } GpuMat& GpuMat::operator = (const Scalar& s) { - cv::gpu::impl::set_to_without_mask( *this, this->depth(), s.val, this->channels()); + cv::gpu::impl::set_to_without_mask( *this, depth(), s.val, channels()); return *this; } @@ -162,13 +160,9 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask) CV_DbgAssert(!this->empty()); if (mask.empty()) - { - cv::gpu::impl::set_to_without_mask( *this, this->depth(), s.val, this->channels()); - } + impl::set_to_without_mask( *this, depth(), s.val, channels()); else - { - cv::gpu::impl::set_to_with_mask( *this, this->depth(), s.val, mask, this->channels()); - } + impl::set_to_with_mask( *this, depth(), s.val, mask, channels()); return *this; }