diff --git a/modules/core/include/opencv2/core/devmem2d.hpp b/modules/core/include/opencv2/core/devmem2d.hpp index ae8935f68e..276aeb2331 100644 --- a/modules/core/include/opencv2/core/devmem2d.hpp +++ b/modules/core/include/opencv2/core/devmem2d.hpp @@ -45,14 +45,14 @@ #ifdef __cplusplus -#ifdef __CUDACC__ - #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__ +#ifdef __CUDACC__ + #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__ #else #define __CV_GPU_HOST_DEVICE__ #endif namespace cv -{ +{ namespace gpu { // Simple lightweight structures that encapsulates information about an image on device. @@ -61,88 +61,88 @@ namespace cv template struct StaticAssert; template <> struct StaticAssert {static __CV_GPU_HOST_DEVICE__ void check(){}}; - template struct DevPtr - { - typedef T elem_type; - typedef int index_type; + template struct DevPtr + { + typedef T elem_type; + typedef int index_type; - enum { elem_size = sizeof(elem_type) }; + enum { elem_size = sizeof(elem_type) }; - T* data; + T* data; - __CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {} - __CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {} + __CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {} + __CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {} - __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; } - __CV_GPU_HOST_DEVICE__ operator T*() { return data; } - __CV_GPU_HOST_DEVICE__ operator const T*() const { return data; } - }; - - template struct PtrSz : public DevPtr - { + __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; } + __CV_GPU_HOST_DEVICE__ operator T*() { return data; } + __CV_GPU_HOST_DEVICE__ operator const T*() const { return data; } + }; + + template struct PtrSz : public DevPtr + { __CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {} __CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr(data_), size(size_) {} size_t size; }; - template struct PtrStep : public DevPtr - { + template struct PtrStep : public DevPtr + { __CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {} - __CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr(data_), step(step_) {} + __CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr(data_), step(step_) {} /** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */ - size_t step; + size_t step; - __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr::data + y * step); } + __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr::data + y * step); } __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr::data + y * step); } - __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; } + __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; } __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } }; - template struct PtrStepSz : public PtrStep - { + template struct PtrStepSz : public PtrStep + { __CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {} - __CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_) + __CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_) : PtrStep(data_, step_), cols(cols_), rows(rows_) {} int cols; - int rows; + int rows; }; - template struct DevMem2D_ : public PtrStepSz - { + template struct DevMem2D_ : public PtrStepSz + { DevMem2D_() {} - DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz(rows_, cols_, data_, step_) {} - - template - explicit DevMem2D_(const DevMem2D_& d) : PtrStepSz(d.rows, d.cols, (T*)d.data, d.step) {} + DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz(rows_, cols_, data_, step_) {} + + template + explicit DevMem2D_(const DevMem2D_& d) : PtrStepSz(d.rows, d.cols, (T*)d.data, d.step) {} }; - + template struct PtrElemStep_ : public PtrStep - { - PtrElemStep_(const DevMem2D_& mem) : PtrStep(mem.data, mem.step) + { + PtrElemStep_(const DevMem2D_& mem) : PtrStep(mem.data, mem.step) { StaticAssert<256 % sizeof(T) == 0>::check(); - PtrStep::step /= PtrStep::elem_size; + PtrStep::step /= PtrStep::elem_size; } __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep::data + y * PtrStep::step; } - __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep::data + y * PtrStep::step; } + __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep::data + y * PtrStep::step; } __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; } - __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } + __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } }; - template struct PtrStep_ : public PtrStep - { + template struct PtrStep_ : public PtrStep + { PtrStep_() {} - PtrStep_(const DevMem2D_& mem) : PtrStep(mem.data, mem.step) {} + PtrStep_(const DevMem2D_& mem) : PtrStep(mem.data, mem.step) {} }; typedef DevMem2D_ DevMem2Db; - typedef DevMem2Db DevMem2D; + typedef DevMem2Db DevMem2D; typedef DevMem2D_ DevMem2Df; typedef DevMem2D_ DevMem2Di; @@ -152,8 +152,8 @@ namespace cv typedef PtrElemStep_ PtrElemStep; typedef PtrElemStep_ PtrElemStepf; - typedef PtrElemStep_ PtrElemStepi; - } + typedef PtrElemStep_ PtrElemStepi; + } } #endif // __cplusplus diff --git a/modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu b/modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu index 44e6fde31c..3e57444d6a 100644 --- a/modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu +++ b/modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu @@ -172,11 +172,11 @@ static void add(float *res, const float *rhs, const int count, cudaStream_t stre /////////////////////////////////////////////////////////////////////////////// __global__ void scaleVector(float *d_res, const float *d_src, float scale, const int len) { - const int pos = blockIdx.x * blockDim.x + threadIdx.x; - - if (pos >= len) return; - - d_res[pos] = d_src[pos] * scale; + const int pos = blockIdx.x * blockDim.x + threadIdx.x; + + if (pos >= len) return; + + d_res[pos] = d_src[pos] * scale; } /////////////////////////////////////////////////////////////////////////////// @@ -191,10 +191,10 @@ __global__ void scaleVector(float *d_res, const float *d_src, float scale, const /////////////////////////////////////////////////////////////////////////////// static void ScaleVector(float *d_res, const float *d_src, float scale, const int len, cudaStream_t stream) { - dim3 threads(256); - dim3 blocks(iDivUp(len, threads.x)); - - scaleVector<<>>(d_res, d_src, scale, len); + dim3 threads(256); + dim3 blocks(iDivUp(len, threads.x)); + + scaleVector<<>>(d_res, d_src, scale, len); } const int SOR_TILE_WIDTH = 32; @@ -1128,14 +1128,14 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, ncvAssertReturnNcvStat( nppiStResize_32f_C1R (ptrU->ptr(), srcSize, kLevelStride * sizeof (float), srcROI, ptrUNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic) ); - - ScaleVector(ptrUNew->ptr(), ptrUNew->ptr(), 1.0f/scale_factor, ns * nh, stream); + + ScaleVector(ptrUNew->ptr(), ptrUNew->ptr(), 1.0f/scale_factor, ns * nh, stream); ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR); ncvAssertReturnNcvStat( nppiStResize_32f_C1R (ptrV->ptr(), srcSize, kLevelStride * sizeof (float), srcROI, ptrVNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic) ); - - ScaleVector(ptrVNew->ptr(), ptrVNew->ptr(), 1.0f/scale_factor, ns * nh, stream); + + ScaleVector(ptrVNew->ptr(), ptrVNew->ptr(), 1.0f/scale_factor, ns * nh, stream); ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR); cv::gpu::device::swap(ptrU, ptrUNew);