Merge pull request #9509 from borisfom:cuda9-shfl

This commit is contained in:
Alexander Alekhin 2017-08-31 09:20:12 +00:00
commit 5c572ffa1f

View File

@ -51,6 +51,11 @@
namespace cv { namespace cuda { namespace device
{
#if __CUDACC_VER_MAJOR__ >= 9
# define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
# define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
# define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
#endif
template <typename T>
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
{
@ -148,6 +153,10 @@ namespace cv { namespace cuda { namespace device
}
}}}
# undef __shfl
# undef __shfl_up
# undef __shfl_down
//! @endcond
#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP