mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 06:26:29 +08:00
Merge pull request #13958 from nglee:dev_FixCuda101BuildIssue
This commit is contained in:
commit
3132c8ee08
@ -157,43 +157,6 @@ CV_CUDEV_SHFL_VEC_INST(double)
|
||||
|
||||
// shfl_up
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T compatible_shfl_up(T val, uint delta, int width = warpSize)
|
||||
{
|
||||
#if __CUDACC_VER_MAJOR__ < 9
|
||||
|
||||
return shfl_up(val, delta, width);
|
||||
|
||||
#else // __CUDACC_VER_MAJOR__ < 9
|
||||
|
||||
#if CV_CUDEV_ARCH >= 700
|
||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||
#else
|
||||
const int block_size = Block::blockSize();
|
||||
const int residual = block_size & (warpSize - 1);
|
||||
|
||||
if (0 == residual)
|
||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||
else
|
||||
{
|
||||
const int n_warps = divUp(block_size, warpSize);
|
||||
const int warp_id = Warp::warpId();
|
||||
|
||||
if (warp_id < n_warps - 1)
|
||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||
else
|
||||
{
|
||||
// We are at the last threads of a block whose number of threads
|
||||
// is not a multiple of the warp size
|
||||
uint mask = (1LU << residual) - 1;
|
||||
return shfl_up_sync(mask, val, delta, width);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __CUDACC_VER_MAJOR__ < 9
|
||||
}
|
||||
|
||||
#if __CUDACC_VER_MAJOR__ >= 9
|
||||
|
||||
template <typename T>
|
||||
@ -300,6 +263,43 @@ CV_CUDEV_SHFL_UP_VEC_INST(double)
|
||||
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T compatible_shfl_up(T val, uint delta, int width = warpSize)
|
||||
{
|
||||
#if __CUDACC_VER_MAJOR__ < 9
|
||||
|
||||
return shfl_up(val, delta, width);
|
||||
|
||||
#else // __CUDACC_VER_MAJOR__ < 9
|
||||
|
||||
#if CV_CUDEV_ARCH >= 700
|
||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||
#else
|
||||
const int block_size = Block::blockSize();
|
||||
const int residual = block_size & (warpSize - 1);
|
||||
|
||||
if (0 == residual)
|
||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||
else
|
||||
{
|
||||
const int n_warps = divUp(block_size, warpSize);
|
||||
const int warp_id = Warp::warpId();
|
||||
|
||||
if (warp_id < n_warps - 1)
|
||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||
else
|
||||
{
|
||||
// We are at the last threads of a block whose number of threads
|
||||
// is not a multiple of the warp size
|
||||
uint mask = (1LU << residual) - 1;
|
||||
return shfl_up_sync(mask, val, delta, width);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __CUDACC_VER_MAJOR__ < 9
|
||||
}
|
||||
|
||||
// shfl_down
|
||||
|
||||
__device__ __forceinline__ uchar shfl_down(uchar val, uint delta, int width = warpSize)
|
||||
|
Loading…
Reference in New Issue
Block a user