Merge branch 'work'

This commit is contained in:
Marina Kolpakova 2012-06-19 13:41:15 +00:00
parent 18918a5e44
commit 6620c5c92d

View File

@@ -44,25 +44,25 @@
#ifndef OPENCV_GPU_WARP_REDUCE_HPP__
#define OPENCV_GPU_WARP_REDUCE_HPP__
namespace cv { namespace gpu { namespace device
{
    // Adds up the 32 consecutive elements of ptr that start at index (tid - lane),
    // i.e. the slots owned by the calling thread's warp, and returns that sum.
    //
    // ptr : buffer holding one element per thread. Indices (tid - lane) .. (tid - lane + 31)
    //       must be valid: lane 15 reads ptr[tid + 16] in the first step.
    //       The buffer is modified in place - lanes 0..15 overwrite their own slot with
    //       intermediate partial sums, so the input values are not preserved.
    //       (Presumably a shared-memory buffer; nothing here enforces that - confirm at call sites.)
    // tid : index of the calling thread's element in ptr; defaults to threadIdx.x.
    //       Only its low five bits are used to derive the lane.
    // T   : must support operator+ and assignment to/from a volatile T.
    //
    // Returns ptr[tid - lane], the slot of lane 0, which holds the sum of all 32 elements
    // once the five steps below have run.
    //
    // NOTE(review): there is no barrier between the steps, yet each step reads a slot that
    // another lane wrote in the previous step. The result is therefore only correct when
    // all lanes of the warp execute these statements in lockstep; `volatile` merely keeps
    // the compiler from caching the slots in registers. On GPUs with independent thread
    // scheduling (Volta and newer) this would need explicit __syncwarp() calls - confirm
    // against the architectures this header is built for.
    template <class T>
    __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
    {
        const unsigned int lane = tid & 31; // index of thread in warp (0..31)

        // Only the lower half of the warp accumulates; each step halves the distance
        // to the partner slot (16, 8, 4, 2, 1), folding the upper slots into lane 0.
        if (lane < 16)
        {
            T partial = ptr[tid];

            // Every step publishes the running sum to ptr[tid] so that lower lanes can
            // pick it up in the following step. Slots of lanes > 0 end up with partial
            // sums that are not part of the result.
            ptr[tid] = partial = partial + ptr[tid + 16];
            ptr[tid] = partial = partial + ptr[tid + 8];
            ptr[tid] = partial = partial + ptr[tid + 4];
            ptr[tid] = partial = partial + ptr[tid + 2];
            ptr[tid] = partial = partial + ptr[tid + 1];
        }

        // tid - lane is the index of lane 0's slot, where the complete sum was accumulated.
        return ptr[tid - lane];
    }
}}} // namespace cv { namespace gpu { namespace device {