mirror of
https://github.com/opencv/opencv.git
synced 2025-07-24 14:06:27 +08:00
__shfl_up_sync with mask for CUDA >= 9
* __shfl_up_sync with proper mask value for CUDA >= 9 * BlockScanInclusive for CUDA >= 9 * compatible_shfl_up for use in integral.hpp * Use CLAHE in cudev * Add tests for BlockScan
This commit is contained in:
parent
0395b2ea9c
commit
970293a229
@ -42,15 +42,9 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/emulation.hpp"
|
||||
#include "opencv2/core/cuda/scan.hpp"
|
||||
#include "opencv2/core/cuda/reduce.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cuda::device;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace clahe
|
||||
{
|
||||
@ -73,7 +67,7 @@ namespace clahe
|
||||
for (int j = threadIdx.x; j < tileSize.x; j += blockDim.x)
|
||||
{
|
||||
const int data = srcPtr[j];
|
||||
Emulation::smem::atomicAdd(&smem[data], 1);
|
||||
::atomicAdd(&smem[data], 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -96,7 +90,7 @@ namespace clahe
|
||||
|
||||
// find number of overall clipped samples
|
||||
|
||||
reduce<256>(smem, clipped, tid, plus<int>());
|
||||
blockReduce<256>(smem, clipped, tid, plus<int>());
|
||||
|
||||
// broadcast evaluated value
|
||||
|
||||
@ -128,10 +122,10 @@ namespace clahe
|
||||
|
||||
calcLutKernel<<<grid, block, 0, stream>>>(src, lut, tileSize, tilesX, clipLimit, lutScale);
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
__global__ void transformKernel(const PtrStepSzb src, PtrStepb dst, const PtrStepb lut, const int2 tileSize, const int tilesX, const int tilesY)
|
||||
@ -173,13 +167,13 @@ namespace clahe
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(transformKernel, cudaFuncCachePreferL1) );
|
||||
CV_CUDEV_SAFE_CALL( cudaFuncSetCacheConfig(transformKernel, cudaFuncCachePreferL1) );
|
||||
|
||||
transformKernel<<<grid, block, 0, stream>>>(src, dst, lut, tileSize, tilesX, tilesY);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -48,12 +48,134 @@
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../warp/scan.hpp"
|
||||
#include "../warp/warp.hpp"
|
||||
|
||||
namespace cv { namespace cudev {
|
||||
|
||||
//! @addtogroup cudev
|
||||
//! @{
|
||||
|
||||
#if __CUDACC_VER_MAJOR__ >= 9
|
||||
|
||||
// Usage Note
|
||||
// - THREADS_NUM should be equal to the number of threads in this block.
|
||||
// - smem must be able to contain at least n elements of type T, where n is equal to the number
|
||||
// of warps in this block. The number can be calculated by divUp(THREADS_NUM, WARP_SIZE).
|
||||
//
|
||||
// Dev Note
|
||||
// - Starting from CUDA 9.0, support for Fermi is dropped. So CV_CUDEV_ARCH >= 300 is implied.
|
||||
// - "For Pascal and earlier architectures (CV_CUDEV_ARCH < 700), all threads in mask must execute
|
||||
// the same warp intrinsic instruction in convergence, and the union of all values in mask must
|
||||
// be equal to the warp's active mask."
|
||||
// (https://docs.nvidia.com/cuda/archive/10.0/cuda-c-programming-guide#independent-thread-scheduling-7-x)
|
||||
// - Above restriction does not apply starting from Volta (CV_CUDEV_ARCH >= 700). We just need to
|
||||
// take care so that "all non-exited threads named in mask must execute the same intrinsic with
|
||||
// the same mask."
|
||||
// (https://docs.nvidia.com/cuda/archive/10.0/cuda-c-programming-guide#warp-description)
|
||||
|
||||
template <int THREADS_NUM, typename T>
|
||||
__device__ T blockScanInclusive(T data, volatile T* smem, uint tid)
|
||||
{
|
||||
const int residual = THREADS_NUM & (WARP_SIZE - 1);
|
||||
|
||||
#if CV_CUDEV_ARCH < 700
|
||||
const uint residual_mask = (1U << residual) - 1;
|
||||
#endif
|
||||
|
||||
if (THREADS_NUM > WARP_SIZE)
|
||||
{
|
||||
// bottom-level inclusive warp scan
|
||||
#if CV_CUDEV_ARCH >= 700
|
||||
T warpResult = warpScanInclusive(0xFFFFFFFFU, data);
|
||||
#else
|
||||
T warpResult;
|
||||
|
||||
if (0 == residual)
|
||||
warpResult = warpScanInclusive(0xFFFFFFFFU, data);
|
||||
else
|
||||
{
|
||||
const int n_warps = divUp(THREADS_NUM, WARP_SIZE);
|
||||
const int warp_num = Warp::warpId();
|
||||
|
||||
if (warp_num < n_warps - 1)
|
||||
warpResult = warpScanInclusive(0xFFFFFFFFU, data);
|
||||
else
|
||||
{
|
||||
// We are at the last threads of a block whose number of threads
|
||||
// is not a multiple of the warp size
|
||||
warpResult = warpScanInclusive(residual_mask, data);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
__syncthreads();
|
||||
|
||||
// save top elements of each warp for exclusive warp scan
|
||||
// sync to wait for warp scans to complete (because smem is being overwritten)
|
||||
if ((tid & (WARP_SIZE - 1)) == (WARP_SIZE - 1))
|
||||
{
|
||||
smem[tid >> LOG_WARP_SIZE] = warpResult;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
int quot = THREADS_NUM / WARP_SIZE;
|
||||
|
||||
if (tid < quot)
|
||||
{
|
||||
// grab top warp elements
|
||||
T val = smem[tid];
|
||||
|
||||
uint mask = (1LLU << quot) - 1;
|
||||
|
||||
if (0 == residual)
|
||||
{
|
||||
// calculate exclusive scan and write back to shared memory
|
||||
smem[tid] = warpScanExclusive(mask, val);
|
||||
}
|
||||
else
|
||||
{
|
||||
// calculate inclusive scan and write back to shared memory with offset 1
|
||||
smem[tid + 1] = warpScanInclusive(mask, val);
|
||||
|
||||
if (tid == 0)
|
||||
smem[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
// return updated warp scans
|
||||
return warpResult + smem[tid >> LOG_WARP_SIZE];
|
||||
}
|
||||
else
|
||||
{
|
||||
#if CV_CUDEV_ARCH >= 700
|
||||
return warpScanInclusive(0xFFFFFFFFU, data);
|
||||
#else
|
||||
if (THREADS_NUM == WARP_SIZE)
|
||||
return warpScanInclusive(0xFFFFFFFFU, data);
|
||||
else
|
||||
return warpScanInclusive(residual_mask, data);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template <int THREADS_NUM, typename T>
|
||||
__device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint tid)
|
||||
{
|
||||
return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data;
|
||||
}
|
||||
|
||||
#else // __CUDACC_VER_MAJOR__ >= 9
|
||||
|
||||
// Usage Note
|
||||
// - THREADS_NUM should be equal to the number of threads in this block.
|
||||
// - (>= Kepler) smem must be able to contain at least n elements of type T, where n is equal to the number
|
||||
// of warps in this block. The number can be calculated by divUp(THREADS_NUM, WARP_SIZE).
|
||||
// - (Fermi) smem must be able to contain at least n elements of type T, where n is equal to the number
|
||||
// of threads in this block (= THREADS_NUM).
|
||||
|
||||
template <int THREADS_NUM, typename T>
|
||||
__device__ T blockScanInclusive(T data, volatile T* smem, uint tid)
|
||||
{
|
||||
@ -73,18 +195,31 @@ __device__ T blockScanInclusive(T data, volatile T* smem, uint tid)
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (tid < (THREADS_NUM / WARP_SIZE))
|
||||
int quot = THREADS_NUM / WARP_SIZE;
|
||||
|
||||
if (tid < quot)
|
||||
{
|
||||
// grab top warp elements
|
||||
T val = smem[tid];
|
||||
|
||||
// calculate exclusive scan and write back to shared memory
|
||||
smem[tid] = warpScanExclusive(val, smem, tid);
|
||||
if (0 == (THREADS_NUM & (WARP_SIZE - 1)))
|
||||
{
|
||||
// calculate exclusive scan and write back to shared memory
|
||||
smem[tid] = warpScanExclusive(val, smem, tid);
|
||||
}
|
||||
else
|
||||
{
|
||||
// calculate inclusive scan and write back to shared memory with offset 1
|
||||
smem[tid + 1] = warpScanInclusive(val, smem, tid);
|
||||
|
||||
if (tid == 0)
|
||||
smem[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
// return updated warp scans with exclusive scan results
|
||||
// return updated warp scans
|
||||
return warpResult + smem[tid >> LOG_WARP_SIZE];
|
||||
}
|
||||
else
|
||||
@ -99,6 +234,8 @@ __device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint t
|
||||
return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data;
|
||||
}
|
||||
|
||||
#endif // __CUDACC_VER_MAJOR__ >= 9
|
||||
|
||||
//! @}
|
||||
|
||||
}}
|
||||
|
@ -215,7 +215,7 @@ namespace integral_detail
|
||||
#pragma unroll
|
||||
for (int i = 1; i < 32; i *= 2)
|
||||
{
|
||||
const int n = shfl_up(sum, i, 32);
|
||||
const int n = compatible_shfl_up(sum, i, 32);
|
||||
|
||||
if (lane_id >= i)
|
||||
{
|
||||
@ -245,9 +245,9 @@ namespace integral_detail
|
||||
int warp_sum = sums[lane_id];
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 1; i <= 32; i *= 2)
|
||||
for (int i = 1; i < 32; i *= 2)
|
||||
{
|
||||
const int n = shfl_up(warp_sum, i, 32);
|
||||
const int n = compatible_shfl_up(warp_sum, i, 32);
|
||||
|
||||
if (lane_id >= i)
|
||||
warp_sum += n;
|
||||
@ -453,7 +453,7 @@ namespace integral_detail
|
||||
|
||||
for (int i = 1; i <= 8; i *= 2)
|
||||
{
|
||||
T n = shfl_up(partial_sum, i, 32);
|
||||
T n = compatible_shfl_up(partial_sum, i, 32);
|
||||
|
||||
if (lane_id >= i)
|
||||
partial_sum += n;
|
||||
|
@ -55,6 +55,36 @@ namespace cv { namespace cudev {
|
||||
//! @addtogroup cudev
|
||||
//! @{
|
||||
|
||||
#if __CUDACC_VER_MAJOR__ >= 9
|
||||
|
||||
// Starting from CUDA 9.0, support for Fermi is dropped.
|
||||
// So CV_CUDEV_ARCH >= 300 is implied.
|
||||
|
||||
template <typename T>
|
||||
__device__ T warpScanInclusive(uint mask, T data)
|
||||
{
|
||||
const uint laneId = Warp::laneId();
|
||||
|
||||
// scan on shufl functions
|
||||
#pragma unroll
|
||||
for (int i = 1; i <= (WARP_SIZE / 2); i *= 2)
|
||||
{
|
||||
const T val = shfl_up_sync(mask, data, i);
|
||||
if (laneId >= i)
|
||||
data += val;
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T warpScanExclusive(uint mask, T data)
|
||||
{
|
||||
return warpScanInclusive(mask, data) - data;
|
||||
}
|
||||
|
||||
#else // __CUDACC_VER_MAJOR__ >= 9
|
||||
|
||||
template <typename T>
|
||||
__device__ T warpScanInclusive(T data, volatile T* smem, uint tid)
|
||||
{
|
||||
@ -75,19 +105,16 @@ __device__ T warpScanInclusive(T data, volatile T* smem, uint tid)
|
||||
|
||||
return data;
|
||||
#else
|
||||
uint pos = 2 * tid - (tid & (WARP_SIZE - 1));
|
||||
smem[pos] = 0;
|
||||
const uint laneId = Warp::laneId();
|
||||
|
||||
pos += WARP_SIZE;
|
||||
smem[pos] = data;
|
||||
smem[tid] = data;
|
||||
|
||||
smem[pos] += smem[pos - 1];
|
||||
smem[pos] += smem[pos - 2];
|
||||
smem[pos] += smem[pos - 4];
|
||||
smem[pos] += smem[pos - 8];
|
||||
smem[pos] += smem[pos - 16];
|
||||
#pragma unroll
|
||||
for (int i = 1; i <= (WARP_SIZE / 2); i *= 2)
|
||||
if (laneId >= i)
|
||||
smem[tid] += smem[tid - i];
|
||||
|
||||
return smem[pos];
|
||||
return smem[tid];
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -97,6 +124,8 @@ __device__ __forceinline__ T warpScanExclusive(T data, volatile T* smem, uint ti
|
||||
return warpScanInclusive(data, smem, tid) - data;
|
||||
}
|
||||
|
||||
#endif // __CUDACC_VER_MAJOR__ >= 9
|
||||
|
||||
//! @}
|
||||
|
||||
}}
|
||||
|
@ -48,6 +48,8 @@
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../util/vec_traits.hpp"
|
||||
#include "../block/block.hpp"
|
||||
#include "warp.hpp"
|
||||
|
||||
namespace cv { namespace cudev {
|
||||
|
||||
@ -59,7 +61,7 @@ namespace cv { namespace cudev {
|
||||
#if __CUDACC_VER_MAJOR__ >= 9
|
||||
# define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
|
||||
# define __shfl_xor(x, y, z) __shfl_xor_sync(0xFFFFFFFFU, x, y, z)
|
||||
# define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
|
||||
//# define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
|
||||
# define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
|
||||
#endif
|
||||
|
||||
@ -155,6 +157,53 @@ CV_CUDEV_SHFL_VEC_INST(double)
|
||||
|
||||
// shfl_up
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T compatible_shfl_up(T val, uint delta, int width = warpSize)
|
||||
{
|
||||
#if __CUDACC_VER_MAJOR__ < 9
|
||||
|
||||
return shfl_up(val, delta, width);
|
||||
|
||||
#else // __CUDACC_VER_MAJOR__ < 9
|
||||
|
||||
#if CV_CUDEV_ARCH >= 700
|
||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||
#else
|
||||
const int block_size = Block::blockSize();
|
||||
const int residual = block_size & (warpSize - 1);
|
||||
|
||||
if (0 == residual)
|
||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||
else
|
||||
{
|
||||
const int n_warps = divUp(block_size, warpSize);
|
||||
const int warp_id = Warp::warpId();
|
||||
|
||||
if (warp_id < n_warps - 1)
|
||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||
else
|
||||
{
|
||||
// We are at the last threads of a block whose number of threads
|
||||
// is not a multiple of the warp size
|
||||
uint mask = (1LU << residual) - 1;
|
||||
return shfl_up_sync(mask, val, delta, width);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __CUDACC_VER_MAJOR__ < 9
|
||||
}
|
||||
|
||||
#if __CUDACC_VER_MAJOR__ >= 9
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T shfl_up_sync(uint mask, T val, uint delta, int width = warpSize)
|
||||
{
|
||||
return (T) __shfl_up_sync(mask, val, delta, width);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
__device__ __forceinline__ uchar shfl_up(uchar val, uint delta, int width = warpSize)
|
||||
{
|
||||
return (uchar) __shfl_up((int) val, delta, width);
|
||||
@ -244,6 +293,8 @@ CV_CUDEV_SHFL_UP_VEC_INST(double)
|
||||
|
||||
#undef CV_CUDEV_SHFL_UP_VEC_INST
|
||||
|
||||
#endif
|
||||
|
||||
// shfl_down
|
||||
|
||||
__device__ __forceinline__ uchar shfl_down(uchar val, uint delta, int width = warpSize)
|
||||
|
140
modules/cudev/test/test_scan.cu
Normal file
140
modules/cudev/test/test_scan.cu
Normal file
@ -0,0 +1,140 @@
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cudev;
|
||||
using namespace cvtest;
|
||||
|
||||
// BlockScanInt
|
||||
|
||||
template <int THREADS_NUM>
|
||||
__global__ void int_kernel(int* data)
|
||||
{
|
||||
uint tid = Block::threadLineId();
|
||||
|
||||
#if CV_CUDEV_ARCH >= 300
|
||||
const int n_warps = (THREADS_NUM - 1) / WARP_SIZE + 1;
|
||||
__shared__ int smem[n_warps];
|
||||
#else
|
||||
__shared__ int smem[THREADS_NUM];
|
||||
#endif
|
||||
|
||||
data[tid] = blockScanInclusive<THREADS_NUM>(data[tid], smem, tid);
|
||||
}
|
||||
|
||||
#define BLOCK_SCAN_INT_TEST(block_size) \
|
||||
TEST(BlockScanInt, BlockSize##block_size) \
|
||||
{ \
|
||||
Mat src = randomMat(Size(block_size, 1), CV_32SC1, 0, 1024); \
|
||||
\
|
||||
GpuMat d_src; \
|
||||
d_src.upload(src); \
|
||||
\
|
||||
for (int col = 1; col < block_size; col++) \
|
||||
src.at<int>(0, col) += src.at<int>(0, col - 1); \
|
||||
\
|
||||
int_kernel<block_size><<<1, block_size>>>((int*)d_src.data); \
|
||||
\
|
||||
CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize()); \
|
||||
\
|
||||
EXPECT_MAT_NEAR(d_src, src, 0); \
|
||||
}
|
||||
|
||||
BLOCK_SCAN_INT_TEST(29)
|
||||
BLOCK_SCAN_INT_TEST(30)
|
||||
BLOCK_SCAN_INT_TEST(32)
|
||||
BLOCK_SCAN_INT_TEST(40)
|
||||
BLOCK_SCAN_INT_TEST(41)
|
||||
|
||||
BLOCK_SCAN_INT_TEST(59)
|
||||
BLOCK_SCAN_INT_TEST(60)
|
||||
BLOCK_SCAN_INT_TEST(64)
|
||||
BLOCK_SCAN_INT_TEST(70)
|
||||
BLOCK_SCAN_INT_TEST(71)
|
||||
|
||||
BLOCK_SCAN_INT_TEST(109)
|
||||
BLOCK_SCAN_INT_TEST(110)
|
||||
BLOCK_SCAN_INT_TEST(128)
|
||||
BLOCK_SCAN_INT_TEST(130)
|
||||
BLOCK_SCAN_INT_TEST(131)
|
||||
|
||||
BLOCK_SCAN_INT_TEST(189)
|
||||
BLOCK_SCAN_INT_TEST(200)
|
||||
BLOCK_SCAN_INT_TEST(256)
|
||||
BLOCK_SCAN_INT_TEST(300)
|
||||
BLOCK_SCAN_INT_TEST(311)
|
||||
|
||||
BLOCK_SCAN_INT_TEST(489)
|
||||
BLOCK_SCAN_INT_TEST(500)
|
||||
BLOCK_SCAN_INT_TEST(512)
|
||||
BLOCK_SCAN_INT_TEST(600)
|
||||
BLOCK_SCAN_INT_TEST(611)
|
||||
|
||||
BLOCK_SCAN_INT_TEST(1024)
|
||||
|
||||
// BlockScanDouble
|
||||
|
||||
template <int THREADS_NUM>
|
||||
__global__ void double_kernel(double* data)
|
||||
{
|
||||
uint tid = Block::threadLineId();
|
||||
|
||||
#if CV_CUDEV_ARCH >= 300
|
||||
const int n_warps = (THREADS_NUM - 1) / WARP_SIZE + 1;
|
||||
__shared__ double smem[n_warps];
|
||||
#else
|
||||
__shared__ double smem[THREADS_NUM];
|
||||
#endif
|
||||
|
||||
data[tid] = blockScanInclusive<THREADS_NUM>(data[tid], smem, tid);
|
||||
}
|
||||
|
||||
#define BLOCK_SCAN_DOUBLE_TEST(block_size) \
|
||||
TEST(BlockScanDouble, BlockSize##block_size) \
|
||||
{ \
|
||||
Mat src = randomMat(Size(block_size, 1), CV_64FC1, 0.0, 1.0); \
|
||||
\
|
||||
GpuMat d_src; \
|
||||
d_src.upload(src); \
|
||||
\
|
||||
for (int col = 1; col < block_size; col++) \
|
||||
src.at<double>(0, col) += src.at<double>(0, col - 1); \
|
||||
\
|
||||
double_kernel<block_size><<<1, block_size>>>((double*)d_src.data); \
|
||||
\
|
||||
CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize()); \
|
||||
\
|
||||
EXPECT_MAT_NEAR(d_src, src, 1e-10); \
|
||||
}
|
||||
|
||||
BLOCK_SCAN_DOUBLE_TEST(29)
|
||||
BLOCK_SCAN_DOUBLE_TEST(30)
|
||||
BLOCK_SCAN_DOUBLE_TEST(32)
|
||||
BLOCK_SCAN_DOUBLE_TEST(40)
|
||||
BLOCK_SCAN_DOUBLE_TEST(41)
|
||||
|
||||
BLOCK_SCAN_DOUBLE_TEST(59)
|
||||
BLOCK_SCAN_DOUBLE_TEST(60)
|
||||
BLOCK_SCAN_DOUBLE_TEST(64)
|
||||
BLOCK_SCAN_DOUBLE_TEST(70)
|
||||
BLOCK_SCAN_DOUBLE_TEST(71)
|
||||
|
||||
BLOCK_SCAN_DOUBLE_TEST(109)
|
||||
BLOCK_SCAN_DOUBLE_TEST(110)
|
||||
BLOCK_SCAN_DOUBLE_TEST(128)
|
||||
BLOCK_SCAN_DOUBLE_TEST(130)
|
||||
BLOCK_SCAN_DOUBLE_TEST(131)
|
||||
|
||||
BLOCK_SCAN_DOUBLE_TEST(189)
|
||||
BLOCK_SCAN_DOUBLE_TEST(200)
|
||||
BLOCK_SCAN_DOUBLE_TEST(256)
|
||||
BLOCK_SCAN_DOUBLE_TEST(300)
|
||||
BLOCK_SCAN_DOUBLE_TEST(311)
|
||||
|
||||
BLOCK_SCAN_DOUBLE_TEST(489)
|
||||
BLOCK_SCAN_DOUBLE_TEST(500)
|
||||
BLOCK_SCAN_DOUBLE_TEST(512)
|
||||
BLOCK_SCAN_DOUBLE_TEST(600)
|
||||
BLOCK_SCAN_DOUBLE_TEST(611)
|
||||
|
||||
BLOCK_SCAN_DOUBLE_TEST(1024)
|
Loading…
Reference in New Issue
Block a user