mirror of
https://github.com/opencv/opencv.git
synced 2024-11-28 21:20:18 +08:00
fixed and updated gpu implementation of separable liner filters
now it supports kernel's size up to 32
This commit is contained in:
parent
b96a556fff
commit
5af529c1bd
@ -139,6 +139,6 @@ INSTANTIATE_TEST_CASE_P(Filter, SeparableLinearFilter, testing::Combine(
|
|||||||
ALL_DEVICES,
|
ALL_DEVICES,
|
||||||
GPU_TYPICAL_MAT_SIZES,
|
GPU_TYPICAL_MAT_SIZES,
|
||||||
testing::Values(CV_8UC1, CV_8UC4, CV_32FC1),
|
testing::Values(CV_8UC1, CV_8UC4, CV_32FC1),
|
||||||
testing::Values(3, 5)));
|
testing::Values(3, 5, 7, 9, 11, 13, 15)));
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -46,17 +46,14 @@
|
|||||||
#include "opencv2/gpu/device/vec_math.hpp"
|
#include "opencv2/gpu/device/vec_math.hpp"
|
||||||
#include "opencv2/gpu/device/limits.hpp"
|
#include "opencv2/gpu/device/limits.hpp"
|
||||||
#include "opencv2/gpu/device/border_interpolate.hpp"
|
#include "opencv2/gpu/device/border_interpolate.hpp"
|
||||||
|
#include "opencv2/gpu/device/static_check.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
#define MAX_KERNEL_SIZE 16
|
|
||||||
#define BLOCK_DIM_X 16
|
|
||||||
#define BLOCK_DIM_Y 4
|
|
||||||
#define RESULT_STEPS 8
|
|
||||||
#define HALO_STEPS 1
|
|
||||||
|
|
||||||
namespace column_filter
|
namespace column_filter
|
||||||
{
|
{
|
||||||
|
#define MAX_KERNEL_SIZE 32
|
||||||
|
|
||||||
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
||||||
|
|
||||||
void loadKernel(const float kernel[], int ksize)
|
void loadKernel(const float kernel[], int ksize)
|
||||||
@ -64,64 +61,75 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int KERNEL_SIZE, typename T, typename D, typename B>
|
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int PATCH_PER_BLOCK, int HALO_SIZE, int KSIZE, typename T, typename D, typename B>
|
||||||
__global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, int anchor, const B b)
|
__global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
|
||||||
{
|
{
|
||||||
|
Static<KSIZE <= MAX_KERNEL_SIZE>::check();
|
||||||
|
Static<HALO_SIZE * BLOCK_DIM_Y >= KSIZE>::check();
|
||||||
|
Static<VecTraits<T>::cn == VecTraits<D>::cn>::check();
|
||||||
|
|
||||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
|
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
|
||||||
|
|
||||||
__shared__ T smem[BLOCK_DIM_X][(RESULT_STEPS + 2 * HALO_STEPS) * BLOCK_DIM_Y + 1];
|
__shared__ sum_t smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X];
|
||||||
|
|
||||||
//Offset to the upper halo edge
|
|
||||||
const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;
|
const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;
|
||||||
const int y = (blockIdx.y * RESULT_STEPS - HALO_STEPS) * BLOCK_DIM_Y + threadIdx.y;
|
|
||||||
|
|
||||||
if (x < src.cols)
|
if (x >= src.cols)
|
||||||
|
return;
|
||||||
|
|
||||||
|
const T* src_col = src.ptr() + x;
|
||||||
|
|
||||||
|
const int yStart = blockIdx.y * (BLOCK_DIM_Y * PATCH_PER_BLOCK) + threadIdx.y;
|
||||||
|
|
||||||
|
//Upper halo
|
||||||
|
#pragma unroll
|
||||||
|
for (int j = 0; j < HALO_SIZE; ++j)
|
||||||
|
smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_low(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, src_col, src.step));
|
||||||
|
|
||||||
|
//Main data
|
||||||
|
#pragma unroll
|
||||||
|
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
|
||||||
|
smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + j * BLOCK_DIM_Y, src_col, src.step));
|
||||||
|
|
||||||
|
//Lower halo
|
||||||
|
#pragma unroll
|
||||||
|
for (int j = 0; j < HALO_SIZE; ++j)
|
||||||
|
smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, src_col, src.step));
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
|
||||||
{
|
{
|
||||||
const T* src_col = src.ptr() + x;
|
const int y = yStart + j * BLOCK_DIM_Y;
|
||||||
|
|
||||||
//Main data
|
if (y >= src.rows)
|
||||||
#pragma unroll
|
return;
|
||||||
for(int i = HALO_STEPS; i < HALO_STEPS + RESULT_STEPS; ++i)
|
|
||||||
smem[threadIdx.x][threadIdx.y + i * BLOCK_DIM_Y] = b.at_high(y + i * BLOCK_DIM_Y, src_col, src.step);
|
|
||||||
|
|
||||||
//Upper halo
|
sum_t sum = VecTraits<sum_t>::all(0);
|
||||||
#pragma unroll
|
|
||||||
for(int i = 0; i < HALO_STEPS; ++i)
|
|
||||||
smem[threadIdx.x][threadIdx.y + i * BLOCK_DIM_Y] = b.at_low(y + i * BLOCK_DIM_Y, src_col, src.step);
|
|
||||||
|
|
||||||
//Lower halo
|
|
||||||
#pragma unroll
|
|
||||||
for(int i = HALO_STEPS + RESULT_STEPS; i < HALO_STEPS + RESULT_STEPS + HALO_STEPS; ++i)
|
|
||||||
smem[threadIdx.x][threadIdx.y + i * BLOCK_DIM_Y]= b.at_high(y + i * BLOCK_DIM_Y, src_col, src.step);
|
|
||||||
|
|
||||||
__syncthreads();
|
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for(int i = HALO_STEPS; i < HALO_STEPS + RESULT_STEPS; ++i)
|
for (int k = 0; k < KSIZE; ++k)
|
||||||
{
|
sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * c_kernel[k];
|
||||||
sum_t sum = VecTraits<sum_t>::all(0);
|
|
||||||
|
|
||||||
#pragma unroll
|
dst(y, x) = saturate_cast<D>(sum);
|
||||||
for(int j = 0; j < KERNEL_SIZE; ++j)
|
|
||||||
sum = sum + smem[threadIdx.x][threadIdx.y + i * BLOCK_DIM_Y + j - anchor] * c_kernel[j];
|
|
||||||
|
|
||||||
int dstY = y + i * BLOCK_DIM_Y;
|
|
||||||
|
|
||||||
if (dstY < src.rows)
|
|
||||||
dst.ptr(dstY)[x] = saturate_cast<D>(sum);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int ksize, typename T, typename D, template<typename> class B>
|
template <int KSIZE, typename T, typename D, template<typename> class B>
|
||||||
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
|
void linearColumnFilter_caller(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
const int BLOCK_DIM_X = 16;
|
||||||
|
const int BLOCK_DIM_Y = 16;
|
||||||
|
const int PATCH_PER_BLOCK = 4;
|
||||||
|
|
||||||
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
|
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
|
||||||
const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, RESULT_STEPS * BLOCK_DIM_Y));
|
const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y * PATCH_PER_BLOCK));
|
||||||
|
|
||||||
|
B<T> brd(src.rows);
|
||||||
|
|
||||||
B<T> b(src.rows);
|
linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, PATCH_PER_BLOCK, KSIZE <= 16 ? 1 : 2, KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
|
||||||
|
|
||||||
linearColumnFilter<ksize, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, b);
|
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
@ -129,106 +137,187 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
|
void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream);
|
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, cudaStream_t stream);
|
||||||
static const caller_t callers[5][17] =
|
|
||||||
|
static const caller_t callers[5][33] =
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
0,
|
0,
|
||||||
linearColumnFilter_caller<1 , T, D, BrdColReflect101>,
|
linearColumnFilter_caller< 1, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<2 , T, D, BrdColReflect101>,
|
linearColumnFilter_caller< 2, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<3 , T, D, BrdColReflect101>,
|
linearColumnFilter_caller< 3, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<4 , T, D, BrdColReflect101>,
|
linearColumnFilter_caller< 4, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<5 , T, D, BrdColReflect101>,
|
linearColumnFilter_caller< 5, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<6 , T, D, BrdColReflect101>,
|
linearColumnFilter_caller< 6, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<7 , T, D, BrdColReflect101>,
|
linearColumnFilter_caller< 7, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<8 , T, D, BrdColReflect101>,
|
linearColumnFilter_caller< 8, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<9 , T, D, BrdColReflect101>,
|
linearColumnFilter_caller< 9, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<10, T, D, BrdColReflect101>,
|
linearColumnFilter_caller<10, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<11, T, D, BrdColReflect101>,
|
linearColumnFilter_caller<11, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<12, T, D, BrdColReflect101>,
|
linearColumnFilter_caller<12, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<13, T, D, BrdColReflect101>,
|
linearColumnFilter_caller<13, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<14, T, D, BrdColReflect101>,
|
linearColumnFilter_caller<14, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<15, T, D, BrdColReflect101>,
|
linearColumnFilter_caller<15, T, D, BrdColReflect101>,
|
||||||
linearColumnFilter_caller<16, T, D, BrdColReflect101>
|
linearColumnFilter_caller<16, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<17, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<18, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<19, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<20, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<21, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<22, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<23, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<24, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<25, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<26, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<27, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<28, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<29, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<30, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<31, T, D, BrdColReflect101>,
|
||||||
|
linearColumnFilter_caller<32, T, D, BrdColReflect101>
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
0,
|
0,
|
||||||
linearColumnFilter_caller<1 , T, D, BrdColReplicate>,
|
linearColumnFilter_caller< 1, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<2 , T, D, BrdColReplicate>,
|
linearColumnFilter_caller< 2, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<3 , T, D, BrdColReplicate>,
|
linearColumnFilter_caller< 3, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<4 , T, D, BrdColReplicate>,
|
linearColumnFilter_caller< 4, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<5 , T, D, BrdColReplicate>,
|
linearColumnFilter_caller< 5, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<6 , T, D, BrdColReplicate>,
|
linearColumnFilter_caller< 6, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<7 , T, D, BrdColReplicate>,
|
linearColumnFilter_caller< 7, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<8 , T, D, BrdColReplicate>,
|
linearColumnFilter_caller< 8, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<9 , T, D, BrdColReplicate>,
|
linearColumnFilter_caller< 9, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<10, T, D, BrdColReplicate>,
|
linearColumnFilter_caller<10, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<11, T, D, BrdColReplicate>,
|
linearColumnFilter_caller<11, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<12, T, D, BrdColReplicate>,
|
linearColumnFilter_caller<12, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<13, T, D, BrdColReplicate>,
|
linearColumnFilter_caller<13, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<14, T, D, BrdColReplicate>,
|
linearColumnFilter_caller<14, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<15, T, D, BrdColReplicate>,
|
linearColumnFilter_caller<15, T, D, BrdColReplicate>,
|
||||||
linearColumnFilter_caller<16, T, D, BrdColReplicate>
|
linearColumnFilter_caller<16, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<17, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<18, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<19, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<20, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<21, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<22, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<23, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<24, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<25, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<26, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<27, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<28, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<29, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<30, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<31, T, D, BrdColReplicate>,
|
||||||
|
linearColumnFilter_caller<32, T, D, BrdColReplicate>
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
0,
|
0,
|
||||||
linearColumnFilter_caller<1 , T, D, BrdColConstant>,
|
linearColumnFilter_caller< 1, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<2 , T, D, BrdColConstant>,
|
linearColumnFilter_caller< 2, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<3 , T, D, BrdColConstant>,
|
linearColumnFilter_caller< 3, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<4 , T, D, BrdColConstant>,
|
linearColumnFilter_caller< 4, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<5 , T, D, BrdColConstant>,
|
linearColumnFilter_caller< 5, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<6 , T, D, BrdColConstant>,
|
linearColumnFilter_caller< 6, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<7 , T, D, BrdColConstant>,
|
linearColumnFilter_caller< 7, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<8 , T, D, BrdColConstant>,
|
linearColumnFilter_caller< 8, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<9 , T, D, BrdColConstant>,
|
linearColumnFilter_caller< 9, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<10, T, D, BrdColConstant>,
|
linearColumnFilter_caller<10, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<11, T, D, BrdColConstant>,
|
linearColumnFilter_caller<11, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<12, T, D, BrdColConstant>,
|
linearColumnFilter_caller<12, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<13, T, D, BrdColConstant>,
|
linearColumnFilter_caller<13, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<14, T, D, BrdColConstant>,
|
linearColumnFilter_caller<14, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<15, T, D, BrdColConstant>,
|
linearColumnFilter_caller<15, T, D, BrdColConstant>,
|
||||||
linearColumnFilter_caller<16, T, D, BrdColConstant>
|
linearColumnFilter_caller<16, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<17, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<18, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<19, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<20, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<21, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<22, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<23, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<24, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<25, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<26, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<27, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<28, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<29, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<30, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<31, T, D, BrdColConstant>,
|
||||||
|
linearColumnFilter_caller<32, T, D, BrdColConstant>
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
0,
|
0,
|
||||||
linearColumnFilter_caller<1 , T, D, BrdColReflect>,
|
linearColumnFilter_caller< 1, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<2 , T, D, BrdColReflect>,
|
linearColumnFilter_caller< 2, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<3 , T, D, BrdColReflect>,
|
linearColumnFilter_caller< 3, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<4 , T, D, BrdColReflect>,
|
linearColumnFilter_caller< 4, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<5 , T, D, BrdColReflect>,
|
linearColumnFilter_caller< 5, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<6 , T, D, BrdColReflect>,
|
linearColumnFilter_caller< 6, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<7 , T, D, BrdColReflect>,
|
linearColumnFilter_caller< 7, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<8 , T, D, BrdColReflect>,
|
linearColumnFilter_caller< 8, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<9 , T, D, BrdColReflect>,
|
linearColumnFilter_caller< 9, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<10, T, D, BrdColReflect>,
|
linearColumnFilter_caller<10, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<11, T, D, BrdColReflect>,
|
linearColumnFilter_caller<11, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<12, T, D, BrdColReflect>,
|
linearColumnFilter_caller<12, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<13, T, D, BrdColReflect>,
|
linearColumnFilter_caller<13, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<14, T, D, BrdColReflect>,
|
linearColumnFilter_caller<14, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<15, T, D, BrdColReflect>,
|
linearColumnFilter_caller<15, T, D, BrdColReflect>,
|
||||||
linearColumnFilter_caller<16, T, D, BrdColReflect>
|
linearColumnFilter_caller<16, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<17, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<18, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<19, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<20, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<21, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<22, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<23, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<24, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<25, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<26, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<27, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<28, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<29, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<30, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<31, T, D, BrdColReflect>,
|
||||||
|
linearColumnFilter_caller<32, T, D, BrdColReflect>
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
0,
|
0,
|
||||||
linearColumnFilter_caller<1 , T, D, BrdColWrap>,
|
linearColumnFilter_caller< 1, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<2 , T, D, BrdColWrap>,
|
linearColumnFilter_caller< 2, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<3 , T, D, BrdColWrap>,
|
linearColumnFilter_caller< 3, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<4 , T, D, BrdColWrap>,
|
linearColumnFilter_caller< 4, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<5 , T, D, BrdColWrap>,
|
linearColumnFilter_caller< 5, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<6 , T, D, BrdColWrap>,
|
linearColumnFilter_caller< 6, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<7 , T, D, BrdColWrap>,
|
linearColumnFilter_caller< 7, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<8 , T, D, BrdColWrap>,
|
linearColumnFilter_caller< 8, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<9 , T, D, BrdColWrap>,
|
linearColumnFilter_caller< 9, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<10, T, D, BrdColWrap>,
|
linearColumnFilter_caller<10, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<11, T, D, BrdColWrap>,
|
linearColumnFilter_caller<11, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<12, T, D, BrdColWrap>,
|
linearColumnFilter_caller<12, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<13, T, D, BrdColWrap>,
|
linearColumnFilter_caller<13, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<14, T, D, BrdColWrap>,
|
linearColumnFilter_caller<14, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<15, T, D, BrdColWrap>,
|
linearColumnFilter_caller<15, T, D, BrdColWrap>,
|
||||||
linearColumnFilter_caller<16, T, D, BrdColWrap>,
|
linearColumnFilter_caller<16, T, D, BrdColWrap>,
|
||||||
}
|
linearColumnFilter_caller<17, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<18, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<19, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<20, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<21, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<22, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<23, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<24, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<25, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<26, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<27, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<28, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<29, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<30, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<31, T, D, BrdColWrap>,
|
||||||
|
linearColumnFilter_caller<32, T, D, BrdColWrap>
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
loadKernel(kernel, ksize);
|
loadKernel(kernel, ksize);
|
||||||
@ -236,12 +325,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
|
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void linearColumnFilter_gpu<float , uchar >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float , uchar >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
template void linearColumnFilter_gpu<float4, uchar4>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float4, uchar4>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
//template void linearColumnFilter_gpu<float , short >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float3, short3>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
//template void linearColumnFilter_gpu<float2, short2>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float , int >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
template void linearColumnFilter_gpu<float3, short3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
template void linearColumnFilter_gpu<float , int >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
|
||||||
template void linearColumnFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
|
||||||
} // namespace column_filter
|
} // namespace column_filter
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -46,17 +46,14 @@
|
|||||||
#include "opencv2/gpu/device/vec_math.hpp"
|
#include "opencv2/gpu/device/vec_math.hpp"
|
||||||
#include "opencv2/gpu/device/limits.hpp"
|
#include "opencv2/gpu/device/limits.hpp"
|
||||||
#include "opencv2/gpu/device/border_interpolate.hpp"
|
#include "opencv2/gpu/device/border_interpolate.hpp"
|
||||||
|
#include "opencv2/gpu/device/static_check.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
#define MAX_KERNEL_SIZE 16
|
|
||||||
#define BLOCK_DIM_X 16
|
|
||||||
#define BLOCK_DIM_Y 4
|
|
||||||
#define RESULT_STEPS 8
|
|
||||||
#define HALO_STEPS 1
|
|
||||||
|
|
||||||
namespace row_filter
|
namespace row_filter
|
||||||
{
|
{
|
||||||
|
#define MAX_KERNEL_SIZE 32
|
||||||
|
|
||||||
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
||||||
|
|
||||||
void loadKernel(const float kernel[], int ksize)
|
void loadKernel(const float kernel[], int ksize)
|
||||||
@ -64,87 +61,74 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace detail
|
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int PATCH_PER_BLOCK, int HALO_SIZE, int KSIZE, typename T, typename D, typename B>
|
||||||
|
__global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
|
||||||
{
|
{
|
||||||
template <typename T, size_t size> struct SmemType
|
Static<KSIZE <= MAX_KERNEL_SIZE>::check();
|
||||||
{
|
Static<HALO_SIZE * BLOCK_DIM_X >= KSIZE>::check();
|
||||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type smem_t;
|
Static<VecTraits<T>::cn == VecTraits<D>::cn>::check();
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T> struct SmemType<T, 4>
|
|
||||||
{
|
|
||||||
typedef T smem_t;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T> struct SmemType
|
|
||||||
{
|
|
||||||
typedef typename detail::SmemType<T, sizeof(T)>::smem_t smem_t;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <int KERNEL_SIZE, typename T, typename D, typename B>
|
|
||||||
__global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep<D> dst, int anchor, const B b)
|
|
||||||
{
|
|
||||||
typedef typename SmemType<T>::smem_t smem_t;
|
|
||||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
|
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
|
||||||
|
|
||||||
__shared__ smem_t smem[BLOCK_DIM_Y][(RESULT_STEPS + 2 * HALO_STEPS) * BLOCK_DIM_X];
|
__shared__ typename sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];
|
||||||
|
|
||||||
//Offset to the left halo edge
|
|
||||||
const int x = (blockIdx.x * RESULT_STEPS - HALO_STEPS) * BLOCK_DIM_X + threadIdx.x;
|
|
||||||
const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
|
const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
|
||||||
|
|
||||||
if (y < src.rows)
|
if (y >= src.rows)
|
||||||
|
return;
|
||||||
|
|
||||||
|
const T* src_row = src.ptr(y);
|
||||||
|
|
||||||
|
const int xStart = blockIdx.x * (PATCH_PER_BLOCK * BLOCK_DIM_X) + threadIdx.x;
|
||||||
|
|
||||||
|
//Load left halo
|
||||||
|
#pragma unroll
|
||||||
|
for (int j = 0; j < HALO_SIZE; ++j)
|
||||||
|
smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_low(xStart - (HALO_SIZE - j) * BLOCK_DIM_X, src_row));
|
||||||
|
|
||||||
|
//Load main data
|
||||||
|
#pragma unroll
|
||||||
|
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
|
||||||
|
smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + j * BLOCK_DIM_X, src_row));
|
||||||
|
|
||||||
|
//Load right halo
|
||||||
|
#pragma unroll
|
||||||
|
for (int j = 0; j < HALO_SIZE; ++j)
|
||||||
|
smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X, src_row));
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
|
||||||
{
|
{
|
||||||
const T* src_row = src.ptr(y);
|
const int x = xStart + j * BLOCK_DIM_X;
|
||||||
|
|
||||||
//Load main data
|
if (x >= src.cols)
|
||||||
#pragma unroll
|
return;
|
||||||
for(int i = HALO_STEPS; i < HALO_STEPS + RESULT_STEPS; ++i)
|
|
||||||
smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X] = b.at_high(i * BLOCK_DIM_X + x, src_row);
|
|
||||||
|
|
||||||
//Load left halo
|
sum_t sum = VecTraits<sum_t>::all(0);
|
||||||
#pragma unroll
|
|
||||||
for(int i = 0; i < HALO_STEPS; ++i)
|
|
||||||
smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X] = b.at_low(i * BLOCK_DIM_X + x, src_row);
|
|
||||||
|
|
||||||
//Load right halo
|
|
||||||
#pragma unroll
|
|
||||||
for(int i = HALO_STEPS + RESULT_STEPS; i < HALO_STEPS + RESULT_STEPS + HALO_STEPS; ++i)
|
|
||||||
smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X] = b.at_high(i * BLOCK_DIM_X + x, src_row);
|
|
||||||
|
|
||||||
__syncthreads();
|
|
||||||
|
|
||||||
D* dst_row = dst.ptr(y);
|
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for(int i = HALO_STEPS; i < HALO_STEPS + RESULT_STEPS; ++i)
|
for (int k = 0; k < KSIZE; ++k)
|
||||||
{
|
sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k];
|
||||||
sum_t sum = VecTraits<sum_t>::all(0);
|
|
||||||
|
|
||||||
#pragma unroll
|
dst(y, x) = saturate_cast<D>(sum);
|
||||||
for (int j = 0; j < KERNEL_SIZE; ++j)
|
|
||||||
sum = sum + smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X + j - anchor] * c_kernel[j];
|
|
||||||
|
|
||||||
int dstX = x + i * BLOCK_DIM_X;
|
|
||||||
|
|
||||||
if (dstX < src.cols)
|
|
||||||
dst_row[dstX] = saturate_cast<D>(sum);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int ksize, typename T, typename D, template<typename> class B>
|
template <int KSIZE, typename T, typename D, template<typename> class B>
|
||||||
void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
|
void linearRowFilter_caller(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef typename SmemType<T>::smem_t smem_t;
|
const int BLOCK_DIM_X = 32;
|
||||||
|
const int BLOCK_DIM_Y = 8;
|
||||||
|
const int PATCH_PER_BLOCK = 4;
|
||||||
|
|
||||||
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
|
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
|
||||||
const dim3 grid(divUp(src.cols, RESULT_STEPS * BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
|
const dim3 grid(divUp(src.cols, BLOCK_DIM_X * PATCH_PER_BLOCK), divUp(src.rows, BLOCK_DIM_Y));
|
||||||
|
|
||||||
B<smem_t> b(src.cols);
|
B<T> brd(src.cols);
|
||||||
|
|
||||||
linearRowFilter<ksize, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, b);
|
linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, PATCH_PER_BLOCK, 1, KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
@ -152,106 +136,187 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void linearRowFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
|
void linearRowFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream);
|
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, cudaStream_t stream);
|
||||||
static const caller_t callers[5][17] =
|
|
||||||
|
static const caller_t callers[5][33] =
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
0,
|
0,
|
||||||
linearRowFilter_caller<1 , T, D, BrdRowReflect101>,
|
linearRowFilter_caller< 1, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<2 , T, D, BrdRowReflect101>,
|
linearRowFilter_caller< 2, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<3 , T, D, BrdRowReflect101>,
|
linearRowFilter_caller< 3, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<4 , T, D, BrdRowReflect101>,
|
linearRowFilter_caller< 4, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<5 , T, D, BrdRowReflect101>,
|
linearRowFilter_caller< 5, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<6 , T, D, BrdRowReflect101>,
|
linearRowFilter_caller< 6, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<7 , T, D, BrdRowReflect101>,
|
linearRowFilter_caller< 7, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<8 , T, D, BrdRowReflect101>,
|
linearRowFilter_caller< 8, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<9 , T, D, BrdRowReflect101>,
|
linearRowFilter_caller< 9, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<10, T, D, BrdRowReflect101>,
|
linearRowFilter_caller<10, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<11, T, D, BrdRowReflect101>,
|
linearRowFilter_caller<11, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<12, T, D, BrdRowReflect101>,
|
linearRowFilter_caller<12, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<13, T, D, BrdRowReflect101>,
|
linearRowFilter_caller<13, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<14, T, D, BrdRowReflect101>,
|
linearRowFilter_caller<14, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<15, T, D, BrdRowReflect101>,
|
linearRowFilter_caller<15, T, D, BrdRowReflect101>,
|
||||||
linearRowFilter_caller<16, T, D, BrdRowReflect101>
|
linearRowFilter_caller<16, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<17, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<18, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<19, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<20, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<21, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<22, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<23, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<24, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<25, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<26, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<27, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<28, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<29, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<30, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<31, T, D, BrdRowReflect101>,
|
||||||
|
linearRowFilter_caller<32, T, D, BrdRowReflect101>
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
0,
|
0,
|
||||||
linearRowFilter_caller<1 , T, D, BrdRowReplicate>,
|
linearRowFilter_caller< 1, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<2 , T, D, BrdRowReplicate>,
|
linearRowFilter_caller< 2, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<3 , T, D, BrdRowReplicate>,
|
linearRowFilter_caller< 3, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<4 , T, D, BrdRowReplicate>,
|
linearRowFilter_caller< 4, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<5 , T, D, BrdRowReplicate>,
|
linearRowFilter_caller< 5, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<6 , T, D, BrdRowReplicate>,
|
linearRowFilter_caller< 6, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<7 , T, D, BrdRowReplicate>,
|
linearRowFilter_caller< 7, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<8 , T, D, BrdRowReplicate>,
|
linearRowFilter_caller< 8, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<9 , T, D, BrdRowReplicate>,
|
linearRowFilter_caller< 9, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<10, T, D, BrdRowReplicate>,
|
linearRowFilter_caller<10, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<11, T, D, BrdRowReplicate>,
|
linearRowFilter_caller<11, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<12, T, D, BrdRowReplicate>,
|
linearRowFilter_caller<12, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<13, T, D, BrdRowReplicate>,
|
linearRowFilter_caller<13, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<14, T, D, BrdRowReplicate>,
|
linearRowFilter_caller<14, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<15, T, D, BrdRowReplicate>,
|
linearRowFilter_caller<15, T, D, BrdRowReplicate>,
|
||||||
linearRowFilter_caller<16, T, D, BrdRowReplicate>
|
linearRowFilter_caller<16, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<17, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<18, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<19, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<20, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<21, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<22, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<23, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<24, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<25, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<26, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<27, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<28, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<29, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<30, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<31, T, D, BrdRowReplicate>,
|
||||||
|
linearRowFilter_caller<32, T, D, BrdRowReplicate>
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
0,
|
0,
|
||||||
linearRowFilter_caller<1 , T, D, BrdRowConstant>,
|
linearRowFilter_caller< 1, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<2 , T, D, BrdRowConstant>,
|
linearRowFilter_caller< 2, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<3 , T, D, BrdRowConstant>,
|
linearRowFilter_caller< 3, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<4 , T, D, BrdRowConstant>,
|
linearRowFilter_caller< 4, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<5 , T, D, BrdRowConstant>,
|
linearRowFilter_caller< 5, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<6 , T, D, BrdRowConstant>,
|
linearRowFilter_caller< 6, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<7 , T, D, BrdRowConstant>,
|
linearRowFilter_caller< 7, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<8 , T, D, BrdRowConstant>,
|
linearRowFilter_caller< 8, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<9 , T, D, BrdRowConstant>,
|
linearRowFilter_caller< 9, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<10, T, D, BrdRowConstant>,
|
linearRowFilter_caller<10, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<11, T, D, BrdRowConstant>,
|
linearRowFilter_caller<11, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<12, T, D, BrdRowConstant>,
|
linearRowFilter_caller<12, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<13, T, D, BrdRowConstant>,
|
linearRowFilter_caller<13, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<14, T, D, BrdRowConstant>,
|
linearRowFilter_caller<14, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<15, T, D, BrdRowConstant>,
|
linearRowFilter_caller<15, T, D, BrdRowConstant>,
|
||||||
linearRowFilter_caller<16, T, D, BrdRowConstant>
|
linearRowFilter_caller<16, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<17, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<18, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<19, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<20, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<21, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<22, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<23, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<24, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<25, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<26, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<27, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<28, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<29, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<30, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<31, T, D, BrdRowConstant>,
|
||||||
|
linearRowFilter_caller<32, T, D, BrdRowConstant>
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
0,
|
0,
|
||||||
linearRowFilter_caller<1 , T, D, BrdRowReflect>,
|
linearRowFilter_caller< 1, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<2 , T, D, BrdRowReflect>,
|
linearRowFilter_caller< 2, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<3 , T, D, BrdRowReflect>,
|
linearRowFilter_caller< 3, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<4 , T, D, BrdRowReflect>,
|
linearRowFilter_caller< 4, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<5 , T, D, BrdRowReflect>,
|
linearRowFilter_caller< 5, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<6 , T, D, BrdRowReflect>,
|
linearRowFilter_caller< 6, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<7 , T, D, BrdRowReflect>,
|
linearRowFilter_caller< 7, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<8 , T, D, BrdRowReflect>,
|
linearRowFilter_caller< 8, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<9 , T, D, BrdRowReflect>,
|
linearRowFilter_caller< 9, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<10, T, D, BrdRowReflect>,
|
linearRowFilter_caller<10, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<11, T, D, BrdRowReflect>,
|
linearRowFilter_caller<11, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<12, T, D, BrdRowReflect>,
|
linearRowFilter_caller<12, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<13, T, D, BrdRowReflect>,
|
linearRowFilter_caller<13, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<14, T, D, BrdRowReflect>,
|
linearRowFilter_caller<14, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<15, T, D, BrdRowReflect>,
|
linearRowFilter_caller<15, T, D, BrdRowReflect>,
|
||||||
linearRowFilter_caller<16, T, D, BrdRowReflect>
|
linearRowFilter_caller<16, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<17, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<18, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<19, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<20, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<21, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<22, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<23, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<24, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<25, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<26, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<27, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<28, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<29, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<30, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<31, T, D, BrdRowReflect>,
|
||||||
|
linearRowFilter_caller<32, T, D, BrdRowReflect>
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
0,
|
0,
|
||||||
linearRowFilter_caller<1 , T, D, BrdRowWrap>,
|
linearRowFilter_caller< 1, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<2 , T, D, BrdRowWrap>,
|
linearRowFilter_caller< 2, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<3 , T, D, BrdRowWrap>,
|
linearRowFilter_caller< 3, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<4 , T, D, BrdRowWrap>,
|
linearRowFilter_caller< 4, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<5 , T, D, BrdRowWrap>,
|
linearRowFilter_caller< 5, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<6 , T, D, BrdRowWrap>,
|
linearRowFilter_caller< 6, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<7 , T, D, BrdRowWrap>,
|
linearRowFilter_caller< 7, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<8 , T, D, BrdRowWrap>,
|
linearRowFilter_caller< 8, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<9 , T, D, BrdRowWrap>,
|
linearRowFilter_caller< 9, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<10, T, D, BrdRowWrap>,
|
linearRowFilter_caller<10, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<11, T, D, BrdRowWrap>,
|
linearRowFilter_caller<11, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<12, T, D, BrdRowWrap>,
|
linearRowFilter_caller<12, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<13, T, D, BrdRowWrap>,
|
linearRowFilter_caller<13, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<14, T, D, BrdRowWrap>,
|
linearRowFilter_caller<14, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<15, T, D, BrdRowWrap>,
|
linearRowFilter_caller<15, T, D, BrdRowWrap>,
|
||||||
linearRowFilter_caller<16, T, D, BrdRowWrap>
|
linearRowFilter_caller<16, T, D, BrdRowWrap>,
|
||||||
}
|
linearRowFilter_caller<17, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<18, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<19, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<20, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<21, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<22, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<23, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<24, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<25, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<26, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<27, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<28, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<29, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<30, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<31, T, D, BrdRowWrap>,
|
||||||
|
linearRowFilter_caller<32, T, D, BrdRowWrap>
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
loadKernel(kernel, ksize);
|
loadKernel(kernel, ksize);
|
||||||
@ -259,12 +324,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
|
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void linearRowFilter_gpu<uchar , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearRowFilter_gpu<uchar , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
template void linearRowFilter_gpu<uchar4, float4>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearRowFilter_gpu<uchar4, float4>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
//template void linearRowFilter_gpu<short , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearRowFilter_gpu<short3, float3>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
//template void linearRowFilter_gpu<short2, float2>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearRowFilter_gpu<int , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
template void linearRowFilter_gpu<short3, float3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearRowFilter_gpu<float , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
template void linearRowFilter_gpu<int , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
|
||||||
template void linearRowFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
|
||||||
} // namespace row_filter
|
} // namespace row_filter
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -740,13 +740,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
namespace row_filter
|
namespace row_filter
|
||||||
{
|
{
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void linearRowFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
void linearRowFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace column_filter
|
namespace column_filter
|
||||||
{
|
{
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -755,7 +755,7 @@ namespace
|
|||||||
typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI,
|
typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI,
|
||||||
const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);
|
const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);
|
||||||
|
|
||||||
typedef void (*gpuFilter1D_t)(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
typedef void (*gpuFilter1D_t)(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
|
|
||||||
struct NppLinearRowFilter : public BaseRowFilter_GPU
|
struct NppLinearRowFilter : public BaseRowFilter_GPU
|
||||||
{
|
{
|
||||||
@ -825,8 +825,7 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
|
|||||||
int gpuBorderType;
|
int gpuBorderType;
|
||||||
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
|
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
|
||||||
|
|
||||||
CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC4 /*|| srcType == CV_16SC1*/ /*|| srcType == CV_16SC2*/
|
CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_16SC3 || srcType == CV_32SC1 || srcType == CV_32FC1);
|
||||||
|| srcType == CV_16SC3 || srcType == CV_32SC1 || srcType == CV_32FC1);
|
|
||||||
|
|
||||||
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(srcType) == CV_MAT_CN(bufType));
|
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(srcType) == CV_MAT_CN(bufType));
|
||||||
|
|
||||||
@ -836,7 +835,7 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
|
|||||||
|
|
||||||
int ksize = cont_krnl.cols;
|
int ksize = cont_krnl.cols;
|
||||||
|
|
||||||
CV_Assert(ksize > 0 && ksize <= 16);
|
CV_Assert(ksize > 0 && ksize <= 32);
|
||||||
|
|
||||||
normalizeAnchor(anchor, ksize);
|
normalizeAnchor(anchor, ksize);
|
||||||
|
|
||||||
@ -850,12 +849,6 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
|
|||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
func = linearRowFilter_gpu<uchar4, float4>;
|
func = linearRowFilter_gpu<uchar4, float4>;
|
||||||
break;
|
break;
|
||||||
/*case CV_16SC1:
|
|
||||||
func = linearRowFilter_gpu<short, float>;
|
|
||||||
break;*/
|
|
||||||
/*case CV_16SC2:
|
|
||||||
func = linearRowFilter_gpu<short2, float2>;
|
|
||||||
break;*/
|
|
||||||
case CV_16SC3:
|
case CV_16SC3:
|
||||||
func = linearRowFilter_gpu<short3, float3>;
|
func = linearRowFilter_gpu<short3, float3>;
|
||||||
break;
|
break;
|
||||||
@ -940,8 +933,7 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
|
|||||||
int gpuBorderType;
|
int gpuBorderType;
|
||||||
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
|
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
|
||||||
|
|
||||||
CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC4 /*|| dstType == CV_16SC1*/ /*|| dstType == CV_16SC2*/
|
CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC4 || dstType == CV_16SC3 || dstType == CV_32SC1 || dstType == CV_32FC1);
|
||||||
|| dstType == CV_16SC3 || dstType == CV_32SC1 || dstType == CV_32FC1);
|
|
||||||
|
|
||||||
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType));
|
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType));
|
||||||
|
|
||||||
@ -951,7 +943,7 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
|
|||||||
|
|
||||||
int ksize = cont_krnl.cols;
|
int ksize = cont_krnl.cols;
|
||||||
|
|
||||||
CV_Assert(ksize > 0 && ksize <= 16);
|
CV_Assert(ksize > 0 && ksize <= 32);
|
||||||
|
|
||||||
normalizeAnchor(anchor, ksize);
|
normalizeAnchor(anchor, ksize);
|
||||||
|
|
||||||
@ -965,12 +957,6 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
|
|||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
func = linearColumnFilter_gpu<float4, uchar4>;
|
func = linearColumnFilter_gpu<float4, uchar4>;
|
||||||
break;
|
break;
|
||||||
/*case CV_16SC1:
|
|
||||||
func = linearColumnFilter_gpu<float, short>;
|
|
||||||
break;*/
|
|
||||||
/*case CV_16SC2:
|
|
||||||
func = linearColumnFilter_gpu<float2, short2>;
|
|
||||||
break;*/
|
|
||||||
case CV_16SC3:
|
case CV_16SC3:
|
||||||
func = linearColumnFilter_gpu<float3, short3>;
|
func = linearColumnFilter_gpu<float3, short3>;
|
||||||
break;
|
break;
|
||||||
|
@ -188,7 +188,7 @@ TEST_P(Sobel, Rgba)
|
|||||||
|
|
||||||
dev_dst_rgba.download(dst_rgba);
|
dev_dst_rgba.download(dst_rgba);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, ksize, 0.0);
|
EXPECT_MAT_NEAR(dst_gold_rgba, dst_rgba, 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Sobel, Gray)
|
TEST_P(Sobel, Gray)
|
||||||
@ -204,7 +204,7 @@ TEST_P(Sobel, Gray)
|
|||||||
|
|
||||||
dev_dst_gray.download(dst_gray);
|
dev_dst_gray.download(dst_gray);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, ksize, 0.0);
|
EXPECT_MAT_NEAR(dst_gold_gray, dst_gray, 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(
|
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(
|
||||||
@ -342,7 +342,7 @@ TEST_P(GaussianBlur, Rgba)
|
|||||||
|
|
||||||
dev_dst_rgba.download(dst_rgba);
|
dev_dst_rgba.download(dst_rgba);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, ksize, 3.0);
|
EXPECT_MAT_NEAR(dst_gold_rgba, dst_rgba, 4.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(GaussianBlur, Gray)
|
TEST_P(GaussianBlur, Gray)
|
||||||
@ -355,12 +355,12 @@ TEST_P(GaussianBlur, Gray)
|
|||||||
|
|
||||||
dev_dst_gray.download(dst_gray);
|
dev_dst_gray.download(dst_gray);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, ksize, 3.0);
|
EXPECT_MAT_NEAR(dst_gold_gray, dst_gray, 4.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
|
INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
|
||||||
ALL_DEVICES,
|
ALL_DEVICES,
|
||||||
Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
|
Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7), cv::Size(9, 9), cv::Size(11, 11), cv::Size(13, 13), cv::Size(15, 15), cv::Size(17, 17), cv::Size(19, 19), cv::Size(21, 21), cv::Size(23, 23), cv::Size(25, 25), cv::Size(27, 27), cv::Size(29, 29), cv::Size(31, 31)),
|
||||||
USE_ROI));
|
USE_ROI));
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
Loading…
Reference in New Issue
Block a user