mirror of
https://github.com/opencv/opencv.git
synced 2024-11-25 11:40:44 +08:00
fixed bug in gpu filter engine (incorrect buffer type) and in vector's saturate_cast.
changed buffer type in linear filters to float. added support of 1 channel image to linear filters. added support of BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border type to gpu linear filters. minor fix in tests. update comments in gpu.hpp.
This commit is contained in:
parent
108ab94023
commit
49ec8ba742
@ -388,7 +388,7 @@ namespace cv
|
||||
CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c);
|
||||
|
||||
//! transposes the matrix
|
||||
//! supports only CV_8UC1 type
|
||||
//! supports CV_8UC1, CV_8SC1, CV_8UC4, CV_8SC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32FC1 type
|
||||
CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst);
|
||||
|
||||
//! computes element-wise absolute difference of two arrays (c = abs(a - b))
|
||||
@ -725,11 +725,11 @@ namespace cv
|
||||
};
|
||||
|
||||
//! returns the non-separable filter engine with the specified filter
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D, int srcType, int dstType);
|
||||
|
||||
//! returns the separable filter engine with the specified filters
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
|
||||
const Ptr<BaseColumnFilter_GPU>& columnFilter);
|
||||
const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType);
|
||||
|
||||
//! returns horizontal 1D box filter
|
||||
//! supports only CV_8UC1 source type and CV_32FC1 sum type
|
||||
@ -767,23 +767,40 @@ namespace cv
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
|
||||
const Point& anchor = Point(-1,-1));
|
||||
|
||||
//! returns the primitive row filter with the specified kernel
|
||||
//! returns the primitive row filter with the specified kernel.
|
||||
//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type.
|
||||
//! there are two version of algorithm: NPP and OpenCV.
|
||||
//! NPP calls when srcType == CV_8UC1 or srcType == CV_8UC4 and bufType == srcType,
|
||||
//! otherwise calls OpenCV version.
|
||||
//! NPP supports only BORDER_CONSTANT border type.
|
||||
//! OpenCV version supports only CV_32F as buffer depth and
|
||||
//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
|
||||
CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel,
|
||||
int anchor = -1);
|
||||
int anchor = -1, int borderType = BORDER_CONSTANT);
|
||||
|
||||
//! returns the primitive column filter with the specified kernel
|
||||
//! returns the primitive column filter with the specified kernel.
|
||||
//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst type.
|
||||
//! there are two version of algorithm: NPP and OpenCV.
|
||||
//! NPP calls when dstType == CV_8UC1 or dstType == CV_8UC4 and bufType == dstType,
|
||||
//! otherwise calls OpenCV version.
|
||||
//! NPP supports only BORDER_CONSTANT border type.
|
||||
//! OpenCV version supports only CV_32F as buffer depth and
|
||||
//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
|
||||
CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel,
|
||||
int anchor = -1);
|
||||
int anchor = -1, int borderType = BORDER_CONSTANT);
|
||||
|
||||
//! returns the separable linear filter engine
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
|
||||
const Mat& columnKernel, const Point& anchor = Point(-1,-1));
|
||||
const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
|
||||
int columnBorderType = -1);
|
||||
|
||||
//! returns filter engine for the generalized Sobel operator
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize);
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize,
|
||||
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
||||
|
||||
//! returns the Gaussian filter engine
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0);
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0,
|
||||
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
||||
|
||||
//! returns maximum filter
|
||||
CV_EXPORTS Ptr<BaseFilter_GPU> getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
|
||||
@ -812,16 +829,19 @@ namespace cv
|
||||
|
||||
//! applies separable 2D linear filter to the image
|
||||
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
|
||||
Point anchor = Point(-1,-1));
|
||||
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
||||
|
||||
//! applies generalized Sobel operator to the image
|
||||
CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1);
|
||||
CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
|
||||
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
||||
|
||||
//! applies the vertical or horizontal Scharr operator to the image
|
||||
CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1);
|
||||
CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1,
|
||||
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
||||
|
||||
//! smooths the image using Gaussian filter.
|
||||
CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0);
|
||||
CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0,
|
||||
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
||||
|
||||
//! applies Laplacian operator to the image
|
||||
//! supports only ksize = 1 and ksize = 3
|
||||
|
@ -277,12 +277,12 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
|
||||
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8SC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4
|
||||
|| src.type() == CV_16UC2 || src.type() == CV_16SC2 || src.type() == CV_32SC1 || src.type() == CV_32FC1);
|
||||
|
||||
dst.create( src.cols, src.rows, src.type() );
|
||||
|
||||
if (src.type() == CV_8UC1)
|
||||
if (src.type() == CV_8UC1 || src.type() == CV_8SC1)
|
||||
{
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include "opencv2/gpu/devmem2d.hpp"
|
||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||
#include "opencv2/gpu/device/vecmath.hpp"
|
||||
#include "opencv2/gpu/device/limits_gpu.hpp"
|
||||
|
||||
#include "safe_call.hpp"
|
||||
#include "internal_shared.hpp"
|
||||
@ -50,14 +51,198 @@
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
|
||||
#ifndef FLT_MAX
|
||||
#define FLT_MAX 3.402823466e+30F
|
||||
#endif
|
||||
namespace cv
|
||||
{
|
||||
namespace gpu
|
||||
{
|
||||
namespace device
|
||||
{
|
||||
struct BrdReflect101
|
||||
{
|
||||
explicit BrdReflect101(int len): last(len - 1) {}
|
||||
|
||||
__device__ int idx_low(int i) const
|
||||
{
|
||||
return abs(i);
|
||||
}
|
||||
|
||||
__device__ int idx_high(int i) const
|
||||
{
|
||||
return last - abs(last - i);
|
||||
}
|
||||
|
||||
__device__ int idx(int i) const
|
||||
{
|
||||
return abs(idx_high(i));
|
||||
}
|
||||
|
||||
bool is_range_safe(int mini, int maxi) const
|
||||
{
|
||||
return -last <= mini && maxi <= 2 * last;
|
||||
}
|
||||
|
||||
int last;
|
||||
};
|
||||
template <typename D>
|
||||
struct BrdRowReflect101: BrdReflect101
|
||||
{
|
||||
explicit BrdRowReflect101(int len): BrdReflect101(len) {}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_low(int i, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_low(i)]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_high(int i, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_high(i)]);
|
||||
}
|
||||
};
|
||||
template <typename D>
|
||||
struct BrdColReflect101: BrdReflect101
|
||||
{
|
||||
BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_low(int i, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_low(i) * step]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_high(int i, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_high(i) * step]);
|
||||
}
|
||||
|
||||
int step;
|
||||
};
|
||||
|
||||
struct BrdReplicate
|
||||
{
|
||||
explicit BrdReplicate(int len): last(len - 1) {}
|
||||
|
||||
__device__ int idx_low(int i) const
|
||||
{
|
||||
return max(i, 0);
|
||||
}
|
||||
|
||||
__device__ int idx_high(int i) const
|
||||
{
|
||||
return min(i, last);
|
||||
}
|
||||
|
||||
__device__ int idx(int i) const
|
||||
{
|
||||
return max(min(i, last), 0);
|
||||
}
|
||||
|
||||
bool is_range_safe(int mini, int maxi) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
int last;
|
||||
};
|
||||
template <typename D>
|
||||
struct BrdRowReplicate: BrdReplicate
|
||||
{
|
||||
explicit BrdRowReplicate(int len): BrdReplicate(len) {}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_low(int i, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_low(i)]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_high(int i, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_high(i)]);
|
||||
}
|
||||
};
|
||||
template <typename D>
|
||||
struct BrdColReplicate: BrdReplicate
|
||||
{
|
||||
BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_low(int i, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_low(i) * step]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_high(int i, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_high(i) * step]);
|
||||
}
|
||||
int step;
|
||||
};
|
||||
|
||||
template <typename D>
|
||||
struct BrdRowConstant
|
||||
{
|
||||
explicit BrdRowConstant(int len_, const D& val_ = VecTraits<D>::all(0)): len(len_), val(val_) {}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_low(int i, const T* data) const
|
||||
{
|
||||
return i >= 0 ? saturate_cast<D>(data[i]) : val;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_high(int i, const T* data) const
|
||||
{
|
||||
return i < len ? saturate_cast<D>(data[i]) : val;
|
||||
}
|
||||
|
||||
bool is_range_safe(int mini, int maxi) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
int len;
|
||||
D val;
|
||||
};
|
||||
template <typename D>
|
||||
struct BrdColConstant
|
||||
{
|
||||
BrdColConstant(int len_, int step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_low(int i, const T* data) const
|
||||
{
|
||||
return i >= 0 ? saturate_cast<D>(data[i * step]) : val;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ D at_high(int i, const T* data) const
|
||||
{
|
||||
return i < len ? saturate_cast<D>(data[i * step]) : val;
|
||||
}
|
||||
|
||||
bool is_range_safe(int mini, int maxi) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
int len;
|
||||
int step;
|
||||
D val;
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Linear filters
|
||||
|
||||
#define MAX_KERNEL_SIZE 16
|
||||
#define BLOCK_DIM_X 16
|
||||
#define BLOCK_DIM_Y 16
|
||||
|
||||
namespace filter_krnls
|
||||
{
|
||||
@ -74,46 +259,53 @@ namespace cv { namespace gpu { namespace filters
|
||||
|
||||
namespace filter_krnls
|
||||
{
|
||||
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>
|
||||
__global__ void linearRowFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)
|
||||
template <typename T, size_t size> struct SmemType_
|
||||
{
|
||||
__shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t smem_t;
|
||||
};
|
||||
template <typename T> struct SmemType_<T, 4>
|
||||
{
|
||||
typedef T smem_t;
|
||||
};
|
||||
template <typename T> struct SmemType
|
||||
{
|
||||
typedef typename SmemType_<T, sizeof(T)>::smem_t smem_t;
|
||||
};
|
||||
|
||||
template <int ksize, typename T, typename D, typename B>
|
||||
__global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep_<D> dst, int anchor, const B b)
|
||||
{
|
||||
typedef typename SmemType<T>::smem_t smem_t;
|
||||
|
||||
__shared__ smem_t smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];
|
||||
|
||||
const int blockStartX = blockDim.x * blockIdx.x;
|
||||
const int blockStartY = blockDim.y * blockIdx.y;
|
||||
const int x = BLOCK_DIM_X * blockIdx.x + threadIdx.x;
|
||||
const int y = BLOCK_DIM_Y * blockIdx.y + threadIdx.y;
|
||||
|
||||
const int threadX = blockStartX + threadIdx.x;
|
||||
const int prevThreadX = threadX - blockDim.x;
|
||||
const int nextThreadX = threadX + blockDim.x;
|
||||
smem_t* sDataRow = smem + threadIdx.y * BLOCK_DIM_X * 3;
|
||||
|
||||
const int threadY = blockStartY + threadIdx.y;
|
||||
|
||||
T* sDataRow = smem + threadIdx.y * blockDim.x * 3;
|
||||
|
||||
if (threadY < height)
|
||||
if (y < src.rows)
|
||||
{
|
||||
const T* rowSrc = src + threadY * src_step;
|
||||
const T* rowSrc = src.ptr(y);
|
||||
|
||||
sDataRow[threadIdx.x + blockDim.x] = threadX < width ? rowSrc[threadX] : VecTraits<T>::all(0);
|
||||
|
||||
sDataRow[threadIdx.x] = prevThreadX >= 0 ? rowSrc[prevThreadX] : VecTraits<T>::all(0);
|
||||
|
||||
sDataRow[(blockDim.x << 1) + threadIdx.x] = nextThreadX < width ? rowSrc[nextThreadX] : VecTraits<T>::all(0);
|
||||
sDataRow[threadIdx.x ] = b.at_low(x - BLOCK_DIM_X, rowSrc);
|
||||
sDataRow[threadIdx.x + BLOCK_DIM_X ] = b.at_high(x, rowSrc);
|
||||
sDataRow[threadIdx.x + BLOCK_DIM_X * 2] = b.at_high(x + BLOCK_DIM_X, rowSrc);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (threadX < width)
|
||||
if (x < src.cols)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t sum_t;
|
||||
sum_t sum = VecTraits<sum_t>::all(0);
|
||||
|
||||
sDataRow += threadIdx.x + blockDim.x - anchor;
|
||||
sDataRow += threadIdx.x + BLOCK_DIM_X - anchor;
|
||||
|
||||
#pragma unroll
|
||||
for(int i = 0; i < KERNEL_SIZE; ++i)
|
||||
for(int i = 0; i < ksize; ++i)
|
||||
sum = sum + sDataRow[i] * cLinearKernel[i];
|
||||
|
||||
dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);
|
||||
dst.ptr(y)[x] = saturate_cast<D>(sum);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -121,100 +313,138 @@ namespace filter_krnls
|
||||
|
||||
namespace cv { namespace gpu { namespace filters
|
||||
{
|
||||
template <int KERNEL_SIZE, typename T, typename D>
|
||||
template <int ksize, typename T, typename D, template<typename> class B>
|
||||
void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
|
||||
{
|
||||
const int BLOCK_DIM_X = 16;
|
||||
const int BLOCK_DIM_Y = 16;
|
||||
|
||||
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
|
||||
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
|
||||
dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
|
||||
|
||||
filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.data, src.step/src.elemSize(),
|
||||
dst.data, dst.step/dst.elemSize(), anchor, src.cols, src.rows);
|
||||
typedef typename filter_krnls::SmemType<T>::smem_t smem_t;
|
||||
B<smem_t> b(src.cols);
|
||||
|
||||
if (!b.is_range_safe(-BLOCK_DIM_X, (grid.x + 1) * BLOCK_DIM_X - 1))
|
||||
{
|
||||
cv::gpu::error("linearRowFilter: can't use specified border extrapolation, image is too small, "
|
||||
"try bigger image or another border extrapolation mode", __FILE__, __LINE__);
|
||||
}
|
||||
|
||||
filter_krnls::linearRowFilter<ksize, T, D><<<grid, threads>>>(src, dst, anchor, b);
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T, typename D>
|
||||
void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
|
||||
void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type)
|
||||
{
|
||||
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
|
||||
static const caller_t callers[] =
|
||||
{linearRowFilter_caller<0 , T, D>, linearRowFilter_caller<1 , T, D>,
|
||||
linearRowFilter_caller<2 , T, D>, linearRowFilter_caller<3 , T, D>,
|
||||
linearRowFilter_caller<4 , T, D>, linearRowFilter_caller<5 , T, D>,
|
||||
linearRowFilter_caller<6 , T, D>, linearRowFilter_caller<7 , T, D>,
|
||||
linearRowFilter_caller<8 , T, D>, linearRowFilter_caller<9 , T, D>,
|
||||
linearRowFilter_caller<10, T, D>, linearRowFilter_caller<11, T, D>,
|
||||
linearRowFilter_caller<12, T, D>, linearRowFilter_caller<13, T, D>,
|
||||
linearRowFilter_caller<14, T, D>, linearRowFilter_caller<15, T, D>};
|
||||
|
||||
static const caller_t callers[3][17] =
|
||||
{
|
||||
{
|
||||
0,
|
||||
linearRowFilter_caller<1 , T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<2 , T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<3 , T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<4 , T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<5 , T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<6 , T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<7 , T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<8 , T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<9 , T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<10, T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<11, T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<12, T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<13, T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<14, T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<15, T, D, BrdRowReflect101>,
|
||||
linearRowFilter_caller<16, T, D, BrdRowReflect101>,
|
||||
},
|
||||
{
|
||||
0,
|
||||
linearRowFilter_caller<1 , T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<2 , T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<3 , T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<4 , T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<5 , T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<6 , T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<7 , T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<8 , T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<9 , T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<10, T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<11, T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<12, T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<13, T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<14, T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<15, T, D, BrdRowReplicate>,
|
||||
linearRowFilter_caller<16, T, D, BrdRowReplicate>,
|
||||
},
|
||||
{
|
||||
0,
|
||||
linearRowFilter_caller<1 , T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<2 , T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<3 , T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<4 , T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<5 , T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<6 , T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<7 , T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<8 , T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<9 , T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<10, T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<11, T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<12, T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<13, T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<14, T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<15, T, D, BrdRowConstant>,
|
||||
linearRowFilter_caller<16, T, D, BrdRowConstant>,
|
||||
}
|
||||
};
|
||||
|
||||
loadLinearKernel(kernel, ksize);
|
||||
|
||||
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
|
||||
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
|
||||
}
|
||||
|
||||
template void linearRowFilter_gpu<uchar4, uchar4>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearRowFilter_gpu<uchar4, char4>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearRowFilter_gpu<char4, uchar4>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearRowFilter_gpu<char4, char4>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
|
||||
template void linearRowFilter_gpu<ushort2, ushort2>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearRowFilter_gpu<ushort2, short2>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearRowFilter_gpu<short2, ushort2>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearRowFilter_gpu<short2, short2>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
|
||||
template void linearRowFilter_gpu<int, int>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearRowFilter_gpu<int, float>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearRowFilter_gpu<float, int>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearRowFilter_gpu<float, float>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearRowFilter_gpu<uchar , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
template void linearRowFilter_gpu<uchar4, float4>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
template void linearRowFilter_gpu<short , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);;
|
||||
template void linearRowFilter_gpu<short2, float2>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
template void linearRowFilter_gpu<int , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
template void linearRowFilter_gpu<float , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
}}}
|
||||
|
||||
namespace filter_krnls
|
||||
{
|
||||
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>
|
||||
__global__ void linearColumnFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)
|
||||
template <int ksize, typename T, typename D, typename B>
|
||||
__global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep_<D> dst, int anchor, const B b)
|
||||
{
|
||||
__shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];
|
||||
|
||||
const int blockStartX = blockDim.x * blockIdx.x;
|
||||
const int blockStartY = blockDim.y * blockIdx.y;
|
||||
|
||||
const int threadX = blockStartX + threadIdx.x;
|
||||
|
||||
const int threadY = blockStartY + threadIdx.y;
|
||||
const int prevThreadY = threadY - blockDim.y;
|
||||
const int nextThreadY = threadY + blockDim.y;
|
||||
|
||||
const int smem_step = blockDim.x;
|
||||
const int x = BLOCK_DIM_X * blockIdx.x + threadIdx.x;
|
||||
const int y = BLOCK_DIM_Y * blockIdx.y + threadIdx.y;
|
||||
|
||||
T* sDataColumn = smem + threadIdx.x;
|
||||
|
||||
if (threadX < width)
|
||||
if (x < src.cols)
|
||||
{
|
||||
const T* colSrc = src + threadX;
|
||||
const T* srcCol = src.ptr() + x;
|
||||
|
||||
sDataColumn[(threadIdx.y + blockDim.y) * smem_step] = threadY < height ? colSrc[threadY * src_step] : VecTraits<T>::all(0);
|
||||
|
||||
sDataColumn[threadIdx.y * smem_step] = prevThreadY >= 0 ? colSrc[prevThreadY * src_step] : VecTraits<T>::all(0);
|
||||
|
||||
sDataColumn[(threadIdx.y + (blockDim.y << 1)) * smem_step] = nextThreadY < height ? colSrc[nextThreadY * src_step] : VecTraits<T>::all(0);
|
||||
sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol);
|
||||
sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol);
|
||||
sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (threadY < height)
|
||||
if (y < src.rows)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t sum_t;
|
||||
sum_t sum = VecTraits<sum_t>::all(0);
|
||||
|
||||
sDataColumn += (threadIdx.y + blockDim.y - anchor)* smem_step;
|
||||
sDataColumn += (threadIdx.y + BLOCK_DIM_Y - anchor) * BLOCK_DIM_X;
|
||||
|
||||
#pragma unroll
|
||||
for(int i = 0; i < KERNEL_SIZE; ++i)
|
||||
sum = sum + sDataColumn[i * smem_step] * cLinearKernel[i];
|
||||
for(int i = 0; i < ksize; ++i)
|
||||
sum = sum + sDataColumn[i * BLOCK_DIM_X] * cLinearKernel[i];
|
||||
|
||||
dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);
|
||||
dst.ptr(y)[x] = saturate_cast<D>(sum);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -222,54 +452,101 @@ namespace filter_krnls
|
||||
|
||||
namespace cv { namespace gpu { namespace filters
|
||||
{
|
||||
template <int KERNEL_SIZE, typename T, typename D>
|
||||
template <int ksize, typename T, typename D, template<typename> class B>
|
||||
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
|
||||
{
|
||||
const int BLOCK_DIM_X = 16;
|
||||
const int BLOCK_DIM_Y = 16;
|
||||
|
||||
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
|
||||
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
|
||||
dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
|
||||
|
||||
filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.data, src.step/src.elemSize(),
|
||||
dst.data, dst.step/dst.elemSize(), anchor, src.cols, src.rows);
|
||||
B<T> b(src.rows, src.step / src.elemSize());
|
||||
|
||||
if (!b.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))
|
||||
{
|
||||
cv::gpu::error("linearColumnFilter: can't use specified border extrapolation, image is too small, "
|
||||
"try bigger image or another border extrapolation mode", __FILE__, __LINE__);
|
||||
}
|
||||
|
||||
filter_krnls::linearColumnFilter<ksize, T, D><<<grid, threads>>>(src, dst, anchor, b);
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T, typename D>
|
||||
void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
|
||||
void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type)
|
||||
{
|
||||
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
|
||||
static const caller_t callers[] =
|
||||
{linearColumnFilter_caller<0 , T, D>, linearColumnFilter_caller<1 , T, D>,
|
||||
linearColumnFilter_caller<2 , T, D>, linearColumnFilter_caller<3 , T, D>,
|
||||
linearColumnFilter_caller<4 , T, D>, linearColumnFilter_caller<5 , T, D>,
|
||||
linearColumnFilter_caller<6 , T, D>, linearColumnFilter_caller<7 , T, D>,
|
||||
linearColumnFilter_caller<8 , T, D>, linearColumnFilter_caller<9 , T, D>,
|
||||
linearColumnFilter_caller<10, T, D>, linearColumnFilter_caller<11, T, D>,
|
||||
linearColumnFilter_caller<12, T, D>, linearColumnFilter_caller<13, T, D>,
|
||||
linearColumnFilter_caller<14, T, D>, linearColumnFilter_caller<15, T, D>};
|
||||
|
||||
static const caller_t callers[3][17] =
|
||||
{
|
||||
{
|
||||
0,
|
||||
linearColumnFilter_caller<1 , T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<2 , T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<3 , T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<4 , T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<5 , T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<6 , T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<7 , T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<8 , T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<9 , T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<10, T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<11, T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<12, T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<13, T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<14, T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<15, T, D, BrdColReflect101>,
|
||||
linearColumnFilter_caller<16, T, D, BrdColReflect101>,
|
||||
},
|
||||
{
|
||||
0,
|
||||
linearColumnFilter_caller<1 , T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<2 , T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<3 , T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<4 , T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<5 , T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<6 , T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<7 , T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<8 , T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<9 , T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<10, T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<11, T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<12, T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<13, T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<14, T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<15, T, D, BrdColReplicate>,
|
||||
linearColumnFilter_caller<16, T, D, BrdColReplicate>,
|
||||
},
|
||||
{
|
||||
0,
|
||||
linearColumnFilter_caller<1 , T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<2 , T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<3 , T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<4 , T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<5 , T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<6 , T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<7 , T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<8 , T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<9 , T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<10, T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<11, T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<12, T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<13, T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<14, T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<15, T, D, BrdColConstant>,
|
||||
linearColumnFilter_caller<16, T, D, BrdColConstant>,
|
||||
}
|
||||
};
|
||||
|
||||
loadLinearKernel(kernel, ksize);
|
||||
|
||||
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
|
||||
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
|
||||
}
|
||||
|
||||
template void linearColumnFilter_gpu<uchar4, uchar4>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearColumnFilter_gpu<uchar4, char4>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearColumnFilter_gpu<char4, uchar4>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearColumnFilter_gpu<char4, char4>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
|
||||
template void linearColumnFilter_gpu<ushort2, ushort2>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearColumnFilter_gpu<ushort2, short2>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearColumnFilter_gpu<short2, ushort2>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearColumnFilter_gpu<short2, short2>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
|
||||
template void linearColumnFilter_gpu<int, int>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearColumnFilter_gpu<int, float>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearColumnFilter_gpu<float, int>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearColumnFilter_gpu<float, float>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||
template void linearColumnFilter_gpu<float , uchar >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
template void linearColumnFilter_gpu<float4, uchar4>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
template void linearColumnFilter_gpu<float , short >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
template void linearColumnFilter_gpu<float2, short2>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
template void linearColumnFilter_gpu<float , int >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
template void linearColumnFilter_gpu<float , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
}}}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -377,7 +654,7 @@ namespace bf_krnls
|
||||
}
|
||||
}
|
||||
|
||||
float minimum = FLT_MAX;
|
||||
float minimum = numeric_limits_gpu<float>::max();
|
||||
int id = 0;
|
||||
|
||||
if (cost[0] < minimum)
|
||||
|
@ -59,7 +59,8 @@ namespace cv
|
||||
enum
|
||||
{
|
||||
BORDER_REFLECT101_GPU = 0,
|
||||
BORDER_REPLICATE_GPU
|
||||
BORDER_REPLICATE_GPU,
|
||||
BORDER_CONSTANT_GPU
|
||||
};
|
||||
|
||||
// Converts CPU border extrapolation mode into GPU internal analogue.
|
||||
|
@ -48,8 +48,8 @@ using namespace cv::gpu;
|
||||
|
||||
#if !defined (HAVE_CUDA)
|
||||
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<BaseRowFilter_GPU> cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }
|
||||
Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }
|
||||
Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
|
||||
@ -58,11 +58,11 @@ Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int, int, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }
|
||||
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int, int, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, double, double) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }
|
||||
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int, int, int, int, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, double, double, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
|
||||
Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
|
||||
|
||||
@ -71,10 +71,10 @@ void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nog
|
||||
void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
|
||||
void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }
|
||||
void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point) { throw_nogpu(); }
|
||||
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point) { throw_nogpu(); }
|
||||
void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double) { throw_nogpu(); }
|
||||
void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double) { throw_nogpu(); }
|
||||
void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double) { throw_nogpu(); }
|
||||
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_nogpu(); }
|
||||
void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_nogpu(); }
|
||||
void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double, int, int) { throw_nogpu(); }
|
||||
void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double, int, int) { throw_nogpu(); }
|
||||
void cv::gpu::Laplacian(const GpuMat&, GpuMat&, int, int, double) { throw_nogpu(); }
|
||||
|
||||
#else
|
||||
@ -133,13 +133,17 @@ namespace
|
||||
class Filter2DEngine_GPU : public FilterEngine_GPU
|
||||
{
|
||||
public:
|
||||
Filter2DEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_) : filter2D(filter2D_) {}
|
||||
Filter2DEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int srcType_, int dstType_) :
|
||||
filter2D(filter2D_), srcType(srcType_), dstType(dstType_)
|
||||
{}
|
||||
|
||||
virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))
|
||||
{
|
||||
CV_Assert(src.type() == srcType);
|
||||
|
||||
Size src_size = src.size();
|
||||
|
||||
dst.create(src_size, src.type());
|
||||
dst.create(src_size, dstType);
|
||||
dst = Scalar(0.0);
|
||||
|
||||
normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);
|
||||
@ -151,12 +155,13 @@ namespace
|
||||
}
|
||||
|
||||
Ptr<BaseFilter_GPU> filter2D;
|
||||
int srcType, dstType;
|
||||
};
|
||||
}
|
||||
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D)
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D, int srcType, int dstType)
|
||||
{
|
||||
return Ptr<FilterEngine_GPU>(new Filter2DEngine_GPU(filter2D));
|
||||
return Ptr<FilterEngine_GPU>(new Filter2DEngine_GPU(filter2D, srcType, dstType));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -168,8 +173,9 @@ namespace
|
||||
{
|
||||
public:
|
||||
SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_,
|
||||
const Ptr<BaseColumnFilter_GPU>& columnFilter_) :
|
||||
rowFilter(rowFilter_), columnFilter(columnFilter_)
|
||||
const Ptr<BaseColumnFilter_GPU>& columnFilter_, int srcType_, int bufType_, int dstType_) :
|
||||
rowFilter(rowFilter_), columnFilter(columnFilter_),
|
||||
srcType(srcType_), bufType(bufType_), dstType(dstType_)
|
||||
{
|
||||
ksize = Size(rowFilter->ksize, columnFilter->ksize);
|
||||
anchor = Point(rowFilter->anchor, columnFilter->anchor);
|
||||
@ -177,19 +183,20 @@ namespace
|
||||
|
||||
virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))
|
||||
{
|
||||
Size src_size = src.size();
|
||||
int src_type = src.type();
|
||||
CV_Assert(src.type() == srcType);
|
||||
|
||||
dst.create(src_size, src_type);
|
||||
Size src_size = src.size();
|
||||
|
||||
dst.create(src_size, dstType);
|
||||
dst = Scalar(0.0);
|
||||
dstBuf.create(src_size, src_type);
|
||||
dstBuf.create(src_size, bufType);
|
||||
dstBuf = Scalar(0.0);
|
||||
|
||||
normalizeROI(roi, ksize, anchor, src_size);
|
||||
|
||||
srcROI = src(roi);
|
||||
dstROI = dst(roi);
|
||||
dstBufROI = dstBuf(roi);
|
||||
GpuMat srcROI = src(roi);
|
||||
GpuMat dstROI = dst(roi);
|
||||
GpuMat dstBufROI = dstBuf(roi);
|
||||
|
||||
(*rowFilter)(srcROI, dstBufROI);
|
||||
(*columnFilter)(dstBufROI, dstROI);
|
||||
@ -197,19 +204,19 @@ namespace
|
||||
|
||||
Ptr<BaseRowFilter_GPU> rowFilter;
|
||||
Ptr<BaseColumnFilter_GPU> columnFilter;
|
||||
int srcType, bufType, dstType;
|
||||
|
||||
Size ksize;
|
||||
Point anchor;
|
||||
GpuMat dstBuf;
|
||||
GpuMat srcROI;
|
||||
GpuMat dstROI;
|
||||
GpuMat dstBufROI;
|
||||
|
||||
GpuMat dstBuf;
|
||||
};
|
||||
}
|
||||
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
|
||||
const Ptr<BaseColumnFilter_GPU>& columnFilter)
|
||||
const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType)
|
||||
{
|
||||
return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter));
|
||||
return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter, srcType, bufType, dstType));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -315,7 +322,7 @@ Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int srcType, int dstType, const Si
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int srcType, int dstType, const Size& ksize, const Point& anchor)
|
||||
{
|
||||
Ptr<BaseFilter_GPU> boxFilter = getBoxFilter_GPU(srcType, dstType, ksize, anchor);
|
||||
return createFilter2D_GPU(boxFilter);
|
||||
return createFilter2D_GPU(boxFilter, srcType, dstType);
|
||||
}
|
||||
|
||||
void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor)
|
||||
@ -386,8 +393,8 @@ namespace
|
||||
class MorphologyFilterEngine_GPU : public Filter2DEngine_GPU
|
||||
{
|
||||
public:
|
||||
MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int iters_) :
|
||||
Filter2DEngine_GPU(filter2D_), iters(iters_) {}
|
||||
MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int type, int iters_) :
|
||||
Filter2DEngine_GPU(filter2D_, type, type), iters(iters_) {}
|
||||
|
||||
virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))
|
||||
{
|
||||
@ -415,7 +422,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int op, int type, cons
|
||||
|
||||
Ptr<BaseFilter_GPU> filter2D = getMorphologyFilter_GPU(op, type, kernel, ksize, anchor);
|
||||
|
||||
return Ptr<FilterEngine_GPU>(new MorphologyFilterEngine_GPU(filter2D, iterations));
|
||||
return Ptr<FilterEngine_GPU>(new MorphologyFilterEngine_GPU(filter2D, type, iterations));
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -558,7 +565,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType,
|
||||
|
||||
Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, ksize, anchor);
|
||||
|
||||
return createFilter2D_GPU(linearFilter);
|
||||
return createFilter2D_GPU(linearFilter, srcType, dstType);
|
||||
}
|
||||
|
||||
void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor)
|
||||
@ -578,10 +585,10 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
|
||||
namespace cv { namespace gpu { namespace filters
|
||||
{
|
||||
template <typename T, typename D>
|
||||
void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
||||
void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
|
||||
template <typename T, typename D>
|
||||
void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
||||
void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
}}}
|
||||
|
||||
namespace
|
||||
@ -589,7 +596,7 @@ namespace
|
||||
typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI,
|
||||
const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);
|
||||
|
||||
typedef void (*gpuFilter1D_t)(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
||||
typedef void (*gpuFilter1D_t)(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
|
||||
|
||||
class NppLinearRowFilter : public BaseRowFilter_GPU
|
||||
{
|
||||
@ -614,35 +621,28 @@ namespace
|
||||
class GpuLinearRowFilter : public BaseRowFilter_GPU
|
||||
{
|
||||
public:
|
||||
GpuLinearRowFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_) :
|
||||
BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}
|
||||
GpuLinearRowFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) :
|
||||
BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}
|
||||
|
||||
virtual void operator()(const GpuMat& src, GpuMat& dst)
|
||||
{
|
||||
func(src, dst, kernel.ptr<float>(), ksize, anchor);
|
||||
func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type);
|
||||
}
|
||||
|
||||
Mat kernel;
|
||||
gpuFilter1D_t func;
|
||||
int brd_type;
|
||||
};
|
||||
}
|
||||
|
||||
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor)
|
||||
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType)
|
||||
{
|
||||
using namespace cv::gpu::filters;
|
||||
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};
|
||||
static const gpuFilter1D_t gpuFilter1D_callers[6][6] =
|
||||
{
|
||||
{linearRowFilter_gpu<uchar4, uchar4>,linearRowFilter_gpu<uchar4, char4>,0,0,0,0},
|
||||
{linearRowFilter_gpu<char4, uchar4>,linearRowFilter_gpu<char4, char4>,0,0,0,0},
|
||||
{0,0,linearRowFilter_gpu<ushort2, ushort2>,linearRowFilter_gpu<ushort2, short2>,0,0},
|
||||
{0,0,linearRowFilter_gpu<short2, ushort2>,linearRowFilter_gpu<short2, short2>,0,0},
|
||||
{0,0,0,0,linearRowFilter_gpu<int, int>, linearRowFilter_gpu<int, float>},
|
||||
{0,0,0,0,linearRowFilter_gpu<float, int>, linearRowFilter_gpu<float, float>}
|
||||
};
|
||||
|
||||
if ((bufType == srcType) && (srcType == CV_8UC1 || srcType == CV_8UC4))
|
||||
{
|
||||
CV_Assert(borderType == BORDER_CONSTANT);
|
||||
|
||||
GpuMat gpu_row_krnl;
|
||||
int nDivisor;
|
||||
normalizeKernel(rowKernel, gpu_row_krnl, CV_32S, &nDivisor, true);
|
||||
@ -653,9 +653,15 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
|
||||
return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, gpu_row_krnl, nDivisor,
|
||||
nppFilter1D_callers[CV_MAT_CN(srcType)]));
|
||||
}
|
||||
|
||||
CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);
|
||||
int gpuBorderType;
|
||||
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
|
||||
|
||||
CV_Assert(srcType == CV_8UC4 || srcType == CV_8SC4 || srcType == CV_16UC2 || srcType == CV_16SC2 || srcType == CV_32SC1 || srcType == CV_32FC1);
|
||||
CV_Assert(bufType == CV_8UC4 || bufType == CV_8SC4 || bufType == CV_16UC2 || bufType == CV_16SC2 || bufType == CV_32SC1 || bufType == CV_32FC1);
|
||||
CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_16SC1 || srcType == CV_16SC2
|
||||
|| srcType == CV_32SC1 || srcType == CV_32FC1);
|
||||
|
||||
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(srcType) == CV_MAT_CN(bufType));
|
||||
|
||||
Mat temp(rowKernel.size(), CV_32FC1);
|
||||
rowKernel.convertTo(temp, CV_32FC1);
|
||||
@ -663,12 +669,35 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
|
||||
|
||||
int ksize = cont_krnl.cols;
|
||||
|
||||
CV_Assert(ksize < 16);
|
||||
CV_Assert(ksize > 0 && ksize <= 16);
|
||||
|
||||
normalizeAnchor(anchor, ksize);
|
||||
|
||||
return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, cont_krnl,
|
||||
gpuFilter1D_callers[CV_MAT_DEPTH(srcType)][CV_MAT_DEPTH(bufType)]));
|
||||
gpuFilter1D_t func = 0;
|
||||
|
||||
switch (srcType)
|
||||
{
|
||||
case CV_8UC1:
|
||||
func = filters::linearRowFilter_gpu<uchar, float>;
|
||||
break;
|
||||
case CV_8UC4:
|
||||
func = filters::linearRowFilter_gpu<uchar4, float4>;
|
||||
break;
|
||||
case CV_16SC1:
|
||||
func = filters::linearRowFilter_gpu<short, float>;
|
||||
break;
|
||||
case CV_16SC2:
|
||||
func = filters::linearRowFilter_gpu<short2, float2>;
|
||||
break;
|
||||
case CV_32SC1:
|
||||
func = filters::linearRowFilter_gpu<int, float>;
|
||||
break;
|
||||
case CV_32FC1:
|
||||
func = filters::linearRowFilter_gpu<float, float>;
|
||||
break;
|
||||
}
|
||||
|
||||
return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, cont_krnl, func, gpuBorderType));
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -696,35 +725,28 @@ namespace
|
||||
class GpuLinearColumnFilter : public BaseColumnFilter_GPU
|
||||
{
|
||||
public:
|
||||
GpuLinearColumnFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_) :
|
||||
BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}
|
||||
GpuLinearColumnFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) :
|
||||
BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}
|
||||
|
||||
virtual void operator()(const GpuMat& src, GpuMat& dst)
|
||||
{
|
||||
func(src, dst, kernel.ptr<float>(), ksize, anchor);
|
||||
func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type);
|
||||
}
|
||||
|
||||
Mat kernel;
|
||||
gpuFilter1D_t func;
|
||||
int brd_type;
|
||||
};
|
||||
}
|
||||
|
||||
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor)
|
||||
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType)
|
||||
{
|
||||
using namespace cv::gpu::filters;
|
||||
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};
|
||||
static const gpuFilter1D_t gpuFilter1D_callers[6][6] =
|
||||
{
|
||||
{linearColumnFilter_gpu<uchar4, uchar4>,linearColumnFilter_gpu<uchar4, char4>,0,0,0,0},
|
||||
{linearColumnFilter_gpu<char4, uchar4>,linearColumnFilter_gpu<char4, char4>,0,0,0,0},
|
||||
{0,0,linearColumnFilter_gpu<ushort2, ushort2>,linearColumnFilter_gpu<ushort2, short2>,0,0},
|
||||
{0,0,linearColumnFilter_gpu<short2, ushort2>,linearColumnFilter_gpu<short2, short2>,0,0},
|
||||
{0,0,0,0,linearColumnFilter_gpu<int, int>, linearColumnFilter_gpu<int, float>},
|
||||
{0,0,0,0,linearColumnFilter_gpu<float, int>, linearColumnFilter_gpu<float, float>}
|
||||
};
|
||||
|
||||
if ((bufType == dstType) && (bufType == CV_8UC1 || bufType == CV_8UC4))
|
||||
{
|
||||
CV_Assert(borderType == BORDER_CONSTANT);
|
||||
|
||||
GpuMat gpu_col_krnl;
|
||||
int nDivisor;
|
||||
normalizeKernel(columnKernel, gpu_col_krnl, CV_32S, &nDivisor, true);
|
||||
@ -735,9 +757,15 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
|
||||
return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor,
|
||||
nppFilter1D_callers[CV_MAT_CN(bufType)]));
|
||||
}
|
||||
|
||||
CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);
|
||||
int gpuBorderType;
|
||||
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
|
||||
|
||||
CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC4 || dstType == CV_16SC1 || dstType == CV_16SC2
|
||||
|| dstType == CV_32SC1 || dstType == CV_32FC1);
|
||||
|
||||
CV_Assert(dstType == CV_8UC4 || dstType == CV_8SC4 || dstType == CV_16UC2 || dstType == CV_16SC2 || dstType == CV_32SC1 || dstType == CV_32FC1);
|
||||
CV_Assert(bufType == CV_8UC4 || bufType == CV_8SC4 || bufType == CV_16UC2 || bufType == CV_16SC2 || bufType == CV_32SC1 || bufType == CV_32FC1);
|
||||
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType));
|
||||
|
||||
Mat temp(columnKernel.size(), CV_32FC1);
|
||||
columnKernel.convertTo(temp, CV_32FC1);
|
||||
@ -745,50 +773,76 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
|
||||
|
||||
int ksize = cont_krnl.cols;
|
||||
|
||||
CV_Assert(ksize < 16);
|
||||
CV_Assert(ksize > 0 && ksize <= 16);
|
||||
|
||||
normalizeAnchor(anchor, ksize);
|
||||
|
||||
return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, cont_krnl,
|
||||
gpuFilter1D_callers[CV_MAT_DEPTH(bufType)][CV_MAT_DEPTH(dstType)]));
|
||||
gpuFilter1D_t func = 0;
|
||||
|
||||
switch (dstType)
|
||||
{
|
||||
case CV_8UC1:
|
||||
func = filters::linearColumnFilter_gpu<float, uchar>;
|
||||
break;
|
||||
case CV_8UC4:
|
||||
func = filters::linearColumnFilter_gpu<float4, uchar4>;
|
||||
break;
|
||||
case CV_16SC1:
|
||||
func = filters::linearColumnFilter_gpu<float, short>;
|
||||
break;
|
||||
case CV_16SC2:
|
||||
func = filters::linearColumnFilter_gpu<float2, short2>;
|
||||
break;
|
||||
case CV_32SC1:
|
||||
func = filters::linearColumnFilter_gpu<float, int>;
|
||||
break;
|
||||
case CV_32FC1:
|
||||
func = filters::linearColumnFilter_gpu<float, float>;
|
||||
break;
|
||||
}
|
||||
|
||||
return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, cont_krnl, func, gpuBorderType));
|
||||
}
|
||||
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel,
|
||||
const Point& anchor)
|
||||
const Point& anchor, int rowBorderType, int columnBorderType)
|
||||
{
|
||||
if (columnBorderType < 0)
|
||||
columnBorderType = rowBorderType;
|
||||
|
||||
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType);
|
||||
int cn = CV_MAT_CN(srcType);
|
||||
int bdepth = std::max(sdepth, ddepth);
|
||||
int bdepth = CV_32F;
|
||||
int bufType = CV_MAKETYPE(bdepth, cn);
|
||||
|
||||
Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x);
|
||||
Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y);
|
||||
Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, rowBorderType);
|
||||
Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, columnBorderType);
|
||||
|
||||
return createSeparableFilter_GPU(rowFilter, columnFilter);
|
||||
return createSeparableFilter_GPU(rowFilter, columnFilter, srcType, bufType, dstType);
|
||||
}
|
||||
|
||||
void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor)
|
||||
void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor, int rowBorderType, int columnBorderType)
|
||||
{
|
||||
if( ddepth < 0 )
|
||||
ddepth = src.depth();
|
||||
|
||||
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
|
||||
|
||||
Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor);
|
||||
f->apply(src, dst);
|
||||
Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, rowBorderType, columnBorderType);
|
||||
f->apply(src, dst, Rect(0, 0, src.cols, src.rows));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Deriv Filter
|
||||
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize)
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int rowBorderType, int columnBorderType)
|
||||
{
|
||||
Mat kx, ky;
|
||||
getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
|
||||
return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, Point(-1,-1));
|
||||
return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);
|
||||
}
|
||||
|
||||
void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale)
|
||||
void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale, int rowBorderType, int columnBorderType)
|
||||
{
|
||||
Mat kx, ky;
|
||||
getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
|
||||
@ -803,10 +857,10 @@ void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
|
||||
ky *= scale;
|
||||
}
|
||||
|
||||
sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1));
|
||||
sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);
|
||||
}
|
||||
|
||||
void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale)
|
||||
void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale, int rowBorderType, int columnBorderType)
|
||||
{
|
||||
Mat kx, ky;
|
||||
getDerivKernels(kx, ky, dx, dy, -1, false, CV_32F);
|
||||
@ -821,7 +875,7 @@ void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
|
||||
ky *= scale;
|
||||
}
|
||||
|
||||
sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1));
|
||||
sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);
|
||||
}
|
||||
|
||||
void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale)
|
||||
@ -843,7 +897,7 @@ void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, d
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Gaussian Filter
|
||||
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2)
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType)
|
||||
{
|
||||
int depth = CV_MAT_DEPTH(type);
|
||||
|
||||
@ -868,10 +922,10 @@ Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, do
|
||||
else
|
||||
ky = getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F) );
|
||||
|
||||
return createSeparableLinearFilter_GPU(type, type, kx, ky);
|
||||
return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);
|
||||
}
|
||||
|
||||
void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2)
|
||||
void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType)
|
||||
{
|
||||
if (ksize.width == 1 && ksize.height == 1)
|
||||
{
|
||||
@ -881,8 +935,8 @@ void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double si
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2);
|
||||
f->apply(src, dst);
|
||||
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, rowBorderType, columnBorderType);
|
||||
f->apply(src, dst, Rect(0, 0, src.cols, src.rows));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -972,6 +972,12 @@ bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
|
||||
gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (cpuBorderType == cv::BORDER_CONSTANT)
|
||||
{
|
||||
gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -123,278 +123,295 @@ namespace cv
|
||||
{
|
||||
typedef uchar elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ uchar all(uchar v) {return v;}
|
||||
static __device__ __host__ uchar all(uchar v) {return v;}
|
||||
static __device__ __host__ uchar make(uchar x) {return x;}
|
||||
};
|
||||
template<> struct VecTraits<uchar1>
|
||||
{
|
||||
typedef uchar elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ uchar1 all(uchar v) {return make_uchar1(v);}
|
||||
static __device__ __host__ uchar1 all(uchar v) {return make_uchar1(v);}
|
||||
static __device__ __host__ uchar1 make(uchar x) {return make_uchar1(x);}
|
||||
};
|
||||
template<> struct VecTraits<uchar2>
|
||||
{
|
||||
typedef uchar elem_t;
|
||||
enum {cn=2};
|
||||
static __device__ uchar2 all(uchar v) {return make_uchar2(v, v);}
|
||||
static __device__ __host__ uchar2 all(uchar v) {return make_uchar2(v, v);}
|
||||
static __device__ __host__ uchar2 make(uchar x, uchar y) {return make_uchar2(x, y);}
|
||||
};
|
||||
template<> struct VecTraits<uchar3>
|
||||
{
|
||||
typedef uchar elem_t;
|
||||
enum {cn=3};
|
||||
static __device__ uchar3 all(uchar v) {return make_uchar3(v, v, v);}
|
||||
static __device__ __host__ uchar3 all(uchar v) {return make_uchar3(v, v, v);}
|
||||
static __device__ __host__ uchar3 make(uchar x, uchar y, uchar z) {return make_uchar3(x, y, z);}
|
||||
};
|
||||
template<> struct VecTraits<uchar4>
|
||||
{
|
||||
typedef uchar elem_t;
|
||||
enum {cn=4};
|
||||
static __device__ uchar4 all(uchar v) {return make_uchar4(v, v, v, v);}
|
||||
static __device__ __host__ uchar4 all(uchar v) {return make_uchar4(v, v, v, v);}
|
||||
static __device__ __host__ uchar4 make(uchar x, uchar y, uchar z, uchar w) {return make_uchar4(x, y, z, w);}
|
||||
};
|
||||
|
||||
template<> struct VecTraits<char>
|
||||
{
|
||||
typedef char elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ char all(char v) {return v;}
|
||||
static __device__ __host__ char all(char v) {return v;}
|
||||
static __device__ __host__ char make(char x) {return x;}
|
||||
};
|
||||
template<> struct VecTraits<char1>
|
||||
{
|
||||
typedef char elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ char1 all(char v) {return make_char1(v);}
|
||||
static __device__ __host__ char1 all(char v) {return make_char1(v);}
|
||||
static __device__ __host__ char1 make(char x) {return make_char1(x);}
|
||||
};
|
||||
template<> struct VecTraits<char2>
|
||||
{
|
||||
typedef char elem_t;
|
||||
enum {cn=2};
|
||||
static __device__ char2 all(char v) {return make_char2(v, v);}
|
||||
static __device__ __host__ char2 all(char v) {return make_char2(v, v);}
|
||||
static __device__ __host__ char2 make(char x, char y) {return make_char2(x, y);}
|
||||
};
|
||||
template<> struct VecTraits<char3>
|
||||
{
|
||||
typedef char elem_t;
|
||||
enum {cn=3};
|
||||
static __device__ char3 all(char v) {return make_char3(v, v, v);}
|
||||
static __device__ __host__ char3 all(char v) {return make_char3(v, v, v);}
|
||||
static __device__ __host__ char3 make(char x, char y, char z) {return make_char3(x, y, z);}
|
||||
};
|
||||
template<> struct VecTraits<char4>
|
||||
{
|
||||
typedef char elem_t;
|
||||
enum {cn=4};
|
||||
static __device__ char4 all(char v) {return make_char4(v, v, v, v);}
|
||||
static __device__ __host__ char4 all(char v) {return make_char4(v, v, v, v);}
|
||||
static __device__ __host__ char4 make(char x, char y, char z, char w) {return make_char4(x, y, z, w);}
|
||||
};
|
||||
|
||||
template<> struct VecTraits<ushort>
|
||||
{
|
||||
typedef ushort elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ ushort all(ushort v) {return v;}
|
||||
static __device__ __host__ ushort all(ushort v) {return v;}
|
||||
static __device__ __host__ ushort make(ushort x) {return x;}
|
||||
};
|
||||
template<> struct VecTraits<ushort1>
|
||||
{
|
||||
typedef ushort elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ ushort1 all(ushort v) {return make_ushort1(v);}
|
||||
static __device__ __host__ ushort1 all(ushort v) {return make_ushort1(v);}
|
||||
static __device__ __host__ ushort1 make(ushort x) {return make_ushort1(x);}
|
||||
};
|
||||
template<> struct VecTraits<ushort2>
|
||||
{
|
||||
typedef ushort elem_t;
|
||||
enum {cn=2};
|
||||
static __device__ ushort2 all(ushort v) {return make_ushort2(v, v);}
|
||||
static __device__ __host__ ushort2 all(ushort v) {return make_ushort2(v, v);}
|
||||
static __device__ __host__ ushort2 make(ushort x, ushort y) {return make_ushort2(x, y);}
|
||||
};
|
||||
template<> struct VecTraits<ushort3>
|
||||
{
|
||||
typedef ushort elem_t;
|
||||
enum {cn=3};
|
||||
static __device__ ushort3 all(ushort v) {return make_ushort3(v, v, v);}
|
||||
static __device__ __host__ ushort3 all(ushort v) {return make_ushort3(v, v, v);}
|
||||
static __device__ __host__ ushort3 make(ushort x, ushort y, ushort z) {return make_ushort3(x, y, z);}
|
||||
};
|
||||
template<> struct VecTraits<ushort4>
|
||||
{
|
||||
typedef ushort elem_t;
|
||||
enum {cn=4};
|
||||
static __device__ ushort4 all(ushort v) {return make_ushort4(v, v, v, v);}
|
||||
static __device__ __host__ ushort4 all(ushort v) {return make_ushort4(v, v, v, v);}
|
||||
static __device__ __host__ ushort4 make(ushort x, ushort y, ushort z, ushort w) {return make_ushort4(x, y, z, w);}
|
||||
};
|
||||
|
||||
template<> struct VecTraits<short>
|
||||
{
|
||||
typedef short elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ short all(short v) {return v;}
|
||||
static __device__ __host__ short all(short v) {return v;}
|
||||
static __device__ __host__ short make(short x) {return x;}
|
||||
};
|
||||
template<> struct VecTraits<short1>
|
||||
{
|
||||
typedef short elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ short1 all(short v) {return make_short1(v);}
|
||||
static __device__ __host__ short1 all(short v) {return make_short1(v);}
|
||||
static __device__ __host__ short1 make(short x) {return make_short1(x);}
|
||||
};
|
||||
template<> struct VecTraits<short2>
|
||||
{
|
||||
typedef short elem_t;
|
||||
enum {cn=2};
|
||||
static __device__ short2 all(short v) {return make_short2(v, v);}
|
||||
static __device__ __host__ short2 all(short v) {return make_short2(v, v);}
|
||||
static __device__ __host__ short2 make(short x, short y) {return make_short2(x, y);}
|
||||
};
|
||||
template<> struct VecTraits<short3>
|
||||
{
|
||||
typedef short elem_t;
|
||||
enum {cn=3};
|
||||
static __device__ short3 all(short v) {return make_short3(v, v, v);}
|
||||
static __device__ __host__ short3 all(short v) {return make_short3(v, v, v);}
|
||||
static __device__ __host__ short3 make(short x, short y, short z) {return make_short3(x, y, z);}
|
||||
};
|
||||
template<> struct VecTraits<short4>
|
||||
{
|
||||
typedef short elem_t;
|
||||
enum {cn=4};
|
||||
static __device__ short4 all(short v) {return make_short4(v, v, v, v);}
|
||||
static __device__ __host__ short4 all(short v) {return make_short4(v, v, v, v);}
|
||||
static __device__ __host__ short4 make(short x, short y, short z, short w) {return make_short4(x, y, z, w);}
|
||||
};
|
||||
|
||||
template<> struct VecTraits<uint>
|
||||
{
|
||||
typedef uint elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ uint all(uint v) {return v;}
|
||||
static __device__ __host__ uint all(uint v) {return v;}
|
||||
static __device__ __host__ uint make(uint x) {return x;}
|
||||
};
|
||||
template<> struct VecTraits<uint1>
|
||||
{
|
||||
typedef uint elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ uint1 all(uint v) {return make_uint1(v);}
|
||||
static __device__ __host__ uint1 all(uint v) {return make_uint1(v);}
|
||||
static __device__ __host__ uint1 make(uint x) {return make_uint1(x);}
|
||||
};
|
||||
template<> struct VecTraits<uint2>
|
||||
{
|
||||
typedef uint elem_t;
|
||||
enum {cn=2};
|
||||
static __device__ uint2 all(uint v) {return make_uint2(v, v);}
|
||||
static __device__ __host__ uint2 all(uint v) {return make_uint2(v, v);}
|
||||
static __device__ __host__ uint2 make(uint x, uint y) {return make_uint2(x, y);}
|
||||
};
|
||||
template<> struct VecTraits<uint3>
|
||||
{
|
||||
typedef uint elem_t;
|
||||
enum {cn=3};
|
||||
static __device__ uint3 all(uint v) {return make_uint3(v, v, v);}
|
||||
static __device__ __host__ uint3 all(uint v) {return make_uint3(v, v, v);}
|
||||
static __device__ __host__ uint3 make(uint x, uint y, uint z) {return make_uint3(x, y, z);}
|
||||
};
|
||||
template<> struct VecTraits<uint4>
|
||||
{
|
||||
typedef uint elem_t;
|
||||
enum {cn=4};
|
||||
static __device__ uint4 all(uint v) {return make_uint4(v, v, v, v);}
|
||||
static __device__ __host__ uint4 all(uint v) {return make_uint4(v, v, v, v);}
|
||||
static __device__ __host__ uint4 make(uint x, uint y, uint z, uint w) {return make_uint4(x, y, z, w);}
|
||||
};
|
||||
|
||||
template<> struct VecTraits<int>
|
||||
{
|
||||
typedef int elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ int all(int v) {return v;}
|
||||
static __device__ __host__ int all(int v) {return v;}
|
||||
static __device__ __host__ int make(int x) {return x;}
|
||||
};
|
||||
template<> struct VecTraits<int1>
|
||||
{
|
||||
typedef int elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ int1 all(int v) {return make_int1(v);}
|
||||
static __device__ __host__ int1 all(int v) {return make_int1(v);}
|
||||
static __device__ __host__ int1 make(int x) {return make_int1(x);}
|
||||
};
|
||||
template<> struct VecTraits<int2>
|
||||
{
|
||||
typedef int elem_t;
|
||||
enum {cn=2};
|
||||
static __device__ int2 all(int v) {return make_int2(v, v);}
|
||||
static __device__ __host__ int2 all(int v) {return make_int2(v, v);}
|
||||
static __device__ __host__ int2 make(int x, int y) {return make_int2(x, y);}
|
||||
};
|
||||
template<> struct VecTraits<int3>
|
||||
{
|
||||
typedef int elem_t;
|
||||
enum {cn=3};
|
||||
static __device__ int3 all(int v) {return make_int3(v, v, v);}
|
||||
static __device__ __host__ int3 all(int v) {return make_int3(v, v, v);}
|
||||
static __device__ __host__ int3 make(int x, int y, int z) {return make_int3(x, y, z);}
|
||||
};
|
||||
template<> struct VecTraits<int4>
|
||||
{
|
||||
typedef int elem_t;
|
||||
enum {cn=4};
|
||||
static __device__ int4 all(int v) {return make_int4(v, v, v, v);}
|
||||
static __device__ __host__ int4 all(int v) {return make_int4(v, v, v, v);}
|
||||
static __device__ __host__ int4 make(int x, int y, int z, int w) {return make_int4(x, y, z, w);}
|
||||
};
|
||||
|
||||
template<> struct VecTraits<float>
|
||||
{
|
||||
typedef float elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ float all(float v) {return v;}
|
||||
static __device__ __host__ float all(float v) {return v;}
|
||||
static __device__ __host__ float make(float x) {return x;}
|
||||
};
|
||||
template<> struct VecTraits<float1>
|
||||
{
|
||||
typedef float elem_t;
|
||||
enum {cn=1};
|
||||
static __device__ float1 all(float v) {return make_float1(v);}
|
||||
static __device__ __host__ float1 all(float v) {return make_float1(v);}
|
||||
static __device__ __host__ float1 make(float x) {return make_float1(x);}
|
||||
};
|
||||
template<> struct VecTraits<float2>
|
||||
{
|
||||
typedef float elem_t;
|
||||
enum {cn=2};
|
||||
static __device__ float2 all(float v) {return make_float2(v, v);}
|
||||
static __device__ __host__ float2 all(float v) {return make_float2(v, v);}
|
||||
static __device__ __host__ float2 make(float x, float y) {return make_float2(x, y);}
|
||||
};
|
||||
template<> struct VecTraits<float3>
|
||||
{
|
||||
typedef float elem_t;
|
||||
enum {cn=3};
|
||||
static __device__ float3 all(float v) {return make_float3(v, v, v);}
|
||||
static __device__ __host__ float3 all(float v) {return make_float3(v, v, v);}
|
||||
static __device__ __host__ float3 make(float x, float y, float z) {return make_float3(x, y, z);}
|
||||
};
|
||||
template<> struct VecTraits<float4>
|
||||
{
|
||||
typedef float elem_t;
|
||||
enum {cn=4};
|
||||
static __device__ float4 all(float v) {return make_float4(v, v, v, v);}
|
||||
static __device__ __host__ float4 all(float v) {return make_float4(v, v, v, v);}
|
||||
static __device__ __host__ float4 make(float x, float y, float z, float w) {return make_float4(x, y, z, w);}
|
||||
};
|
||||
|
||||
template <int cn, typename VecD> struct SatCast;
|
||||
template <typename VecD> struct SatCast<1, VecD>
|
||||
{
|
||||
template <typename VecS>
|
||||
__device__ VecD operator()(const VecS& v)
|
||||
static __device__ VecD cast(const VecS& v)
|
||||
{
|
||||
VecD res;
|
||||
res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x);
|
||||
return res;
|
||||
typedef typename VecTraits<VecD>::elem_t D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x));
|
||||
}
|
||||
};
|
||||
template <typename VecD> struct SatCast<2, VecD>
|
||||
{
|
||||
template <typename VecS>
|
||||
__device__ VecD operator()(const VecS& v)
|
||||
static __device__ VecD cast(const VecS& v)
|
||||
{
|
||||
VecD res;
|
||||
res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x);
|
||||
res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.y);
|
||||
return res;
|
||||
typedef typename VecTraits<VecD>::elem_t D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
|
||||
}
|
||||
};
|
||||
template <typename VecD> struct SatCast<3, VecD>
|
||||
{
|
||||
template <typename VecS>
|
||||
__device__ VecD operator()(const VecS& v)
|
||||
static __device__ VecD cast(const VecS& v)
|
||||
{
|
||||
VecD res;
|
||||
res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x);
|
||||
res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.y);
|
||||
res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.z);
|
||||
return res;
|
||||
typedef typename VecTraits<VecD>::elem_t D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
|
||||
}
|
||||
};
|
||||
template <typename VecD> struct SatCast<4, VecD>
|
||||
{
|
||||
template <typename VecS>
|
||||
__device__ VecD operator()(const VecS& v)
|
||||
static __device__ VecD cast(const VecS& v)
|
||||
{
|
||||
VecD res;
|
||||
res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x);
|
||||
res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.y);
|
||||
res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.z);
|
||||
res.w = saturate_cast< VecTraits<VecD>::elem_t >(v.w);
|
||||
return res;
|
||||
typedef typename VecTraits<VecD>::elem_t D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename VecD, typename VecS> static __device__ VecD saturate_cast_caller(const VecS& v)
|
||||
{
|
||||
SatCast<
|
||||
|
||||
VecTraits<VecD>::cn,
|
||||
|
||||
VecD
|
||||
>
|
||||
|
||||
cast;
|
||||
return cast(v);
|
||||
return SatCast<VecTraits<VecD>::cn, VecD>::cast(v);
|
||||
}
|
||||
|
||||
template<typename _Tp> static __device__ _Tp saturate_cast(const uchar1& v) {return saturate_cast_caller<_Tp>(v);}
|
||||
|
@ -107,7 +107,7 @@ protected:
|
||||
|
||||
if (!compareMatches(matchesCPU, matchesGPU))
|
||||
{
|
||||
ts->printf(CvTS::LOG, "Match FAIL");
|
||||
ts->printf(CvTS::LOG, "Match FAIL\n");
|
||||
ts->set_failed_test_info(CvTS::FAIL_MISMATCH);
|
||||
return;
|
||||
}
|
||||
@ -119,7 +119,7 @@ protected:
|
||||
|
||||
if (!compareMatches(knnMatchesCPU, knnMatchesGPU))
|
||||
{
|
||||
ts->printf(CvTS::LOG, "KNN Match FAIL");
|
||||
ts->printf(CvTS::LOG, "KNN Match FAIL\n");
|
||||
ts->set_failed_test_info(CvTS::FAIL_MISMATCH);
|
||||
return;
|
||||
}
|
||||
@ -131,7 +131,7 @@ protected:
|
||||
|
||||
if (!compareMatches(radiusMatchesCPU, radiusMatchesGPU))
|
||||
{
|
||||
ts->printf(CvTS::LOG, "Radius Match FAIL");
|
||||
ts->printf(CvTS::LOG, "Radius Match FAIL\n");
|
||||
ts->set_failed_test_info(CvTS::FAIL_MISMATCH);
|
||||
return;
|
||||
}
|
||||
|
@ -80,7 +80,8 @@ protected:
|
||||
|
||||
double res = norm(m1ROI, m2ROI, NORM_INF);
|
||||
|
||||
if (res <= 1)
|
||||
// Max difference (2.0) in GaussianBlur
|
||||
if (res <= 2)
|
||||
return CvTS::OK;
|
||||
|
||||
ts->printf(CvTS::LOG, "Norm: %f\n", res);
|
||||
@ -166,8 +167,6 @@ struct CV_GpuNppImageSobelTest : public CV_GpuNppFilterTest
|
||||
|
||||
int test(const Mat& img)
|
||||
{
|
||||
if (img.type() != CV_8UC1)
|
||||
return CvTS::OK;
|
||||
int ksizes[] = {3, 5, 7};
|
||||
int ksizes_num = sizeof(ksizes) / sizeof(int);
|
||||
|
||||
@ -183,10 +182,8 @@ struct CV_GpuNppImageSobelTest : public CV_GpuNppFilterTest
|
||||
cv::Sobel(img, cpudst, -1, dx, dy, ksizes[i]);
|
||||
|
||||
GpuMat gpu1(img);
|
||||
gpu1.convertTo(gpu1, CV_32S);
|
||||
GpuMat gpudst;
|
||||
cv::gpu::Sobel(gpu1, gpudst, -1, dx, dy, ksizes[i]);
|
||||
gpudst.convertTo(gpudst, CV_8U);
|
||||
|
||||
if (CheckNorm(cpudst, gpudst, Size(ksizes[i], ksizes[i])) != CvTS::OK)
|
||||
test_res = CvTS::FAIL_GENERIC;
|
||||
@ -204,20 +201,15 @@ struct CV_GpuNppImageScharrTest : public CV_GpuNppFilterTest
|
||||
|
||||
int test(const Mat& img)
|
||||
{
|
||||
if (img.type() != CV_8UC1)
|
||||
return CvTS::OK;
|
||||
|
||||
int dx = 1, dy = 0;
|
||||
|
||||
Mat cpudst;
|
||||
cv::Scharr(img, cpudst, -1, dx, dy);
|
||||
|
||||
GpuMat gpu1(img);
|
||||
gpu1.convertTo(gpu1, CV_32S);
|
||||
GpuMat gpudst;
|
||||
cv::gpu::Scharr(gpu1, gpudst, -1, dx, dy);
|
||||
gpudst.convertTo(gpudst, CV_8U);
|
||||
|
||||
|
||||
return CheckNorm(cpudst, gpudst, Size(3, 3));
|
||||
}
|
||||
};
|
||||
@ -244,7 +236,7 @@ struct CV_GpuNppImageGaussianBlurTest : public CV_GpuNppFilterTest
|
||||
{
|
||||
cv::Size ksize(ksizes[i], ksizes[j]);
|
||||
|
||||
ts->printf(CvTS::LOG, "ksize = (%dx%d)\t", ksizes[i], ksizes[j]);
|
||||
ts->printf(CvTS::LOG, "ksize = (%dx%d)\t\n", ksizes[i], ksizes[j]);
|
||||
|
||||
Mat cpudst;
|
||||
cv::GaussianBlur(img, cpudst, ksize, sigma1);
|
||||
|
Loading…
Reference in New Issue
Block a user