Merge pull request #7838 from khnaba:morphology-32f

This commit is contained in:
Alexander Alekhin 2016-12-19 16:22:03 +00:00
commit ece08374a1
2 changed files with 45 additions and 14 deletions

View File

@ -250,7 +250,7 @@ CV_EXPORTS Ptr<Filter> createGaussianFilter(int srcType, int dstType, Size ksize
- **MORPH_GRADIENT** morphological gradient
- **MORPH_TOPHAT** "top hat"
- **MORPH_BLACKHAT** "black hat"
@param srcType Input/output image type. Only CV_8UC1 and CV_8UC4 are supported.
@param srcType Input/output image type. Only CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 are supported.
@param kernel 2D 8-bit structuring element for the morphological operation.
@param anchor Anchor position within the structuring element. Negative values mean that the anchor
is at the center.

View File

@ -525,14 +525,17 @@ namespace
void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
private:
typedef NppStatus (*nppMorfFilter_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI,
typedef NppStatus (*nppMorfFilter8u_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI,
const Npp8u* pMask, NppiSize oMaskSize, NppiPoint oAnchor);
typedef NppStatus (*nppMorfFilter32f_t)(const Npp32f* pSrc, Npp32s nSrcStep, Npp32f* pDst, Npp32s nDstStep, NppiSize oSizeROI,
const Npp8u* pMask, NppiSize oMaskSize, NppiPoint oAnchor);
int type_;
GpuMat kernel_;
Point anchor_;
int iters_;
nppMorfFilter_t func_;
nppMorfFilter8u_t func8u_;
nppMorfFilter32f_t func32f_;
GpuMat srcBorder_;
GpuMat buf_;
@ -541,14 +544,19 @@ namespace
MorphologyFilter::MorphologyFilter(int op, int srcType, InputArray _kernel, Point anchor, int iterations) :
type_(srcType), anchor_(anchor), iters_(iterations)
{
static const nppMorfFilter_t funcs[2][5] =
static const nppMorfFilter8u_t funcs8u[2][5] =
{
{0, nppiErode_8u_C1R, 0, 0, nppiErode_8u_C4R },
{0, nppiDilate_8u_C1R, 0, 0, nppiDilate_8u_C4R }
};
static const nppMorfFilter32f_t funcs32f[2][5] =
{
{0, nppiErode_32f_C1R, 0, 0, nppiErode_32f_C4R },
{0, nppiDilate_32f_C1R, 0, 0, nppiDilate_32f_C4R }
};
CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE );
CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 );
CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC4 );
Mat kernel = _kernel.getMat();
Size ksize = !kernel.empty() ? _kernel.size() : Size(3, 3);
@ -579,7 +587,14 @@ namespace
kernel_ = cuda::createContinuous(kernel.size(), CV_8UC1);
kernel_.upload(kernel8U);
func_ = funcs[op][CV_MAT_CN(srcType)];
if(srcType == CV_8UC1 || srcType == CV_8UC4)
{
func8u_ = funcs8u[op][CV_MAT_CN(srcType)];
}
else if(srcType == CV_32FC1 || srcType == CV_32FC4)
{
func32f_ = funcs32f[op][CV_MAT_CN(srcType)];
}
}
void MorphologyFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
@ -618,16 +633,32 @@ namespace
oAnchor.x = anchor_.x;
oAnchor.y = anchor_.y;
nppSafeCall( func_(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
if (type_ == CV_8UC1 || type_ == CV_8UC4)
{
nppSafeCall( func8u_(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );
for(int i = 1; i < iters_; ++i)
{
dst.copyTo(bufRoi, _stream);
nppSafeCall( func_(bufRoi.ptr<Npp8u>(), static_cast<int>(bufRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
nppSafeCall( func8u_(bufRoi.ptr<Npp8u>(), static_cast<int>(bufRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );
}
}
else if (type_ == CV_32FC1 || type_ == CV_32FC4)
{
nppSafeCall( func32f_(srcRoi.ptr<Npp32f>(), static_cast<int>(srcRoi.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step),
oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );
for(int i = 1; i < iters_; ++i)
{
dst.copyTo(bufRoi, _stream);
nppSafeCall( func32f_(bufRoi.ptr<Npp32f>(), static_cast<int>(bufRoi.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step),
oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );
}
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );