Merge pull request #7838 from khnaba:morphology-32f

2025-07-01 01:10:52 +08:00 · 2016-12-19 16:22:03 +00:00 · 2016-12-19 16:22:03 +00:00 · ece08374a1
commit ece08374a1
parent c64ac42b27 a5d857eda8
2 changed files with 45 additions and 14 deletions
--- a/modules/cudafilters/include/opencv2/cudafilters.hpp
+++ b/modules/cudafilters/include/opencv2/cudafilters.hpp
@ -250,7 +250,7 @@ CV_EXPORTS Ptr<Filter> createGaussianFilter(int srcType, int dstType, Size ksize
 -   **MORPH_GRADIENT** morphological gradient
 -   **MORPH_TOPHAT** "top hat"
 -   **MORPH_BLACKHAT** "black hat"
-@param srcType Input/output image type. Only CV_8UC1 and CV_8UC4 are supported.
+@param srcType Input/output image type. Only CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 are supported.
@param kernel 2D 8-bit structuring element for the morphological operation.
@param anchor Anchor position within the structuring element. Negative values mean that the anchor
 is at the center.
--- a/modules/cudafilters/src/filtering.cpp
+++ b/modules/cudafilters/src/filtering.cpp
@ -525,14 +525,17 @@ namespace
        void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());

    private:
-        typedef NppStatus (*nppMorfFilter_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI,
+        typedef NppStatus (*nppMorfFilter8u_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI,
+                                               const Npp8u* pMask, NppiSize oMaskSize, NppiPoint oAnchor);
+        typedef NppStatus (*nppMorfFilter32f_t)(const Npp32f* pSrc, Npp32s nSrcStep, Npp32f* pDst, Npp32s nDstStep, NppiSize oSizeROI,
                                                const Npp8u* pMask, NppiSize oMaskSize, NppiPoint oAnchor);

        int type_;
        GpuMat kernel_;
        Point anchor_;
        int iters_;
-        nppMorfFilter_t func_;
+        nppMorfFilter8u_t func8u_;
+        nppMorfFilter32f_t func32f_;

        GpuMat srcBorder_;
        GpuMat buf_;
@ -541,14 +544,19 @@ namespace
    MorphologyFilter::MorphologyFilter(int op, int srcType, InputArray _kernel, Point anchor, int iterations) :
        type_(srcType), anchor_(anchor), iters_(iterations)
    {
-        static const nppMorfFilter_t funcs[2][5] =
+        static const nppMorfFilter8u_t funcs8u[2][5] =
        {
            {0, nppiErode_8u_C1R, 0, 0, nppiErode_8u_C4R },
            {0, nppiDilate_8u_C1R, 0, 0, nppiDilate_8u_C4R }
        };
+        static const nppMorfFilter32f_t funcs32f[2][5] =
+        {
+            {0, nppiErode_32f_C1R, 0, 0, nppiErode_32f_C4R },
+            {0, nppiDilate_32f_C1R, 0, 0, nppiDilate_32f_C4R }
+        };

        CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE );
-        CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 );
+        CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC4 );

        Mat kernel = _kernel.getMat();
        Size ksize = !kernel.empty() ? _kernel.size() : Size(3, 3);
@ -579,7 +587,14 @@ namespace
        kernel_ = cuda::createContinuous(kernel.size(), CV_8UC1);
        kernel_.upload(kernel8U);

-        func_ = funcs[op][CV_MAT_CN(srcType)];
+        if(srcType == CV_8UC1 || srcType == CV_8UC4)
+        {
+            func8u_ = funcs8u[op][CV_MAT_CN(srcType)];
+        }
+        else if(srcType == CV_32FC1 || srcType == CV_32FC4)
+        {
+            func32f_ = funcs32f[op][CV_MAT_CN(srcType)];
+        }
    }

    void MorphologyFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
@ -618,16 +633,32 @@ namespace
        oAnchor.x = anchor_.x;
        oAnchor.y = anchor_.y;

-        nppSafeCall( func_(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
+        if (type_ == CV_8UC1 || type_ == CV_8UC4)
+        {
+            nppSafeCall( func8u_(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
                                 oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );

            for(int i = 1; i < iters_; ++i)
            {
                dst.copyTo(bufRoi, _stream);

-            nppSafeCall( func_(bufRoi.ptr<Npp8u>(), static_cast<int>(bufRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
+                nppSafeCall( func8u_(bufRoi.ptr<Npp8u>(), static_cast<int>(bufRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
                                     oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );
            }
+        }
+        else if (type_ == CV_32FC1 || type_ == CV_32FC4)
+        {
+            nppSafeCall( func32f_(srcRoi.ptr<Npp32f>(), static_cast<int>(srcRoi.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step),
+                                  oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );
+
+            for(int i = 1; i < iters_; ++i)
+            {
+                dst.copyTo(bufRoi, _stream);
+
+                nppSafeCall( func32f_(bufRoi.ptr<Npp32f>(), static_cast<int>(bufRoi.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step),
+                                      oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );
+            }
+        }

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );