opencv/modules/core/src/nan_mask.dispatch.cpp

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"
#include "opencl_kernels_core.hpp"

#include "nan_mask.simd.hpp"
#include "nan_mask.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content

namespace cv {

#ifdef HAVE_OPENCL

// OpenCL path of patchNaNs(): builds the "KF" kernel from arithm_oclsrc with
// OP_PATCH_NANS defined and runs it with the UMat behind _a as both the
// read-only source and the write-only destination.
//   _a    - array to process; its depth selects the float/double kernel type
//   value - passed to the kernel as float for CV_32F data, as double otherwise
// Returns false when CV_64F data is requested on a device without double
// support or when the kernel could not be created; otherwise returns the
// result of Kernel::run().
static bool ocl_patchNaNs( InputOutputArray _a, double value )
{
    const int depth = _a.depth();
    const bool is64F = (depth == CV_64F);

    const ocl::Device dev = ocl::Device::getDefault();
    const bool doubleSupport = dev.doubleFPConfig() > 0;
    if (is64F && !doubleSupport)
        return false;

    // Each work-item handles 4 rows on Intel devices and 1 row elsewhere.
    int rowsPerWI = 1;
    if (ocl::Device::getDefault().isIntel())
        rowsPerWI = 4;

    const String buildOptions =
        format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d %s",
               is64F ? "double" : "float", depth, rowsPerWI,
               doubleSupport ? "-D DOUBLE_SUPPORT" : "");
    ocl::Kernel kernel("KF", ocl::core::arithm_oclsrc, buildOptions);
    if (kernel.empty())
        return false;

    UMat a = _a.getUMat();
    const int cn = a.channels();

    // The scalar kernel argument must have the same type as the data,
    // so the replacement value is narrowed to float for CV_32F input.
    if (depth != CV_32F)
    {
        kernel.args(ocl::KernelArg::ReadOnlyNoSize(a), ocl::KernelArg::WriteOnly(a, cn), value);
    }
    else
    {
        kernel.args(ocl::KernelArg::ReadOnlyNoSize(a), ocl::KernelArg::WriteOnly(a, cn),
                    static_cast<float>(value));
    }

    // Dimension 0 covers every channel of every column; dimension 1 covers
    // the rows, rounded up to a whole number of rowsPerWI groups.
    const size_t workCols = static_cast<size_t>(a.cols) * cn;
    const size_t workRows = (static_cast<size_t>(a.rows) + rowsPerWI - 1) / rowsPerWI;
    size_t globalsize[2] = { workCols, workRows };
    return kernel.run(2, globalsize, NULL, false);
}

#endif

// Obtains the PatchNanFunc to use on the CPU by forwarding to the
// getPatchNanFunc() variants selected through CV_CPU_DISPATCH.
//   isDouble - forwarded unchanged; patchNaNs() passes true for CV_64F data.
// NOTE(review): there is no explicit return here, so CV_CPU_DISPATCH presumably
// expands to the return statement(s) that pick one of the modes listed in
// CV_CPU_DISPATCH_MODES_ALL — confirm against the macro definition.
static PatchNanFunc getPatchNanFunc(bool isDouble)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getPatchNanFunc, (isDouble), CV_CPU_DISPATCH_MODES_ALL);
}

// In-place NaN patching entry point for CV_32F / CV_64F arrays.
//   _a   - array to process; any other depth fails the assertion below
//   _val - value handed to the OpenCL kernel or to the dispatched PatchNanFunc
// UMat inputs with at most 2 dimensions are first offered to the OpenCL
// implementation; otherwise the array is walked plane by plane on the CPU.
void patchNaNs( InputOutputArray _a, double _val )
{
    CV_INSTRUMENT_REGION();
    CV_Assert( _a.depth() == CV_32F || _a.depth() == CV_64F);

    CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
               ocl_patchNaNs(_a, _val))

    const bool isDouble = (_a.depth() == CV_64F);
    PatchNanFunc patchPlane = getPatchNanFunc(isDouble);

    Mat a = _a.getMat();
    const Mat* arrays[] = {&a, 0};
    uchar* planePtr[1] = {};
    NAryMatIterator planeIt(arrays, planePtr);

    // The worker is given a flat run of scalars: plane size times channel count.
    const size_t scalarsPerPlane = planeIt.size * a.channels();
    const size_t planeCount = planeIt.nplanes;

    for (size_t p = 0; p < planeCount; ++p, ++planeIt)
        patchPlane(planePtr[0], scalarsPerPlane, _val);
}


#ifdef HAVE_OPENCL

static bool ocl_finiteMask(const UMat img, UMat mask)
{
    int channels = img.channels();
    int depth = img.depth();

    const ocl::Device d = ocl::Device::getDefault();
    bool doubleSupport = d.doubleFPConfig() > 0;
    if (!doubleSupport && depth == CV_64F)
    {
        return false;
    }

    int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
    ocl::Kernel k("finiteMask", ocl::core::finitemask_oclsrc,
                  format("-D srcT=%s -D cn=%d -D rowsPerWI=%d %s",
                         depth == CV_32F ? "float" : "double", channels, rowsPerWI,
                         doubleSupport ? "-D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnlyNoSize(img), ocl::KernelArg::WriteOnly(mask));

    size_t globalsize[2] = { (size_t)img.cols, ((size_t)img.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
}

#endif

// Obtains the FiniteMaskFunc to use on the CPU by forwarding to the
// getFiniteMaskFunc() variants selected through CV_CPU_DISPATCH.
//   isDouble - forwarded unchanged; finiteMask() passes true for CV_64F data
//   cn       - forwarded unchanged; finiteMask() passes the source channel count
// NOTE(review): there is no explicit return here, so CV_CPU_DISPATCH presumably
// expands to the return statement(s) that pick one of the modes listed in
// CV_CPU_DISPATCH_MODES_ALL — confirm against the macro definition.
static FiniteMaskFunc getFiniteMaskFunc(bool isDouble, int cn)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getFiniteMaskFunc, (isDouble, cn), CV_CPU_DISPATCH_MODES_ALL);
}

// Fills _mask for a CV_32F / CV_64F source with 1 to 4 channels.
//   _src  - input array; other depths or channel counts fail the assertions
//   _mask - (re)created here as a single-channel 8-bit array with the same
//           dimensions and sizes as _src
// When both arguments are UMat and the source has at most 2 dimensions the
// OpenCL implementation is tried first; otherwise each plane is handed to the
// dispatched FiniteMaskFunc.
void finiteMask(InputArray _src, OutputArray _mask)
{
    CV_INSTRUMENT_REGION();

    int channels = _src.channels();
    int depth = _src.depth();
    CV_Assert( channels > 0 && channels <= 4);
    CV_Assert( depth == CV_32F || depth == CV_64F );

    // The output must be allocated before either implementation runs.
    const int ndims = _src.dims();
    std::vector<int> sizes(ndims);
    _src.sizend(sizes.data());
    _mask.create(ndims, sizes.data(), CV_8UC1);

    CV_OCL_RUN(_src.isUMat() && _mask.isUMat() && _src.dims() <= 2,
               ocl_finiteMask(_src.getUMat(), _mask.getUMat()));

    Mat src = _src.getMat();
    Mat mask = _mask.getMat();

    const Mat* arrays[] = { &src, &mask, 0 };
    Mat planes[2];
    NAryMatIterator it(arrays, planes);

    // Taken once from the first plane and reused for every plane.
    const size_t planeTotal = planes[0].total();
    const size_t planeCount = it.nplanes;

    FiniteMaskFunc maskPlane = getFiniteMaskFunc((depth == CV_64F), channels);

    for (size_t p = 0; p < planeCount; ++p, ++it)
    {
        const uchar* srcData = planes[0].ptr();
        uchar* maskData = planes[1].ptr();
        maskPlane(srcData, maskData, planeTotal);
    }
}
} //namespace cv
Merge pull request #23098 from savuor:nanMask

finiteMask() and doubles for patchNaNs() #23098

Related to #22826
Connected PR in extra: [#1037@extra](https://github.com/opencv/opencv_extra/pull/1037)

### TODOs:
- [ ] Vectorize `finiteMask()` for 64FC3 and 64FC4

### Changes
This PR:
* adds a new function `finiteMask()`
* extends `patchNaNs()` by CV_64F support
* moves `patchNaNs()` and `finiteMask()` to a separate file

NOTE: now the function is called `finiteMask()` as discussed with the OpenCV core team

### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake

2023-11-09 15:32:47 +08:00			`// This file is part of OpenCV project.`
			`// It is subject to the license terms in the LICENSE file found in the top-level directory`
			`// of this distribution and at http://opencv.org/license.html.`

			`#include "precomp.hpp"`
			`#include "opencl_kernels_core.hpp"`

			`#include "nan_mask.simd.hpp"`
			`#include "nan_mask.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content`

			`namespace cv {`

			`#ifdef HAVE_OPENCL`

			`static bool ocl_patchNaNs( InputOutputArray _a, double value )`
			`{`
			`int ftype = _a.depth();`

			`const ocl::Device d = ocl::Device::getDefault();`
			`bool doubleSupport = d.doubleFPConfig() > 0;`
			`if (!doubleSupport && ftype == CV_64F)`
			`{`
			`return false;`
			`}`

			`int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;`
			`ocl::Kernel k("KF", ocl::core::arithm_oclsrc,`
			`format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d %s",`
			`ftype == CV_64F ? "double" : "float", ftype, rowsPerWI,`
			`doubleSupport ? "-D DOUBLE_SUPPORT" : ""));`
			`if (k.empty())`
			`return false;`

			`UMat a = _a.getUMat();`
			`int cn = a.channels();`

			`// to pass float or double to args`
			`if (ftype == CV_32F)`
			`{`
			`k.args(ocl::KernelArg::ReadOnlyNoSize(a), ocl::KernelArg::WriteOnly(a, cn), (float)value);`
			`}`
			`else // CV_64F`
			`{`
			`k.args(ocl::KernelArg::ReadOnlyNoSize(a), ocl::KernelArg::WriteOnly(a, cn), value);`
			`}`

			`size_t globalsize[2] = { (size_t)a.cols * cn, ((size_t)a.rows + rowsPerWI - 1) / rowsPerWI };`
			`return k.run(2, globalsize, NULL, false);`
			`}`

			`#endif`

			`static PatchNanFunc getPatchNanFunc(bool isDouble)`
			`{`
			`CV_INSTRUMENT_REGION();`
			`CV_CPU_DISPATCH(getPatchNanFunc, (isDouble), CV_CPU_DISPATCH_MODES_ALL);`
			`}`

			`void patchNaNs( InputOutputArray _a, double _val )`
			`{`
			`CV_INSTRUMENT_REGION();`
			`CV_Assert( _a.depth() == CV_32F \|\| _a.depth() == CV_64F);`

			`CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,`
			`ocl_patchNaNs(_a, _val))`

			`Mat a = _a.getMat();`
			`const Mat* arrays[] = {&a, 0};`
			`uchar* ptrs[1] = {};`
			`NAryMatIterator it(arrays, ptrs);`
			`size_t len = it.size*a.channels();`

			`PatchNanFunc func = getPatchNanFunc(_a.depth() == CV_64F);`

			`for (size_t i = 0; i < it.nplanes; i++, ++it)`
			`{`
			`func(ptrs[0], len, _val);`
			`}`
			`}`


			`#ifdef HAVE_OPENCL`

			`static bool ocl_finiteMask(const UMat img, UMat mask)`
			`{`
			`int channels = img.channels();`
			`int depth = img.depth();`

			`const ocl::Device d = ocl::Device::getDefault();`
			`bool doubleSupport = d.doubleFPConfig() > 0;`
			`if (!doubleSupport && depth == CV_64F)`
			`{`
			`return false;`
			`}`

			`int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;`
			`ocl::Kernel k("finiteMask", ocl::core::finitemask_oclsrc,`
			`format("-D srcT=%s -D cn=%d -D rowsPerWI=%d %s",`
			`depth == CV_32F ? "float" : "double", channels, rowsPerWI,`
			`doubleSupport ? "-D DOUBLE_SUPPORT" : ""));`
			`if (k.empty())`
			`return false;`

			`k.args(ocl::KernelArg::ReadOnlyNoSize(img), ocl::KernelArg::WriteOnly(mask));`

			`size_t globalsize[2] = { (size_t)img.cols, ((size_t)img.rows + rowsPerWI - 1) / rowsPerWI };`
			`return k.run(2, globalsize, NULL, false);`
			`}`

			`#endif`

			`static FiniteMaskFunc getFiniteMaskFunc(bool isDouble, int cn)`
			`{`
			`CV_INSTRUMENT_REGION();`
			`CV_CPU_DISPATCH(getFiniteMaskFunc, (isDouble, cn), CV_CPU_DISPATCH_MODES_ALL);`
			`}`

			`void finiteMask(InputArray _src, OutputArray _mask)`
			`{`
			`CV_INSTRUMENT_REGION();`

			`int channels = _src.channels();`
			`int depth = _src.depth();`
			`CV_Assert( channels > 0 && channels <= 4);`
			`CV_Assert( depth == CV_32F \|\| depth == CV_64F );`
			`std::vector<int> vsz(_src.dims());`
			`_src.sizend(vsz.data());`
			`_mask.create(_src.dims(), vsz.data(), CV_8UC1);`

			`CV_OCL_RUN(_src.isUMat() && _mask.isUMat() && _src.dims() <= 2,`
			`ocl_finiteMask(_src.getUMat(), _mask.getUMat()));`

			`Mat src = _src.getMat();`
			`Mat mask = _mask.getMat();`

			`const Mat *arrays[]={&src, &mask, 0};`
			`Mat planes[2];`
			`NAryMatIterator it(arrays, planes);`
			`size_t total = planes[0].total();`
			`size_t i, nplanes = it.nplanes;`

			`FiniteMaskFunc func = getFiniteMaskFunc((depth == CV_64F), channels);`

			`for( i = 0; i < nplanes; i++, ++it )`
			`{`
			`const uchar* sptr = planes[0].ptr();`
			`uchar* dptr = planes[1].ptr();`

			`func(sptr, dptr, total);`
			`}`
			`}`
			`} //namespace cv`