opencv/modules/core/src/nan_mask.dispatch.cpp
Rostislav Vasilikhin 53aad98a1a
Merge pull request #23098 from savuor:nanMask
finiteMask() and doubles for patchNaNs() #23098

Related to #22826
Connected PR in extra: [#1037@extra](https://github.com/opencv/opencv_extra/pull/1037)

### TODOs:
- [ ] Vectorize `finiteMask()` for 64FC3 and 64FC4

### Changes

This PR:
* adds a new function `finiteMask()`
* extends `patchNaNs()` by CV_64F support
* moves `patchNaNs()` and `finiteMask()` to a separate file

**NOTE:** now the function is called `finiteMask()` as discussed with the OpenCV core team

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is an accuracy test, a performance test, and test data in the opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
2023-11-09 10:32:47 +03:00

152 lines
4.4 KiB
C++

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "nan_mask.simd.hpp"
#include "nan_mask.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace cv {
#ifdef HAVE_OPENCL
// OpenCL branch of patchNaNs(): builds the "KF" kernel from arithm_oclsrc with
// OP_PATCH_NANS defined and runs it over the UMat behind _a, which is bound both
// as the read-only source and as the write-only destination.
//   _a    - array to process; its depth selects the "float" or "double" kernel build
//   value - scalar handed to the kernel, as float for CV_32F and as double otherwise
// Returns false when CV_64F is requested on a device without double support or when
// the kernel could not be created; otherwise returns whatever Kernel::run() reports.
static bool ocl_patchNaNs( InputOutputArray _a, double value )
{
    const int depth = _a.depth();

    const ocl::Device device = ocl::Device::getDefault();
    const bool hasDouble = device.doubleFPConfig() > 0;
    if (depth == CV_64F && !hasDouble)
        return false;

    // Intel devices use 4 rows per work-item, every other device uses 1.
    const int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;

    const char* const elemTypeName = (depth == CV_64F) ? "double" : "float";
    const char* const doubleDefine = hasDouble ? "-D DOUBLE_SUPPORT" : "";
    const String buildOptions = format(
        "-D UNARY_OP -D OP_PATCH_NANS -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d %s",
        elemTypeName, depth, rowsPerWI, doubleDefine);

    ocl::Kernel kernel("KF", ocl::core::arithm_oclsrc, buildOptions);
    if (kernel.empty())
        return false;

    UMat data = _a.getUMat();
    const int channels = data.channels();

    // The scalar argument must have the same type the kernel was compiled for,
    // hence the two separate args() calls.
    if (depth == CV_32F)
        kernel.args(ocl::KernelArg::ReadOnlyNoSize(data),
                    ocl::KernelArg::WriteOnly(data, channels),
                    static_cast<float>(value));
    else // CV_64F
        kernel.args(ocl::KernelArg::ReadOnlyNoSize(data),
                    ocl::KernelArg::WriteOnly(data, channels),
                    value);

    // Global size: one item per scalar in a row, and rows / rowsPerWI (rounded up) in y.
    const size_t itemsX = static_cast<size_t>(data.cols) * channels;
    const size_t itemsY = (static_cast<size_t>(data.rows) + rowsPerWI - 1) / rowsPerWI;
    size_t globalSize[2] = { itemsX, itemsY };

    return kernel.run(2, globalSize, NULL, false);
}
#endif
// Returns the PatchNanFunc that patchNaNs() applies to every plane.
//   isDouble - patchNaNs() passes `depth == CV_64F` here, i.e. true for double data.
// The body forwards the call to a getPatchNanFunc variant for one of the CPU modes
// listed in CV_CPU_DISPATCH_MODES_ALL (see nan_mask.simd_declarations.hpp).
// NOTE(review): there is no explicit return statement; CV_CPU_DISPATCH is expected to
// expand to the return(s) and presumably picks a mode supported at run time - confirm
// against the macro definition.
static PatchNanFunc getPatchNanFunc(bool isDouble)
{
CV_INSTRUMENT_REGION();
CV_CPU_DISPATCH(getPatchNanFunc, (isDouble), CV_CPU_DISPATCH_MODES_ALL);
}
// Runs the NaN-patching routine over every plane of _a.
//   _a   - CV_32F or CV_64F array (asserted); each plane is handed to the worker
//          through a mutable pointer
//   _val - scalar forwarded unchanged to the worker (and to the OpenCL kernel)
// UMat inputs with at most 2 dimensions are first offered to ocl_patchNaNs() through
// CV_OCL_RUN; otherwise the function obtained from getPatchNanFunc() is called
// once per plane.
void patchNaNs( InputOutputArray _a, double _val )
{
    CV_INSTRUMENT_REGION();

    CV_Assert( _a.depth() == CV_32F || _a.depth() == CV_64F);

    CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
               ocl_patchNaNs(_a, _val))

    Mat mat = _a.getMat();
    const Mat* sources[] = { &mat, 0 };
    uchar* planePtr[1] = {};
    NAryMatIterator planeIt(sources, planePtr);

    // Scalars per plane: elements in a plane times the channel count.
    const size_t valuesPerPlane = planeIt.size * mat.channels();

    PatchNanFunc patch = getPatchNanFunc(_a.depth() == CV_64F);

    const size_t planeCount = planeIt.nplanes;
    for (size_t plane = 0; plane < planeCount; ++plane, ++planeIt)
        patch(planePtr[0], valuesPerPlane, _val);
}
#ifdef HAVE_OPENCL
static bool ocl_finiteMask(const UMat img, UMat mask)
{
int channels = img.channels();
int depth = img.depth();
const ocl::Device d = ocl::Device::getDefault();
bool doubleSupport = d.doubleFPConfig() > 0;
if (!doubleSupport && depth == CV_64F)
{
return false;
}
int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
ocl::Kernel k("finiteMask", ocl::core::finitemask_oclsrc,
format("-D srcT=%s -D cn=%d -D rowsPerWI=%d %s",
depth == CV_32F ? "float" : "double", channels, rowsPerWI,
doubleSupport ? "-D DOUBLE_SUPPORT" : ""));
if (k.empty())
return false;
k.args(ocl::KernelArg::ReadOnlyNoSize(img), ocl::KernelArg::WriteOnly(mask));
size_t globalsize[2] = { (size_t)img.cols, ((size_t)img.rows + rowsPerWI - 1) / rowsPerWI };
return k.run(2, globalsize, NULL, false);
}
#endif
// Returns the FiniteMaskFunc that finiteMask() applies to every plane.
//   isDouble - finiteMask() passes `depth == CV_64F` here, i.e. true for double data.
//   cn       - channel count of the source; finiteMask() asserts 1..4 before calling.
// The body forwards the call to a getFiniteMaskFunc variant for one of the CPU modes
// listed in CV_CPU_DISPATCH_MODES_ALL (see nan_mask.simd_declarations.hpp).
// NOTE(review): there is no explicit return statement; CV_CPU_DISPATCH is expected to
// expand to the return(s) and presumably picks a mode supported at run time - confirm
// against the macro definition.
static FiniteMaskFunc getFiniteMaskFunc(bool isDouble, int cn)
{
CV_INSTRUMENT_REGION();
CV_CPU_DISPATCH(getFiniteMaskFunc, (isDouble, cn), CV_CPU_DISPATCH_MODES_ALL);
}
// Fills _mask with the output of the finite-mask routine computed from _src.
//   _src  - CV_32F or CV_64F array with 1 to 4 channels (both asserted)
//   _mask - (re)created here as a single-channel CV_8U array with the same
//           dimensions as _src
// When both arguments are UMat and _src has at most 2 dimensions, the OpenCL
// implementation is offered first through CV_OCL_RUN; otherwise the function
// obtained from getFiniteMaskFunc() is called once per plane.
void finiteMask(InputArray _src, OutputArray _mask)
{
    CV_INSTRUMENT_REGION();

    int channels = _src.channels();
    int depth = _src.depth();
    CV_Assert( channels > 0 && channels <= 4);
    CV_Assert( depth == CV_32F || depth == CV_64F );

    // Give the mask the same N-dimensional shape as the source.
    std::vector<int> dimSizes(_src.dims());
    _src.sizend(dimSizes.data());
    _mask.create(_src.dims(), dimSizes.data(), CV_8UC1);

    CV_OCL_RUN(_src.isUMat() && _mask.isUMat() && _src.dims() <= 2,
               ocl_finiteMask(_src.getUMat(), _mask.getUMat()));

    Mat srcMat = _src.getMat();
    Mat maskMat = _mask.getMat();

    // slices[0] tracks the current source plane, slices[1] the matching mask plane.
    const Mat* inputs[] = { &srcMat, &maskMat, 0 };
    Mat slices[2];
    NAryMatIterator planeIt(inputs, slices);

    // Element count of the first plane, reused for every plane.
    const size_t elemsPerPlane = slices[0].total();

    FiniteMaskFunc maskPlane = getFiniteMaskFunc((depth == CV_64F), channels);

    for (size_t plane = 0, planeCount = planeIt.nplanes; plane < planeCount; ++plane, ++planeIt)
    {
        const uchar* srcData = slices[0].ptr();
        uchar* maskData = slices[1].ptr();
        maskPlane(srcData, maskData, elemsPerPlane);
    }
}
} //namespace cv