2010-05-12 01:44:00 +08:00
|
|
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
|
|
//
|
|
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
|
|
// If you do not agree to this license, do not download, install,
|
|
|
|
// copy or use the software.
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// License Agreement
|
|
|
|
// For Open Source Computer Vision Library
|
|
|
|
//
|
|
|
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
2011-04-17 21:14:45 +08:00
|
|
|
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
|
2015-01-12 15:59:30 +08:00
|
|
|
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
|
2010-05-12 01:44:00 +08:00
|
|
|
// Third party copyrights are property of their respective owners.
|
|
|
|
//
|
|
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
|
|
// are permitted provided that the following conditions are met:
|
|
|
|
//
|
|
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer.
|
|
|
|
//
|
|
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
|
|
// and/or other materials provided with the distribution.
|
|
|
|
//
|
|
|
|
// * The name of the copyright holders may not be used to endorse or promote products
|
|
|
|
// derived from this software without specific prior written permission.
|
|
|
|
//
|
|
|
|
// This software is provided by the copyright holders and contributors "as is" and
|
|
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
|
|
//
|
|
|
|
//M*/
|
|
|
|
|
|
|
|
#include "precomp.hpp"
|
2014-08-01 22:11:20 +08:00
|
|
|
#include "opencl_kernels_core.hpp"
|
2019-10-08 07:32:25 +08:00
|
|
|
#include <atomic>
|
2015-05-05 23:03:40 +08:00
|
|
|
#include <limits>
|
2015-11-21 04:03:32 +08:00
|
|
|
#include <iostream>
|
2018-08-07 23:49:05 +08:00
|
|
|
#include "mathfuncs.hpp"
|
2010-12-05 10:26:04 +08:00
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
namespace cv
|
|
|
|
{
|
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
typedef void (*MathFunc)(const void* src, void* dst, int len);
|
2010-05-12 01:44:00 +08:00
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
#ifdef HAVE_OPENCL
|
2013-12-03 18:07:00 +08:00
|
|
|
|
|
|
|
enum { OCL_OP_LOG=0, OCL_OP_EXP=1, OCL_OP_MAG=2, OCL_OP_PHASE_DEGREES=3, OCL_OP_PHASE_RADIANS=4 };
|
|
|
|
|
|
|
|
static const char* oclop2str[] = { "OP_LOG", "OP_EXP", "OP_MAG", "OP_PHASE_DEGREES", "OP_PHASE_RADIANS", 0 };
|
|
|
|
|
2013-12-03 18:57:13 +08:00
|
|
|
static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, int oclop)
|
2013-12-03 18:07:00 +08:00
|
|
|
{
|
2014-03-08 05:29:27 +08:00
|
|
|
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
2014-03-12 03:41:44 +08:00
|
|
|
int kercn = oclop == OCL_OP_PHASE_DEGREES ||
|
2014-03-08 05:29:27 +08:00
|
|
|
oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
|
2013-12-03 18:07:00 +08:00
|
|
|
|
2014-05-14 19:42:30 +08:00
|
|
|
const ocl::Device d = ocl::Device::getDefault();
|
|
|
|
bool double_support = d.doubleFPConfig() > 0;
|
2014-03-08 05:29:27 +08:00
|
|
|
if (!double_support && depth == CV_64F)
|
2013-12-03 18:07:00 +08:00
|
|
|
return false;
|
2014-05-14 19:42:30 +08:00
|
|
|
int rowsPerWI = d.isIntel() ? 4 : 1;
|
2013-12-03 18:07:00 +08:00
|
|
|
|
2014-03-08 05:29:27 +08:00
|
|
|
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
2018-10-14 09:49:14 +08:00
|
|
|
format("-D %s -D %s -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP",
|
|
|
|
oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), depth, rowsPerWI,
|
2014-03-08 05:29:27 +08:00
|
|
|
double_support ? " -D DOUBLE_SUPPORT" : ""));
|
|
|
|
if (k.empty())
|
2013-12-03 18:07:00 +08:00
|
|
|
return false;
|
|
|
|
|
2014-03-08 05:29:27 +08:00
|
|
|
UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
|
|
|
|
_dst.create(src1.size(), type);
|
2013-12-03 18:07:00 +08:00
|
|
|
UMat dst = _dst.getUMat();
|
|
|
|
|
2014-03-08 05:29:27 +08:00
|
|
|
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
|
|
|
|
src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
|
|
|
|
dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
|
2013-12-03 18:07:00 +08:00
|
|
|
|
2014-03-08 05:29:27 +08:00
|
|
|
if (src2.empty())
|
2013-12-03 18:07:00 +08:00
|
|
|
k.args(src1arg, dstarg);
|
|
|
|
else
|
|
|
|
k.args(src1arg, src2arg, dstarg);
|
|
|
|
|
2015-10-16 22:10:00 +08:00
|
|
|
size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
|
2013-12-03 18:07:00 +08:00
|
|
|
return k.run(2, globalsize, 0, false);
|
|
|
|
}
|
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
#endif
|
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
/* ************************************************************************** *\
|
|
|
|
Fast cube root by Ken Turkowski
|
|
|
|
(http://www.worldserver.com/turk/computergraphics/papers.html)
|
|
|
|
\* ************************************************************************** */
|
|
|
|
float cubeRoot( float value )
|
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
float fr;
|
|
|
|
Cv32suf v, m;
|
|
|
|
int ix, s;
|
|
|
|
int ex, shx;
|
|
|
|
|
|
|
|
v.f = value;
|
|
|
|
ix = v.i & 0x7fffffff;
|
|
|
|
s = v.i & 0x80000000;
|
|
|
|
ex = (ix >> 23) - 127;
|
|
|
|
shx = ex % 3;
|
|
|
|
shx -= shx >= 0 ? 3 : 0;
|
|
|
|
ex = (ex - shx) / 3; /* exponent of cube root */
|
|
|
|
v.i = (ix & ((1<<23)-1)) | ((shx + 127)<<23);
|
|
|
|
fr = v.f;
|
|
|
|
|
|
|
|
/* 0.125 <= fr < 1.0 */
|
|
|
|
/* Use quartic rational polynomial with error < 2^(-24) */
|
|
|
|
fr = (float)(((((45.2548339756803022511987494 * fr +
|
|
|
|
192.2798368355061050458134625) * fr +
|
|
|
|
119.1654824285581628956914143) * fr +
|
|
|
|
13.43250139086239872172837314) * fr +
|
|
|
|
0.1636161226585754240958355063)/
|
|
|
|
((((14.80884093219134573786480845 * fr +
|
|
|
|
151.9714051044435648658557668) * fr +
|
|
|
|
168.5254414101568283957668343) * fr +
|
|
|
|
33.9905941350215598754191872) * fr +
|
|
|
|
1.0));
|
|
|
|
|
|
|
|
/* fr *= 2^ex * sign */
|
|
|
|
m.f = value;
|
|
|
|
v.f = fr;
|
|
|
|
v.i = (v.i + (ex << 23) + s) & (m.i*2 != 0 ? -1 : 0);
|
|
|
|
return v.f;
|
|
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************************************\
|
|
|
|
* Cartezian -> Polar *
|
|
|
|
\****************************************************************************************/
|
|
|
|
|
2011-06-06 22:51:27 +08:00
|
|
|
void magnitude( InputArray src1, InputArray src2, OutputArray dst )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2013-12-03 18:07:00 +08:00
|
|
|
int type = src1.type(), depth = src1.depth(), cn = src1.channels();
|
|
|
|
CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
|
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
|
|
|
|
ocl_math_op(src1, src2, dst, OCL_OP_MAG))
|
2013-12-03 18:07:00 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
Mat X = src1.getMat(), Y = src2.getMat();
|
|
|
|
dst.create(X.dims, X.size, X.type());
|
|
|
|
Mat Mag = dst.getMat();
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
const Mat* arrays[] = {&X, &Y, &Mag, 0};
|
2018-09-04 21:44:47 +08:00
|
|
|
uchar* ptrs[3] = {};
|
2011-04-17 21:14:45 +08:00
|
|
|
NAryMatIterator it(arrays, ptrs);
|
|
|
|
int len = (int)it.size*cn;
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
if( depth == CV_32F )
|
|
|
|
{
|
|
|
|
const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
|
|
|
|
float *mag = (float*)ptrs[2];
|
2015-12-15 20:55:43 +08:00
|
|
|
hal::magnitude32f( x, y, mag, len );
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
|
|
|
|
double *mag = (double*)ptrs[2];
|
2015-12-15 20:55:43 +08:00
|
|
|
hal::magnitude64f( x, y, mag, len );
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-06-06 22:51:27 +08:00
|
|
|
void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegrees )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2013-12-03 18:07:00 +08:00
|
|
|
int type = src1.type(), depth = src1.depth(), cn = src1.channels();
|
|
|
|
CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
|
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
|
|
|
|
ocl_math_op(src1, src2, dst, angleInDegrees ? OCL_OP_PHASE_DEGREES : OCL_OP_PHASE_RADIANS))
|
2013-12-03 18:07:00 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
Mat X = src1.getMat(), Y = src2.getMat();
|
|
|
|
dst.create( X.dims, X.size, type );
|
|
|
|
Mat Angle = dst.getMat();
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
const Mat* arrays[] = {&X, &Y, &Angle, 0};
|
2018-09-04 21:44:47 +08:00
|
|
|
uchar* ptrs[3] = {};
|
2011-04-17 21:14:45 +08:00
|
|
|
NAryMatIterator it(arrays, ptrs);
|
2016-05-24 18:57:27 +08:00
|
|
|
int j, total = (int)(it.size*cn), blockSize = total;
|
2011-04-17 21:14:45 +08:00
|
|
|
size_t esz1 = X.elemSize1();
|
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
for( j = 0; j < total; j += blockSize )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
int len = std::min(total - j, blockSize);
|
|
|
|
if( depth == CV_32F )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
|
|
|
|
float *angle = (float*)ptrs[2];
|
2016-05-24 18:57:27 +08:00
|
|
|
hal::fastAtan32f( y, x, angle, len, angleInDegrees );
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
|
|
|
|
double *angle = (double*)ptrs[2];
|
2016-05-24 18:57:27 +08:00
|
|
|
hal::fastAtan64f(y, x, angle, len, angleInDegrees);
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
2011-04-17 21:14:45 +08:00
|
|
|
ptrs[0] += len*esz1;
|
|
|
|
ptrs[1] += len*esz1;
|
|
|
|
ptrs[2] += len*esz1;
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
#ifdef HAVE_OPENCL
|
|
|
|
|
2013-12-01 00:05:54 +08:00
|
|
|
static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
|
|
|
|
OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
|
|
|
|
{
|
2014-05-14 19:42:30 +08:00
|
|
|
const ocl::Device & d = ocl::Device::getDefault();
|
|
|
|
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
|
|
|
|
rowsPerWI = d.isIntel() ? 4 : 1;
|
|
|
|
bool doubleSupport = d.doubleFPConfig() > 0;
|
2013-12-01 00:05:54 +08:00
|
|
|
|
|
|
|
if ( !(_src1.dims() <= 2 && _src2.dims() <= 2 &&
|
|
|
|
(depth == CV_32F || depth == CV_64F) && type == _src2.type()) ||
|
|
|
|
(depth == CV_64F && !doubleSupport) )
|
|
|
|
return false;
|
|
|
|
|
2013-12-03 04:41:07 +08:00
|
|
|
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
2018-10-14 09:49:14 +08:00
|
|
|
format("-D BINARY_OP -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D OP_CTP_%s%s",
|
|
|
|
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), depth,
|
|
|
|
rowsPerWI, angleInDegrees ? "AD" : "AR",
|
2013-12-03 04:41:07 +08:00
|
|
|
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
|
|
|
if (k.empty())
|
|
|
|
return false;
|
|
|
|
|
2013-12-01 00:05:54 +08:00
|
|
|
UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
|
|
|
|
Size size = src1.size();
|
|
|
|
CV_Assert( size == src2.size() );
|
|
|
|
|
|
|
|
_dst1.create(size, type);
|
|
|
|
_dst2.create(size, type);
|
|
|
|
UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
|
|
|
|
|
|
|
|
k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
|
|
|
|
ocl::KernelArg::ReadOnlyNoSize(src2),
|
|
|
|
ocl::KernelArg::WriteOnly(dst1, cn),
|
|
|
|
ocl::KernelArg::WriteOnlyNoSize(dst2));
|
|
|
|
|
2015-10-16 22:10:00 +08:00
|
|
|
size_t globalsize[2] = { (size_t)dst1.cols * cn, ((size_t)dst1.rows + rowsPerWI - 1) / rowsPerWI };
|
2013-12-01 00:05:54 +08:00
|
|
|
return k.run(2, globalsize, NULL, false);
|
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
#endif
|
|
|
|
|
2011-06-06 22:51:27 +08:00
|
|
|
void cartToPolar( InputArray src1, InputArray src2,
|
2011-04-17 21:14:45 +08:00
|
|
|
OutputArray dst1, OutputArray dst2, bool angleInDegrees )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2022-03-17 21:14:38 +08:00
|
|
|
CV_Assert(src1.getObj() != dst1.getObj() && src1.getObj() != dst2.getObj() &&
|
|
|
|
src2.getObj() != dst1.getObj() && src2.getObj() != dst2.getObj());
|
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(),
|
2013-12-01 05:57:13 +08:00
|
|
|
ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees))
|
2013-12-01 00:05:54 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
Mat X = src1.getMat(), Y = src2.getMat();
|
|
|
|
int type = X.type(), depth = X.depth(), cn = X.channels();
|
|
|
|
CV_Assert( X.size == Y.size && type == Y.type() && (depth == CV_32F || depth == CV_64F));
|
|
|
|
dst1.create( X.dims, X.size, type );
|
|
|
|
dst2.create( X.dims, X.size, type );
|
|
|
|
Mat Mag = dst1.getMat(), Angle = dst2.getMat();
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0};
|
2018-09-04 21:44:47 +08:00
|
|
|
uchar* ptrs[4] = {};
|
2011-04-17 21:14:45 +08:00
|
|
|
NAryMatIterator it(arrays, ptrs);
|
2016-05-24 18:57:27 +08:00
|
|
|
int j, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
|
2011-04-17 21:14:45 +08:00
|
|
|
size_t esz1 = X.elemSize1();
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
for( j = 0; j < total; j += blockSize )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
int len = std::min(total - j, blockSize);
|
|
|
|
if( depth == CV_32F )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
|
|
|
|
float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3];
|
2015-12-15 20:55:43 +08:00
|
|
|
hal::magnitude32f( x, y, mag, len );
|
2016-05-24 18:57:27 +08:00
|
|
|
hal::fastAtan32f( y, x, angle, len, angleInDegrees );
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
2011-04-17 21:14:45 +08:00
|
|
|
else
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
|
|
|
|
double *angle = (double*)ptrs[3];
|
2015-12-15 20:55:43 +08:00
|
|
|
hal::magnitude64f(x, y, (double*)ptrs[2], len);
|
2016-05-24 18:57:27 +08:00
|
|
|
hal::fastAtan64f(y, x, angle, len, angleInDegrees);
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
2011-04-17 21:14:45 +08:00
|
|
|
ptrs[0] += len*esz1;
|
|
|
|
ptrs[1] += len*esz1;
|
|
|
|
ptrs[2] += len*esz1;
|
|
|
|
ptrs[3] += len*esz1;
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/****************************************************************************************\
|
|
|
|
* Polar -> Cartezian *
|
|
|
|
\****************************************************************************************/
|
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
static void SinCos_32f( const float *angle, float *sinval, float* cosval,
|
|
|
|
int len, int angle_in_degrees )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
|
|
|
const int N = 64;
|
|
|
|
|
|
|
|
static const double sin_table[] =
|
|
|
|
{
|
|
|
|
0.00000000000000000000, 0.09801714032956060400,
|
|
|
|
0.19509032201612825000, 0.29028467725446233000,
|
|
|
|
0.38268343236508978000, 0.47139673682599764000,
|
|
|
|
0.55557023301960218000, 0.63439328416364549000,
|
|
|
|
0.70710678118654746000, 0.77301045336273699000,
|
|
|
|
0.83146961230254524000, 0.88192126434835494000,
|
|
|
|
0.92387953251128674000, 0.95694033573220894000,
|
|
|
|
0.98078528040323043000, 0.99518472667219682000,
|
|
|
|
1.00000000000000000000, 0.99518472667219693000,
|
|
|
|
0.98078528040323043000, 0.95694033573220894000,
|
|
|
|
0.92387953251128674000, 0.88192126434835505000,
|
|
|
|
0.83146961230254546000, 0.77301045336273710000,
|
|
|
|
0.70710678118654757000, 0.63439328416364549000,
|
|
|
|
0.55557023301960218000, 0.47139673682599786000,
|
|
|
|
0.38268343236508989000, 0.29028467725446239000,
|
|
|
|
0.19509032201612861000, 0.09801714032956082600,
|
|
|
|
0.00000000000000012246, -0.09801714032956059000,
|
|
|
|
-0.19509032201612836000, -0.29028467725446211000,
|
|
|
|
-0.38268343236508967000, -0.47139673682599764000,
|
|
|
|
-0.55557023301960196000, -0.63439328416364527000,
|
|
|
|
-0.70710678118654746000, -0.77301045336273666000,
|
|
|
|
-0.83146961230254524000, -0.88192126434835494000,
|
|
|
|
-0.92387953251128652000, -0.95694033573220882000,
|
|
|
|
-0.98078528040323032000, -0.99518472667219693000,
|
|
|
|
-1.00000000000000000000, -0.99518472667219693000,
|
|
|
|
-0.98078528040323043000, -0.95694033573220894000,
|
|
|
|
-0.92387953251128663000, -0.88192126434835505000,
|
|
|
|
-0.83146961230254546000, -0.77301045336273688000,
|
|
|
|
-0.70710678118654768000, -0.63439328416364593000,
|
|
|
|
-0.55557023301960218000, -0.47139673682599792000,
|
|
|
|
-0.38268343236509039000, -0.29028467725446250000,
|
|
|
|
-0.19509032201612872000, -0.09801714032956050600,
|
|
|
|
};
|
|
|
|
|
|
|
|
static const double k2 = (2*CV_PI)/N;
|
|
|
|
|
|
|
|
static const double sin_a0 = -0.166630293345647*k2*k2*k2;
|
|
|
|
static const double sin_a2 = k2;
|
|
|
|
|
|
|
|
static const double cos_a0 = -0.499818138450326*k2*k2;
|
|
|
|
/*static const double cos_a2 = 1;*/
|
|
|
|
|
|
|
|
double k1;
|
2015-01-12 15:59:31 +08:00
|
|
|
int i = 0;
|
2010-05-12 01:44:00 +08:00
|
|
|
|
|
|
|
if( !angle_in_degrees )
|
|
|
|
k1 = N/(2*CV_PI);
|
|
|
|
else
|
|
|
|
k1 = N/360.;
|
|
|
|
|
2015-01-12 15:59:31 +08:00
|
|
|
#if CV_AVX2
|
|
|
|
if (USE_AVX2)
|
|
|
|
{
|
2015-01-12 15:59:31 +08:00
|
|
|
__m128d v_k1 = _mm_set1_pd(k1);
|
|
|
|
__m128d v_1 = _mm_set1_pd(1);
|
|
|
|
__m128i v_N1 = _mm_set1_epi32(N - 1);
|
|
|
|
__m128i v_N4 = _mm_set1_epi32(N >> 2);
|
|
|
|
__m128d v_sin_a0 = _mm_set1_pd(sin_a0);
|
|
|
|
__m128d v_sin_a2 = _mm_set1_pd(sin_a2);
|
|
|
|
__m128d v_cos_a0 = _mm_set1_pd(cos_a0);
|
|
|
|
|
2015-01-12 15:59:31 +08:00
|
|
|
for ( ; i <= len - 4; i += 4)
|
|
|
|
{
|
|
|
|
__m128 v_angle = _mm_loadu_ps(angle + i);
|
|
|
|
|
|
|
|
// 0-1
|
|
|
|
__m128d v_t = _mm_mul_pd(_mm_cvtps_pd(v_angle), v_k1);
|
|
|
|
__m128i v_it = _mm_cvtpd_epi32(v_t);
|
|
|
|
v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it));
|
|
|
|
|
|
|
|
__m128i v_sin_idx = _mm_and_si128(v_it, v_N1);
|
|
|
|
__m128i v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1);
|
|
|
|
|
|
|
|
__m128d v_t2 = _mm_mul_pd(v_t, v_t);
|
|
|
|
__m128d v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t);
|
|
|
|
__m128d v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1);
|
|
|
|
|
2015-01-12 15:59:31 +08:00
|
|
|
__m128d v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8);
|
|
|
|
__m128d v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8);
|
2015-01-12 15:59:31 +08:00
|
|
|
|
|
|
|
__m128d v_sin_val_0 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b),
|
|
|
|
_mm_mul_pd(v_cos_a, v_sin_b));
|
|
|
|
__m128d v_cos_val_0 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b),
|
|
|
|
_mm_mul_pd(v_sin_a, v_sin_b));
|
|
|
|
|
|
|
|
// 2-3
|
2015-01-12 15:59:31 +08:00
|
|
|
v_t = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_angle), 8))), v_k1);
|
2015-01-12 15:59:31 +08:00
|
|
|
v_it = _mm_cvtpd_epi32(v_t);
|
|
|
|
v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it));
|
|
|
|
|
|
|
|
v_sin_idx = _mm_and_si128(v_it, v_N1);
|
|
|
|
v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1);
|
|
|
|
|
|
|
|
v_t2 = _mm_mul_pd(v_t, v_t);
|
|
|
|
v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t);
|
|
|
|
v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1);
|
|
|
|
|
2015-01-12 15:59:31 +08:00
|
|
|
v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8);
|
|
|
|
v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8);
|
2015-01-12 15:59:31 +08:00
|
|
|
|
|
|
|
__m128d v_sin_val_1 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b),
|
|
|
|
_mm_mul_pd(v_cos_a, v_sin_b));
|
|
|
|
__m128d v_cos_val_1 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b),
|
|
|
|
_mm_mul_pd(v_sin_a, v_sin_b));
|
|
|
|
|
|
|
|
_mm_storeu_ps(sinval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_sin_val_0),
|
|
|
|
_mm_cvtpd_ps(v_sin_val_1)));
|
|
|
|
_mm_storeu_ps(cosval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_cos_val_0),
|
|
|
|
_mm_cvtpd_ps(v_cos_val_1)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
for( ; i < len; i++ )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
|
|
|
double t = angle[i]*k1;
|
|
|
|
int it = cvRound(t);
|
|
|
|
t -= it;
|
|
|
|
int sin_idx = it & (N - 1);
|
|
|
|
int cos_idx = (N/4 - sin_idx) & (N - 1);
|
|
|
|
|
|
|
|
double sin_b = (sin_a0*t*t + sin_a2)*t;
|
|
|
|
double cos_b = cos_a0*t*t + 1;
|
|
|
|
|
|
|
|
double sin_a = sin_table[sin_idx];
|
|
|
|
double cos_a = sin_table[cos_idx];
|
|
|
|
|
|
|
|
double sin_val = sin_a*cos_b + cos_a*sin_b;
|
|
|
|
double cos_val = cos_a*cos_b - sin_a*sin_b;
|
|
|
|
|
|
|
|
sinval[i] = (float)sin_val;
|
|
|
|
cosval[i] = (float)cos_val;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
#ifdef HAVE_OPENCL
|
|
|
|
|
2013-12-01 04:17:23 +08:00
|
|
|
static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
|
|
|
|
OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
|
|
|
|
{
|
2014-05-14 19:42:30 +08:00
|
|
|
const ocl::Device & d = ocl::Device::getDefault();
|
|
|
|
int type = _angle.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
|
|
|
|
rowsPerWI = d.isIntel() ? 4 : 1;
|
|
|
|
bool doubleSupport = d.doubleFPConfig() > 0;
|
2013-12-01 04:17:23 +08:00
|
|
|
|
2013-12-01 19:37:56 +08:00
|
|
|
if ( !doubleSupport && depth == CV_64F )
|
2013-12-01 04:17:23 +08:00
|
|
|
return false;
|
|
|
|
|
2013-12-03 04:41:07 +08:00
|
|
|
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
2018-10-14 09:49:14 +08:00
|
|
|
format("-D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D BINARY_OP -D OP_PTC_%s%s",
|
|
|
|
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), depth,
|
|
|
|
rowsPerWI,
|
|
|
|
angleInDegrees ? "AD" : "AR",
|
2013-12-03 04:41:07 +08:00
|
|
|
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
|
|
|
if (k.empty())
|
|
|
|
return false;
|
|
|
|
|
2013-12-01 04:17:23 +08:00
|
|
|
UMat mag = _mag.getUMat(), angle = _angle.getUMat();
|
|
|
|
Size size = angle.size();
|
|
|
|
CV_Assert(mag.size() == size);
|
|
|
|
|
|
|
|
_dst1.create(size, type);
|
|
|
|
_dst2.create(size, type);
|
|
|
|
UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
|
|
|
|
|
|
|
|
k.args(ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::ReadOnlyNoSize(angle),
|
|
|
|
ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2));
|
|
|
|
|
2015-10-16 22:10:00 +08:00
|
|
|
size_t globalsize[2] = { (size_t)dst1.cols * cn, ((size_t)dst1.rows + rowsPerWI - 1) / rowsPerWI };
|
2013-12-01 04:17:23 +08:00
|
|
|
return k.run(2, globalsize, NULL, false);
|
|
|
|
}
|
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
#endif
|
|
|
|
|
2017-04-21 19:52:45 +08:00
|
|
|
#ifdef HAVE_IPP
|
|
|
|
static bool ipp_polarToCart(Mat &mag, Mat &angle, Mat &x, Mat &y)
|
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION_IPP();
|
2017-04-21 19:52:45 +08:00
|
|
|
|
|
|
|
int depth = angle.depth();
|
|
|
|
if(depth != CV_32F && depth != CV_64F)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if(angle.dims <= 2)
|
|
|
|
{
|
|
|
|
int len = (int)(angle.cols*angle.channels());
|
|
|
|
|
|
|
|
if(depth == CV_32F)
|
|
|
|
{
|
|
|
|
for (int h = 0; h < angle.rows; h++)
|
|
|
|
{
|
|
|
|
if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_32f, (const float*)mag.ptr(h), (const float*)angle.ptr(h), (float*)x.ptr(h), (float*)y.ptr(h), len) < 0)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (int h = 0; h < angle.rows; h++)
|
|
|
|
{
|
|
|
|
if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_64f, (const double*)mag.ptr(h), (const double*)angle.ptr(h), (double*)x.ptr(h), (double*)y.ptr(h), len) < 0)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const Mat *arrays[] = {&mag, &angle, &x, &y, NULL};
|
|
|
|
uchar *ptrs[4] = {NULL};
|
|
|
|
NAryMatIterator it(arrays, ptrs);
|
|
|
|
int len = (int)(it.size*angle.channels());
|
|
|
|
|
|
|
|
if(depth == CV_32F)
|
|
|
|
{
|
|
|
|
for (size_t i = 0; i < it.nplanes; i++, ++it)
|
|
|
|
{
|
|
|
|
if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_32f, (const float*)ptrs[0], (const float*)ptrs[1], (float*)ptrs[2], (float*)ptrs[3], len) < 0)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (size_t i = 0; i < it.nplanes; i++, ++it)
|
|
|
|
{
|
|
|
|
if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_64f, (const double*)ptrs[0], (const double*)ptrs[1], (double*)ptrs[2], (double*)ptrs[3], len) < 0)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2011-06-06 22:51:27 +08:00
|
|
|
void polarToCart( InputArray src1, InputArray src2,
|
2011-04-17 21:14:45 +08:00
|
|
|
OutputArray dst1, OutputArray dst2, bool angleInDegrees )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2022-03-17 21:14:38 +08:00
|
|
|
CV_Assert(src1.getObj() != dst1.getObj() && src1.getObj() != dst2.getObj() &&
|
|
|
|
src2.getObj() != dst1.getObj() && src2.getObj() != dst2.getObj());
|
|
|
|
|
2013-12-01 04:17:23 +08:00
|
|
|
int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
|
|
|
CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type));
|
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
CV_OCL_RUN(!src1.empty() && src2.dims() <= 2 && dst1.isUMat() && dst2.isUMat(),
|
|
|
|
ocl_polarToCart(src1, src2, dst1, dst2, angleInDegrees))
|
2013-12-01 04:17:23 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
Mat Mag = src1.getMat(), Angle = src2.getMat();
|
2013-12-01 04:17:23 +08:00
|
|
|
CV_Assert( Mag.empty() || Angle.size == Mag.size);
|
2011-04-17 21:14:45 +08:00
|
|
|
dst1.create( Angle.dims, Angle.size, type );
|
|
|
|
dst2.create( Angle.dims, Angle.size, type );
|
|
|
|
Mat X = dst1.getMat(), Y = dst2.getMat();
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2017-04-21 19:52:45 +08:00
|
|
|
CV_IPP_RUN(!angleInDegrees, ipp_polarToCart(Mag, Angle, X, Y));
|
2014-04-04 03:45:41 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
|
2018-09-04 21:44:47 +08:00
|
|
|
uchar* ptrs[4] = {};
|
2011-04-17 21:14:45 +08:00
|
|
|
NAryMatIterator it(arrays, ptrs);
|
|
|
|
cv::AutoBuffer<float> _buf;
|
|
|
|
float* buf[2] = {0, 0};
|
|
|
|
int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
|
|
|
|
size_t esz1 = Angle.elemSize1();
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
if( depth == CV_64F )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
_buf.allocate(blockSize*2);
|
2018-06-11 06:42:00 +08:00
|
|
|
buf[0] = _buf.data();
|
2011-04-17 21:14:45 +08:00
|
|
|
buf[1] = buf[0] + blockSize;
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
for( j = 0; j < total; j += blockSize )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
int len = std::min(total - j, blockSize);
|
|
|
|
if( depth == CV_32F )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
const float *mag = (const float*)ptrs[0], *angle = (const float*)ptrs[1];
|
|
|
|
float *x = (float*)ptrs[2], *y = (float*)ptrs[3];
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
SinCos_32f( angle, y, x, len, angleInDegrees );
|
|
|
|
if( mag )
|
2014-10-12 19:54:27 +08:00
|
|
|
{
|
|
|
|
k = 0;
|
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
|
|
|
int cWidth = VTraits<v_float32>::vlanes();
|
2018-09-05 21:17:43 +08:00
|
|
|
for( ; k <= len - cWidth; k += cWidth )
|
2015-01-12 15:59:28 +08:00
|
|
|
{
|
2018-09-05 21:17:43 +08:00
|
|
|
v_float32 v_m = vx_load(mag + k);
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_store(x + k, v_mul(vx_load(x + k), v_m));
|
|
|
|
v_store(y + k, v_mul(vx_load(y + k), v_m));
|
2015-01-12 15:59:28 +08:00
|
|
|
}
|
2018-09-05 21:17:43 +08:00
|
|
|
vx_cleanup();
|
2017-10-14 23:24:31 +08:00
|
|
|
#endif
|
2014-10-12 19:54:27 +08:00
|
|
|
|
|
|
|
for( ; k < len; k++ )
|
2011-04-17 21:14:45 +08:00
|
|
|
{
|
|
|
|
float m = mag[k];
|
|
|
|
x[k] *= m; y[k] *= m;
|
|
|
|
}
|
2014-10-12 19:54:27 +08:00
|
|
|
}
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
2011-04-17 21:14:45 +08:00
|
|
|
else
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
const double *mag = (const double*)ptrs[0], *angle = (const double*)ptrs[1];
|
|
|
|
double *x = (double*)ptrs[2], *y = (double*)ptrs[3];
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
for( k = 0; k < len; k++ )
|
|
|
|
buf[0][k] = (float)angle[k];
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
SinCos_32f( buf[0], buf[1], buf[0], len, angleInDegrees );
|
|
|
|
if( mag )
|
|
|
|
for( k = 0; k < len; k++ )
|
|
|
|
{
|
|
|
|
double m = mag[k];
|
|
|
|
x[k] = buf[0][k]*m; y[k] = buf[1][k]*m;
|
|
|
|
}
|
|
|
|
else
|
2015-01-12 15:59:31 +08:00
|
|
|
{
|
|
|
|
std::memcpy(x, buf[0], sizeof(float) * len);
|
|
|
|
std::memcpy(y, buf[1], sizeof(float) * len);
|
|
|
|
}
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
if( ptrs[0] )
|
|
|
|
ptrs[0] += len*esz1;
|
|
|
|
ptrs[1] += len*esz1;
|
|
|
|
ptrs[2] += len*esz1;
|
|
|
|
ptrs[3] += len*esz1;
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************************************\
|
|
|
|
* E X P *
|
|
|
|
\****************************************************************************************/
|
|
|
|
|
2011-06-06 22:51:27 +08:00
|
|
|
void exp( InputArray _src, OutputArray _dst )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2013-12-03 18:07:00 +08:00
|
|
|
int type = _src.type(), depth = _src.depth(), cn = _src.channels();
|
|
|
|
CV_Assert( depth == CV_32F || depth == CV_64F );
|
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
|
|
|
|
ocl_math_op(_src, noArray(), _dst, OCL_OP_EXP))
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2013-12-03 18:07:00 +08:00
|
|
|
Mat src = _src.getMat();
|
2011-04-17 21:14:45 +08:00
|
|
|
_dst.create( src.dims, src.size, type );
|
|
|
|
Mat dst = _dst.getMat();
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
const Mat* arrays[] = {&src, &dst, 0};
|
2018-09-04 21:44:47 +08:00
|
|
|
uchar* ptrs[2] = {};
|
2011-04-17 21:14:45 +08:00
|
|
|
NAryMatIterator it(arrays, ptrs);
|
|
|
|
int len = (int)(it.size*cn);
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
2010-10-12 20:31:40 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
if( depth == CV_32F )
|
2016-08-12 23:16:04 +08:00
|
|
|
hal::exp32f((const float*)ptrs[0], (float*)ptrs[1], len);
|
2011-04-17 21:14:45 +08:00
|
|
|
else
|
2016-08-12 23:16:04 +08:00
|
|
|
hal::exp64f((const double*)ptrs[0], (double*)ptrs[1], len);
|
2010-10-12 20:31:40 +08:00
|
|
|
}
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/****************************************************************************************\
|
|
|
|
* L O G *
|
|
|
|
\****************************************************************************************/
|
|
|
|
|
2011-06-06 22:51:27 +08:00
|
|
|
void log( InputArray _src, OutputArray _dst )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2013-12-03 18:07:00 +08:00
|
|
|
int type = _src.type(), depth = _src.depth(), cn = _src.channels();
|
|
|
|
CV_Assert( depth == CV_32F || depth == CV_64F );
|
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
CV_OCL_RUN( _dst.isUMat() && _src.dims() <= 2,
|
|
|
|
ocl_math_op(_src, noArray(), _dst, OCL_OP_LOG))
|
2013-12-03 18:07:00 +08:00
|
|
|
|
|
|
|
Mat src = _src.getMat();
|
2011-04-17 21:14:45 +08:00
|
|
|
_dst.create( src.dims, src.size, type );
|
|
|
|
Mat dst = _dst.getMat();
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
const Mat* arrays[] = {&src, &dst, 0};
|
2018-09-04 21:44:47 +08:00
|
|
|
uchar* ptrs[2] = {};
|
2011-04-17 21:14:45 +08:00
|
|
|
NAryMatIterator it(arrays, ptrs);
|
|
|
|
int len = (int)(it.size*cn);
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
2010-10-12 20:31:40 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
if( depth == CV_32F )
|
2016-08-12 23:16:04 +08:00
|
|
|
hal::log32f( (const float*)ptrs[0], (float*)ptrs[1], len );
|
2011-04-17 21:14:45 +08:00
|
|
|
else
|
2016-08-12 23:16:04 +08:00
|
|
|
hal::log64f( (const double*)ptrs[0], (double*)ptrs[1], len );
|
2010-10-12 20:31:40 +08:00
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
}
|
2010-05-12 01:44:00 +08:00
|
|
|
|
|
|
|
/****************************************************************************************\
|
|
|
|
* P O W E R *
|
|
|
|
\****************************************************************************************/
|
|
|
|
|
2014-10-12 23:31:25 +08:00
|
|
|
template <typename T, typename WT>
|
|
|
|
struct iPow_SIMD
|
|
|
|
{
|
|
|
|
int operator() ( const T *, T *, int, int)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
2014-10-12 23:31:25 +08:00
|
|
|
|
|
|
|
template <>
|
|
|
|
struct iPow_SIMD<uchar, int>
|
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
int operator() ( const uchar * src, uchar * dst, int len, int power )
|
2014-10-12 23:31:25 +08:00
|
|
|
{
|
|
|
|
int i = 0;
|
2018-09-05 21:17:43 +08:00
|
|
|
v_uint32 v_1 = vx_setall_u32(1u);
|
2014-10-12 23:31:25 +08:00
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
for ( ; i <= len - VTraits<v_uint16>::vlanes(); i += VTraits<v_uint16>::vlanes())
|
2014-10-12 23:31:25 +08:00
|
|
|
{
|
2018-09-05 21:17:43 +08:00
|
|
|
v_uint32 v_a1 = v_1, v_a2 = v_1;
|
|
|
|
v_uint16 v = vx_load_expand(src + i);
|
|
|
|
v_uint32 v_b1, v_b2;
|
2015-05-02 02:49:11 +08:00
|
|
|
v_expand(v, v_b1, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
int p = power;
|
|
|
|
|
|
|
|
while( p > 1 )
|
|
|
|
{
|
|
|
|
if (p & 1)
|
|
|
|
{
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
}
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_b1 = v_mul(v_b1, v_b1);
|
|
|
|
v_b2 = v_mul(v_b2, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
p >>= 1;
|
|
|
|
}
|
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
|
|
|
|
v = v_pack(v_a1, v_a2);
|
|
|
|
v_pack_store(dst + i, v);
|
2014-10-12 23:31:25 +08:00
|
|
|
}
|
2018-09-05 21:17:43 +08:00
|
|
|
vx_cleanup();
|
2014-10-12 23:31:25 +08:00
|
|
|
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <>
|
|
|
|
struct iPow_SIMD<schar, int>
|
|
|
|
{
|
|
|
|
int operator() ( const schar * src, schar * dst, int len, int power)
|
|
|
|
{
|
|
|
|
int i = 0;
|
2018-09-05 21:17:43 +08:00
|
|
|
v_int32 v_1 = vx_setall_s32(1);
|
2014-10-12 23:31:25 +08:00
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
for ( ; i <= len - VTraits<v_int16>::vlanes(); i += VTraits<v_int16>::vlanes())
|
2014-10-12 23:31:25 +08:00
|
|
|
{
|
2018-09-05 21:17:43 +08:00
|
|
|
v_int32 v_a1 = v_1, v_a2 = v_1;
|
|
|
|
v_int16 v = vx_load_expand(src + i);
|
|
|
|
v_int32 v_b1, v_b2;
|
2015-05-02 02:49:11 +08:00
|
|
|
v_expand(v, v_b1, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
int p = power;
|
|
|
|
|
|
|
|
while( p > 1 )
|
|
|
|
{
|
|
|
|
if (p & 1)
|
|
|
|
{
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
}
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_b1 = v_mul(v_b1, v_b1);
|
|
|
|
v_b2 = v_mul(v_b2, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
p >>= 1;
|
|
|
|
}
|
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
|
|
|
|
v = v_pack(v_a1, v_a2);
|
|
|
|
v_pack_store(dst + i, v);
|
2014-10-12 23:31:25 +08:00
|
|
|
}
|
2018-09-05 21:17:43 +08:00
|
|
|
vx_cleanup();
|
2014-10-12 23:31:25 +08:00
|
|
|
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <>
|
|
|
|
struct iPow_SIMD<ushort, int>
|
|
|
|
{
|
|
|
|
int operator() ( const ushort * src, ushort * dst, int len, int power)
|
|
|
|
{
|
|
|
|
int i = 0;
|
2018-09-05 21:17:43 +08:00
|
|
|
v_uint32 v_1 = vx_setall_u32(1u);
|
2014-10-12 23:31:25 +08:00
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
for ( ; i <= len - VTraits<v_uint16>::vlanes(); i += VTraits<v_uint16>::vlanes())
|
2014-10-12 23:31:25 +08:00
|
|
|
{
|
2018-09-05 21:17:43 +08:00
|
|
|
v_uint32 v_a1 = v_1, v_a2 = v_1;
|
|
|
|
v_uint16 v = vx_load(src + i);
|
|
|
|
v_uint32 v_b1, v_b2;
|
2015-05-02 02:49:11 +08:00
|
|
|
v_expand(v, v_b1, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
int p = power;
|
|
|
|
|
|
|
|
while( p > 1 )
|
|
|
|
{
|
|
|
|
if (p & 1)
|
|
|
|
{
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
}
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_b1 = v_mul(v_b1, v_b1);
|
|
|
|
v_b2 = v_mul(v_b2, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
p >>= 1;
|
|
|
|
}
|
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
|
|
|
|
v = v_pack(v_a1, v_a2);
|
|
|
|
v_store(dst + i, v);
|
2014-10-12 23:31:25 +08:00
|
|
|
}
|
2018-09-05 21:17:43 +08:00
|
|
|
vx_cleanup();
|
2014-10-12 23:31:25 +08:00
|
|
|
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <>
|
|
|
|
struct iPow_SIMD<short, int>
|
|
|
|
{
|
|
|
|
int operator() ( const short * src, short * dst, int len, int power)
|
|
|
|
{
|
|
|
|
int i = 0;
|
2018-09-05 21:17:43 +08:00
|
|
|
v_int32 v_1 = vx_setall_s32(1);
|
2014-10-12 23:31:25 +08:00
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
for ( ; i <= len - VTraits<v_int16>::vlanes(); i += VTraits<v_int16>::vlanes())
|
2014-10-12 23:31:25 +08:00
|
|
|
{
|
2018-09-05 21:17:43 +08:00
|
|
|
v_int32 v_a1 = v_1, v_a2 = v_1;
|
|
|
|
v_int16 v = vx_load(src + i);
|
|
|
|
v_int32 v_b1, v_b2;
|
2015-05-02 02:49:11 +08:00
|
|
|
v_expand(v, v_b1, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
int p = power;
|
|
|
|
|
|
|
|
while( p > 1 )
|
|
|
|
{
|
|
|
|
if (p & 1)
|
|
|
|
{
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
}
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_b1 = v_mul(v_b1, v_b1);
|
|
|
|
v_b2 = v_mul(v_b2, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
p >>= 1;
|
|
|
|
}
|
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
|
|
|
|
v = v_pack(v_a1, v_a2);
|
|
|
|
v_store(dst + i, v);
|
2014-10-12 23:31:25 +08:00
|
|
|
}
|
2018-09-05 21:17:43 +08:00
|
|
|
vx_cleanup();
|
2014-10-12 23:31:25 +08:00
|
|
|
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <>
|
|
|
|
struct iPow_SIMD<int, int>
|
|
|
|
{
|
|
|
|
int operator() ( const int * src, int * dst, int len, int power)
|
|
|
|
{
|
|
|
|
int i = 0;
|
2018-09-05 21:17:43 +08:00
|
|
|
v_int32 v_1 = vx_setall_s32(1);
|
2014-10-12 23:31:25 +08:00
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
for ( ; i <= len - VTraits<v_int32>::vlanes()*2; i += VTraits<v_int32>::vlanes()*2)
|
2014-10-12 23:31:25 +08:00
|
|
|
{
|
2018-09-05 21:17:43 +08:00
|
|
|
v_int32 v_a1 = v_1, v_a2 = v_1;
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_int32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits<v_int32>::vlanes());
|
2014-10-12 23:31:25 +08:00
|
|
|
int p = power;
|
|
|
|
|
|
|
|
while( p > 1 )
|
|
|
|
{
|
|
|
|
if (p & 1)
|
2015-05-02 02:49:11 +08:00
|
|
|
{
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
}
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_b1 = v_mul(v_b1, v_b1);
|
|
|
|
v_b2 = v_mul(v_b2, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
p >>= 1;
|
|
|
|
}
|
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
|
|
|
|
v_store(dst + i, v_a1);
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_store(dst + i + VTraits<v_int32>::vlanes(), v_a2);
|
2014-10-12 23:31:25 +08:00
|
|
|
}
|
2018-09-05 21:17:43 +08:00
|
|
|
vx_cleanup();
|
2014-10-12 23:31:25 +08:00
|
|
|
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <>
|
|
|
|
struct iPow_SIMD<float, float>
|
|
|
|
{
|
|
|
|
int operator() ( const float * src, float * dst, int len, int power)
|
|
|
|
{
|
|
|
|
int i = 0;
|
2018-09-05 21:17:43 +08:00
|
|
|
v_float32 v_1 = vx_setall_f32(1.f);
|
2015-05-02 02:49:11 +08:00
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
for ( ; i <= len - VTraits<v_float32>::vlanes()*2; i += VTraits<v_float32>::vlanes()*2)
|
2015-05-02 02:49:11 +08:00
|
|
|
{
|
2018-09-05 21:17:43 +08:00
|
|
|
v_float32 v_a1 = v_1, v_a2 = v_1;
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_float32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits<v_float32>::vlanes());
|
2015-05-02 02:49:11 +08:00
|
|
|
int p = std::abs(power);
|
|
|
|
if( power < 0 )
|
|
|
|
{
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_b1 = v_div(v_1, v_b1);
|
|
|
|
v_b2 = v_div(v_1, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
while( p > 1 )
|
|
|
|
{
|
|
|
|
if (p & 1)
|
|
|
|
{
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
}
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_b1 = v_mul(v_b1, v_b1);
|
|
|
|
v_b2 = v_mul(v_b2, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
p >>= 1;
|
|
|
|
}
|
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
|
|
|
|
v_store(dst + i, v_a1);
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_store(dst + i + VTraits<v_float32>::vlanes(), v_a2);
|
2015-05-02 02:49:11 +08:00
|
|
|
}
|
2018-09-05 21:17:43 +08:00
|
|
|
vx_cleanup();
|
2015-05-02 02:49:11 +08:00
|
|
|
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
|
2015-05-02 02:49:11 +08:00
|
|
|
template <>
|
|
|
|
struct iPow_SIMD<double, double>
|
|
|
|
{
|
|
|
|
int operator() ( const double * src, double * dst, int len, int power)
|
|
|
|
{
|
|
|
|
int i = 0;
|
2018-09-05 21:17:43 +08:00
|
|
|
v_float64 v_1 = vx_setall_f64(1.);
|
2014-10-12 23:31:25 +08:00
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
for ( ; i <= len - VTraits<v_float64>::vlanes()*2; i += VTraits<v_float64>::vlanes()*2)
|
2014-10-12 23:31:25 +08:00
|
|
|
{
|
2018-09-05 21:17:43 +08:00
|
|
|
v_float64 v_a1 = v_1, v_a2 = v_1;
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_float64 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits<v_float64>::vlanes());
|
2015-05-02 02:49:11 +08:00
|
|
|
int p = std::abs(power);
|
|
|
|
if( power < 0 )
|
|
|
|
{
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_b1 = v_div(v_1, v_b1);
|
|
|
|
v_b2 = v_div(v_1, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
}
|
2014-10-12 23:31:25 +08:00
|
|
|
|
|
|
|
while( p > 1 )
|
|
|
|
{
|
|
|
|
if (p & 1)
|
2015-05-02 02:49:11 +08:00
|
|
|
{
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
}
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_b1 = v_mul(v_b1, v_b1);
|
|
|
|
v_b2 = v_mul(v_b2, v_b2);
|
2014-10-12 23:31:25 +08:00
|
|
|
p >>= 1;
|
|
|
|
}
|
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_a1 = v_mul(v_a1, v_b1);
|
|
|
|
v_a2 = v_mul(v_a2, v_b2);
|
2015-05-02 02:49:11 +08:00
|
|
|
|
|
|
|
v_store(dst + i, v_a1);
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_store(dst + i + VTraits<v_float64>::vlanes(), v_a2);
|
2014-10-12 23:31:25 +08:00
|
|
|
}
|
2018-09-05 21:17:43 +08:00
|
|
|
vx_cleanup();
|
2014-10-12 23:31:25 +08:00
|
|
|
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
};
|
2015-05-02 02:49:11 +08:00
|
|
|
#endif
|
2014-10-12 23:31:25 +08:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
template<typename T, typename WT>
|
2011-04-17 21:14:45 +08:00
|
|
|
static void
|
2015-05-02 02:49:11 +08:00
|
|
|
iPow_i( const T* src, T* dst, int len, int power )
|
|
|
|
{
|
|
|
|
if( power < 0 )
|
|
|
|
{
|
|
|
|
T tab[5] =
|
|
|
|
{
|
2015-10-16 22:10:00 +08:00
|
|
|
saturate_cast<T>(power == -1 ? -1 : 0), saturate_cast<T>((power & 1) ? -1 : 1),
|
|
|
|
std::numeric_limits<T>::max(), 1, saturate_cast<T>(power == -1 ? 1 : 0)
|
2015-05-02 02:49:11 +08:00
|
|
|
};
|
|
|
|
for( int i = 0; i < len; i++ )
|
|
|
|
{
|
|
|
|
T val = src[i];
|
|
|
|
dst[i] = cv_abs(val) <= 2 ? tab[val + 2] : (T)0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
iPow_SIMD<T, WT> vop;
|
|
|
|
int i = vop(src, dst, len, power);
|
|
|
|
|
|
|
|
for( ; i < len; i++ )
|
|
|
|
{
|
|
|
|
WT a = 1, b = src[i];
|
|
|
|
int p = power;
|
|
|
|
while( p > 1 )
|
|
|
|
{
|
|
|
|
if( p & 1 )
|
|
|
|
a *= b;
|
|
|
|
b *= b;
|
|
|
|
p >>= 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
a *= b;
|
|
|
|
dst[i] = saturate_cast<T>(a);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
static void
|
|
|
|
iPow_f( const T* src, T* dst, int len, int power0 )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
iPow_SIMD<T, T> vop;
|
|
|
|
int i = vop(src, dst, len, power0);
|
|
|
|
int power = std::abs(power0);
|
2014-10-12 23:31:25 +08:00
|
|
|
|
|
|
|
for( ; i < len; i++ )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
T a = 1, b = src[i];
|
2010-05-12 01:44:00 +08:00
|
|
|
int p = power;
|
2015-05-02 02:49:11 +08:00
|
|
|
if( power0 < 0 )
|
|
|
|
b = 1/b;
|
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
while( p > 1 )
|
|
|
|
{
|
|
|
|
if( p & 1 )
|
|
|
|
a *= b;
|
|
|
|
b *= b;
|
|
|
|
p >>= 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
a *= b;
|
2015-05-02 02:49:11 +08:00
|
|
|
dst[i] = a;
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-06-08 01:21:29 +08:00
|
|
|
static void iPow8u(const uchar* src, uchar* dst, int len, int power)
|
2011-04-17 21:14:45 +08:00
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
iPow_i<uchar, unsigned>(src, dst, len, power);
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
2010-05-12 01:44:00 +08:00
|
|
|
|
2012-06-08 01:21:29 +08:00
|
|
|
static void iPow8s(const schar* src, schar* dst, int len, int power)
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
iPow_i<schar, int>(src, dst, len, power);
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
|
|
|
static void iPow16u(const ushort* src, ushort* dst, int len, int power)
|
2011-04-17 21:14:45 +08:00
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
iPow_i<ushort, unsigned>(src, dst, len, power);
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
|
|
|
|
2012-06-08 01:21:29 +08:00
|
|
|
static void iPow16s(const short* src, short* dst, int len, int power)
|
2011-04-17 21:14:45 +08:00
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
iPow_i<short, int>(src, dst, len, power);
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
|
|
|
static void iPow32s(const int* src, int* dst, int len, int power)
|
2011-04-17 21:14:45 +08:00
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
iPow_i<int, int>(src, dst, len, power);
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
|
|
|
|
2012-06-08 01:21:29 +08:00
|
|
|
static void iPow32f(const float* src, float* dst, int len, int power)
|
2011-04-17 21:14:45 +08:00
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
iPow_f<float>(src, dst, len, power);
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
|
|
|
|
2012-06-08 01:21:29 +08:00
|
|
|
static void iPow64f(const double* src, double* dst, int len, int power)
|
2011-04-17 21:14:45 +08:00
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
iPow_f<double>(src, dst, len, power);
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
2010-05-12 01:44:00 +08:00
|
|
|
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
typedef void (*IPowFunc)( const uchar* src, uchar* dst, int len, int power );
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
static IPowFunc ipowTab[] =
|
|
|
|
{
|
|
|
|
(IPowFunc)iPow8u, (IPowFunc)iPow8s, (IPowFunc)iPow16u, (IPowFunc)iPow16s,
|
|
|
|
(IPowFunc)iPow32s, (IPowFunc)iPow32f, (IPowFunc)iPow64f, 0
|
|
|
|
};
|
2010-05-12 01:44:00 +08:00
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
#ifdef HAVE_OPENCL
|
|
|
|
|
2014-02-03 21:38:28 +08:00
|
|
|
static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
|
|
|
|
bool is_ipower, int ipower)
|
2013-11-30 21:20:45 +08:00
|
|
|
{
|
2014-05-14 19:42:30 +08:00
|
|
|
const ocl::Device & d = ocl::Device::getDefault();
|
|
|
|
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
|
|
|
|
rowsPerWI = d.isIntel() ? 4 : 1;
|
|
|
|
bool doubleSupport = d.doubleFPConfig() > 0;
|
2013-11-30 21:20:45 +08:00
|
|
|
|
2014-06-10 22:34:50 +08:00
|
|
|
_dst.createSameSize(_src, type);
|
2015-05-02 02:49:11 +08:00
|
|
|
if (is_ipower)
|
2014-06-10 22:34:50 +08:00
|
|
|
{
|
|
|
|
if (ipower == 0)
|
2015-05-02 02:49:11 +08:00
|
|
|
{
|
2014-06-10 22:34:50 +08:00
|
|
|
_dst.setTo(Scalar::all(1));
|
2015-05-02 02:49:11 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (ipower == 1)
|
|
|
|
{
|
2014-06-10 22:34:50 +08:00
|
|
|
_src.copyTo(_dst);
|
2015-05-02 02:49:11 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if( ipower < 0 )
|
|
|
|
{
|
|
|
|
if( depth == CV_32F || depth == CV_64F )
|
|
|
|
is_ipower = false;
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
2014-06-10 22:34:50 +08:00
|
|
|
}
|
|
|
|
|
2014-02-03 21:38:28 +08:00
|
|
|
if (depth == CV_64F && !doubleSupport)
|
2013-11-30 21:20:45 +08:00
|
|
|
return false;
|
|
|
|
|
2014-11-06 21:23:02 +08:00
|
|
|
bool issqrt = std::abs(power - 0.5) < DBL_EPSILON;
|
|
|
|
const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
|
2013-12-12 22:01:30 +08:00
|
|
|
|
2013-11-30 21:20:45 +08:00
|
|
|
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
2018-10-14 09:49:14 +08:00
|
|
|
format("-D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s",
|
|
|
|
ocl::typeToStr(depth), depth, rowsPerWI, op,
|
2014-06-10 22:34:50 +08:00
|
|
|
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
2013-12-03 04:41:07 +08:00
|
|
|
if (k.empty())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
UMat src = _src.getUMat();
|
|
|
|
_dst.create(src.size(), type);
|
|
|
|
UMat dst = _dst.getUMat();
|
2013-11-30 21:20:45 +08:00
|
|
|
|
|
|
|
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
|
|
|
|
dstarg = ocl::KernelArg::WriteOnly(dst, cn);
|
|
|
|
|
2014-01-13 23:58:27 +08:00
|
|
|
if (issqrt)
|
|
|
|
k.args(srcarg, dstarg);
|
2014-02-03 21:38:28 +08:00
|
|
|
else if (is_ipower)
|
|
|
|
k.args(srcarg, dstarg, ipower);
|
2013-11-30 21:20:45 +08:00
|
|
|
else
|
2014-01-13 23:58:27 +08:00
|
|
|
{
|
|
|
|
if (depth == CV_32F)
|
|
|
|
k.args(srcarg, dstarg, (float)power);
|
|
|
|
else
|
|
|
|
k.args(srcarg, dstarg, power);
|
|
|
|
}
|
2013-11-30 21:20:45 +08:00
|
|
|
|
2015-10-16 22:10:00 +08:00
|
|
|
size_t globalsize[2] = { (size_t)dst.cols * cn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
|
2013-11-30 21:20:45 +08:00
|
|
|
return k.run(2, globalsize, NULL, false);
|
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
#endif
|
|
|
|
|
2011-06-06 22:51:27 +08:00
|
|
|
void pow( InputArray _src, double power, OutputArray _dst )
|
2011-04-17 21:14:45 +08:00
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2014-02-03 21:38:28 +08:00
|
|
|
int type = _src.type(), depth = CV_MAT_DEPTH(type),
|
|
|
|
cn = CV_MAT_CN(type), ipower = cvRound(power);
|
2015-06-20 01:52:14 +08:00
|
|
|
bool is_ipower = fabs(ipower - power) < DBL_EPSILON;
|
|
|
|
#ifdef HAVE_OPENCL
|
|
|
|
bool useOpenCL = _dst.isUMat() && _src.dims() <= 2;
|
|
|
|
#endif
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2015-06-20 01:52:14 +08:00
|
|
|
if( is_ipower
|
|
|
|
#ifdef HAVE_OPENCL
|
|
|
|
&& !(useOpenCL && ocl::Device::getDefault().isIntel() && depth != CV_64F)
|
|
|
|
#endif
|
|
|
|
)
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
|
|
|
switch( ipower )
|
|
|
|
{
|
|
|
|
case 0:
|
2014-02-03 21:38:28 +08:00
|
|
|
_dst.createSameSize(_src, type);
|
|
|
|
_dst.setTo(Scalar::all(1));
|
2010-05-12 01:44:00 +08:00
|
|
|
return;
|
|
|
|
case 1:
|
2014-02-03 21:38:28 +08:00
|
|
|
_src.copyTo(_dst);
|
2010-05-12 01:44:00 +08:00
|
|
|
return;
|
|
|
|
case 2:
|
2015-05-02 02:49:11 +08:00
|
|
|
multiply(_src, _src, _dst);
|
2010-05-12 01:44:00 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
CV_Assert( depth == CV_32F || depth == CV_64F );
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2015-05-05 22:49:36 +08:00
|
|
|
CV_OCL_RUN(useOpenCL, ocl_pow(_src, power, _dst, is_ipower, ipower))
|
2014-02-03 21:38:28 +08:00
|
|
|
|
2015-05-02 02:49:11 +08:00
|
|
|
Mat src = _src.getMat();
|
|
|
|
_dst.create( src.dims, src.size, type );
|
|
|
|
Mat dst = _dst.getMat();
|
2014-02-03 21:38:28 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
const Mat* arrays[] = {&src, &dst, 0};
|
2018-09-04 21:44:47 +08:00
|
|
|
uchar* ptrs[2] = {};
|
2011-04-17 21:14:45 +08:00
|
|
|
NAryMatIterator it(arrays, ptrs);
|
|
|
|
int len = (int)(it.size*cn);
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
if( is_ipower )
|
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
IPowFunc func = ipowTab[depth];
|
2010-05-12 01:44:00 +08:00
|
|
|
CV_Assert( func != 0 );
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
|
|
|
func( ptrs[0], ptrs[1], len, ipower );
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
else if( fabs(fabs(power) - 0.5) < DBL_EPSILON )
|
|
|
|
{
|
|
|
|
MathFunc func = power < 0 ?
|
2017-04-21 19:52:45 +08:00
|
|
|
(depth == CV_32F ? (MathFunc)hal::invSqrt32f : (MathFunc)hal::invSqrt64f) :
|
|
|
|
(depth == CV_32F ? (MathFunc)hal::sqrt32f : (MathFunc)hal::sqrt64f);
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
|
|
|
func( ptrs[0], ptrs[1], len );
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn);
|
|
|
|
size_t esz1 = src.elemSize1();
|
2015-05-02 02:49:11 +08:00
|
|
|
AutoBuffer<uchar> buf;
|
|
|
|
Cv32suf inf32, nan32;
|
|
|
|
Cv64suf inf64, nan64;
|
|
|
|
float* fbuf = 0;
|
|
|
|
double* dbuf = 0;
|
2018-10-15 22:35:21 +08:00
|
|
|
#ifndef __EMSCRIPTEN__
|
2015-05-02 02:49:11 +08:00
|
|
|
inf32.i = 0x7f800000;
|
|
|
|
nan32.i = 0x7fffffff;
|
|
|
|
inf64.i = CV_BIG_INT(0x7FF0000000000000);
|
|
|
|
nan64.i = CV_BIG_INT(0x7FFFFFFFFFFFFFFF);
|
2018-10-15 22:35:21 +08:00
|
|
|
#else
|
|
|
|
inf32.f = std::numeric_limits<float>::infinity();
|
|
|
|
nan32.f = std::numeric_limits<float>::quiet_NaN();
|
|
|
|
inf64.f = std::numeric_limits<double>::infinity();
|
|
|
|
nan64.f = std::numeric_limits<double>::quiet_NaN();
|
|
|
|
#endif
|
2015-05-02 02:49:11 +08:00
|
|
|
|
|
|
|
if( src.ptr() == dst.ptr() )
|
|
|
|
{
|
|
|
|
buf.allocate(blockSize*esz1);
|
2018-06-11 06:42:00 +08:00
|
|
|
fbuf = (float*)buf.data();
|
|
|
|
dbuf = (double*)buf.data();
|
2015-05-02 02:49:11 +08:00
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
for( j = 0; j < len; j += blockSize )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
int bsz = std::min(len - j, blockSize);
|
2015-05-02 02:49:11 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
if( depth == CV_32F )
|
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
float* x0 = (float*)ptrs[0];
|
|
|
|
float* x = fbuf ? fbuf : x0;
|
2011-04-17 21:14:45 +08:00
|
|
|
float* y = (float*)ptrs[1];
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2015-05-02 02:49:11 +08:00
|
|
|
if( x != x0 )
|
|
|
|
memcpy(x, x0, bsz*esz1);
|
|
|
|
|
2016-08-12 23:16:04 +08:00
|
|
|
hal::log32f(x, y, bsz);
|
2011-04-17 21:14:45 +08:00
|
|
|
for( k = 0; k < bsz; k++ )
|
|
|
|
y[k] = (float)(y[k]*power);
|
2016-08-12 23:16:04 +08:00
|
|
|
hal::exp32f(y, y, bsz);
|
2015-05-02 02:49:11 +08:00
|
|
|
for( k = 0; k < bsz; k++ )
|
|
|
|
{
|
|
|
|
if( x0[k] <= 0 )
|
|
|
|
{
|
|
|
|
if( x0[k] == 0.f )
|
|
|
|
{
|
|
|
|
if( power < 0 )
|
|
|
|
y[k] = inf32.f;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
y[k] = nan32.f;
|
|
|
|
}
|
|
|
|
}
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2015-05-02 02:49:11 +08:00
|
|
|
double* x0 = (double*)ptrs[0];
|
|
|
|
double* x = dbuf ? dbuf : x0;
|
2011-04-17 21:14:45 +08:00
|
|
|
double* y = (double*)ptrs[1];
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2015-05-02 02:49:11 +08:00
|
|
|
if( x != x0 )
|
|
|
|
memcpy(x, x0, bsz*esz1);
|
|
|
|
|
2016-08-12 23:16:04 +08:00
|
|
|
hal::log64f(x, y, bsz);
|
2011-04-17 21:14:45 +08:00
|
|
|
for( k = 0; k < bsz; k++ )
|
|
|
|
y[k] *= power;
|
2016-08-12 23:16:04 +08:00
|
|
|
hal::exp64f(y, y, bsz);
|
2015-05-02 02:49:11 +08:00
|
|
|
|
|
|
|
for( k = 0; k < bsz; k++ )
|
|
|
|
{
|
|
|
|
if( x0[k] <= 0 )
|
|
|
|
{
|
|
|
|
if( x0[k] == 0. )
|
|
|
|
{
|
|
|
|
if( power < 0 )
|
|
|
|
y[k] = inf64.f;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
y[k] = nan64.f;
|
|
|
|
}
|
|
|
|
}
|
2011-04-17 21:14:45 +08:00
|
|
|
}
|
|
|
|
ptrs[0] += bsz*esz1;
|
|
|
|
ptrs[1] += bsz*esz1;
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-06-06 22:51:27 +08:00
|
|
|
void sqrt(InputArray a, OutputArray b)
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2013-02-25 00:14:01 +08:00
|
|
|
cv::pow(a, 0.5, b);
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/************************** CheckArray for NaN's, Inf's *********************************/
|
|
|
|
|
2012-03-29 04:09:13 +08:00
|
|
|
template<int cv_mat_type> struct mat_type_assotiations{};
|
|
|
|
|
|
|
|
template<> struct mat_type_assotiations<CV_8U>
|
|
|
|
{
|
|
|
|
typedef unsigned char type;
|
|
|
|
static const type min_allowable = 0x0;
|
|
|
|
static const type max_allowable = 0xFF;
|
|
|
|
};
|
|
|
|
|
|
|
|
template<> struct mat_type_assotiations<CV_8S>
|
|
|
|
{
|
|
|
|
typedef signed char type;
|
|
|
|
static const type min_allowable = SCHAR_MIN;
|
|
|
|
static const type max_allowable = SCHAR_MAX;
|
|
|
|
};
|
|
|
|
|
|
|
|
template<> struct mat_type_assotiations<CV_16U>
|
|
|
|
{
|
|
|
|
typedef unsigned short type;
|
|
|
|
static const type min_allowable = 0x0;
|
|
|
|
static const type max_allowable = USHRT_MAX;
|
|
|
|
};
|
|
|
|
template<> struct mat_type_assotiations<CV_16S>
|
|
|
|
{
|
|
|
|
typedef signed short type;
|
|
|
|
static const type min_allowable = SHRT_MIN;
|
|
|
|
static const type max_allowable = SHRT_MAX;
|
|
|
|
};
|
|
|
|
|
|
|
|
template<> struct mat_type_assotiations<CV_32S>
|
|
|
|
{
|
|
|
|
typedef int type;
|
|
|
|
static const type min_allowable = (-INT_MAX - 1);
|
|
|
|
static const type max_allowable = INT_MAX;
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int depth>
|
2015-12-09 22:45:59 +08:00
|
|
|
static bool checkIntegerRange(cv::Mat src, Point& bad_pt, int minVal, int maxVal)
|
2012-03-29 04:09:13 +08:00
|
|
|
{
|
2012-06-08 01:21:29 +08:00
|
|
|
typedef mat_type_assotiations<depth> type_ass;
|
|
|
|
|
2012-03-29 04:09:13 +08:00
|
|
|
if (minVal < type_ass::min_allowable && maxVal > type_ass::max_allowable)
|
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
2012-04-14 05:50:59 +08:00
|
|
|
else if (minVal > type_ass::max_allowable || maxVal < type_ass::min_allowable || maxVal < minVal)
|
2012-03-29 04:09:13 +08:00
|
|
|
{
|
|
|
|
bad_pt = cv::Point(0,0);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
cv::Mat as_one_channel = src.reshape(1,0);
|
|
|
|
|
|
|
|
for (int j = 0; j < as_one_channel.rows; ++j)
|
|
|
|
for (int i = 0; i < as_one_channel.cols; ++i)
|
2012-06-08 01:21:29 +08:00
|
|
|
{
|
2015-12-09 22:45:59 +08:00
|
|
|
typename type_ass::type v = as_one_channel.at<typename type_ass::type>(j ,i);
|
|
|
|
if (v < minVal || v > maxVal)
|
2012-06-08 01:21:29 +08:00
|
|
|
{
|
2015-12-09 22:45:59 +08:00
|
|
|
bad_pt.y = j;
|
|
|
|
bad_pt.x = i / src.channels();
|
2012-03-29 04:09:13 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2012-03-29 04:09:13 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-12-09 22:45:59 +08:00
|
|
|
typedef bool (*check_range_function)(cv::Mat src, Point& bad_pt, int minVal, int maxVal);
|
2012-03-29 04:09:13 +08:00
|
|
|
|
2012-06-08 01:21:29 +08:00
|
|
|
check_range_function check_range_functions[] =
|
2012-03-29 04:09:13 +08:00
|
|
|
{
|
2012-04-14 05:50:59 +08:00
|
|
|
&checkIntegerRange<CV_8U>,
|
|
|
|
&checkIntegerRange<CV_8S>,
|
|
|
|
&checkIntegerRange<CV_16U>,
|
|
|
|
&checkIntegerRange<CV_16S>,
|
|
|
|
&checkIntegerRange<CV_32S>
|
2012-03-29 04:09:13 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double maxVal)
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
Mat src = _src.getMat();
|
2012-03-29 04:09:13 +08:00
|
|
|
|
|
|
|
if ( src.dims > 2 )
|
2010-10-12 20:31:40 +08:00
|
|
|
{
|
2015-12-09 22:45:59 +08:00
|
|
|
CV_Assert(pt == NULL); // no way to provide location info
|
|
|
|
|
2010-10-12 20:31:40 +08:00
|
|
|
const Mat* arrays[] = {&src, 0};
|
|
|
|
Mat planes[1];
|
|
|
|
NAryMatIterator it(arrays, planes);
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2012-03-29 04:09:13 +08:00
|
|
|
for ( size_t i = 0; i < it.nplanes; i++, ++it )
|
2010-10-12 20:31:40 +08:00
|
|
|
{
|
2015-12-09 22:45:59 +08:00
|
|
|
if (!checkRange( it.planes[0], quiet, NULL, minVal, maxVal ))
|
2010-10-12 20:31:40 +08:00
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
int depth = src.depth();
|
|
|
|
Point badPt(-1, -1);
|
|
|
|
|
2012-03-29 04:09:13 +08:00
|
|
|
if (depth < CV_32F)
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2015-12-09 22:45:59 +08:00
|
|
|
int minVali = minVal <= INT_MIN ? INT_MIN : cvFloor(minVal);
|
|
|
|
int maxVali = maxVal > INT_MAX ? INT_MAX : cvCeil(maxVal) - 1;
|
2012-03-29 04:09:13 +08:00
|
|
|
|
2015-12-09 22:45:59 +08:00
|
|
|
(check_range_functions[depth])(src, badPt, minVali, maxVali);
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int i, loc = 0;
|
2015-12-09 22:45:59 +08:00
|
|
|
int cn = src.channels();
|
2018-11-10 18:17:34 +08:00
|
|
|
Size size = getContinuousSize2D(src, cn);
|
2010-05-12 01:44:00 +08:00
|
|
|
|
|
|
|
if( depth == CV_32F )
|
|
|
|
{
|
|
|
|
Cv32suf a, b;
|
|
|
|
int ia, ib;
|
2014-08-13 19:08:27 +08:00
|
|
|
const int* isrc = src.ptr<int>();
|
2010-05-12 01:44:00 +08:00
|
|
|
size_t step = src.step/sizeof(isrc[0]);
|
|
|
|
|
|
|
|
a.f = (float)std::max(minVal, (double)-FLT_MAX);
|
|
|
|
b.f = (float)std::min(maxVal, (double)FLT_MAX);
|
|
|
|
|
|
|
|
ia = CV_TOGGLE_FLT(a.i);
|
|
|
|
ib = CV_TOGGLE_FLT(b.i);
|
|
|
|
|
|
|
|
for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
|
|
|
|
{
|
|
|
|
for( i = 0; i < size.width; i++ )
|
|
|
|
{
|
|
|
|
int val = isrc[i];
|
|
|
|
val = CV_TOGGLE_FLT(val);
|
|
|
|
|
|
|
|
if( val < ia || val >= ib )
|
|
|
|
{
|
2015-12-09 22:45:59 +08:00
|
|
|
int pixelId = (loc + i) / cn;
|
|
|
|
badPt = Point(pixelId % src.cols, pixelId / src.cols);
|
2010-05-12 01:44:00 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Cv64suf a, b;
|
|
|
|
int64 ia, ib;
|
2014-08-13 19:08:27 +08:00
|
|
|
const int64* isrc = src.ptr<int64>();
|
2010-05-12 01:44:00 +08:00
|
|
|
size_t step = src.step/sizeof(isrc[0]);
|
|
|
|
|
|
|
|
a.f = minVal;
|
|
|
|
b.f = maxVal;
|
|
|
|
|
|
|
|
ia = CV_TOGGLE_DBL(a.i);
|
|
|
|
ib = CV_TOGGLE_DBL(b.i);
|
|
|
|
|
|
|
|
for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
|
|
|
|
{
|
|
|
|
for( i = 0; i < size.width; i++ )
|
|
|
|
{
|
|
|
|
int64 val = isrc[i];
|
|
|
|
val = CV_TOGGLE_DBL(val);
|
|
|
|
|
|
|
|
if( val < ia || val >= ib )
|
|
|
|
{
|
2015-12-09 22:45:59 +08:00
|
|
|
int pixelId = (loc + i) / cn;
|
|
|
|
badPt = Point(pixelId % src.cols, pixelId / src.cols);
|
2010-05-12 01:44:00 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if( badPt.x >= 0 )
|
|
|
|
{
|
|
|
|
if( pt )
|
|
|
|
*pt = badPt;
|
|
|
|
if( !quiet )
|
2015-12-09 22:45:59 +08:00
|
|
|
{
|
|
|
|
cv::String value_str;
|
|
|
|
value_str << src(cv::Range(badPt.y, badPt.y + 1), cv::Range(badPt.x, badPt.x + 1));
|
2010-05-12 01:44:00 +08:00
|
|
|
CV_Error_( CV_StsOutOfRange,
|
2015-12-09 22:45:59 +08:00
|
|
|
("the value at (%d, %d)=%s is out of range [%f, %f)", badPt.x, badPt.y, value_str.c_str(), minVal, maxVal));
|
|
|
|
}
|
|
|
|
return false;
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
2015-12-09 22:45:59 +08:00
|
|
|
return true;
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
#ifdef HAVE_OPENCL
|
|
|
|
|
2013-12-30 05:21:04 +08:00
|
|
|
static bool ocl_patchNaNs( InputOutputArray _a, float value )
|
|
|
|
{
|
2014-05-14 19:42:30 +08:00
|
|
|
int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
|
2013-12-30 05:21:04 +08:00
|
|
|
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
2018-10-14 09:49:14 +08:00
|
|
|
format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D DEPTH_dst=%d -D rowsPerWI=%d",
|
|
|
|
CV_32F, rowsPerWI));
|
2013-12-30 05:21:04 +08:00
|
|
|
if (k.empty())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
UMat a = _a.getUMat();
|
|
|
|
int cn = a.channels();
|
|
|
|
|
|
|
|
k.args(ocl::KernelArg::ReadOnlyNoSize(a),
|
2014-01-07 19:08:48 +08:00
|
|
|
ocl::KernelArg::WriteOnly(a, cn), (float)value);
|
2013-12-30 05:21:04 +08:00
|
|
|
|
2015-10-16 22:10:00 +08:00
|
|
|
size_t globalsize[2] = { (size_t)a.cols * cn, ((size_t)a.rows + rowsPerWI - 1) / rowsPerWI };
|
2013-12-30 05:21:04 +08:00
|
|
|
return k.run(2, globalsize, NULL, false);
|
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
#endif
|
|
|
|
|
2012-04-30 22:33:52 +08:00
|
|
|
void patchNaNs( InputOutputArray _a, double _val )
|
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2013-12-30 05:21:04 +08:00
|
|
|
CV_Assert( _a.depth() == CV_32F );
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2014-01-22 19:51:28 +08:00
|
|
|
CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
|
|
|
|
ocl_patchNaNs(_a, (float)_val))
|
2013-12-30 05:21:04 +08:00
|
|
|
|
|
|
|
Mat a = _a.getMat();
|
2012-04-30 22:33:52 +08:00
|
|
|
const Mat* arrays[] = {&a, 0};
|
2018-09-04 21:44:47 +08:00
|
|
|
int* ptrs[1] = {};
|
2012-04-30 22:33:52 +08:00
|
|
|
NAryMatIterator it(arrays, (uchar**)ptrs);
|
|
|
|
size_t len = it.size*a.channels();
|
|
|
|
Cv32suf val;
|
2014-09-23 03:39:34 +08:00
|
|
|
val.f = (float)_val;
|
2014-07-04 01:28:54 +08:00
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
2018-09-05 21:17:43 +08:00
|
|
|
v_int32 v_mask1 = vx_setall_s32(0x7fffffff), v_mask2 = vx_setall_s32(0x7f800000);
|
|
|
|
v_int32 v_val = vx_setall_s32(val.i);
|
2014-07-04 01:28:54 +08:00
|
|
|
#endif
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2012-04-30 22:33:52 +08:00
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
|
|
|
{
|
|
|
|
int* tptr = ptrs[0];
|
2014-07-04 01:28:54 +08:00
|
|
|
size_t j = 0;
|
|
|
|
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
|
|
|
size_t cWidth = (size_t)VTraits<v_int32>::vlanes();
|
2018-09-05 21:17:43 +08:00
|
|
|
for ( ; j + cWidth <= len; j += cWidth)
|
2014-07-04 01:28:54 +08:00
|
|
|
{
|
2018-09-05 21:17:43 +08:00
|
|
|
v_int32 v_src = vx_load(tptr + j);
|
Merge pull request #23980 from hanliutong:rewrite-core
Rewrite Universal Intrinsic code by using new API: Core module. #23980
The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API.
The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885.
Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are:
1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited
- ./modules/core/src/stat.simd.hpp
- ./modules/core/src/matrix_transform.cpp
- ./modules/core/src/matmul.simd.hpp
2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly.
- ./modules/core/src/mathfuncs_core.simd.hpp
```cpp
struct v_atan_f32
{
explicit v_atan_f32(const float& scale)
{
...
}
v_float32 compute(const v_float32& y, const v_float32& x)
{
...
}
...
v_float32 val90; // sizeless type can not used in a class
v_float32 val180;
v_float32 val360;
v_float32 s;
};
```
3. The API interface does not support/does not match
- ./modules/core/src/norm.cpp
Use `v_popcount`, ~~waiting for #23966~~ Fixed
- ./modules/core/src/has_non_zero.simd.hpp
Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed
```cpp
/** @brief Bitwise OR
Only for integer types. */
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
```
```cpp
#if CV_SIMD
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
int step = v_type::nlanes * unrollCount;
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
{
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
....
src += v_type::nlanes;
v0 |= v1; //Illegal ?
....
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
}
v_cleanup();
#endif
```
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
|
|
|
v_int32 v_cmp_mask = v_lt(v_mask2, v_and(v_src, v_mask1));
|
2018-09-05 21:17:43 +08:00
|
|
|
v_int32 v_dst = v_select(v_cmp_mask, v_val, v_src);
|
|
|
|
v_store(tptr + j, v_dst);
|
2014-07-04 01:28:54 +08:00
|
|
|
}
|
2018-09-05 21:17:43 +08:00
|
|
|
vx_cleanup();
|
2014-07-04 01:28:54 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
for( ; j < len; j++ )
|
2012-04-30 22:33:52 +08:00
|
|
|
if( (tptr[j] & 0x7fffffff) > 0x7f800000 )
|
|
|
|
tptr[j] = val.i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
|
2021-02-23 08:22:06 +08:00
|
|
|
|
|
|
|
#ifndef OPENCV_EXCLUDE_C_API
|
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
CV_IMPL float cvCbrt(float value) { return cv::cubeRoot(value); }
|
|
|
|
CV_IMPL float cvFastArctan(float y, float x) { return cv::fastAtan2(y, x); }
|
|
|
|
|
|
|
|
CV_IMPL void
|
|
|
|
cvCartToPolar( const CvArr* xarr, const CvArr* yarr,
|
|
|
|
CvArr* magarr, CvArr* anglearr,
|
|
|
|
int angle_in_degrees )
|
|
|
|
{
|
|
|
|
cv::Mat X = cv::cvarrToMat(xarr), Y = cv::cvarrToMat(yarr), Mag, Angle;
|
|
|
|
if( magarr )
|
|
|
|
{
|
|
|
|
Mag = cv::cvarrToMat(magarr);
|
|
|
|
CV_Assert( Mag.size() == X.size() && Mag.type() == X.type() );
|
|
|
|
}
|
|
|
|
if( anglearr )
|
|
|
|
{
|
|
|
|
Angle = cv::cvarrToMat(anglearr);
|
|
|
|
CV_Assert( Angle.size() == X.size() && Angle.type() == X.type() );
|
|
|
|
}
|
2012-10-17 15:12:04 +08:00
|
|
|
if( magarr )
|
|
|
|
{
|
|
|
|
if( anglearr )
|
|
|
|
cv::cartToPolar( X, Y, Mag, Angle, angle_in_degrees != 0 );
|
|
|
|
else
|
|
|
|
cv::magnitude( X, Y, Mag );
|
|
|
|
}
|
|
|
|
else
|
|
|
|
cv::phase( X, Y, Angle, angle_in_degrees != 0 );
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
CV_IMPL void
|
|
|
|
cvPolarToCart( const CvArr* magarr, const CvArr* anglearr,
|
|
|
|
CvArr* xarr, CvArr* yarr, int angle_in_degrees )
|
|
|
|
{
|
|
|
|
cv::Mat X, Y, Angle = cv::cvarrToMat(anglearr), Mag;
|
|
|
|
if( magarr )
|
|
|
|
{
|
|
|
|
Mag = cv::cvarrToMat(magarr);
|
|
|
|
CV_Assert( Mag.size() == Angle.size() && Mag.type() == Angle.type() );
|
|
|
|
}
|
|
|
|
if( xarr )
|
|
|
|
{
|
|
|
|
X = cv::cvarrToMat(xarr);
|
|
|
|
CV_Assert( X.size() == Angle.size() && X.type() == Angle.type() );
|
|
|
|
}
|
|
|
|
if( yarr )
|
|
|
|
{
|
|
|
|
Y = cv::cvarrToMat(yarr);
|
|
|
|
CV_Assert( Y.size() == Angle.size() && Y.type() == Angle.type() );
|
|
|
|
}
|
|
|
|
|
|
|
|
cv::polarToCart( Mag, Angle, X, Y, angle_in_degrees != 0 );
|
|
|
|
}
|
|
|
|
|
|
|
|
CV_IMPL void cvExp( const CvArr* srcarr, CvArr* dstarr )
|
|
|
|
{
|
|
|
|
cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
|
2010-10-20 20:33:57 +08:00
|
|
|
CV_Assert( src.type() == dst.type() && src.size == dst.size );
|
2010-05-12 01:44:00 +08:00
|
|
|
cv::exp( src, dst );
|
|
|
|
}
|
|
|
|
|
|
|
|
CV_IMPL void cvLog( const CvArr* srcarr, CvArr* dstarr )
|
|
|
|
{
|
|
|
|
cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
|
2010-10-20 20:33:57 +08:00
|
|
|
CV_Assert( src.type() == dst.type() && src.size == dst.size );
|
2010-05-12 01:44:00 +08:00
|
|
|
cv::log( src, dst );
|
|
|
|
}
|
|
|
|
|
|
|
|
CV_IMPL void cvPow( const CvArr* srcarr, CvArr* dstarr, double power )
|
|
|
|
{
|
|
|
|
cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
|
2010-10-20 20:33:57 +08:00
|
|
|
CV_Assert( src.type() == dst.type() && src.size == dst.size );
|
2010-05-12 01:44:00 +08:00
|
|
|
cv::pow( src, power, dst );
|
|
|
|
}
|
|
|
|
|
|
|
|
CV_IMPL int cvCheckArr( const CvArr* arr, int flags,
|
|
|
|
double minVal, double maxVal )
|
|
|
|
{
|
|
|
|
if( (flags & CV_CHECK_RANGE) == 0 )
|
|
|
|
minVal = -DBL_MAX, maxVal = DBL_MAX;
|
|
|
|
return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal );
|
|
|
|
}
|
|
|
|
|
2021-02-23 08:22:06 +08:00
|
|
|
#endif // OPENCV_EXCLUDE_C_API
|
2010-05-12 01:44:00 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
Finds real roots of cubic, quadratic or linear equation.
|
|
|
|
The original code has been taken from Ken Turkowski web page
|
|
|
|
(http://www.worldserver.com/turk/opensource/) and adopted for OpenCV.
|
|
|
|
Here is the copyright notice.
|
|
|
|
|
|
|
|
-----------------------------------------------------------------------
|
|
|
|
Copyright (C) 1978-1999 Ken Turkowski. <turk@computer.org>
|
|
|
|
|
|
|
|
All rights reserved.
|
|
|
|
|
|
|
|
Warranty Information
|
|
|
|
Even though I have reviewed this software, I make no warranty
|
|
|
|
or representation, either express or implied, with respect to this
|
|
|
|
software, its quality, accuracy, merchantability, or fitness for a
|
|
|
|
particular purpose. As a result, this software is provided "as is,"
|
|
|
|
and you, its user, are assuming the entire risk as to its quality
|
|
|
|
and accuracy.
|
|
|
|
|
|
|
|
This code may be used and freely distributed as long as it includes
|
|
|
|
this copyright notice and the above warranty information.
|
|
|
|
-----------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
2011-06-06 22:51:27 +08:00
|
|
|
int cv::solveCubic( InputArray _coeffs, OutputArray _roots )
|
2011-04-17 21:14:45 +08:00
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
const int n0 = 3;
|
|
|
|
Mat coeffs = _coeffs.getMat();
|
|
|
|
int ctype = coeffs.type();
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
CV_Assert( ctype == CV_32F || ctype == CV_64F );
|
|
|
|
CV_Assert( (coeffs.size() == Size(n0, 1) ||
|
|
|
|
coeffs.size() == Size(n0+1, 1) ||
|
|
|
|
coeffs.size() == Size(1, n0) ||
|
|
|
|
coeffs.size() == Size(1, n0+1)) );
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2013-03-29 01:01:12 +08:00
|
|
|
_roots.create(n0, 1, ctype, -1, true, _OutputArray::DEPTH_MASK_FLT);
|
2011-04-17 21:14:45 +08:00
|
|
|
Mat roots = _roots.getMat();
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
int i = -1, n = 0;
|
2010-05-12 01:44:00 +08:00
|
|
|
double a0 = 1., a1, a2, a3;
|
|
|
|
double x0 = 0., x1 = 0., x2 = 0.;
|
2011-04-17 21:14:45 +08:00
|
|
|
int ncoeffs = coeffs.rows + coeffs.cols - 1;
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
if( ctype == CV_32FC1 )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
if( ncoeffs == 4 )
|
|
|
|
a0 = coeffs.at<float>(++i);
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
a1 = coeffs.at<float>(i+1);
|
|
|
|
a2 = coeffs.at<float>(i+2);
|
|
|
|
a3 = coeffs.at<float>(i+3);
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
if( ncoeffs == 4 )
|
|
|
|
a0 = coeffs.at<double>(++i);
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
a1 = coeffs.at<double>(i+1);
|
|
|
|
a2 = coeffs.at<double>(i+2);
|
|
|
|
a3 = coeffs.at<double>(i+3);
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
if( a0 == 0 )
|
|
|
|
{
|
|
|
|
if( a1 == 0 )
|
|
|
|
{
|
|
|
|
if( a2 == 0 )
|
|
|
|
n = a3 == 0 ? -1 : 0;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// linear equation
|
2010-11-08 03:23:48 +08:00
|
|
|
x0 = -a3/a2;
|
2010-05-12 01:44:00 +08:00
|
|
|
n = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// quadratic equation
|
|
|
|
double d = a2*a2 - 4*a1*a3;
|
|
|
|
if( d >= 0 )
|
|
|
|
{
|
2013-02-25 00:14:01 +08:00
|
|
|
d = std::sqrt(d);
|
2010-11-08 03:23:48 +08:00
|
|
|
double q1 = (-a2 + d) * 0.5;
|
|
|
|
double q2 = (a2 + d) * -0.5;
|
|
|
|
if( fabs(q1) > fabs(q2) )
|
|
|
|
{
|
|
|
|
x0 = q1 / a1;
|
|
|
|
x1 = a3 / q1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
x0 = q2 / a1;
|
|
|
|
x1 = a3 / q2;
|
|
|
|
}
|
2010-05-12 01:44:00 +08:00
|
|
|
n = d > 0 ? 2 : 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
a0 = 1./a0;
|
|
|
|
a1 *= a0;
|
|
|
|
a2 *= a0;
|
|
|
|
a3 *= a0;
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
double Q = (a1 * a1 - 3 * a2) * (1./9);
|
|
|
|
double R = (2 * a1 * a1 * a1 - 9 * a1 * a2 + 27 * a3) * (1./54);
|
|
|
|
double Qcubed = Q * Q * Q;
|
|
|
|
double d = Qcubed - R * R;
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2015-11-21 04:03:32 +08:00
|
|
|
if( d > 0 )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2015-11-21 04:03:32 +08:00
|
|
|
double theta = acos(R / sqrt(Qcubed));
|
|
|
|
double sqrtQ = sqrt(Q);
|
2010-05-12 01:44:00 +08:00
|
|
|
double t0 = -2 * sqrtQ;
|
|
|
|
double t1 = theta * (1./3);
|
|
|
|
double t2 = a1 * (1./3);
|
|
|
|
x0 = t0 * cos(t1) - t2;
|
|
|
|
x1 = t0 * cos(t1 + (2.*CV_PI/3)) - t2;
|
|
|
|
x2 = t0 * cos(t1 + (4.*CV_PI/3)) - t2;
|
|
|
|
n = 3;
|
|
|
|
}
|
2015-11-21 04:03:32 +08:00
|
|
|
else if( d == 0 )
|
|
|
|
{
|
|
|
|
if(R >= 0)
|
|
|
|
{
|
|
|
|
x0 = -2*pow(R, 1./3) - a1/3;
|
|
|
|
x1 = pow(R, 1./3) - a1/3;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
x0 = 2*pow(-R, 1./3) - a1/3;
|
|
|
|
x1 = -pow(-R, 1./3) - a1/3;
|
|
|
|
}
|
|
|
|
x2 = 0;
|
|
|
|
n = x0 == x1 ? 1 : 2;
|
|
|
|
x1 = x0 == x1 ? 0 : x1;
|
|
|
|
}
|
2010-05-12 01:44:00 +08:00
|
|
|
else
|
|
|
|
{
|
|
|
|
double e;
|
2015-11-21 04:03:32 +08:00
|
|
|
d = sqrt(-d);
|
|
|
|
e = pow(d + fabs(R), 1./3);
|
2010-05-12 01:44:00 +08:00
|
|
|
if( R > 0 )
|
|
|
|
e = -e;
|
|
|
|
x0 = (e + Q / e) - a1 * (1./3);
|
|
|
|
n = 1;
|
|
|
|
}
|
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
if( roots.type() == CV_32FC1 )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
roots.at<float>(0) = (float)x0;
|
|
|
|
roots.at<float>(1) = (float)x1;
|
|
|
|
roots.at<float>(2) = (float)x2;
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2011-04-17 21:14:45 +08:00
|
|
|
roots.at<double>(0) = x0;
|
|
|
|
roots.at<double>(1) = x1;
|
|
|
|
roots.at<double>(2) = x2;
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* finds complex roots of a polynomial using Durand-Kerner method:
|
|
|
|
http://en.wikipedia.org/wiki/Durand%E2%80%93Kerner_method */
|
2011-06-06 22:51:27 +08:00
|
|
|
double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters )
|
2010-05-12 01:44:00 +08:00
|
|
|
{
|
2018-09-14 05:35:26 +08:00
|
|
|
CV_INSTRUMENT_REGION();
|
2016-08-18 14:53:00 +08:00
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
typedef Complex<double> C;
|
|
|
|
|
|
|
|
double maxDiff = 0;
|
2011-04-17 21:14:45 +08:00
|
|
|
int iter, i, j;
|
|
|
|
Mat coeffs0 = _coeffs0.getMat();
|
|
|
|
int ctype = _coeffs0.type();
|
|
|
|
int cdepth = CV_MAT_DEPTH(ctype);
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
CV_Assert( CV_MAT_DEPTH(ctype) >= CV_32F && CV_MAT_CN(ctype) <= 2 );
|
|
|
|
CV_Assert( coeffs0.rows == 1 || coeffs0.cols == 1 );
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2015-05-26 04:43:39 +08:00
|
|
|
int n0 = coeffs0.cols + coeffs0.rows - 2, n = n0;
|
2010-05-12 01:44:00 +08:00
|
|
|
|
2013-03-29 01:01:12 +08:00
|
|
|
_roots0.create(n, 1, CV_MAKETYPE(cdepth, 2), -1, true, _OutputArray::DEPTH_MASK_FLT);
|
2011-04-17 21:14:45 +08:00
|
|
|
Mat roots0 = _roots0.getMat();
|
2012-06-08 01:21:29 +08:00
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
AutoBuffer<C> buf(n*2+2);
|
2018-06-11 06:42:00 +08:00
|
|
|
C *coeffs = buf.data(), *roots = coeffs + n + 1;
|
2010-05-12 01:44:00 +08:00
|
|
|
Mat coeffs1(coeffs0.size(), CV_MAKETYPE(CV_64F, coeffs0.channels()), coeffs0.channels() == 2 ? coeffs : roots);
|
|
|
|
coeffs0.convertTo(coeffs1, coeffs1.type());
|
|
|
|
if( coeffs0.channels() == 1 )
|
|
|
|
{
|
|
|
|
const double* rcoeffs = (const double*)roots;
|
|
|
|
for( i = 0; i <= n; i++ )
|
|
|
|
coeffs[i] = C(rcoeffs[i], 0);
|
|
|
|
}
|
|
|
|
|
2015-05-26 04:43:39 +08:00
|
|
|
for( ; n > 1; n-- )
|
|
|
|
{
|
|
|
|
if( std::abs(coeffs[n].re) + std::abs(coeffs[n].im) > DBL_EPSILON )
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
C p(1, 0), r(1, 1);
|
|
|
|
|
|
|
|
for( i = 0; i < n; i++ )
|
|
|
|
{
|
|
|
|
roots[i] = p;
|
|
|
|
p = p * r;
|
|
|
|
}
|
|
|
|
|
|
|
|
maxIters = maxIters <= 0 ? 1000 : maxIters;
|
|
|
|
for( iter = 0; iter < maxIters; iter++ )
|
|
|
|
{
|
|
|
|
maxDiff = 0;
|
|
|
|
for( i = 0; i < n; i++ )
|
|
|
|
{
|
|
|
|
p = roots[i];
|
2012-04-14 05:50:59 +08:00
|
|
|
C num = coeffs[n], denom = coeffs[n];
|
2015-11-21 03:48:29 +08:00
|
|
|
int num_same_root = 1;
|
2010-05-12 01:44:00 +08:00
|
|
|
for( j = 0; j < n; j++ )
|
|
|
|
{
|
|
|
|
num = num*p + coeffs[n-j-1];
|
2015-11-21 03:48:29 +08:00
|
|
|
if( j != i )
|
|
|
|
{
|
|
|
|
if ( (p - roots[j]).re != 0 || (p - roots[j]).im != 0 )
|
|
|
|
denom = denom * (p - roots[j]);
|
|
|
|
else
|
|
|
|
num_same_root++;
|
|
|
|
}
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
num /= denom;
|
2015-11-21 03:48:29 +08:00
|
|
|
if( num_same_root > 1)
|
|
|
|
{
|
|
|
|
double old_num_re = num.re;
|
|
|
|
double old_num_im = num.im;
|
|
|
|
int square_root_times = num_same_root % 2 == 0 ? num_same_root / 2 : num_same_root / 2 - 1;
|
|
|
|
|
|
|
|
for( j = 0; j < square_root_times; j++)
|
|
|
|
{
|
|
|
|
num.re = old_num_re*old_num_re + old_num_im*old_num_im;
|
|
|
|
num.re = sqrt(num.re);
|
|
|
|
num.re += old_num_re;
|
|
|
|
num.im = num.re - old_num_re;
|
|
|
|
num.re /= 2;
|
|
|
|
num.re = sqrt(num.re);
|
|
|
|
|
|
|
|
num.im /= 2;
|
|
|
|
num.im = sqrt(num.im);
|
|
|
|
if( old_num_re < 0 ) num.im = -num.im;
|
|
|
|
}
|
|
|
|
|
|
|
|
if( num_same_root % 2 != 0){
|
|
|
|
Mat cube_coefs(4, 1, CV_64FC1);
|
|
|
|
Mat cube_roots(3, 1, CV_64FC2);
|
|
|
|
cube_coefs.at<double>(3) = -(pow(old_num_re, 3));
|
|
|
|
cube_coefs.at<double>(2) = -(15*pow(old_num_re, 2) + 27*pow(old_num_im, 2));
|
|
|
|
cube_coefs.at<double>(1) = -48*old_num_re;
|
|
|
|
cube_coefs.at<double>(0) = 64;
|
|
|
|
solveCubic(cube_coefs, cube_roots);
|
|
|
|
|
|
|
|
if(cube_roots.at<double>(0) >= 0) num.re = pow(cube_roots.at<double>(0), 1./3);
|
|
|
|
else num.re = -pow(-cube_roots.at<double>(0), 1./3);
|
|
|
|
num.im = sqrt(pow(num.re, 2) / 3 - old_num_re / (3*num.re));
|
|
|
|
}
|
|
|
|
}
|
2010-05-12 01:44:00 +08:00
|
|
|
roots[i] = p - num;
|
2013-02-25 00:14:01 +08:00
|
|
|
maxDiff = std::max(maxDiff, cv::abs(num));
|
2010-05-12 01:44:00 +08:00
|
|
|
}
|
|
|
|
if( maxDiff <= 0 )
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if( coeffs0.channels() == 1 )
|
|
|
|
{
|
|
|
|
const double verySmallEps = 1e-100;
|
|
|
|
for( i = 0; i < n; i++ )
|
|
|
|
if( fabs(roots[i].im) < verySmallEps )
|
|
|
|
roots[i].im = 0;
|
|
|
|
}
|
|
|
|
|
2015-05-26 04:43:39 +08:00
|
|
|
for( ; n < n0; n++ )
|
|
|
|
roots[n+1] = roots[n];
|
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
Mat(roots0.size(), CV_64FC2, roots).convertTo(roots0, roots0.type());
|
|
|
|
return maxDiff;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-02-23 08:22:06 +08:00
|
|
|
#ifndef OPENCV_EXCLUDE_C_API
|
|
|
|
|
2011-04-17 21:14:45 +08:00
|
|
|
CV_IMPL int
|
|
|
|
cvSolveCubic( const CvMat* coeffs, CvMat* roots )
|
|
|
|
{
|
|
|
|
cv::Mat _coeffs = cv::cvarrToMat(coeffs), _roots = cv::cvarrToMat(roots), _roots0 = _roots;
|
|
|
|
int nroots = cv::solveCubic(_coeffs, _roots);
|
|
|
|
CV_Assert( _roots.data == _roots0.data ); // check that the array of roots was not reallocated
|
|
|
|
return nroots;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int)
|
|
|
|
{
|
2013-03-29 01:01:12 +08:00
|
|
|
cv::Mat _a = cv::cvarrToMat(a);
|
|
|
|
cv::Mat _r = cv::cvarrToMat(r);
|
|
|
|
cv::Mat _r0 = _r;
|
2010-05-12 01:44:00 +08:00
|
|
|
cv::solvePoly(_a, _r, maxiter);
|
|
|
|
CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated
|
|
|
|
}
|
|
|
|
|
2021-02-23 08:22:06 +08:00
|
|
|
#endif // OPENCV_EXCLUDE_C_API
|
2010-05-12 01:44:00 +08:00
|
|
|
|
2018-08-07 23:49:05 +08:00
|
|
|
|
|
|
|
// Common constants for dispatched code
|
|
|
|
namespace cv { namespace details {
|
|
|
|
|
|
|
|
#define EXPTAB_SCALE 6
|
|
|
|
#define EXPTAB_MASK ((1 << EXPTAB_SCALE) - 1)
|
|
|
|
|
|
|
|
#define EXPPOLY_32F_A0 .9670371139572337719125840413672004409288e-2
|
|
|
|
|
|
|
|
static const double CV_DECL_ALIGNED(64) expTab[EXPTAB_MASK + 1] = {
|
|
|
|
1.0 * EXPPOLY_32F_A0,
|
|
|
|
1.0108892860517004600204097905619 * EXPPOLY_32F_A0,
|
|
|
|
1.0218971486541166782344801347833 * EXPPOLY_32F_A0,
|
|
|
|
1.0330248790212284225001082839705 * EXPPOLY_32F_A0,
|
|
|
|
1.0442737824274138403219664787399 * EXPPOLY_32F_A0,
|
|
|
|
1.0556451783605571588083413251529 * EXPPOLY_32F_A0,
|
|
|
|
1.0671404006768236181695211209928 * EXPPOLY_32F_A0,
|
|
|
|
1.0787607977571197937406800374385 * EXPPOLY_32F_A0,
|
|
|
|
1.0905077326652576592070106557607 * EXPPOLY_32F_A0,
|
|
|
|
1.1023825833078409435564142094256 * EXPPOLY_32F_A0,
|
|
|
|
1.1143867425958925363088129569196 * EXPPOLY_32F_A0,
|
|
|
|
1.126521618608241899794798643787 * EXPPOLY_32F_A0,
|
|
|
|
1.1387886347566916537038302838415 * EXPPOLY_32F_A0,
|
|
|
|
1.151189229952982705817759635202 * EXPPOLY_32F_A0,
|
|
|
|
1.1637248587775775138135735990922 * EXPPOLY_32F_A0,
|
|
|
|
1.1763969916502812762846457284838 * EXPPOLY_32F_A0,
|
|
|
|
1.1892071150027210667174999705605 * EXPPOLY_32F_A0,
|
|
|
|
1.2021567314527031420963969574978 * EXPPOLY_32F_A0,
|
|
|
|
1.2152473599804688781165202513388 * EXPPOLY_32F_A0,
|
|
|
|
1.2284805361068700056940089577928 * EXPPOLY_32F_A0,
|
|
|
|
1.2418578120734840485936774687266 * EXPPOLY_32F_A0,
|
|
|
|
1.2553807570246910895793906574423 * EXPPOLY_32F_A0,
|
|
|
|
1.2690509571917332225544190810323 * EXPPOLY_32F_A0,
|
|
|
|
1.2828700160787782807266697810215 * EXPPOLY_32F_A0,
|
|
|
|
1.2968395546510096659337541177925 * EXPPOLY_32F_A0,
|
|
|
|
1.3109612115247643419229917863308 * EXPPOLY_32F_A0,
|
|
|
|
1.3252366431597412946295370954987 * EXPPOLY_32F_A0,
|
|
|
|
1.3396675240533030053600306697244 * EXPPOLY_32F_A0,
|
|
|
|
1.3542555469368927282980147401407 * EXPPOLY_32F_A0,
|
|
|
|
1.3690024229745906119296011329822 * EXPPOLY_32F_A0,
|
|
|
|
1.3839098819638319548726595272652 * EXPPOLY_32F_A0,
|
|
|
|
1.3989796725383111402095281367152 * EXPPOLY_32F_A0,
|
|
|
|
1.4142135623730950488016887242097 * EXPPOLY_32F_A0,
|
|
|
|
1.4296133383919700112350657782751 * EXPPOLY_32F_A0,
|
|
|
|
1.4451808069770466200370062414717 * EXPPOLY_32F_A0,
|
|
|
|
1.4609177941806469886513028903106 * EXPPOLY_32F_A0,
|
|
|
|
1.476826145939499311386907480374 * EXPPOLY_32F_A0,
|
|
|
|
1.4929077282912648492006435314867 * EXPPOLY_32F_A0,
|
|
|
|
1.5091644275934227397660195510332 * EXPPOLY_32F_A0,
|
|
|
|
1.5255981507445383068512536895169 * EXPPOLY_32F_A0,
|
|
|
|
1.5422108254079408236122918620907 * EXPPOLY_32F_A0,
|
|
|
|
1.5590044002378369670337280894749 * EXPPOLY_32F_A0,
|
|
|
|
1.5759808451078864864552701601819 * EXPPOLY_32F_A0,
|
|
|
|
1.5931421513422668979372486431191 * EXPPOLY_32F_A0,
|
|
|
|
1.6104903319492543081795206673574 * EXPPOLY_32F_A0,
|
|
|
|
1.628027421857347766848218522014 * EXPPOLY_32F_A0,
|
|
|
|
1.6457554781539648445187567247258 * EXPPOLY_32F_A0,
|
|
|
|
1.6636765803267364350463364569764 * EXPPOLY_32F_A0,
|
|
|
|
1.6817928305074290860622509524664 * EXPPOLY_32F_A0,
|
|
|
|
1.7001063537185234695013625734975 * EXPPOLY_32F_A0,
|
|
|
|
1.7186192981224779156293443764563 * EXPPOLY_32F_A0,
|
|
|
|
1.7373338352737062489942020818722 * EXPPOLY_32F_A0,
|
|
|
|
1.7562521603732994831121606193753 * EXPPOLY_32F_A0,
|
|
|
|
1.7753764925265212525505592001993 * EXPPOLY_32F_A0,
|
|
|
|
1.7947090750031071864277032421278 * EXPPOLY_32F_A0,
|
|
|
|
1.8142521755003987562498346003623 * EXPPOLY_32F_A0,
|
|
|
|
1.8340080864093424634870831895883 * EXPPOLY_32F_A0,
|
|
|
|
1.8539791250833855683924530703377 * EXPPOLY_32F_A0,
|
|
|
|
1.8741676341102999013299989499544 * EXPPOLY_32F_A0,
|
|
|
|
1.8945759815869656413402186534269 * EXPPOLY_32F_A0,
|
|
|
|
1.9152065613971472938726112702958 * EXPPOLY_32F_A0,
|
|
|
|
1.9360617934922944505980559045667 * EXPPOLY_32F_A0,
|
|
|
|
1.9571441241754002690183222516269 * EXPPOLY_32F_A0,
|
|
|
|
1.9784560263879509682582499181312 * EXPPOLY_32F_A0,
|
|
|
|
};
|
|
|
|
|
|
|
|
const double* getExpTab64f()
|
|
|
|
{
|
|
|
|
return expTab;
|
|
|
|
}
|
|
|
|
|
|
|
|
const float* getExpTab32f()
|
|
|
|
{
|
|
|
|
static float CV_DECL_ALIGNED(64) expTab_f[EXPTAB_MASK+1];
|
2019-10-08 07:32:25 +08:00
|
|
|
static std::atomic<bool> expTab_f_initialized(false);
|
|
|
|
if (!expTab_f_initialized.load())
|
2018-08-07 23:49:05 +08:00
|
|
|
{
|
|
|
|
for( int j = 0; j <= EXPTAB_MASK; j++ )
|
|
|
|
expTab_f[j] = (float)expTab[j];
|
|
|
|
expTab_f_initialized = true;
|
|
|
|
}
|
|
|
|
return expTab_f;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define LOGTAB_SCALE 8
|
|
|
|
#define LOGTAB_MASK ((1 << LOGTAB_SCALE) - 1)
|
|
|
|
|
|
|
|
static const double CV_DECL_ALIGNED(64) logTab[(LOGTAB_MASK+1)*2] = {
|
|
|
|
0.0000000000000000000000000000000000000000, 1.000000000000000000000000000000000000000,
|
|
|
|
.00389864041565732288852075271279318258166, .9961089494163424124513618677042801556420,
|
|
|
|
.00778214044205494809292034119607706088573, .9922480620155038759689922480620155038760,
|
|
|
|
.01165061721997527263705585198749759001657, .9884169884169884169884169884169884169884,
|
|
|
|
.01550418653596525274396267235488267033361, .9846153846153846153846153846153846153846,
|
|
|
|
.01934296284313093139406447562578250654042, .9808429118773946360153256704980842911877,
|
|
|
|
.02316705928153437593630670221500622574241, .9770992366412213740458015267175572519084,
|
|
|
|
.02697658769820207233514075539915211265906, .9733840304182509505703422053231939163498,
|
|
|
|
.03077165866675368732785500469617545604706, .9696969696969696969696969696969696969697,
|
|
|
|
.03455238150665972812758397481047722976656, .9660377358490566037735849056603773584906,
|
|
|
|
.03831886430213659461285757856785494368522, .9624060150375939849624060150375939849624,
|
|
|
|
.04207121392068705056921373852674150839447, .9588014981273408239700374531835205992509,
|
|
|
|
.04580953603129420126371940114040626212953, .9552238805970149253731343283582089552239,
|
|
|
|
.04953393512227662748292900118940451648088, .9516728624535315985130111524163568773234,
|
|
|
|
.05324451451881227759255210685296333394944, .9481481481481481481481481481481481481481,
|
|
|
|
.05694137640013842427411105973078520037234, .9446494464944649446494464944649446494465,
|
|
|
|
.06062462181643483993820353816772694699466, .9411764705882352941176470588235294117647,
|
|
|
|
.06429435070539725460836422143984236754475, .9377289377289377289377289377289377289377,
|
|
|
|
.06795066190850773679699159401934593915938, .9343065693430656934306569343065693430657,
|
|
|
|
.07159365318700880442825962290953611955044, .9309090909090909090909090909090909090909,
|
|
|
|
.07522342123758751775142172846244648098944, .9275362318840579710144927536231884057971,
|
|
|
|
.07884006170777602129362549021607264876369, .9241877256317689530685920577617328519856,
|
|
|
|
.08244366921107458556772229485432035289706, .9208633093525179856115107913669064748201,
|
|
|
|
.08603433734180314373940490213499288074675, .9175627240143369175627240143369175627240,
|
|
|
|
.08961215868968712416897659522874164395031, .9142857142857142857142857142857142857143,
|
|
|
|
.09317722485418328259854092721070628613231, .9110320284697508896797153024911032028470,
|
|
|
|
.09672962645855109897752299730200320482256, .9078014184397163120567375886524822695035,
|
|
|
|
.10026945316367513738597949668474029749630, .9045936395759717314487632508833922261484,
|
|
|
|
.10379679368164355934833764649738441221420, .9014084507042253521126760563380281690141,
|
|
|
|
.10731173578908805021914218968959175981580, .8982456140350877192982456140350877192982,
|
|
|
|
.11081436634029011301105782649756292812530, .8951048951048951048951048951048951048951,
|
|
|
|
.11430477128005862852422325204315711744130, .8919860627177700348432055749128919860627,
|
|
|
|
.11778303565638344185817487641543266363440, .8888888888888888888888888888888888888889,
|
|
|
|
.12124924363286967987640707633545389398930, .8858131487889273356401384083044982698962,
|
|
|
|
.12470347850095722663787967121606925502420, .8827586206896551724137931034482758620690,
|
|
|
|
.12814582269193003360996385708858724683530, .8797250859106529209621993127147766323024,
|
|
|
|
.13157635778871926146571524895989568904040, .8767123287671232876712328767123287671233,
|
|
|
|
.13499516453750481925766280255629681050780, .8737201365187713310580204778156996587031,
|
|
|
|
.13840232285911913123754857224412262439730, .8707482993197278911564625850340136054422,
|
|
|
|
.14179791186025733629172407290752744302150, .8677966101694915254237288135593220338983,
|
|
|
|
.14518200984449788903951628071808954700830, .8648648648648648648648648648648648648649,
|
|
|
|
.14855469432313711530824207329715136438610, .8619528619528619528619528619528619528620,
|
|
|
|
.15191604202584196858794030049466527998450, .8590604026845637583892617449664429530201,
|
|
|
|
.15526612891112392955683674244937719777230, .8561872909698996655518394648829431438127,
|
|
|
|
.15860503017663857283636730244325008243330, .8533333333333333333333333333333333333333,
|
|
|
|
.16193282026931324346641360989451641216880, .8504983388704318936877076411960132890365,
|
|
|
|
.16524957289530714521497145597095368430010, .8476821192052980132450331125827814569536,
|
|
|
|
.16855536102980664403538924034364754334090, .8448844884488448844884488448844884488449,
|
|
|
|
.17185025692665920060697715143760433420540, .8421052631578947368421052631578947368421,
|
|
|
|
.17513433212784912385018287750426679849630, .8393442622950819672131147540983606557377,
|
|
|
|
.17840765747281828179637841458315961062910, .8366013071895424836601307189542483660131,
|
|
|
|
.18167030310763465639212199675966985523700, .8338762214983713355048859934853420195440,
|
|
|
|
.18492233849401198964024217730184318497780, .8311688311688311688311688311688311688312,
|
|
|
|
.18816383241818296356839823602058459073300, .8284789644012944983818770226537216828479,
|
|
|
|
.19139485299962943898322009772527962923050, .8258064516129032258064516129032258064516,
|
|
|
|
.19461546769967164038916962454095482826240, .8231511254019292604501607717041800643087,
|
|
|
|
.19782574332991986754137769821682013571260, .8205128205128205128205128205128205128205,
|
|
|
|
.20102574606059073203390141770796617493040, .8178913738019169329073482428115015974441,
|
|
|
|
.20421554142869088876999228432396193966280, .8152866242038216560509554140127388535032,
|
|
|
|
.20739519434607056602715147164417430758480, .8126984126984126984126984126984126984127,
|
|
|
|
.21056476910734961416338251183333341032260, .8101265822784810126582278481012658227848,
|
|
|
|
.21372432939771812687723695489694364368910, .8075709779179810725552050473186119873817,
|
|
|
|
.21687393830061435506806333251006435602900, .8050314465408805031446540880503144654088,
|
|
|
|
.22001365830528207823135744547471404075630, .8025078369905956112852664576802507836991,
|
|
|
|
.22314355131420973710199007200571941211830, .8000000000000000000000000000000000000000,
|
|
|
|
.22626367865045338145790765338460914790630, .7975077881619937694704049844236760124611,
|
|
|
|
.22937410106484582006380890106811420992010, .7950310559006211180124223602484472049689,
|
|
|
|
.23247487874309405442296849741978803649550, .7925696594427244582043343653250773993808,
|
|
|
|
.23556607131276688371634975283086532726890, .7901234567901234567901234567901234567901,
|
|
|
|
.23864773785017498464178231643018079921600, .7876923076923076923076923076923076923077,
|
|
|
|
.24171993688714515924331749374687206000090, .7852760736196319018404907975460122699387,
|
|
|
|
.24478272641769091566565919038112042471760, .7828746177370030581039755351681957186544,
|
|
|
|
.24783616390458124145723672882013488560910, .7804878048780487804878048780487804878049,
|
|
|
|
.25088030628580937353433455427875742316250, .7781155015197568389057750759878419452888,
|
|
|
|
.25391520998096339667426946107298135757450, .7757575757575757575757575757575757575758,
|
|
|
|
.25694093089750041913887912414793390780680, .7734138972809667673716012084592145015106,
|
|
|
|
.25995752443692604627401010475296061486000, .7710843373493975903614457831325301204819,
|
|
|
|
.26296504550088134477547896494797896593800, .7687687687687687687687687687687687687688,
|
|
|
|
.26596354849713793599974565040611196309330, .7664670658682634730538922155688622754491,
|
|
|
|
.26895308734550393836570947314612567424780, .7641791044776119402985074626865671641791,
|
|
|
|
.27193371548364175804834985683555714786050, .7619047619047619047619047619047619047619,
|
|
|
|
.27490548587279922676529508862586226314300, .7596439169139465875370919881305637982196,
|
|
|
|
.27786845100345625159121709657483734190480, .7573964497041420118343195266272189349112,
|
|
|
|
.28082266290088775395616949026589281857030, .7551622418879056047197640117994100294985,
|
|
|
|
.28376817313064456316240580235898960381750, .7529411764705882352941176470588235294118,
|
|
|
|
.28670503280395426282112225635501090437180, .7507331378299120234604105571847507331378,
|
|
|
|
.28963329258304265634293983566749375313530, .7485380116959064327485380116959064327485,
|
|
|
|
.29255300268637740579436012922087684273730, .7463556851311953352769679300291545189504,
|
|
|
|
.29546421289383584252163927885703742504130, .7441860465116279069767441860465116279070,
|
|
|
|
.29836697255179722709783618483925238251680, .7420289855072463768115942028985507246377,
|
|
|
|
.30126133057816173455023545102449133992200, .7398843930635838150289017341040462427746,
|
|
|
|
.30414733546729666446850615102448500692850, .7377521613832853025936599423631123919308,
|
|
|
|
.30702503529491181888388950937951449304830, .7356321839080459770114942528735632183908,
|
|
|
|
.30989447772286465854207904158101882785550, .7335243553008595988538681948424068767908,
|
|
|
|
.31275571000389684739317885942000430077330, .7314285714285714285714285714285714285714,
|
|
|
|
.31560877898630329552176476681779604405180, .7293447293447293447293447293447293447293,
|
|
|
|
.31845373111853458869546784626436419785030, .7272727272727272727272727272727272727273,
|
|
|
|
.32129061245373424782201254856772720813750, .7252124645892351274787535410764872521246,
|
|
|
|
.32411946865421192853773391107097268104550, .7231638418079096045197740112994350282486,
|
|
|
|
.32694034499585328257253991068864706903700, .7211267605633802816901408450704225352113,
|
|
|
|
.32975328637246797969240219572384376078850, .7191011235955056179775280898876404494382,
|
|
|
|
.33255833730007655635318997155991382896900, .7170868347338935574229691876750700280112,
|
|
|
|
.33535554192113781191153520921943709254280, .7150837988826815642458100558659217877095,
|
|
|
|
.33814494400871636381467055798566434532400, .7130919220055710306406685236768802228412,
|
|
|
|
.34092658697059319283795275623560883104800, .7111111111111111111111111111111111111111,
|
|
|
|
.34370051385331840121395430287520866841080, .7091412742382271468144044321329639889197,
|
|
|
|
.34646676734620857063262633346312213689100, .7071823204419889502762430939226519337017,
|
|
|
|
.34922538978528827602332285096053965389730, .7052341597796143250688705234159779614325,
|
|
|
|
.35197642315717814209818925519357435405250, .7032967032967032967032967032967032967033,
|
|
|
|
.35471990910292899856770532096561510115850, .7013698630136986301369863013698630136986,
|
|
|
|
.35745588892180374385176833129662554711100, .6994535519125683060109289617486338797814,
|
|
|
|
.36018440357500774995358483465679455548530, .6975476839237057220708446866485013623978,
|
|
|
|
.36290549368936841911903457003063522279280, .6956521739130434782608695652173913043478,
|
|
|
|
.36561919956096466943762379742111079394830, .6937669376693766937669376693766937669377,
|
|
|
|
.36832556115870762614150635272380895912650, .6918918918918918918918918918918918918919,
|
|
|
|
.37102461812787262962487488948681857436900, .6900269541778975741239892183288409703504,
|
|
|
|
.37371640979358405898480555151763837784530, .6881720430107526881720430107526881720430,
|
|
|
|
.37640097516425302659470730759494472295050, .6863270777479892761394101876675603217158,
|
|
|
|
.37907835293496944251145919224654790014030, .6844919786096256684491978609625668449198,
|
|
|
|
.38174858149084833769393299007788300514230, .6826666666666666666666666666666666666667,
|
|
|
|
.38441169891033200034513583887019194662580, .6808510638297872340425531914893617021277,
|
|
|
|
.38706774296844825844488013899535872042180, .6790450928381962864721485411140583554377,
|
|
|
|
.38971675114002518602873692543653305619950, .6772486772486772486772486772486772486772,
|
|
|
|
.39235876060286384303665840889152605086580, .6754617414248021108179419525065963060686,
|
|
|
|
.39499380824086893770896722344332374632350, .6736842105263157894736842105263157894737,
|
|
|
|
.39762193064713846624158577469643205404280, .6719160104986876640419947506561679790026,
|
|
|
|
.40024316412701266276741307592601515352730, .6701570680628272251308900523560209424084,
|
|
|
|
.40285754470108348090917615991202183067800, .6684073107049608355091383812010443864230,
|
|
|
|
.40546510810816432934799991016916465014230, .6666666666666666666666666666666666666667,
|
|
|
|
.40806588980822172674223224930756259709600, .6649350649350649350649350649350649350649,
|
|
|
|
.41065992498526837639616360320360399782650, .6632124352331606217616580310880829015544,
|
|
|
|
.41324724855021932601317757871584035456180, .6614987080103359173126614987080103359173,
|
|
|
|
.41582789514371093497757669865677598863850, .6597938144329896907216494845360824742268,
|
|
|
|
.41840189913888381489925905043492093682300, .6580976863753213367609254498714652956298,
|
|
|
|
.42096929464412963239894338585145305842150, .6564102564102564102564102564102564102564,
|
|
|
|
.42353011550580327293502591601281892508280, .6547314578005115089514066496163682864450,
|
|
|
|
.42608439531090003260516141381231136620050, .6530612244897959183673469387755102040816,
|
|
|
|
.42863216738969872610098832410585600882780, .6513994910941475826972010178117048346056,
|
|
|
|
.43117346481837132143866142541810404509300, .6497461928934010152284263959390862944162,
|
|
|
|
.43370832042155937902094819946796633303180, .6481012658227848101265822784810126582278,
|
|
|
|
.43623676677491801667585491486534010618930, .6464646464646464646464646464646464646465,
|
|
|
|
.43875883620762790027214350629947148263450, .6448362720403022670025188916876574307305,
|
|
|
|
.44127456080487520440058801796112675219780, .6432160804020100502512562814070351758794,
|
|
|
|
.44378397241030093089975139264424797147500, .6416040100250626566416040100250626566416,
|
|
|
|
.44628710262841947420398014401143882423650, .6400000000000000000000000000000000000000,
|
|
|
|
.44878398282700665555822183705458883196130, .6384039900249376558603491271820448877805,
|
|
|
|
.45127464413945855836729492693848442286250, .6368159203980099502487562189054726368159,
|
|
|
|
.45375911746712049854579618113348260521900, .6352357320099255583126550868486352357320,
|
|
|
|
.45623743348158757315857769754074979573500, .6336633663366336633663366336633663366337,
|
|
|
|
.45870962262697662081833982483658473938700, .6320987654320987654320987654320987654321,
|
|
|
|
.46117571512217014895185229761409573256980, .6305418719211822660098522167487684729064,
|
|
|
|
.46363574096303250549055974261136725544930, .6289926289926289926289926289926289926290,
|
|
|
|
.46608972992459918316399125615134835243230, .6274509803921568627450980392156862745098,
|
|
|
|
.46853771156323925639597405279346276074650, .6259168704156479217603911980440097799511,
|
|
|
|
.47097971521879100631480241645476780831830, .6243902439024390243902439024390243902439,
|
|
|
|
.47341577001667212165614273544633761048330, .6228710462287104622871046228710462287105,
|
|
|
|
.47584590486996386493601107758877333253630, .6213592233009708737864077669902912621359,
|
|
|
|
.47827014848147025860569669930555392056700, .6198547215496368038740920096852300242131,
|
|
|
|
.48068852934575190261057286988943815231330, .6183574879227053140096618357487922705314,
|
|
|
|
.48310107575113581113157579238759353756900, .6168674698795180722891566265060240963855,
|
|
|
|
.48550781578170076890899053978500887751580, .6153846153846153846153846153846153846154,
|
|
|
|
.48790877731923892879351001283794175833480, .6139088729016786570743405275779376498801,
|
|
|
|
.49030398804519381705802061333088204264650, .6124401913875598086124401913875598086124,
|
|
|
|
.49269347544257524607047571407747454941280, .6109785202863961813842482100238663484487,
|
|
|
|
.49507726679785146739476431321236304938800, .6095238095238095238095238095238095238095,
|
|
|
|
.49745538920281889838648226032091770321130, .6080760095011876484560570071258907363420,
|
|
|
|
.49982786955644931126130359189119189977650, .6066350710900473933649289099526066350711,
|
|
|
|
.50219473456671548383667413872899487614650, .6052009456264775413711583924349881796690,
|
|
|
|
.50455601075239520092452494282042607665050, .6037735849056603773584905660377358490566,
|
|
|
|
.50691172444485432801997148999362252652650, .6023529411764705882352941176470588235294,
|
|
|
|
.50926190178980790257412536448100581765150, .6009389671361502347417840375586854460094,
|
|
|
|
.51160656874906207391973111953120678663250, .5995316159250585480093676814988290398126,
|
|
|
|
.51394575110223428282552049495279788970950, .5981308411214953271028037383177570093458,
|
|
|
|
.51627947444845445623684554448118433356300, .5967365967365967365967365967365967365967,
|
|
|
|
.51860776420804555186805373523384332656850, .5953488372093023255813953488372093023256,
|
|
|
|
.52093064562418522900344441950437612831600, .5939675174013921113689095127610208816705,
|
|
|
|
.52324814376454775732838697877014055848100, .5925925925925925925925925925925925925926,
|
|
|
|
.52556028352292727401362526507000438869000, .5912240184757505773672055427251732101617,
|
|
|
|
.52786708962084227803046587723656557500350, .5898617511520737327188940092165898617512,
|
|
|
|
.53016858660912158374145519701414741575700, .5885057471264367816091954022988505747126,
|
|
|
|
.53246479886947173376654518506256863474850, .5871559633027522935779816513761467889908,
|
|
|
|
.53475575061602764748158733709715306758900, .5858123569794050343249427917620137299771,
|
|
|
|
.53704146589688361856929077475797384977350, .5844748858447488584474885844748858447489,
|
|
|
|
.53932196859560876944783558428753167390800, .5831435079726651480637813211845102505695,
|
|
|
|
.54159728243274429804188230264117009937750, .5818181818181818181818181818181818181818,
|
|
|
|
.54386743096728351609669971367111429572100, .5804988662131519274376417233560090702948,
|
|
|
|
.54613243759813556721383065450936555862450, .5791855203619909502262443438914027149321,
|
|
|
|
.54839232556557315767520321969641372561450, .5778781038374717832957110609480812641084,
|
|
|
|
.55064711795266219063194057525834068655950, .5765765765765765765765765765765765765766,
|
|
|
|
.55289683768667763352766542084282264113450, .5752808988764044943820224719101123595506,
|
|
|
|
.55514150754050151093110798683483153581600, .5739910313901345291479820627802690582960,
|
|
|
|
.55738115013400635344709144192165695130850, .5727069351230425055928411633109619686801,
|
|
|
|
.55961578793542265941596269840374588966350, .5714285714285714285714285714285714285714,
|
|
|
|
.56184544326269181269140062795486301183700, .5701559020044543429844097995545657015590,
|
|
|
|
.56407013828480290218436721261241473257550, .5688888888888888888888888888888888888889,
|
|
|
|
.56628989502311577464155334382667206227800, .5676274944567627494456762749445676274945,
|
|
|
|
.56850473535266865532378233183408156037350, .5663716814159292035398230088495575221239,
|
|
|
|
.57071468100347144680739575051120482385150, .5651214128035320088300220750551876379691,
|
|
|
|
.57291975356178548306473885531886480748650, .5638766519823788546255506607929515418502,
|
|
|
|
.57511997447138785144460371157038025558000, .5626373626373626373626373626373626373626,
|
|
|
|
.57731536503482350219940144597785547375700, .5614035087719298245614035087719298245614,
|
|
|
|
.57950594641464214795689713355386629700650, .5601750547045951859956236323851203501094,
|
|
|
|
.58169173963462239562716149521293118596100, .5589519650655021834061135371179039301310,
|
|
|
|
.58387276558098266665552955601015128195300, .5577342047930283224400871459694989106754,
|
|
|
|
.58604904500357812846544902640744112432000, .5565217391304347826086956521739130434783,
|
|
|
|
.58822059851708596855957011939608491957200, .5553145336225596529284164859002169197397,
|
|
|
|
.59038744660217634674381770309992134571100, .5541125541125541125541125541125541125541,
|
|
|
|
.59254960960667157898740242671919986605650, .5529157667386609071274298056155507559395,
|
|
|
|
.59470710774669277576265358220553025603300, .5517241379310344827586206896551724137931,
|
|
|
|
.59685996110779382384237123915227130055450, .5505376344086021505376344086021505376344,
|
|
|
|
.59900818964608337768851242799428291618800, .5493562231759656652360515021459227467811,
|
|
|
|
.60115181318933474940990890900138765573500, .5481798715203426124197002141327623126338,
|
|
|
|
.60329085143808425240052883964381180703650, .5470085470085470085470085470085470085470,
|
|
|
|
.60542532396671688843525771517306566238400, .5458422174840085287846481876332622601279,
|
|
|
|
.60755525022454170969155029524699784815300, .5446808510638297872340425531914893617021,
|
|
|
|
.60968064953685519036241657886421307921400, .5435244161358811040339702760084925690021,
|
|
|
|
.61180154110599282990534675263916142284850, .5423728813559322033898305084745762711864,
|
|
|
|
.61391794401237043121710712512140162289150, .5412262156448202959830866807610993657505,
|
|
|
|
.61602987721551394351138242200249806046500, .5400843881856540084388185654008438818565,
|
|
|
|
.61813735955507864705538167982012964785100, .5389473684210526315789473684210526315789,
|
|
|
|
.62024040975185745772080281312810257077200, .5378151260504201680672268907563025210084,
|
|
|
|
.62233904640877868441606324267922900617100, .5366876310272536687631027253668763102725,
|
|
|
|
.62443328801189346144440150965237990021700, .5355648535564853556485355648535564853556,
|
|
|
|
.62652315293135274476554741340805776417250, .5344467640918580375782881002087682672234,
|
|
|
|
.62860865942237409420556559780379757285100, .5333333333333333333333333333333333333333,
|
|
|
|
.63068982562619868570408243613201193511500, .5322245322245322245322245322245322245322,
|
|
|
|
.63276666957103777644277897707070223987100, .5311203319502074688796680497925311203320,
|
|
|
|
.63483920917301017716738442686619237065300, .5300207039337474120082815734989648033126,
|
|
|
|
.63690746223706917739093569252872839570050, .5289256198347107438016528925619834710744,
|
|
|
|
.63897144645792069983514238629140891134750, .5278350515463917525773195876288659793814,
|
|
|
|
.64103117942093124081992527862894348800200, .5267489711934156378600823045267489711934,
|
|
|
|
.64308667860302726193566513757104985415950, .5256673511293634496919917864476386036961,
|
|
|
|
.64513796137358470073053240412264131009600, .5245901639344262295081967213114754098361,
|
|
|
|
.64718504499530948859131740391603671014300, .5235173824130879345603271983640081799591,
|
|
|
|
.64922794662510974195157587018911726772800, .5224489795918367346938775510204081632653,
|
|
|
|
.65126668331495807251485530287027359008800, .5213849287169042769857433808553971486762,
|
|
|
|
.65330127201274557080523663898929953575150, .5203252032520325203252032520325203252033,
|
|
|
|
.65533172956312757406749369692988693714150, .5192697768762677484787018255578093306288,
|
|
|
|
.65735807270835999727154330685152672231200, .5182186234817813765182186234817813765182,
|
|
|
|
.65938031808912778153342060249997302889800, .5171717171717171717171717171717171717172,
|
|
|
|
.66139848224536490484126716182800009846700, .5161290322580645161290322580645161290323,
|
|
|
|
.66341258161706617713093692145776003599150, .5150905432595573440643863179074446680080,
|
|
|
|
.66542263254509037562201001492212526500250, .5140562248995983935742971887550200803213,
|
|
|
|
.66742865127195616370414654738851822912700, .5130260521042084168336673346693386773547,
|
|
|
|
.66943065394262923906154583164607174694550, .5120000000000000000000000000000000000000,
|
|
|
|
.67142865660530226534774556057527661323550, .5109780439121756487025948103792415169661,
|
|
|
|
.67342267521216669923234121597488410770900, .5099601593625498007968127490039840637450,
|
|
|
|
.67541272562017662384192817626171745359900, .5089463220675944333996023856858846918489,
|
|
|
|
.67739882359180603188519853574689477682100, .5079365079365079365079365079365079365079,
|
|
|
|
.67938098479579733801614338517538271844400, .5069306930693069306930693069306930693069,
|
|
|
|
.68135922480790300781450241629499942064300, .5059288537549407114624505928853754940711,
|
|
|
|
.68333355911162063645036823800182901322850, .5049309664694280078895463510848126232742,
|
|
|
|
.68530400309891936760919861626462079584600, .5039370078740157480314960629921259842520,
|
|
|
|
.68727057207096020619019327568821609020250, .5029469548133595284872298624754420432220,
|
|
|
|
.68923328123880889251040571252815425395950, .5019607843137254901960784313725490196078,
|
|
|
|
.69314718055994530941723212145818, 5.0e-01,
|
|
|
|
};
|
|
|
|
|
|
|
|
const double* getLogTab64f()
|
|
|
|
{
|
|
|
|
return logTab;
|
|
|
|
}
|
|
|
|
|
|
|
|
const float* getLogTab32f()
|
|
|
|
{
|
|
|
|
static float CV_DECL_ALIGNED(64) logTab_f[(LOGTAB_MASK+1)*2];
|
2019-10-08 07:32:25 +08:00
|
|
|
static std::atomic<bool> logTab_f_initialized(false);
|
|
|
|
if (!logTab_f_initialized.load())
|
2018-08-07 23:49:05 +08:00
|
|
|
{
|
|
|
|
for (int j = 0; j < (LOGTAB_MASK+1)*2; j++)
|
|
|
|
logTab_f[j] = (float)logTab[j];
|
|
|
|
logTab_f_initialized = true;
|
|
|
|
}
|
|
|
|
return logTab_f;
|
|
|
|
}
|
|
|
|
|
|
|
|
}} // namespace
|
|
|
|
|
2010-05-12 01:44:00 +08:00
|
|
|
/* End of file. */
|