opencv/modules/core/src/lapack.cpp

1747 lines
52 KiB
C++
Raw Normal View History

/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <limits>
#ifdef HAVE_EIGEN
# if defined(_MSC_VER)
# pragma warning(push)
# pragma warning(disable:4701) // potentially uninitialized local variable
# pragma warning(disable:4702) // unreachable code
# pragma warning(disable:4714) // const marked as __forceinline not inlined
# endif
# include <Eigen/Core>
# include <Eigen/Eigenvalues>
# if defined(_MSC_VER)
# pragma warning(pop)
# endif
# include "opencv2/core/eigen.hpp"
#endif
2013-02-26 15:16:57 +08:00
#if defined _M_IX86 && defined _MSC_VER && _MSC_VER < 1700
#pragma float_control(precise, on)
#endif
namespace cv
{
2012-10-17 07:18:30 +08:00
int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
{
CV_INSTRUMENT_REGION();
2015-12-15 20:55:43 +08:00
return hal::LU32f(A, astep, m, b, bstep, n);
}
int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
{
CV_INSTRUMENT_REGION();
2015-12-15 20:55:43 +08:00
return hal::LU64f(A, astep, m, b, bstep, n);
}
bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
{
CV_INSTRUMENT_REGION();
2015-12-15 20:55:43 +08:00
return hal::Cholesky32f(A, astep, m, b, bstep, n);
}
bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
{
CV_INSTRUMENT_REGION();
2015-12-15 20:55:43 +08:00
return hal::Cholesky64f(A, astep, m, b, bstep, n);
}
2011-04-26 05:44:22 +08:00
template<typename _Tp> static inline _Tp hypot(_Tp a, _Tp b)
{
a = std::abs(a);
b = std::abs(b);
if( a > b )
{
b /= a;
return a*std::sqrt(1 + b*b);
}
if( b > 0 )
{
a /= b;
return b*std::sqrt(1 + a*a);
}
return 0;
}
template<typename _Tp> bool
JacobiImpl_( _Tp* A, size_t astep, _Tp* W, _Tp* V, size_t vstep, int n, uchar* buf )
{
const _Tp eps = std::numeric_limits<_Tp>::epsilon();
int i, j, k, m;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
astep /= sizeof(A[0]);
if( V )
{
vstep /= sizeof(V[0]);
for( i = 0; i < n; i++ )
{
for( j = 0; j < n; j++ )
V[i*vstep + j] = (_Tp)0;
V[i*vstep + i] = (_Tp)1;
}
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
int iters, maxIters = n*n*30;
2012-10-17 07:18:30 +08:00
2012-01-27 03:41:59 +08:00
int* indR = (int*)alignPtr(buf, sizeof(int));
2011-04-26 05:44:22 +08:00
int* indC = indR + n;
_Tp mv = (_Tp)0;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( k = 0; k < n; k++ )
{
W[k] = A[(astep + 1)*k];
if( k < n - 1 )
{
for( m = k+1, mv = std::abs(A[astep*k + m]), i = k+2; i < n; i++ )
{
_Tp val = std::abs(A[astep*k+i]);
if( mv < val )
mv = val, m = i;
}
indR[k] = m;
}
if( k > 0 )
{
for( m = 0, mv = std::abs(A[k]), i = 1; i < k; i++ )
{
_Tp val = std::abs(A[astep*i+k]);
if( mv < val )
mv = val, m = i;
}
indC[k] = m;
}
}
2012-10-17 07:18:30 +08:00
if( n > 1 ) for( iters = 0; iters < maxIters; iters++ )
2011-04-26 05:44:22 +08:00
{
// find index (k,l) of pivot p
2012-01-27 03:41:59 +08:00
for( k = 0, mv = std::abs(A[indR[0]]), i = 1; i < n-1; i++ )
2011-04-26 05:44:22 +08:00
{
2012-01-27 03:41:59 +08:00
_Tp val = std::abs(A[astep*i + indR[i]]);
2011-04-26 05:44:22 +08:00
if( mv < val )
mv = val, k = i;
}
int l = indR[k];
for( i = 1; i < n; i++ )
{
2012-01-27 03:41:59 +08:00
_Tp val = std::abs(A[astep*indC[i] + i]);
2011-04-26 05:44:22 +08:00
if( mv < val )
mv = val, k = indC[i], l = i;
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
_Tp p = A[astep*k + l];
if( std::abs(p) <= eps )
break;
_Tp y = (_Tp)((W[l] - W[k])*0.5);
_Tp t = std::abs(y) + hypot(p, y);
_Tp s = hypot(p, t);
_Tp c = t/s;
s = p/s; t = (p/t)*p;
if( y < 0 )
s = -s, t = -t;
A[astep*k + l] = 0;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
W[k] -= t;
W[l] += t;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
_Tp a0, b0;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
#undef rotate
#define rotate(v0, v1) a0 = v0, b0 = v1, v0 = a0*c - b0*s, v1 = a0*s + b0*c
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
// rotate rows and columns k and l
for( i = 0; i < k; i++ )
rotate(A[astep*i+k], A[astep*i+l]);
for( i = k+1; i < l; i++ )
rotate(A[astep*k+i], A[astep*i+l]);
for( i = l+1; i < n; i++ )
rotate(A[astep*k+i], A[astep*l+i]);
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
// rotate eigenvectors
if( V )
for( i = 0; i < n; i++ )
rotate(V[vstep*k+i], V[vstep*l+i]);
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
#undef rotate
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( j = 0; j < 2; j++ )
{
int idx = j == 0 ? k : l;
if( idx < n - 1 )
{
for( m = idx+1, mv = std::abs(A[astep*idx + m]), i = idx+2; i < n; i++ )
{
_Tp val = std::abs(A[astep*idx+i]);
if( mv < val )
mv = val, m = i;
}
indR[idx] = m;
}
if( idx > 0 )
{
for( m = 0, mv = std::abs(A[idx]), i = 1; i < idx; i++ )
{
_Tp val = std::abs(A[astep*i+idx]);
if( mv < val )
mv = val, m = i;
}
indC[idx] = m;
}
}
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
// sort eigenvalues & eigenvectors
for( k = 0; k < n-1; k++ )
{
m = k;
for( i = k+1; i < n; i++ )
{
if( W[m] < W[i] )
m = i;
}
if( k != m )
{
std::swap(W[m], W[k]);
if( V )
for( i = 0; i < n; i++ )
std::swap(V[vstep*m + i], V[vstep*k + i]);
}
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
return true;
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
static bool Jacobi( float* S, size_t sstep, float* e, float* E, size_t estep, int n, uchar* buf )
{
return JacobiImpl_(S, sstep, e, E, estep, n, buf);
}
static bool Jacobi( double* S, size_t sstep, double* e, double* E, size_t estep, int n, uchar* buf )
{
return JacobiImpl_(S, sstep, e, E, estep, n, buf);
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
template<typename T> struct VBLAS
{
int dot(const T*, const T*, int, T*) const { return 0; }
int givens(T*, T*, int, T, T) const { return 0; }
};
Merge pull request #24325 from hanliutong:rewrite Rewrite Universal Intrinsic code: float related part #24325 The goal of this series of PRs is to modify the SIMD code blocks guarded by CV_SIMD macro: rewrite them by using the new Universal Intrinsic API. The series of PRs is listed below: #23885 First patch, an example #23980 Core module #24058 ImgProc module, part 1 #24132 ImgProc module, part 2 #24166 ImgProc module, part 3 #24301 Features2d and calib3d module #24324 Gapi module This patch (hopefully) is the last one in the series. This patch mainly involves 3 parts 1. Add some modifications related to float (CV_SIMD_64F) 2. Use `#if (CV_SIMD || CV_SIMD_SCALABLE)` instead of `#if CV_SIMD || CV_SIMD_SCALABLE`, then we can get the `CV_SIMD` module that is not enabled for `CV_SIMD_SCALABLE` by looking for `if CV_SIMD` 3. Summary of `CV_SIMD` blocks that remains unmodified: Updated comments - Some blocks will cause test fail when enable for RVV, marked as `TODO: enable for CV_SIMD_SCALABLE, ....` - Some blocks can not be rewrited directly. (Not commented in the source code, just listed here) - ./modules/core/src/mathfuncs_core.simd.hpp (Vector type wrapped in class/struct) - ./modules/imgproc/src/color_lab.cpp (Array of vector type) - ./modules/imgproc/src/color_rgb.simd.hpp (Array of vector type) - ./modules/imgproc/src/sumpixels.simd.hpp (fixed length algorithm, strongly ralated with `CV_SIMD_WIDTH`) These algorithms will need to be redesigned to accommodate scalable backends. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-10-05 22:57:25 +08:00
#if CV_SIMD // TODO: enable for CV_SIMD_SCALABLE, GCC 13 related
2011-04-26 05:44:22 +08:00
template<> inline int VBLAS<float>::dot(const float* a, const float* b, int n, float* result) const
{
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
if( n < 2*VTraits<v_float32>::vlanes() )
2011-04-26 05:44:22 +08:00
return 0;
int k = 0;
v_float32 s0 = vx_setzero_f32();
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
for( ; k <= n - VTraits<v_float32>::vlanes(); k += VTraits<v_float32>::vlanes() )
2011-04-26 05:44:22 +08:00
{
v_float32 a0 = vx_load(a + k);
v_float32 b0 = vx_load(b + k);
2012-10-17 07:18:30 +08:00
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
s0 = v_add(s0, v_mul(a0, b0));
2011-04-26 05:44:22 +08:00
}
*result = v_reduce_sum(s0);
vx_cleanup();
2011-04-26 05:44:22 +08:00
return k;
}
template<> inline int VBLAS<float>::givens(float* a, float* b, int n, float c, float s) const
{
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
if( n < VTraits<v_float32>::vlanes())
2011-04-26 05:44:22 +08:00
return 0;
int k = 0;
v_float32 c4 = vx_setall_f32(c), s4 = vx_setall_f32(s);
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
for( ; k <= n - VTraits<v_float32>::vlanes(); k += VTraits<v_float32>::vlanes() )
2011-04-26 05:44:22 +08:00
{
v_float32 a0 = vx_load(a + k);
v_float32 b0 = vx_load(b + k);
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
v_float32 t0 = v_add(v_mul(a0, c4), v_mul(b0, s4));
v_float32 t1 = v_sub(v_mul(b0, c4), v_mul(a0, s4));
v_store(a + k, t0);
v_store(b + k, t1);
2011-04-26 05:44:22 +08:00
}
vx_cleanup();
2011-04-26 05:44:22 +08:00
return k;
}
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
2011-04-26 05:44:22 +08:00
template<> inline int VBLAS<double>::dot(const double* a, const double* b, int n, double* result) const
{
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
if( n < 2*VTraits<v_float64>::vlanes() )
2011-04-26 05:44:22 +08:00
return 0;
int k = 0;
v_float64 s0 = vx_setzero_f64();
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
for( ; k <= n - VTraits<v_float64>::vlanes(); k += VTraits<v_float64>::vlanes() )
2011-04-26 05:44:22 +08:00
{
v_float64 a0 = vx_load(a + k);
v_float64 b0 = vx_load(b + k);
2012-10-17 07:18:30 +08:00
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
s0 = v_add(s0, v_mul(a0, b0));
2011-04-26 05:44:22 +08:00
}
double sbuf[2];
v_store(sbuf, s0);
2011-04-26 05:44:22 +08:00
*result = sbuf[0] + sbuf[1];
vx_cleanup();
2011-04-26 05:44:22 +08:00
return k;
}
template<> inline int VBLAS<double>::givens(double* a, double* b, int n, double c, double s) const
{
int k = 0;
v_float64 c2 = vx_setall_f64(c), s2 = vx_setall_f64(s);
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
for( ; k <= n - VTraits<v_float64>::vlanes(); k += VTraits<v_float64>::vlanes() )
2011-04-26 05:44:22 +08:00
{
v_float64 a0 = vx_load(a + k);
v_float64 b0 = vx_load(b + k);
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
v_float64 t0 = v_add(v_mul(a0, c2), v_mul(b0, s2));
v_float64 t1 = v_sub(v_mul(b0, c2), v_mul(a0, s2));
v_store(a + k, t0);
v_store(b + k, t1);
2011-04-26 05:44:22 +08:00
}
vx_cleanup();
2011-04-26 05:44:22 +08:00
return k;
}
#endif //CV_SIMD_64F
#endif //CV_SIMD
2011-04-26 05:44:22 +08:00
template<typename _Tp> void
JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
int m, int n, int n1, double minval, _Tp eps)
2011-04-26 05:44:22 +08:00
{
VBLAS<_Tp> vblas;
AutoBuffer<double> Wbuf(n);
2018-06-11 06:42:00 +08:00
double* W = Wbuf.data();
2011-04-26 05:44:22 +08:00
int i, j, k, iter, max_iter = std::max(m, 30);
_Tp c, s;
double sd;
astep /= sizeof(At[0]);
vstep /= sizeof(Vt[0]);
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( i = 0; i < n; i++ )
{
for( k = 0, sd = 0; k < m; k++ )
2011-04-26 05:44:22 +08:00
{
_Tp t = At[i*astep + k];
sd += (double)t*t;
2011-04-26 05:44:22 +08:00
}
W[i] = sd;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( Vt )
{
for( k = 0; k < n; k++ )
Vt[i*vstep + k] = 0;
Vt[i*vstep + i] = 1;
}
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( iter = 0; iter < max_iter; iter++ )
{
bool changed = false;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( i = 0; i < n-1; i++ )
for( j = i+1; j < n; j++ )
{
_Tp *Ai = At + i*astep, *Aj = At + j*astep;
double a = W[i], p = 0, b = W[j];
2012-10-17 07:18:30 +08:00
for( k = 0; k < m; k++ )
p += (double)Ai[k]*Aj[k];
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( std::abs(p) <= eps*std::sqrt((double)a*b) )
2012-10-17 07:18:30 +08:00
continue;
2011-04-26 05:44:22 +08:00
p *= 2;
double beta = a - b, gamma = hypot((double)p, beta);
2011-04-26 05:44:22 +08:00
if( beta < 0 )
{
double delta = (gamma - beta)*0.5;
2011-04-26 05:44:22 +08:00
s = (_Tp)std::sqrt(delta/gamma);
c = (_Tp)(p/(gamma*s*2));
}
else
{
c = (_Tp)std::sqrt((gamma + beta)/(gamma*2));
s = (_Tp)(p/(gamma*c*2));
}
2012-10-17 07:18:30 +08:00
a = b = 0;
for( k = 0; k < m; k++ )
2011-04-26 05:44:22 +08:00
{
_Tp t0 = c*Ai[k] + s*Aj[k];
_Tp t1 = -s*Ai[k] + c*Aj[k];
Ai[k] = t0; Aj[k] = t1;
2012-10-17 07:18:30 +08:00
a += (double)t0*t0; b += (double)t1*t1;
2011-04-26 05:44:22 +08:00
}
W[i] = a; W[j] = b;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
changed = true;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( Vt )
{
_Tp *Vi = Vt + i*vstep, *Vj = Vt + j*vstep;
k = vblas.givens(Vi, Vj, n, c, s);
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( ; k < n; k++ )
{
_Tp t0 = c*Vi[k] + s*Vj[k];
_Tp t1 = -s*Vi[k] + c*Vj[k];
Vi[k] = t0; Vj[k] = t1;
}
}
}
if( !changed )
break;
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( i = 0; i < n; i++ )
{
for( k = 0, sd = 0; k < m; k++ )
{
_Tp t = At[i*astep + k];
sd += (double)t*t;
}
W[i] = std::sqrt(sd);
2011-04-26 05:44:22 +08:00
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( i = 0; i < n-1; i++ )
{
j = i;
for( k = i+1; k < n; k++ )
{
if( W[j] < W[k] )
j = k;
}
if( i != j )
{
std::swap(W[i], W[j]);
if( Vt )
{
for( k = 0; k < m; k++ )
std::swap(At[i*astep + k], At[j*astep + k]);
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( k = 0; k < n; k++ )
std::swap(Vt[i*vstep + k], Vt[j*vstep + k]);
}
}
}
2012-10-17 07:18:30 +08:00
for( i = 0; i < n; i++ )
_W[i] = (_Tp)W[i];
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( !Vt )
return;
2012-10-17 07:18:30 +08:00
2011-09-22 17:58:58 +08:00
RNG rng(0x12345678);
2011-04-26 05:44:22 +08:00
for( i = 0; i < n1; i++ )
{
sd = i < n ? W[i] : 0;
2012-10-17 07:18:30 +08:00
for( int ii = 0; ii < 100 && sd <= minval; ii++ )
2011-04-26 05:44:22 +08:00
{
// if we got a zero singular value, then in order to get the corresponding left singular vector
// we generate a random vector, project it to the previously computed left singular vectors,
// subtract the projection and normalize the difference.
const _Tp val0 = (_Tp)(1./m);
for( k = 0; k < m; k++ )
{
2011-09-22 17:58:58 +08:00
_Tp val = (rng.next() & 256) != 0 ? val0 : -val0;
2011-04-26 05:44:22 +08:00
At[i*astep + k] = val;
}
for( iter = 0; iter < 2; iter++ )
{
for( j = 0; j < i; j++ )
{
sd = 0;
for( k = 0; k < m; k++ )
sd += At[i*astep + k]*At[j*astep + k];
_Tp asum = 0;
for( k = 0; k < m; k++ )
{
_Tp t = (_Tp)(At[i*astep + k] - sd*At[j*astep + k]);
At[i*astep + k] = t;
asum += std::abs(t);
}
asum = asum > eps*100 ? 1/asum : 0;
2011-04-26 05:44:22 +08:00
for( k = 0; k < m; k++ )
At[i*astep + k] *= asum;
}
}
sd = 0;
for( k = 0; k < m; k++ )
{
_Tp t = At[i*astep + k];
sd += (double)t*t;
}
sd = std::sqrt(sd);
2011-04-26 05:44:22 +08:00
}
2012-10-17 07:18:30 +08:00
s = (_Tp)(sd > minval ? 1/sd : 0.);
2011-04-26 05:44:22 +08:00
for( k = 0; k < m; k++ )
At[i*astep + k] *= s;
}
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1)
{
hal::SVD32f(At, astep, W, NULL, astep, Vt, vstep, m, n, n1);
2011-04-26 05:44:22 +08:00
}
static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1)
{
hal::SVD64f(At, astep, W, NULL, astep, Vt, vstep, m, n, n1);
}
template <typename fptype> static inline int
decodeSVDParameters(const fptype* U, const fptype* Vt, int m, int n, int n1)
{
int halSVDFlag = 0;
if(Vt == NULL)
halSVDFlag = CV_HAL_SVD_NO_UV;
else if(n1 <= 0 || n1 == n)
{
halSVDFlag = CV_HAL_SVD_SHORT_UV;
if(U == NULL)
halSVDFlag |= CV_HAL_SVD_MODIFY_A;
}
else if(n1 == m)
{
halSVDFlag = CV_HAL_SVD_FULL_UV;
if(U == NULL)
halSVDFlag |= CV_HAL_SVD_MODIFY_A;
}
return halSVDFlag;
}
void hal::SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int n1)
{
CALL_HAL(SVD32f, cv_hal_SVD32f, At, astep, W, U, ustep, Vt, vstep, m, n, decodeSVDParameters(U, Vt, m, n, n1))
JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2);
}
void hal::SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int n1)
{
CALL_HAL(SVD64f, cv_hal_SVD64f, At, astep, W, U, ustep, Vt, vstep, m, n, decodeSVDParameters(U, Vt, m, n, n1))
JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN, DBL_EPSILON*10);
2011-04-26 05:44:22 +08:00
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
/* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */
template<typename T1, typename T2, typename T3> static void
MatrAXPY( int m, int n, const T1* x, int dx,
const T2* a, int inca, T3* y, int dy )
{
int i;
2011-04-26 05:44:22 +08:00
for( i = 0; i < m; i++, x += dx, y += dy )
{
T2 s = a[i*inca];
int j = 0;
2012-10-17 07:18:30 +08:00
#if CV_ENABLE_UNROLLED
2012-02-10 14:05:04 +08:00
for(; j <= n - 4; j += 4 )
2011-04-26 05:44:22 +08:00
{
T3 t0 = (T3)(y[j] + s*x[j]);
T3 t1 = (T3)(y[j+1] + s*x[j+1]);
y[j] = t0;
y[j+1] = t1;
t0 = (T3)(y[j+2] + s*x[j+2]);
t1 = (T3)(y[j+3] + s*x[j+3]);
y[j+2] = t0;
y[j+3] = t1;
}
2012-02-10 14:05:04 +08:00
#endif
2011-04-26 05:44:22 +08:00
for( ; j < n; j++ )
y[j] = (T3)(y[j] + s*x[j]);
}
}
template<typename T> static void
SVBkSbImpl_( int m, int n, const T* w, int incw,
const T* u, int ldu, bool uT,
const T* v, int ldv, bool vT,
const T* b, int ldb, int nb,
T* x, int ldx, double* buffer, T eps )
{
double threshold = 0;
int udelta0 = uT ? ldu : 1, udelta1 = uT ? 1 : ldu;
int vdelta0 = vT ? ldv : 1, vdelta1 = vT ? 1 : ldv;
int i, j, nm = std::min(m, n);
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( !b )
nb = m;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( i = 0; i < n; i++ )
for( j = 0; j < nb; j++ )
x[i*ldx + j] = 0;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( i = 0; i < nm; i++ )
threshold += w[i*incw];
threshold *= eps;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
// v * inv(w) * uT * b
for( i = 0; i < nm; i++, u += udelta0, v += vdelta0 )
{
double wi = w[i*incw];
if( (double)std::abs(wi) <= threshold )
continue;
wi = 1/wi;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( nb == 1 )
{
double s = 0;
if( b )
for( j = 0; j < m; j++ )
s += u[j*udelta1]*b[j*ldb];
else
s = u[0];
s *= wi;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
for( j = 0; j < n; j++ )
x[j*ldx] = (T)(x[j*ldx] + s*v[j*vdelta1]);
}
else
{
if( b )
{
for( j = 0; j < nb; j++ )
buffer[j] = 0;
MatrAXPY( m, nb, b, ldb, u, udelta1, buffer, 0 );
for( j = 0; j < nb; j++ )
buffer[j] *= wi;
}
else
{
for( j = 0; j < nb; j++ )
buffer[j] = u[j*udelta1]*wi;
}
MatrAXPY( n, nb, buffer, 0, v, vdelta1, x, ldx );
}
}
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
static void
SVBkSb( int m, int n, const float* w, size_t wstep,
const float* u, size_t ustep, bool uT,
const float* v, size_t vstep, bool vT,
const float* b, size_t bstep, int nb,
float* x, size_t xstep, uchar* buffer )
{
2011-04-26 05:44:22 +08:00
SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
u, (int)(ustep/sizeof(u[0])), uT,
v, (int)(vstep/sizeof(v[0])), vT,
b, (int)(bstep/sizeof(b[0])), nb,
x, (int)(xstep/sizeof(x[0])),
(double*)alignPtr(buffer, sizeof(double)), (float)(DBL_EPSILON*2) );
}
2011-04-26 05:44:22 +08:00
static void
SVBkSb( int m, int n, const double* w, size_t wstep,
const double* u, size_t ustep, bool uT,
const double* v, size_t vstep, bool vT,
const double* b, size_t bstep, int nb,
double* x, size_t xstep, uchar* buffer )
{
2011-04-26 05:44:22 +08:00
SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
u, (int)(ustep/sizeof(u[0])), uT,
v, (int)(vstep/sizeof(v[0])), vT,
b, (int)(bstep/sizeof(b[0])), nb,
x, (int)(xstep/sizeof(x[0])),
(double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 );
2012-10-17 07:18:30 +08:00
}
/****************************************************************************************\
* Determinant of the matrix *
\****************************************************************************************/
#define det2(m) ((double)m(0,0)*m(1,1) - (double)m(0,1)*m(1,0))
#define det3(m) (m(0,0)*((double)m(1,1)*m(2,2) - (double)m(1,2)*m(2,1)) - \
m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) + \
m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0)))
double determinant( InputArray _mat )
{
CV_INSTRUMENT_REGION();
Mat mat = _mat.getMat();
double result = 0;
int type = mat.type(), rows = mat.rows;
size_t step = mat.step;
const uchar* m = mat.ptr();
CV_Assert( !mat.empty() );
CV_Assert( mat.rows == mat.cols && (type == CV_32F || type == CV_64F));
#define Mf(y, x) ((float*)(m + y*step))[x]
#define Md(y, x) ((double*)(m + y*step))[x]
2011-04-26 05:44:22 +08:00
if( type == CV_32F )
{
2011-04-26 05:44:22 +08:00
if( rows == 2 )
result = det2(Mf);
else if( rows == 3 )
result = det3(Mf);
else if( rows == 1 )
result = Mf(0,0);
else
{
2011-04-26 05:44:22 +08:00
size_t bufSize = rows*rows*sizeof(float);
AutoBuffer<uchar> buffer(bufSize);
2018-06-11 06:42:00 +08:00
Mat a(rows, rows, CV_32F, buffer.data());
2011-04-26 05:44:22 +08:00
mat.copyTo(a);
2012-10-17 07:18:30 +08:00
2015-12-15 20:55:43 +08:00
result = hal::LU32f(a.ptr<float>(), a.step, rows, 0, 0, 0);
2011-04-26 05:44:22 +08:00
if( result )
{
2011-04-26 05:44:22 +08:00
for( int i = 0; i < rows; i++ )
result *= a.at<float>(i,i);
}
}
}
else
{
2011-04-26 05:44:22 +08:00
if( rows == 2 )
result = det2(Md);
else if( rows == 3 )
result = det3(Md);
else if( rows == 1 )
result = Md(0,0);
else
{
2011-04-26 05:44:22 +08:00
size_t bufSize = rows*rows*sizeof(double);
AutoBuffer<uchar> buffer(bufSize);
2018-06-11 06:42:00 +08:00
Mat a(rows, rows, CV_64F, buffer.data());
2011-04-26 05:44:22 +08:00
mat.copyTo(a);
2012-10-17 07:18:30 +08:00
2015-12-15 20:55:43 +08:00
result = hal::LU64f(a.ptr<double>(), a.step, rows, 0, 0, 0);
2011-04-26 05:44:22 +08:00
if( result )
{
2011-04-26 05:44:22 +08:00
for( int i = 0; i < rows; i++ )
result *= a.at<double>(i,i);
}
}
}
#undef Mf
#undef Md
return result;
}
/****************************************************************************************\
* Inverse (or pseudo-inverse) of a matrix *
\****************************************************************************************/
#define Sf( y, x ) ((float*)(srcdata + y*srcstep))[x]
#define Sd( y, x ) ((double*)(srcdata + y*srcstep))[x]
#define Df( y, x ) ((float*)(dstdata + y*dststep))[x]
#define Dd( y, x ) ((double*)(dstdata + y*dststep))[x]
double invert( InputArray _src, OutputArray _dst, int method )
{
CV_INSTRUMENT_REGION();
2011-04-26 05:44:22 +08:00
bool result = false;
Mat src = _src.getMat();
int type = src.type();
2012-05-23 17:28:26 +08:00
CV_Assert(type == CV_32F || type == CV_64F);
2011-12-27 23:56:17 +08:00
size_t esz = CV_ELEM_SIZE(type);
int m = src.rows, n = src.cols;
2012-10-17 07:18:30 +08:00
if( method == DECOMP_SVD )
{
2011-12-27 23:56:17 +08:00
int nm = std::min(m, n);
2012-10-17 07:18:30 +08:00
2011-12-27 23:56:17 +08:00
AutoBuffer<uchar> _buf((m*nm + nm + nm*n)*esz + sizeof(double));
2018-06-11 06:42:00 +08:00
uchar* buf = alignPtr((uchar*)_buf.data(), (int)esz);
2011-12-27 23:56:17 +08:00
Mat u(m, nm, type, buf);
Mat w(nm, 1, type, u.ptr() + m*nm*esz);
Mat vt(nm, n, type, w.ptr() + nm*esz);
2012-10-17 07:18:30 +08:00
2011-12-27 23:56:17 +08:00
SVD::compute(src, w, u, vt);
SVD::backSubst(w, u, vt, Mat(), _dst);
return type == CV_32F ?
(w.ptr<float>()[0] >= FLT_EPSILON ?
w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
(w.ptr<double>()[0] >= DBL_EPSILON ?
w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
}
2012-10-17 07:18:30 +08:00
2012-05-23 17:28:26 +08:00
CV_Assert( m == n );
2012-10-17 07:18:30 +08:00
2011-12-27 23:56:17 +08:00
if( method == DECOMP_EIG )
{
AutoBuffer<uchar> _buf((n*n*2 + n)*esz + sizeof(double));
2018-06-11 06:42:00 +08:00
uchar* buf = alignPtr((uchar*)_buf.data(), (int)esz);
2011-12-27 23:56:17 +08:00
Mat u(n, n, type, buf);
Mat w(n, 1, type, u.ptr() + n*n*esz);
Mat vt(n, n, type, w.ptr() + n*esz);
2012-10-17 07:18:30 +08:00
2011-12-27 23:56:17 +08:00
eigen(src, w, vt);
transpose(vt, u);
SVD::backSubst(w, u, vt, Mat(), _dst);
return type == CV_32F ?
(w.ptr<float>()[0] >= FLT_EPSILON ?
w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
(w.ptr<double>()[0] >= DBL_EPSILON ?
w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
2011-12-27 23:56:17 +08:00
}
2012-10-17 07:18:30 +08:00
2011-12-27 23:56:17 +08:00
CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );
2012-10-17 07:18:30 +08:00
2011-12-27 23:56:17 +08:00
_dst.create( n, n, type );
Mat dst = _dst.getMat();
if( n <= 3 )
{
const uchar* srcdata = src.ptr();
uchar* dstdata = dst.ptr();
size_t srcstep = src.step;
size_t dststep = dst.step;
2011-12-27 23:56:17 +08:00
if( n == 2 )
{
if( type == CV_32FC1 )
{
double d = det2(Sf);
2012-10-17 07:18:30 +08:00
if( d != 0. )
{
2012-10-17 07:18:30 +08:00
result = true;
d = 1./d;
#if CV_SIMD128
const float d_32f = (float)d;
const v_float32x4 d_vec(d_32f, -d_32f, -d_32f, d_32f);
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
v_float32x4 s0 = v_mul(v_load_halves((const float *)srcdata, (const float *)(srcdata + srcstep)), d_vec);//0123//3120
s0 = v_extract<3>(s0, v_combine_low(v_rotate_right<1>(s0), s0));
v_store_low((float*)dstdata, s0);
v_store_high((float*)(dstdata + dststep), s0);
#else
double t0, t1;
t0 = Sf(0,0)*d;
t1 = Sf(1,1)*d;
Df(1,1) = (float)t0;
Df(0,0) = (float)t1;
t0 = -Sf(0,1)*d;
t1 = -Sf(1,0)*d;
Df(0,1) = (float)t0;
Df(1,0) = (float)t1;
#endif
2012-10-17 07:18:30 +08:00
}
}
else
{
2012-10-17 07:18:30 +08:00
double d = det2(Sd);
if( d != 0. )
{
2012-10-17 07:18:30 +08:00
result = true;
d = 1./d;
#if CV_SIMD128_64F
v_float64x2 det = v_setall_f64(d);
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
v_float64x2 s0 = v_mul(v_load((const double *)srcdata), det);
v_float64x2 s1 = v_mul(v_load((const double *)(srcdata + srcstep)), det);
v_float64x2 sm = v_extract<1>(s1, s0);//30
Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast<int>((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-08-11 13:33:33 +08:00
v_float64x2 ss = v_sub(v_setall<double>(0), v_extract<1>(s0, s1));//12
v_store((double*)dstdata, v_combine_low(sm, ss));//31
v_store((double*)(dstdata + dststep), v_combine_high(ss, sm));//20
#else
double t0, t1;
t0 = Sd(0,0)*d;
t1 = Sd(1,1)*d;
Dd(1,1) = t0;
Dd(0,0) = t1;
t0 = -Sd(0,1)*d;
t1 = -Sd(1,0)*d;
Dd(0,1) = t0;
Dd(1,0) = t1;
#endif
2012-10-17 07:18:30 +08:00
}
}
}
2011-12-27 23:56:17 +08:00
else if( n == 3 )
{
if( type == CV_32FC1 )
{
double d = det3(Sf);
2012-10-17 07:18:30 +08:00
2013-02-26 15:16:57 +08:00
if( d != 0. )
2013-02-26 03:10:38 +08:00
{
2013-02-26 15:16:57 +08:00
double t[12];
2013-02-26 03:10:38 +08:00
2013-02-26 15:16:57 +08:00
result = true;
d = 1./d;
t[0] = (((double)Sf(1,1) * Sf(2,2) - (double)Sf(1,2) * Sf(2,1)) * d);
t[1] = (((double)Sf(0,2) * Sf(2,1) - (double)Sf(0,1) * Sf(2,2)) * d);
t[2] = (((double)Sf(0,1) * Sf(1,2) - (double)Sf(0,2) * Sf(1,1)) * d);
2013-02-26 03:10:38 +08:00
2013-02-26 15:16:57 +08:00
t[3] = (((double)Sf(1,2) * Sf(2,0) - (double)Sf(1,0) * Sf(2,2)) * d);
t[4] = (((double)Sf(0,0) * Sf(2,2) - (double)Sf(0,2) * Sf(2,0)) * d);
t[5] = (((double)Sf(0,2) * Sf(1,0) - (double)Sf(0,0) * Sf(1,2)) * d);
2013-02-26 03:10:38 +08:00
2013-02-26 15:16:57 +08:00
t[6] = (((double)Sf(1,0) * Sf(2,1) - (double)Sf(1,1) * Sf(2,0)) * d);
t[7] = (((double)Sf(0,1) * Sf(2,0) - (double)Sf(0,0) * Sf(2,1)) * d);
t[8] = (((double)Sf(0,0) * Sf(1,1) - (double)Sf(0,1) * Sf(1,0)) * d);
2013-02-26 03:10:38 +08:00
2013-02-26 15:16:57 +08:00
Df(0,0) = (float)t[0]; Df(0,1) = (float)t[1]; Df(0,2) = (float)t[2];
Df(1,0) = (float)t[3]; Df(1,1) = (float)t[4]; Df(1,2) = (float)t[5];
Df(2,0) = (float)t[6]; Df(2,1) = (float)t[7]; Df(2,2) = (float)t[8];
2013-02-26 03:10:38 +08:00
}
}
else
{
double d = det3(Sd);
if( d != 0. )
{
2012-10-17 07:18:30 +08:00
result = true;
d = 1./d;
double t[9];
t[0] = (Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1)) * d;
t[1] = (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2)) * d;
t[2] = (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1)) * d;
2012-10-17 07:18:30 +08:00
t[3] = (Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2)) * d;
t[4] = (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0)) * d;
t[5] = (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2)) * d;
2012-10-17 07:18:30 +08:00
t[6] = (Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0)) * d;
t[7] = (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1)) * d;
t[8] = (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0)) * d;
Dd(0,0) = t[0]; Dd(0,1) = t[1]; Dd(0,2) = t[2];
Dd(1,0) = t[3]; Dd(1,1) = t[4]; Dd(1,2) = t[5];
Dd(2,0) = t[6]; Dd(2,1) = t[7]; Dd(2,2) = t[8];
}
}
}
else
{
CV_Assert( n == 1 );
if( type == CV_32FC1 )
{
double d = Sf(0,0);
if( d != 0. )
{
2011-04-26 05:44:22 +08:00
result = true;
Df(0,0) = (float)(1./d);
}
}
else
{
double d = Sd(0,0);
if( d != 0. )
{
2011-04-26 05:44:22 +08:00
result = true;
Dd(0,0) = 1./d;
}
}
}
2011-04-26 05:44:22 +08:00
if( !result )
dst = Scalar(0);
return result;
}
int elem_size = CV_ELEM_SIZE(type);
2011-04-26 05:44:22 +08:00
AutoBuffer<uchar> buf(n*n*elem_size);
2018-06-11 06:42:00 +08:00
Mat src1(n, n, type, buf.data());
2011-04-26 05:44:22 +08:00
src.copyTo(src1);
setIdentity(dst);
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( method == DECOMP_LU && type == CV_32F )
2015-12-15 20:55:43 +08:00
result = hal::LU32f(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n) != 0;
2011-04-26 05:44:22 +08:00
else if( method == DECOMP_LU && type == CV_64F )
2015-12-15 20:55:43 +08:00
result = hal::LU64f(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n) != 0;
2011-04-26 05:44:22 +08:00
else if( method == DECOMP_CHOLESKY && type == CV_32F )
2015-12-15 20:55:43 +08:00
result = hal::Cholesky32f(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n);
else
2015-12-15 20:55:43 +08:00
result = hal::Cholesky64f(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n);
if( !result )
dst = Scalar(0);
return result;
}
UMat UMat::inv(int method) const
{
UMat m;
invert(*this, m, method);
return m;
}
/****************************************************************************************\
* Solving a linear system *
\****************************************************************************************/
bool solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
{
CV_INSTRUMENT_REGION();
bool result = true;
Mat src = _src.getMat(), _src2 = _src2arg.getMat();
int type = src.type();
bool is_normal = (method & DECOMP_NORMAL) != 0;
CV_Assert( type == _src2.type() && (type == CV_32F || type == CV_64F) );
method &= ~DECOMP_NORMAL;
CV_Check(method, method == DECOMP_LU || method == DECOMP_SVD || method == DECOMP_EIG ||
method == DECOMP_CHOLESKY || method == DECOMP_QR,
"Unsupported method, see #DecompTypes");
CV_Assert( (method != DECOMP_LU && method != DECOMP_CHOLESKY) ||
is_normal || src.rows == src.cols );
// check case of a single equation and small matrix
2011-04-26 05:44:22 +08:00
if( (method == DECOMP_LU || method == DECOMP_CHOLESKY) && !is_normal &&
src.rows <= 3 && src.rows == src.cols && _src2.cols == 1 )
{
_dst.create( src.cols, _src2.cols, src.type() );
Mat dst = _dst.getMat();
2012-10-17 07:18:30 +08:00
#define bf(y) ((float*)(bdata + y*src2step))[0]
#define bd(y) ((double*)(bdata + y*src2step))[0]
const uchar* srcdata = src.ptr();
const uchar* bdata = _src2.ptr();
uchar* dstdata = dst.ptr();
size_t srcstep = src.step;
size_t src2step = _src2.step;
size_t dststep = dst.step;
if( src.rows == 2 )
{
if( type == CV_32FC1 )
{
double d = det2(Sf);
if( d != 0. )
{
double t;
d = 1./d;
t = (float)(((double)bf(0)*Sf(1,1) - (double)bf(1)*Sf(0,1))*d);
Df(1,0) = (float)(((double)bf(1)*Sf(0,0) - (double)bf(0)*Sf(1,0))*d);
Df(0,0) = (float)t;
}
else
result = false;
}
else
{
double d = det2(Sd);
if( d != 0. )
{
double t;
d = 1./d;
t = (bd(0)*Sd(1,1) - bd(1)*Sd(0,1))*d;
Dd(1,0) = (bd(1)*Sd(0,0) - bd(0)*Sd(1,0))*d;
Dd(0,0) = t;
}
else
result = false;
}
}
else if( src.rows == 3 )
{
if( type == CV_32FC1 )
{
double d = det3(Sf);
if( d != 0. )
{
float t[3];
d = 1./d;
t[0] = (float)(d*
(bf(0)*((double)Sf(1,1)*Sf(2,2) - (double)Sf(1,2)*Sf(2,1)) -
Sf(0,1)*((double)bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) +
Sf(0,2)*((double)bf(1)*Sf(2,1) - (double)Sf(1,1)*bf(2))));
t[1] = (float)(d*
(Sf(0,0)*(double)(bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) -
bf(0)*((double)Sf(1,0)*Sf(2,2) - (double)Sf(1,2)*Sf(2,0)) +
Sf(0,2)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0))));
t[2] = (float)(d*
(Sf(0,0)*((double)Sf(1,1)*bf(2) - (double)bf(1)*Sf(2,1)) -
Sf(0,1)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0)) +
bf(0)*((double)Sf(1,0)*Sf(2,1) - (double)Sf(1,1)*Sf(2,0))));
Df(0,0) = t[0];
Df(1,0) = t[1];
Df(2,0) = t[2];
}
else
result = false;
}
else
{
double d = det3(Sd);
if( d != 0. )
{
double t[9];
d = 1./d;
2012-10-17 07:18:30 +08:00
t[0] = ((Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1))*bd(0) +
(Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2))*bd(1) +
(Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1))*bd(2))*d;
t[1] = ((Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2))*bd(0) +
(Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0))*bd(1) +
(Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2))*bd(2))*d;
t[2] = ((Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0))*bd(0) +
(Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1))*bd(1) +
(Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0))*bd(2))*d;
Dd(0,0) = t[0];
Dd(1,0) = t[1];
Dd(2,0) = t[2];
}
else
result = false;
}
}
else
{
CV_Assert( src.rows == 1 );
if( type == CV_32FC1 )
{
2011-04-26 05:44:22 +08:00
double d = Sf(0,0);
if( d != 0. )
Df(0,0) = (float)(bf(0)/d);
else
result = false;
}
2011-04-26 05:44:22 +08:00
else
{
2011-04-26 05:44:22 +08:00
double d = Sd(0,0);
if( d != 0. )
Dd(0,0) = (bd(0)/d);
else
result = false;
}
}
2011-04-26 05:44:22 +08:00
return result;
}
2011-04-26 05:44:22 +08:00
int m = src.rows, m_ = m, n = src.cols, nb = _src2.cols;
size_t esz = CV_ELEM_SIZE(type), bufsize = 0;
size_t vstep = alignSize(n*esz, 16);
size_t astep = method == DECOMP_SVD && !is_normal ? alignSize(m*esz, 16) : vstep;
AutoBuffer<uchar> buffer;
Mat src2 = _src2;
_dst.create( src.cols, src2.cols, src.type() );
Mat dst = _dst.getMat();
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( m < n )
2024-03-04 20:51:05 +08:00
CV_Error(cv::Error::StsBadArg, "The function can not solve under-determined linear systems" );
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( m == n )
is_normal = false;
else if( is_normal )
{
m_ = n;
if( method == DECOMP_SVD )
method = DECOMP_EIG;
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
size_t asize = astep*(method == DECOMP_SVD || is_normal ? n : m);
bufsize += asize + 32;
2011-04-26 05:44:22 +08:00
if( is_normal )
bufsize += n*nb*esz;
if( method == DECOMP_SVD || method == DECOMP_EIG )
bufsize += n*5*esz + n*vstep + nb*sizeof(double) + 32;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
buffer.allocate(bufsize);
2018-06-11 06:42:00 +08:00
uchar* ptr = alignPtr(buffer.data(), 16);
2011-04-26 05:44:22 +08:00
Mat a(m_, n, type, ptr, astep);
2011-04-26 05:44:22 +08:00
if( is_normal )
mulTransposed(src, a, true);
else if( method != DECOMP_SVD )
src.copyTo(a);
else
{
a = Mat(n, m_, type, ptr, astep);
transpose(src, a);
}
ptr += asize;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( !is_normal )
{
2011-04-26 05:44:22 +08:00
if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
src2.copyTo(dst);
}
else
{
// a'*b
if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
gemm( src, src2, 1, Mat(), 0, dst, GEMM_1_T );
else
{
Mat tmp(n, nb, type, ptr);
ptr += n*nb*esz;
gemm( src, src2, 1, Mat(), 0, tmp, GEMM_1_T );
src2 = tmp;
}
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( method == DECOMP_LU )
{
if( type == CV_32F )
2015-12-15 20:55:43 +08:00
result = hal::LU32f(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0;
else
2015-12-15 20:55:43 +08:00
result = hal::LU64f(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0;
}
2011-04-26 05:44:22 +08:00
else if( method == DECOMP_CHOLESKY )
{
2011-04-26 05:44:22 +08:00
if( type == CV_32F )
2015-12-15 20:55:43 +08:00
result = hal::Cholesky32f(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb);
else
2015-12-15 20:55:43 +08:00
result = hal::Cholesky64f(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb);
}
2016-08-23 21:35:03 +08:00
else if( method == DECOMP_QR )
{
Mat rhsMat;
if( is_normal || m == n )
{
src2.copyTo(dst);
rhsMat = dst;
}
else
{
rhsMat = Mat(m, nb, type);
src2.copyTo(rhsMat);
}
if( type == CV_32F )
result = hal::QR32f(a.ptr<float>(), a.step, a.rows, a.cols, rhsMat.cols, rhsMat.ptr<float>(), rhsMat.step, NULL) != 0;
else
result = hal::QR64f(a.ptr<double>(), a.step, a.rows, a.cols, rhsMat.cols, rhsMat.ptr<double>(), rhsMat.step, NULL) != 0;
if (rhsMat.rows != dst.rows)
rhsMat.rowRange(0, dst.rows).copyTo(dst);
}
else
{
2011-04-26 05:44:22 +08:00
ptr = alignPtr(ptr, 16);
Mat v(n, n, type, ptr, vstep), w(n, 1, type, ptr + vstep*n), u;
ptr += n*(vstep + esz);
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( method == DECOMP_EIG )
{
if( type == CV_32F )
Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr);
else
Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
u = v;
}
else
{
if( type == CV_32F )
JacobiSVD(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, m_, n);
else
JacobiSVD(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, m_, n);
u = a;
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( type == CV_32F )
{
SVBkSb(m_, n, w.ptr<float>(), 0, u.ptr<float>(), u.step, true,
v.ptr<float>(), v.step, true, src2.ptr<float>(),
src2.step, nb, dst.ptr<float>(), dst.step, ptr);
}
else
2011-04-26 05:44:22 +08:00
{
SVBkSb(m_, n, w.ptr<double>(), 0, u.ptr<double>(), u.step, true,
v.ptr<double>(), v.step, true, src2.ptr<double>(),
src2.step, nb, dst.ptr<double>(), dst.step, ptr);
}
result = true;
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( !result )
dst = Scalar(0);
2011-04-26 05:44:22 +08:00
return result;
}
2011-04-26 05:44:22 +08:00
/////////////////// finding eigenvalues and eigenvectors of a symmetric matrix ///////////////
bool eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
2011-04-26 05:44:22 +08:00
{
CV_INSTRUMENT_REGION();
2011-04-26 05:44:22 +08:00
Mat src = _src.getMat();
int type = src.type();
int n = src.rows;
CV_Assert( src.rows == src.cols );
CV_Assert (type == CV_32F || type == CV_64F);
Mat v;
if( _evects.needed() )
2011-04-26 05:44:22 +08:00
{
_evects.create(n, n, type);
v = _evects.getMat();
}
2012-10-17 07:18:30 +08:00
#ifdef HAVE_EIGEN
const bool evecNeeded = _evects.needed();
const int esOptions = evecNeeded ? Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly;
_evals.create(n, 1, type);
Mat evals = _evals.getMat();
if ( type == CV_64F )
{
Eigen::MatrixXd src_eig, zeros_eig;
cv::cv2eigen(src, src_eig);
Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es;
es.compute(src_eig, esOptions);
if ( es.info() == Eigen::Success )
{
cv::eigen2cv(es.eigenvalues().reverse().eval(), evals);
if ( evecNeeded )
{
cv::Mat evects = _evects.getMat();
cv::eigen2cv(es.eigenvectors().rowwise().reverse().transpose().eval(), v);
}
return true;
}
} else { // CV_32F
Eigen::MatrixXf src_eig, zeros_eig;
cv::cv2eigen(src, src_eig);
Eigen::SelfAdjointEigenSolver<Eigen::MatrixXf> es;
es.compute(src_eig, esOptions);
if ( es.info() == Eigen::Success )
{
cv::eigen2cv(es.eigenvalues().reverse().eval(), evals);
if ( evecNeeded )
{
cv::eigen2cv(es.eigenvectors().rowwise().reverse().transpose().eval(), v);
}
return true;
}
}
return false;
#else
size_t elemSize = src.elemSize(), astep = alignSize(n*elemSize, 16);
AutoBuffer<uchar> buf(n*astep + n*5*elemSize + 32);
2018-06-11 06:42:00 +08:00
uchar* ptr = alignPtr(buf.data(), 16);
Mat a(n, n, type, ptr, astep), w(n, 1, type, ptr + astep*n);
ptr += astep*n + elemSize*n;
2011-04-26 05:44:22 +08:00
src.copyTo(a);
bool ok = type == CV_32F ?
Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr) :
Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
w.copyTo(_evals);
return ok;
#endif
}
static void _SVDcompute( InputArray _aarr, OutputArray _w,
OutputArray _u, OutputArray _vt, int flags )
{
2011-04-26 05:44:22 +08:00
Mat src = _aarr.getMat();
int m = src.rows, n = src.cols;
int type = src.type();
bool compute_uv = _u.needed() || _vt.needed();
bool full_uv = (flags & SVD::FULL_UV) != 0;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
CV_Assert( type == CV_32F || type == CV_64F );
2012-10-17 07:18:30 +08:00
if( flags & SVD::NO_UV )
{
_u.release();
_vt.release();
2011-04-26 05:44:22 +08:00
compute_uv = full_uv = false;
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
bool at = false;
if( m < n )
{
2011-04-26 05:44:22 +08:00
std::swap(m, n);
at = true;
}
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
int urows = full_uv ? m : n;
size_t esz = src.elemSize(), astep = alignSize(m*esz, 16), vstep = alignSize(n*esz, 16);
AutoBuffer<uchar> _buf(urows*astep + n*vstep + n*esz + 32);
2018-06-11 06:42:00 +08:00
uchar* buf = alignPtr(_buf.data(), 16);
2011-04-26 05:44:22 +08:00
Mat temp_a(n, m, type, buf, astep);
Mat temp_w(n, 1, type, buf + urows*astep);
Mat temp_u(urows, m, type, buf, astep), temp_v;
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( compute_uv )
temp_v = Mat(n, n, type, alignPtr(buf + urows*astep + n*esz, 16), vstep);
2012-10-17 07:18:30 +08:00
2011-09-22 17:58:58 +08:00
if( urows > n )
temp_u = Scalar::all(0);
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
if( !at )
transpose(src, temp_a);
else
src.copyTo(temp_a);
2012-10-17 07:18:30 +08:00
if( type == CV_32F )
{
2011-09-22 17:58:58 +08:00
JacobiSVD(temp_a.ptr<float>(), temp_u.step, temp_w.ptr<float>(),
2011-04-26 05:44:22 +08:00
temp_v.ptr<float>(), temp_v.step, m, n, compute_uv ? urows : 0);
}
else
{
2011-09-22 17:58:58 +08:00
JacobiSVD(temp_a.ptr<double>(), temp_u.step, temp_w.ptr<double>(),
2011-04-26 05:44:22 +08:00
temp_v.ptr<double>(), temp_v.step, m, n, compute_uv ? urows : 0);
}
2011-04-26 05:44:22 +08:00
temp_w.copyTo(_w);
if( compute_uv )
{
2011-04-26 05:44:22 +08:00
if( !at )
{
if( _u.needed() )
transpose(temp_u, _u);
if( _vt.needed() )
temp_v.copyTo(_vt);
}
2011-04-26 05:44:22 +08:00
else
{
if( _u.needed() )
transpose(temp_v, _u);
if( _vt.needed() )
temp_u.copyTo(_vt);
}
}
2012-10-17 07:18:30 +08:00
}
void SVD::compute( InputArray a, OutputArray w, OutputArray u, OutputArray vt, int flags )
{
CV_INSTRUMENT_REGION();
_SVDcompute(a, w, u, vt, flags);
}
void SVD::compute( InputArray a, OutputArray w, int flags )
{
CV_INSTRUMENT_REGION();
_SVDcompute(a, w, noArray(), noArray(), flags);
}
2012-10-17 07:18:30 +08:00
void SVD::backSubst( InputArray _w, InputArray _u, InputArray _vt,
InputArray _rhs, OutputArray _dst )
{
Mat w = _w.getMat(), u = _u.getMat(), vt = _vt.getMat(), rhs = _rhs.getMat();
int type = w.type(), esz = (int)w.elemSize();
2011-04-26 05:44:22 +08:00
int m = u.rows, n = vt.cols, nb = rhs.data ? rhs.cols : m, nm = std::min(m, n);
size_t wstep = w.rows == 1 ? (size_t)esz : w.cols == 1 ? (size_t)w.step : (size_t)w.step + esz;
2011-04-26 05:44:22 +08:00
AutoBuffer<uchar> buffer(nb*sizeof(double) + 16);
CV_Assert( w.type() == u.type() && u.type() == vt.type() && u.data && vt.data && w.data );
CV_Assert( u.cols >= nm && vt.rows >= nm &&
(w.size() == Size(nm, 1) || w.size() == Size(1, nm) || w.size() == Size(vt.rows, u.cols)) );
2010-11-20 20:05:28 +08:00
CV_Assert( rhs.data == 0 || (rhs.type() == type && rhs.rows == m) );
2012-10-17 07:18:30 +08:00
_dst.create( n, nb, type );
Mat dst = _dst.getMat();
if( type == CV_32F )
2011-04-26 05:44:22 +08:00
SVBkSb(m, n, w.ptr<float>(), wstep, u.ptr<float>(), u.step, false,
vt.ptr<float>(), vt.step, true, rhs.ptr<float>(), rhs.step, nb,
2018-06-11 06:42:00 +08:00
dst.ptr<float>(), dst.step, buffer.data());
else if( type == CV_64F )
2011-04-26 05:44:22 +08:00
SVBkSb(m, n, w.ptr<double>(), wstep, u.ptr<double>(), u.step, false,
vt.ptr<double>(), vt.step, true, rhs.ptr<double>(), rhs.step, nb,
2018-06-11 06:42:00 +08:00
dst.ptr<double>(), dst.step, buffer.data());
else
2024-03-04 20:51:05 +08:00
CV_Error( cv::Error::StsUnsupportedFormat, "" );
}
2012-10-17 07:18:30 +08:00
SVD& SVD::operator ()(InputArray a, int flags)
{
_SVDcompute(a, w, u, vt, flags);
return *this;
}
void SVD::backSubst( InputArray rhs, OutputArray dst ) const
{
backSubst( w, u, vt, rhs, dst );
}
}
void cv::SVDecomp(InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags)
{
CV_INSTRUMENT_REGION();
SVD::compute(src, w, u, vt, flags);
}
void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs, OutputArray dst)
{
CV_INSTRUMENT_REGION();
SVD::backSubst(w, u, vt, rhs, dst);
}
#ifndef OPENCV_EXCLUDE_C_API
CV_IMPL double
cvDet( const CvArr* arr )
{
if( CV_IS_MAT(arr) && ((CvMat*)arr)->rows <= 3 )
{
CvMat* mat = (CvMat*)arr;
int type = CV_MAT_TYPE(mat->type);
int rows = mat->rows;
uchar* m = mat->data.ptr;
int step = mat->step;
CV_Assert( rows == mat->cols );
#define Mf(y, x) ((float*)(m + y*step))[x]
#define Md(y, x) ((double*)(m + y*step))[x]
if( type == CV_32F )
{
if( rows == 2 )
return det2(Mf);
if( rows == 3 )
return det3(Mf);
}
else if( type == CV_64F )
{
if( rows == 2 )
return det2(Md);
if( rows == 3 )
return det3(Md);
}
}
return cv::determinant(cv::cvarrToMat(arr));
}
CV_IMPL double
cvInvert( const CvArr* srcarr, CvArr* dstarr, int method )
{
cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
CV_Assert( src.type() == dst.type() && src.rows == dst.cols && src.cols == dst.rows );
return cv::invert( src, dst, method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
2011-12-27 23:56:17 +08:00
method == CV_SVD ? cv::DECOMP_SVD :
method == CV_SVD_SYM ? cv::DECOMP_EIG : cv::DECOMP_LU );
}
CV_IMPL int
cvSolve( const CvArr* Aarr, const CvArr* barr, CvArr* xarr, int method )
{
cv::Mat A = cv::cvarrToMat(Aarr), b = cv::cvarrToMat(barr), x = cv::cvarrToMat(xarr);
CV_Assert( A.type() == x.type() && A.cols == x.rows && x.cols == b.cols );
2011-04-26 05:44:22 +08:00
bool is_normal = (method & CV_NORMAL) != 0;
method &= ~CV_NORMAL;
return cv::solve( A, b, x, (method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
2011-12-27 23:56:17 +08:00
method == CV_SVD ? cv::DECOMP_SVD :
method == CV_SVD_SYM ? cv::DECOMP_EIG :
2011-04-26 05:44:22 +08:00
A.rows > A.cols ? cv::DECOMP_QR : cv::DECOMP_LU) + (is_normal ? cv::DECOMP_NORMAL : 0) );
}
CV_IMPL void
cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double,
int, int )
{
cv::Mat src = cv::cvarrToMat(srcarr), evals0 = cv::cvarrToMat(evalsarr), evals = evals0;
if( evectsarr )
{
cv::Mat evects0 = cv::cvarrToMat(evectsarr), evects = evects0;
eigen(src, evals, evects);
if( evects0.data != evects.data )
{
const uchar* p = evects0.ptr();
evects.convertTo(evects0, evects0.type());
CV_Assert( p == evects0.ptr() );
}
}
else
eigen(src, evals);
if( evals0.data != evals.data )
{
const uchar* p = evals0.ptr();
if( evals0.size() == evals.size() )
evals.convertTo(evals0, evals0.type());
else if( evals0.type() == evals.type() )
cv::transpose(evals, evals0);
else
cv::Mat(evals.t()).convertTo(evals0, evals0.type());
CV_Assert( p == evals0.ptr() );
}
}
CV_IMPL void
cvSVD( CvArr* aarr, CvArr* warr, CvArr* uarr, CvArr* varr, int flags )
{
cv::Mat a = cv::cvarrToMat(aarr), w = cv::cvarrToMat(warr), u, v;
int m = a.rows, n = a.cols, type = a.type(), mn = std::max(m, n), nm = std::min(m, n);
CV_Assert( w.type() == type &&
(w.size() == cv::Size(nm,1) || w.size() == cv::Size(1, nm) ||
w.size() == cv::Size(nm, nm) || w.size() == cv::Size(n, m)) );
cv::SVD svd;
if( w.size() == cv::Size(nm, 1) )
svd.w = cv::Mat(nm, 1, type, w.ptr() );
else if( w.isContinuous() )
svd.w = w;
if( uarr )
{
u = cv::cvarrToMat(uarr);
CV_Assert( u.type() == type );
svd.u = u;
}
if( varr )
{
v = cv::cvarrToMat(varr);
CV_Assert( v.type() == type );
svd.vt = v;
}
svd(a, ((flags & CV_SVD_MODIFY_A) ? cv::SVD::MODIFY_A : 0) |
((!svd.u.data && !svd.vt.data) ? cv::SVD::NO_UV : 0) |
((m != n && (svd.u.size() == cv::Size(mn, mn) ||
svd.vt.size() == cv::Size(mn, mn))) ? cv::SVD::FULL_UV : 0));
if( !u.empty() )
{
if( flags & CV_SVD_U_T )
cv::transpose( svd.u, u );
else if( u.data != svd.u.data )
{
CV_Assert( u.size() == svd.u.size() );
svd.u.copyTo(u);
}
}
if( !v.empty() )
{
if( !(flags & CV_SVD_V_T) )
cv::transpose( svd.vt, v );
else if( v.data != svd.vt.data )
{
CV_Assert( v.size() == svd.vt.size() );
svd.vt.copyTo(v);
}
}
if( w.data != svd.w.data )
{
if( w.size() == svd.w.size() )
svd.w.copyTo(w);
else
{
w = cv::Scalar(0);
cv::Mat wd = w.diag();
svd.w.copyTo(wd);
}
}
}
CV_IMPL void
cvSVBkSb( const CvArr* warr, const CvArr* uarr,
const CvArr* varr, const CvArr* rhsarr,
CvArr* dstarr, int flags )
{
cv::Mat w = cv::cvarrToMat(warr), u = cv::cvarrToMat(uarr),
2011-04-26 05:44:22 +08:00
v = cv::cvarrToMat(varr), rhs,
dst = cv::cvarrToMat(dstarr), dst0 = dst;
if( flags & CV_SVD_U_T )
{
2011-04-26 05:44:22 +08:00
cv::Mat tmp;
transpose(u, tmp);
u = tmp;
}
2011-04-26 05:44:22 +08:00
if( !(flags & CV_SVD_V_T) )
{
cv::Mat tmp;
transpose(v, tmp);
v = tmp;
}
if( rhsarr )
rhs = cv::cvarrToMat(rhsarr);
2012-10-17 07:18:30 +08:00
2011-04-26 05:44:22 +08:00
cv::SVD::backSubst(w, u, v, rhs, dst);
CV_Assert( dst.data == dst0.data );
2013-08-21 21:26:54 +08:00
}
#endif // OPENCV_EXCLUDE_C_API