mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 14:36:36 +08:00
Merge pull request #24804 from fengyuentau:fix_lapack_warnings
core: try to solve warnings caused by Apple's new LAPACK interface #24804

Resolves https://github.com/opencv/opencv/issues/24660

Apple's BLAS documentation: https://developer.apple.com/documentation/accelerate/blas?language=objc

The new interface is available since macOS 13.3 and iOS 16.4.

Todo:
- [x] Detect macOS version.
- [x] ~Detect iOS versions (major and minor version).~ Accelerate New LAPACK is not called on iOS.
- [x] Solve calling `cblas_cgemm` and `cblas_zgemm`.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
e52540162f
commit
49f80cb3c4
@ -106,11 +106,26 @@ macro(ocv_lapack_check)
|
|||||||
list(APPEND __link_directories ${LAPACK_LINK_LIBRARIES})
|
list(APPEND __link_directories ${LAPACK_LINK_LIBRARIES})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
set(LAPACK_TRY_COMPILE_DEF "")
|
||||||
|
if(LAPACK_IMPL STREQUAL "LAPACK/Apple" AND NOT IOS) # https://github.com/opencv/opencv/issues/24660
|
||||||
|
# Get macOS version
|
||||||
|
execute_process(COMMAND sw_vers -productVersion
|
||||||
|
OUTPUT_VARIABLE MACOS_VERSION
|
||||||
|
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||||
|
# Enable Accelerate New LAPACK if macOS >= 13.3
|
||||||
|
if (MACOS_VERSION VERSION_GREATER "13.3" OR MACOS_VERSION VERSION_EQUAL "13.3")
|
||||||
|
set(LAPACK_TRY_COMPILE_DEF "-DACCELERATE_NEW_LAPACK")
|
||||||
|
add_compile_definitions(ACCELERATE_NEW_LAPACK)
|
||||||
|
add_compile_definitions(ACCELERATE_LAPACK_ILP64)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
try_compile(__VALID_LAPACK
|
try_compile(__VALID_LAPACK
|
||||||
"${OpenCV_BINARY_DIR}"
|
"${OpenCV_BINARY_DIR}"
|
||||||
"${OpenCV_SOURCE_DIR}/cmake/checks/lapack_check.cpp"
|
"${OpenCV_SOURCE_DIR}/cmake/checks/lapack_check.cpp"
|
||||||
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${LAPACK_INCLUDE_DIR}\;${CMAKE_BINARY_DIR}"
|
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${LAPACK_INCLUDE_DIR}\;${CMAKE_BINARY_DIR}"
|
||||||
"-DLINK_DIRECTORIES:STRING=${__link_directories}"
|
"-DLINK_DIRECTORIES:STRING=${__link_directories}"
|
||||||
|
COMPILE_DEFINITIONS ${LAPACK_TRY_COMPILE_DEF}
|
||||||
LINK_LIBRARIES ${LAPACK_LIBRARIES}
|
LINK_LIBRARIES ${LAPACK_LIBRARIES}
|
||||||
OUTPUT_VARIABLE TRY_OUT
|
OUTPUT_VARIABLE TRY_OUT
|
||||||
)
|
)
|
||||||
|
@ -111,8 +111,18 @@ set_value(fptype *dst, size_t dst_ld, fptype value, size_t m, size_t n)
|
|||||||
template <typename fptype> static inline int
|
template <typename fptype> static inline int
|
||||||
lapack_LU(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, int* info)
|
lapack_LU(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, int* info)
|
||||||
{
|
{
|
||||||
int lda = (int)(a_step / sizeof(fptype)), sign = 0;
|
#if defined (ACCELERATE_NEW_LAPACK) && defined (ACCELERATE_LAPACK_ILP64)
|
||||||
int* piv = new int[m];
|
cv::AutoBuffer<long> piv_buff(m);
|
||||||
|
long lda = (long)(a_step / sizeof(fptype));
|
||||||
|
long _m = static_cast<long>(m), _n = static_cast<long>(n);
|
||||||
|
long _info[1];
|
||||||
|
#else
|
||||||
|
cv::AutoBuffer<int> piv_buff(m);
|
||||||
|
int lda = (int)(a_step / sizeof(fptype));
|
||||||
|
int _m = m, _n = n;
|
||||||
|
int* _info = info;
|
||||||
|
#endif
|
||||||
|
auto piv = piv_buff.data();
|
||||||
|
|
||||||
transpose_square_inplace(a, lda, m);
|
transpose_square_inplace(a, lda, m);
|
||||||
|
|
||||||
@ -121,9 +131,9 @@ lapack_LU(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, int*
|
|||||||
if(n == 1 && b_step == sizeof(fptype))
|
if(n == 1 && b_step == sizeof(fptype))
|
||||||
{
|
{
|
||||||
if(typeid(fptype) == typeid(float))
|
if(typeid(fptype) == typeid(float))
|
||||||
sgesv_(&m, &n, (float*)a, &lda, piv, (float*)b, &m, info);
|
sgesv_(&_m, &_n, (float*)a, &lda, piv, (float*)b, &_m, _info);
|
||||||
else if(typeid(fptype) == typeid(double))
|
else if(typeid(fptype) == typeid(double))
|
||||||
dgesv_(&m, &n, (double*)a, &lda, piv, (double*)b, &m, info);
|
dgesv_(&_m, &_n, (double*)a, &lda, piv, (double*)b, &_m, _info);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -133,9 +143,9 @@ lapack_LU(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, int*
|
|||||||
transpose(b, ldb, tmpB, m, m, n);
|
transpose(b, ldb, tmpB, m, m, n);
|
||||||
|
|
||||||
if(typeid(fptype) == typeid(float))
|
if(typeid(fptype) == typeid(float))
|
||||||
sgesv_(&m, &n, (float*)a, &lda, piv, (float*)tmpB, &m, info);
|
sgesv_(&_m, &_n, (float*)a, &lda, piv, (float*)tmpB, &_m, _info);
|
||||||
else if(typeid(fptype) == typeid(double))
|
else if(typeid(fptype) == typeid(double))
|
||||||
dgesv_(&m, &n, (double*)a, &lda, piv, (double*)tmpB, &m, info);
|
dgesv_(&_m, &_n, (double*)a, &lda, piv, (double*)tmpB, &_m, _info);
|
||||||
|
|
||||||
transpose(tmpB, m, b, ldb, n, m);
|
transpose(tmpB, m, b, ldb, n, m);
|
||||||
delete[] tmpB;
|
delete[] tmpB;
|
||||||
@ -144,11 +154,16 @@ lapack_LU(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, int*
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
if(typeid(fptype) == typeid(float))
|
if(typeid(fptype) == typeid(float))
|
||||||
sgetrf_(&m, &m, (float*)a, &lda, piv, info);
|
sgetrf_(&_m, &_m, (float*)a, &lda, piv, _info);
|
||||||
else if(typeid(fptype) == typeid(double))
|
else if(typeid(fptype) == typeid(double))
|
||||||
dgetrf_(&m, &m, (double*)a, &lda, piv, info);
|
dgetrf_(&_m, &_m, (double*)a, &lda, piv, _info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined (ACCELERATE_NEW_LAPACK) && defined (ACCELERATE_LAPACK_ILP64)
|
||||||
|
*info = static_cast<int>(_info[0]);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int sign = 0;
|
||||||
if(*info == 0)
|
if(*info == 0)
|
||||||
{
|
{
|
||||||
for(int i = 0; i < m; i++)
|
for(int i = 0; i < m; i++)
|
||||||
@ -158,15 +173,21 @@ lapack_LU(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, int*
|
|||||||
else
|
else
|
||||||
*info = 0; //in opencv LU function zero means error
|
*info = 0; //in opencv LU function zero means error
|
||||||
|
|
||||||
delete[] piv;
|
|
||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename fptype> static inline int
|
template <typename fptype> static inline int
|
||||||
lapack_Cholesky(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, bool* info)
|
lapack_Cholesky(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, bool* info)
|
||||||
{
|
{
|
||||||
|
#if defined (ACCELERATE_NEW_LAPACK) && defined (ACCELERATE_LAPACK_ILP64)
|
||||||
|
long _m = static_cast<long>(m), _n = static_cast<long>(n);
|
||||||
|
long lapackStatus = 0;
|
||||||
|
long lda = (long)(a_step / sizeof(fptype));
|
||||||
|
#else
|
||||||
|
int _m = m, _n = n;
|
||||||
int lapackStatus = 0;
|
int lapackStatus = 0;
|
||||||
int lda = (int)(a_step / sizeof(fptype));
|
int lda = (int)(a_step / sizeof(fptype));
|
||||||
|
#endif
|
||||||
char L[] = {'L', '\0'};
|
char L[] = {'L', '\0'};
|
||||||
|
|
||||||
if(b)
|
if(b)
|
||||||
@ -174,9 +195,9 @@ lapack_Cholesky(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n
|
|||||||
if(n == 1 && b_step == sizeof(fptype))
|
if(n == 1 && b_step == sizeof(fptype))
|
||||||
{
|
{
|
||||||
if(typeid(fptype) == typeid(float))
|
if(typeid(fptype) == typeid(float))
|
||||||
OCV_LAPACK_FUNC(sposv)(L, &m, &n, (float*)a, &lda, (float*)b, &m, &lapackStatus);
|
OCV_LAPACK_FUNC(sposv)(L, &_m, &_n, (float*)a, &lda, (float*)b, &_m, &lapackStatus);
|
||||||
else if(typeid(fptype) == typeid(double))
|
else if(typeid(fptype) == typeid(double))
|
||||||
OCV_LAPACK_FUNC(dposv)(L, &m, &n, (double*)a, &lda, (double*)b, &m, &lapackStatus);
|
OCV_LAPACK_FUNC(dposv)(L, &_m, &_n, (double*)a, &lda, (double*)b, &_m, &lapackStatus);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -185,9 +206,9 @@ lapack_Cholesky(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n
|
|||||||
transpose(b, ldb, tmpB, m, m, n);
|
transpose(b, ldb, tmpB, m, m, n);
|
||||||
|
|
||||||
if(typeid(fptype) == typeid(float))
|
if(typeid(fptype) == typeid(float))
|
||||||
OCV_LAPACK_FUNC(sposv)(L, &m, &n, (float*)a, &lda, (float*)tmpB, &m, &lapackStatus);
|
OCV_LAPACK_FUNC(sposv)(L, &_m, &_n, (float*)a, &lda, (float*)tmpB, &_m, &lapackStatus);
|
||||||
else if(typeid(fptype) == typeid(double))
|
else if(typeid(fptype) == typeid(double))
|
||||||
OCV_LAPACK_FUNC(dposv)(L, &m, &n, (double*)a, &lda, (double*)tmpB, &m, &lapackStatus);
|
OCV_LAPACK_FUNC(dposv)(L, &_m, &_n, (double*)a, &lda, (double*)tmpB, &_m, &lapackStatus);
|
||||||
|
|
||||||
transpose(tmpB, m, b, ldb, n, m);
|
transpose(tmpB, m, b, ldb, n, m);
|
||||||
delete[] tmpB;
|
delete[] tmpB;
|
||||||
@ -196,9 +217,9 @@ lapack_Cholesky(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
if(typeid(fptype) == typeid(float))
|
if(typeid(fptype) == typeid(float))
|
||||||
OCV_LAPACK_FUNC(spotrf)(L, &m, (float*)a, &lda, &lapackStatus);
|
OCV_LAPACK_FUNC(spotrf)(L, &_m, (float*)a, &lda, &lapackStatus);
|
||||||
else if(typeid(fptype) == typeid(double))
|
else if(typeid(fptype) == typeid(double))
|
||||||
OCV_LAPACK_FUNC(dpotrf)(L, &m, (double*)a, &lda, &lapackStatus);
|
OCV_LAPACK_FUNC(dpotrf)(L, &_m, (double*)a, &lda, &lapackStatus);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(lapackStatus == 0) *info = true;
|
if(lapackStatus == 0) *info = true;
|
||||||
@ -210,11 +231,24 @@ lapack_Cholesky(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n
|
|||||||
template <typename fptype> static inline int
|
template <typename fptype> static inline int
|
||||||
lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype* vt, size_t v_step, int m, int n, int flags, int* info)
|
lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype* vt, size_t v_step, int m, int n, int flags, int* info)
|
||||||
{
|
{
|
||||||
|
#if defined (ACCELERATE_NEW_LAPACK) && defined (ACCELERATE_LAPACK_ILP64)
|
||||||
|
long _m = static_cast<long>(m), _n = static_cast<long>(n);
|
||||||
|
long _info[1];
|
||||||
|
long lda = (long)(a_step / sizeof(fptype));
|
||||||
|
long ldv = (long)(v_step / sizeof(fptype));
|
||||||
|
long ldu = (long)(u_step / sizeof(fptype));
|
||||||
|
long lwork = -1;
|
||||||
|
cv::AutoBuffer<long> iworkBuf_(8 * std::min(m, n));
|
||||||
|
#else
|
||||||
|
int _m = m, _n = n;
|
||||||
|
int* _info = info;
|
||||||
int lda = (int)(a_step / sizeof(fptype));
|
int lda = (int)(a_step / sizeof(fptype));
|
||||||
int ldv = (int)(v_step / sizeof(fptype));
|
int ldv = (int)(v_step / sizeof(fptype));
|
||||||
int ldu = (int)(u_step / sizeof(fptype));
|
int ldu = (int)(u_step / sizeof(fptype));
|
||||||
int lwork = -1;
|
int lwork = -1;
|
||||||
int* iworkBuf = new int[8*std::min(m, n)];
|
cv::AutoBuffer<int> iworkBuf_(8 * std::min(m, n));
|
||||||
|
#endif
|
||||||
|
auto iworkBuf = iworkBuf_.data();
|
||||||
fptype work1 = 0;
|
fptype work1 = 0;
|
||||||
|
|
||||||
//A already transposed and m>=n
|
//A already transposed and m>=n
|
||||||
@ -238,9 +272,9 @@ lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(typeid(fptype) == typeid(float))
|
if(typeid(fptype) == typeid(float))
|
||||||
OCV_LAPACK_FUNC(sgesdd)(mode, &m, &n, (float*)a, &lda, (float*)w, (float*)u, &ldu, (float*)vt, &ldv, (float*)&work1, &lwork, iworkBuf, info);
|
OCV_LAPACK_FUNC(sgesdd)(mode, &_m, &_n, (float*)a, &lda, (float*)w, (float*)u, &ldu, (float*)vt, &ldv, (float*)&work1, &lwork, iworkBuf, _info);
|
||||||
else if(typeid(fptype) == typeid(double))
|
else if(typeid(fptype) == typeid(double))
|
||||||
OCV_LAPACK_FUNC(dgesdd)(mode, &m, &n, (double*)a, &lda, (double*)w, (double*)u, &ldu, (double*)vt, &ldv, (double*)&work1, &lwork, iworkBuf, info);
|
OCV_LAPACK_FUNC(dgesdd)(mode, &_m, &_n, (double*)a, &lda, (double*)w, (double*)u, &ldu, (double*)vt, &ldv, (double*)&work1, &lwork, iworkBuf, _info);
|
||||||
|
|
||||||
lwork = (int)round(work1); //optimal buffer size
|
lwork = (int)round(work1); //optimal buffer size
|
||||||
fptype* buffer = new fptype[lwork + 1];
|
fptype* buffer = new fptype[lwork + 1];
|
||||||
@ -251,9 +285,13 @@ lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype
|
|||||||
CV_ANNOTATE_MEMORY_IS_INITIALIZED(buffer, sizeof(fptype) * (lwork + 1));
|
CV_ANNOTATE_MEMORY_IS_INITIALIZED(buffer, sizeof(fptype) * (lwork + 1));
|
||||||
|
|
||||||
if(typeid(fptype) == typeid(float))
|
if(typeid(fptype) == typeid(float))
|
||||||
OCV_LAPACK_FUNC(sgesdd)(mode, &m, &n, (float*)a, &lda, (float*)w, (float*)u, &ldu, (float*)vt, &ldv, (float*)buffer, &lwork, iworkBuf, info);
|
OCV_LAPACK_FUNC(sgesdd)(mode, &_m, &_n, (float*)a, &lda, (float*)w, (float*)u, &ldu, (float*)vt, &ldv, (float*)buffer, &lwork, iworkBuf, _info);
|
||||||
else if(typeid(fptype) == typeid(double))
|
else if(typeid(fptype) == typeid(double))
|
||||||
OCV_LAPACK_FUNC(dgesdd)(mode, &m, &n, (double*)a, &lda, (double*)w, (double*)u, &ldu, (double*)vt, &ldv, (double*)buffer, &lwork, iworkBuf, info);
|
OCV_LAPACK_FUNC(dgesdd)(mode, &_m, &_n, (double*)a, &lda, (double*)w, (double*)u, &ldu, (double*)vt, &ldv, (double*)buffer, &lwork, iworkBuf, _info);
|
||||||
|
|
||||||
|
#if defined (ACCELERATE_NEW_LAPACK) && defined (ACCELERATE_LAPACK_ILP64)
|
||||||
|
*info = static_cast<int>(_info[0]);
|
||||||
|
#endif
|
||||||
|
|
||||||
// Make sure MSAN sees the memory as having been written.
|
// Make sure MSAN sees the memory as having been written.
|
||||||
// MSAN does not think it has been written because a different language was called.
|
// MSAN does not think it has been written because a different language was called.
|
||||||
@ -276,7 +314,6 @@ lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype
|
|||||||
delete[] u;
|
delete[] u;
|
||||||
}
|
}
|
||||||
|
|
||||||
delete[] iworkBuf;
|
|
||||||
delete[] buffer;
|
delete[] buffer;
|
||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
@ -284,14 +321,27 @@ lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype
|
|||||||
template <typename fptype> static inline int
|
template <typename fptype> static inline int
|
||||||
lapack_QR(fptype* a, size_t a_step, int m, int n, int k, fptype* b, size_t b_step, fptype* dst, int* info)
|
lapack_QR(fptype* a, size_t a_step, int m, int n, int k, fptype* b, size_t b_step, fptype* dst, int* info)
|
||||||
{
|
{
|
||||||
|
#if defined (ACCELERATE_NEW_LAPACK) && defined (ACCELERATE_LAPACK_ILP64)
|
||||||
|
long _m = static_cast<long>(m), _n = static_cast<long>(n), _k = static_cast<long>(k);
|
||||||
|
long _info[1];
|
||||||
|
long lda = (long)(a_step / sizeof(fptype));
|
||||||
|
long lwork = -1;
|
||||||
|
long ldtmpA;
|
||||||
|
#else
|
||||||
|
int _m = m, _n = n, _k = k;
|
||||||
|
int* _info = info;
|
||||||
int lda = (int)(a_step / sizeof(fptype));
|
int lda = (int)(a_step / sizeof(fptype));
|
||||||
|
int lwork = -1;
|
||||||
|
int ldtmpA;
|
||||||
|
#endif
|
||||||
|
|
||||||
char mode[] = { 'N', '\0' };
|
char mode[] = { 'N', '\0' };
|
||||||
if(m < n)
|
if(m < n)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
std::vector<fptype> tmpAMemHolder;
|
std::vector<fptype> tmpAMemHolder;
|
||||||
fptype* tmpA;
|
fptype* tmpA;
|
||||||
int ldtmpA;
|
|
||||||
if (m == n)
|
if (m == n)
|
||||||
{
|
{
|
||||||
transpose_square_inplace(a, lda, m);
|
transpose_square_inplace(a, lda, m);
|
||||||
@ -306,7 +356,6 @@ lapack_QR(fptype* a, size_t a_step, int m, int n, int k, fptype* b, size_t b_ste
|
|||||||
transpose(a, lda, tmpA, m, m, n);
|
transpose(a, lda, tmpA, m, m, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
int lwork = -1;
|
|
||||||
fptype work1 = 0.;
|
fptype work1 = 0.;
|
||||||
|
|
||||||
if (b)
|
if (b)
|
||||||
@ -314,18 +363,18 @@ lapack_QR(fptype* a, size_t a_step, int m, int n, int k, fptype* b, size_t b_ste
|
|||||||
if (k == 1 && b_step == sizeof(fptype))
|
if (k == 1 && b_step == sizeof(fptype))
|
||||||
{
|
{
|
||||||
if (typeid(fptype) == typeid(float))
|
if (typeid(fptype) == typeid(float))
|
||||||
OCV_LAPACK_FUNC(sgels)(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)b, &m, (float*)&work1, &lwork, info);
|
OCV_LAPACK_FUNC(sgels)(mode, &_m, &_n, &_k, (float*)tmpA, &ldtmpA, (float*)b, &_m, (float*)&work1, &lwork, _info);
|
||||||
else if (typeid(fptype) == typeid(double))
|
else if (typeid(fptype) == typeid(double))
|
||||||
OCV_LAPACK_FUNC(dgels)(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)b, &m, (double*)&work1, &lwork, info);
|
OCV_LAPACK_FUNC(dgels)(mode, &_m, &_n, &_k, (double*)tmpA, &ldtmpA, (double*)b, &_m, (double*)&work1, &lwork, _info);
|
||||||
|
|
||||||
lwork = cvRound(work1); //optimal buffer size
|
lwork = cvRound(work1); //optimal buffer size
|
||||||
std::vector<fptype> workBufMemHolder(lwork + 1);
|
std::vector<fptype> workBufMemHolder(lwork + 1);
|
||||||
fptype* buffer = &workBufMemHolder.front();
|
fptype* buffer = &workBufMemHolder.front();
|
||||||
|
|
||||||
if (typeid(fptype) == typeid(float))
|
if (typeid(fptype) == typeid(float))
|
||||||
OCV_LAPACK_FUNC(sgels)(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)b, &m, (float*)buffer, &lwork, info);
|
OCV_LAPACK_FUNC(sgels)(mode, &_m, &_n, &_k, (float*)tmpA, &ldtmpA, (float*)b, &_m, (float*)buffer, &lwork, _info);
|
||||||
else if (typeid(fptype) == typeid(double))
|
else if (typeid(fptype) == typeid(double))
|
||||||
OCV_LAPACK_FUNC(dgels)(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)b, &m, (double*)buffer, &lwork, info);
|
OCV_LAPACK_FUNC(dgels)(mode, &_m, &_n, &_k, (double*)tmpA, &ldtmpA, (double*)b, &_m, (double*)buffer, &lwork, _info);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -335,18 +384,18 @@ lapack_QR(fptype* a, size_t a_step, int m, int n, int k, fptype* b, size_t b_ste
|
|||||||
transpose(b, ldb, tmpB, m, m, k);
|
transpose(b, ldb, tmpB, m, m, k);
|
||||||
|
|
||||||
if (typeid(fptype) == typeid(float))
|
if (typeid(fptype) == typeid(float))
|
||||||
OCV_LAPACK_FUNC(sgels)(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)tmpB, &m, (float*)&work1, &lwork, info);
|
OCV_LAPACK_FUNC(sgels)(mode, &_m, &_n, &_k, (float*)tmpA, &ldtmpA, (float*)tmpB, &_m, (float*)&work1, &lwork, _info);
|
||||||
else if (typeid(fptype) == typeid(double))
|
else if (typeid(fptype) == typeid(double))
|
||||||
OCV_LAPACK_FUNC(dgels)(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)tmpB, &m, (double*)&work1, &lwork, info);
|
OCV_LAPACK_FUNC(dgels)(mode, &_m, &_n, &_k, (double*)tmpA, &ldtmpA, (double*)tmpB, &_m, (double*)&work1, &lwork, _info);
|
||||||
|
|
||||||
lwork = cvRound(work1); //optimal buffer size
|
lwork = cvRound(work1); //optimal buffer size
|
||||||
std::vector<fptype> workBufMemHolder(lwork + 1);
|
std::vector<fptype> workBufMemHolder(lwork + 1);
|
||||||
fptype* buffer = &workBufMemHolder.front();
|
fptype* buffer = &workBufMemHolder.front();
|
||||||
|
|
||||||
if (typeid(fptype) == typeid(float))
|
if (typeid(fptype) == typeid(float))
|
||||||
OCV_LAPACK_FUNC(sgels)(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)tmpB, &m, (float*)buffer, &lwork, info);
|
OCV_LAPACK_FUNC(sgels)(mode, &_m, &_n, &_k, (float*)tmpA, &ldtmpA, (float*)tmpB, &_m, (float*)buffer, &lwork, _info);
|
||||||
else if (typeid(fptype) == typeid(double))
|
else if (typeid(fptype) == typeid(double))
|
||||||
OCV_LAPACK_FUNC(dgels)(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)tmpB, &m, (double*)buffer, &lwork, info);
|
OCV_LAPACK_FUNC(dgels)(mode, &_m, &_n, &_k, (double*)tmpA, &ldtmpA, (double*)tmpB, &_m, (double*)buffer, &lwork, _info);
|
||||||
|
|
||||||
transpose(tmpB, m, b, ldb, k, m);
|
transpose(tmpB, m, b, ldb, k, m);
|
||||||
}
|
}
|
||||||
@ -354,18 +403,18 @@ lapack_QR(fptype* a, size_t a_step, int m, int n, int k, fptype* b, size_t b_ste
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (typeid(fptype) == typeid(float))
|
if (typeid(fptype) == typeid(float))
|
||||||
sgeqrf_(&m, &n, (float*)tmpA, &ldtmpA, (float*)dst, (float*)&work1, &lwork, info);
|
sgeqrf_(&_m, &_n, (float*)tmpA, &ldtmpA, (float*)dst, (float*)&work1, &lwork, _info);
|
||||||
else if (typeid(fptype) == typeid(double))
|
else if (typeid(fptype) == typeid(double))
|
||||||
dgeqrf_(&m, &n, (double*)tmpA, &ldtmpA, (double*)dst, (double*)&work1, &lwork, info);
|
dgeqrf_(&_m, &_n, (double*)tmpA, &ldtmpA, (double*)dst, (double*)&work1, &lwork, _info);
|
||||||
|
|
||||||
lwork = cvRound(work1); //optimal buffer size
|
lwork = cvRound(work1); //optimal buffer size
|
||||||
std::vector<fptype> workBufMemHolder(lwork + 1);
|
std::vector<fptype> workBufMemHolder(lwork + 1);
|
||||||
fptype* buffer = &workBufMemHolder.front();
|
fptype* buffer = &workBufMemHolder.front();
|
||||||
|
|
||||||
if (typeid(fptype) == typeid(float))
|
if (typeid(fptype) == typeid(float))
|
||||||
sgeqrf_(&m, &n, (float*)tmpA, &ldtmpA, (float*)dst, (float*)buffer, &lwork, info);
|
sgeqrf_(&_m, &_n, (float*)tmpA, &ldtmpA, (float*)dst, (float*)buffer, &lwork, _info);
|
||||||
else if (typeid(fptype) == typeid(double))
|
else if (typeid(fptype) == typeid(double))
|
||||||
dgeqrf_(&m, &n, (double*)tmpA, &ldtmpA, (double*)dst, (double*)buffer, &lwork, info);
|
dgeqrf_(&_m, &_n, (double*)tmpA, &ldtmpA, (double*)dst, (double*)buffer, &lwork, _info);
|
||||||
}
|
}
|
||||||
|
|
||||||
CV_ANNOTATE_MEMORY_IS_INITIALIZED(info, sizeof(int));
|
CV_ANNOTATE_MEMORY_IS_INITIALIZED(info, sizeof(int));
|
||||||
@ -374,6 +423,10 @@ lapack_QR(fptype* a, size_t a_step, int m, int n, int k, fptype* b, size_t b_ste
|
|||||||
else
|
else
|
||||||
transpose(tmpA, m, a, lda, n, m);
|
transpose(tmpA, m, a, lda, n, m);
|
||||||
|
|
||||||
|
#if defined (ACCELERATE_NEW_LAPACK) && defined (ACCELERATE_LAPACK_ILP64)
|
||||||
|
*info = static_cast<int>(_info[0]);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (*info != 0)
|
if (*info != 0)
|
||||||
*info = 0;
|
*info = 0;
|
||||||
else
|
else
|
||||||
@ -458,7 +511,6 @@ lapack_gemm(const fptype *src1, size_t src1_step, const fptype *src2, size_t src
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename fptype> static inline int
|
template <typename fptype> static inline int
|
||||||
lapack_gemm_c(const fptype *src1, size_t src1_step, const fptype *src2, size_t src2_step, fptype alpha,
|
lapack_gemm_c(const fptype *src1, size_t src1_step, const fptype *src2, size_t src2_step, fptype alpha,
|
||||||
const fptype *src3, size_t src3_step, fptype beta, fptype *dst, size_t dst_step, int a_m, int a_n, int d_n, int flags)
|
const fptype *src3, size_t src3_step, fptype beta, fptype *dst, size_t dst_step, int a_m, int a_n, int d_n, int flags)
|
||||||
@ -529,10 +581,29 @@ lapack_gemm_c(const fptype *src1, size_t src1_step, const fptype *src2, size_t s
|
|||||||
else if(src3_step == 0 && beta != 0.0)
|
else if(src3_step == 0 && beta != 0.0)
|
||||||
set_value((std::complex<fptype>*)dst, lddst, std::complex<fptype>(0.0, 0.0), d_m, d_n);
|
set_value((std::complex<fptype>*)dst, lddst, std::complex<fptype>(0.0, 0.0), d_m, d_n);
|
||||||
|
|
||||||
|
// FIXME: this is a workaround. Support ILP64 in HAL API.
|
||||||
|
#if defined (ACCELERATE_NEW_LAPACK) && defined (ACCELERATE_LAPACK_ILP64)
|
||||||
|
int M = a_m, N = d_n, K = a_n;
|
||||||
|
if(typeid(fptype) == typeid(float)) {
|
||||||
|
auto src1_cast = (std::complex<float>*)(src1);
|
||||||
|
auto src2_cast = (std::complex<float>*)(src2);
|
||||||
|
auto dst_cast = (std::complex<float>*)(dst);
|
||||||
|
long lda = ldsrc1, ldb = ldsrc2, ldc = lddst;
|
||||||
|
cblas_cgemm(CblasRowMajor, transA, transB, M, N, K, (std::complex<float>*)&cAlpha, src1_cast, lda, src2_cast, ldb, (std::complex<float>*)&cBeta, dst_cast, ldc);
|
||||||
|
}
|
||||||
|
else if(typeid(fptype) == typeid(double)) {
|
||||||
|
auto src1_cast = (std::complex<double>*)(src1);
|
||||||
|
auto src2_cast = (std::complex<double>*)(src2);
|
||||||
|
auto dst_cast = (std::complex<double>*)(dst);
|
||||||
|
long lda = ldsrc1, ldb = ldsrc2, ldc = lddst;
|
||||||
|
cblas_zgemm(CblasRowMajor, transA, transB, M, N, K, (std::complex<double>*)&cAlpha, src1_cast, lda, src2_cast, ldb, (std::complex<double>*)&cBeta, dst_cast, ldc);
|
||||||
|
}
|
||||||
|
#else
|
||||||
if(typeid(fptype) == typeid(float))
|
if(typeid(fptype) == typeid(float))
|
||||||
cblas_cgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, (float*)reinterpret_cast<fptype(&)[2]>(cAlpha), (float*)src1, ldsrc1, (float*)src2, ldsrc2, (float*)reinterpret_cast<fptype(&)[2]>(cBeta), (float*)dst, lddst);
|
cblas_cgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, (float*)reinterpret_cast<fptype(&)[2]>(cAlpha), (float*)src1, ldsrc1, (float*)src2, ldsrc2, (float*)reinterpret_cast<fptype(&)[2]>(cBeta), (float*)dst, lddst);
|
||||||
else if(typeid(fptype) == typeid(double))
|
else if(typeid(fptype) == typeid(double))
|
||||||
cblas_zgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, (double*)reinterpret_cast<fptype(&)[2]>(cAlpha), (double*)src1, ldsrc1, (double*)src2, ldsrc2, (double*)reinterpret_cast<fptype(&)[2]>(cBeta), (double*)dst, lddst);
|
cblas_zgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, (double*)reinterpret_cast<fptype(&)[2]>(cAlpha), (double*)src1, ldsrc1, (double*)src2, ldsrc2, (double*)reinterpret_cast<fptype(&)[2]>(cBeta), (double*)dst, lddst);
|
||||||
|
#endif
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user