mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 14:36:36 +08:00
Merge pull request #27184 from CodeLinaro:gemm_fastcv_hal
FastCV gemm hal #27184 FastCV hal for gemm 32f ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
485c7d5be7
commit
edccfa7961
19
3rdparty/fastcv/include/fastcv_hal_core.hpp
vendored
19
3rdparty/fastcv/include/fastcv_hal_core.hpp
vendored
@ -34,7 +34,8 @@
|
||||
#define cv_hal_mul32f fastcv_hal_mul32f
|
||||
#undef cv_hal_SVD32f
|
||||
#define cv_hal_SVD32f fastcv_hal_SVD32f
|
||||
|
||||
#undef cv_hal_gemm32f
|
||||
#define cv_hal_gemm32f fastcv_hal_gemm32f
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief look-up table transform of an array.
|
||||
@ -250,4 +251,20 @@ int fastcv_hal_SVD32f(
|
||||
int n,
|
||||
int flags);
|
||||
|
||||
int fastcv_hal_gemm32f(
|
||||
const float* src1,
|
||||
size_t src1_step,
|
||||
const float* src2,
|
||||
size_t src2_step,
|
||||
float alpha,
|
||||
const float* src3,
|
||||
size_t src3_step,
|
||||
float beta,
|
||||
float* dst,
|
||||
size_t dst_step,
|
||||
int m,
|
||||
int n,
|
||||
int k,
|
||||
int flags);
|
||||
|
||||
#endif
|
||||
|
115
3rdparty/fastcv/src/fastcv_hal_core.cpp
vendored
115
3rdparty/fastcv/src/fastcv_hal_core.cpp
vendored
@ -623,4 +623,119 @@ int fastcv_hal_SVD32f(
|
||||
}
|
||||
|
||||
CV_HAL_RETURN(status, fastcv_hal_SVD32f);
|
||||
}
|
||||
|
||||
int fastcv_hal_gemm32f(
|
||||
const float* src1,
|
||||
size_t src1_step,
|
||||
const float* src2,
|
||||
size_t src2_step,
|
||||
float alpha,
|
||||
const float* src3,
|
||||
size_t src3_step,
|
||||
float beta,
|
||||
float* dst,
|
||||
size_t dst_step,
|
||||
int m,
|
||||
int n,
|
||||
int k,
|
||||
int flags)
|
||||
{
|
||||
cv::Mat src1_t, src2_t, src3_t, dst_temp1;
|
||||
int height_a = m, width_a = n, width_d = k;
|
||||
const float *src1p = src1, *src2p = src2, *src3p = src3;
|
||||
|
||||
INITIALIZATION_CHECK;
|
||||
|
||||
if((flags & (cv::GEMM_1_T)) && (flags & (cv::GEMM_2_T)))
|
||||
{
|
||||
height_a = n; width_a = m;
|
||||
}
|
||||
else if(flags & (cv::GEMM_1_T))
|
||||
{
|
||||
src1_t = cv::Mat(width_a, height_a, CV_32FC1);
|
||||
fcvTransposef32_v2(src1, width_a, height_a, src1_step, src1_t.ptr<float>(), src1_t.step[0]);
|
||||
src1p = src1_t.ptr<float>();
|
||||
src1_step = src1_t.step[0];
|
||||
height_a = n; width_a = m;
|
||||
}
|
||||
else if(flags & (cv::GEMM_2_T))
|
||||
{
|
||||
src2_t = cv::Mat(width_a, width_d, CV_32FC1);
|
||||
fcvTransposef32_v2(src2, width_a, width_d, src2_step, src2_t.ptr<float>(), src2_t.step[0]);
|
||||
src2p = src2_t.ptr<float>();
|
||||
src2_step = src2_t.step[0];
|
||||
}
|
||||
|
||||
if((flags & cv::GEMM_3_T) && beta != 0.0 && src3 != NULL)
|
||||
{
|
||||
src3_t = cv::Mat(height_a, width_d, CV_32FC1);
|
||||
fcvTransposef32_v2(src3, height_a, width_d, src3_step, src3_t.ptr<float>(), src3_t.step[0]);
|
||||
src3p = src3_t.ptr<float>();
|
||||
src3_step = src3_t.step[0];
|
||||
}
|
||||
|
||||
bool inplace = false;
|
||||
size_t dst_stride;
|
||||
float *dstp = NULL;
|
||||
|
||||
if(src1 == dst || src2 == dst || src3 == dst)
|
||||
{
|
||||
dst_temp1 = cv::Mat(height_a, width_d, CV_32FC1);
|
||||
dstp = dst_temp1.ptr<float>();
|
||||
inplace = true;
|
||||
dst_stride = dst_temp1.step[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
dstp = dst;
|
||||
dst_stride = dst_step;
|
||||
}
|
||||
|
||||
float *dstp1 = dstp;
|
||||
|
||||
fcvStatus status = FASTCV_SUCCESS;
|
||||
|
||||
if(alpha != 0.0)
|
||||
{
|
||||
if((flags & (cv::GEMM_1_T)) && (flags & (cv::GEMM_2_T)))
|
||||
{
|
||||
cv::Mat dst_temp2 = cv::Mat(k, n, CV_32FC1);
|
||||
fcvMatrixMultiplyf32_v2(src2p, m, k, src2_step, src1p, n, src1_step,
|
||||
dst_temp2.ptr<float>(), dst_temp2.step[0]);
|
||||
fcvTransposef32_v2(dst_temp2.ptr<float>(), n, k, dst_temp2.step[0], dstp, dst_stride);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
status = fcvMatrixMultiplyf32_v2(src1p, width_a, height_a, src1_step, src2p, width_d,
|
||||
src2_step, dstp, dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
if(alpha != 1.0 && alpha != 0.0 && status == FASTCV_SUCCESS)
|
||||
{
|
||||
status = fcvMultiplyScalarf32(dstp, width_d, height_a, dst_stride, alpha, dstp1, dst_stride);
|
||||
}
|
||||
|
||||
if(src3 != NULL && beta != 0.0 && status == FASTCV_SUCCESS)
|
||||
{
|
||||
cv::Mat dst3 = cv::Mat(height_a, width_d, CV_32FC1);
|
||||
if(beta != 1.0)
|
||||
{
|
||||
status = fcvMultiplyScalarf32(src3p, width_d, height_a, src3_step, beta, (float32_t*)dst3.data, dst3.step);
|
||||
if(status == FASTCV_SUCCESS)
|
||||
fcvAddf32_v2(dstp, width_d, height_a, dst_stride, (float32_t*)dst3.data, dst3.step, dstp1, dst_stride);
|
||||
}
|
||||
else
|
||||
fcvAddf32_v2(dstp, width_d, height_a, dst_stride, src3p, src3_step, dstp1, dst_stride);
|
||||
}
|
||||
|
||||
if(inplace)
|
||||
{
|
||||
cv::Mat dst_mat = cv::Mat(height_a, width_d, CV_32FC1, (void*)dst, dst_step);
|
||||
dst_temp1.copyTo(dst_mat);
|
||||
}
|
||||
|
||||
CV_HAL_RETURN(status,hal_gemm32f);
|
||||
}
|
@ -1214,7 +1214,7 @@ bool CV_OperationsTest::TestSVD()
|
||||
cvtest::norm(Vt*Vt.t(), I, CV_C) > FLT_EPSILON ||
|
||||
W.at<float>(2) < 0 || W.at<float>(1) < W.at<float>(2) ||
|
||||
W.at<float>(0) < W.at<float>(1) ||
|
||||
cvtest::norm(U*Mat::diag(W)*Vt, Q, CV_C) > FLT_EPSILON )
|
||||
cvtest::norm(U*Mat::diag(W)*Vt, Q, CV_C) > FLT_EPSILON*2 )
|
||||
throw test_excep();
|
||||
}
|
||||
catch(const test_excep&)
|
||||
|
Loading…
Reference in New Issue
Block a user