Merge branch 4.x
Commit: 97620c053f

3rdparty/openjpeg/CMakeLists.txt (vendored, 1 change)
@@ -16,6 +16,7 @@ ocv_warnings_disable(CMAKE_C_FLAGS
     -Wunused-but-set-variable # clang15
     -Wmissing-prototypes # clang, function opj_t1_ht_decode_cblk
     -Wmissing-declarations # gcc, function opj_t1_ht_decode_cblk
+    -Wdocumentation # clang
 )

 #-----------------------------------------------------------------------------
3rdparty/protobuf/CMakeLists.txt (vendored, 2 changes)

@@ -27,6 +27,8 @@ else()
     -Wimplicit-fallthrough
     -Warray-bounds # GCC 9+
     -Wstringop-overflow -Wstringop-overread # GCC 11-12
+    -Wextra-semi # clang
+    -Wcomma # clang
   )
 endif()
 if(CV_ICC)
@@ -209,7 +209,7 @@ if(NOT ${found})
     message(STATUS " PYTHON3_NUMPY_INCLUDE_DIRS")
 else()
   # Attempt to discover the NumPy include directory. If this succeeds, then build python API with NumPy
-  execute_process(COMMAND "${_executable}" -c "import os; os.environ['DISTUTILS_USE_SDK']='1'; import numpy.distutils; print(os.pathsep.join(numpy.distutils.misc_util.get_numpy_include_dirs()))"
+  execute_process(COMMAND "${_executable}" -c "import numpy; print(numpy.get_include())"
                   RESULT_VARIABLE _numpy_process
                   OUTPUT_VARIABLE _numpy_include_dirs
                   OUTPUT_STRIP_TRAILING_WHITESPACE)
@@ -186,6 +186,8 @@ class PatternMaker:
         yspacing = (self.height - self.rows * self.square_size) / 2.0

         ch_ar_border = (self.square_size - self.aruco_marker_size)/2
+        if ch_ar_border < side*0.7:
+            print("Marker border {} is less than 70% of ArUco pin size {}. Please increase --square_size or decrease --marker_size for stable board detection".format(ch_ar_border, int(side)))
         marker_id = 0
         for y in range(0, self.rows):
             for x in range(0, self.cols):
@@ -283,6 +285,9 @@ def main():
     else:
         raise ValueError("The marker {},{} is outside the checkerboard".format(x, y))

+    if p_type == "charuco_board" and aruco_marker_size >= square_size:
+        raise ValueError("ArUco markers size must be smaller than square size")
+
     pm = PatternMaker(columns, rows, output, units, square_size, radius_rate, page_width, page_height, markers, aruco_marker_size, dict_file)
     # dict for easy lookup of pattern type
     mp = {"circles": pm.make_circles_pattern, "acircles": pm.make_acircles_pattern,
@@ -112,7 +112,7 @@ public:
      * 2 columns 1 channel
      * @param _m2 destination points containing (x,y), depth is CV_32F with 1 column 2 channels or
      * 2 columns 1 channel
-     * @param _model, CV_64FC1, 3x3, normalized, i.e., the last element is 1
+     * @param _model CV_64FC1, 3x3, normalized, i.e., the last element is 1
      */
     int runKernel( InputArray _m1, InputArray _m2, OutputArray _model ) const CV_OVERRIDE
     {
@@ -187,7 +187,7 @@ public:
      * @param _m1 depth CV_32F, 1-channel with 2 columns or 2-channel with 1 column
      * @param _m2 depth CV_32F, 1-channel with 2 columns or 2-channel with 1 column
      * @param _model CV_64FC1, 3x3
-     * @param _err, output, CV_32FC1, square of the L2 norm
+     * @param _err output, CV_32FC1, square of the L2 norm
      */
     void computeError( InputArray _m1, InputArray _m2, InputArray _model, OutputArray _err ) const CV_OVERRIDE
     {
@@ -111,7 +111,7 @@ private:
     /**
      * @brief Computes the translation solution for a given rotation solution
      * @param objectPoints Array of corresponding object points, 1xN/Nx1 3-channel where N is the number of points
-     * @param normalizedImagePoints Array of corresponding image points (undistorted), 1xN/Nx1 2-channel where N is the number of points
+     * @param normalizedImgPoints Array of corresponding image points (undistorted), 1xN/Nx1 2-channel where N is the number of points
      * @param R Rotation solution (3x1 rotation vector)
      * @param t Translation solution (3x1 rotation vector)
      */
@@ -220,10 +220,10 @@ private:

     /**
      * @brief Computes the average depth of an object given its pose in camera coordinates
-     * @param objectPoints: Object points defined in 3D object space
-     * @param rvec: Rotation component of pose
-     * @param tvec: Translation component of pose
-     * @return: average depth of the object
+     * @param objectPoints Object points defined in 3D object space
+     * @param rvec Rotation component of pose
+     * @param tvec Translation component of pose
+     * @return average depth of the object
      */
     double meanSceneDepth(InputArray objectPoints, InputArray rvec, InputArray tvec);

@@ -220,8 +220,8 @@ int p3p::solve(double R[4][3][3], double t[4][3],
 /// Only the solution to the main branch.
 /// Reference : X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang; "Complete Solution Classification for the Perspective-Three-Point Problem"
 /// IEEE Trans. on PAMI, vol. 25, No. 8, August 2003
-/// \param lengths3D Lengths of line segments up to four solutions.
-/// \param dist3D Distance between 3D points in pairs |BC|, |AC|, |AB|.
+/// \param lengths Lengths of line segments up to four solutions.
+/// \param distances Distance between 3D points in pairs |BC|, |AC|, |AB|.
 /// \param cosines Cosine of the angles /_BPC, /_APC, /_APB.
 /// \returns Number of solutions.
 /// WARNING: NOT ALL THE DEGENERATE CASES ARE IMPLEMENTED
@@ -89,7 +89,7 @@ namespace cv {
  * @param ep outlier ratio
  * @param modelPoints number of model points required for estimation
  * @param maxIters maximum number of iterations
- * @return
+ * @return The number of iterations according to the formula
  * \f[
  * \frac{\ln(1-p)}{\ln\left(1-(1-ep)^\mathrm{modelPoints}\right)}
  * \f]
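The documented formula is easy to sanity-check numerically. A minimal standalone sketch of it (an illustration only, not OpenCV's actual implementation, which additionally guards the logarithms and rounds carefully):

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Iterations needed so that, with confidence p, at least one RANSAC sample
    // of `modelPoints` matches is drawn entirely from inliers, given outlier
    // ratio ep; the result is capped at maxIters.
    static int ransacNumIters(double p, double ep, int modelPoints, int maxIters)
    {
        double num = std::log(1.0 - p);
        double denom = std::log(1.0 - std::pow(1.0 - ep, modelPoints));
        if (denom >= 0.0)           // degenerate input, fall back to the cap
            return maxIters;
        return std::min(maxIters, (int)std::ceil(num / denom));
    }

    int main()
    {
        // 99% confidence, 30% outliers, 4-point sample (homography): ~17 iterations
        std::printf("%d\n", ransacNumIters(0.99, 0.3, 4, 2000));
        return 0;
    }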
@@ -486,7 +486,7 @@ void rhoSeed(Ptr<RHO_HEST> p, uint64_t seed){
 * Estimates the homography using the given context, matches and parameters to
 * PROSAC.
 *
-* @param [in/out] p The context to use for homography estimation. Must
+* @param [in,out] p The context to use for homography estimation. Must
 *                  be already initialized. Cannot be NULL.
 * @param [in] src The pointer to the source points of the matches.
 *                 Must be aligned to 4 bytes. Cannot be NULL.
@@ -206,7 +206,7 @@ void rhoSeed(Ptr<RHO_HEST> p, uint64_t seed);
 * homography with at least the minimum required support, and 0 if it was not.
 *
 *
-* @param [in/out] p The context to use for homography estimation. Must
+* @param [in,out] p The context to use for homography estimation. Must
 *                  be already initialized. Cannot be NULL.
 * @param [in] src The pointer to the source points of the matches.
 *                 Must be aligned to 4 bytes. Cannot be NULL.
@@ -89,8 +89,8 @@ public:
         s2(_s2),
         s3(_s3),
         s4(_s4) {
-#if CV_SIMD_64F
-        for (int i = 0; i < 2 * v_float64::nlanes; ++i)
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
+        for (int i = 0; i < 2 * VTraits<v_float64>::vlanes(); ++i)
         {
             s_x[i] = ir[0] * i;
             s_y[i] = ir[3] * i;
@@ -123,26 +123,26 @@ public:
         else
             CV_Assert(m1 != NULL);

-#if CV_SIMD_64F
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
         const v_float64 v_one = vx_setall_f64(1.0);
-        for (; j <= size.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes, _x += 2*v_float64::nlanes * ir[0], _y += 2*v_float64::nlanes * ir[3], _w += 2*v_float64::nlanes * ir[6])
+        for (; j <= size.width - 2*VTraits<v_float64>::vlanes(); j += 2*VTraits<v_float64>::vlanes(), _x += 2*VTraits<v_float64>::vlanes() * ir[0], _y += 2*VTraits<v_float64>::vlanes() * ir[3], _w += 2*VTraits<v_float64>::vlanes() * ir[6])
         {
             v_float64 m_0, m_1, m_2, m_3;
-            m_2 = v_one / (vx_setall_f64(_w) + vx_load(s_w));
-            m_3 = v_one / (vx_setall_f64(_w) + vx_load(s_w + v_float64::nlanes));
+            m_2 = v_div(v_one, v_add(vx_setall_f64(_w), vx_load(this->s_w)));
+            m_3 = v_div(v_one, v_add(vx_setall_f64(_w), vx_load(this->s_w + VTraits<v_float64>::vlanes())));
             m_0 = vx_setall_f64(_x); m_1 = vx_setall_f64(_y);
-            v_float64 x_0 = (m_0 + vx_load(s_x)) * m_2;
-            v_float64 x_1 = (m_0 + vx_load(s_x + v_float64::nlanes)) * m_3;
-            v_float64 y_0 = (m_1 + vx_load(s_y)) * m_2;
-            v_float64 y_1 = (m_1 + vx_load(s_y + v_float64::nlanes)) * m_3;
+            v_float64 x_0 = v_mul(v_add(m_0, vx_load(this->s_x)), m_2);
+            v_float64 x_1 = v_mul(v_add(m_0, vx_load(this->s_x + VTraits<v_float64>::vlanes())), m_3);
+            v_float64 y_0 = v_mul(v_add(m_1, vx_load(this->s_y)), m_2);
+            v_float64 y_1 = v_mul(v_add(m_1, vx_load(this->s_y + VTraits<v_float64>::vlanes())), m_3);

-            v_float64 xd_0 = x_0 * x_0;
-            v_float64 yd_0 = y_0 * y_0;
-            v_float64 xd_1 = x_1 * x_1;
-            v_float64 yd_1 = y_1 * y_1;
+            v_float64 xd_0 = v_mul(x_0, x_0);
+            v_float64 yd_0 = v_mul(y_0, y_0);
+            v_float64 xd_1 = v_mul(x_1, x_1);
+            v_float64 yd_1 = v_mul(y_1, y_1);

-            v_float64 r2_0 = xd_0 + yd_0;
-            v_float64 r2_1 = xd_1 + yd_1;
+            v_float64 r2_0 = v_add(xd_0, yd_0);
+            v_float64 r2_1 = v_add(xd_1, yd_1);

             m_1 = vx_setall_f64(k3);
             m_2 = vx_setall_f64(k2);
@@ -151,18 +151,18 @@ public:
             m_1 = v_muladd(v_muladd(v_muladd(m_1, r2_1, m_2), r2_1, m_3), r2_1, v_one);
             m_3 = vx_setall_f64(k6);
             m_2 = vx_setall_f64(k5);
-            m_0 /= v_muladd(v_muladd(v_muladd(m_3, r2_0, m_2), r2_0, vx_setall_f64(k4)), r2_0, v_one);
-            m_1 /= v_muladd(v_muladd(v_muladd(m_3, r2_1, m_2), r2_1, vx_setall_f64(k4)), r2_1, v_one);
+            m_0 = v_div(m_0, v_muladd(v_muladd(v_muladd(m_3, r2_0, m_2), r2_0, vx_setall_f64(this->k4)), r2_0, v_one));
+            m_1 = v_div(m_1, v_muladd(v_muladd(v_muladd(m_3, r2_1, m_2), r2_1, vx_setall_f64(this->k4)), r2_1, v_one));

             m_3 = vx_setall_f64(2.0);
             xd_0 = v_muladd(m_3, xd_0, r2_0);
             yd_0 = v_muladd(m_3, yd_0, r2_0);
             xd_1 = v_muladd(m_3, xd_1, r2_1);
             yd_1 = v_muladd(m_3, yd_1, r2_1);
-            m_2 = x_0 * y_0 * m_3;
-            m_3 = x_1 * y_1 * m_3;
+            m_2 = v_mul(v_mul(x_0, y_0), m_3);
+            m_3 = v_mul(v_mul(x_1, y_1), m_3);

-            x_0 *= m_0; y_0 *= m_0; x_1 *= m_1; y_1 *= m_1;
+            x_0 = v_mul(x_0, m_0); y_0 = v_mul(y_0, m_0); x_1 = v_mul(x_1, m_1); y_1 = v_mul(y_1, m_1);

             m_0 = vx_setall_f64(p1);
             m_1 = vx_setall_f64(p2);
@@ -176,8 +176,8 @@ public:
             xd_1 = v_muladd(m_0, m_3, xd_1);
             yd_1 = v_muladd(m_1, m_3, yd_1);

-            m_0 = r2_0 * r2_0;
-            m_1 = r2_1 * r2_1;
+            m_0 = v_mul(r2_0, r2_0);
+            m_1 = v_mul(r2_1, r2_1);
             m_2 = vx_setall_f64(s2);
             m_3 = vx_setall_f64(s1);
             xd_0 = v_muladd(m_3, r2_0, v_muladd(m_2, m_0, xd_0));
@@ -203,17 +203,17 @@ public:
             r2_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2));
             r2_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2));
             m_0 = vx_setzero_f64();
-            r2_0 = v_select(r2_0 == m_0, v_one, v_one / r2_0);
-            r2_1 = v_select(r2_1 == m_0, v_one, v_one / r2_1);
+            r2_0 = v_select(v_eq(r2_0, m_0), v_one, v_div(v_one, r2_0));
+            r2_1 = v_select(v_eq(r2_1, m_0), v_one, v_div(v_one, r2_1));

             m_0 = vx_setall_f64(fx);
             m_1 = vx_setall_f64(u0);
             m_2 = vx_setall_f64(fy);
             m_3 = vx_setall_f64(v0);
-            x_0 = v_muladd(m_0 * r2_0, x_0, m_1);
-            y_0 = v_muladd(m_2 * r2_0, y_0, m_3);
-            x_1 = v_muladd(m_0 * r2_1, x_1, m_1);
-            y_1 = v_muladd(m_2 * r2_1, y_1, m_3);
+            x_0 = v_muladd(v_mul(m_0, r2_0), x_0, m_1);
+            y_0 = v_muladd(v_mul(m_2, r2_0), y_0, m_3);
+            x_1 = v_muladd(v_mul(m_0, r2_1), x_1, m_1);
+            y_1 = v_muladd(v_mul(m_2, r2_1), y_1, m_3);

             if (m1type == CV_32FC1)
             {
@@ -225,20 +225,20 @@ public:
                 v_float32 mf0, mf1;
                 v_zip(v_cvt_f32(x_0, x_1), v_cvt_f32(y_0, y_1), mf0, mf1);
                 v_store(&m1f[j * 2], mf0);
-                v_store(&m1f[j * 2 + v_float32::nlanes], mf1);
+                v_store(&m1f[j * 2 + VTraits<v_float32>::vlanes()], mf1);
             }
             else // m1type == CV_16SC2
             {
                 m_0 = vx_setall_f64(INTER_TAB_SIZE);
-                x_0 *= m_0; x_1 *= m_0; y_0 *= m_0; y_1 *= m_0;
+                x_0 = v_mul(x_0, m_0); x_1 = v_mul(x_1, m_0); y_0 = v_mul(y_0, m_0); y_1 = v_mul(y_1, m_0);

                 v_int32 mask = vx_setall_s32(INTER_TAB_SIZE - 1);
                 v_int32 iu = v_round(x_0, x_1);
                 v_int32 iv = v_round(y_0, y_1);

-                v_pack_u_store(&m2[j], (iu & mask) + (iv & mask) * vx_setall_s32(INTER_TAB_SIZE));
+                v_pack_u_store(&m2[j], v_add(v_and(iu, mask), v_mul(v_and(iv, mask), vx_setall_s32(INTER_TAB_SIZE))));
                 v_int32 out0, out1;
-                v_zip(iu >> INTER_BITS, iv >> INTER_BITS, out0, out1);
+                v_zip(v_shr<INTER_BITS>(iu), v_shr<INTER_BITS>(iv), out0, out1);
                 v_store(&m1[j * 2], v_pack(out0, out1));
             }
         }
@@ -302,10 +302,10 @@ private:
     double s2;
     double s3;
     double s4;
-#if CV_SIMD_64F
-    double s_x[2*v_float64::nlanes];
-    double s_y[2*v_float64::nlanes];
-    double s_w[2*v_float64::nlanes];
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
+    double s_x[2*VTraits<v_float64>::max_nlanes];
+    double s_y[2*VTraits<v_float64>::max_nlanes];
+    double s_w[2*VTraits<v_float64>::max_nlanes];
 #endif
 };
 }
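All of the undistortion changes above follow one pattern: OpenCV's universal intrinsics move from operator overloads (`a + b`, `a * b`) and compile-time lane counts (`v_float64::nlanes`) to function-style wrappers (`v_add`, `v_mul`, `v_div`, `v_eq`, `v_shr`) and runtime queries (`VTraits<T>::vlanes()`, with `VTraits<T>::max_nlanes` for sizing arrays), because scalable backends such as RISC-V RVV have no compile-time vector length. A minimal sketch of the new style (simplified for illustration, not taken from the patch):

    #include <opencv2/core/hal/intrin.hpp>

    #if (CV_SIMD || CV_SIMD_SCALABLE)
    // dst[i] = src[i] * a + b, written with function-style universal intrinsics.
    static void scaleOffset(const float* src, float* dst, int n, float a, float b)
    {
        using namespace cv;
        const int step = VTraits<v_float32>::vlanes();  // lane count at run time
        const v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b);
        int i = 0;
        for (; i <= n - step; i += step)
            v_store(dst + i, v_add(v_mul(vx_load(src + i), va), vb));
        for (; i < n; ++i)  // scalar tail
            dst[i] = src[i] * a + b;
    }
    #endif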
@@ -203,12 +203,12 @@ class Chessboard: public cv::Feature2D
          * d12/d34 = d13/d24
          *
          * point order on the line:
-         * pt1 --> pt2 --> pt3 --> pt4
+         * p0 --> p1 --> p2 --> p3
          *
-         * \param[in] pt1 First point coordinate
-         * \param[in] pt2 Second point coordinate
-         * \param[in] pt3 Third point coordinate
-         * \param[out] pt4 Forth point coordinate
+         * \param[in] p0 First point coordinate
+         * \param[in] p1 Second point coordinate
+         * \param[in] p2 Third point coordinate
+         * \param[out] p3 Forth point coordinate
          *
          */
         static bool estimatePoint(const cv::Point2f &p0,const cv::Point2f &p1,const cv::Point2f &p2,cv::Point2f &p3);
@@ -309,7 +309,7 @@ class Chessboard: public cv::Feature2D
          * \brief Draws the corners into the given image
          *
          * \param[in] m The image
-         * \param[out] m The resulting image
+         * \param[out] out The resulting image
          * \param[in] H optional homography to calculate search area
          *
          */
@@ -668,7 +668,7 @@ class Chessboard: public cv::Feature2D
          * \brief Calculates the average edge sharpness for the chessboard
          *
          * \param[in] image The image where the chessboard was detected
-         * \param[in] rise_distante Rise distance 0.8 means 10% ... 90%
+         * \param[in] rise_distance Rise distance 0.8 means 10% ... 90%
          * \param[in] vertical by default only edge response for horiontal lines are calculated
          *
          * \returns Scalar(sharpness, average min_val, average max_val)
@@ -66,7 +66,7 @@ namespace cv {
  * @param ep outlier ratio
  * @param modelPoints number of model points required for estimation
  * @param maxIters maximum number of iterations
- * @return
+ * @return The number of iterations according to the formula
  * \f[
  * \frac{\ln(1-p)}{\ln\left(1-(1-ep)^\mathrm{modelPoints}\right)}
  * \f]
@@ -36,15 +36,15 @@
 namespace cv {

 template <typename T>
-DualQuat<T>::DualQuat():w(0), x(0), y(0), z(0), w_(0), x_(0), y_(0), z_(0){};
+DualQuat<T>::DualQuat():w(0), x(0), y(0), z(0), w_(0), x_(0), y_(0), z_(0){}

 template <typename T>
 DualQuat<T>::DualQuat(const T vw, const T vx, const T vy, const T vz, const T _w, const T _x, const T _y, const T _z):
-                      w(vw), x(vx), y(vy), z(vz), w_(_w), x_(_x), y_(_y), z_(_z){};
+                      w(vw), x(vx), y(vy), z(vz), w_(_w), x_(_x), y_(_y), z_(_z){}

 template <typename T>
 DualQuat<T>::DualQuat(const Vec<T, 8> &q):w(q[0]), x(q[1]), y(q[2]), z(q[3]),
-                                          w_(q[4]), x_(q[5]), y_(q[6]), z_(q[7]){};
+                                          w_(q[4]), x_(q[5]), y_(q[6]), z_(q[7]){}

 template <typename T>
 DualQuat<T> DualQuat<T>::createFromQuat(const Quat<T> &realPart, const Quat<T> &dualPart)
@@ -987,6 +987,15 @@ namespace CV__SIMD_NAMESPACE {
     { \
         return a op b; \
     }
+    #define OPENCV_HAL_WRAP_EQ_OP(_Tpvec) \
+    inline _Tpvec v_eq(const _Tpvec& a, const _Tpvec& b) \
+    { \
+        return a == b; \
+    } \
+    inline _Tpvec v_ne(const _Tpvec& a, const _Tpvec& b) \
+    { \
+        return a != b; \
+    }

     #define OPENCV_HAL_WRAP_CMP(_Tpvec) \
     OPENCV_HAL_WRAP_CMP_OP(_Tpvec, eq, ==) \
@@ -999,11 +1008,11 @@ namespace CV__SIMD_NAMESPACE {
     OPENCV_HAL_WRAP_CMP(v_uint8)
     OPENCV_HAL_WRAP_CMP(v_uint16)
     OPENCV_HAL_WRAP_CMP(v_uint32)
-    // OPENCV_HAL_WRAP_CMP(v_uint64)
+    OPENCV_HAL_WRAP_EQ_OP(v_uint64)
     OPENCV_HAL_WRAP_CMP(v_int8)
     OPENCV_HAL_WRAP_CMP(v_int16)
     OPENCV_HAL_WRAP_CMP(v_int32)
-    // OPENCV_HAL_WRAP_CMP(v_int64)
+    OPENCV_HAL_WRAP_EQ_OP(v_int64)
     OPENCV_HAL_WRAP_CMP(v_float32)
 #if CV_SIMD_64F
     OPENCV_HAL_WRAP_CMP(v_float64)
@@ -1012,9 +1021,11 @@ namespace CV__SIMD_NAMESPACE {
     OPENCV_HAL_WRAP_CMP(v_uint8x16)
     OPENCV_HAL_WRAP_CMP(v_uint16x8)
     OPENCV_HAL_WRAP_CMP(v_uint32x4)
+    OPENCV_HAL_WRAP_EQ_OP(v_uint64x2)
     OPENCV_HAL_WRAP_CMP(v_int8x16)
     OPENCV_HAL_WRAP_CMP(v_int16x8)
     OPENCV_HAL_WRAP_CMP(v_int32x4)
+    OPENCV_HAL_WRAP_EQ_OP(v_int64x2)
     OPENCV_HAL_WRAP_CMP(v_float32x4)
 #if CV_SIMD_64F
     OPENCV_HAL_WRAP_CMP(v_float64x2)
@@ -1024,9 +1035,11 @@ namespace CV__SIMD_NAMESPACE {
     OPENCV_HAL_WRAP_CMP(v_uint8x32)
     OPENCV_HAL_WRAP_CMP(v_uint16x16)
     OPENCV_HAL_WRAP_CMP(v_uint32x8)
+    OPENCV_HAL_WRAP_EQ_OP(v_uint64x4)
     OPENCV_HAL_WRAP_CMP(v_int8x32)
     OPENCV_HAL_WRAP_CMP(v_int16x16)
     OPENCV_HAL_WRAP_CMP(v_int32x8)
+    OPENCV_HAL_WRAP_EQ_OP(v_int64x4)
     OPENCV_HAL_WRAP_CMP(v_float32x8)
 #if CV_SIMD_64F
     OPENCV_HAL_WRAP_CMP(v_float64x4)
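The point of the new macro: `OPENCV_HAL_WRAP_CMP` generates the whole comparison family via `OPENCV_HAL_WRAP_CMP_OP` (equality plus the ordered comparisons), which is why the 64-bit integer types were previously commented out: most SIMD backends only provide equality, not ordered comparisons, for 64-bit integer lanes. `OPENCV_HAL_WRAP_EQ_OP` wraps just `v_eq`/`v_ne`, so `v_uint64`/`v_int64` finally get the portable subset. A small usage sketch (simplified, not from the patch):

    #include <opencv2/core/hal/intrin.hpp>
    #include <vector>

    #if (CV_SIMD || CV_SIMD_SCALABLE)
    // Count positions where two uint64 arrays agree, using only v_eq -
    // the one comparison that is portable for 64-bit integer lanes.
    static int countEqual64(const cv::uint64* a, const cv::uint64* b, int n)
    {
        using namespace cv;
        const int step = VTraits<v_uint64>::vlanes();
        std::vector<uint64> mask(step);
        int i = 0, cnt = 0;
        for (; i <= n - step; i += step)
        {
            v_store(mask.data(), v_eq(vx_load(a + i), vx_load(b + i))); // ~0 where equal
            for (int k = 0; k < step; ++k)
                cnt += mask[k] ? 1 : 0;
        }
        for (; i < n; ++i)  // scalar tail
            cnt += (a[i] == b[i]) ? 1 : 0;
        return cnt;
    }
    #endif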
@@ -188,4 +188,4 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END

 //! @endcond

 } // cv::
@@ -0,0 +1,33 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// 0.11 -> 0.12 compatibility
+
+#ifndef _RVV_IMPLICIT_VXRM
+#define _RVV_IMPLICIT_VXRM __RISCV_VXRM_RNU
+#endif
+
+// NOTE: masked should go first to avoid extra substitution (3 arg -> 4 arg -> 5 arg)
+
+// masked
+#define __riscv_vaadd(_1, _2, _3, _4) __riscv_vaadd(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
+#define __riscv_vasub(_1, _2, _3, _4) __riscv_vasub(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
+#define __riscv_vaaddu(_1, _2, _3, _4) __riscv_vaaddu(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
+#define __riscv_vasubu(_1, _2, _3, _4) __riscv_vasubu(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
+#define __riscv_vsmul(_1, _2, _3, _4) __riscv_vsmul(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
+#define __riscv_vssra(_1, _2, _3, _4) __riscv_vssra(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
+#define __riscv_vssrl(_1, _2, _3, _4) __riscv_vssrl(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
+#define __riscv_vnclip(_1, _2, _3, _4) __riscv_vnclip(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
+#define __riscv_vnclipu(_1, _2, _3, _4) __riscv_vnclipu(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
+
+// unmasked
+#define __riscv_vaadd(_1, _2, _3) __riscv_vaadd(_1, _2, _RVV_IMPLICIT_VXRM, _3)
+#define __riscv_vasub(_1, _2, _3) __riscv_vasub(_1, _2, _RVV_IMPLICIT_VXRM, _3)
+#define __riscv_vaaddu(_1, _2, _3) __riscv_vaaddu(_1, _2, _RVV_IMPLICIT_VXRM, _3)
+#define __riscv_vasubu(_1, _2, _3) __riscv_vasubu(_1, _2, _RVV_IMPLICIT_VXRM, _3)
+#define __riscv_vsmul(_1, _2, _3) __riscv_vsmul(_1, _2, _RVV_IMPLICIT_VXRM, _3)
+#define __riscv_vssra(_1, _2, _3) __riscv_vssra(_1, _2, _RVV_IMPLICIT_VXRM, _3)
+#define __riscv_vssrl(_1, _2, _3) __riscv_vssrl(_1, _2, _RVV_IMPLICIT_VXRM, _3)
+#define __riscv_vnclip(_1, _2, _3) __riscv_vnclip(_1, _2, _RVV_IMPLICIT_VXRM, _3)
+#define __riscv_vnclipu(_1, _2, _3) __riscv_vnclipu(_1, _2, _RVV_IMPLICIT_VXRM, _3)
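This new compatibility header papers over an API break between RVV intrinsics spec 0.11 and 0.12: in 0.12 the fixed-point rounding mode (vxrm) became an explicit parameter of the averaging/saturating operations, so every old-style call is missing one argument. The macros rewrite each call in place, inserting `_RVV_IMPLICIT_VXRM` (round-to-nearest-up unless overridden) where 0.12 expects it; per the NOTE in the header, the masked (4-argument) forms are handled before the unmasked (3-argument) ones so a call is not rewritten twice. An illustration of the intended rewrite (an assumption about usage, shown as comments since it only compiles for an RVV target):

    // Old OpenCV code written against spec 0.11:
    //     vint8m1_t r = __riscv_vaadd(a, b, vl);                   // no vxrm argument
    // What a 0.12 toolchain requires, and what the macro expands the call into:
    //     vint8m1_t r = __riscv_vaadd(a, b, __RISCV_VXRM_RNU, vl); // explicit rounding mode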
@@ -21,6 +21,10 @@
 #include "intrin_rvv_010_compat_overloaded-non-policy.hpp"
 #endif

+#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>11999
+#include "intrin_rvv_011_compat.hpp"
+#endif
+
 #if defined(__GNUC__) && !defined(__clang__)
 // FIXIT: eliminate massive warnigs from templates
 // GCC from 'rvv-next': riscv64-unknown-linux-gnu-g++ (g42df3464463) 12.0.1 20220505 (prerelease)
@@ -225,7 +225,7 @@ public:
     void copyTo(const _OutputArray& dst) const;
     void convertTo(const _OutputArray& dst, int type, double scale=1., double shift=0.) const;

-    _Tp val[m*n]; //< matrix elements
+    _Tp val[m*n]; ///< matrix elements
 };

 typedef Matx<float, 1, 2> Matx12f;
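This hunk and the several that follow all make the same one-character fix: Doxygen only treats `///<` (or `//!<`) as a documentation comment attached to the preceding member; plain `//<` is an ordinary comment, so these descriptions were silently missing from the generated docs. For example:

    struct Example
    {
        int line;  //< ignored by Doxygen - the text is lost from the docs
        int flags; ///< recognized as member documentation for 'flags'
    };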
@@ -774,7 +774,7 @@ public:
     void start();
     void stop();

-    uint64 durationNS() const; //< duration in nanoseconds
+    uint64 durationNS() const; ///< duration in nanoseconds

 protected:
     struct Impl;
@@ -89,7 +89,7 @@ public:
     //! conjugation
     Complex conj() const;

-    _Tp re, im; //< the real and the imaginary parts
+    _Tp re, im; ///< the real and the imaginary parts
 };

 typedef Complex<float> Complexf;
@@ -2028,8 +2028,8 @@ double jaccardDistance(const Rect_<_Tp>& a, const Rect_<_Tp>& b) {
 /** @brief Finds out if there is any intersection between two rectangles
  *
  * mainly useful for language bindings
- * @param rect1 First rectangle
- * @param rect2 Second rectangle
+ * @param a First rectangle
+ * @param b Second rectangle
  * @return the area of the intersection
  */
 CV_EXPORTS_W inline double rectangleIntersectionArea(const Rect2d& a, const Rect2d& b) { return (a & b).area(); }
@@ -47,11 +47,11 @@ public:
     explicit FileLock(const char* fname);
     ~FileLock();

-    void lock(); //< acquire exclusive (writer) lock
-    void unlock(); //< release exclusive (writer) lock
+    void lock(); ///< acquire exclusive (writer) lock
+    void unlock(); ///< release exclusive (writer) lock

-    void lock_shared(); //< acquire shareable (reader) lock
-    void unlock_shared(); //< release shareable (reader) lock
+    void lock_shared(); ///< acquire shareable (reader) lock
+    void unlock_shared(); ///< release shareable (reader) lock

     struct Impl;
 protected:
@@ -70,11 +70,11 @@ public:
     struct LocationExtraData;
     struct LocationStaticStorage
     {
-        LocationExtraData** ppExtra; //< implementation specific data
-        const char* name; //< region name (function name or other custom name)
-        const char* filename; //< source code filename
-        int line; //< source code line
-        int flags; //< flags (implementation code path: Plain, IPP, OpenCL)
+        LocationExtraData** ppExtra; ///< implementation specific data
+        const char* name; ///< region name (function name or other custom name)
+        const char* filename; ///< source code filename
+        int line; ///< source code line
+        int flags; ///< flags (implementation code path: Plain, IPP, OpenCL)
     };

     Region(const LocationStaticStorage& location);
@@ -100,18 +100,18 @@ private:

     //! Specify region flags
     enum RegionLocationFlag {
-        REGION_FLAG_FUNCTION = (1 << 0), //< region is function (=1) / nested named region (=0)
-        REGION_FLAG_APP_CODE = (1 << 1), //< region is Application code (=1) / OpenCV library code (=0)
-        REGION_FLAG_SKIP_NESTED = (1 << 2), //< avoid processing of nested regions
+        REGION_FLAG_FUNCTION = (1 << 0), ///< region is function (=1) / nested named region (=0)
+        REGION_FLAG_APP_CODE = (1 << 1), ///< region is Application code (=1) / OpenCV library code (=0)
+        REGION_FLAG_SKIP_NESTED = (1 << 2), ///< avoid processing of nested regions

-        REGION_FLAG_IMPL_IPP = (1 << 16), //< region is part of IPP code path
-        REGION_FLAG_IMPL_OPENCL = (2 << 16), //< region is part of OpenCL code path
-        REGION_FLAG_IMPL_OPENVX = (3 << 16), //< region is part of OpenVX code path
+        REGION_FLAG_IMPL_IPP = (1 << 16), ///< region is part of IPP code path
+        REGION_FLAG_IMPL_OPENCL = (2 << 16), ///< region is part of OpenCL code path
+        REGION_FLAG_IMPL_OPENVX = (3 << 16), ///< region is part of OpenVX code path

         REGION_FLAG_IMPL_MASK = (15 << 16),

         REGION_FLAG_REGION_FORCE = (1 << 30),
-        REGION_FLAG_REGION_NEXT = (1 << 31), //< close previous region (see #CV_TRACE_REGION_NEXT macro)
+        REGION_FLAG_REGION_NEXT = (1 << 31), ///< close previous region (see #CV_TRACE_REGION_NEXT macro)

         ENUM_REGION_FLAG_FORCE_INT = INT_MAX
     };
@@ -962,9 +962,9 @@ public class CoreTest extends OpenCVTestCase {

         assertEquals(0.0, d);

-        d = Core.Mahalanobis(line1, line2, covar);
-        assertTrue(d > 0.0);
+        // Bug: https://github.com/opencv/opencv/issues/24348
+        // d = Core.Mahalanobis(line1, line2, covar);
+        // assertTrue(d > 0.0);
     }

     public void testMax() {
@@ -2,7 +2,7 @@

 #include "opencv2/core/async.hpp"

-CV_PY_TO_CLASS(AsyncArray);
-CV_PY_FROM_CLASS(AsyncArray);
+CV_PY_TO_CLASS(AsyncArray)
+CV_PY_FROM_CLASS(AsyncArray)

 #endif
@@ -20,18 +20,18 @@ template<> struct pyopencvVecConverter<cuda::GpuMat>
     }
 };

-CV_PY_TO_CLASS(cuda::GpuMat);
-CV_PY_TO_CLASS(cuda::Stream);
-CV_PY_TO_CLASS(cuda::Event);
-CV_PY_TO_CLASS(cuda::HostMem);
+CV_PY_TO_CLASS(cuda::GpuMat)
+CV_PY_TO_CLASS(cuda::Stream)
+CV_PY_TO_CLASS(cuda::Event)
+CV_PY_TO_CLASS(cuda::HostMem)

-CV_PY_TO_CLASS_PTR(cuda::GpuMat);
-CV_PY_TO_CLASS_PTR(cuda::GpuMat::Allocator);
+CV_PY_TO_CLASS_PTR(cuda::GpuMat)
+CV_PY_TO_CLASS_PTR(cuda::GpuMat::Allocator)

-CV_PY_FROM_CLASS(cuda::GpuMat);
-CV_PY_FROM_CLASS(cuda::Stream);
-CV_PY_FROM_CLASS(cuda::HostMem);
+CV_PY_FROM_CLASS(cuda::GpuMat)
+CV_PY_FROM_CLASS(cuda::Stream)
+CV_PY_FROM_CLASS(cuda::HostMem)

-CV_PY_FROM_CLASS_PTR(cuda::GpuMat::Allocator);
+CV_PY_FROM_CLASS_PTR(cuda::GpuMat::Allocator)

 #endif
@@ -4,8 +4,8 @@

 typedef std::vector<Range> vector_Range;

-CV_PY_TO_CLASS(UMat);
-CV_PY_FROM_CLASS(UMat);
+CV_PY_TO_CLASS(UMat)
+CV_PY_FROM_CLASS(UMat)

 static bool cv_mappable_to(const Ptr<Mat>& src, Ptr<UMat>& dst)
 {
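The semicolon removals in these binding files pair with the `-Wextra-semi # clang` suppression added to the protobuf flags earlier in this commit: when a macro expands to a complete definition ending in `}`, a trailing `;` at the call site is an extra empty declaration that clang warns about under `-Wextra-semi`. A hypothetical macro (`DEFINE_GETTER` is illustrative, not an OpenCV API) shows the difference:

    // Expands to a full function definition, so it already ends with '}'.
    #define DEFINE_GETTER(T, name) \
        inline T get_##name(const T& v) { return v; }

    DEFINE_GETTER(int, answer)    // OK: nothing follows the definition
    DEFINE_GETTER(double, ratio); // extra ';' -> clang -Wextra-semi warning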
@@ -45,4 +45,4 @@ PERF_TEST_P(MatDepth_tb, DISABLED_Allocation_Aligned,
     SANITY_CHECK_NOTHING();
 }

-};
+}
@@ -53,7 +53,6 @@
 #undef CV__ALLOCATOR_STATS_LOG

 //#define OPENCV_ALLOC_ENABLE_STATISTICS
-#define OPENCV_ALLOC_STATISTICS_LIMIT 4096 // don't track buffers less than N bytes


 #ifdef HAVE_POSIX_MEMALIGN
@@ -63,6 +62,7 @@
 #endif

 #ifdef OPENCV_ALLOC_ENABLE_STATISTICS
+#define OPENCV_ALLOC_STATISTICS_LIMIT 4096 // don't track buffers less than N bytes
 #include <map>
 #endif

@@ -8,4 +8,4 @@
 #include "arithm.simd_declarations.hpp"

 #define ARITHM_DISPATCHING_ONLY
 #include "arithm.simd.hpp"
@@ -69,7 +69,7 @@
 #define DEFINE_SIMD_F32(fun, ...) \
     DEFINE_SIMD(__CV_CAT(fun, 32f), float, v_float32, __VA_ARGS__)

-#if CV_SIMD_64F
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
 #define DEFINE_SIMD_F64(fun, ...) \
     DEFINE_SIMD(__CV_CAT(fun, 64f), double, v_float64, __VA_ARGS__)
 #else
@@ -262,7 +262,7 @@ struct op_absdiff
 template<>
 struct op_absdiff<schar, v_int8>
 {
-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     static inline v_int8 r(const v_int8& a, const v_int8& b)
     { return v_absdiffs(a, b); }
 #endif
@@ -272,7 +272,7 @@ struct op_absdiff<schar, v_int8>
 template<>
 struct op_absdiff<short, v_int16>
 {
-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     static inline v_int16 r(const v_int16& a, const v_int16& b)
     { return v_absdiffs(a, b); }
 #endif
@@ -282,7 +282,7 @@ struct op_absdiff<short, v_int16>
 template<>
 struct op_absdiff<int, v_int32>
 {
-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     static inline v_int32 r(const v_int32& a, const v_int32& b)
     { return v_reinterpret_as_s32(v_absdiff(a, b)); }
 #endif
@@ -327,7 +327,7 @@ struct op_not

 //////////////////////////// Loaders /////////////////////////////////

-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)

 template< template<typename T1, typename Tvec> class OP, typename T1, typename Tvec>
 struct bin_loader
@@ -392,7 +392,7 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
 static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, T1* dst, size_t step, int width, int height)
 {
     typedef OP<T1, Tvec> op;
-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     typedef bin_loader<OP, T1, Tvec> ldr;
     const int wide_step = VTraits<Tvec>::vlanes();
 #if !CV_NEON && CV_SIMD_WIDTH == 16
@@ -410,7 +410,7 @@ static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2,
     {
         int x = 0;

-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
 #if !CV_NEON && !CV_MSA
         if (is_aligned(src1, src2, dst))
         {
@@ -460,7 +460,7 @@ static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2,
     vx_cleanup();
 }

-#if !CV_SIMD_64F
+#if !(CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
 template<template<typename T1, typename Tvec> class OP, typename T1, typename Tvec>
 static void bin_loop_nosimd(const T1* src1, size_t step1, const T1* src2, size_t step2, T1* dst, size_t step, int width, int height)
 {
@@ -492,7 +492,7 @@ static void bin_loop_nosimd(const T1* src1, size_t step1, const T1* src2, size_t
 #define BIN_LOOP64F bin_loop_nosimd
 #else
 #define BIN_LOOP64F bin_loop
-#endif //!CV_SIMD_64F
+#endif //!(CV_SIMD_64F || CV_SIMD_SCALABLE_64F)

 #endif // ARITHM_DEFINITIONS_ONLY

@@ -617,7 +617,7 @@ struct op_cmpne

 //////////////////////////// Loaders /////////////////////////////////

-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
 // todo: add support for RW alignment & stream
 template<int nload, template<typename T1, typename Tvec> class OP, typename T1, typename Tvec>
 struct cmp_loader_n
@@ -697,7 +697,7 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
 static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, uchar* dst, size_t step, int width, int height)
 {
     typedef OP<T1, Tvec> op;
-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     typedef cmp_loader_n<sizeof(T1), OP, T1, Tvec> ldr;
     const int wide_step = VTraits<Tvec>::vlanes() * sizeof(T1);
 #endif // CV_SIMD
@@ -709,7 +709,7 @@ static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2,
     {
         int x = 0;

-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
         for (; x <= width - wide_step; x += wide_step)
         {
             ldr::l(src1 + x, src2 + x, dst + x);
@@ -764,7 +764,7 @@ static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2,
     }
 }

-#if !CV_SIMD_64F
+#if !(CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
 template< template<typename T1, typename Tvec> class OP, typename T1>
 static void cmp_loop_nosimd(const T1* src1, size_t step1, const T1* src2, size_t step2, uchar* dst, size_t step, int width, int height)
 {
@@ -818,7 +818,7 @@ static void cmp_loop_nosimd(const double* src1, size_t step1, const double* src2
         break;
     }
 }
-#endif // !CV_SIMD_64F
+#endif // !(CV_SIMD_64F || CV_SIMD_SCALABLE_64F)

 #endif // ARITHM_DEFINITIONS_ONLY

@@ -876,7 +876,7 @@ DEFINE_SIMD_ALL(cmp)

 //////////////////////////// Loaders ///////////////////////////////

-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
 // todo: add support for RW alignment & stream
 template<int nload, template<typename T1, typename T2, typename Tvec> class OP, typename T1, typename T2, typename Tvec>
 struct scalar_loader_n
@@ -1095,16 +1095,16 @@ struct scalar_loader_n<sizeof(float), OP, float, T2, v_float32>
 };
 #endif // CV_SIMD

-#if CV_SIMD_64F
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
 template<template<typename T1, typename T2, typename Tvec> class OP>
 struct scalar_loader_n<sizeof(int), OP, int, double, v_int32>
 {
     typedef OP<int, float, v_int32> op;
     typedef OP<double, double, v_float64> op64;
-    enum {step = v_int32::nlanes};

     static inline void l(const int* src1, const int* src2, const double* scalar, int* dst)
     {
+        const int step = VTraits<v_int32>::vlanes();
         v_int32 v_src1 = vx_load(src1);
         v_int32 v_src2 = vx_load(src2);
         v_int32 v_src1s = vx_load(src1 + step);
@@ -1121,6 +1121,7 @@ struct scalar_loader_n<sizeof(int), OP, int, double, v_int32>
     }
     static inline void l(const int* src1, const double* scalar, int* dst)
     {
+        const int step = VTraits<v_int32>::vlanes();
         v_int32 v_src1 = vx_load(src1);
         v_int32 v_src1s = vx_load(src1 + step);

@@ -1165,10 +1166,10 @@ struct scalar_loader_n<sizeof(float), OP, float, double, v_float32>
 {
     typedef OP<float, float, v_float32> op;
     typedef OP<double, double, v_float64> op64;
-    enum {step = v_float32::nlanes};

     static inline void l(const float* src1, const float* src2, const double* scalar, float* dst)
     {
+        const int step = VTraits<v_float32>::vlanes();
         v_float32 v_src1 = vx_load(src1);
         v_float32 v_src2 = vx_load(src2);
         v_float32 v_src1s = vx_load(src1 + step);
@@ -1182,6 +1183,7 @@ struct scalar_loader_n<sizeof(float), OP, float, double, v_float32>
     }
     static inline void l(const float* src1, const double* scalar, float* dst)
     {
+        const int step = VTraits<v_float32>::vlanes();
         v_float32 v_src1 = vx_load(src1);
         v_float32 v_src1s = vx_load(src1 + step);

@@ -1222,10 +1224,10 @@ template<template<typename T1, typename T2, typename Tvec> class OP>
 struct scalar_loader_n<sizeof(double), OP, double, double, v_float64>
 {
     typedef OP<double, double, v_float64> op;
-    enum {step = v_float64::nlanes};

     static inline void l(const double* src1, const double* src2, const double* scalar, double* dst)
     {
+        const int step = VTraits<v_float64>::vlanes();
         v_float64 v_src1 = vx_load(src1);
         v_float64 v_src2 = vx_load(src2);
         v_float64 v_src1s = vx_load(src1 + step);
@@ -1239,6 +1241,7 @@ struct scalar_loader_n<sizeof(double), OP, double, double, v_float64>
     }
     static inline void l(const double* src1, const double* scalar, double* dst)
     {
+        const int step = VTraits<v_float64>::vlanes();
         v_float64 v_src1 = vx_load(src1);
         v_float64 v_src1s = vx_load(src1 + step);

@@ -1249,7 +1252,7 @@ struct scalar_loader_n<sizeof(double), OP, double, double, v_float64>
         v_store(dst + step, r1);
     }
 };
-#endif // CV_SIMD_64F
+#endif // (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)

 //////////////////////////// Loops /////////////////////////////////

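The `enum {step = ...nlanes}` deletions in the three `scalar_loader_n` specializations above are forced by the same scalable-vector constraint: an `enum` needs a compile-time constant, but with `CV_SIMD_SCALABLE` the lane count of `v_int32`/`v_float32`/`v_float64` is only known at run time, so the step moves into each method as a local `const int`. Roughly (a simplified sketch, not the patch itself):

    #include <opencv2/core/hal/intrin.hpp>

    #if (CV_SIMD || CV_SIMD_SCALABLE)
    struct Loader
    {
        // Before: enum { step = v_float32::nlanes };  // compile-time only - breaks scalable backends
        static inline void l(const float* src, float* dst)
        {
            using namespace cv;
            const int step = VTraits<v_float32>::vlanes();  // queried at run time
            v_float32 a = vx_load(src);         // first register's worth
            v_float32 b = vx_load(src + step);  // second register's worth
            v_store(dst, a);
            v_store(dst + step, b);
        }
    };
    #endif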
@@ -1259,7 +1262,7 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste
                         T1* dst, size_t step, int width, int height, const T2* scalar)
 {
     typedef OP<T1, T2, Tvec> op;
-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     typedef scalar_loader_n<sizeof(T1), OP, T1, T2, Tvec> ldr;
     const int wide_step = sizeof(T1) > sizeof(ushort) ? VTraits<Tvec>::vlanes() * 2 :
                           sizeof(T1) == sizeof(uchar) ? VTraits<Tvec>::vlanes() / 2 : VTraits<Tvec>::vlanes();
@@ -1273,7 +1276,7 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste
     {
         int x = 0;

-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
         for (; x <= width - wide_step; x += wide_step)
         {
             ldr::l(src1 + x, src2 + x, scalar, dst + x);
@@ -1305,7 +1308,7 @@ template<template<typename T1, typename T2, typename Tvec> class OP, typename T1
 static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int width, int height, const T2* scalar)
 {
     typedef OP<T1, T2, Tvec> op;
-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     typedef scalar_loader_n<sizeof(T1), OP, T1, T2, Tvec> ldr;
     const int wide_step = sizeof(T1) > sizeof(ushort) ? VTraits<Tvec>::vlanes() * 2 :
                           sizeof(T1) == sizeof(uchar) ? VTraits<Tvec>::vlanes() / 2 : VTraits<Tvec>::vlanes();
@@ -1318,7 +1321,7 @@ static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int
     {
         int x = 0;

-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
         for (; x <= width - wide_step; x += wide_step)
         {
             ldr::l(src1 + x, scalar, dst + x);
@@ -1345,7 +1348,7 @@ static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int
     vx_cleanup();
 }

-#if !CV_SIMD_64F
+#if !(CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
 // dual source
 template<template<typename T1, typename T2, typename Tvec> class OP, typename T1, typename T2, typename Tvec>
 static void scalar_loop_nosimd(const T1* src1, size_t step1, const T1* src2, size_t step2,
@@ -1409,7 +1412,7 @@ static void scalar_loop_nosimd(const T1* src1, size_t step1, T1* dst, size_t ste
 #define SCALAR_LOOP64F scalar_loop_nosimd
 #else
 #define SCALAR_LOOP64F scalar_loop
-#endif // !CV_SIMD_64F
+#endif // !(CV_SIMD_64F || CV_SIMD_SCALABLE_64F)

 #endif // ARITHM_DEFINITIONS_ONLY

@@ -1433,7 +1436,7 @@ struct op_mul
 template<typename T1, typename T2, typename Tvec>
 struct op_mul_scale
 {
-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
     {
         const v_float32 v_scalar = vx_setall_f32(*scalar);
@@ -1449,7 +1452,7 @@ struct op_mul_scale
 template<>
 struct op_mul_scale<double, double, v_float64>
 {
-#if CV_SIMD_64F
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
     static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar)
     {
         const v_float64 v_scalar = vx_setall_f64(*scalar);
@ -1574,7 +1577,7 @@ struct op_div_f
|
|||||||
template<typename T1, typename T2, typename Tvec>
|
template<typename T1, typename T2, typename Tvec>
|
||||||
struct op_div_scale
|
struct op_div_scale
|
||||||
{
|
{
|
||||||
#if CV_SIMD || CV_SIMD_SCALABLE
|
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
||||||
static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
|
static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
|
||||||
{
|
{
|
||||||
const v_float32 v_scalar = vx_setall_f32(*scalar);
|
const v_float32 v_scalar = vx_setall_f32(*scalar);
|
||||||
@ -1596,7 +1599,7 @@ struct op_div_scale
|
|||||||
template<>
|
template<>
|
||||||
struct op_div_scale<float, float, v_float32>
|
struct op_div_scale<float, float, v_float32>
|
||||||
{
|
{
|
||||||
#if CV_SIMD || CV_SIMD_SCALABLE
|
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
||||||
static inline v_float32 r(const v_float32& a, const v_float32& b, const float* scalar)
|
static inline v_float32 r(const v_float32& a, const v_float32& b, const float* scalar)
|
||||||
{
|
{
|
||||||
const v_float32 v_scalar = vx_setall_f32(*scalar);
|
const v_float32 v_scalar = vx_setall_f32(*scalar);
|
||||||
@ -1610,7 +1613,7 @@ struct op_div_scale<float, float, v_float32>
|
|||||||
template<>
|
template<>
|
||||||
struct op_div_scale<double, double, v_float64>
|
struct op_div_scale<double, double, v_float64>
|
||||||
{
|
{
|
||||||
#if CV_SIMD_64F
|
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
|
||||||
static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar)
|
static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar)
|
||||||
{
|
{
|
||||||
const v_float64 v_scalar = vx_setall_f64(*scalar);
|
const v_float64 v_scalar = vx_setall_f64(*scalar);
|
||||||
@ -1682,7 +1685,7 @@ DEFINE_SIMD_ALL(div, div_loop)
|
|||||||
template<typename T1, typename T2, typename Tvec>
|
template<typename T1, typename T2, typename Tvec>
|
||||||
struct op_add_scale
|
struct op_add_scale
|
||||||
{
|
{
|
||||||
#if CV_SIMD || CV_SIMD_SCALABLE
|
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
||||||
static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
|
static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
|
||||||
{
|
{
|
||||||
const v_float32 v_alpha = vx_setall_f32(*scalar);
|
const v_float32 v_alpha = vx_setall_f32(*scalar);
|
||||||
@ -1698,7 +1701,7 @@ struct op_add_scale
|
|||||||
template<>
|
template<>
|
||||||
struct op_add_scale<double, double, v_float64>
|
struct op_add_scale<double, double, v_float64>
|
||||||
{
|
{
|
||||||
#if CV_SIMD_64F
|
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
|
||||||
static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar)
|
static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar)
|
||||||
{
|
{
|
||||||
const v_float64 v_alpha = vx_setall_f64(*scalar);
|
const v_float64 v_alpha = vx_setall_f64(*scalar);
|
||||||
@ -1715,7 +1718,7 @@ struct op_add_scale<double, double, v_float64>
|
|||||||
template<typename T1, typename T2, typename Tvec>
|
template<typename T1, typename T2, typename Tvec>
|
||||||
struct op_add_weighted
|
struct op_add_weighted
|
||||||
{
|
{
|
||||||
#if CV_SIMD || CV_SIMD_SCALABLE
|
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
||||||
static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalars)
|
static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalars)
|
||||||
{
|
{
|
||||||
const v_float32 v_alpha = vx_setall_f32(scalars[0]);
|
const v_float32 v_alpha = vx_setall_f32(scalars[0]);
|
||||||
@ -1733,7 +1736,7 @@ struct op_add_weighted
|
|||||||
template<>
|
template<>
|
||||||
struct op_add_weighted<double, double, v_float64>
|
struct op_add_weighted<double, double, v_float64>
|
||||||
{
|
{
|
||||||
#if CV_SIMD_64F
|
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
|
||||||
static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalars)
|
static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalars)
|
||||||
{
|
{
|
||||||
const v_float64 v_alpha = vx_setall_f64(scalars[0]);
|
const v_float64 v_alpha = vx_setall_f64(scalars[0]);
|
||||||
@ -1832,7 +1835,7 @@ DEFINE_SIMD_F64(addWeighted, add_weighted_loop_d)
|
|||||||
template<typename T1, typename T2, typename Tvec>
|
template<typename T1, typename T2, typename Tvec>
|
||||||
struct op_recip
|
struct op_recip
|
||||||
{
|
{
|
||||||
#if CV_SIMD || CV_SIMD_SCALABLE
|
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
||||||
static inline v_float32 r(const v_float32& a, const T2* scalar)
|
static inline v_float32 r(const v_float32& a, const T2* scalar)
|
||||||
{
|
{
|
||||||
const v_float32 v_scalar = vx_setall_f32(*scalar);
|
const v_float32 v_scalar = vx_setall_f32(*scalar);
|
||||||
@ -1854,7 +1857,7 @@ struct op_recip
|
|||||||
template<>
|
template<>
|
||||||
struct op_recip<float, float, v_float32>
|
struct op_recip<float, float, v_float32>
|
||||||
{
|
{
|
||||||
#if CV_SIMD || CV_SIMD_SCALABLE
|
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
||||||
static inline v_float32 r(const v_float32& a, const float* scalar)
|
static inline v_float32 r(const v_float32& a, const float* scalar)
|
||||||
{
|
{
|
||||||
const v_float32 v_scalar = vx_setall_f32(*scalar);
|
const v_float32 v_scalar = vx_setall_f32(*scalar);
|
||||||
@ -1868,7 +1871,7 @@ struct op_recip<float, float, v_float32>
|
|||||||
template<>
|
template<>
|
||||||
struct op_recip<double, double, v_float64>
|
struct op_recip<double, double, v_float64>
|
||||||
{
|
{
|
||||||
#if CV_SIMD_64F
|
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
|
||||||
static inline v_float64 r(const v_float64& a, const double* scalar)
|
static inline v_float64 r(const v_float64& a, const double* scalar)
|
||||||
{
|
{
|
||||||
const v_float64 v_scalar = vx_setall_f64(*scalar);
|
const v_float64 v_scalar = vx_setall_f64(*scalar);
|
||||||
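The change repeated through these hunks is purely syntactic, wrapping the guard in parentheses and adding the CV_SIMD_SCALABLE(_64F) macros, but it is what lets the same dispatch compile for scalable backends such as RISC-V RVV. A minimal sketch of the idiom, assuming OpenCV's universal intrinsics header; the function name add_f32 is illustrative only, not part of the patch:

    #include <opencv2/core/hal/intrin.hpp>

    // Guarded wide loop with a scalar tail; compiles to plain scalar code when
    // neither fixed-size nor scalable SIMD is available.
    static void add_f32(const float* a, const float* b, float* dst, int len)
    {
        int x = 0;
    #if (CV_SIMD || CV_SIMD_SCALABLE)
        const int step = cv::VTraits<cv::v_float32>::vlanes(); // lane count may be a run-time value
        for (; x <= len - step; x += step)
            cv::v_store(dst + x, cv::v_add(cv::vx_load(a + x), cv::vx_load(b + x)));
        cv::vx_cleanup();
    #endif
        for (; x < len; ++x) // scalar tail
            dst[x] = a[x] + b[x];
    }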
@@ -414,4 +414,4 @@ inline int arithm_ipp_mul32f(const float *src1, size_t step1, const float *src2,

 #if !ARITHM_USE_IPP
 #define ARITHM_CALL_IPP(...)
 #endif
@@ -64,8 +64,6 @@ namespace cv
 Discrete Fourier Transform
 \****************************************************************************************/

-#define CV_MAX_LOCAL_DFT_SIZE (1 << 15)
-
 static unsigned char bitrevTab[] =
 {
     0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0,0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0,
@@ -69,10 +69,14 @@
 /**
 Add: _dst[i] = src1[i] + src2[i]_ @n
 Sub: _dst[i] = src1[i] - src2[i]_
-@param src1_data,src1_step first source image data and step
-@param src2_data,src2_step second source image data and step
-@param dst_data,dst_step destination image data and step
-@param width,height dimensions of the images
+@param src1_data first source image data
+@param src1_step first source image step
+@param src2_data second source image data
+@param src2_step second source image step
+@param dst_data destination image data
+@param dst_step destination image step
+@param width width of the images
+@param height height of the images
 */
 //! @addtogroup core_hal_interface_addsub Element-wise add and subtract
 //! @{
@@ -96,10 +100,14 @@ inline int hal_ni_sub64f(const double *src1_data, size_t src1_step, const double
 /**
 Minimum: _dst[i] = min(src1[i], src2[i])_ @n
 Maximum: _dst[i] = max(src1[i], src2[i])_
-@param src1_data,src1_step first source image data and step
-@param src2_data,src2_step second source image data and step
-@param dst_data,dst_step destination image data and step
-@param width,height dimensions of the images
+@param src1_data first source image data
+@param src1_step first source image step
+@param src2_data second source image data
+@param src2_step second source image step
+@param dst_data destination image data
+@param dst_step destination image step
+@param width width of the images
+@param height height of the images
 */
 //! @addtogroup core_hal_interface_minmax Element-wise minimum or maximum
 //! @{
@@ -122,11 +130,14 @@ inline int hal_ni_min64f(const double *src1_data, size_t src1_step, const double

 /**
 Absolute difference: _dst[i] = | src1[i] - src2[i] |_
-@param src1_data,src1_step first source image data and step
-@param src2_data,src2_step second source image data and step
-@param dst_data,dst_step destination image data and step
-@param width,height dimensions of the images
-@param scale additional multiplier
+@param src1_data first source image data
+@param src1_step first source image step
+@param src2_data second source image data
+@param src2_step second source image step
+@param dst_data destination image data
+@param dst_step destination image step
+@param width width of the images
+@param height height of the images
 */
 //! @addtogroup core_hal_interface_absdiff Element-wise absolute difference
 //! @{
@@ -144,10 +155,14 @@ Bitwise AND: _dst[i] = src1[i] & src2[i]_ @n
 Bitwise OR: _dst[i] = src1[i] | src2[i]_ @n
 Bitwise XOR: _dst[i] = src1[i] ^ src2[i]_ @n
 Bitwise NOT: _dst[i] = !src[i]_
-@param src1_data,src1_step first source image data and step
-@param src2_data,src2_step second source image data and step
-@param dst_data,dst_step destination image data and step
-@param width,height dimensions of the images
+@param src1_data first source image data
+@param src1_step first source image step
+@param src2_data second source image data
+@param src2_step second source image step
+@param dst_data destination image data
+@param dst_step destination image step
+@param width width of the images
+@param height height of the images
 */
 //! @addtogroup core_hal_interface_logical Bitwise logical operations
 //! @{
@@ -201,10 +216,14 @@ inline int hal_ni_not8u(const uchar *src_data, size_t src_step, uchar *dst_data,

 /**
 Compare: _dst[i] = src1[i] op src2[i]_
-@param src1_data,src1_step first source image data and step
-@param src2_data,src2_step second source image data and step
-@param dst_data,dst_step destination image data and step
-@param width,height dimensions of the images
+@param src1_data first source image data
+@param src1_step first source image step
+@param src2_data second source image data
+@param src2_step second source image step
+@param dst_data destination image data
+@param dst_step destination image step
+@param width width of the images
+@param height height of the images
 @param operation one of (CV_HAL_CMP_EQ, CV_HAL_CMP_GT, ...)
 */
 //! @addtogroup core_hal_interface_compare Element-wise compare
@@ -230,10 +249,14 @@ inline int hal_ni_cmp64f(const double *src1_data, size_t src1_step, const double

 /**
 Multiply: _dst[i] = scale * src1[i] * src2[i]_
-@param src1_data,src1_step first source image data and step
-@param src2_data,src2_step second source image data and step
-@param dst_data,dst_step destination image data and step
-@param width,height dimensions of the images
+@param src1_data first source image data
+@param src1_step first source image step
+@param src2_data second source image data
+@param src2_step second source image step
+@param dst_data destination image data
+@param dst_step destination image step
+@param width width of the images
+@param height height of the images
 @param scale additional multiplier
 */
 //! @addtogroup core_hal_interface_multiply Element-wise multiply
@@ -249,10 +272,14 @@ inline int hal_ni_mul64f(const double *src1_data, size_t src1_step, const double

 /**
 Divide: _dst[i] = scale * src1[i] / src2[i]_
-@param src1_data,src1_step first source image data and step
-@param src2_data,src2_step second source image data and step
-@param dst_data,dst_step destination image data and step
-@param width,height dimensions of the images
+@param src1_data first source image data
+@param src1_step first source image step
+@param src2_data second source image data
+@param src2_step second source image step
+@param dst_data destination image data
+@param dst_step destination image step
+@param width width of the images
+@param height height of the images
 @param scale additional multiplier
 */
 //! @addtogroup core_hal_interface_divide Element-wise divide
@@ -268,9 +295,12 @@ inline int hal_ni_div64f(const double *src1_data, size_t src1_step, const double

 /**
 Computes reciprocal: _dst[i] = scale / src[i]_
-@param src_data,src_step source image data and step
-@param dst_data,dst_step destination image data and step
-@param width,height dimensions of the images
+@param src_data source image data
+@param src_step source image step
+@param dst_data destination image data
+@param dst_step destination image step
+@param width width of the images
+@param height height of the images
 @param scale additional multiplier
 */
 //! @addtogroup core_hal_interface_reciprocial Element-wise reciprocal
@@ -310,10 +340,14 @@ inline int hal_ni_recip64f(const double *src_data, size_t src_step, double *dst_

 /**
 Computes weighted sum of two arrays using formula: _dst[i] = a * src1[i] + b * src2[i] + c_
-@param src1_data,src1_step first source image data and step
-@param src2_data,src2_step second source image data and step
-@param dst_data,dst_step destination image data and step
-@param width,height dimensions of the images
+@param src1_data first source image data
+@param src1_step first source image step
+@param src2_data second source image data
+@param src2_step second source image step
+@param dst_data destination image data
+@param dst_step destination image step
+@param width width of the images
+@param height height of the images
 @param scalars numbers _a_, _b_, and _c_
 */
 //! @addtogroup core_hal_interface_addWeighted Element-wise weighted sum
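The Doxygen blocks above spell out the contract an external HAL implementation has to meet. For reference, a hedged sketch of a conforming element-wise add for 8-bit images, walking rows via the byte steps exactly as the parameters describe; my_hal_add8u is a hypothetical name, not part of the patch:

    #include <opencv2/core/hal/interface.h>
    #include <algorithm>

    static int my_hal_add8u(const uchar* src1_data, size_t src1_step,
                            const uchar* src2_data, size_t src2_step,
                            uchar* dst_data, size_t dst_step, int width, int height)
    {
        for (int y = 0; y < height; ++y)
        {
            const uchar* r1 = src1_data + y * src1_step; // steps are in bytes
            const uchar* r2 = src2_data + y * src2_step;
            uchar* rd = dst_data + y * dst_step;
            for (int x = 0; x < width; ++x)
                rd[x] = (uchar)std::min(r1[x] + r2[x], 255); // saturating add, matching cv::add semantics
        }
        return CV_HAL_ERROR_OK;
    }

A custom HAL would then map cv_hal_add8u to such a function in its replacement header.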
@@ -381,7 +415,8 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int


 /**
-@param y,x source Y and X arrays
+@param y source Y array
+@param x source X array
 @param dst destination array
 @param len length of arrays
 @param angleInDegrees if set to true return angles in degrees, otherwise in radians
@@ -399,7 +434,8 @@ inline int hal_ni_fastAtan64f(const double* y, const double* x, double* dst, int


 /**
-@param x,y source X and Y arrays
+@param x source X array
+@param y source Y array
 @param dst destination array
 @param len length of arrays
 */
@@ -530,7 +566,8 @@ inline int hal_ni_dftFree1D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEME

 /**
 @param context double pointer to context storing all necessary data
-@param width,height image dimensions
+@param width image width
+@param height image height
 @param depth image type (CV_32F or CV_64F)
 @param src_channels number of channels in input image
 @param dst_channels number of channels in output image
@@ -540,8 +577,10 @@ inline int hal_ni_dftFree1D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEME
 inline int hal_ni_dftInit2D(cvhalDFT **context, int width, int height, int depth, int src_channels, int dst_channels, int flags, int nonzero_rows) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
 /**
 @param context pointer to context storing all necessary data
-@param src_data,src_step source image data and step
-@param dst_data,dst_step destination image data and step
+@param src_data source image data
+@param src_step source image step
+@param dst_data destination image data
+@param dst_step destination image step
 */
 inline int hal_ni_dft2D(cvhalDFT *context, const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
 /**
@@ -557,15 +596,18 @@ inline int hal_ni_dftFree2D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEME

 /**
 @param context double pointer to context storing all necessary data
-@param width,height image dimensions
+@param width image width
+@param height image height
 @param depth image type (CV_32F or CV_64F)
 @param flags algorithm options (combination of CV_HAL_DFT_INVERSE, ...)
 */
 inline int hal_ni_dctInit2D(cvhalDFT **context, int width, int height, int depth, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
 /**
 @param context pointer to context storing all necessary data
-@param src_data,src_step source image data and step
-@param dst_data,dst_step destination image data and step
+@param src_data source image data
+@param src_step source image step
+@param dst_data destination image data
+@param dst_step destination image step
 */
 inline int hal_ni_dct2D(cvhalDFT *context, const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
 /**
@@ -717,11 +759,15 @@ inline int hal_ni_gemm64fc(const double* src1, size_t src1_step, const double* s

 /**
 @brief Finds the global minimum and maximum in an array.
-@param src_data,src_step Source image
-@param width,height Source image dimensions
+@param src_data Source image data
+@param src_step Source image step
+@param width Source image width
+@param height Source image height
 @param depth Depth of source image
-@param minVal,maxVal Pointer to the returned global minimum and maximum in an array.
-@param minIdx,maxIdx Pointer to the returned minimum and maximum location.
+@param minVal Pointer to the returned global minimum.
+@param maxVal Pointer to the returned global maximum.
+@param minIdx Pointer to the returned minimum location.
+@param maxIdx Pointer to the returned maximum location.
 @param mask Specified array region.
 */
 inline int hal_ni_minMaxIdx(const uchar* src_data, size_t src_step, int width, int height, int depth, double* minVal, double* maxVal,
@@ -731,6 +777,47 @@ inline int hal_ni_minMaxIdx(const uchar* src_data, size_t src_step, int width, i
 #define cv_hal_minMaxIdx hal_ni_minMaxIdx
 //! @endcond

+/**
+@brief hal_flip
+@param src_type source and destination image type
+@param src_data source image data
+@param src_step source image step
+@param src_width source and destination image width
+@param src_height source and destination image height
+@param dst_data destination image data
+@param dst_step destination image step
+@param flip_mode 0 flips around x-axis, positive around y-axis, negative both
+*/
+inline int hal_ni_flip(int src_type, const uchar* src_data, size_t src_step, int src_width, int src_height,
+                       uchar* dst_data, size_t dst_step, int flip_mode) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+
+//! @cond IGNORED
+#define cv_hal_flip hal_ni_flip
+//! @endcond
+
+
+/**
+@brief rotate90
+@param src_type source and destination image type
+@param src_data source image data
+@param src_step source image step
+@param src_width source image width
+If angle has value [180] it is also destination image width
+If angle has values [90, 270] it is also destination image height
+@param src_height source and destination image height (destination image width for angles [90, 270])
+If angle has value [180] it is also destination image height
+If angle has values [90, 270] it is also destination image width
+@param dst_data destination image data
+@param dst_step destination image step
+@param angle clockwise angle for rotation in degrees from set [90, 180, 270]
+*/
+inline int hal_ni_rotate90(int src_type, const uchar* src_data, size_t src_step, int src_width, int src_height,
+                           uchar* dst_data, size_t dst_step, int angle) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+
+//! @cond IGNORED
+#define cv_hal_rotate90 hal_ni_rotate90
+//! @endcond
+
 //! @}

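These two stubs are the new override points for external HALs: a vendor header redefines the macro to its own function with the same signature and returns CV_HAL_ERROR_NOT_IMPLEMENTED for the cases it does not accelerate, letting OpenCV fall back to the built-in path. A hedged sketch (my_flip is a hypothetical name; only y-axis flips of CV_8UC1 handled):

    #include <opencv2/core/hal/interface.h>

    static int my_flip(int src_type, const uchar* src_data, size_t src_step,
                       int src_width, int src_height,
                       uchar* dst_data, size_t dst_step, int flip_mode)
    {
        if (src_type != CV_8UC1 || flip_mode <= 0)   // positive flip_mode = around y-axis
            return CV_HAL_ERROR_NOT_IMPLEMENTED;     // let OpenCV handle everything else
        for (int y = 0; y < src_height; ++y)
        {
            const uchar* s = src_data + y * src_step;
            uchar* d = dst_data + y * dst_step;
            for (int x = 0; x < src_width; ++x)
                d[x] = s[src_width - 1 - x];         // mirror each row
        }
        return CV_HAL_ERROR_OK;
    }
    #undef cv_hal_flip
    #define cv_hal_flip my_flip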
@@ -87,11 +87,11 @@ static bool hasNonZero8u( const uchar* src, size_t len )
 {
     bool res = false;
     const uchar* srcEnd = src+len;
-#if CV_SIMD
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     typedef v_uint8 v_type;
     const v_type v_zero = vx_setzero_u8();
     constexpr const int unrollCount = 2;
-    int step = v_type::nlanes * unrollCount;
+    int step = VTraits<v_type>::vlanes() * unrollCount;
     int len0 = len & -step;
     const uchar* srcSimdEnd = src+len0;

@@ -99,10 +99,10 @@ static bool hasNonZero8u( const uchar* src, size_t len )
     while(!res && countSIMD--)
     {
         v_type v0 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v1 = vx_load(src);
-        src += v_type::nlanes;
-        res = v_check_any(((v0 | v1) != v_zero));
+        src += VTraits<v_type>::vlanes();
+        res = v_check_any((v_ne(v_or(v0, v1), v_zero)));
     }

     v_cleanup();
@@ -114,11 +114,11 @@ static bool hasNonZero16u( const ushort* src, size_t len )
 {
     bool res = false;
     const ushort* srcEnd = src+len;
-#if CV_SIMD
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     typedef v_uint16 v_type;
     const v_type v_zero = vx_setzero_u16();
     constexpr const int unrollCount = 4;
-    int step = v_type::nlanes * unrollCount;
+    int step = VTraits<v_type>::vlanes() * unrollCount;
     int len0 = len & -step;
     const ushort* srcSimdEnd = src+len0;

@@ -126,16 +126,16 @@ static bool hasNonZero16u( const ushort* src, size_t len )
     while(!res && countSIMD--)
     {
         v_type v0 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v1 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v2 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v3 = vx_load(src);
-        src += v_type::nlanes;
-        v0 |= v1;
-        v2 |= v3;
-        res = v_check_any(((v0 | v2) != v_zero));
+        src += VTraits<v_type>::vlanes();
+        v0 = v_or(v0, v1);
+        v2 = v_or(v2, v3);
+        res = v_check_any((v_ne(v_or(v0, v2), v_zero)));
     }

     v_cleanup();
@@ -147,11 +147,11 @@ static bool hasNonZero32s( const int* src, size_t len )
 {
     bool res = false;
     const int* srcEnd = src+len;
-#if CV_SIMD
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     typedef v_int32 v_type;
     const v_type v_zero = vx_setzero_s32();
     constexpr const int unrollCount = 8;
-    int step = v_type::nlanes * unrollCount;
+    int step = VTraits<v_type>::vlanes() * unrollCount;
     int len0 = len & -step;
     const int* srcSimdEnd = src+len0;

@@ -159,29 +159,29 @@ static bool hasNonZero32s( const int* src, size_t len )
     while(!res && countSIMD--)
     {
         v_type v0 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v1 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v2 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v3 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v4 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v5 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v6 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v7 = vx_load(src);
-        src += v_type::nlanes;
-        v0 |= v1;
-        v2 |= v3;
-        v4 |= v5;
-        v6 |= v7;
+        src += VTraits<v_type>::vlanes();
+        v0 = v_or(v0, v1);
+        v2 = v_or(v2, v3);
+        v4 = v_or(v4, v5);
+        v6 = v_or(v6, v7);

-        v0 |= v2;
-        v4 |= v6;
-        res = v_check_any(((v0 | v4) != v_zero));
+        v0 = v_or(v0, v2);
+        v4 = v_or(v4, v6);
+        res = v_check_any((v_ne(v_or(v0, v4), v_zero)));
     }

     v_cleanup();
@@ -193,11 +193,11 @@ static bool hasNonZero32f( const float* src, size_t len )
 {
     bool res = false;
     const float* srcEnd = src+len;
-#if CV_SIMD
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     typedef v_float32 v_type;
     const v_type v_zero = vx_setzero_f32();
     constexpr const int unrollCount = 8;
-    int step = v_type::nlanes * unrollCount;
+    int step = VTraits<v_type>::vlanes() * unrollCount;
     int len0 = len & -step;
     const float* srcSimdEnd = src+len0;

@@ -205,30 +205,30 @@ static bool hasNonZero32f( const float* src, size_t len )
     while(!res && countSIMD--)
     {
         v_type v0 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v1 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v2 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v3 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v4 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v5 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v6 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v7 = vx_load(src);
-        src += v_type::nlanes;
-        v0 |= v1;
-        v2 |= v3;
-        v4 |= v5;
-        v6 |= v7;
+        src += VTraits<v_type>::vlanes();
+        v0 = v_or(v0, v1);
+        v2 = v_or(v2, v3);
+        v4 = v_or(v4, v5);
+        v6 = v_or(v6, v7);

-        v0 |= v2;
-        v4 |= v6;
+        v0 = v_or(v0, v2);
+        v4 = v_or(v4, v6);
         //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
-        res = !v_check_all(((v0 | v4) == v_zero));
+        res = !v_check_all((v_eq(v_or(v0, v4), v_zero)));
     }

     v_cleanup();
@@ -240,11 +240,11 @@ static bool hasNonZero64f( const double* src, size_t len )
 {
     bool res = false;
     const double* srcEnd = src+len;
-#if CV_SIMD_64F
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
     typedef v_float64 v_type;
     const v_type v_zero = vx_setzero_f64();
     constexpr const int unrollCount = 16;
-    int step = v_type::nlanes * unrollCount;
+    int step = VTraits<v_type>::vlanes() * unrollCount;
     int len0 = len & -step;
     const double* srcSimdEnd = src+len0;

@@ -252,55 +252,55 @@ static bool hasNonZero64f( const double* src, size_t len )
     while(!res && countSIMD--)
     {
         v_type v0 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v1 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v2 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v3 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v4 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v5 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v6 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v7 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v8 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v9 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v10 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v11 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v12 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v13 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v14 = vx_load(src);
-        src += v_type::nlanes;
+        src += VTraits<v_type>::vlanes();
         v_type v15 = vx_load(src);
-        src += v_type::nlanes;
-        v0 |= v1;
-        v2 |= v3;
-        v4 |= v5;
-        v6 |= v7;
-        v8 |= v9;
-        v10 |= v11;
-        v12 |= v13;
-        v14 |= v15;
+        src += VTraits<v_type>::vlanes();
+        v0 = v_or(v0, v1);
+        v2 = v_or(v2, v3);
+        v4 = v_or(v4, v5);
+        v6 = v_or(v6, v7);
+        v8 = v_or(v8, v9);
+        v10 = v_or(v10, v11);
+        v12 = v_or(v12, v13);
+        v14 = v_or(v14, v15);

-        v0 |= v2;
-        v4 |= v6;
-        v8 |= v10;
-        v12 |= v14;
+        v0 = v_or(v0, v2);
+        v4 = v_or(v4, v6);
+        v8 = v_or(v8, v10);
+        v12 = v_or(v12, v14);

-        v0 |= v4;
-        v8 |= v12;
+        v0 = v_or(v0, v4);
+        v8 = v_or(v8, v12);
         //res = v_check_any(((v0 | v8) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
-        res = !v_check_all(((v0 | v8) == v_zero));
+        res = !v_check_all((v_eq(v_or(v0, v8), v_zero)));
     }

     v_cleanup();
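The rewrite in these hunks is mechanical but load-bearing: v_type::nlanes becomes VTraits<v_type>::vlanes(), and the overloaded operators (|, !=, ==) become the named functions v_or, v_ne, v_eq. Both changes are required because on scalable vector ISAs the lane count is not a compile-time constant and the vector types do not provide the C++ operators. A hedged sketch of the resulting style; any_nonzero is an illustrative helper, not the library function:

    #include <opencv2/core/hal/intrin.hpp>
    using namespace cv;

    static bool any_nonzero(const int* src, size_t len)
    {
        bool res = false;
        size_t i = 0;
    #if (CV_SIMD || CV_SIMD_SCALABLE)
        const v_int32 v_zero = vx_setzero_s32();
        const size_t step = (size_t)VTraits<v_int32>::vlanes(); // was: v_int32::nlanes
        for (; !res && i + step <= len; i += step)
            res = v_check_any(v_ne(vx_load(src + i), v_zero));  // was: (v != v_zero)
        vx_cleanup();
    #endif
        for (; !res && i < len; ++i)                            // scalar tail
            res = src[i] != 0;
        return res;
    }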
@@ -276,7 +276,7 @@ template<typename T> struct VBLAS
     int givens(T*, T*, int, T, T) const { return 0; }
 };

-#if CV_SIMD // TODO: enable for CV_SIMD_SCALABLE_64F
+#if CV_SIMD // TODO: enable for CV_SIMD_SCALABLE, GCC 13 related
 template<> inline int VBLAS<float>::dot(const float* a, const float* b, int n, float* result) const
 {
     if( n < 2*VTraits<v_float32>::vlanes() )
@@ -2549,6 +2549,7 @@ double dotProd_16s(const short* src1, const short* src2, int len)
 double dotProd_32s(const int* src1, const int* src2, int len)
 {
 #if CV_SIMD_64F // TODO: enable for CV_SIMD_SCALABLE_64F
+    // Test failed on RVV(QEMU): Too big difference (=1.20209e-08 > 1.11022e-12)
     double r = .0;
     int i = 0;
     const int step = VTraits<v_int32>::vlanes();
@@ -4,6 +4,7 @@

 #include "precomp.hpp"
 #include "opencl_kernels_core.hpp"
+#include "hal_replacement.hpp"
 #include "opencv2/core/detail/dispatch_helper.impl.hpp"

 #include <algorithm> // std::swap_ranges
@@ -802,6 +803,9 @@ void flip( InputArray _src, OutputArray _dst, int flip_mode )
     _dst.create( size, type );
     Mat dst = _dst.getMat();

+    CALL_HAL(flip, cv_hal_flip, type, src.ptr(), src.step, src.cols, src.rows,
+             dst.ptr(), dst.step, flip_mode);
+
     CV_IPP_RUN_FAST(ipp_flip(src, dst, flip_mode));

     size_t esz = CV_ELEM_SIZE(type);
@@ -1075,10 +1079,8 @@ void broadcast(InputArray _src, InputArray _shape, OutputArray _dst) {
     }
 }

-void rotate(InputArray _src, OutputArray _dst, int rotateMode)
+static void rotateImpl(InputArray _src, OutputArray _dst, int rotateMode)
 {
-    CV_Assert(_src.dims() <= 2);
-
     switch (rotateMode)
     {
     case ROTATE_90_CLOCKWISE:
@@ -1097,4 +1099,51 @@
     }
 }

+void rotate(InputArray _src, OutputArray _dst, int rotateMode)
+{
+    CV_Assert(_src.dims() <= 2);
+    int angle;
+
+    if (_dst.isUMat())
+    {
+        rotateImpl(_src, _dst, rotateMode);
+        return;
+    }
+
+    Mat src = _src.getMat();
+    int type = src.type();
+    if( src.empty() )
+    {
+        _dst.release();
+        return;
+    }
+
+    switch (rotateMode)
+    {
+    case ROTATE_90_CLOCKWISE:
+        _dst.create(src.cols, src.rows, type);
+        angle = 90;
+        break;
+    case ROTATE_180:
+        _dst.create(src.rows, src.cols, type);
+        angle = 180;
+        break;
+    case ROTATE_90_COUNTERCLOCKWISE:
+        _dst.create(src.cols, src.rows, type);
+        angle = 270;
+        break;
+    default:
+        _dst.create(src.rows, src.cols, type);
+        angle = 0;
+        break;
+    }
+
+    Mat dst = _dst.getMat();
+    CALL_HAL(rotate90, cv_hal_rotate90, type, src.ptr(), src.step, src.cols, src.rows,
+             dst.ptr(), dst.step, angle);
+
+    // use src (Mat) since _src (InputArray) is updated by _dst.create() when in-place
+    rotateImpl(src, _dst, rotateMode);
+}
+
 } // namespace
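With this split, UMat arguments keep taking the OpenCL-aware path through rotateImpl(), while Mat arguments get their destination created up front so the cv_hal_rotate90 hook sees valid buffers of the right size. Caller-visible behaviour is unchanged; a minimal usage sketch:

    #include <opencv2/core.hpp>

    int main()
    {
        cv::Mat src(480, 640, CV_8UC3, cv::Scalar(0, 128, 255));
        cv::Mat dst;
        cv::rotate(src, dst, cv::ROTATE_90_CLOCKWISE); // dst becomes 640x480
        CV_Assert(dst.rows == src.cols && dst.cols == src.rows);
        return 0;
    }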
@@ -24,7 +24,7 @@ struct SumSqr_SIMD
     }
 };

-#if CV_SIMD || CV_SIMD_SCALABLE
+#if (CV_SIMD || CV_SIMD_SCALABLE)

 template <>
 struct SumSqr_SIMD<uchar, int, int>
@@ -1546,9 +1546,9 @@ void cv::minMaxIdx(InputArray _src, double* minVal,
     if (!src.empty() && mask.empty())
     {
         if( minidx == 0 )
             minidx = 1;
         if( maxidx == 0 )
             maxidx = 1;
     }

     if( minidx == 0 )
@@ -791,7 +791,7 @@ int getThreadNum()
     return 0;
 #endif
 #elif defined HAVE_HPX
     return (int)(hpx::get_num_worker_threads());
 #elif defined HAVE_OPENMP
     return omp_get_thread_num();
 #elif defined HAVE_GCD
@@ -367,4 +367,4 @@ size_t base64::RawDataToBinaryConvertor::make_to_binary_funcs(const std::string
     return offset_packed;
 }

 }
@@ -124,4 +124,4 @@ private:
 }

 }
 #endif
@@ -306,9 +306,6 @@ softdouble cos(const softdouble& a) { return f64_cos(a); }
 | The values to return on conversions to 32-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
-#define ui32_fromPosOverflow 0xFFFFFFFF
-#define ui32_fromNegOverflow 0
-#define ui32_fromNaN 0xFFFFFFFF
 #define i32_fromPosOverflow 0x7FFFFFFF
 #define i32_fromNegOverflow (-0x7FFFFFFF - 1)
 #define i32_fromNaN 0x7FFFFFFF
@@ -317,9 +314,6 @@ softdouble cos(const softdouble& a) { return f64_cos(a); }
 | The values to return on conversions to 64-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
-#define ui64_fromPosOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
-#define ui64_fromNegOverflow 0
-#define ui64_fromNaN UINT64_C( 0xFFFFFFFFFFFFFFFF )
 #define i64_fromPosOverflow UINT64_C( 0x7FFFFFFFFFFFFFFF )
 //fixed unsigned unary minus: -x == ~x + 1
 //#define i64_fromNegOverflow (-UINT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
@@ -422,34 +416,6 @@ struct uint64_extra { uint64_t v, extra; };
 struct uint128_extra { struct uint128 v; uint64_t extra; };
 #endif

-/*----------------------------------------------------------------------------
-| These macros are used to isolate the differences in word order between big-
-| endian and little-endian platforms.
-*----------------------------------------------------------------------------*/
-#ifndef WORDS_BIGENDIAN
-#define wordIncr 1
-#define indexWord( total, n ) (n)
-#define indexWordHi( total ) ((total) - 1)
-#define indexWordLo( total ) 0
-#define indexMultiword( total, m, n ) (n)
-#define indexMultiwordHi( total, n ) ((total) - (n))
-#define indexMultiwordLo( total, n ) 0
-#define indexMultiwordHiBut( total, n ) (n)
-#define indexMultiwordLoBut( total, n ) 0
-#define INIT_UINTM4( v3, v2, v1, v0 ) { v0, v1, v2, v3 }
-#else
-#define wordIncr -1
-#define indexWord( total, n ) ((total) - 1 - (n))
-#define indexWordHi( total ) 0
-#define indexWordLo( total ) ((total) - 1)
-#define indexMultiword( total, m, n ) ((total) - 1 - (m))
-#define indexMultiwordHi( total, n ) 0
-#define indexMultiwordLo( total, n ) ((total) - (n))
-#define indexMultiwordHiBut( total, n ) 0
-#define indexMultiwordLoBut( total, n ) (n)
-#define INIT_UINTM4( v3, v2, v1, v0 ) { v3, v2, v1, v0 }
-#endif
-
 enum {
     softfloat_mulAdd_subC = 1,
     softfloat_mulAdd_subProd = 2
@@ -220,4 +220,4 @@ void split64s(const int64* src, int64** dst, int len, int cn )

 #endif
 CV_CPU_OPTIMIZATION_NAMESPACE_END
 }} // namespace
@@ -672,7 +672,7 @@ static void inRangeS(const Mat& src, const Scalar& lb, const Scalar& rb, Mat& ds
 }

 } // namespace
-CVTEST_GUARD_SYMBOL(inRange);
+CVTEST_GUARD_SYMBOL(inRange)

 struct InRangeSOp : public BaseArithmOp
 {
@@ -1202,7 +1202,7 @@ struct MeanOp : public BaseArithmOp
     MeanOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
     {
         context = 3;
-    };
+    }
     void op(const vector<Mat>& src, Mat& dst, const Mat& mask)
     {
         dst.create(1, 1, CV_64FC4);
@@ -1225,7 +1225,7 @@ struct SumOp : public BaseArithmOp
     SumOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
     {
         context = 3;
-    };
+    }
     void op(const vector<Mat>& src, Mat& dst, const Mat&)
     {
         dst.create(1, 1, CV_64FC4);
@@ -1285,7 +1285,7 @@ struct MeanStdDevOp : public BaseArithmOp
     {
         cn = 0;
         context = 7;
-    };
+    }
     void op(const vector<Mat>& src, Mat& dst, const Mat& mask)
     {
         dst.create(1, 2, CV_64FC4);
@@ -1326,7 +1326,7 @@ struct NormOp : public BaseArithmOp
     {
         context = 1;
         normType = 0;
-    };
+    }
     int getRandomType(RNG& rng)
     {
         int type = cvtest::randomType(rng, baseArithmTypeMask, 1, 4);
@@ -1372,7 +1372,7 @@ struct MinMaxLocOp : public BaseArithmOp
     MinMaxLocOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
     {
         context = ARITHM_MAX_NDIMS*2 + 2;
-    };
+    }
     int getRandomType(RNG& rng)
     {
         return cvtest::randomType(rng, baseArithmTypeMask, 1, 1);
@@ -1419,7 +1419,7 @@ struct reduceArgMinMaxOp : public BaseArithmOp
     isLast(false), isMax(false), axis(0)
     {
         context = ARITHM_MAX_NDIMS*2 + 2;
-    };
+    }
     int getRandomType(RNG& rng) override
     {
         return cvtest::randomType(rng, baseArithmTypeMask, 1, 1);
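The semicolons removed above followed a constructor body inside a class, where they form an empty declaration; clang's -Wextra-semi warning flags exactly this. A minimal reproduction, for illustration:

    struct Counter
    {
        Counter()
        {
            n = 0;
        };   // redundant ';' after a function body -> -Wextra-semi
        int n;
    };       // this ';' is required: it terminates the class definition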
@@ -435,6 +435,8 @@ protected:
     CV_Assert( ov1 == v1 );
     CV_Assert( osc1 == sc1 );
     CV_Assert( og1 == g1 );
+    fs.release();
+    remove(fname.c_str());
 }
 catch(...)
 {
@@ -489,6 +491,7 @@ TEST(Core_InputOutput, FileStorage)
     char arr[66];
     snprintf(arr, sizeof(arr), "snprintf is hell %d", 666);
     EXPECT_NO_THROW(f << arr);
+    remove(file.c_str());
 }

 TEST(Core_InputOutput, FileStorageKey)
@@ -534,6 +537,7 @@ TEST(Core_InputOutput, FileStorageSpaces)
         ASSERT_STREQ(values[i].c_str(), valuesReadAppend[i].c_str());
     }
     g3.release();
+    EXPECT_EQ(0, remove(fileName.c_str()));
 }

 struct data_t
@@ -585,12 +589,15 @@ struct data_t

 static void test_filestorage_basic(int write_flags, const char* suffix_name, bool testReadWrite, bool useMemory = false)
 {
+    const bool generateTestData = false; // enable to regenerate reference in opencv_extra
     const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info();
     CV_Assert(test_info);
     std::string name = (std::string(test_info->test_case_name()) + "--" + test_info->name() + suffix_name);
     std::string name_34 = string(cvtest::TS::ptr()->get_data_path()) + "io/3_4/" + name;
-    if (!testReadWrite)
+    if (!testReadWrite || generateTestData)
         name = string(cvtest::TS::ptr()->get_data_path()) + "io/" + name;
+    else
+        name = cv::tempfile(name.c_str());

     {
         const size_t rawdata_N = 40;
@@ -636,10 +643,7 @@ static void test_filestorage_basic(int write_flags, const char* suffix_name, boo
             rawdata.push_back(tmp);
         }
     }
-#ifdef GENERATE_TEST_DATA
-#else
-    if (testReadWrite || useMemory)
-#endif
+    if (testReadWrite || useMemory || generateTestData)
     {
         cv::FileStorage fs(name, write_flags + (useMemory ? cv::FileStorage::MEMORY : 0));
         fs << "normal_2d_mat" << _2d_out;
@@ -761,9 +765,13 @@ static void test_filestorage_basic(int write_flags, const char* suffix_name, boo
     ASSERT_EQ(_rd_in.dims , _rd_out.dims);
     ASSERT_EQ(_rd_in.depth(), _rd_out.depth());

-    if (useMemory) {
+    if (useMemory)
+    {
         EXPECT_EQ(0, cv::norm(_rd_in, _rd_out, NORM_INF));
     }
+    if (testReadWrite && !useMemory && !generateTestData) {
+        EXPECT_EQ(0, remove(name.c_str()));
+    }
 }
 }

@@ -810,7 +818,7 @@ TEST(Core_InputOutput, filestorage_heap_overflow)
     const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info();
     CV_Assert(test_info);

-    std::string name = std::string(test_info->test_case_name()) + "--" + test_info->name();
+    std::string name = cv::tempfile();
     const char data[] = {0x00, 0x2f, 0x4a, 0x4a, 0x50, 0x4a, 0x4a };

     std::ofstream file;
@@ -822,6 +830,7 @@ TEST(Core_InputOutput, filestorage_heap_overflow)

     // This just shouldn't segfault, otherwise it's fine
     EXPECT_ANY_THROW(FileStorage(name, FileStorage::READ));
+    EXPECT_EQ(0, remove(name.c_str()));
 }

 TEST(Core_InputOutput, filestorage_base64_valid_call)
@@ -832,18 +841,6 @@ TEST(Core_InputOutput, filestorage_base64_valid_call)
     : (std::string(test_info->test_case_name()) + "--" + test_info->name());

     char const * filenames[] = {
-        "core_io_base64_other_test.yml",
-        "core_io_base64_other_test.xml",
-        "core_io_base64_other_test.json",
-        "core_io_base64_other_test.yml?base64",
-        "core_io_base64_other_test.xml?base64",
-        "core_io_base64_other_test.json?base64",
-        0
-    };
-    char const * real_name[] = {
-        "core_io_base64_other_test.yml",
-        "core_io_base64_other_test.xml",
-        "core_io_base64_other_test.json",
         "core_io_base64_other_test.yml",
         "core_io_base64_other_test.xml",
         "core_io_base64_other_test.json",
@ -855,14 +852,16 @@ TEST(Core_InputOutput, filestorage_base64_valid_call)
|
|||||||
|
|
||||||
for (int n = 0; n < 6; n++)
|
for (int n = 0; n < 6; n++)
|
||||||
{
|
{
|
||||||
char const* suffix_name = filenames[n];
|
const int idx = n / 2;
|
||||||
SCOPED_TRACE(suffix_name);
|
const std::string mode_suffix = (n % 2 == 0) ? "" : "?base64";
|
||||||
std::string name = basename + '_' + suffix_name;
|
std::string suffix_name = basename + "_" + filenames[idx];
|
||||||
std::string file_name = basename + '_' + real_name[n];
|
std::string file_name = cv::tempfile(suffix_name.c_str());
|
||||||
|
std::string mode_file_name = file_name + mode_suffix;
|
||||||
|
SCOPED_TRACE(mode_file_name);
|
||||||
|
|
||||||
EXPECT_NO_THROW(
|
EXPECT_NO_THROW(
|
||||||
{
|
{
|
||||||
cv::FileStorage fs(name, cv::FileStorage::WRITE_BASE64);
|
cv::FileStorage fs(mode_file_name, cv::FileStorage::WRITE_BASE64);
|
||||||
|
|
||||||
fs << "manydata" << "[";
|
fs << "manydata" << "[";
|
||||||
fs << "[:";
|
fs << "[:";
|
||||||
@ -890,7 +889,7 @@ TEST(Core_InputOutput, filestorage_base64_valid_call)
|
|||||||
|
|
||||||
EXPECT_NO_THROW(
|
EXPECT_NO_THROW(
|
||||||
{
|
{
|
||||||
cv::FileStorage fs(name, cv::FileStorage::WRITE);
|
cv::FileStorage fs(mode_file_name, cv::FileStorage::WRITE);
|
||||||
|
|
||||||
fs << "manydata" << "[";
|
fs << "manydata" << "[";
|
||||||
fs << str_out;
|
fs << str_out;
|
||||||
@ -934,10 +933,10 @@ TEST(Core_InputOutput, filestorage_base64_invalid_call)
|
|||||||
0
|
0
|
||||||
};
|
};
|
||||||
|
|
||||||
for (char const ** ptr = filenames; *ptr; ptr++)
|
for (int idx = 0; idx < 3; ++idx)
|
||||||
{
|
{
|
||||||
char const * suffix_name = *ptr;
|
const string base_suffix = basename + '_' + filenames[idx];
|
||||||
std::string name = basename + '_' + suffix_name;
|
std::string name = cv::tempfile(base_suffix.c_str());
|
||||||
|
|
||||||
EXPECT_NO_THROW({
|
EXPECT_NO_THROW({
|
||||||
cv::FileStorage fs(name, cv::FileStorage::WRITE);
|
cv::FileStorage fs(name, cv::FileStorage::WRITE);
|
||||||
@ -958,7 +957,7 @@ TEST(Core_InputOutput, filestorage_base64_invalid_call)
|
|||||||
|
|
||||||
TEST(Core_InputOutput, filestorage_yml_vec2i)
|
TEST(Core_InputOutput, filestorage_yml_vec2i)
|
||||||
{
|
{
|
||||||
const std::string file_name = "vec2i.yml";
|
const std::string file_name = cv::tempfile("vec2i.yml");
|
||||||
cv::Vec2i vec(2, 1), ovec;
|
cv::Vec2i vec(2, 1), ovec;
|
||||||
|
|
||||||
/* write */
|
/* write */
|
||||||
@ -1040,7 +1039,7 @@ TEST(Core_InputOutput, filestorage_vec_vec_io)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
String fileName = "vec_vec_io_test.";
|
String basename = "vec_vec_io_test.";
|
||||||
|
|
||||||
std::vector<String> formats;
|
std::vector<String> formats;
|
||||||
formats.push_back("xml");
|
formats.push_back("xml");
|
||||||
@ -1049,11 +1048,13 @@ TEST(Core_InputOutput, filestorage_vec_vec_io)
|
|||||||
|
|
||||||
for(size_t i = 0; i < formats.size(); i++)
|
for(size_t i = 0; i < formats.size(); i++)
|
||||||
{
|
{
|
||||||
FileStorage writer(fileName + formats[i], FileStorage::WRITE);
|
const String basename_plus(basename + formats[i]);
|
||||||
|
const String fileName = tempfile(basename_plus.c_str());
|
||||||
|
FileStorage writer(fileName, FileStorage::WRITE);
|
||||||
writer << "vecVecMat" << outputMats;
|
writer << "vecVecMat" << outputMats;
|
||||||
writer.release();
|
writer.release();
|
||||||
|
|
||||||
FileStorage reader(fileName + formats[i], FileStorage::READ);
|
FileStorage reader(fileName, FileStorage::READ);
|
||||||
std::vector<std::vector<Mat> > testMats;
|
std::vector<std::vector<Mat> > testMats;
|
||||||
reader["vecVecMat"] >> testMats;
|
reader["vecVecMat"] >> testMats;
|
||||||
|
|
||||||
@ -1070,7 +1071,7 @@ TEST(Core_InputOutput, filestorage_vec_vec_io)
|
|||||||
}
|
}
|
||||||
|
|
||||||
reader.release();
|
reader.release();
|
||||||
remove((fileName + formats[i]).c_str());
|
remove(fileName.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1661,7 +1662,7 @@ TEST(Core_InputOutput, FileStorage_json_bool)
|
|||||||
|
|
||||||
TEST(Core_InputOutput, FileStorage_free_file_after_exception)
|
TEST(Core_InputOutput, FileStorage_free_file_after_exception)
|
||||||
{
|
{
|
||||||
const std::string fileName = "FileStorage_free_file_after_exception_test.yml";
|
const std::string fileName = cv::tempfile("FileStorage_free_file_after_exception_test.yml");
|
||||||
const std::string content = "%YAML:1.0\n cameraMatrix;:: !<tag:yaml.org,2002:opencv-matrix>\n";
|
const std::string content = "%YAML:1.0\n cameraMatrix;:: !<tag:yaml.org,2002:opencv-matrix>\n";
|
||||||
|
|
||||||
std::fstream testFile;
|
std::fstream testFile;
|
||||||
@ -1684,11 +1685,11 @@ TEST(Core_InputOutput, FileStorage_free_file_after_exception)
|
|||||||
TEST(Core_InputOutput, FileStorage_write_to_sequence)
|
TEST(Core_InputOutput, FileStorage_write_to_sequence)
|
||||||
{
|
{
|
||||||
const std::vector<std::string> formatExts = { ".yml", ".json", ".xml" };
|
const std::vector<std::string> formatExts = { ".yml", ".json", ".xml" };
|
||||||
const std::string fileName = "FileStorage_write_to_sequence";
|
|
||||||
|
|
||||||
for (const auto& ext : formatExts)
|
for (const auto& ext : formatExts)
|
||||||
{
|
{
|
||||||
FileStorage fs(fileName + ext, FileStorage::WRITE);
|
const std::string name = tempfile(ext.c_str());
|
||||||
|
|
||||||
|
FileStorage fs(name, FileStorage::WRITE);
|
||||||
std::vector<int> in = { 23, 42 };
|
std::vector<int> in = { 23, 42 };
|
||||||
fs.startWriteStruct("some_sequence", cv::FileNode::SEQ);
|
fs.startWriteStruct("some_sequence", cv::FileNode::SEQ);
|
||||||
for (int i : in)
|
for (int i : in)
|
||||||
@ -1696,7 +1697,7 @@ TEST(Core_InputOutput, FileStorage_write_to_sequence)
|
|||||||
fs.endWriteStruct();
|
fs.endWriteStruct();
|
||||||
fs.release();
|
fs.release();
|
||||||
|
|
||||||
FileStorage fsIn(fileName + ext, FileStorage::READ);
|
FileStorage fsIn(name, FileStorage::READ);
|
||||||
FileNode seq = fsIn["some_sequence"];
|
FileNode seq = fsIn["some_sequence"];
|
||||||
FileNodeIterator it = seq.begin(), it_end = seq.end();
|
FileNodeIterator it = seq.begin(), it_end = seq.end();
|
||||||
std::vector<int> out;
|
std::vector<int> out;
|
||||||
@ -1704,12 +1705,13 @@ TEST(Core_InputOutput, FileStorage_write_to_sequence)
|
|||||||
out.push_back((int)*it);
|
out.push_back((int)*it);
|
||||||
|
|
||||||
EXPECT_EQ(in, out);
|
EXPECT_EQ(in, out);
|
||||||
|
EXPECT_EQ(0, remove(name.c_str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Core_InputOutput, FileStorage_YAML_parse_multiple_documents)
|
TEST(Core_InputOutput, FileStorage_YAML_parse_multiple_documents)
|
||||||
{
|
{
|
||||||
const std::string filename = "FileStorage_YAML_parse_multiple_documents.yml";
|
const std::string filename = cv::tempfile("FileStorage_YAML_parse_multiple_documents.yml");
|
||||||
FileStorage fs;
|
FileStorage fs;
|
||||||
|
|
||||||
fs.open(filename, FileStorage::WRITE);
|
fs.open(filename, FileStorage::WRITE);
|
||||||
|
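Note: the FileStorage tests above all follow one pattern now: write through a unique cv::tempfile() path, read it back, then delete the file and assert the deletion succeeded. A minimal self-contained sketch of that pattern, assuming OpenCV and GoogleTest are available (the test and key names here are hypothetical, not part of this change):

#include <cstdio>
#include <opencv2/core.hpp>
#include <opencv2/core/utility.hpp>
#include <gtest/gtest.h>

TEST(Example, filestorage_tempfile_roundtrip)
{
    // cv::tempfile() returns a unique writable path, so concurrent test
    // runs no longer race on a shared file name in the working directory.
    const std::string name = cv::tempfile("example.yml");

    cv::FileStorage fs(name, cv::FileStorage::WRITE);
    fs << "value" << 42;
    fs.release();

    cv::FileStorage in(name, cv::FileStorage::READ);
    int value = 0;
    in["value"] >> value;
    in.release();
    EXPECT_EQ(42, value);

    // remove() returns 0 on success, so this also verifies the file existed.
    EXPECT_EQ(0, remove(name.c_str()));
}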
@@ -475,12 +475,13 @@ TEST(Core_PCA, accuracy)
     ASSERT_LE(err, diffBackPrjEps) << "bad accuracy of cvBackProjectPCA() (CV_PCA_DATA_AS_COL)";
 #endif
     // Test read and write
-    FileStorage fs( "PCA_store.yml", FileStorage::WRITE );
+    const std::string filename = cv::tempfile("PCA_store.yml");
+    FileStorage fs( filename, FileStorage::WRITE );
     rPCA.write( fs );
     fs.release();
 
     PCA lPCA;
-    fs.open( "PCA_store.yml", FileStorage::READ );
+    fs.open( filename, FileStorage::READ );
     lPCA.read( fs.root() );
     err = cvtest::norm(rPCA.eigenvectors, lPCA.eigenvectors, NORM_L2 | NORM_RELATIVE);
     EXPECT_LE(err, 0) << "bad accuracy of write/load functions (YML)";
@@ -488,6 +489,7 @@ TEST(Core_PCA, accuracy)
     EXPECT_LE(err, 0) << "bad accuracy of write/load functions (YML)";
     err = cvtest::norm(rPCA.mean, lPCA.mean, NORM_L2 | NORM_RELATIVE);
     EXPECT_LE(err, 0) << "bad accuracy of write/load functions (YML)";
+    EXPECT_EQ(0, remove(filename.c_str()));
 }
 
 class Core_ArrayOpTest : public cvtest::BaseTest
@@ -588,11 +588,11 @@ CV__DNN_INLINE_NS_BEGIN
     {
     public:
         virtual void forwardSlice(const float* src, float* dst, int len,
-                                  size_t outPlaneSize, int cn0, int cn1) const {};
+                                  size_t outPlaneSize, int cn0, int cn1) const {}
         virtual void forwardSlice(const int* src, const int* lut, int* dst, int len,
-                                  size_t outPlaneSize, int cn0, int cn1) const {};
+                                  size_t outPlaneSize, int cn0, int cn1) const {}
         virtual void forwardSlice(const int8_t* src, const int8_t* lut, int8_t* dst, int len,
-                                  size_t outPlaneSize, int cn0, int cn1) const {};
+                                  size_t outPlaneSize, int cn0, int cn1) const {}
     };
 
     class CV_EXPORTS ReLULayer : public ActivationLayer
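Note: the `{};` to `{}` changes above drop a trailing semicolon that forms an empty declaration after an inline function body; clang diagnoses this under -Wextra-semi. A tiny illustration with a hypothetical class:

struct Layer {
    virtual void forward() const {}    // fine: no trailing ';' after the body
    virtual ~Layer() {}                // same for destructors
};
// `virtual void forward() const {};` would still compile, but the extra ';'
// is an empty declaration that -Wextra-semi (clang) warns about.
int main() { return 0; }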
@@ -28,22 +28,28 @@ public:
         target = (dnn::Target)(int)get<1>(GetParam());
     }
 
-    void processNet(std::string weights, std::string proto, const Mat& input, const std::string& outputLayer = "")
-    {
-        randu(input, 0.0f, 1.0f);
+    void processNet(std::string weights, std::string proto,
+                    const std::vector<std::tuple<Mat, std::string>>& inputs, const std::string& outputLayer = ""){
 
         weights = findDataFile(weights, false);
         if (!proto.empty())
             proto = findDataFile(proto);
         net = readNet(proto, weights);
-        net.setInput(blobFromImage(input, 1.0, Size(), Scalar(), false));
+        // Set multiple inputs
+        for(auto &inp: inputs){
+            net.setInput(std::get<0>(inp), std::get<1>(inp));
+        }
 
         net.setPreferableBackend(backend);
         net.setPreferableTarget(target);
 
-        MatShape netInputShape = shape(1, 3, input.rows, input.cols);
+        // Calculate multiple inputs memory consumption
+        std::vector<MatShape> netMatShapes;
+        for(auto &inp: inputs){
+            netMatShapes.push_back(shape(std::get<0>(inp)));
+        }
         size_t weightsMemory = 0, blobsMemory = 0;
-        net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory);
-        int64 flops = net.getFLOPS(netInputShape);
+        net.getMemoryConsumption(netMatShapes, weightsMemory, blobsMemory);
+        int64 flops = net.getFLOPS(netMatShapes);
         CV_Assert(flops > 0);
 
         net.forward(outputLayer); // warmup
@@ -59,33 +65,48 @@ public:
 
         SANITY_CHECK_NOTHING();
     }
 
+    void processNet(std::string weights, std::string proto,
+                    Mat &input, const std::string& outputLayer = "")
+    {
+        processNet(weights, proto, {std::make_tuple(input, "")}, outputLayer);
+    }
+
+    void processNet(std::string weights, std::string proto,
+                    Size inpSize, const std::string& outputLayer = "")
+    {
+        Mat input_data(inpSize, CV_32FC3);
+        randu(input_data, 0.0f, 1.0f);
+        Mat input = blobFromImage(input_data, 1.0, Size(), Scalar(), false);
+        processNet(weights, proto, input, outputLayer);
+    }
 };
 
 
 PERF_TEST_P_(DNNTestNetwork, AlexNet)
 {
-    processNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt", Mat(cv::Size(227, 227), CV_32FC3));
+    processNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt", cv::Size(227, 227));
 }
 
 PERF_TEST_P_(DNNTestNetwork, GoogLeNet)
 {
-    processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt", Mat(cv::Size(224, 224), CV_32FC3));
+    processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt", cv::Size(224, 224));
 }
 
 PERF_TEST_P_(DNNTestNetwork, ResNet_50)
 {
-    processNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt", Mat(cv::Size(224, 224), CV_32FC3));
+    processNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt", cv::Size(224, 224));
 }
 
 PERF_TEST_P_(DNNTestNetwork, SqueezeNet_v1_1)
 {
-    processNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt", Mat(cv::Size(227, 227), CV_32FC3));
+    processNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt", cv::Size(227, 227));
 }
 
 PERF_TEST_P_(DNNTestNetwork, Inception_5h)
 {
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) throw SkipTestException("");
-    processNet("dnn/tensorflow_inception_graph.pb", "", Mat(cv::Size(224, 224), CV_32FC3), "softmax2");
+    processNet("dnn/tensorflow_inception_graph.pb", "", cv::Size(224, 224), "softmax2");
 }
 
 PERF_TEST_P_(DNNTestNetwork, ENet)
@@ -97,12 +118,12 @@ PERF_TEST_P_(DNNTestNetwork, ENet)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         throw SkipTestException("");
 #endif
-    processNet("dnn/Enet-model-best.net", "", Mat(cv::Size(512, 256), CV_32FC3));
+    processNet("dnn/Enet-model-best.net", "", cv::Size(512, 256));
 }
 
 PERF_TEST_P_(DNNTestNetwork, SSD)
 {
-    processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", "dnn/ssd_vgg16.prototxt", Mat(cv::Size(300, 300), CV_32FC3));
+    processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", "dnn/ssd_vgg16.prototxt", cv::Size(300, 300));
 }
 
 PERF_TEST_P_(DNNTestNetwork, OpenFace)
@@ -111,27 +132,27 @@ PERF_TEST_P_(DNNTestNetwork, OpenFace)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_HDDL))
         throw SkipTestException("");
 #endif
-    processNet("dnn/openface_nn4.small2.v1.t7", "", Mat(cv::Size(96, 96), CV_32FC3));
+    processNet("dnn/openface_nn4.small2.v1.t7", "", cv::Size(96, 96));
 }
 
 PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe)
 {
-    processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", Mat(cv::Size(300, 300), CV_32FC3));
+    processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", cv::Size(300, 300));
 }
 
 PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow)
 {
-    processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "ssd_mobilenet_v1_coco_2017_11_17.pbtxt", Mat(cv::Size(300, 300), CV_32FC3));
+    processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "ssd_mobilenet_v1_coco_2017_11_17.pbtxt", cv::Size(300, 300));
 }
 
 PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)
 {
-    processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "ssd_mobilenet_v2_coco_2018_03_29.pbtxt", Mat(cv::Size(300, 300), CV_32FC3));
+    processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "ssd_mobilenet_v2_coco_2018_03_29.pbtxt", cv::Size(300, 300));
 }
 
 PERF_TEST_P_(DNNTestNetwork, DenseNet_121)
 {
-    processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Mat(cv::Size(224, 224), CV_32FC3));
+    processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", cv::Size(224, 224));
 }
 
 PERF_TEST_P_(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages)
@@ -140,17 +161,17 @@ PERF_TEST_P_(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages)
         throw SkipTestException("");
     // The same .caffemodel but modified .prototxt
     // See https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/pose/poseParameters.cpp
-    processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi_faster_4_stages.prototxt", Mat(cv::Size(368, 368), CV_32FC3));
+    processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi_faster_4_stages.prototxt", cv::Size(368, 368));
 }
 
 PERF_TEST_P_(DNNTestNetwork, opencv_face_detector)
 {
-    processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt", Mat(cv::Size(300, 300), CV_32FC3));
+    processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt", cv::Size(300, 300));
 }
 
 PERF_TEST_P_(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
 {
-    processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "ssd_inception_v2_coco_2017_11_17.pbtxt", Mat(cv::Size(300, 300), CV_32FC3));
+    processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "ssd_inception_v2_coco_2017_11_17.pbtxt", cv::Size(300, 300));
 }
 
 PERF_TEST_P_(DNNTestNetwork, YOLOv3)
@@ -168,9 +189,7 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3)
 #endif
 
     Mat sample = imread(findDataFile("dnn/dog416.png"));
-    cvtColor(sample, sample, COLOR_BGR2RGB);
-    Mat inp;
-    sample.convertTo(inp, CV_32FC3, 1.0f / 255, 0);
+    Mat inp = blobFromImage(sample, 1.0 / 255.0, Size(), Scalar(), true);
     processNet("dnn/yolov3.weights", "dnn/yolov3.cfg", inp);
 }
 
@@ -186,9 +205,7 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv4)
     throw SkipTestException("Test is disabled in OpenVINO 2020.4");
 #endif
     Mat sample = imread(findDataFile("dnn/dog416.png"));
-    cvtColor(sample, sample, COLOR_BGR2RGB);
-    Mat inp;
-    sample.convertTo(inp, CV_32FC3, 1.0f / 255, 0);
+    Mat inp = blobFromImage(sample, 1.0 / 255.0, Size(), Scalar(), true);
     processNet("dnn/yolov4.weights", "dnn/yolov4.cfg", inp);
 }
 
@@ -199,20 +216,39 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv4_tiny)
     throw SkipTestException("");
 #endif
     Mat sample = imread(findDataFile("dnn/dog416.png"));
-    cvtColor(sample, sample, COLOR_BGR2RGB);
-    Mat inp;
-    sample.convertTo(inp, CV_32FC3, 1.0f / 255, 0);
+    Mat inp = blobFromImage(sample, 1.0 / 255.0, Size(), Scalar(), true);
     processNet("dnn/yolov4-tiny-2020-12.weights", "dnn/yolov4-tiny-2020-12.cfg", inp);
 }
 
+PERF_TEST_P_(DNNTestNetwork, YOLOv5) {
+    applyTestTag(CV_TEST_TAG_MEMORY_512MB);
+    Mat sample = imread(findDataFile("dnn/dog416.png"));
+    Mat inp = blobFromImage(sample, 1.0 / 255.0, Size(640, 640), Scalar(), true);
+    processNet("", "dnn/yolov5n.onnx", inp);
+}
+
+PERF_TEST_P_(DNNTestNetwork, YOLOv8) {
+    applyTestTag(CV_TEST_TAG_MEMORY_512MB);
+    Mat sample = imread(findDataFile("dnn/dog416.png"));
+    Mat inp = blobFromImage(sample, 1.0 / 255.0, Size(640, 640), Scalar(), true);
+    processNet("", "dnn/yolov8n.onnx", inp);
+}
+
+PERF_TEST_P_(DNNTestNetwork, YOLOX) {
+    applyTestTag(CV_TEST_TAG_MEMORY_512MB);
+    Mat sample = imread(findDataFile("dnn/dog416.png"));
+    Mat inp = blobFromImage(sample, 1.0 / 255.0, Size(640, 640), Scalar(), true);
+    processNet("", "dnn/yolox_s.onnx", inp);
+}
+
 PERF_TEST_P_(DNNTestNetwork, EAST_text_detection)
 {
-    processNet("dnn/frozen_east_text_detection.pb", "", Mat(cv::Size(320, 320), CV_32FC3));
+    processNet("dnn/frozen_east_text_detection.pb", "", cv::Size(320, 320));
 }
 
 PERF_TEST_P_(DNNTestNetwork, FastNeuralStyle_eccv16)
 {
-    processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", Mat(cv::Size(320, 240), CV_32FC3));
+    processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", cv::Size(320, 240));
 }
 
 PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN)
@@ -233,7 +269,8 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN)
         (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
         throw SkipTestException("");
     processNet("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pb",
-               "dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt", Mat(cv::Size(800, 600), CV_32FC3));
+               "dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt",
+               cv::Size(800, 600));
 }
 
 PERF_TEST_P_(DNNTestNetwork, EfficientDet)
@@ -241,12 +278,88 @@ PERF_TEST_P_(DNNTestNetwork, EfficientDet)
     if (target != DNN_TARGET_CPU)
         throw SkipTestException("");
     Mat sample = imread(findDataFile("dnn/dog416.png"));
-    resize(sample, sample, Size(512, 512));
-    Mat inp;
-    sample.convertTo(inp, CV_32FC3, 1.0/255);
+    Mat inp = blobFromImage(sample, 1.0 / 255.0, Size(512, 512), Scalar(), true);
     processNet("dnn/efficientdet-d0.pb", "dnn/efficientdet-d0.pbtxt", inp);
 }
 
+PERF_TEST_P_(DNNTestNetwork, EfficientNet)
+{
+    Mat sample = imread(findDataFile("dnn/dog416.png"));
+    Mat inp = blobFromImage(sample, 1.0 / 255.0, Size(224, 224), Scalar(), true);
+    transposeND(inp, {0, 2, 3, 1}, inp);
+    processNet("", "dnn/efficientnet-lite4.onnx", inp);
+}
+
+PERF_TEST_P_(DNNTestNetwork, YuNet) {
+    processNet("", "dnn/onnx/models/yunet-202303.onnx", cv::Size(640, 640));
+}
+
+PERF_TEST_P_(DNNTestNetwork, SFace) {
+    processNet("", "dnn/face_recognition_sface_2021dec.onnx", cv::Size(112, 112));
+}
+
+PERF_TEST_P_(DNNTestNetwork, MPPalm) {
+    Mat inp(cv::Size(192, 192), CV_32FC3);
+    randu(inp, 0.0f, 1.0f);
+    inp = blobFromImage(inp, 1.0, Size(), Scalar(), false);
+    transposeND(inp, {0, 2, 3, 1}, inp);
+    processNet("", "dnn/palm_detection_mediapipe_2023feb.onnx", inp);
+}
+
+PERF_TEST_P_(DNNTestNetwork, MPHand) {
+    Mat inp(cv::Size(224, 224), CV_32FC3);
+    randu(inp, 0.0f, 1.0f);
+    inp = blobFromImage(inp, 1.0, Size(), Scalar(), false);
+    transposeND(inp, {0, 2, 3, 1}, inp);
+    processNet("", "dnn/handpose_estimation_mediapipe_2023feb.onnx", inp);
+}
+
+PERF_TEST_P_(DNNTestNetwork, MPPose) {
+    Mat inp(cv::Size(256, 256), CV_32FC3);
+    randu(inp, 0.0f, 1.0f);
+    inp = blobFromImage(inp, 1.0, Size(), Scalar(), false);
+    transposeND(inp, {0, 2, 3, 1}, inp);
+    processNet("", "dnn/pose_estimation_mediapipe_2023mar.onnx", inp);
+}
+
+PERF_TEST_P_(DNNTestNetwork, PPOCRv3) {
+    applyTestTag(CV_TEST_TAG_MEMORY_512MB);
+    processNet("", "dnn/onnx/models/PP_OCRv3_DB_text_det.onnx", cv::Size(736, 736));
+}
+
+PERF_TEST_P_(DNNTestNetwork, PPHumanSeg) {
+    processNet("", "dnn/human_segmentation_pphumanseg_2023mar.onnx", cv::Size(192, 192));
+}
+
+PERF_TEST_P_(DNNTestNetwork, CRNN) {
+    Mat inp(cv::Size(100, 32), CV_32FC1);
+    randu(inp, 0.0f, 1.0f);
+    inp = blobFromImage(inp, 1.0, Size(), Scalar(), false);
+    processNet("", "dnn/text_recognition_CRNN_EN_2021sep.onnx", inp);
+}
+
+PERF_TEST_P_(DNNTestNetwork, ViTTrack) {
+    Mat inp1(cv::Size(128, 128), CV_32FC3);
+    Mat inp2(cv::Size(256, 256), CV_32FC3);
+    randu(inp1, 0.0f, 1.0f);
+    randu(inp2, 0.0f, 1.0f);
+    inp1 = blobFromImage(inp1, 1.0, Size(), Scalar(), false);
+    inp2 = blobFromImage(inp2, 1.0, Size(), Scalar(), false);
+    processNet("", "dnn/onnx/models/vitTracker.onnx", {std::make_tuple(inp1, "template"), std::make_tuple(inp2, "search")});
+}
+
+
+PERF_TEST_P_(DNNTestNetwork, EfficientDet_int8)
+{
+    if (target != DNN_TARGET_CPU || (backend != DNN_BACKEND_OPENCV &&
+        backend != DNN_BACKEND_TIMVX && backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) {
+        throw SkipTestException("");
+    }
+    Mat inp = imread(findDataFile("dnn/dog416.png"));
+    inp = blobFromImage(inp, 1.0 / 255.0, Size(320, 320), Scalar(), true);
+    processNet("", "dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", inp);
+}
+
 INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets());
 
 } // namespace
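Note: the fixture's primary processNet() overload now takes a vector of (blob, input name) tuples, and the Mat/Size overloads forward to it. A sketch of the same multi-input flow written directly against the public dnn API (4.x), mirroring what the ViTTrack test exercises; the model path and input names here are placeholders, not values defined by this diff:

#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <string>
#include <tuple>
#include <vector>

int main()
{
    using namespace cv;
    std::vector<std::tuple<Mat, std::string>> inputs;

    Mat tmpl(Size(128, 128), CV_32FC3), search(Size(256, 256), CV_32FC3);
    randu(tmpl, 0.0f, 1.0f);
    randu(search, 0.0f, 1.0f);
    inputs.emplace_back(dnn::blobFromImage(tmpl), "template");
    inputs.emplace_back(dnn::blobFromImage(search), "search");

    dnn::Net net = dnn::readNet("vitTracker.onnx");  // placeholder path

    // Each blob is bound to its named network input, as processNet() does.
    for (auto& inp : inputs)
        net.setInput(std::get<0>(inp), std::get<1>(inp));

    // Shapes for the memory/FLOPS queries come from the same blobs.
    std::vector<dnn::MatShape> shapes;
    for (auto& inp : inputs)
        shapes.push_back(dnn::shape(std::get<0>(inp)));

    size_t weightsMem = 0, blobsMem = 0;
    net.getMemoryConsumption(shapes, weightsMem, blobsMem);

    Mat out = net.forward();
    return 0;
}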
@@ -17,7 +17,7 @@ namespace cv { namespace dnn {
 class ImportNodeWrapper
 {
 public:
-    virtual ~ImportNodeWrapper() {};
+    virtual ~ImportNodeWrapper() {}
 
     virtual int getNumInputs() const = 0;
 
@@ -33,7 +33,7 @@ public:
 class ImportGraphWrapper
 {
 public:
-    virtual ~ImportGraphWrapper() {};
+    virtual ~ImportGraphWrapper() {}
 
     virtual Ptr<ImportNodeWrapper> getNode(int idx) const = 0;
 
@@ -590,7 +590,7 @@ void InfEngineNgraphNet::init(Target targetId)
             allBlobs[name] = ov::Tensor(src.get_element_type(), outShape, src.data());
         }
 
-        ppp.output(i++).tensor().set_element_type(ov::element::f32);  // Should be always FP32
+        ppp.output(i++).tensor().set_element_type(src.get_element_type());
     }
 
     ppp.build();
@@ -840,6 +840,8 @@ ov::Tensor wrapToNgraphBlob(const Mat& m) {
         return ov::Tensor(ov::element::f32, shape, m.data);
     else if (m.type() == CV_8U)
         return ov::Tensor(ov::element::u8, shape, m.data);
+    else if (m.type() == CV_8SC1)
+        return ov::Tensor(ov::element::i8, shape, m.data);
     else if (m.type() == CV_32SC1)
         return ov::Tensor(ov::element::i32, shape, m.data);
     else
@@ -1234,6 +1236,32 @@ void InfEngineNgraphNet::forward(const std::vector<Ptr<BackendWrapper> >& outBlo
 #endif  // OpenVINO >= 2022.1
 }
 
+ngraph::Output<ngraph::Node> ngraphQuantize(ngraph::Output<ngraph::Node> input, float output_sc, float output_zp) {
+    float outLow = -128, outHigh = 127;
+    float inpLow = output_sc * (outLow - output_zp);
+    float inpHigh = output_sc * (outHigh - output_zp);
+    return std::make_shared<ngraph::op::FakeQuantize>(input,
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpHigh),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outHigh),
+        256  // levels
+    );
+}
+
+ngraph::Output<ngraph::Node> ngraphDequantize(ngraph::Output<ngraph::Node> input, float input_sc, float input_zp) {
+    float inpLow = -128, inpHigh = 127;
+    float outLow = input_sc * (inpLow - input_zp);
+    float outHigh = input_sc * (inpHigh - input_zp);
+    return std::make_shared<ngraph::op::FakeQuantize>(input,
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpHigh),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outHigh),
+        256  // levels
+    );
+}
+
 #endif
 
 }}
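Note: ngraphQuantize/ngraphDequantize express the usual affine int8 scheme, real = sc * (q - zp), as a FakeQuantize with 256 levels: dequantize maps the integer range [-128, 127] onto [sc*(-128 - zp), sc*(127 - zp)], and quantize is the inverse mapping. A scalar model of what those nodes compute (plain C++, no ngraph dependency):

#include <algorithm>
#include <cmath>
#include <cstdio>

static float quantize(float real, float sc, float zp)
{
    float q = std::round(real / sc + zp);
    return std::min(std::max(q, -128.0f), 127.0f);  // clamp to the int8 range
}

static float dequantize(float q, float sc, float zp)
{
    return sc * (q - zp);
}

int main()
{
    const float sc = 0.05f, zp = 3.0f;
    float q = quantize(1.0f, sc, zp);   // 23
    float r = dequantize(q, sc, zp);    // back to 1.0
    std::printf("q=%g real=%g\n", q, r);
    return 0;
}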
@@ -148,6 +148,9 @@ private:
     InferenceEngine::CNNNetwork t_net;
 };
 
+ngraph::Output<ngraph::Node> ngraphQuantize(ngraph::Output<ngraph::Node> input, float output_sc, float output_zp);
+ngraph::Output<ngraph::Node> ngraphDequantize(ngraph::Output<ngraph::Node> input, float input_sc, float input_zp);
+
 #endif  // HAVE_DNN_NGRAPH
 
 }}  // namespace cv::dnn
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <opencv2/dnn/shape_utils.hpp>
 
@@ -110,7 +111,8 @@ public:
             return true;
         }
 
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -238,6 +240,27 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        std::vector<size_t> shape(input.get_shape().size(), 1);
+        shape[1] = origin_weights.total();
+
+        ngraph::Output<ngraph::Node> res;
+        auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, origin_weights.data);
+        auto ieBias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, origin_bias.data);
+        res = std::make_shared<ngraph::op::v1::Multiply>(input, ieWeights);
+        res = std::make_shared<ngraph::op::v1::Add>(res, ieBias);
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
     {
         CV_TRACE_FUNCTION();
@@ -10,6 +10,7 @@
 #include "opencv2/core/hal/hal.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 #include <iostream>
 #include <numeric>
 
@@ -18,7 +19,7 @@ namespace cv
 namespace dnn
 {
 
-#if CV_SIMD
+#if CV_SIMD128
 static inline void v_expand_mul_add(const v_int8x16& a, const v_int8x16& b,
                                     v_int32x4& out0, v_int32x4& out1, v_int32x4& out2, v_int32x4& out3)
 {
@@ -195,7 +196,8 @@ public:
         }
 #endif
         // Only default backend and Conv1D/Conv2D/Conv3D are supported
-        return backendId == DNN_BACKEND_OPENCV && ksize >= 1 && ksize <= 3;
+        return (backendId == DNN_BACKEND_OPENCV && ksize >= 1 && ksize <= 3) ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -561,6 +563,126 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        CV_Assert(!blobs.empty());
+        CV_Assert_N(inputs.size() >= 1, nodes.size() >= 1);
+        CV_CheckTypeEQ(weightsMat.type(), CV_8S, "");
+        auto ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        std::vector<size_t> dims = ieInpNode.get_shape();
+        CV_Check(dims.size(), dims.size() >= 3 && dims.size() <= 5, "");
+        CV_Assert(ieInpNode.get_element_type() == ngraph::element::f32);
+        ngraph::Output<ngraph::Node> ieWeights;
+        if (nodes.size() > 1)
+            ieWeights = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+        const int inpCn = dims[1];
+        const int inpGroupCn = nodes.size() > 1 ? ieWeights.get_shape()[1] : blobs[0].size[1];
+        const int group = inpCn / inpGroupCn;
+
+        std::vector<size_t> kernel_shape;
+        if (group != 1)
+        {
+            kernel_shape.push_back(group);
+        }
+        kernel_shape.push_back(numOutput / group);
+        kernel_shape.push_back(inpCn / group);
+        std::copy(kernel_size.begin(), kernel_size.end(), back_inserter(kernel_shape));
+
+        if (nodes.size() == 1)
+        {
+            ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::i8, kernel_shape, blobs[0].data);
+        }
+        else
+        {
+            auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                ngraph::Shape{kernel_shape.size()}, std::vector<int64_t>(kernel_shape.begin(), kernel_shape.end()));
+            ieWeights = std::make_shared<ngraph::op::v1::Reshape>(ieWeights, shape, true);
+        }
+
+        ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
+        if (!padMode.empty())
+            pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::SAME_UPPER;
+
+        ieInpNode = ngraphDequantize(ieInpNode, input_sc, input_zp);
+
+        const float low = -128, high = 127;
+        std::vector<float> inpLows(numOutput, low);
+        std::vector<float> inpHighs(numOutput, high);
+        std::vector<float> outLows(numOutput);
+        std::vector<float> outHighs(numOutput);
+        std::vector<size_t> quantShape(kernel_shape.size(), 1);
+        if (group != 1)
+        {
+            quantShape[0] = group;
+            quantShape[1] = numOutput / group;
+        }
+        else
+        {
+            quantShape[0] = numOutput;
+        }
+
+        for (int i = 0; i < numOutput; ++i) {
+            outLows[i] = low * outputMultiplier[i] * output_sc / input_sc;
+            outHighs[i] = high * outputMultiplier[i] * output_sc / input_sc;
+        }
+        ieWeights = std::make_shared<ngraph::op::Convert>(ieWeights, ngraph::element::f32);
+        ieWeights = std::make_shared<ngraph::op::FakeQuantize>(ieWeights,
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, inpLows.data()),
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, inpHighs.data()),
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, outLows.data()),
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, outHighs.data()),
+            256  // levels
+        );
+
+        ngraph::Output<ngraph::Node> conv_node;
+        if (group != 1) {
+            conv_node = std::make_shared<ngraph::op::v1::GroupConvolution>(
+                ieInpNode, ieWeights,
+                ngraph::Strides(strides),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_begin.begin(), pads_begin.end())),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_end.begin(), pads_end.end())),
+                ngraph::Strides(dilations),
+                pad_type);
+        } else {
+            conv_node = std::make_shared<ngraph::op::v1::Convolution>(
+                ieInpNode, ieWeights,
+                ngraph::Strides(strides),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_begin.begin(), pads_begin.end())),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_end.begin(), pads_end.end())),
+                ngraph::Strides(dilations),
+                pad_type);
+        }
+
+        std::vector<size_t> shape(conv_node.get_shape().size(), 1);
+        shape[1] = conv_node.get_shape()[1];
+        if (biasvec.size() || nodes.size() == 3)
+        {
+            std::shared_ptr<ngraph::Node> bias;
+            if (nodes.size() == 3)
+            {
+                auto bias_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                    ngraph::Shape{shape.size()}, std::vector<int64_t>(shape.begin(), shape.end()));
+                bias = std::make_shared<ngraph::op::v1::Reshape>(nodes[2].dynamicCast<InfEngineNgraphNode>()->node, bias_shape, true);
+            }
+            else
+            {
+                std::vector<float> ovBias(numOutput);
+                for (int i = 0; i < numOutput; ++i) {
+                    ovBias[i] = (biasvec[i] + input_zp * cv::sum(blobs[0].row(i))[0]) * outputMultiplier[i] * output_sc;
+                }
+                bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), ovBias.data());
+            }
+            conv_node = std::make_shared<ngraph::op::v1::Add>(conv_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
+        }
+
+        conv_node = ngraphQuantize(conv_node, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(conv_node);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
 class ParallelConv : public cv::ParallelLoopBody
 {
 public:
@@ -893,7 +1015,7 @@ public:
             outptr[0] = std::min(std::max(out1, -128), 127);
             out_j = 1;
         }
-#if CV_SIMD
+#if CV_SIMD128
         if( stride_w == 1 )
         {
             const int out_delta = 16;
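Note: in the constant-bias branch above, the stored int32 bias was prepared for the integer kernel, which folds -input_zp * sum(W_i) into it; the FP32 ngraph graph convolves already-dequantized inputs, so it adds that term back and rescales to real units. A scalar check of that reconstruction, assuming outputMultiplier[i] = input_sc * weight_sc[i] / output_sc (the usual per-channel scaling of the int8 path; this relation is not stated in the diff itself):

#include <cstdio>

int main()
{
    const float input_sc = 0.02f, weight_sc = 0.1f, output_sc = 0.04f;
    const float input_zp = 5.0f;
    const float real_bias = 0.3f;
    const float sumW = 40.0f;  // sum of one output channel's int8 weights

    // What the int8 pipeline stores: bias in accumulator units, with the
    // input zero-point correction pre-folded in.
    const int bias_q = (int)(real_bias / (input_sc * weight_sc) - input_zp * sumW);  // -50
    const float outputMultiplier = input_sc * weight_sc / output_sc;                 // 0.05

    // What the ngraph branch reconstructs (mirrors the ovBias formula):
    const float ovBias = (bias_q + input_zp * sumW) * outputMultiplier * output_sc;

    std::printf("reconstructed=%g expected=%g\n", ovBias, real_bias);  // 0.3 == 0.3
    return 0;
}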
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <opencv2/dnn/shape_utils.hpp>
 #include <iostream>
@@ -56,7 +57,7 @@ public:
             return tvActType != tvActNotSupported;
         }
 #endif
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -244,6 +245,42 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        ngraph::Output<ngraph::Node> res;
+        if (type == "ReLU6Int8") {
+            res = std::make_shared<ngraph::op::Clamp>(input, 0.0f, 6.0f);
+        } else if (type == "ReLUInt8") {
+            if (slope) {
+                auto param = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &slope);
+                res = std::make_shared<ngraph::op::PRelu>(input, param);
+            } else {
+                res = std::make_shared<ngraph::op::Relu>(input);
+            }
+        } else if (type == "ELUInt8") {
+            res = std::make_shared<ngraph::op::Elu>(input, 1.0f);
+        } else if (type == "MishInt8") {
+            res = std::make_shared<ngraph::op::v4::Mish>(input);
+        } else if (type == "AbsValInt8") {
+            res = std::make_shared<ngraph::op::Abs>(input);
+        } else if (type == "SigmoidInt8") {
+            res = std::make_shared<ngraph::op::Sigmoid>(input);
+        } else {
+            CV_Error(Error::StsNotImplemented, type + " activation with OpenVINO");
+        }
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
     {
         CV_TRACE_FUNCTION();
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 
 namespace cv
@@ -138,7 +139,7 @@ public:
         // For TimVX Backend, only ELTWISE_CHANNNELS_SAME was supported.
         if (backendId == DNN_BACKEND_TIMVX && haveTimVX())
             return channelsModeInput == ELTWISE_CHANNNELS_SAME;
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -369,6 +370,38 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        CV_Assert(nodes.size() >= 2);
+        std::vector<ngraph::Output<ngraph::Node>> ieInpNodes(nodes.size());
+        for (size_t i = 0; i < nodes.size(); i++)
+        {
+            ieInpNodes[i] = nodes[i].dynamicCast<InfEngineNgraphNode>()->node;
+
+            float input_sc = !coeffs.empty() ? coeffs[i] : 1.0f;
+            float input_zp = op == PROD ? zeropoints[i] : 0.0f;
+            ieInpNodes[i] = ngraphDequantize(ieInpNodes[i], input_sc, input_zp);
+        }
+
+        auto res = ieInpNodes[0];
+        for (size_t i = 1; i < ieInpNodes.size(); i++)
+        {
+            switch (op) {
+                case SUM:  res = std::make_shared<ngraph::op::v1::Add>(res, ieInpNodes[i]); break;
+                case PROD: res = std::make_shared<ngraph::op::v1::Multiply>(res, ieInpNodes[i]); break;
+                case MAX:  res = std::make_shared<ngraph::op::v1::Maximum>(res, ieInpNodes[i]); break;
+                default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
+            }
+        }
+
+        res = ngraphQuantize(res, 1.0f, offset);
+
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     class EltwiseInvoker : public ParallelLoopBody
     {
         EltwiseLayerInt8Impl& self;
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <opencv2/dnn/shape_utils.hpp>
 
@@ -86,7 +87,8 @@ public:
             return false;
         }
 
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -303,7 +305,7 @@ public:
 #endif
     {
         int i = 0;
-#if CV_SIMD
+#if CV_SIMD128
        for( ; i <= nw - 4; i += 4, wptr += 4*wstep )
         {
             v_int32x4 vs0 = v_setzero_s32(), vs1 = v_setzero_s32(),
@@ -395,6 +397,77 @@ public:
 
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        CV_CheckTypeEQ(blobs[0].type(), CV_8S, "");  // weights
+        CV_CheckTypeEQ(blobs[1].type(), CV_32S, "");  // bias
+        CV_CheckTypeEQ(outputMultiplier.type(), CV_32F, "");
+
+        ngraph::Output<ngraph::Node> input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        ngraph::Output<ngraph::Node> ieWeights, ieBias, matmul;
+        bool transA = false, transB = true;
+        size_t numOutput = blobs[0].size[0];
+
+        if (nodes.size() == 2)
+        {
+            CV_Error(Error::StsNotImplemented, "");
+            // auto inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+            // matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, transA, transB);
+        }
+        else
+        {
+            std::vector<int> shape(1 + normalize_axis(axis, input.get_shape().size()), 0);
+            shape[shape.size() - 1] = -1;
+            input = std::make_shared<ngraph::op::v1::Reshape>(
+                input,
+                std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{shape.size()}, shape.data()),
+                true
+            );
+
+            input = ngraphDequantize(input, input_sc, input_zp);
+
+            const float low = -128, high = 127;
+            std::vector<float> inpLows(numOutput, low);
+            std::vector<float> inpHighs(numOutput, high);
+            std::vector<float> outLows(numOutput);
+            std::vector<float> outHighs(numOutput);
+            for (int i = 0; i < numOutput; ++i) {
+                outLows[i] = low * outputMultiplier.ptr<float>()[i] * output_sc / input_sc;
+                outHighs[i] = high * outputMultiplier.ptr<float>()[i] * output_sc / input_sc;
+            }
+
+            std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
+            ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::i8, weight_shape, blobs[0].data);
+            ieWeights = std::make_shared<ngraph::op::Convert>(ieWeights, ngraph::element::f32);
+            ieWeights = std::make_shared<ngraph::op::FakeQuantize>(ieWeights,
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, inpLows.data()),
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, inpHighs.data()),
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, outLows.data()),
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, outHighs.data()),
+                256  // levels
+            );
+            matmul = std::make_shared<ngraph::op::MatMul>(input, ieWeights, transA, transB);
+        }
+
+        if (blobs.size() > 1) {
+            int32_t* bias = blobs[1].ptr<int32_t>();
+            std::vector<float> ovBias(blobs[1].total());
+            for (int i = 0; i < ovBias.size(); ++i) {
+                ovBias[i] = (bias[i] + input_zp * cv::sum(blobs[0].row(i))[0]) * outputMultiplier.ptr<float>()[i] * output_sc;
+            }
+            auto bias_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
+                                                                    ngraph::Shape{blobs[1].total()}, ovBias.data());
+            matmul = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node);
+        }
+
+        matmul = ngraphQuantize(matmul, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(matmul);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     Mat weightsMat, biasMat, outputMultiplier, activationLUT;
     Ptr<ActivationLayerInt8> activ;
 };
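Note: the FakeQuantize over the weight matrix above uses bounds of shape {numOutput, 1}, i.e. one range per output row. Under the same assumption as the note after the convolution diff (outputMultiplier[i] approximately equals input_sc * weight_sc[i] / output_sc), those bounds reduce to a per-row weight scale. A scalar sketch of that reduction:

#include <cstdio>

int main()
{
    const float input_sc = 0.02f, output_sc = 0.04f;
    const float outputMultiplier = 0.05f;  // one per-channel value

    // Effective per-row weight scale implied by the FakeQuantize bounds.
    const float w_sc = outputMultiplier * output_sc / input_sc;  // 0.1

    const float low = -128.0f, high = 127.0f;
    const float outLow  = low  * w_sc;  // lower output bound fed to FakeQuantize
    const float outHigh = high * w_sc;  // upper output bound fed to FakeQuantize

    // With 256 levels, FakeQuantize maps q = -128..127 linearly onto
    // [outLow, outHigh], i.e. w_real = q * w_sc for that row.
    std::printf("w(-128)=%g w(127)=%g\n", outLow, outHigh);
    return 0;
}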
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 
 #include <float.h>
@@ -124,6 +125,10 @@ public:
                 return type == MAX || type == AVE;
             return false;
         }
+        else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        {
+            return true;
+        }
 
         return false;
     }
@@ -271,6 +276,49 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
+        if (!padMode.empty())
+            pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::SAME_UPPER;
+
+        auto rounding_type = ceilMode ? ngraph::op::RoundingType::CEIL : ngraph::op::RoundingType::FLOOR;
+        ngraph::Output<ngraph::Node> pool;
+        if (type == MAX) {
+            pool = std::make_shared<ngraph::op::v1::MaxPool>(input, ngraph::Strides(strides),
+                ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
+                rounding_type, pad_type);
+        } else if (type == AVE) {
+            pool = std::make_shared<ngraph::op::v1::AvgPool>(input, ngraph::Strides(strides),
+                ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
+                !avePoolPaddedArea, rounding_type, pad_type);
+        } else if (type == SUM) {
+            ngraph::Shape inpShape = input.get_shape();
+            CV_Assert(inpShape.size() == 2 + kernel_size.size());
+            std::vector<int64_t> axes;
+            for (size_t i = 0; i < kernel_size.size(); i++)
+            {
+                if (inpShape[2 + i] == kernel_size[i])
+                    axes.push_back(2 + i);
+            }
+            auto reduction_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{axes.size()}, axes);
+            pool = std::make_shared<ngraph::op::v1::ReduceSum>(input, reduction_axes, true);
+        } else {
+            CV_Error(Error::StsNotImplemented, format("INT8 Pooling type: %d", type));
+        }
+
+        pool = ngraphQuantize(pool, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(pool);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
     {
         CV_TRACE_FUNCTION();
|
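Note: every initNgraph added in this series follows the same dequantize / float op / requantize pattern around the existing FP32 operator. A scalar sketch of that round trip (helper names here are hypothetical; the real graph builds FakeQuantize subgraphs via the ngraphDequantize/ngraphQuantize helpers):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Per-tensor affine quantization round trip, mirroring what the nGraph
    // subgraph does: int8 -> float, float op, float -> int8.
    static float dequantize(int8_t q, float scale, int zeroPoint) {
        return scale * (static_cast<int>(q) - zeroPoint);
    }

    static int8_t quantize(float x, float scale, int zeroPoint) {
        int q = static_cast<int>(std::lround(x / scale)) + zeroPoint;
        return static_cast<int8_t>(std::min(127, std::max(-128, q)));
    }

    // Example: an int8 "max pool" over two values is just max() in float space.
    static int8_t maxPool2(int8_t a, int8_t b, float inSc, int inZp, float outSc, int outZp) {
        float fa = dequantize(a, inSc, inZp), fb = dequantize(b, inSc, inZp);
        return quantize(std::max(fa, fb), outSc, outZp);
    }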
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 namespace cv
 {
@@ -98,7 +99,8 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -171,6 +173,16 @@ public:
         else
             inputs[0].convertTo(outputs[0], CV_8S, 1.f/scales[0], zeropoints[0]);
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        const auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        auto quantized = ngraphQuantize(input, scales[0], zeropoints[0]);
+        return Ptr<BackendNode>(new InfEngineNgraphNode(quantized));
+    }
+#endif  // HAVE_DNN_NGRAPH
 };
 
 // Dequantize INT8 Inputs to FP32/FP16
@@ -214,7 +226,7 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -285,6 +297,16 @@ public:
         else
             inputs[0].convertTo(outputs[0], CV_32F, scales[0], -(scales[0]*zeropoints[0]));
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        const auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        auto quantized = ngraphDequantize(input, scales[0], zeropoints[0]);
+        return new InfEngineNgraphNode(quantized);
+    }
+#endif  // HAVE_DNN_NGRAPH
 };
 
 // Rescale/Requantize INT8 Inputs from (scale1, zeropoint1) to (scale2, zeropoint2)
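Note: for the Requantize case named in the trailing comment, the rescale reduces to one standard identity; a sketch of it (assumed background, not part of the patch):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Requantize q1 from (scale1, zp1) to (scale2, zp2):
    //   x  = scale1 * (q1 - zp1)
    //   q2 = round(x / scale2) + zp2 = round(scale1/scale2 * (q1 - zp1)) + zp2
    static int8_t requantize(int8_t q1, float scale1, int zp1, float scale2, int zp2) {
        float m = scale1 / scale2;  // precomputable multiplier
        int q2 = static_cast<int>(std::lround(m * (q1 - zp1))) + zp2;
        return static_cast<int8_t>(std::min(127, std::max(-128, q2)));
    }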
@@ -6,6 +6,7 @@
 #include "layers_common.hpp"
 #include <opencv2/imgproc.hpp>
 #include <opencv2/dnn/shape_utils.hpp>
+#include "../ie_ngraph.hpp"
 
 namespace cv
 {
@@ -72,7 +73,8 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -186,6 +188,59 @@ public:
         return flops;
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        std::vector<ngraph::Output<ngraph::Node>> ieInpNodes(nodes.size());
+        for (int i = 0; i < nodes.size(); ++i) {
+            ieInpNodes[i] = nodes[i].dynamicCast<InfEngineNgraphNode>()->node;
+        }
+
+        ieInpNodes[0] = ngraphDequantize(ieInpNodes[0], inp_sc[0], inp_zp[0]);
+
+        CV_Assert(!blobs.empty() || ieInpNodes.size() == 1 + (int)hasWeights + (int)hasBias);
+
+        ngraph::Output<ngraph::Node> weights, bias;
+        if (blobs.empty()) {
+            if (hasWeights)
+                weights = ieInpNodes[1];
+            if (hasBias)
+                bias = ieInpNodes[1 + (int)hasWeights];
+        } else {
+            std::vector<size_t> shape = ieInpNodes[0].get_shape();
+            int cAxis = normalize_axis(axis, shape.size());
+
+            size_t numWeights = blobs[0].total();
+            for (int i = 0; i < cAxis; ++i) {
+                shape[i] = 1;
+            }
+            for (int i = cAxis; i < shape.size(); ++i) {
+                if (numWeights == 1) {
+                    shape[i] = 1;
+                }
+                numWeights = std::max(numWeights / shape[i], (size_t)1);
+            }
+
+            if (hasWeights)
+                weights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, blobs[0].data);
+            if (hasBias)
+                bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, blobs[(int)hasWeights].data);
+        }
+
+        ngraph::Output<ngraph::Node> res = ieInpNodes[0];
+        if (hasWeights) {
+            res = std::make_shared<ngraph::op::v1::Multiply>(res, weights);
+        }
+        if (hasBias) {
+            res = std::make_shared<ngraph::op::v1::Add>(res, bias);
+        }
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
 private:
     bool hasWeights;
     std::vector<float> inp_sc;
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <algorithm>
 #include <stdlib.h>
@@ -90,7 +91,8 @@ public:
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
         return backendId == DNN_BACKEND_OPENCV ||
-               (backendId == DNN_BACKEND_TIMVX && haveTimVX());
+               (backendId == DNN_BACKEND_TIMVX && haveTimVX()) ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
@@ -194,6 +196,26 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        ngraph::Output<ngraph::Node> res;
+        if (logSoftMax) {
+            res = std::make_shared<ngraph::op::v5::LogSoftmax>(input, axis);
+        } else {
+            res = std::make_shared<ngraph::op::v1::Softmax>(input, axis);
+        }
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     template <bool with_log>
     class SoftmaxInt8Invoker : public ParallelLoopBody {
    public:
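Note: softmax outputs lie in [0, 1], so the requantize step after the float softmax commonly uses a fixed output scale near 1/255; the values below are illustrative defaults for that range, not taken from the patch:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    // Float softmax followed by int8 quantization; outSc/outZp are assumed
    // defaults for a [0,1] output range.
    std::vector<int8_t> softmaxInt8(const std::vector<float>& x,
                                    float outSc = 1.f / 255, int outZp = -128)
    {
        float maxVal = x[0];
        for (float v : x) maxVal = std::max(maxVal, v);
        float sum = 0.f;
        std::vector<float> e(x.size());
        for (size_t i = 0; i < x.size(); ++i) { e[i] = std::exp(x[i] - maxVal); sum += e[i]; }
        std::vector<int8_t> out(x.size());
        for (size_t i = 0; i < x.size(); ++i) {
            int q = static_cast<int>(std::lround(e[i] / sum / outSc)) + outZp;
            out[i] = static_cast<int8_t>(std::min(127, std::max(-128, q)));
        }
        return out;
    }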
@@ -62,10 +62,15 @@ public:
     {
         std::vector<UMat> outputs;
         outs.getUMatVector(outputs);
-        if (outs.depth() == CV_16S)
-            convertFp16(blobs[0], outputs[0]);
+        if (outs.depth() == CV_16S) {
+            auto blob = blobs[0];
+            if (blob.type() != CV_32F) {
+                blob.convertTo(blob, CV_32F);
+            }
+            convertFp16(blob, outputs[0]);
+        }
         else
-            blobs[0].copyTo(outputs[0]);
+            blobs[0].convertTo(outputs[0], outputs[0].type());
         return true;
     }
 #endif
@@ -80,7 +85,7 @@ public:
 
     std::vector<Mat> outputs;
     outputs_arr.getMatVector(outputs);
-    blobs[0].copyTo(outputs[0]);
+    blobs[0].convertTo(outputs[0], outputs[0].type());
 }
 
 #ifdef HAVE_CANN
@@ -123,9 +128,23 @@ public:
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
-        auto node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
+        ngraph::element::Type dType;
+        if (blobs[0].depth() == CV_32F) {
+            dType = ngraph::element::f32;
+        } else if (blobs[0].depth() == CV_32S) {
+            dType = ngraph::element::i32;
+        } else if (blobs[0].depth() == CV_8S) {
+            dType = ngraph::element::i8;
+        } else {
+            CV_Error(Error::StsNotImplemented, format("Unexpected Const data depth: %d", blobs[0].depth()));
+        }
+        std::shared_ptr<ngraph::Node> node =
+            std::make_shared<ngraph::op::Constant>(dType,
                                                    getShape<size_t>(blobs[0]),
                                                    blobs[0].data);
+        if (node->get_element_type() != ngraph::element::f32) {
+            node = std::make_shared<ngraph::op::Convert>(node, ngraph::element::f32);
+        }
         return Ptr<BackendNode>(new InfEngineNgraphNode(node));
     }
 #endif // HAVE_DNN_NGRAPH
@@ -151,7 +170,11 @@ public:
     auto context = reinterpret_cast<csl::CSLContext*>(context_);
 
     CV_Assert(blobs.size() == 1);
-    return make_cuda_node<cuda4dnn::ConstOp>(preferableTarget, std::move(context->stream), blobs[0]);
+    Mat blob = blobs[0];
+    if (blob.type() != CV_32F) {
+        blob.convertTo(blob, CV_32F);
+    }
+    return make_cuda_node<cuda4dnn::ConstOp>(preferableTarget, std::move(context->stream), blob);
 }
 #endif
 
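Note: the Const change maps cv::Mat depths onto nGraph element types and inserts a Convert back to f32 for non-float blobs. A minimal sketch of the same depth dispatch in plain OpenCV terms (helper name hypothetical):

    #include <opencv2/core.hpp>

    // Pick a blob the rest of the float graph can consume; mirrors the idea
    // of Constant(dType) + Convert(f32) in the hunk above.
    static cv::Mat asFloatBlob(const cv::Mat& blob)
    {
        CV_Assert(blob.depth() == CV_32F || blob.depth() == CV_32S || blob.depth() == CV_8S);
        if (blob.depth() == CV_32F)
            return blob;                    // already f32, no conversion needed
        cv::Mat converted;
        blob.convertTo(converted, CV_32F);  // analogous to ngraph::op::Convert
        return converted;
    }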
@@ -201,8 +201,6 @@ public:
 };
 
 
-#define IS_POWER_LAYER(layer) \
-    (!layer.empty() && !layer->type.compare("Power"))
 //TODO: simultaneously convolution and bias addition for cache optimization
 class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
 {
@@ -12,16 +12,16 @@
 #include <opencv2/core/hal/intrin.hpp>
 #include <opencv2/core/utility.hpp> // parallel_for_
 
-#define FAST_GEMM_DEFAULT_STORAGE (1<<20) // 2^20
-#define FAST_GEMM_DEFAULT_MAX_STACKBUF (1 << 14)
+#define FAST_GEMM_STORAGE (1<<20) // 2^20
+#define FAST_GEMM_MAX_STACKBUF (1 << 14)
 
-#define FAST_GEMM_DEFAULT_F32_MC 64
-#define FAST_GEMM_DEFAULT_F32_NC 240
-#define FAST_GEMM_DEFAULT_F32_MR 8
-#define FAST_GEMM_DEFAULT_F32_NR 12
-#define FAST_GEMM_DEFAULT_F32_PACKED_STRIDE_K 256
+#define FAST_GEMM_F32_MC 64
+#define FAST_GEMM_F32_NC 240
+#define FAST_GEMM_F32_MR 8
+#define FAST_GEMM_F32_NR 12
+#define FAST_GEMM_F32_PACKED_STRIDE_K 64
 
-#define FAST_GEMM_DEFAULT_IMPLEMENT_PACK(N, suffix, styp, dtyp) \
+#define FAST_GEMM_IMPLEMENT_PACK(N, suffix, styp, dtyp) \
 static void fast_gemm_pack##N##suffix( int m, int k, const void* A_, \
                                        int lda0, int lda1, void* packA_ ) \
 { \
@@ -32,47 +32,47 @@ static void fast_gemm_pack##N##suffix( int m, int k, const void* A_, \
         const styp* a_ptr = A + lda0*i; \
         for( int j = 0; j < k*lda1; packA += N, j += lda1 ) \
         { \
-            FAST_GEMM_DEFAULT_LOAD_TO_BUF_##N(styp); \
-            FAST_GEMM_DEFAULT_PACK##suffix##_##N(buf, packA); \
+            FAST_GEMM_LOAD_TO_BUF_##N(styp); \
+            FAST_GEMM_PACK##suffix##_##N(buf, packA); \
         } \
     } else { \
         const styp* a_ptr[N]; \
         for (int k = 0; k < N; k++) a_ptr[k] = A + lda0*(i+k < m ? i+k : i); \
         for( int j = 0; j < k*lda1; packA += N, j += lda1 ) \
         { \
-            FAST_GEMM_DEFAULT_LOAD_TO_BUF_BORDERS_##N(styp); \
-            FAST_GEMM_DEFAULT_PACK##suffix##_##N(buf, packA); \
+            FAST_GEMM_LOAD_TO_BUF_BORDERS_##N(styp); \
+            FAST_GEMM_PACK##suffix##_##N(buf, packA); \
         } \
     } \
 } \
 }
 
-#define FAST_GEMM_DEFAULT_LOAD_TO_BUF_8(styp) \
+#define FAST_GEMM_LOAD_TO_BUF_8(styp) \
     styp buf[] = { \
         a_ptr[j], a_ptr[j+lda0], a_ptr[j+lda0*2], a_ptr[j+lda0*3], \
         a_ptr[j+lda0*4], a_ptr[j+lda0*5], a_ptr[j+lda0*6], a_ptr[j+lda0*7] }
 
-#define FAST_GEMM_DEFAULT_LOAD_TO_BUF_BORDERS_8(styp) \
+#define FAST_GEMM_LOAD_TO_BUF_BORDERS_8(styp) \
     styp buf[] = { \
         a_ptr[0][j], a_ptr[1][j], a_ptr[2][j], a_ptr[3][j], \
         a_ptr[4][j], a_ptr[5][j], a_ptr[6][j], a_ptr[7][j] }
 
-#define FAST_GEMM_DEFAULT_LOAD_TO_BUF_12(styp) \
+#define FAST_GEMM_LOAD_TO_BUF_12(styp) \
     styp buf[] = { \
         a_ptr[j], a_ptr[j+lda0], a_ptr[j+lda0*2], a_ptr[j+lda0*3], \
         a_ptr[j+lda0*4], a_ptr[j+lda0*5], a_ptr[j+lda0*6], a_ptr[j+lda0*7], \
         a_ptr[j+lda0*8], a_ptr[j+lda0*9], a_ptr[j+lda0*10], a_ptr[j+lda0*11] }
 
-#define FAST_GEMM_DEFAULT_LOAD_TO_BUF_BORDERS_12(styp) \
+#define FAST_GEMM_LOAD_TO_BUF_BORDERS_12(styp) \
     styp buf[] = { \
         a_ptr[0][j], a_ptr[1][j], a_ptr[2][j], a_ptr[3][j], \
         a_ptr[4][j], a_ptr[5][j], a_ptr[6][j], a_ptr[7][j], \
         a_ptr[8][j], a_ptr[9][j], a_ptr[10][j], a_ptr[11][j] }
 
-#define FAST_GEMM_DEFAULT_PACK_COPY(src, dst, N) \
+#define FAST_GEMM_PACK_COPY(src, dst, N) \
     memcpy((dst), (src), N*sizeof(src[0]))
-#define FAST_GEMM_DEFAULT_PACK_f32_8(src, dst) FAST_GEMM_DEFAULT_PACK_COPY((src), (dst), 8)
-#define FAST_GEMM_DEFAULT_PACK_f32_12(src, dst) FAST_GEMM_DEFAULT_PACK_COPY((src), (dst), 12)
+#define FAST_GEMM_PACK_f32_8(src, dst) FAST_GEMM_PACK_COPY((src), (dst), 8)
+#define FAST_GEMM_PACK_f32_12(src, dst) FAST_GEMM_PACK_COPY((src), (dst), 12)
 
 namespace cv { namespace dnn { namespace cpu_baseline {
 
@@ -88,20 +88,20 @@ void fastGemmKernel(int M, int N, int K,
                     float alpha, const char *A, int lda0, int lda1,
                     const char *packed_B, float beta, char *C, int ldc, int esz);
 
-FAST_GEMM_DEFAULT_IMPLEMENT_PACK(8, _f32, float, float)
-FAST_GEMM_DEFAULT_IMPLEMENT_PACK(12, _f32, float, float)
+FAST_GEMM_IMPLEMENT_PACK(8, _f32, float, float)
+FAST_GEMM_IMPLEMENT_PACK(12, _f32, float, float)
 
 int fastGemmPackBSize(int N, int K) {
-    int GEMM_NC = FAST_GEMM_DEFAULT_F32_NC, GEMM_NR = FAST_GEMM_DEFAULT_F32_NR;
+    int GEMM_NC = FAST_GEMM_F32_NC, GEMM_NR = FAST_GEMM_F32_NR;
     int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
 
     return static_cast<int>((N + NC - 1) / NC) * NC * K;
 }
 
 void fastGemmPackBKernel(const char *B, char *packed_B, int N, int K, int ldb0, int ldb1, int esz) {
-    int GEMM_NC = FAST_GEMM_DEFAULT_F32_NC, GEMM_NR = FAST_GEMM_DEFAULT_F32_NR;
+    int GEMM_NC = FAST_GEMM_F32_NC, GEMM_NR = FAST_GEMM_F32_NR;
     int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-    int KC = std::min(FAST_GEMM_DEFAULT_F32_PACKED_STRIDE_K, K);
+    int KC = std::min(FAST_GEMM_F32_PACKED_STRIDE_K, K);
 
     int n_tiles = (N + NC - 1) / NC;
     for (int r = 0; r < n_tiles; ++r) {
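Note: a worked example of the fastGemmPackBSize rounding with the baseline tile sizes (NC = 240, NR = 12): the packed buffer is always padded out to whole NC panels. A tiny self-checking sketch of the same arithmetic:

    #include <cassert>

    // Same rounding as fastGemmPackBSize with GEMM_NC = 240, GEMM_NR = 12 (baseline).
    static int packBSize(int N, int K) {
        int GEMM_NC = 240, GEMM_NR = 12;
        int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
        return (N + NC - 1) / NC * NC * K;
    }

    int main() {
        assert(packBSize(250, 64) == 2 * 240 * 64); // padded to two whole NC panels
        assert(packBSize(100, 8) == 108 * 8);       // NC rounds 100 up to 9*12 = 108
        return 0;
    }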
@@ -116,140 +116,50 @@ void fastGemmPackBKernel(const char *B, char *packed_B, int N, int K, int ldb0,
     }
 }
 
-#if CV_SIMD128
-static void fast_gemm8x12_f32(int k, const char *a_, const char *b_,
-                              char *c_, int ldc, float alpha) {
+static inline void fast_gemm_f32(int k, const char *a_, const char *b_,
+                                 char *c_, int ldc, float alpha) {
     const float* a = (const float*)a_;
     const float* b = (const float*)b_;
     float* c = (float*)c_;
 
-    v_float32x4 s00 = v_setzero_f32(), s01 = s00, s02 = s00;
-    v_float32x4 s10 = s00, s11 = s00, s12 = s00;
-    v_float32x4 s20 = s00, s21 = s00, s22 = s00;
-    v_float32x4 s30 = s00, s31 = s00, s32 = s00;
-    v_float32x4 s40 = s00, s41 = s00, s42 = s00;
-    v_float32x4 s50 = s00, s51 = s00, s52 = s00;
-    v_float32x4 s60 = s00, s61 = s00, s62 = s00;
-    v_float32x4 s70 = s00, s71 = s00, s72 = s00;
-
-    for(int p = 0; p < k; p++, a += FAST_GEMM_DEFAULT_F32_MR, b += FAST_GEMM_DEFAULT_F32_NR) {
-        v_float32x4 b0 = v_load(b), b1 = v_load(b + 4), b2 = v_load(b + 8);
-
-        v_float32x4 a0 = v_setall_f32(*a);
-        s00 = v_fma(b0, a0, s00);
-        s01 = v_fma(b1, a0, s01);
-        s02 = v_fma(b2, a0, s02);
-        v_float32x4 a1 = v_setall_f32(*(a + 1));
-        s10 = v_fma(b0, a1, s10);
-        s11 = v_fma(b1, a1, s11);
-        s12 = v_fma(b2, a1, s12);
-
-        v_float32x4 a2 = v_setall_f32(*(a + 2));
-        s20 = v_fma(b0, a2, s20);
-        s21 = v_fma(b1, a2, s21);
-        s22 = v_fma(b2, a2, s22);
-        v_float32x4 a3 = v_setall_f32(*(a + 3));
-        s30 = v_fma(b0, a3, s30);
-        s31 = v_fma(b1, a3, s31);
-        s32 = v_fma(b2, a3, s32);
-
-        a0 = v_setall_f32(*(a + 4));
-        s40 = v_fma(b0, a0, s40);
-        s41 = v_fma(b1, a0, s41);
-        s42 = v_fma(b2, a0, s42);
-        a1 = v_setall_f32(*(a + 5));
-        s50 = v_fma(b0, a1, s50);
-        s51 = v_fma(b1, a1, s51);
-        s52 = v_fma(b2, a1, s52);
-
-        a2 = v_setall_f32(*(a + 6));
-        s60 = v_fma(b0, a2, s60);
-        s61 = v_fma(b1, a2, s61);
-        s62 = v_fma(b2, a2, s62);
-        a3 = v_setall_f32(*(a + 7));
-        s70 = v_fma(b0, a3, s70);
-        s71 = v_fma(b1, a3, s71);
-        s72 = v_fma(b2, a3, s72);
-    }
-
-    v_float32x4 c0, c1, c2, c3, c4, c5, v_alpha = v_setall_f32(alpha);
-#define FAST_GEMM_FINALE(row0, row1)     \
-    c0 = v_load(c + row0 * ldc);         \
-    c1 = v_load(c + row0 * ldc + 4);     \
-    c2 = v_load(c + row0 * ldc + 8);     \
-    c3 = v_load(c + row1 * ldc);         \
-    c4 = v_load(c + row1 * ldc + 4);     \
-    c5 = v_load(c + row1 * ldc + 8);     \
-    c0 = v_fma(s##row0##0, v_alpha, c0); \
-    c1 = v_fma(s##row0##1, v_alpha, c1); \
-    c2 = v_fma(s##row0##2, v_alpha, c2); \
-    c3 = v_fma(s##row1##0, v_alpha, c3); \
-    c4 = v_fma(s##row1##1, v_alpha, c4); \
-    c5 = v_fma(s##row1##2, v_alpha, c5); \
-    v_store(c + row0 * ldc, c0);         \
-    v_store(c + row0 * ldc + 4, c1);     \
-    v_store(c + row0 * ldc + 8, c2);     \
-    v_store(c + row1 * ldc, c3);         \
-    v_store(c + row1 * ldc + 4, c4);     \
-    v_store(c + row1 * ldc + 8, c5);
-
-    FAST_GEMM_FINALE(0, 1);
-    FAST_GEMM_FINALE(2, 3);
-    FAST_GEMM_FINALE(4, 5);
-    FAST_GEMM_FINALE(6, 7);
-#undef FAST_GEMM_FINALE
-}
-
-#else
-static void fast_gemm_f32(int k, const char *a_, const char *b_,
-                          char *c_, int ldc, float alpha) {
-    const float* a = (const float*)a_;
-    const float* b = (const float*)b_;
-    float* c = (float*)c_;
-
-    float sbuf[FAST_GEMM_DEFAULT_F32_MR * FAST_GEMM_DEFAULT_F32_NR];
+    float sbuf[FAST_GEMM_F32_MR * FAST_GEMM_F32_NR];
     memset(sbuf, 0, sizeof(sbuf));
     for(int p = 0; p < k; p++) {
-        for( int i = 0; i < FAST_GEMM_DEFAULT_F32_MR; i++ ) {
-            float ai = a[FAST_GEMM_DEFAULT_F32_MR * p + i];
-            for( int j = 0; j < FAST_GEMM_DEFAULT_F32_NR; j++ )
-                sbuf[i * FAST_GEMM_DEFAULT_F32_NR + j] += b[FAST_GEMM_DEFAULT_F32_NR * p + j] * ai;
+        for( int i = 0; i < FAST_GEMM_F32_MR; i++ ) {
+            float ai = a[FAST_GEMM_F32_MR * p + i];
+            for( int j = 0; j < FAST_GEMM_F32_NR; j++ )
+                sbuf[i * FAST_GEMM_F32_NR + j] += b[FAST_GEMM_F32_NR * p + j] * ai;
         }
     }
-    for (int i = 0; i < FAST_GEMM_DEFAULT_F32_MR; i++) {
-        for (int j = 0; j < FAST_GEMM_DEFAULT_F32_NR; j++)
-            c[i * ldc + j] += alpha * sbuf[i * FAST_GEMM_DEFAULT_F32_NR + j];
+    for (int i = 0; i < FAST_GEMM_F32_MR; i++) {
+        for (int j = 0; j < FAST_GEMM_F32_NR; j++)
+            c[i * ldc + j] += alpha * sbuf[i * FAST_GEMM_F32_NR + j];
     }
 }
-#endif // CV_SIMD128
 
 static void fast_gemm_macro_kernel(int m, int n, int k,
                                    const char *packed_A, const char *packed_B,
                                    float alpha, char *c, int ldc0, int esz) {
     int ldc0_esz = ldc0 * esz;
 
-    double tempC[FAST_GEMM_DEFAULT_F32_MR * FAST_GEMM_DEFAULT_F32_NR]; // make sure the buffer is big enough
-    for(int i = 0; i < m; i += FAST_GEMM_DEFAULT_F32_MR) {
-        for(int j = 0; j < n; j += FAST_GEMM_DEFAULT_F32_NR) {
+    double tempC[FAST_GEMM_F32_MR * FAST_GEMM_F32_NR]; // make sure the buffer is big enough
+    for(int i = 0; i < m; i += FAST_GEMM_F32_MR) {
+        for(int j = 0; j < n; j += FAST_GEMM_F32_NR) {
             char* cptr0 = &c[i * ldc0_esz + j * esz];
             char* cptr = cptr0;
             int ldc = ldc0;
-            int mr = m - i < FAST_GEMM_DEFAULT_F32_MR ? m - i : FAST_GEMM_DEFAULT_F32_MR;
-            int nr = n - j < FAST_GEMM_DEFAULT_F32_NR ? n - j : FAST_GEMM_DEFAULT_F32_NR;
+            int mr = m - i < FAST_GEMM_F32_MR ? m - i : FAST_GEMM_F32_MR;
+            int nr = n - j < FAST_GEMM_F32_NR ? n - j : FAST_GEMM_F32_NR;
             int nr_esz = nr * esz;
-            bool partial = (bool)((mr < FAST_GEMM_DEFAULT_F32_MR) | (nr < FAST_GEMM_DEFAULT_F32_NR));
+            bool partial = (bool)((mr < FAST_GEMM_F32_MR) | (nr < FAST_GEMM_F32_NR));
             if (partial) {
                 memset(tempC, 0, sizeof(tempC));
                 cptr = (char *)tempC;
-                ldc = FAST_GEMM_DEFAULT_F32_NR;
+                ldc = FAST_GEMM_F32_NR;
                 for(int p = 0; p < mr; p++)
                     memcpy(cptr + p * (ldc * esz), cptr0 + p * ldc0_esz, nr_esz);
             }
-#if CV_SIMD128
-            fast_gemm8x12_f32(k, packed_A + i * k * esz, packed_B + j * k * esz, cptr, ldc, alpha);
-#else
             fast_gemm_f32(k, packed_A + i * k * esz, packed_B + j * k * esz, cptr, ldc, alpha);
-#endif
 
             if (partial) {
                 for(int p = 0; p < mr; p++)
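Note: the tempC detour in fast_gemm_macro_kernel exists because the micro-kernel always writes a full MR x NR tile; edge tiles are computed into a local scratch buffer and only the valid mr x nr elements are copied back. A compact illustration of the copy-in/copy-out (tile shapes assumed from the code above):

    #include <cstring>

    // Edge-tile handling: compute into a full-size scratch tile, then copy
    // back only the valid mr x nr region (MR = 8, NR = 12 in the baseline).
    enum { MR = 8, NR = 12 };

    static void copyTileIn(float* scratch, const float* c, int ldc, int mr, int nr) {
        std::memset(scratch, 0, MR * NR * sizeof(float));
        for (int p = 0; p < mr; p++)
            std::memcpy(scratch + p * NR, c + p * ldc, nr * sizeof(float));
    }

    static void copyTileOut(float* c, int ldc, const float* scratch, int mr, int nr) {
        for (int p = 0; p < mr; p++)
            std::memcpy(c + p * ldc, scratch + p * NR, nr * sizeof(float));
    }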
@@ -263,19 +173,19 @@ void fastGemmKernel(int M, int N, int K,
                     float alpha, const char *A, int lda0, int lda1,
                     const char *B, int ldb0, int ldb1,
                     float beta, char *C, int ldc, int esz) {
-    int GEMM_MC = FAST_GEMM_DEFAULT_F32_MC,
-        GEMM_NC = FAST_GEMM_DEFAULT_F32_NC,
-        GEMM_MR = FAST_GEMM_DEFAULT_F32_MR,
-        GEMM_NR = FAST_GEMM_DEFAULT_F32_NR;
+    int GEMM_MC = FAST_GEMM_F32_MC,
+        GEMM_NC = FAST_GEMM_F32_NC,
+        GEMM_MR = FAST_GEMM_F32_MR,
+        GEMM_NR = FAST_GEMM_F32_NR;
 
     int MC = (((GEMM_MC < M ? GEMM_MC : M) + GEMM_MR - 1) / GEMM_MR) * GEMM_MR;
     int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-    int KC = FAST_GEMM_DEFAULT_STORAGE / ((MC + NC) * esz);
+    int KC = FAST_GEMM_STORAGE / ((MC + NC) * esz);
     KC = KC > 8 ? KC : 8;
     KC = KC < K ? KC : K;
 
     size_t buff_size = KC * (MC + NC) * esz;
-    bool use_stackbuff = buff_size <= FAST_GEMM_DEFAULT_MAX_STACKBUF;
+    bool use_stackbuff = buff_size <= FAST_GEMM_MAX_STACKBUF;
     int m_tiles = (M + MC - 1) / MC;
     int n_tiles = (N + NC - 1) / NC;
     int total_tiles = m_tiles * n_tiles;
@@ -328,17 +238,17 @@ void fastGemmKernel(int M, int N, int K,
 void fastGemmKernel(int M, int N, int K,
                     float alpha, const char *A, int lda0, int lda1,
                     const char *packed_B, float beta, char *C, int ldc, int esz) {
-    int GEMM_MC = FAST_GEMM_DEFAULT_F32_MC,
-        GEMM_NC = FAST_GEMM_DEFAULT_F32_NC,
-        GEMM_MR = FAST_GEMM_DEFAULT_F32_MR,
-        GEMM_NR = FAST_GEMM_DEFAULT_F32_NR;
+    int GEMM_MC = FAST_GEMM_F32_MC,
+        GEMM_NC = FAST_GEMM_F32_NC,
+        GEMM_MR = FAST_GEMM_F32_MR,
+        GEMM_NR = FAST_GEMM_F32_NR;
 
     int MC = (((GEMM_MC < M ? GEMM_MC : M) + GEMM_MR - 1) / GEMM_MR) * GEMM_MR;
     int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-    int KC = std::min(FAST_GEMM_DEFAULT_F32_PACKED_STRIDE_K, K);
+    int KC = std::min(FAST_GEMM_F32_PACKED_STRIDE_K, K);
 
     size_t buff_size = KC * MC * esz;
-    bool use_stackbuff = buff_size <= FAST_GEMM_DEFAULT_MAX_STACKBUF;
+    bool use_stackbuff = buff_size <= FAST_GEMM_MAX_STACKBUF;
     int m_tiles = (M + MC - 1) / MC;
     int n_tiles = (N + NC - 1) / NC;
     int total_tiles = m_tiles * n_tiles;
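Note: the nstripes handed to parallel_for_ below is a coarse cost model: tiles x (K/KC) x (MC/MR) x (NC/NR) micro-kernel invocations, scaled by 1/1024. A sketch reproducing the estimate for one shape (values assumed for illustration):

    #include <cstdio>

    // Reproduce the nstripes cost estimate from fastGemmKernel for one shape.
    int main() {
        int M = 512, N = 512, K = 512, esz = 4;
        int MC = 64, NC = 240, MR = 8, NR = 12;
        int KC = (1 << 20) / ((MC + NC) * esz);   // FAST_GEMM_STORAGE heuristic
        KC = KC > 8 ? KC : 8;
        KC = KC < K ? KC : K;                     // clamps to K = 512 here
        int m_tiles = (M + MC - 1) / MC;          // 8
        int n_tiles = (N + NC - 1) / NC;          // 3
        int cost_per_thread = (K / KC) * (MC / MR) * (NC / NR); // 1 * 8 * 20
        double nstripes = (double)(m_tiles * n_tiles) * cost_per_thread / 1024.0;
        std::printf("nstripes = %.2f\n", nstripes); // 3.75
        return 0;
    }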
@@ -391,3 +301,29 @@ void fastGemmKernel(int M, int N, int K,
 }
 
 }}} // cv::dnn::cpu_baseline
+
+#undef FAST_GEMM_STORAGE
+#undef FAST_GEMM_MAX_STACKBUF
+#ifdef FAST_GEMM_F32_MC
+#undef FAST_GEMM_F32_MC
+#endif
+#ifdef FAST_GEMM_F32_NC
+#undef FAST_GEMM_F32_NC
+#endif
+#ifdef FAST_GEMM_F32_MR
+#undef FAST_GEMM_F32_MR
+#endif
+#ifdef FAST_GEMM_F32_NR
+#undef FAST_GEMM_F32_NR
+#endif
+#ifdef FAST_GEMM_F32_PACKED_STRIDE_K
+#undef FAST_GEMM_F32_PACKED_STRIDE_K
+#endif
+#undef FAST_GEMM_IMPLEMENT_PACK
+#undef FAST_GEMM_LOAD_TO_BUF_8
+#undef FAST_GEMM_LOAD_TO_BUF_BORDERS_8
+#undef FAST_GEMM_LOAD_TO_BUF_12
+#undef FAST_GEMM_LOAD_TO_BUF_BORDERS_12
+#undef FAST_GEMM_PACK_COPY
+#undef FAST_GEMM_PACK_f32_8
+#undef FAST_GEMM_PACK_f32_12
@@ -15,37 +15,31 @@
 #define FAST_GEMM_STORAGE (1<<20) // 2^20
 #define FAST_GEMM_MAX_STACKBUF (1 << 14)
 
-#if CV_NEON
-#define FAST_GEMM_F32_MC 64
-#define FAST_GEMM_F32_NC 240
-#elif CV_AVX
+#if CV_AVX
 #define FAST_GEMM_F32_MC 60
 #define FAST_GEMM_F32_NC 320
 #elif CV_LASX
 #define FAST_GEMM_F32_MC 48
 #define FAST_GEMM_F32_NC 128
+#else // CV_NEON_AARCH64, SIMD128
+#define FAST_GEMM_F32_MC 64
+#define FAST_GEMM_F32_NC 240
 #endif
 
-// micro kernel size
-#if CV_NEON && CV_NEON_AARCH64
-#define FAST_GEMM_F32_MR 8
-#define FAST_GEMM_F32_NR 12
-#elif CV_NEON
-#define FAST_GEMM_F32_MR 4
-#define FAST_GEMM_F32_NR 12
-#elif CV_AVX
+#if CV_AVX
 #define FAST_GEMM_F32_MR 12
 #define FAST_GEMM_F32_NR 8
 #elif CV_LASX
 #define FAST_GEMM_F32_MR 12
 #define FAST_GEMM_F32_NR 16
+#else // CV_NEON_AARCH64, CV_SIMD128
+#define FAST_GEMM_F32_MR 8
+#define FAST_GEMM_F32_NR 12
 #endif
 
-#if CV_NEON
-#define FAST_GEMM_F32_PACKED_STRIDE_K 64
-#elif CV_AVX
+#if CV_AVX
 #define FAST_GEMM_F32_PACKED_STRIDE_K 128
-#elif CV_LASX
+#else // CV_LASX, CV_NEON_AARCH64, CV_SIMD128
 #define FAST_GEMM_F32_PACKED_STRIDE_K 64
 #endif
 
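Note: the per-ISA MR x NR choices track the register file: an 8 x 12 f32 micro-tile needs 24 four-lane accumulators plus a few loads, fitting the 32 NEON registers on AArch64, while AVX picks 12 x 8 (12 eight-lane ymm accumulators) for its 16 ymm registers. A tiny compile-time check of that accounting (assumed rationale, not stated in the patch):

    // Accumulator count for an MR x NR f32 micro-kernel with W-lane vectors.
    constexpr int accRegs(int MR, int NR, int W) { return MR * (NR / W); }

    static_assert(accRegs(8, 12, 4) == 24, "AArch64 NEON: 24 of 32 q-registers");
    static_assert(accRegs(12, 8, 8) == 12, "AVX: 12 of 16 ymm registers");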
@@ -75,14 +69,6 @@ static void fast_gemm_pack##N##suffix( int m, int k, const void* A_, \
     } \
 }
 
-#define FAST_GEMM_LOAD_TO_BUF_4(styp) \
-    styp buf[] = { \
-        a_ptr[j], a_ptr[j+lda0], a_ptr[j+lda0*2], a_ptr[j+lda0*3] }
-
-#define FAST_GEMM_LOAD_TO_BUF_BORDERS_4(styp) \
-    styp buf[] = { \
-        a_ptr[0][j], a_ptr[1][j], a_ptr[2][j], a_ptr[3][j] }
-
 #define FAST_GEMM_LOAD_TO_BUF_8(styp) \
     styp buf[] = { \
         a_ptr[j], a_ptr[j+lda0], a_ptr[j+lda0*2], a_ptr[j+lda0*3], \
@@ -121,7 +107,6 @@ static void fast_gemm_pack##N##suffix( int m, int k, const void* A_, \
 
 #define FAST_GEMM_PACK_COPY(src, dst, N) \
     memcpy((dst), (src), N*sizeof(src[0]))
-#define FAST_GEMM_PACK_f32_4(src, dst) FAST_GEMM_PACK_COPY((src), (dst), 4)
 #define FAST_GEMM_PACK_f32_8(src, dst) FAST_GEMM_PACK_COPY((src), (dst), 8)
 #define FAST_GEMM_PACK_f32_12(src, dst) FAST_GEMM_PACK_COPY((src), (dst), 12)
 #define FAST_GEMM_PACK_f32_16(src, dst) FAST_GEMM_PACK_COPY((src), (dst), 16)
@@ -130,7 +115,6 @@ namespace cv { namespace dnn {
 
 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
 
-// TODO: type to size_t
 int fastGemmPackBSize(int N, int K);
 
 void fastGemmPackBKernel(const char *B, char *packed_B, int N, int K, int ldb0, int ldb1, int esz);
@@ -143,44 +127,18 @@ void fastGemmKernel(int M, int N, int K,
                     float alpha, const char *A, int lda0, int lda1,
                     const char *packed_B, float beta, char *C, int ldc, int esz);
 
-// NEON (AARCH64: 32 x 128-bit registers, armv7: 16 x 128-bit registers)
-#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_NEON
-
-#if CV_NEON_AARCH64
-FAST_GEMM_IMPLEMENT_PACK(8, _f32, float, float)
-#else
-FAST_GEMM_IMPLEMENT_PACK(4, _f32, float, float)
-#endif
-FAST_GEMM_IMPLEMENT_PACK(12, _f32, float, float)
-
-int fastGemmPackBSize(int N, int K) {
-    int GEMM_NC = FAST_GEMM_F32_NC, GEMM_NR = FAST_GEMM_F32_NR;
-    int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-
-    return static_cast<int>((N + NC - 1) / NC) * NC * K;
-}
-
-void fastGemmPackBKernel(const char *B, char *packed_B, int N, int K, int ldb0, int ldb1, int esz) {
-    int GEMM_NC = FAST_GEMM_F32_NC, GEMM_NR = FAST_GEMM_F32_NR;
-    int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-    int KC = std::min(FAST_GEMM_F32_PACKED_STRIDE_K, K);
-
-    int n_tiles = (N + NC - 1) / NC;
-    for (int r = 0; r < n_tiles; ++r) {
-        int j0 = r * NC;
-        int nc = N - j0 < NC ? N - j0 : NC;
-        int _nc = static_cast<int>((nc + GEMM_NR - 1) / GEMM_NR) * GEMM_NR * esz;
-        for (int k = 0; k < K; k += KC) {
-            int kc = K - k < KC ? K - k : KC;
-            fast_gemm_pack12_f32(nc, kc, B + (k * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_B);
-            packed_B += _nc * kc;
-        }
-    }
-}
-
-#if CV_NEON_AARCH64
-static void fast_gemm8x12_f32(int k, const char *a_, const char *b_,
-                              char *c_, int ldc, float alpha) {
+#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
+
+/*
+Compute kernels that are optimized for different platforms
+*/
+#if CV_NEON && CV_NEON_AARCH64 // AARCH64: 32 x 128-bit registers
+
+FAST_GEMM_IMPLEMENT_PACK(8, _f32, float, float) // a packer
+FAST_GEMM_IMPLEMENT_PACK(12, _f32, float, float) // b packer
+
+static inline void fast_gemm8x12_f32(int k, const char *a_, const char *b_,
+                                     char *c_, int ldc, float alpha) {
     const float* a = (const float*)a_;
     const float* b = (const float*)b_;
     float* c = (float*)c_;
@@ -258,278 +216,17 @@ static void fast_gemm8x12_f32(int k, const char *a_, const char *b_,
 #undef FAST_GEMM_FINALE
 }
 
-#else // CV_NEON_AARCH64
-static void fast_gemm4x12_f32(int k, const char *a_, const char *b_,
-                              char *c_, int ldc, float alpha) {
-    const float* a = (const float*)a_;
-    const float* b = (const float*)b_;
-    float* c = (float*)c_;
-
-    float32x4_t s00 = vdupq_n_f32(0.f), s01 = s00, s02 = s00,
-                s10 = s00, s11 = s00, s12 = s00,
-                s20 = s00, s21 = s00, s22 = s00,
-                s30 = s00, s31 = s00, s32 = s00;
-
-    for(int p = 0; p < k; p++, a += FAST_GEMM_F32_MR, b += FAST_GEMM_F32_NR)
-    {
-        float32x4_t b0 = vld1q_f32(b), b1 = vld1q_f32(b + 4), b2 = vld1q_f32(b + 8);
-
-        float32x4_t a0 = vld1q_dup_f32(a);
-        s00 = vmlaq_f32(s00, b0, a0);
-        s01 = vmlaq_f32(s01, b1, a0);
-        s02 = vmlaq_f32(s02, b2, a0);
-
-        a0 = vld1q_dup_f32(a + 1);
-        s10 = vmlaq_f32(s10, b0, a0);
-        s11 = vmlaq_f32(s11, b1, a0);
-        s12 = vmlaq_f32(s12, b2, a0);
-
-        a0 = vld1q_dup_f32(a + 2);
-        s20 = vmlaq_f32(s20, b0, a0);
-        s21 = vmlaq_f32(s21, b1, a0);
-        s22 = vmlaq_f32(s22, b2, a0);
-
-        a0 = vld1q_dup_f32(a + 3);
-        s30 = vmlaq_f32(s30, b0, a0);
-        s31 = vmlaq_f32(s31, b1, a0);
-        s32 = vmlaq_f32(s32, b2, a0);
-    }
-
-    float32x4_t c0, c1, c2, v_alpha = vdupq_n_f32(alpha);
-#define FAST_GEMM_FINALE(row0)               \
-    c0 = vld1q_f32(c + row0 * ldc);          \
-    c1 = vld1q_f32(c + row0 * ldc + 4);      \
-    c2 = vld1q_f32(c + row0 * ldc + 8);      \
-    c0 = vmlaq_f32(c0, s##row0##0, v_alpha); \
-    c1 = vmlaq_f32(c1, s##row0##1, v_alpha); \
-    c2 = vmlaq_f32(c2, s##row0##2, v_alpha); \
-    vst1q_f32(c + row0 * ldc, c0);           \
-    vst1q_f32(c + row0 * ldc + 4, c1);       \
-    vst1q_f32(c + row0 * ldc + 8, c2);
-
-    FAST_GEMM_FINALE(0);
-    FAST_GEMM_FINALE(1);
-    FAST_GEMM_FINALE(2);
-    FAST_GEMM_FINALE(3);
-#undef FAST_GEMM_FINALE
-}
-
-#endif // micro kernel CV_NEON_AARCH64
-
-static void fast_gemm_macro_kernel(int m, int n, int k,
-                                   const char *packed_A, const char *packed_B,
-                                   float alpha, char *c, int ldc0, int esz) {
-    int ldc0_esz = ldc0 * esz;
-
-    double tempC[FAST_GEMM_F32_MR * FAST_GEMM_F32_NR]; // make sure the buffer is big enough
-    for(int i = 0; i < m; i += FAST_GEMM_F32_MR) {
-        for(int j = 0; j < n; j += FAST_GEMM_F32_NR) {
-            char* cptr0 = &c[i * ldc0_esz + j * esz];
-            char* cptr = cptr0;
-            int ldc = ldc0;
-            int mr = m - i < FAST_GEMM_F32_MR ? m - i : FAST_GEMM_F32_MR;
-            int nr = n - j < FAST_GEMM_F32_NR ? n - j : FAST_GEMM_F32_NR;
-            int nr_esz = nr * esz;
-            bool partial = (bool)((mr < FAST_GEMM_F32_MR) | (nr < FAST_GEMM_F32_NR));
-            if (partial) {
-                memset(tempC, 0, sizeof(tempC));
-                cptr = (char *)tempC;
-                ldc = FAST_GEMM_F32_NR;
-                for(int p = 0; p < mr; p++)
-                    memcpy(cptr + p * (ldc * esz), cptr0 + p * ldc0_esz, nr_esz);
-            }
-#if CV_NEON_AARCH64
-            fast_gemm8x12_f32(k, packed_A + i * k * esz, packed_B + j * k * esz, cptr, ldc, alpha);
-#else
-            fast_gemm4x12_f32(k, packed_A + i * k * esz, packed_B + j * k * esz, cptr, ldc, alpha);
-#endif
-
-            if (partial) {
-                for(int p = 0; p < mr; p++)
-                    memcpy(cptr0 + p * ldc0_esz, cptr + p * (ldc * esz), nr_esz);
-            }
-        }
-    }
-}
-
-void fastGemmKernel(int M, int N, int K,
-                    float alpha, const char *A, int lda0, int lda1,
-                    const char *B, int ldb0, int ldb1,
-                    float beta, char *C, int ldc, int esz) {
-    int GEMM_MC = FAST_GEMM_F32_MC,
-        GEMM_NC = FAST_GEMM_F32_NC,
-        GEMM_MR = FAST_GEMM_F32_MR,
-        GEMM_NR = FAST_GEMM_F32_NR;
-
-    int MC = (((GEMM_MC < M ? GEMM_MC : M) + GEMM_MR - 1) / GEMM_MR) * GEMM_MR;
-    int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-    int KC = FAST_GEMM_STORAGE / ((MC + NC) * esz);
-    KC = KC > 8 ? KC : 8;
-    KC = KC < K ? KC : K;
-
-    size_t buff_size = KC * (MC + NC) * esz;
-    bool use_stackbuff = buff_size <= FAST_GEMM_MAX_STACKBUF;
-    int m_tiles = (M + MC - 1) / MC;
-    int n_tiles = (N + NC - 1) / NC;
-    int total_tiles = m_tiles * n_tiles;
-
-    auto fn = [&](const Range &r) {
-        char* packed_a = (char*)(use_stackbuff ? alloca(buff_size) : malloc(buff_size));
-        char* packed_b = packed_a + KC * MC * esz;
-        int start = r.start;
-        int end = r.end;
-
-        for (int tile_idx = start; tile_idx < end; tile_idx++) {
-            int i0 = (tile_idx / n_tiles) * MC;
-            int j0 = (tile_idx % n_tiles) * NC;
-            int mc = M - i0 < MC ? M - i0 : MC;
-            int nc = N - j0 < NC ? N - j0 : NC;
-            int ldc_block = ldc;
-            char* c_block = C + (i0 * ldc + j0) * esz;
-
-            if (beta == 0.f) {
-                for(int i = 0; i < mc; i++)
-                    memset(c_block + i * ldc_block * esz, 0, nc * esz);
-            } else if (beta != 1.f) {
-                for(int i = 0; i < mc; i++) {
-                    float* c_i = (float*)c_block + i * ldc_block;
-                    for(int j = 0; j < nc; j++)
-                        c_i[j] *= beta;
-                }
-            }
-
-            for(int k0 = 0; k0 < K; k0 += KC)
-            {
-                int kc = K - k0 < KC ? K - k0 : KC;
-#if CV_NEON_AARCH64
-                fast_gemm_pack8_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
-#else
-                fast_gemm_pack4_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
-#endif
-                fast_gemm_pack12_f32(nc, kc, B + (k0 * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_b);
-                fast_gemm_macro_kernel(mc, nc, kc, packed_a, packed_b, alpha, c_block, ldc_block, esz);
-            }
-        }
-
-        if (!use_stackbuff) {
-            free(packed_a);
-        }
-    };
-
-    int total = total_tiles;
-    int cost_per_thread = static_cast<int>((K / KC) * (MC / GEMM_MR) * (NC / GEMM_NR));
-    double nstripes = (size_t)total * cost_per_thread * (1 / 1024.0);
-    parallel_for_(Range(0, total), fn, nstripes);
-}
-
-void fastGemmKernel(int M, int N, int K,
-                    float alpha, const char *A, int lda0, int lda1,
-                    const char *packed_B, float beta, char *C, int ldc, int esz) {
-    int GEMM_MC = FAST_GEMM_F32_MC,
-        GEMM_NC = FAST_GEMM_F32_NC,
-        GEMM_MR = FAST_GEMM_F32_MR,
-        GEMM_NR = FAST_GEMM_F32_NR;
-
-    int MC = (((GEMM_MC < M ? GEMM_MC : M) + GEMM_MR - 1) / GEMM_MR) * GEMM_MR;
-    int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-    int KC = std::min(FAST_GEMM_F32_PACKED_STRIDE_K, K);
-
-    size_t buff_size = KC * MC * esz;
-    bool use_stackbuff = buff_size <= FAST_GEMM_MAX_STACKBUF;
-    int m_tiles = (M + MC - 1) / MC;
-    int n_tiles = (N + NC - 1) / NC;
-    int total_tiles = m_tiles * n_tiles;
-
-    auto fn = [&](const Range &r) {
-        char* packed_a = (char*)(use_stackbuff ? alloca(buff_size) : malloc(buff_size)); // TODO: use AutoBuffer
-        const char *packed_b_ = packed_B;
-        int start = r.start;
-        int end = r.end;
-
-        for (int tile_idx = start; tile_idx < end; tile_idx++) {
-            int i0 = (tile_idx / n_tiles) * MC;
-            int j0 = (tile_idx % n_tiles) * NC;
-            int mc = M - i0 < MC ? M - i0 : MC;
-            int nc = N - j0 < NC ? N - j0 : NC;
-            int ldc_block = ldc;
-            char* c_block = C + (i0 * ldc + j0) * esz;
-            packed_b_ = packed_B + j0 * K * esz;
-
-            if (beta == 0.f) {
-                for(int i = 0; i < mc; i++)
-                    memset(c_block + i * ldc_block * esz, 0, nc * esz);
-            } else if (beta != 1.f) {
-                for(int i = 0; i < mc; i++) {
-                    float* c_i = (float*)c_block + i * ldc_block;
-                    for(int j = 0; j < nc; j++)
-                        c_i[j] *= beta;
-                }
-            }
-
-            int _nc = static_cast<int>((nc + GEMM_NR - 1) / GEMM_NR) * GEMM_NR * esz;
-            for(int k0 = 0; k0 < K; k0 += KC)
-            {
-                int kc = K - k0 < KC ? K - k0 : KC;
-#if CV_NEON_AARCH64
-                fast_gemm_pack8_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
-#else
-                fast_gemm_pack4_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
-#endif
-                fast_gemm_macro_kernel(mc, nc, kc, packed_a, packed_b_, alpha, c_block, ldc_block, esz);
-                packed_b_ += _nc * kc;
-            }
-        }
-
-        if (!use_stackbuff) {
-            free(packed_a);
-        }
-    };
-
-    int total = total_tiles;
-    int cost_per_thread = static_cast<int>((K / KC) * (MC / GEMM_MR) * (NC / GEMM_NR));
-    double nstripes = (size_t)total * cost_per_thread * (1 / 1024.0);
-    parallel_for_(Range(0, total), fn, nstripes);
-}
-
-#endif // CV_NEON, CV_NEON_AARCH64
-
-// AVX and AVX2 (16 x 256-bit registers)
-#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_AVX
-
-FAST_GEMM_IMPLEMENT_PACK(8, _f32, float, float)
-FAST_GEMM_IMPLEMENT_PACK(12, _f32, float, float)
-
-int fastGemmPackBSize(int N, int K) {
-    int GEMM_NC = FAST_GEMM_F32_NC, GEMM_NR = FAST_GEMM_F32_NR;
-    int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-
-    return static_cast<int>((N + NC - 1) / NC) * NC * K;
-}
-
-void fastGemmPackBKernel(const char *B, char *packed_B, int N, int K, int ldb0, int ldb1, int esz) {
-    int GEMM_NC = FAST_GEMM_F32_NC, GEMM_NR = FAST_GEMM_F32_NR;
-    int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-    int KC = std::min(FAST_GEMM_F32_PACKED_STRIDE_K, K);
-
-    int n_tiles = (N + NC - 1) / NC;
-    for (int r = 0; r < n_tiles; ++r) {
-        int j0 = r * NC;
-        int nc = N - j0 < NC ? N - j0 : NC;
-        int _nc = static_cast<int>((nc + GEMM_NR - 1) / GEMM_NR) * GEMM_NR * esz;
-        for (int k = 0; k < K; k += KC) {
-            int kc = K - k < KC ? K - k : KC;
-            fast_gemm_pack8_f32(nc, kc, B + (k * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_B);
-            packed_B += _nc * kc;
-        }
-    }
-}
-
+#elif CV_AVX // AVX and AVX2 (16 x 256-bit registers)
+
+FAST_GEMM_IMPLEMENT_PACK(8, _f32, float, float) // a packer
+FAST_GEMM_IMPLEMENT_PACK(12, _f32, float, float) // b packer
+
 #if !CV_FMA3 // AVX workaround for FMA
 #undef _mm256_fmadd_ps
 #define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b))
 #endif
 
-static void fast_gemm12x8_f32(int k, const char *a_, const char *b_, char *c_, int ldc, float alpha) {
+static inline void fast_gemm12x8_f32(int k, const char *a_, const char *b_, char *c_, int ldc, float alpha) {
     const float* a = (const float*)a_;
     const float* b = (const float*)b_;
     float* c = (float*)c_;
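Note: the !CV_FMA3 block kept above rewrites _mm256_fmadd_ps as a mul+add pair so the same kernel builds on plain AVX; numerically this differs from true FMA only by one extra rounding per multiply-accumulate. A standalone sketch of the same guard (macro name here is made up to avoid clashing with the intrinsic):

    #include <immintrin.h>

    // On AVX-only targets the fused op is emulated; one extra rounding step
    // per multiply-add is the only difference from FMA3 hardware.
    #if !defined(__FMA__)
    #define mm256_fmadd_ps_compat(a, b, c) _mm256_add_ps((c), _mm256_mul_ps((a), (b)))
    #else
    #define mm256_fmadd_ps_compat(a, b, c) _mm256_fmadd_ps((a), (b), (c))
    #endif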
@@ -599,203 +296,12 @@ static void fast_gemm12x8_f32(int k, const char *a_, const char *b_, char *c_, int ldc, float alpha) {
 #undef FAST_GEMM_FINALE
 }
 
+#elif CV_LASX // LASX (32 x 256-bit registers)
+
+FAST_GEMM_IMPLEMENT_PACK(12, _f32, float, float) // a packer
+FAST_GEMM_IMPLEMENT_PACK(16, _f32, float, float) // b packer
+
+static inline void fast_gemm12x16_f32(int k, const char *a_, const char *b_, char *c_, int ldc, float alpha) {
-static void fast_gemm_macro_kernel(int m, int n, int k,
-                                   const char *packed_A, const char *packed_B,
-                                   float alpha, char *c, int ldc0, int esz) {
-    int ldc0_esz = ldc0 * esz;
-
-    double tempC[FAST_GEMM_F32_MR * FAST_GEMM_F32_NR]; // make sure the buffer is big enough
-    for(int i = 0; i < m; i += FAST_GEMM_F32_MR) {
-        for(int j = 0; j < n; j += FAST_GEMM_F32_NR) {
-            char* cptr0 = &c[i * ldc0_esz + j * esz];
-            char* cptr = cptr0;
-            int ldc = ldc0;
-            int mr = m - i < FAST_GEMM_F32_MR ? m - i : FAST_GEMM_F32_MR;
-            int nr = n - j < FAST_GEMM_F32_NR ? n - j : FAST_GEMM_F32_NR;
-            int nr_esz = nr * esz;
-            bool partial = (bool)((mr < FAST_GEMM_F32_MR) | (nr < FAST_GEMM_F32_NR));
-            if (partial) {
-                memset(tempC, 0, sizeof(tempC));
-                cptr = (char *)tempC;
-                ldc = FAST_GEMM_F32_NR;
-                for(int p = 0; p < mr; p++)
-                    memcpy(cptr + p * (ldc * esz), cptr0 + p * ldc0_esz, nr_esz);
-            }
-            fast_gemm12x8_f32(k, packed_A + i * k * esz, packed_B + j * k * esz, cptr, ldc, alpha);
-
-            if (partial) {
-                for(int p = 0; p < mr; p++)
-                    memcpy(cptr0 + p * ldc0_esz, cptr + p * (ldc * esz), nr_esz);
-            }
-        }
-    }
-}
-
-void fastGemmKernel(int M, int N, int K,
-                    float alpha, const char *A, int lda0, int lda1,
-                    const char *B, int ldb0, int ldb1,
-                    float beta, char *C, int ldc, int esz) {
-    int GEMM_MC = FAST_GEMM_F32_MC,
-        GEMM_NC = FAST_GEMM_F32_NC,
-        GEMM_MR = FAST_GEMM_F32_MR,
-        GEMM_NR = FAST_GEMM_F32_NR;
-
-    int MC = (((GEMM_MC < M ? GEMM_MC : M) + GEMM_MR - 1) / GEMM_MR) * GEMM_MR;
-    int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-    int KC = FAST_GEMM_STORAGE / ((MC + NC) * esz);
-    KC = KC > 8 ? KC : 8;
-    KC = KC < K ? KC : K;
-
-    size_t buff_size = KC * (MC + NC) * esz;
-    bool use_stackbuff = buff_size <= FAST_GEMM_MAX_STACKBUF;
-    int m_tiles = (M + MC - 1) / MC;
-    int n_tiles = (N + NC - 1) / NC;
-    int total_tiles = m_tiles * n_tiles;
-
-    auto fn = [&](const Range &r) {
-        char* packed_a = (char*)(use_stackbuff ? alloca(buff_size) : malloc(buff_size));
-        char* packed_b = packed_a + KC * MC * esz;
-        int start = r.start;
-        int end = r.end;
-
-        for (int tile_idx = start; tile_idx < end; tile_idx++) {
-            int i0 = (tile_idx / n_tiles) * MC;
-            int j0 = (tile_idx % n_tiles) * NC;
-            int mc = M - i0 < MC ? M - i0 : MC;
-            int nc = N - j0 < NC ? N - j0 : NC;
-            int ldc_block = ldc;
-            char* c_block = C + (i0 * ldc + j0) * esz;
-
-            if (beta == 0.f) {
-                for(int i = 0; i < mc; i++)
-                    memset(c_block + i * ldc_block * esz, 0, nc * esz);
-            } else if (beta != 1.f) {
-                for(int i = 0; i < mc; i++) {
-                    float* c_i = (float*)c_block + i * ldc_block;
-                    for(int j = 0; j < nc; j++)
-                        c_i[j] *= beta;
-                }
-            }
-
-            for(int k0 = 0; k0 < K; k0 += KC)
-            {
-                int kc = K - k0 < KC ? K - k0 : KC;
-                fast_gemm_pack12_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
-                fast_gemm_pack8_f32(nc, kc, B + (k0 * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_b);
-                fast_gemm_macro_kernel(mc, nc, kc, packed_a, packed_b, alpha, c_block, ldc_block, esz);
-            }
-        }
-
-        if (!use_stackbuff) {
-            free(packed_a);
-        }
-    };
-
-    int total = total_tiles;
-    int cost_per_thread = static_cast<int>((K / KC) * (MC / GEMM_MR) * (NC / GEMM_NR));
-    double nstripes = (size_t)total * cost_per_thread * (1 / 1024.0);
-    parallel_for_(Range(0, total), fn, nstripes);
-}
-
-void fastGemmKernel(int M, int N, int K,
-                    float alpha, const char *A, int lda0, int lda1,
-                    const char *packed_B, float beta, char *C, int ldc, int esz) {
-    int GEMM_MC = FAST_GEMM_F32_MC,
-        GEMM_NC = FAST_GEMM_F32_NC,
-        GEMM_MR = FAST_GEMM_F32_MR,
-        GEMM_NR = FAST_GEMM_F32_NR;
-
-    int MC = (((GEMM_MC < M ? GEMM_MC : M) + GEMM_MR - 1) / GEMM_MR) * GEMM_MR;
-    int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-    int KC = std::min(FAST_GEMM_F32_PACKED_STRIDE_K, K);
-
-    size_t buff_size = KC * MC * esz;
-    bool use_stackbuff = buff_size <= FAST_GEMM_MAX_STACKBUF;
-    int m_tiles = (M + MC - 1) / MC;
-    int n_tiles = (N + NC - 1) / NC;
-    int total_tiles = m_tiles * n_tiles;
-
-    auto fn = [&](const Range &r) {
-        char* packed_a = (char*)(use_stackbuff ? alloca(buff_size) : malloc(buff_size)); // TODO: use AutoBuffer
-        const char *packed_b_ = packed_B;
-        int start = r.start;
-        int end = r.end;
-
-        for (int tile_idx = start; tile_idx < end; tile_idx++) {
-            int i0 = (tile_idx / n_tiles) * MC;
-            int j0 = (tile_idx % n_tiles) * NC;
-            int mc = M - i0 < MC ? M - i0 : MC;
-            int nc = N - j0 < NC ? N - j0 : NC;
-            int ldc_block = ldc;
-            char* c_block = C + (i0 * ldc + j0) * esz;
-            packed_b_ = packed_B + j0 * K * esz;
-
-            if (beta == 0.f) {
-                for(int i = 0; i < mc; i++)
-                    memset(c_block + i * ldc_block * esz, 0, nc * esz);
-            } else if (beta != 1.f) {
-                for(int i = 0; i < mc; i++) {
-                    float* c_i = (float*)c_block + i * ldc_block;
-                    for(int j = 0; j < nc; j++)
-                        c_i[j] *= beta;
-                }
-            }
-
-            int _nc = static_cast<int>((nc + GEMM_NR - 1) / GEMM_NR) * GEMM_NR * esz;
-            for(int k0 = 0; k0 < K; k0 += KC)
-            {
-                int kc = K - k0 < KC ? K - k0 : KC;
-                fast_gemm_pack12_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
-                fast_gemm_macro_kernel(mc, nc, kc, packed_a, packed_b_, alpha, c_block, ldc_block, esz);
-                packed_b_ += _nc * kc;
-            }
-        }
-
-        if (!use_stackbuff) {
-            free(packed_a);
-        }
-    };
-
-    int total = total_tiles;
-    int cost_per_thread = static_cast<int>((K / KC) * (MC / GEMM_MR) * (NC / GEMM_NR));
-    double nstripes = (size_t)total * cost_per_thread * (1 / 1024.0);
-    parallel_for_(Range(0, total), fn, nstripes);
-}
-
-#endif // CV_AVX, CV_AVX2
-
-// LASX (32 x 256-bit registers)
-#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_LASX
-
-FAST_GEMM_IMPLEMENT_PACK(12, _f32, float, float)
-FAST_GEMM_IMPLEMENT_PACK(16, _f32, float, float)
-
-int fastGemmPackBSize(int N, int K) {
-    int GEMM_NC = FAST_GEMM_F32_NC, GEMM_NR = FAST_GEMM_F32_NR;
-    int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-
-    return static_cast<int>((N + NC - 1) / NC) * NC * K;
-}
-
-void fastGemmPackBKernel(const char *B, char *packed_B, int N, int K, int ldb0, int ldb1, int esz) {
-    int GEMM_NC = FAST_GEMM_F32_NC, GEMM_NR = FAST_GEMM_F32_NR;
-    int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
-    int KC = std::min(FAST_GEMM_F32_PACKED_STRIDE_K, K);
-
-    int n_tiles = (N + NC - 1) / NC;
-    for (int r = 0; r < n_tiles; ++r) {
-        int j0 = r * NC;
-        int nc = N - j0 < NC ? N - j0 : NC;
-        int _nc = static_cast<int>((nc + GEMM_NR - 1) / GEMM_NR) * GEMM_NR * esz;
-        for (int k = 0; k < K; k += KC) {
|
|
||||||
int kc = K - k < KC ? K - k : KC;
|
|
||||||
fast_gemm_pack16_f32(nc, kc, B + (k * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_B);
|
|
||||||
packed_B += _nc * kc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fast_gemm12x16_f32(int k, const char *a_, const char *b_, char *c_, int ldc, float alpha) {
|
|
||||||
const float* a = (const float*)a_;
|
const float* a = (const float*)a_;
|
||||||
const float* b = (const float*)b_;
|
const float* b = (const float*)b_;
|
||||||
float* c = (float*)c_;
|
float* c = (float*)c_;
|
||||||
@ -889,9 +395,99 @@ static void fast_gemm12x16_f32(int k, const char *a_, const char *b_, char *c_,
|
|||||||
#undef FAST_GEMM_FINALE
|
#undef FAST_GEMM_FINALE
|
||||||
}
|
}
|
||||||
|
|
||||||
static void fast_gemm_macro_kernel(int m, int n, int k,
|
#elif CV_SIMD128 // armv7: 16 x 128-bit registers
|
||||||
const char *packed_A, const char *packed_B,
|
|
||||||
float alpha, char *c, int ldc0, int esz) {
|
FAST_GEMM_IMPLEMENT_PACK(8, _f32, float, float) // a packer
|
||||||
|
FAST_GEMM_IMPLEMENT_PACK(12, _f32, float, float) // b packer
|
||||||
|
|
||||||
|
static inline void fast_gemm8x12_f32(int k, const char *a_, const char *b_,
|
||||||
|
char *c_, int ldc, float alpha) {
|
||||||
|
const float* a = (const float*)a_;
|
||||||
|
const float* b = (const float*)b_;
|
||||||
|
float* c = (float*)c_;
|
||||||
|
|
||||||
|
v_float32x4 s00 = v_setzero_f32(), s01 = s00, s02 = s00;
|
||||||
|
v_float32x4 s10 = s00, s11 = s00, s12 = s00;
|
||||||
|
v_float32x4 s20 = s00, s21 = s00, s22 = s00;
|
||||||
|
v_float32x4 s30 = s00, s31 = s00, s32 = s00;
|
||||||
|
v_float32x4 s40 = s00, s41 = s00, s42 = s00;
|
||||||
|
v_float32x4 s50 = s00, s51 = s00, s52 = s00;
|
||||||
|
v_float32x4 s60 = s00, s61 = s00, s62 = s00;
|
||||||
|
v_float32x4 s70 = s00, s71 = s00, s72 = s00;
|
||||||
|
|
||||||
|
for(int p = 0; p < k; p++, a += FAST_GEMM_F32_MR, b += FAST_GEMM_F32_NR) {
|
||||||
|
v_float32x4 b0 = v_load(b), b1 = v_load(b + 4), b2 = v_load(b + 8);
|
||||||
|
|
||||||
|
v_float32x4 a0 = v_setall_f32(*a);
|
||||||
|
s00 = v_fma(b0, a0, s00);
|
||||||
|
s01 = v_fma(b1, a0, s01);
|
||||||
|
s02 = v_fma(b2, a0, s02);
|
||||||
|
v_float32x4 a1 = v_setall_f32(*(a + 1));
|
||||||
|
s10 = v_fma(b0, a1, s10);
|
||||||
|
s11 = v_fma(b1, a1, s11);
|
||||||
|
s12 = v_fma(b2, a1, s12);
|
||||||
|
|
||||||
|
v_float32x4 a2 = v_setall_f32(*(a + 2));
|
||||||
|
s20 = v_fma(b0, a2, s20);
|
||||||
|
s21 = v_fma(b1, a2, s21);
|
||||||
|
s22 = v_fma(b2, a2, s22);
|
||||||
|
v_float32x4 a3 = v_setall_f32(*(a + 3));
|
||||||
|
s30 = v_fma(b0, a3, s30);
|
||||||
|
s31 = v_fma(b1, a3, s31);
|
||||||
|
s32 = v_fma(b2, a3, s32);
|
||||||
|
|
||||||
|
a0 = v_setall_f32(*(a + 4));
|
||||||
|
s40 = v_fma(b0, a0, s40);
|
||||||
|
s41 = v_fma(b1, a0, s41);
|
||||||
|
s42 = v_fma(b2, a0, s42);
|
||||||
|
a1 = v_setall_f32(*(a + 5));
|
||||||
|
s50 = v_fma(b0, a1, s50);
|
||||||
|
s51 = v_fma(b1, a1, s51);
|
||||||
|
s52 = v_fma(b2, a1, s52);
|
||||||
|
|
||||||
|
a2 = v_setall_f32(*(a + 6));
|
||||||
|
s60 = v_fma(b0, a2, s60);
|
||||||
|
s61 = v_fma(b1, a2, s61);
|
||||||
|
s62 = v_fma(b2, a2, s62);
|
||||||
|
a3 = v_setall_f32(*(a + 7));
|
||||||
|
s70 = v_fma(b0, a3, s70);
|
||||||
|
s71 = v_fma(b1, a3, s71);
|
||||||
|
s72 = v_fma(b2, a3, s72);
|
||||||
|
}
|
||||||
|
|
||||||
|
v_float32x4 c0, c1, c2, c3, c4, c5, v_alpha = v_setall_f32(alpha);
|
||||||
|
#define FAST_GEMM_FINALE(row0, row1) \
|
||||||
|
c0 = v_load(c + row0 * ldc); \
|
||||||
|
c1 = v_load(c + row0 * ldc + 4); \
|
||||||
|
c2 = v_load(c + row0 * ldc + 8); \
|
||||||
|
c3 = v_load(c + row1 * ldc); \
|
||||||
|
c4 = v_load(c + row1 * ldc + 4); \
|
||||||
|
c5 = v_load(c + row1 * ldc + 8); \
|
||||||
|
c0 = v_fma(s##row0##0, v_alpha, c0); \
|
||||||
|
c1 = v_fma(s##row0##1, v_alpha, c1); \
|
||||||
|
c2 = v_fma(s##row0##2, v_alpha, c2); \
|
||||||
|
c3 = v_fma(s##row1##0, v_alpha, c3); \
|
||||||
|
c4 = v_fma(s##row1##1, v_alpha, c4); \
|
||||||
|
c5 = v_fma(s##row1##2, v_alpha, c5); \
|
||||||
|
v_store(c + row0 * ldc, c0); \
|
||||||
|
v_store(c + row0 * ldc + 4, c1); \
|
||||||
|
v_store(c + row0 * ldc + 8, c2); \
|
||||||
|
v_store(c + row1 * ldc, c3); \
|
||||||
|
v_store(c + row1 * ldc + 4, c4); \
|
||||||
|
v_store(c + row1 * ldc + 8, c5);
|
||||||
|
|
||||||
|
FAST_GEMM_FINALE(0, 1);
|
||||||
|
FAST_GEMM_FINALE(2, 3);
|
||||||
|
FAST_GEMM_FINALE(4, 5);
|
||||||
|
FAST_GEMM_FINALE(6, 7);
|
||||||
|
#undef FAST_GEMM_FINALE
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static inline void fast_gemm_macro_kernel(int m, int n, int k,
|
||||||
|
const char *packed_A, const char *packed_B,
|
||||||
|
float alpha, char *c, int ldc0, int esz) {
|
||||||
int ldc0_esz = ldc0 * esz;
|
int ldc0_esz = ldc0 * esz;
|
||||||
|
|
||||||
double tempC[FAST_GEMM_F32_MR * FAST_GEMM_F32_NR]; // make sure the buffer is big enough
|
double tempC[FAST_GEMM_F32_MR * FAST_GEMM_F32_NR]; // make sure the buffer is big enough
|
||||||
@ -911,7 +507,15 @@ static void fast_gemm_macro_kernel(int m, int n, int k,
|
|||||||
for(int p = 0; p < mr; p++)
|
for(int p = 0; p < mr; p++)
|
||||||
memcpy(cptr + p * (ldc * esz), cptr0 + p * ldc0_esz, nr_esz);
|
memcpy(cptr + p * (ldc * esz), cptr0 + p * ldc0_esz, nr_esz);
|
||||||
}
|
}
|
||||||
|
#if CV_NEON && CV_NEON_AARCH64
|
||||||
|
fast_gemm8x12_f32(k, packed_A + i * k * esz, packed_B + j * k * esz, cptr, ldc, alpha);
|
||||||
|
#elif CV_AVX
|
||||||
|
fast_gemm12x8_f32(k, packed_A + i * k * esz, packed_B + j * k * esz, cptr, ldc, alpha);
|
||||||
|
#elif CV_LASX
|
||||||
fast_gemm12x16_f32(k, packed_A + i * k * esz, packed_B + j * k * esz, cptr, ldc, alpha);
|
fast_gemm12x16_f32(k, packed_A + i * k * esz, packed_B + j * k * esz, cptr, ldc, alpha);
|
||||||
|
#elif CV_SIMD128
|
||||||
|
fast_gemm8x12_f32(k, packed_A + i * k * esz, packed_B + j * k * esz, cptr, ldc, alpha);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (partial) {
|
if (partial) {
|
||||||
for(int p = 0; p < mr; p++)
|
for(int p = 0; p < mr; p++)
|
||||||
@ -921,6 +525,39 @@ static void fast_gemm_macro_kernel(int m, int n, int k,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int fastGemmPackBSize(int N, int K) {
|
||||||
|
int GEMM_NC = FAST_GEMM_F32_NC, GEMM_NR = FAST_GEMM_F32_NR;
|
||||||
|
int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
|
||||||
|
|
||||||
|
return static_cast<int>((N + NC - 1) / NC) * NC * K;
|
||||||
|
}
|
||||||
|
|
||||||
|
void fastGemmPackBKernel(const char *B, char *packed_B, int N, int K, int ldb0, int ldb1, int esz) {
|
||||||
|
int GEMM_NC = FAST_GEMM_F32_NC, GEMM_NR = FAST_GEMM_F32_NR;
|
||||||
|
int NC = (((GEMM_NC < N ? GEMM_NC : N) + GEMM_NR - 1) / GEMM_NR) * GEMM_NR;
|
||||||
|
int KC = std::min(FAST_GEMM_F32_PACKED_STRIDE_K, K);
|
||||||
|
|
||||||
|
int n_tiles = (N + NC - 1) / NC;
|
||||||
|
for (int r = 0; r < n_tiles; ++r) {
|
||||||
|
int j0 = r * NC;
|
||||||
|
int nc = N - j0 < NC ? N - j0 : NC;
|
||||||
|
int _nc = static_cast<int>((nc + GEMM_NR - 1) / GEMM_NR) * GEMM_NR * esz;
|
||||||
|
for (int k = 0; k < K; k += KC) {
|
||||||
|
int kc = K - k < KC ? K - k : KC;
|
||||||
|
#if CV_NEON && CV_NEON_AARCH64
|
||||||
|
fast_gemm_pack12_f32(nc, kc, B + (k * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_B);
|
||||||
|
#elif CV_AVX
|
||||||
|
fast_gemm_pack8_f32(nc, kc, B + (k * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_B);
|
||||||
|
#elif CV_LASX
|
||||||
|
fast_gemm_pack16_f32(nc, kc, B + (k * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_B);
|
||||||
|
#elif CV_SIMD128
|
||||||
|
fast_gemm_pack12_f32(nc, kc, B + (k * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_B);
|
||||||
|
#endif
|
||||||
|
packed_B += _nc * kc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void fastGemmKernel(int M, int N, int K,
|
void fastGemmKernel(int M, int N, int K,
|
||||||
float alpha, const char *A, int lda0, int lda1,
|
float alpha, const char *A, int lda0, int lda1,
|
||||||
const char *B, int ldb0, int ldb1,
|
const char *B, int ldb0, int ldb1,
|
||||||
@ -970,8 +607,29 @@ void fastGemmKernel(int M, int N, int K,
|
|||||||
for(int k0 = 0; k0 < K; k0 += KC)
|
for(int k0 = 0; k0 < K; k0 += KC)
|
||||||
{
|
{
|
||||||
int kc = K - k0 < KC ? K - k0 : KC;
|
int kc = K - k0 < KC ? K - k0 : KC;
|
||||||
|
// pack a
|
||||||
|
#if CV_NEON && CV_NEON_AARCH64
|
||||||
|
fast_gemm_pack8_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
|
||||||
|
#elif CV_AVX
|
||||||
fast_gemm_pack12_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
|
fast_gemm_pack12_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
|
||||||
|
#elif CV_LASX
|
||||||
|
fast_gemm_pack12_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
|
||||||
|
#elif CV_SIMD128
|
||||||
|
fast_gemm_pack8_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// pack b
|
||||||
|
#if CV_NEON && CV_NEON_AARCH64
|
||||||
|
fast_gemm_pack12_f32(nc, kc, B + (k0 * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_b);
|
||||||
|
#elif CV_AVX
|
||||||
|
fast_gemm_pack8_f32(nc, kc, B + (k0 * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_b);
|
||||||
|
#elif CV_LASX
|
||||||
fast_gemm_pack16_f32(nc, kc, B + (k0 * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_b);
|
fast_gemm_pack16_f32(nc, kc, B + (k0 * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_b);
|
||||||
|
#elif CV_SIMD128
|
||||||
|
fast_gemm_pack12_f32(nc, kc, B + (k0 * ldb0 + j0 * ldb1) * esz, ldb1, ldb0, packed_b);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// run kernel
|
||||||
fast_gemm_macro_kernel(mc, nc, kc, packed_a, packed_b, alpha, c_block, ldc_block, esz);
|
fast_gemm_macro_kernel(mc, nc, kc, packed_a, packed_b, alpha, c_block, ldc_block, esz);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1035,7 +693,18 @@ void fastGemmKernel(int M, int N, int K,
|
|||||||
for(int k0 = 0; k0 < K; k0 += KC)
|
for(int k0 = 0; k0 < K; k0 += KC)
|
||||||
{
|
{
|
||||||
int kc = K - k0 < KC ? K - k0 : KC;
|
int kc = K - k0 < KC ? K - k0 : KC;
|
||||||
|
// pack a
|
||||||
|
#if CV_NEON && CV_NEON_AARCH64
|
||||||
|
fast_gemm_pack8_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
|
||||||
|
#elif CV_AVX
|
||||||
fast_gemm_pack12_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
|
fast_gemm_pack12_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
|
||||||
|
#elif CV_LASX
|
||||||
|
fast_gemm_pack12_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
|
||||||
|
#elif CV_SIMD128
|
||||||
|
fast_gemm_pack8_f32(mc, kc, A + (i0 * lda0 + k0 * lda1) * esz, lda0, lda1, packed_a);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// run kernel
|
||||||
fast_gemm_macro_kernel(mc, nc, kc, packed_a, packed_b_, alpha, c_block, ldc_block, esz);
|
fast_gemm_macro_kernel(mc, nc, kc, packed_a, packed_b_, alpha, c_block, ldc_block, esz);
|
||||||
packed_b_ += _nc * kc;
|
packed_b_ += _nc * kc;
|
||||||
}
|
}
|
||||||
@ -1052,8 +721,37 @@ void fastGemmKernel(int M, int N, int K,
|
|||||||
parallel_for_(Range(0, total), fn, nstripes);
|
parallel_for_(Range(0, total), fn, nstripes);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // CV_LASX
|
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||||
|
|
||||||
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||||
|
|
||||||
}} // cv::dnn
|
}} // cv::dnn
|
||||||
|
|
||||||
|
#undef FAST_GEMM_STORAGE
|
||||||
|
#undef FAST_GEMM_MAX_STACKBUF
|
||||||
|
#ifdef FAST_GEMM_F32_MC
|
||||||
|
#undef FAST_GEMM_F32_MC
|
||||||
|
#endif
|
||||||
|
#ifdef FAST_GEMM_F32_NC
|
||||||
|
#undef FAST_GEMM_F32_NC
|
||||||
|
#endif
|
||||||
|
#ifdef FAST_GEMM_F32_MR
|
||||||
|
#undef FAST_GEMM_F32_MR
|
||||||
|
#endif
|
||||||
|
#ifdef FAST_GEMM_F32_NR
|
||||||
|
#undef FAST_GEMM_F32_NR
|
||||||
|
#endif
|
||||||
|
#ifdef FAST_GEMM_F32_PACKED_STRIDE_K
|
||||||
|
#undef FAST_GEMM_F32_PACKED_STRIDE_K
|
||||||
|
#endif
|
||||||
|
#undef FAST_GEMM_IMPLEMENT_PACK
|
||||||
|
#undef FAST_GEMM_LOAD_TO_BUF_8
|
||||||
|
#undef FAST_GEMM_LOAD_TO_BUF_BORDERS_8
|
||||||
|
#undef FAST_GEMM_LOAD_TO_BUF_12
|
||||||
|
#undef FAST_GEMM_LOAD_TO_BUF_BORDERS_12
|
||||||
|
#undef FAST_GEMM_LOAD_TO_BUF_16
|
||||||
|
#undef FAST_GEMM_LOAD_TO_BUF_BORDERS_16
|
||||||
|
#undef FAST_GEMM_PACK_COPY
|
||||||
|
#undef FAST_GEMM_PACK_f32_8
|
||||||
|
#undef FAST_GEMM_PACK_f32_12
|
||||||
|
#undef FAST_GEMM_PACK_f32_16
|
||||||
|
@ -47,73 +47,76 @@ public:
|
|||||||
inputs_arr.getMatVector(inputs);
|
inputs_arr.getMatVector(inputs);
|
||||||
outputs_arr.getMatVector(outputs);
|
outputs_arr.getMatVector(outputs);
|
||||||
|
|
||||||
// Get x tensor.
|
// Get input tensor.
|
||||||
const auto &src_mat = inputs[0];
|
const auto& src_mat = inputs[0];
|
||||||
const auto *src_ptr = src_mat.ptr<float>();
|
const auto* src_ptr = src_mat.ptr<float>();
|
||||||
|
|
||||||
// Get axis.
|
// Get target axis.
|
||||||
const int axis = normalize_axis(axis_raw, src_mat.dims);
|
int axis = inputs.size() > 1 ? parseAxis(inputs[1]) : axis_raw;
|
||||||
|
axis = normalize_axis(axis, src_mat.dims);
|
||||||
|
|
||||||
// Get y tensor.
|
|
||||||
auto &dst_mat = outputs[0];
|
// Get output tensor.
|
||||||
src_mat.copyTo(dst_mat);
|
auto& dst_mat = outputs[0];
|
||||||
auto *dst_ptr = dst_mat.ptr<float>();
|
auto* dst_ptr = dst_mat.ptr<float>();
|
||||||
|
|
||||||
// Get flags.
|
// Get flags.
|
||||||
const auto exclusive = exclusive_raw == 1;
|
const auto exclusive = exclusive_raw == 1;
|
||||||
const auto reverse = reverse_raw == 1;
|
const auto reverse = reverse_raw == 1;
|
||||||
|
|
||||||
// Get parameters to iterate outer dimension.
|
// Data with [dim_1, .. , dim_k-1, target_dim, dim_k+1, .. , dim_n]
|
||||||
|
// dimensions is represented here as [outer_dim, target_dim, inner_dim]
|
||||||
const size_t outer_size = src_mat.total(0, axis);
|
const size_t outer_size = src_mat.total(0, axis);
|
||||||
const size_t outer_step_length = src_mat.total(axis);
|
const size_t target_size = src_mat.size[axis];
|
||||||
|
const size_t inner_size = src_mat.total(axis + 1);
|
||||||
|
const size_t outer_step_length = target_size * inner_size;
|
||||||
|
|
||||||
// Get parameters to iterate inner dimension.
|
// Calculating steps in target dimensions
|
||||||
const size_t inner_size = src_mat.size[axis];
|
const int target_start = reverse ? target_size - 1 : 0;
|
||||||
|
const int target_stop = reverse ? -1 : target_size;
|
||||||
|
const int target_delta = reverse ? -1 : 1;
|
||||||
|
const int target_step = target_delta * inner_size;
|
||||||
|
|
||||||
if (!inner_size)
|
// If exclusive, the j-th output element would be the sum of the first (j-1) elements.
|
||||||
return;
|
// Otherwise, it would be the sum of the first j elements.
|
||||||
|
const int exclusive_delta = exclusive ? target_step : 0;
|
||||||
|
|
||||||
const size_t inner_step_length = src_mat.total(axis + 1);
|
for (size_t outer_idx = 0; outer_idx < outer_size; outer_idx++)
|
||||||
const int inner_step = (reverse ? -1 : 1) * inner_step_length;
|
|
||||||
const int inner_start = reverse ? inner_size - 1 : 0;
|
|
||||||
const int inner_stop = reverse ? -1 : inner_size;
|
|
||||||
const int inner_delta = reverse ? -1 : 1;
|
|
||||||
|
|
||||||
// Get parameters to populate channels.
|
|
||||||
const size_t num_channels = src_mat.total(axis + 1);
|
|
||||||
|
|
||||||
for (size_t outer_dim = 0; outer_dim < outer_size; outer_dim++)
|
|
||||||
{
|
{
|
||||||
const size_t outer_offset = outer_dim * outer_step_length;
|
const size_t target_offset = outer_idx * outer_step_length;
|
||||||
size_t src_offset = outer_offset + inner_start * inner_step_length;
|
|
||||||
|
|
||||||
// Populate first element of inner dimension.
|
// Handle first element of target dimension.
|
||||||
for (size_t channel = 0; channel < num_channels; channel++)
|
size_t first_inner_offset = target_offset + target_start * inner_size;
|
||||||
|
if (exclusive)
|
||||||
|
for (size_t inner_idx = 0; inner_idx < inner_size; inner_idx++)
|
||||||
|
dst_ptr[first_inner_offset + inner_idx] = 0.0f;
|
||||||
|
else
|
||||||
|
for (size_t inner_idx = 0; inner_idx < inner_size; inner_idx++)
|
||||||
|
dst_ptr[first_inner_offset + inner_idx] = src_ptr[first_inner_offset + inner_idx];
|
||||||
|
|
||||||
|
// Handle remaining elements of target dimension.
|
||||||
|
for (int target_idx = target_start + target_delta; target_idx != target_stop; target_idx += target_delta)
|
||||||
{
|
{
|
||||||
if (exclusive)
|
const size_t inner_offset = target_offset + target_idx * inner_size;
|
||||||
|
|
||||||
|
for (size_t inner_idx = 0; inner_idx < inner_size; inner_idx++)
|
||||||
{
|
{
|
||||||
dst_ptr[src_offset + channel] = 0.0f;
|
dst_ptr[inner_offset + inner_idx] = dst_ptr[inner_offset - target_step + inner_idx] +
|
||||||
}
|
src_ptr[inner_offset - exclusive_delta + inner_idx];
|
||||||
else
|
|
||||||
{
|
|
||||||
dst_ptr[src_offset + channel] = src_ptr[src_offset + channel];
|
|
||||||
src_offset += inner_step;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Populate remaining elements of inner dimension.
|
int parseAxis(const Mat& axis_mat) {
|
||||||
for (int inner_dim = inner_start + inner_delta; inner_dim != inner_stop; inner_dim += inner_delta)
|
CV_CheckEQ(axis_mat.total(), 1u, "Axis tensor should contain single value");
|
||||||
{
|
if (axis_mat.type() == CV_32SC1)
|
||||||
const size_t dst_offset = outer_offset + inner_dim * inner_step_length;
|
return axis_mat.at<int32_t>(0);
|
||||||
|
else
|
||||||
for (size_t channel = 0; channel < num_channels; channel++)
|
{
|
||||||
{
|
Mat axis_mat_int;
|
||||||
const size_t previous_dst_offset = dst_offset - inner_step;
|
axis_mat.convertTo(axis_mat_int, CV_32SC1);
|
||||||
dst_ptr[dst_offset + channel] = dst_ptr[previous_dst_offset + channel] +
|
return axis_mat_int.at<int32_t>(0);
|
||||||
src_ptr[src_offset + channel];
|
|
||||||
src_offset += inner_step;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,7 +38,6 @@ Mat batchwiseMatMul(
|
|||||||
const Mat& input2,
|
const Mat& input2,
|
||||||
const MatShape& input2ShapeOverride)
|
const MatShape& input2ShapeOverride)
|
||||||
{
|
{
|
||||||
|
|
||||||
// Sanity checks before the actual MatMul
|
// Sanity checks before the actual MatMul
|
||||||
//input_1.DataType() == input_2.DataType(), "Data types of the inputs must match for MatMul");
|
//input_1.DataType() == input_2.DataType(), "Data types of the inputs must match for MatMul");
|
||||||
|
|
||||||
@ -391,6 +390,15 @@ public:
|
|||||||
OutputArrayOfArrays outputs_arr,
|
OutputArrayOfArrays outputs_arr,
|
||||||
OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||||
{
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
if (inputs_arr.depth() == CV_16S)
|
||||||
|
{
|
||||||
|
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// homogenize inputs
|
// homogenize inputs
|
||||||
preProcessInputs(inputs_arr);
|
preProcessInputs(inputs_arr);
|
||||||
|
|
||||||
|
@ -984,13 +984,7 @@ struct MishFunctor : public BaseDefaultFunctor<MishFunctor>
|
|||||||
#ifdef HAVE_DNN_NGRAPH
|
#ifdef HAVE_DNN_NGRAPH
|
||||||
std::shared_ptr<ngraph::Node> initNgraphAPI(const ngraph::Output<ngraph::Node>& node)
|
std::shared_ptr<ngraph::Node> initNgraphAPI(const ngraph::Output<ngraph::Node>& node)
|
||||||
{
|
{
|
||||||
float one = 1.0f;
|
return std::make_shared<ngraph::op::v4::Mish>(node);
|
||||||
auto constant = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &one);
|
|
||||||
auto exp_node = std::make_shared<ngraph::op::v0::Exp>(node);
|
|
||||||
auto sum = std::make_shared<ngraph::op::v1::Add>(constant, exp_node, ngraph::op::AutoBroadcastType::NUMPY);
|
|
||||||
auto log_node = std::make_shared<ngraph::op::v0::Log>(sum);
|
|
||||||
auto tanh_node = std::make_shared<ngraph::op::Tanh>(log_node);
|
|
||||||
return std::make_shared<ngraph::op::v1::Multiply>(node, tanh_node);
|
|
||||||
}
|
}
|
||||||
#endif // HAVE_DNN_NGRAPH
|
#endif // HAVE_DNN_NGRAPH
|
||||||
|
|
||||||
@ -1190,10 +1184,7 @@ struct AbsValFunctor : public BaseDefaultFunctor<AbsValFunctor>
|
|||||||
#ifdef HAVE_DNN_NGRAPH
|
#ifdef HAVE_DNN_NGRAPH
|
||||||
std::shared_ptr<ngraph::Node> initNgraphAPI(const ngraph::Output<ngraph::Node>& node)
|
std::shared_ptr<ngraph::Node> initNgraphAPI(const ngraph::Output<ngraph::Node>& node)
|
||||||
{
|
{
|
||||||
float coeff = -0.999999f;
|
return std::make_shared<ngraph::op::Abs>(node);
|
||||||
// float coeff = preferableTarget == DNN_TARGET_MYRIAD ? -0.999f : -0.999999f;
|
|
||||||
auto slope = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &coeff);
|
|
||||||
return std::make_shared<ngraph::op::PRelu>(node, slope);
|
|
||||||
}
|
}
|
||||||
#endif // HAVE_DNN_NGRAPH
|
#endif // HAVE_DNN_NGRAPH
|
||||||
|
|
||||||
@ -2563,11 +2554,6 @@ template<>
|
|||||||
const char* const ReciprocalFunctor::BaseDefaultFunctor<ReciprocalFunctor>::ocl_kernel_name = "ReciprocalForward";
|
const char* const ReciprocalFunctor::BaseDefaultFunctor<ReciprocalFunctor>::ocl_kernel_name = "ReciprocalForward";
|
||||||
|
|
||||||
|
|
||||||
#define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \
|
|
||||||
Ptr<_Layer> _Layer::create() { \
|
|
||||||
return return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); }
|
|
||||||
|
|
||||||
|
|
||||||
Ptr<ReLULayer> ReLULayer::create(const LayerParams& params)
|
Ptr<ReLULayer> ReLULayer::create(const LayerParams& params)
|
||||||
{
|
{
|
||||||
float negativeSlope = params.get<float>("negative_slope", 0.f);
|
float negativeSlope = params.get<float>("negative_slope", 0.f);
|
||||||
|
@ -191,7 +191,6 @@ public:
|
|||||||
size_t dims_Y = shape_Y.size();
|
size_t dims_Y = shape_Y.size();
|
||||||
int M = shape_Y[dims_Y - 2], N = shape_Y[dims_Y - 1];
|
int M = shape_Y[dims_Y - 2], N = shape_Y[dims_Y - 1];
|
||||||
int K = trans_a ? ma : na;
|
int K = trans_a ? ma : na;
|
||||||
int batches = std::accumulate(shape_A.begin(), shape_A.end() - 2, 1, std::multiplies<int>());
|
|
||||||
|
|
||||||
// broadcast C and copy C to output
|
// broadcast C and copy C to output
|
||||||
if (have_bias) {
|
if (have_bias) {
|
||||||
@ -201,9 +200,7 @@ public:
|
|||||||
int step = M * N;
|
int step = M * N;
|
||||||
CV_CheckEQ(broadcast_C.size(), static_cast<size_t>(step), "DNN/Gemm: C is not broadcast properly");
|
CV_CheckEQ(broadcast_C.size(), static_cast<size_t>(step), "DNN/Gemm: C is not broadcast properly");
|
||||||
float *ptr_y = Y.ptr<float>();
|
float *ptr_y = Y.ptr<float>();
|
||||||
for (int i = 0; i < batches; i++) {
|
std::memcpy(ptr_y, broadcast_C.data(), step * sizeof(float));
|
||||||
std::memcpy(ptr_y + i * step, broadcast_C.data(), step * sizeof(float));
|
|
||||||
}
|
|
||||||
} else { // initialization
|
} else { // initialization
|
||||||
float *ptr_y = Y.ptr<float>();
|
float *ptr_y = Y.ptr<float>();
|
||||||
size_t total = Y.total();
|
size_t total = Y.total();
|
||||||
@ -212,7 +209,6 @@ public:
|
|||||||
|
|
||||||
if (const_B) {
|
if (const_B) {
|
||||||
CV_CheckGT(packed_B.size(), static_cast<size_t>(0), "DNN/Gemm: constant B is not pre-packed");
|
CV_CheckGT(packed_B.size(), static_cast<size_t>(0), "DNN/Gemm: constant B is not pre-packed");
|
||||||
M *= batches;
|
|
||||||
fastGemm(trans_a, M, N, K, alpha, A.ptr<const float>(), na, packed_B.data(), 1.f, Y.ptr<float>(), N, opt);
|
fastGemm(trans_a, M, N, K, alpha, A.ptr<const float>(), na, packed_B.data(), 1.f, Y.ptr<float>(), N, opt);
|
||||||
} else {
|
} else {
|
||||||
fastGemmBatched(trans_a, trans_b, alpha, A, inputs[1], 1.f, Y, opt);
|
fastGemmBatched(trans_a, trans_b, alpha, A, inputs[1], 1.f, Y, opt);
|
||||||
|
@ -359,11 +359,11 @@ public:
|
|||||||
{
|
{
|
||||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||||
int axis = normalize_axis(axisRaw, ieInpNode.get_shape().size());
|
int axis = normalize_axis(axisRaw, ieInpNode.get_shape().size());
|
||||||
auto softmax = std::make_shared<ngraph::op::v1::Softmax>(ieInpNode, axis);
|
if (logSoftMax) {
|
||||||
if (logSoftMax)
|
return new InfEngineNgraphNode(std::make_shared<ngraph::op::v5::LogSoftmax>(ieInpNode, axis));
|
||||||
return Ptr<BackendNode>(new InfEngineNgraphNode(std::make_shared<ngraph::op::v0::Log>(softmax)));
|
} else {
|
||||||
|
return new InfEngineNgraphNode(std::make_shared<ngraph::op::v1::Softmax>(ieInpNode, axis));
|
||||||
return Ptr<BackendNode>(new InfEngineNgraphNode(softmax));
|
}
|
||||||
}
|
}
|
||||||
#endif // HAVE_DNN_NGRAPH
|
#endif // HAVE_DNN_NGRAPH
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@ BackendNode::BackendNode(int backendId)
|
|||||||
: backendId(backendId)
|
: backendId(backendId)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
BackendNode::~BackendNode() {};
|
BackendNode::~BackendNode() {}
|
||||||
|
|
||||||
BackendWrapper::BackendWrapper(int backendId, int targetId)
|
BackendWrapper::BackendWrapper(int backendId, int targetId)
|
||||||
: backendId(backendId)
|
: backendId(backendId)
|
||||||
|
@ -306,9 +306,9 @@ void ClassificationModel::classify(InputArray frame, int& classId, float& conf)
|
|||||||
}
|
}
|
||||||
|
|
||||||
KeypointsModel::KeypointsModel(const String& model, const String& config)
|
KeypointsModel::KeypointsModel(const String& model, const String& config)
|
||||||
: Model(model, config) {};
|
: Model(model, config) {}
|
||||||
|
|
||||||
KeypointsModel::KeypointsModel(const Net& network) : Model(network) {};
|
KeypointsModel::KeypointsModel(const Net& network) : Model(network) {}
|
||||||
|
|
||||||
std::vector<Point2f> KeypointsModel::estimate(InputArray frame, float thresh)
|
std::vector<Point2f> KeypointsModel::estimate(InputArray frame, float thresh)
|
||||||
{
|
{
|
||||||
@ -364,9 +364,9 @@ std::vector<Point2f> KeypointsModel::estimate(InputArray frame, float thresh)
|
|||||||
}
|
}
|
||||||
|
|
||||||
SegmentationModel::SegmentationModel(const String& model, const String& config)
|
SegmentationModel::SegmentationModel(const String& model, const String& config)
|
||||||
: Model(model, config) {};
|
: Model(model, config) {}
|
||||||
|
|
||||||
SegmentationModel::SegmentationModel(const Net& network) : Model(network) {};
|
SegmentationModel::SegmentationModel(const Net& network) : Model(network) {}
|
||||||
|
|
||||||
void SegmentationModel::segment(InputArray frame, OutputArray mask)
|
void SegmentationModel::segment(InputArray frame, OutputArray mask)
|
||||||
{
|
{
|
||||||
|
@ -155,11 +155,19 @@ void Net::Impl::setPreferableBackend(Net& net, int backendId)
|
|||||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
|
||||||
backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; // = getInferenceEngineBackendTypeParam();
|
backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; // = getInferenceEngineBackendTypeParam();
|
||||||
|
|
||||||
if (netWasQuantized && backendId != DNN_BACKEND_OPENCV && backendId != DNN_BACKEND_TIMVX)
|
if (netWasQuantized && backendId != DNN_BACKEND_OPENCV && backendId != DNN_BACKEND_TIMVX &&
|
||||||
|
backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||||
{
|
{
|
||||||
CV_LOG_WARNING(NULL, "DNN: Only default and TIMVX backends support quantized networks");
|
CV_LOG_WARNING(NULL, "DNN: Only default, TIMVX and OpenVINO backends support quantized networks");
|
||||||
backendId = DNN_BACKEND_OPENCV;
|
backendId = DNN_BACKEND_OPENCV;
|
||||||
}
|
}
|
||||||
|
#ifdef HAVE_DNN_NGRAPH
|
||||||
|
if (netWasQuantized && backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2023_0))
|
||||||
|
{
|
||||||
|
CV_LOG_WARNING(NULL, "DNN: OpenVINO 2023.0 and higher is required to supports quantized networks");
|
||||||
|
backendId = DNN_BACKEND_OPENCV;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (preferableBackend != backendId)
|
if (preferableBackend != backendId)
|
||||||
{
|
{
|
||||||
|
@ -48,7 +48,6 @@ public:
|
|||||||
CV_Assert(basePtr_);
|
CV_Assert(basePtr_);
|
||||||
Net::Impl& base = *basePtr_;
|
Net::Impl& base = *basePtr_;
|
||||||
CV_Assert(!base.netWasAllocated);
|
CV_Assert(!base.netWasAllocated);
|
||||||
CV_Assert(!base.netWasQuantized);
|
|
||||||
netInputLayer = base.netInputLayer;
|
netInputLayer = base.netInputLayer;
|
||||||
blobsToKeep = base.blobsToKeep;
|
blobsToKeep = base.blobsToKeep;
|
||||||
layers = base.layers;
|
layers = base.layers;
|
||||||
|
@ -383,7 +383,7 @@ void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
|
|||||||
{
|
{
|
||||||
inpShapes[i] = shape(inputs[i]);
|
inpShapes[i] = shape(inputs[i]);
|
||||||
if (i > 0 && ddepth != inputs[i].depth())
|
if (i > 0 && ddepth != inputs[i].depth())
|
||||||
CV_Error(Error::StsNotImplemented, "Mixed input data types.");
|
CV_Error(Error::StsNotImplemented, cv::format("Mixed input data types. Required type: %d, actual type: %d", ddepth, inputs[i].depth()));
|
||||||
|
|
||||||
// Quantize and Dequantize layer have different output type than input.
|
// Quantize and Dequantize layer have different output type than input.
|
||||||
if (params.type != "Quantize" && params.type != "Dequantize")
|
if (params.type != "Quantize" && params.type != "Dequantize")
|
||||||
@ -1502,7 +1502,7 @@ void ONNXImporter::lstm_extractConsts(LayerParams& layerParams, const opencv_onn
|
|||||||
blob = Mat(blobShape, CV_32FC1, 0.);
|
blob = Mat(blobShape, CV_32FC1, 0.);
|
||||||
}
|
}
|
||||||
layerParams.blobs.push_back(blob);
|
layerParams.blobs.push_back(blob);
|
||||||
};
|
}
|
||||||
|
|
||||||
void ONNXImporter::lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n)
|
void ONNXImporter::lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n)
|
||||||
{
|
{
|
||||||
@ -1517,7 +1517,7 @@ void ONNXImporter::lstm_add_reshape(const std::string& input_name, const std::st
|
|||||||
reshape_proto.add_input(input_name);
|
reshape_proto.add_input(input_name);
|
||||||
reshape_proto.add_output(output_name);
|
reshape_proto.add_output(output_name);
|
||||||
addLayer(reshapeLp, reshape_proto);
|
addLayer(reshapeLp, reshape_proto);
|
||||||
};
|
}
|
||||||
|
|
||||||
std::string ONNXImporter::lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n)
|
std::string ONNXImporter::lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n)
|
||||||
{
|
{
|
||||||
@ -1536,7 +1536,7 @@ std::string ONNXImporter::lstm_add_slice(int index, const std::string& input_nam
|
|||||||
addLayer(sliceLP, slice_proto);
|
addLayer(sliceLP, slice_proto);
|
||||||
|
|
||||||
return slice_proto.output(0);
|
return slice_proto.output(0);
|
||||||
};
|
}
|
||||||
|
|
||||||
std::string ONNXImporter::lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto,
|
std::string ONNXImporter::lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto,
|
||||||
int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name,
|
int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name,
|
||||||
@ -1564,7 +1564,7 @@ std::string ONNXImporter::lstm_fix_dims(LayerParams& layerParams, const opencv_o
|
|||||||
addLayer(permuteLP, permute_proto);
|
addLayer(permuteLP, permute_proto);
|
||||||
|
|
||||||
return permute_proto.output(0);
|
return permute_proto.output(0);
|
||||||
};
|
}
|
||||||
|
|
||||||
void ONNXImporter::lstm_add_transform(int num_directions, int batch_size, int hidden_size,
|
void ONNXImporter::lstm_add_transform(int num_directions, int batch_size, int hidden_size,
|
||||||
int index, const std::string& input_name, const std::string& output_name)
|
int index, const std::string& input_name, const std::string& output_name)
|
||||||
@ -1606,7 +1606,7 @@ void ONNXImporter::lstm_add_transform(int num_directions, int batch_size, int hi
|
|||||||
int layerShape[] = {2, batch_size, hidden_size};
|
int layerShape[] = {2, batch_size, hidden_size};
|
||||||
lstm_add_reshape(concat_proto.output(0), output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0]));
|
lstm_add_reshape(concat_proto.output(0), output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0]));
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
|
|
||||||
void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
|
void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
|
||||||
{
|
{
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#define INF_ENGINE_RELEASE_2021_3 2021030000
|
#define INF_ENGINE_RELEASE_2021_3 2021030000
|
||||||
#define INF_ENGINE_RELEASE_2021_4 2021040000
|
#define INF_ENGINE_RELEASE_2021_4 2021040000
|
||||||
#define INF_ENGINE_RELEASE_2022_1 2022010000
|
#define INF_ENGINE_RELEASE_2022_1 2022010000
|
||||||
|
#define INF_ENGINE_RELEASE_2023_0 2023000000
|
||||||
|
|
||||||
#ifndef INF_ENGINE_RELEASE
|
#ifndef INF_ENGINE_RELEASE
|
||||||
#warning("IE version have not been provided via command-line. Using 2021.4 by default")
|
#warning("IE version have not been provided via command-line. Using 2021.4 by default")
|
||||||
|
@ -3227,7 +3227,7 @@ void TFLayerHandler::fillRegistry(const tensorflow::GraphDef& net)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
printMissing();
|
printMissing();
|
||||||
};
|
}
|
||||||
|
|
||||||
bool TFLayerHandler::handleMissing(const tensorflow::NodeDef& layer)
|
bool TFLayerHandler::handleMissing(const tensorflow::NodeDef& layer)
|
||||||
{
|
{
|
||||||
|
@ -151,10 +151,12 @@ TEST_P(DNNTestNetwork, ENet)
|
|||||||
{
|
{
|
||||||
applyTestTag(target == DNN_TARGET_CPU ? "" : CV_TEST_TAG_MEMORY_512MB);
|
applyTestTag(target == DNN_TARGET_CPU ? "" : CV_TEST_TAG_MEMORY_512MB);
|
||||||
|
|
||||||
|
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
|
||||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
|
||||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
||||||
|
#endif
|
||||||
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
|
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
||||||
if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16)
|
if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16)
|
||||||
@ -482,7 +484,7 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
|
|||||||
Mat img = imread(findDataFile("dnn/googlenet_1.png"));
|
Mat img = imread(findDataFile("dnn/googlenet_1.png"));
|
||||||
Mat inp = blobFromImage(img, 1.0, Size(320, 240), Scalar(103.939, 116.779, 123.68), false, false);
|
Mat inp = blobFromImage(img, 1.0, Size(320, 240), Scalar(103.939, 116.779, 123.68), false, false);
|
||||||
// Output image has values in range [-143.526, 148.539].
|
// Output image has values in range [-143.526, 148.539].
|
||||||
float l1 = 2e-4, lInf = 2e-3;
|
float l1 = 2e-4, lInf = 2.4e-3;
|
||||||
if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
|
if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
|
||||||
{
|
{
|
||||||
l1 = 0.4;
|
l1 = 0.4;
|
||||||
@ -875,8 +877,12 @@ TEST_P(MaxPooling, Accuracy)
|
|||||||
Target targetId = get<1>(get<5>(GetParam()));
|
Target targetId = get<1>(get<5>(GetParam()));
|
||||||
|
|
||||||
// https://github.com/openvinotoolkit/openvino/issues/18731
|
// https://github.com/openvinotoolkit/openvino/issues/18731
|
||||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && stride != Size(1, 1))
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && stride != Size(1, 1)) {
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
int ow = ceil(static_cast<float>(inSize.width + 2 * pad.width - kernel.width) / stride.width);
|
||||||
|
int oh = ceil(static_cast<float>(inSize.height + 2 * pad.height - kernel.height) / stride.height);
|
||||||
|
if (ow * stride.width >= inSize.width + pad.width || oh * stride.height >= inSize.height + pad.height)
|
||||||
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
|
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
|
||||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
|
||||||
@ -1026,10 +1032,12 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Backends, SoftMax, testing::Combine(
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
TEST_P(Test_layers_backends, MaxPoolUnpool)
|
TEST_P(Test_layers_backends, MaxPoolUnpool)
|
||||||
{
|
{
|
||||||
|
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
|
||||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
|
||||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
||||||
|
#endif
|
||||||
|
|
||||||
LayerParams pool;
|
LayerParams pool;
|
||||||
pool.set("pool", "max");
|
pool.set("pool", "max");
|
||||||
|
@ -14,6 +14,9 @@ testing::internal::ParamGenerator< tuple<Backend, Target> > dnnBackendsAndTarget
|
|||||||
targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
|
targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
|
||||||
#ifdef HAVE_TIMVX
|
#ifdef HAVE_TIMVX
|
||||||
targets.push_back(make_tuple(DNN_BACKEND_TIMVX, DNN_TARGET_NPU));
|
targets.push_back(make_tuple(DNN_BACKEND_TIMVX, DNN_TARGET_NPU));
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_INF_ENGINE
|
||||||
|
targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU));
|
||||||
#endif
|
#endif
|
||||||
return testing::ValuesIn(targets);
|
return testing::ValuesIn(targets);
|
||||||
}
|
}
|
||||||
@ -66,8 +69,6 @@ public:
|
|||||||
outPath = _tf("onnx/data/output_" + basename);
|
outPath = _tf("onnx/data/output_" + basename);
|
||||||
}
|
}
|
||||||
ASSERT_FALSE(net.empty());
|
ASSERT_FALSE(net.empty());
|
||||||
net.setPreferableBackend(backend);
|
|
||||||
net.setPreferableTarget(target);
|
|
||||||
|
|
||||||
for (int i = 0; i < numInps; i++)
|
for (int i = 0; i < numInps; i++)
|
||||||
inps[i] = blobFromNPY(inpPath + ((numInps > 1) ? cv::format("_%d.npy", i) : ".npy"));
|
inps[i] = blobFromNPY(inpPath + ((numInps > 1) ? cv::format("_%d.npy", i) : ".npy"));
|
||||||
@ -78,6 +79,8 @@ public:
|
|||||||
qnet = net.quantize(inps, CV_8S, CV_8S, perChannel);
|
qnet = net.quantize(inps, CV_8S, CV_8S, perChannel);
|
||||||
qnet.getInputDetails(inputScale, inputZp);
|
qnet.getInputDetails(inputScale, inputZp);
|
||||||
qnet.getOutputDetails(outputScale, outputZp);
|
qnet.getOutputDetails(outputScale, outputZp);
|
||||||
|
qnet.setPreferableBackend(backend);
|
||||||
|
qnet.setPreferableTarget(target);
|
||||||
|
|
||||||
// Quantize inputs to int8
|
// Quantize inputs to int8
|
||||||
// int8_value = float_value/scale + zero-point
|
// int8_value = float_value/scale + zero-point
|
||||||
@ -98,7 +101,7 @@ public:
|
|||||||
if (out_i.dims == 2 && ref_i.dims == 1) {
|
if (out_i.dims == 2 && ref_i.dims == 1) {
|
||||||
ref_i = ref_i.reshape(1, 1);
|
ref_i = ref_i.reshape(1, 1);
|
||||||
}
|
}
|
||||||
normAssert(ref_i, out_i, "", l1, lInf);
|
normAssert(ref_i, out_i, basename.c_str(), l1, lInf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -201,10 +204,13 @@ TEST_P(Test_Int8_layers, Padding)
|
|||||||
|
|
||||||
TEST_P(Test_Int8_layers, AvePooling)
|
TEST_P(Test_Int8_layers, AvePooling)
|
||||||
{
|
{
|
||||||
testLayer("layer_pooling_ave", "Caffe", 0.0021, 0.0075);
|
// Some tests failed with OpenVINO due to wrong padded area calculation
|
||||||
|
if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||||
|
testLayer("layer_pooling_ave", "Caffe", 0.0021, 0.0075);
|
||||||
testLayer("ave_pool_same", "TensorFlow", 0.00153, 0.0041);
|
testLayer("ave_pool_same", "TensorFlow", 0.00153, 0.0041);
|
||||||
testLayer("average_pooling_1d", "ONNX", 0.002, 0.0048);
|
testLayer("average_pooling_1d", "ONNX", 0.002, 0.0048);
|
||||||
testLayer("average_pooling", "ONNX", 0.0014, 0.0032);
|
if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||||
|
testLayer("average_pooling", "ONNX", 0.0014, 0.0032);
|
||||||
testLayer("average_pooling_dynamic_axes", "ONNX", 0.0014, 0.006);
|
testLayer("average_pooling_dynamic_axes", "ONNX", 0.0014, 0.006);
|
||||||
|
|
||||||
if (target != DNN_TARGET_CPU)
|
if (target != DNN_TARGET_CPU)
|
||||||
@ -220,8 +226,6 @@ TEST_P(Test_Int8_layers, MaxPooling)
|
|||||||
throw SkipTestException("Only CPU is supported");
|
throw SkipTestException("Only CPU is supported");
|
||||||
testLayer("pool_conv_3d", "ONNX", 0.0033, 0.0124);
|
testLayer("pool_conv_3d", "ONNX", 0.0033, 0.0124);
|
||||||
|
|
||||||
/* All the below tests have MaxPooling as last layer, so computeMaxIdx is set to true
|
|
||||||
which is not supported by int8 maxpooling
|
|
||||||
testLayer("layer_pooling_max", "Caffe", 0.0021, 0.004);
|
testLayer("layer_pooling_max", "Caffe", 0.0021, 0.004);
|
||||||
testLayer("max_pool_even", "TensorFlow", 0.0048, 0.0139);
|
testLayer("max_pool_even", "TensorFlow", 0.0048, 0.0139);
|
||||||
testLayer("max_pool_odd_valid", "TensorFlow", 0.0043, 0.012);
|
testLayer("max_pool_odd_valid", "TensorFlow", 0.0043, 0.012);
|
||||||
@ -231,7 +235,7 @@ TEST_P(Test_Int8_layers, MaxPooling)
|
|||||||
testLayer("two_maxpooling_1d", "ONNX", 0.0037, 0.0052);
|
testLayer("two_maxpooling_1d", "ONNX", 0.0037, 0.0052);
|
||||||
testLayer("maxpooling", "ONNX", 0.0034, 0.0065);
|
testLayer("maxpooling", "ONNX", 0.0034, 0.0065);
|
||||||
testLayer("two_maxpooling", "ONNX", 0.0025, 0.0052);
|
testLayer("two_maxpooling", "ONNX", 0.0025, 0.0052);
|
||||||
testLayer("max_pool3d", "ONNX", 0.0028, 0.0069);*/
|
testLayer("max_pool3d", "ONNX", 0.0028, 0.0069);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Int8_layers, Reduce)
|
TEST_P(Test_Int8_layers, Reduce)
|
||||||
@ -326,7 +330,10 @@ TEST_P(Test_Int8_layers, DISABLED_Softmax_unfused_ONNX) // FIXIT Support 'Ident
|
|||||||
TEST_P(Test_Int8_layers, Concat)
|
TEST_P(Test_Int8_layers, Concat)
|
||||||
{
|
{
|
||||||
testLayer("layer_concat_shared_input", "Caffe", 0.0076, 0.029, 1, 1, true, false);
|
testLayer("layer_concat_shared_input", "Caffe", 0.0076, 0.029, 1, 1, true, false);
|
||||||
testLayer("concat_axis_1", "TensorFlow", 0.0056, 0.017);
|
if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
|
||||||
|
// Crashes with segfault
|
||||||
|
testLayer("concat_axis_1", "TensorFlow", 0.0056, 0.017);
|
||||||
|
}
|
||||||
testLayer("keras_pad_concat", "TensorFlow", 0.0032, 0.0089);
|
testLayer("keras_pad_concat", "TensorFlow", 0.0032, 0.0089);
|
||||||
testLayer("concat_3d", "TensorFlow", 0.005, 0.014);
|
testLayer("concat_3d", "TensorFlow", 0.005, 0.014);
|
||||||
testLayer("concatenation", "ONNX", 0.0032, 0.009);
|
testLayer("concatenation", "ONNX", 0.0032, 0.009);
|
||||||
@ -404,10 +411,13 @@ TEST_P(Test_Int8_layers, Reshape)
|
|||||||
testLayer("reshape_nchw", "TensorFlow", 0.0089, 0.029);
|
testLayer("reshape_nchw", "TensorFlow", 0.0089, 0.029);
|
||||||
|
|
||||||
testLayer("reshape_conv", "TensorFlow", 0.035, 0.054);
|
testLayer("reshape_conv", "TensorFlow", 0.035, 0.054);
|
||||||
testLayer("reshape_reduce", "TensorFlow", 0.0042, 0.0078);
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||||
|
testLayer("reshape_reduce", "TensorFlow", 0.0053, 0.011);
|
||||||
|
else
|
||||||
|
testLayer("reshape_reduce", "TensorFlow", 0.0042, 0.0078);
|
||||||
testLayer("reshape_as_shape", "TensorFlow", 0.0014, 0.0028);
|
testLayer("reshape_as_shape", "TensorFlow", 0.0014, 0.0028);
|
||||||
testLayer("reshape_no_reorder", "TensorFlow", 0.0014, 0.0028);
|
testLayer("reshape_no_reorder", "TensorFlow", 0.0014, 0.0028);
|
||||||
testLayer("shift_reshape_no_reorder", "TensorFlow", 0.0063, 0.014);
|
testLayer("shift_reshape_no_reorder", "TensorFlow", 0.0063, backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.016 : 0.014);
|
||||||
testLayer("dynamic_reshape", "ONNX", 0.0047, 0.0079);
|
testLayer("dynamic_reshape", "ONNX", 0.0047, 0.0079);
|
||||||
testLayer("dynamic_reshape_opset_11", "ONNX", 0.0048, 0.0081);
|
testLayer("dynamic_reshape_opset_11", "ONNX", 0.0048, 0.0081);
|
||||||
testLayer("flatten_by_prod", "ONNX", 0.0048, 0.0081);
|
testLayer("flatten_by_prod", "ONNX", 0.0048, 0.0081);
|
||||||
@ -495,10 +505,10 @@ TEST_P(Test_Int8_layers, Eltwise)
|
|||||||
|
|
||||||
testLayer("conv_2_inps", "Caffe", 0.0086, 0.0232, 2, 1, true, false);
|
testLayer("conv_2_inps", "Caffe", 0.0086, 0.0232, 2, 1, true, false);
|
||||||
testLayer("eltwise_sub", "TensorFlow", 0.015, 0.047);
|
testLayer("eltwise_sub", "TensorFlow", 0.015, 0.047);
|
||||||
testLayer("eltwise_add_vec", "TensorFlow", 0.037, 0.21); // tflite 0.0095, 0.0365
|
testLayer("eltwise_add_vec", "TensorFlow", 0.037, backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.24 : 0.21); // tflite 0.0095, 0.0365
|
||||||
testLayer("eltwise_mul_vec", "TensorFlow", 0.173, 1.14); // tflite 0.0028, 0.017
|
testLayer("eltwise_mul_vec", "TensorFlow", 0.173, 1.14); // tflite 0.0028, 0.017
|
||||||
testLayer("channel_broadcast", "TensorFlow", 0.0025, 0.0063);
|
testLayer("channel_broadcast", "TensorFlow", 0.0025, 0.0063);
|
||||||
testLayer("split_equals", "TensorFlow", 0.02, 0.065);
|
testLayer("split_equals", "TensorFlow", backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.021 : 0.02, 0.065);
|
||||||
testLayer("mul", "ONNX", 0.0039, 0.014);
|
testLayer("mul", "ONNX", 0.0039, 0.014);
|
||||||
testLayer("split_max", "ONNX", 0.004, 0.012);
|
testLayer("split_max", "ONNX", 0.004, 0.012);
|
||||||
}
|
}
|
||||||
@ -555,10 +565,10 @@ public:
|
|||||||
Mat blob = readTensorFromONNX(findDataFile("dnn/onnx/data/input_" + basename + ".pb"));
|
Mat blob = readTensorFromONNX(findDataFile("dnn/onnx/data/input_" + basename + ".pb"));
|
||||||
Mat ref = readTensorFromONNX(findDataFile("dnn/onnx/data/output_" + basename + ".pb"));
|
Mat ref = readTensorFromONNX(findDataFile("dnn/onnx/data/output_" + basename + ".pb"));
|
||||||
Net baseNet = readNetFromONNX(onnxmodel);
|
Net baseNet = readNetFromONNX(onnxmodel);
|
||||||
baseNet.setPreferableBackend(backend);
|
|
||||||
baseNet.setPreferableTarget(target);
|
|
||||||
|
|
||||||
Net qnet = baseNet.quantize(blob, CV_32F, CV_32F, perChannel);
|
Net qnet = baseNet.quantize(blob, CV_32F, CV_32F, perChannel);
|
||||||
|
qnet.setPreferableBackend(backend);
|
||||||
|
qnet.setPreferableTarget(target);
|
||||||
qnet.setInput(blob);
|
qnet.setInput(blob);
|
||||||
Mat out = qnet.forward();
|
Mat out = qnet.forward();
|
||||||
|
|
||||||
@ -703,9 +713,6 @@ TEST_P(Test_Int8_nets, AlexNet)
|
|||||||
#else
|
#else
|
||||||
applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
|
applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
|
||||||
#endif
|
#endif
|
||||||
if (backend != DNN_BACKEND_OPENCV)
|
|
||||||
throw SkipTestException("Only OpenCV backend is supported");
|
|
||||||
|
|
||||||
if (target == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
|
if (target == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
||||||
if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
|
if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
|
||||||
@ -746,8 +753,6 @@ TEST_P(Test_Int8_nets, GoogLeNet)
|
|||||||
TEST_P(Test_Int8_nets, ResNet50)
|
TEST_P(Test_Int8_nets, ResNet50)
|
||||||
{
|
{
|
||||||
applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
|
applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
|
||||||
if (backend != DNN_BACKEND_OPENCV)
|
|
||||||
throw SkipTestException("Only OpenCV backend is supported");
|
|
||||||
|
|
||||||
if (target == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
|
if (target == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
||||||
@ -778,6 +783,8 @@ TEST_P(Test_Int8_nets, DenseNet121)
|
|||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
||||||
if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
|
if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
||||||
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||||
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
||||||
|
|
||||||
Net net = readNetFromCaffe(findDataFile("dnn/DenseNet_121.prototxt", false),
|
Net net = readNetFromCaffe(findDataFile("dnn/DenseNet_121.prototxt", false),
|
||||||
findDataFile("dnn/DenseNet_121.caffemodel", false));
|
findDataFile("dnn/DenseNet_121.caffemodel", false));
|
||||||
@ -959,6 +966,8 @@ TEST_P(Test_Int8_nets, opencv_face_detector)
|
|||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
||||||
if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
|
if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
||||||
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||||
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
||||||
|
|
||||||
Net net = readNetFromCaffe(findDataFile("dnn/opencv_face_detector.prototxt"),
|
Net net = readNetFromCaffe(findDataFile("dnn/opencv_face_detector.prototxt"),
|
||||||
findDataFile("dnn/opencv_face_detector.caffemodel", false));
|
findDataFile("dnn/opencv_face_detector.caffemodel", false));
|
||||||
@ -1025,7 +1034,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_resnet50)
|
|||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
||||||
if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
|
if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
||||||
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||||
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
||||||
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
|
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
||||||
|
|
||||||
@ -1052,7 +1062,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_inceptionv2)
|
|||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
||||||
if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
|
if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
||||||
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||||
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
||||||
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
|
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
|
||||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
||||||
|
|
||||||
@@ -1083,6 +1094,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_vgg16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     Net net = readNetFromCaffe(findDataFile("dnn/faster_rcnn_vgg16.prototxt"),
                                findDataFile("dnn/VGG16_faster_rcnn_final.caffemodel", false));
@@ -1110,6 +1123,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_zf)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     Net net = readNetFromCaffe(findDataFile("dnn/faster_rcnn_zf.prototxt"),
                                findDataFile("dnn/ZF_faster_rcnn_final.caffemodel", false));
@@ -1142,6 +1157,9 @@ TEST_P(Test_Int8_nets, RFCN)
                          0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16);
 
     float confThreshold = 0.8, scoreDiff = 0.15, iouDiff = 0.11;
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
+        iouDiff = 0.12;
+    }
     testFaster(net, ref, confThreshold, scoreDiff, iouDiff);
 }
 
@@ -1321,6 +1339,8 @@ TEST_P(Test_Int8_nets, YOLOv4_tiny)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     const float confThreshold = 0.6;
 
@@ -413,10 +413,12 @@ TEST_P(Test_Caffe_layers, layer_prelu_fc)
 
 TEST_P(Test_Caffe_layers, Reshape_Split_Slice)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+#endif
 
     Net net = readNetFromCaffe(_tf("reshape_and_slice_routines.prototxt"));
     ASSERT_FALSE(net.empty());
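The `INF_ENGINE_VER_MAJOR_LT(2023000000)` guard introduced here, and repeated in many hunks below, compiles the legacy NN Builder / nGraph skip tags only when OpenCV is built against an OpenVINO release older than 2023.0. A minimal sketch of how such a version-prefix comparison can work; the macro definition and the INF_ENGINE_RELEASE value shown are assumptions for illustration, not the exact code in the OpenCV headers:

// INF_ENGINE_RELEASE encodes the OpenVINO version as a decimal literal,
// e.g. 2022010000 for release 2022.1 (illustrative value, an assumption).
#define INF_ENGINE_RELEASE 2022010000
// Assumed shape of the comparison: drop the patch digits, compare prefixes.
#define INF_ENGINE_VER_MAJOR_LT(ver) (((INF_ENGINE_RELEASE) / 10000) < ((ver) / 10000))

#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
// ... skip tags that only apply to pre-2023.0 OpenVINO builds ...
#endif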
@@ -795,8 +797,10 @@ TEST_P(Test_Caffe_layers, DataAugmentation)
 
 TEST_P(Test_Caffe_layers, Resample)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend != DNN_BACKEND_OPENCV)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+#endif
     testLayerUsingCaffeModels("nearest_2inps", false, false, 0.0, 0.0, 2);
     testLayerUsingCaffeModels("nearest", false, false);
 }
@@ -4,4 +4,4 @@
 #include <hpx/hpx_main.hpp>
 #endif
 
-CV_TEST_MAIN("", initDNNTests());
+CV_TEST_MAIN("", initDNNTests())
@@ -1236,4 +1236,4 @@ INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_conformance,
     printOnnxConfParams
 );
 
-};
+}
@@ -46,6 +46,13 @@
     "test_conv_with_strides_and_asymmetric_padding",
     "test_conv_with_strides_no_padding",
     "test_conv_with_strides_padding",
+    "test_cumsum_1d",
+    "test_cumsum_1d_exclusive",
+    "test_cumsum_1d_reverse",
+    "test_cumsum_1d_reverse_exclusive",
+    "test_cumsum_2d_axis_0",
+    "test_cumsum_2d_axis_1",
+    "test_cumsum_2d_negative_axis",
     "test_div_bcast",
     "test_div_uint8",
     "test_dropout_default_ratio",
@@ -40,6 +40,13 @@
     "test_cast_STRING_to_FLOAT",
     "test_castlike_FLOAT_to_STRING_expanded",
     "test_castlike_STRING_to_FLOAT_expanded",
+    "test_cumsum_1d",
+    "test_cumsum_1d_exclusive",
+    "test_cumsum_1d_reverse",
+    "test_cumsum_1d_reverse_exclusive",
+    "test_cumsum_2d_axis_0",
+    "test_cumsum_2d_axis_1",
+    "test_cumsum_2d_negative_axis",
     "test_concat_1d_axis_negative_1",
     "test_div_uint8",
     "test_flatten_axis0",
@@ -89,13 +89,6 @@
     "test_convtranspose_pad",
     "test_convtranspose_pads",
     "test_convtranspose_with_kernel",
-    "test_cumsum_1d",
-    "test_cumsum_1d_exclusive",
-    "test_cumsum_1d_reverse",
-    "test_cumsum_1d_reverse_exclusive",
-    "test_cumsum_2d_axis_0",
-    "test_cumsum_2d_axis_1",
-    "test_cumsum_2d_negative_axis",
     "test_dequantizelinear",
     "test_dequantizelinear_axis",
     "test_det_2d",
@@ -547,3 +540,11 @@
     "test_xor_bcast4v2d",
     "test_xor_bcast4v3d",
     "test_xor_bcast4v4d",
+    // Cumsum related issue: https://github.com/opencv/opencv/issues/24437
+    "test_cumsum_1d",
+    "test_cumsum_1d_exclusive",
+    "test_cumsum_1d_reverse",
+    "test_cumsum_1d_reverse_exclusive",
+    "test_cumsum_2d_axis_0",
+    "test_cumsum_2d_axis_1",
+    "test_cumsum_2d_negative_axis",
@@ -681,6 +681,9 @@ TEST_P(Test_ONNX_layers, Compare_GT)
 
     testONNXModels("greater");
 }
+TEST_P(Test_ONNX_layers, Greater_input_dtype_int64) {
+    testONNXModels("greater_input_dtype_int64");
+}
 
 TEST_P(Test_ONNX_layers, Compare_LT)
 {
@@ -1063,10 +1066,12 @@ TEST_P(Test_ONNX_layers, ResizeUnfused)
 
 TEST_P(Test_ONNX_layers, ResizeUnfusedTwoInputs)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+#endif
     testONNXModels("upsample_unfused_two_inputs_opset9_torch1.4", npy, 0, 0, false, true, 2);
     testONNXModels("upsample_unfused_two_inputs_opset11_torch1.4", npy, 0, 0, false, true, 2);
 }
@@ -1170,10 +1175,12 @@ TEST_P(Test_ONNX_layers, ReduceL2)
 
 TEST_P(Test_ONNX_layers, Split)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+#endif
     testONNXModels("split_0");
     testONNXModels("split_1");
     testONNXModels("split_2");
@@ -1249,10 +1256,12 @@ TEST_P(Test_ONNX_layers, Softmax)
 
 TEST_P(Test_ONNX_layers, Split_EltwiseMax)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+#endif
     testONNXModels("split_max");
 }
 
@@ -2058,12 +2067,16 @@ TEST_P(Test_ONNX_layers, Quantized_Unsqueeze)
 TEST_P(Test_ONNX_layers, Quantized_Resize)
 {
     testONNXModels("quantized_resize_nearest");
-    testONNXModels("quantized_resize_bilinear", npy, 2e-4, 0.003);
-    testONNXModels("quantized_resize_bilinear_align", npy, 3e-4, 0.003);
+    double l1 = backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.0013 : 2e-4;
+    testONNXModels("quantized_resize_bilinear", npy, l1, 0.003);
+    l1 = backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.0013 : 3e-4;
+    testONNXModels("quantized_resize_bilinear_align", npy, l1, 0.003);
 }
 
 TEST_P(Test_ONNX_layers, Quantized_Concat)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     testONNXModels("quantized_concat");
     testONNXModels("quantized_concat_const_blob");
 }
@@ -2080,6 +2093,8 @@ TEST_P(Test_ONNX_layers, OutputRegistration)
 
 TEST_P(Test_ONNX_layers, QLinearSoftmax)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     testONNXModels("qlinearsoftmax_v11", npy, 0.002, 0.002); // 2D coerced
     testONNXModels("qlinearsoftmax_v13", npy, 0.002, 0.002);
 }
@@ -2669,37 +2684,37 @@ TEST_P(Test_ONNX_layers, where_node)
     testONNXModels("where_layer");
 }
 
-TEST_P(Test_ONNX_layers, Conformance_Gemm_all_attributes) {
+TEST_P(Test_ONNX_layers, Gemm_all_attributes) {
     testONNXModels("test_gemm_all_attributes", pb, 0, 0, false, true, 2);
 }
-TEST_P(Test_ONNX_layers, Conformance_Gemm_alpha) {
+TEST_P(Test_ONNX_layers, Gemm_alpha) {
     testONNXModels("test_gemm_alpha", pb, 0, 0, false, true, 2);
 }
-TEST_P(Test_ONNX_layers, Conformance_Gemm_beta) {
+TEST_P(Test_ONNX_layers, Gemm_beta) {
     testONNXModels("test_gemm_beta", pb, 0, 0, false, true, 2);
 }
-TEST_P(Test_ONNX_layers, Conformance_Gemm_default_matrix_bias) {
+TEST_P(Test_ONNX_layers, Gemm_default_matrix_bias) {
     testONNXModels("test_gemm_default_matrix_bias", pb, 0, 0, false, true, 2);
 }
-TEST_P(Test_ONNX_layers, Conformance_Gemm_default_no_bias) {
+TEST_P(Test_ONNX_layers, Gemm_default_no_bias) {
     testONNXModels("test_gemm_default_no_bias", pb, 0, 0, false, true, 2);
 }
-TEST_P(Test_ONNX_layers, Conformance_Gemm_default_scalar_bias) {
+TEST_P(Test_ONNX_layers, Gemm_default_scalar_bias) {
     testONNXModels("test_gemm_default_scalar_bias", pb, 0, 0, false, true, 2);
 }
-TEST_P(Test_ONNX_layers, Conformance_Gemm_default_single_elem_vector_bias) {
+TEST_P(Test_ONNX_layers, Gemm_default_single_elem_vector_bias) {
     testONNXModels("test_gemm_default_single_elem_vector_bias", pb, 0, 0, false, true, 2);
 }
-TEST_P(Test_ONNX_layers, Conformance_Gemm_default_vector_bias) {
+TEST_P(Test_ONNX_layers, Gemm_default_vector_bias) {
     testONNXModels("test_gemm_default_vector_bias", pb, 0, 0, false, true, 2);
 }
-TEST_P(Test_ONNX_layers, Conformance_Gemm_default_zero_bias) {
+TEST_P(Test_ONNX_layers, Gemm_default_zero_bias) {
     testONNXModels("test_gemm_default_zero_bias", pb, 0, 0, false, true, 2);
 }
-TEST_P(Test_ONNX_layers, Conformance_Gemm_transposeA) {
+TEST_P(Test_ONNX_layers, Gemm_transposeA) {
     testONNXModels("test_gemm_transposeA", pb, 0, 0, false, true, 2);
 }
-TEST_P(Test_ONNX_layers, Conformance_Gemm_transposeB) {
+TEST_P(Test_ONNX_layers, Gemm_transposeB) {
     testONNXModels("test_gemm_transposeB", pb, 0, 0, false, true, 2);
 }
 
@@ -619,10 +619,12 @@ TEST_P(Test_TensorFlow_layers, pooling_reduce_sum_1_2_true)
 
 TEST_P(Test_TensorFlow_layers, max_pool_grad)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+#endif
     runTensorFlowNet("max_pool_grad");
 }
 
@@ -1496,17 +1498,21 @@ TEST_P(Test_TensorFlow_layers, split)
 
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+#endif
     runTensorFlowNet("split");
 }
 
 TEST_P(Test_TensorFlow_layers, split_equals)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+#endif
     runTensorFlowNet("split_equals");
 }
 
@@ -1581,7 +1587,7 @@ TEST_P(Test_TensorFlow_layers, relu6)
 
 TEST_P(Test_TensorFlow_layers, subpixel)
 {
-#if defined(INF_ENGINE_RELEASE)
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
@@ -1621,8 +1627,10 @@ TEST_P(Test_TensorFlow_layers, resize_bilinear_align_corners)
 // TF case: align_corners=False, half_pixel_centers=True
 TEST_P(Test_TensorFlow_layers, resize_bilinear_half_pixel)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+#endif
 
     runTensorFlowNet("resize_bilinear", false, 0.0, 0.0, false, "_half_pixel");
 }
@@ -1636,8 +1644,10 @@ TEST_P(Test_TensorFlow_layers, resize_bilinear_factor)
 // TF case: align_corners=False, half_pixel_centers=True
 TEST_P(Test_TensorFlow_layers, resize_bilinear_factor_half_pixel)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+#endif
 
     runTensorFlowNet("resize_bilinear_factor", false, 0.0, 0.0, false, "_half_pixel");
 }
@@ -204,6 +204,10 @@ TEST_P(Test_TFLite, max_unpooling)
 }
 
 TEST_P(Test_TFLite, EfficientDet_int8) {
+    if (target != DNN_TARGET_CPU || (backend != DNN_BACKEND_OPENCV &&
+        backend != DNN_BACKEND_TIMVX && backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) {
+        throw SkipTestException("Only OpenCV, TimVX and OpenVINO targets support INT8 on CPU");
+    }
     Net net = readNet(findDataFile("dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", false));
     net.setPreferableBackend(backend);
     net.setPreferableTarget(target);
@@ -449,7 +449,7 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
         throw SkipTestException("");
     }
 #endif
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000)
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2023000000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
@@ -872,11 +872,15 @@ public:
     @param nOctaveLayers Default number of sublevels per scale level
     @param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or
     DIFF_CHARBONNIER
+    @param max_points Maximum amount of returned points. In case if image contains
+    more features, then the features with highest response are returned.
+    Negative value means no limitation.
     */
     CV_WRAP static Ptr<AKAZE> create(AKAZE::DescriptorType descriptor_type = AKAZE::DESCRIPTOR_MLDB,
                                      int descriptor_size = 0, int descriptor_channels = 3,
                                      float threshold = 0.001f, int nOctaves = 4,
-                                     int nOctaveLayers = 4, KAZE::DiffusivityType diffusivity = KAZE::DIFF_PM_G2);
+                                     int nOctaveLayers = 4, KAZE::DiffusivityType diffusivity = KAZE::DIFF_PM_G2,
+                                     int max_points = -1);
 
     CV_WRAP virtual void setDescriptorType(AKAZE::DescriptorType dtype) = 0;
     CV_WRAP virtual AKAZE::DescriptorType getDescriptorType() const = 0;
@@ -899,6 +903,9 @@ public:
     CV_WRAP virtual void setDiffusivity(KAZE::DiffusivityType diff) = 0;
     CV_WRAP virtual KAZE::DiffusivityType getDiffusivity() const = 0;
     CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+
+    CV_WRAP virtual void setMaxPoints(int max_points) = 0;
+    CV_WRAP virtual int getMaxPoints() const = 0;
 };
 
 //! @} features2d_main
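Taken together, the two hunks above extend the public AKAZE interface with a `max_points` cap: when the detector finds more keypoints than requested, only those with the highest response survive. A minimal usage sketch under that reading (the input filename is hypothetical):

#include <opencv2/features2d.hpp>
#include <opencv2/imgcodecs.hpp>
#include <vector>

int main()
{
    cv::Mat img = cv::imread("scene.jpg", cv::IMREAD_GRAYSCALE); // hypothetical input
    // All tuning parameters at their defaults; keep at most the 500
    // strongest keypoints (-1, the default, means no limit).
    cv::Ptr<cv::AKAZE> akaze = cv::AKAZE::create(
        cv::AKAZE::DESCRIPTOR_MLDB, 0, 3, 0.001f, 4, 4, cv::KAZE::DIFF_PM_G2,
        /*max_points=*/500);
    std::vector<cv::KeyPoint> kpts;
    cv::Mat descriptors;
    akaze->detectAndCompute(img, cv::noArray(), kpts, descriptors);
    // kpts.size() <= 500 here; akaze->setMaxPoints(-1) would lift the cap again.
    return 0;
}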
@@ -58,7 +58,7 @@ public class AKAZEDescriptorExtractorTest extends OpenCVTestCase {
 
         extractor.write(filename);
 
-        String truth = "%YAML:1.0\n---\nformat: 3\nname: \"Feature2D.AKAZE\"\ndescriptor: 5\ndescriptor_channels: 3\ndescriptor_size: 0\nthreshold: 1.0000000474974513e-03\noctaves: 4\nsublevels: 4\ndiffusivity: 1\n";
+        String truth = "%YAML:1.0\n---\nformat: 3\nname: \"Feature2D.AKAZE\"\ndescriptor: 5\ndescriptor_channels: 3\ndescriptor_size: 0\nthreshold: 1.0000000474974513e-03\noctaves: 4\nsublevels: 4\ndiffusivity: 1\nmax_points: -1\n";
         String actual = readFile(filename);
         actual = actual.replaceAll("e([+-])0(\\d\\d)", "e$1$2"); // NOTE: workaround for different platforms double representation
         assertEquals(truth, actual);
@@ -61,7 +61,7 @@ namespace cv
 {
 public:
     AKAZE_Impl(DescriptorType _descriptor_type, int _descriptor_size, int _descriptor_channels,
-               float _threshold, int _octaves, int _sublevels, KAZE::DiffusivityType _diffusivity)
+               float _threshold, int _octaves, int _sublevels, KAZE::DiffusivityType _diffusivity, int _max_points)
     : descriptor(_descriptor_type)
     , descriptor_channels(_descriptor_channels)
     , descriptor_size(_descriptor_size)
@@ -69,6 +69,7 @@ namespace cv
     , octaves(_octaves)
     , sublevels(_sublevels)
     , diffusivity(_diffusivity)
+    , max_points(_max_points)
     {
     }
 
@@ -98,6 +99,9 @@
     void setDiffusivity(KAZE::DiffusivityType diff_) CV_OVERRIDE{ diffusivity = diff_; }
     KAZE::DiffusivityType getDiffusivity() const CV_OVERRIDE{ return diffusivity; }
 
+    void setMaxPoints(int max_points_) CV_OVERRIDE { max_points = max_points_; }
+    int getMaxPoints() const CV_OVERRIDE { return max_points; }
+
     // returns the descriptor size in bytes
     int descriptorSize() const CV_OVERRIDE
     {
@@ -195,6 +199,12 @@
             KeyPointsFilter::runByPixelsMask(keypoints, mask.getMat());
         }
 
+        if (max_points > 0 && (int)keypoints.size() > max_points) {
+            std::partial_sort(keypoints.begin(), keypoints.begin() + max_points, keypoints.end(),
+                              [](const cv::KeyPoint& k1, const cv::KeyPoint& k2) {return k1.response > k2.response;});
+            keypoints.erase(keypoints.begin() + max_points, keypoints.end());
+        }
+
         if(descriptors.needed())
         {
             impl.Compute_Descriptors(keypoints, descriptors);
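A note on the trimming above: `std::partial_sort` selects the `max_points` strongest responses and leaves them sorted, at O(n log k) cost. If the ordering of the kept keypoints did not matter, `std::nth_element` could select the same subset in O(n) on average; a sketch of that alternative (not what this commit uses):

#include <algorithm>
#include <vector>
#include <opencv2/core.hpp>

// Unordered top-k selection by response; same kept subset, cheaper selection.
static void keepStrongest(std::vector<cv::KeyPoint>& kpts, int maxPoints)
{
    if (maxPoints > 0 && (int)kpts.size() > maxPoints)
    {
        std::nth_element(kpts.begin(), kpts.begin() + maxPoints, kpts.end(),
                         [](const cv::KeyPoint& a, const cv::KeyPoint& b)
                         { return a.response > b.response; });
        kpts.erase(kpts.begin() + maxPoints, kpts.end());
    }
}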
@@ -215,6 +225,7 @@
         fs << "octaves" << octaves;
         fs << "sublevels" << sublevels;
         fs << "diffusivity" << diffusivity;
+        fs << "max_points" << max_points;
     }
 
     void read(const FileNode& fn) CV_OVERRIDE
@@ -234,6 +245,8 @@
         sublevels = (int)fn["sublevels"];
         if (!fn["diffusivity"].empty())
             diffusivity = static_cast<KAZE::DiffusivityType>((int)fn["diffusivity"]);
+        if (!fn["max_points"].empty())
+            max_points = (int)fn["max_points"];
     }
 
     DescriptorType descriptor;
@@ -243,15 +256,16 @@
     int octaves;
     int sublevels;
     KAZE::DiffusivityType diffusivity;
+    int max_points;
 };
 
 Ptr<AKAZE> AKAZE::create(DescriptorType descriptor_type,
                          int descriptor_size, int descriptor_channels,
                          float threshold, int octaves,
-                         int sublevels, KAZE::DiffusivityType diffusivity)
+                         int sublevels, KAZE::DiffusivityType diffusivity, int max_points)
 {
     return makePtr<AKAZE_Impl>(descriptor_type, descriptor_size, descriptor_channels,
-                               threshold, octaves, sublevels, diffusivity);
+                               threshold, octaves, sublevels, diffusivity, max_points);
 }
 
 String AKAZE::getDefaultName() const
@@ -64,9 +64,12 @@
 //! @{
 /**
 @brief Detects corners using the FAST algorithm, returns mask.
-@param src_data,src_step Source image
-@param dst_data,dst_step Destination mask
-@param width,height Source image dimensions
+@param src_data Source image data
+@param src_step Source image step
+@param dst_data Destination mask data
+@param dst_step Destination mask step
+@param width Source image width
+@param height Source image height
 @param type FAST type
 */
 inline int hal_ni_FAST_dense(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, cv::FastFeatureDetector::DetectorType type) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
@@ -89,8 +92,10 @@ inline int hal_ni_FAST_NMS(const uchar* src_data, size_t src_step, uchar* dst_da
 
 /**
 @brief Detects corners using the FAST algorithm.
-@param src_data,src_step Source image
-@param width,height Source image dimensions
+@param src_data Source image data
+@param src_step Source image step
+@param width Source image width
+@param height Source image height
 @param keypoints_data Pointer to keypoints
 @param keypoints_count Count of keypoints
 @param threshold Threshold for keypoint
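The two documentation hunks above split each combined `@param` entry so that every HAL argument is described on its own line. For orientation, a replacement HAL would supply a function with exactly the documented signature; the stub below is a hedged sketch (the function name is hypothetical, and returning CV_HAL_ERROR_NOT_IMPLEMENTED falls back to OpenCV's built-in FAST):

#include <opencv2/features2d.hpp>
#include <opencv2/core/hal/interface.h>

// Hypothetical custom implementation slot for the documented hook.
int my_FAST_dense(const uchar* src_data, size_t src_step,
                  uchar* dst_data, size_t dst_step,
                  int width, int height,
                  cv::FastFeatureDetector::DetectorType type)
{
    // src_step and dst_step are byte strides between rows, so row y begins at
    // src_data + y * src_step; a real implementation would scan rows here.
    (void)src_data; (void)src_step; (void)dst_data; (void)dst_step;
    (void)width; (void)height; (void)type;
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}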
@@ -86,9 +86,9 @@ void image_derivatives_scharr(const cv::Mat& src, cv::Mat& dst, int xorder, int
 /**
  * @brief This function computes the Perona and Malik conductivity coefficient g1
  * g1 = exp(-|dL|^2/k^2)
- * @param Lx First order image derivative in X-direction (horizontal)
- * @param Ly First order image derivative in Y-direction (vertical)
- * @param dst Output image
+ * @param _Lx First order image derivative in X-direction (horizontal)
+ * @param _Ly First order image derivative in Y-direction (vertical)
+ * @param _dst Output image
  * @param k Contrast factor parameter
  */
 void pm_g1(InputArray _Lx, InputArray _Ly, OutputArray _dst, float k) {
@@ -117,9 +117,9 @@ void pm_g1(InputArray _Lx, InputArray _Ly, OutputArray _dst, float k) {
 /**
  * @brief This function computes the Perona and Malik conductivity coefficient g2
  * g2 = 1 / (1 + dL^2 / k^2)
- * @param Lx First order image derivative in X-direction (horizontal)
- * @param Ly First order image derivative in Y-direction (vertical)
- * @param dst Output image
+ * @param _Lx First order image derivative in X-direction (horizontal)
+ * @param _Ly First order image derivative in Y-direction (vertical)
+ * @param _dst Output image
  * @param k Contrast factor parameter
  */
 void pm_g2(InputArray _Lx, InputArray _Ly, OutputArray _dst, float k) {
@@ -146,9 +146,9 @@ void pm_g2(InputArray _Lx, InputArray _Ly, OutputArray _dst, float k) {
 /* ************************************************************************* */
 /**
  * @brief This function computes Weickert conductivity coefficient gw
- * @param Lx First order image derivative in X-direction (horizontal)
- * @param Ly First order image derivative in Y-direction (vertical)
- * @param dst Output image
+ * @param _Lx First order image derivative in X-direction (horizontal)
+ * @param _Ly First order image derivative in Y-direction (vertical)
+ * @param _dst Output image
  * @param k Contrast factor parameter
  * @note For more information check the following paper: J. Weickert
  * Applications of nonlinear diffusion in image processing and computer vision,
@@ -183,9 +183,9 @@ void weickert_diffusivity(InputArray _Lx, InputArray _Ly, OutputArray _dst, floa
 /**
  * @brief This function computes Charbonnier conductivity coefficient gc
  * gc = 1 / sqrt(1 + dL^2 / k^2)
- * @param Lx First order image derivative in X-direction (horizontal)
- * @param Ly First order image derivative in Y-direction (vertical)
- * @param dst Output image
+ * @param _Lx First order image derivative in X-direction (horizontal)
+ * @param _Ly First order image derivative in Y-direction (vertical)
+ * @param _dst Output image
  * @param k Contrast factor parameter
  * @note For more information check the following paper: J. Weickert
  * Applications of nonlinear diffusion in image processing and computer vision,
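The conductivity coefficients documented in these hunks all map the squared gradient magnitude dL^2 = Lx^2 + Ly^2 through a decreasing function controlled by the contrast factor k. A per-pixel scalar sketch of the three formulas quoted above (OpenCV's actual implementation is vectorized over whole Mats; Weickert's variant is omitted because its constants are not shown in this diff):

#include <cmath>

// lx, ly: first order image derivatives at one pixel; k: contrast factor.
inline float pm_g1_scalar(float lx, float ly, float k)   // g1 = exp(-|dL|^2/k^2)
{ return std::exp(-(lx*lx + ly*ly) / (k*k)); }

inline float pm_g2_scalar(float lx, float ly, float k)   // g2 = 1 / (1 + dL^2/k^2)
{ return 1.0f / (1.0f + (lx*lx + ly*ly) / (k*k)); }

inline float charbonnier_scalar(float lx, float ly, float k) // gc = 1 / sqrt(1 + dL^2/k^2)
{ return 1.0f / std::sqrt(1.0f + (lx*lx + ly*ly) / (k*k)); }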
@@ -323,7 +323,7 @@ void compute_scharr_derivatives(const cv::Mat& src, cv::Mat& dst, int xorder, in
  * @param _ky Vertical kernel values
  * @param dx Derivative order in X-direction (horizontal)
  * @param dy Derivative order in Y-direction (vertical)
- * @param scale_ Scale factor or derivative size
+ * @param scale Scale factor or derivative size
  */
 void compute_derivative_kernels(cv::OutputArray _kx, cv::OutputArray _ky, int dx, int dy, int scale) {
     CV_INSTRUMENT_REGION();
@@ -415,7 +415,7 @@ private:
 /* ************************************************************************* */
 /**
  * @brief This function performs a scalar non-linear diffusion step
- * @param Ld2 Output image in the evolution
+ * @param Ld Output image in the evolution
  * @param c Conductivity image
  * @param Lstep Previous image in the evolution
  * @param stepsize The step size in time units
@@ -490,7 +490,7 @@ void nld_step_scalar(cv::Mat& Ld, const cv::Mat& c, cv::Mat& Lstep, float stepsi
 /* ************************************************************************* */
 /**
  * @brief This function downsamples the input image using OpenCV resize
- * @param img Input image to be downsampled
+ * @param src Input image to be downsampled
  * @param dst Output image with half of the resolution of the input image
  */
 void halfsample_image(const cv::Mat& src, cv::Mat& dst) {
@@ -6,7 +6,7 @@
  * @brief This function computes the value of a 2D Gaussian function
  * @param x X Position
  * @param y Y Position
- * @param sig Standard Deviation
+ * @param sigma Standard Deviation
  */
 inline float gaussian(float x, float y, float sigma) {
     return expf(-(x*x + y*y) / (2.0f*sigma*sigma));
@@ -80,7 +80,6 @@ public:
  * @param inputData dataset containing the points to index
  * @param params Index parameters
  * @param d Distance functor
- * @return
  */
 CompositeIndex(const Matrix<ElementType>& inputData, const IndexParams& params = CompositeIndexParams(),
                Distance d = Distance()) : index_params_(params)
@@ -97,7 +97,6 @@ public:
     }
 
     /** @brief set one bit to 0
-     * @param index
      */
     void reset(size_t index)
     {
|
|||||||
* This function is useful when resetting a given set of bits so that the
|
* This function is useful when resetting a given set of bits so that the
|
||||||
* whole bitset ends up being 0: if that's the case, we don't care about setting
|
* whole bitset ends up being 0: if that's the case, we don't care about setting
|
||||||
* other bits to 0
|
* other bits to 0
|
||||||
* @param index
|
|
||||||
*/
|
*/
|
||||||
void reset_block(size_t index)
|
void reset_block(size_t index)
|
||||||
{
|
{
|
||||||
@ -116,7 +114,6 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** resize the bitset so that it contains at least sz bits
|
/** resize the bitset so that it contains at least sz bits
|
||||||
* @param sz
|
|
||||||
*/
|
*/
|
||||||
void resize(size_t sz)
|
void resize(size_t sz)
|
||||||
{
|
{
|
||||||
|
@@ -101,7 +101,6 @@ public:
      * Print log message
      * @param level Log level
      * @param fmt Message format
-     * @return
      */
     static int log(int level, const char* fmt, ...)
     {
@@ -214,8 +214,6 @@ public:
     }
 
     /** Get a bucket given the key
-     * @param key
-     * @return
      */
     inline const Bucket* getBucketFromKey(BucketKey key) const
     {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Get statistics about the table
|
/** Get statistics about the table
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
LshStats getStats() const;
|
LshStats getStats() const;
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff.