diff --git a/3rdparty/libtengine/tengine.cmake b/3rdparty/libtengine/tengine.cmake deleted file mode 100644 index ee8f0cb86f..0000000000 --- a/3rdparty/libtengine/tengine.cmake +++ /dev/null @@ -1,80 +0,0 @@ -# COPYRIGHT -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# License); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -# Copyright (c) 2020, OPEN AI LAB -# Author: qtang@openailab.com or https://github.com/BUG1989 -# qli@openailab.com -# sqfu@openailab.com - -SET(TENGINE_COMMIT_VERSION "e89cf8870de2ff0a80cfe626c0b52b2a16fb302e") -SET(OCV_TENGINE_DIR "${OpenCV_BINARY_DIR}/3rdparty/libtengine") -SET(OCV_TENGINE_SOURCE_PATH "${OCV_TENGINE_DIR}/Tengine-${TENGINE_COMMIT_VERSION}") - -IF(EXISTS "${OCV_TENGINE_SOURCE_PATH}") - MESSAGE(STATUS "Tengine is exist already at: ${OCV_TENGINE_SOURCE_PATH}") - - SET(Tengine_FOUND ON) - SET(BUILD_TENGINE ON) -ELSE() - SET(OCV_TENGINE_FILENAME "${TENGINE_COMMIT_VERSION}.zip")#name - SET(OCV_TENGINE_URL "https://github.com/OAID/Tengine/archive/") #url - SET(tengine_md5sum 23f61ebb1dd419f1207d8876496289c5) #md5sum - - ocv_download(FILENAME ${OCV_TENGINE_FILENAME} - HASH ${tengine_md5sum} - URL - "${OPENCV_TENGINE_URL}" - "$ENV{OPENCV_TENGINE_URL}" - "${OCV_TENGINE_URL}" - DESTINATION_DIR "${OCV_TENGINE_DIR}" - ID TENGINE - STATUS res - UNPACK RELATIVE_URL) - - if (NOT res) - MESSAGE(STATUS "TENGINE DOWNLOAD FAILED. Turning Tengine_FOUND off.") - SET(Tengine_FOUND OFF) - else () - MESSAGE(STATUS "TENGINE DOWNLOAD success . ") - - SET(Tengine_FOUND ON) - SET(BUILD_TENGINE ON) - endif() -ENDIF() - -if(BUILD_TENGINE) - SET(HAVE_TENGINE 1) - - if(NOT ANDROID) - # linux system - if(CMAKE_SYSTEM_PROCESSOR STREQUAL arm) - SET(TENGINE_TOOLCHAIN_FLAG "-march=armv7-a") - elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) ## AARCH64 - SET(TENGINE_TOOLCHAIN_FLAG "-march=armv8-a") - endif() - endif() - - SET(BUILT_IN_OPENCV ON) ## set for tengine compile discern . 
- SET(Tengine_INCLUDE_DIR "${OCV_TENGINE_SOURCE_PATH}/include" CACHE INTERNAL "") - if(EXISTS "${OCV_TENGINE_SOURCE_PATH}/CMakeLists.txt") - add_subdirectory("${OCV_TENGINE_SOURCE_PATH}" "${OCV_TENGINE_DIR}/build") - else() - message(WARNING "TENGINE: Missing 'CMakeLists.txt' in source code package: ${OCV_TENGINE_SOURCE_PATH}") - endif() - SET(Tengine_LIB "tengine" CACHE INTERNAL "") -endif() diff --git a/3rdparty/readme.txt b/3rdparty/readme.txt index c3068521e3..0e6ce1e05e 100644 --- a/3rdparty/readme.txt +++ b/3rdparty/readme.txt @@ -39,7 +39,9 @@ libspng Portable Network Graphics library. libtiff Tag Image File Format (TIFF) Software Copyright (c) 1988-1997 Sam Leffler Copyright (c) 1991-1997 Silicon Graphics, Inc. - See libtiff home page http://www.libtiff.org/ + See libtiff home page #1 http://www.simplesystems.org/libtiff/ + #2 https://libtiff.gitlab.io/libtiff/ + #3 http://libtiff.maptools.org/ for details and links to the source code WITH_TIFF CMake option must be ON to add libtiff & zlib support to imgcodecs. 
diff --git a/CMakeLists.txt b/CMakeLists.txt index caa8f83c99..bb87b483ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -463,9 +463,6 @@ OCV_OPTION(WITH_ANDROID_MEDIANDK "Use Android Media NDK for Video I/O (Android)" OCV_OPTION(WITH_ANDROID_NATIVE_CAMERA "Use Android NDK for Camera I/O (Android)" (ANDROID_NATIVE_API_LEVEL GREATER 23) VISIBLE_IF ANDROID VERIFY HAVE_ANDROID_NATIVE_CAMERA) -OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF - VISIBLE_IF (ARM OR AARCH64) AND (UNIX OR ANDROID) AND NOT IOS - VERIFY HAVE_TENGINE) OCV_OPTION(WITH_ONNX "Include Microsoft ONNX Runtime support" OFF VISIBLE_IF TRUE VERIFY HAVE_ONNX) @@ -768,9 +765,6 @@ if(WITH_LAPACK) endif() include(cmake/OpenCVFindProtobuf.cmake) include(cmake/OpenCVDetectFlatbuffers.cmake) -if(WITH_TENGINE) - include(cmake/OpenCVFindTengine.cmake) -endif() if(WITH_TIMVX) include(cmake/OpenCVFindTIMVX.cmake) endif() @@ -1623,10 +1617,6 @@ if(WITH_VA OR HAVE_VA) status(" VA:" HAVE_VA THEN "YES" ELSE NO) endif() -if(WITH_TENGINE OR HAVE_TENGINE) - status(" Tengine:" HAVE_TENGINE THEN "YES (${TENGINE_LIBRARIES})" ELSE NO) -endif() - if(WITH_LAPACK OR HAVE_LAPACK) status(" Lapack:" HAVE_LAPACK THEN "YES (${LAPACK_LIBRARIES} ${LAPACK_VERSION})" ELSE NO) endif() @@ -1693,6 +1683,10 @@ else() endif() endif() +if(BUILD_opencv_dnn AND OPENCV_DNN_BACKEND_DEFAULT) + status(" Default DNN backend:" ${OPENCV_DNN_BACKEND_DEFAULT}) +endif() + if(WITH_EIGEN OR HAVE_EIGEN) status(" Eigen:" HAVE_EIGEN THEN "YES (ver ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})" ELSE NO) endif() diff --git a/apps/visualisation/opencv_visualisation.cpp b/apps/visualisation/opencv_visualisation.cpp index 85e9697aad..9b7fcd9f48 100644 --- a/apps/visualisation/opencv_visualisation.cpp +++ b/apps/visualisation/opencv_visualisation.cpp @@ -60,6 +60,7 @@ Created by: Puttemans Steven - April 2016 #include #include +#include using namespace std; using namespace cv; diff --git 
a/cmake/OpenCVDetectPython.cmake b/cmake/OpenCVDetectPython.cmake index 6e95fbf3a0..2c92e33eb6 100644 --- a/cmake/OpenCVDetectPython.cmake +++ b/cmake/OpenCVDetectPython.cmake @@ -251,7 +251,7 @@ if(NOT ${found}) set(${include_path} "${_include_path}" CACHE INTERNAL "") set(${include_dir} "${_include_dir}" CACHE PATH "Python include dir") set(${include_dir2} "${_include_dir2}" CACHE PATH "Python include dir 2") - set(${packages_path} "${_packages_path}" CACHE PATH "Where to install the python packages.") + set(${packages_path} "${_packages_path}" CACHE STRING "Where to install the python packages.") set(${numpy_include_dirs} ${_numpy_include_dirs} CACHE PATH "Path to numpy headers") set(${numpy_version} "${_numpy_version}" CACHE INTERNAL "") endif() diff --git a/cmake/OpenCVFindTengine.cmake b/cmake/OpenCVFindTengine.cmake deleted file mode 100644 index 2d33f5c993..0000000000 --- a/cmake/OpenCVFindTengine.cmake +++ /dev/null @@ -1,78 +0,0 @@ -# COPYRIGHT -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# License); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -# Copyright (c) 2020, OPEN AI LAB -# Author: qtang@openailab.com or https://github.com/BUG1989 -# - -# ---------------------------------------------------------------------------- -# Path for Tengine binaries -# ---------------------------------------------------------------------------- -set(OPENCV_LIBTENGINE_ROOT_DIR "" CACHE PATH "Path to TENGINE binaries installation") - -IF(OPENCV_LIBTENGINE_ROOT_DIR AND NOT BUILD_TENGINE) - - MESSAGE(STATUS "TENGINE:-- Use binaries at ${OPENCV_LIBTENGINE_ROOT_DIR}") - - SET(Tengine_FOUND ON) - set(BUILD_TENGINE OFF) - - SET(Tengine_INCLUDE_DIR "${OPENCV_LIBTENGINE_ROOT_DIR}/include" CACHE PATH "TENGINE include dir") - SET(Tengine_LIB "${OPENCV_LIBTENGINE_ROOT_DIR}/lib/libtengine.a" CACHE PATH "TENGINE library dir") - -ELSE() - IF(ANDROID) - IF(OPENCV_TENGINE_FORCE_ANDROID) - # nothing, use Android - ELSEIF(OPENCV_TENGINE_SKIP_ANDROID) - set(Tengine_FOUND OFF) - set(HAVE_TENGINE FALSE) - return() - ELSEIF(NOT DEFINED ANDROID_NDK_REVISION) - MESSAGE(STATUS "Android NDK version Tengine not support: ANDROID_NDK_REVISION is not defined") - set(Tengine_FOUND OFF) - set(HAVE_TENGINE FALSE) - return() - ELSEIF(ANDROID_NDK_REVISION VERSION_LESS 14) - MESSAGE(STATUS "Android NDK version Tengine not support: ANDROID_NDK_REVISION=${ANDROID_NDK_REVISION}") - set(Tengine_FOUND OFF) - set(HAVE_TENGINE FALSE) - return() - ENDIF() - ENDIF() - MESSAGE(STATUS "TENGINE:-- Build Tengine from source code. ") - include("${OpenCV_SOURCE_DIR}/3rdparty/libtengine/tengine.cmake") -ENDIF() - -IF(NOT Tengine_LIB) - SET(Tengine_FOUND OFF) - MESSAGE(STATUS "#### Could not find Tengine lib. 
Turning Tengine_FOUND off") -ENDIF() - -IF (Tengine_FOUND) - MESSAGE(STATUS "Found Tengine include: ${Tengine_INCLUDE_DIR}") - MESSAGE(STATUS "Found Tengine libraries: ${Tengine_LIB}") - set(HAVE_TENGINE 1) - set(TENGINE_LIBRARIES ${Tengine_LIB}) - set(TENGINE_INCLUDE_DIRS ${Tengine_INCLUDE_DIR}) -ENDIF (Tengine_FOUND) - -MARK_AS_ADVANCED( - Tengine_INCLUDE_DIR - Tengine_LIB -) diff --git a/cmake/mirrors/custom.cmake b/cmake/mirrors/custom.cmake index 3cdf700e19..8c421471f3 100644 --- a/cmake/mirrors/custom.cmake +++ b/cmake/mirrors/custom.cmake @@ -1,15 +1,12 @@ # Gitlab-style mirror # CMake scripts look for opencv/opencv_3rdparty, -# OAID/Tengine, 01org/tbb(oneAPI/oneTBB), opencv/ade +# 01org/tbb(oneAPI/oneTBB), opencv/ade # from OPENCV_DOWNLOAD_MIRROR ocv_update(OPENCV_DOWNLOAD_MIRROR_URL "") ###### # Download via commit id ###### -# Tengine -ocv_update(TENGINE_PKG_MD5_CUSTOM "") -ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e # NVIDIA_OPTICAL_FLOW ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE "") ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191) @@ -77,7 +74,7 @@ else() ocv_download_url_custom_usercontent(opencv) elseif(DL_ID STREQUAL "wechat_qrcode") ocv_download_url_gitcode_usercontent(WeChatCV) - elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) + elseif((DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) ocv_download_url_custom_archive_commit_id() elseif(DL_ID STREQUAL "TBB") ocv_download_url_custom_archive_release() diff --git a/cmake/mirrors/gitcode.cmake b/cmake/mirrors/gitcode.cmake index c9d41e7458..e208a87245 100644 --- a/cmake/mirrors/gitcode.cmake +++ b/cmake/mirrors/gitcode.cmake @@ -1,9 +1,6 @@ ###### # Download via commit id ###### -# Tengine -ocv_update(TENGINE_PKG_MD5_GITCODE 1b5908632b557275cd6e85b0c03f9690) 
-ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e # NVIDIA_OPTICAL_FLOW ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE 8d5b7eeb24d6ca9c6bcfdff4196d5b47) ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191) @@ -74,7 +71,7 @@ if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV") OR (DL_ID STREQUAL "da ocv_download_url_gitcode_usercontent(opencv) elseif(DL_ID STREQUAL "wechat_qrcode") ocv_download_url_gitcode_usercontent(mirrors/WeChatCV) -elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) +elseif((DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) ocv_download_url_gitcode_archive_commit_id() elseif(DL_ID STREQUAL "TBB") ocv_download_url_gitcode_archive_release(OPENCV_TBB_SUBDIR) diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown index 16acc315f5..84201f06c0 100644 --- a/doc/tutorials/introduction/config_reference/config_reference.markdown +++ b/doc/tutorials/introduction/config_reference/config_reference.markdown @@ -224,6 +224,16 @@ Following options can be used to produce special builds with instrumentation or @see [Link time optimization](https://gcc.gnu.org/wiki/LinkTimeOptimization) @see [ThinLTO](https://clang.llvm.org/docs/ThinLTO.html) +## Enable IPP optimization + +The following options enable IPP optimizations for individual functions, at the cost of a larger OpenCV library. All options are disabled by default. 
+ +| Option | Functions | + roughly size | +| -------| --------- | -------------- | +| `OPENCV_IPP_GAUSSIAN_BLUR` | GaussianBlur() | +8Mb | +| `OPENCV_IPP_MEAN` | mean() / meanStdDev() | +0.2Mb | +| `OPENCV_IPP_MINMAX` | minMaxLoc() / minMaxIdx() | +0.2Mb | +| `OPENCV_IPP_SUM` | sum() | +0.1Mb | # Functional features and dependencies {#tutorial_config_reference_func} @@ -484,7 +494,6 @@ OpenCV have own DNN inference module which have own build-in engine, but can als | `OPENCV_DNN_CUDA` | _OFF_ | Enable CUDA backend. [CUDA](https://en.wikipedia.org/wiki/CUDA), CUBLAS and [CUDNN](https://developer.nvidia.com/cudnn) must be installed. | | `WITH_HALIDE` | _OFF_ | Use experimental [Halide](https://en.wikipedia.org/wiki/Halide_(programming_language)) backend which can generate optimized code for dnn-layers at runtime. Halide must be installed. | | `WITH_VULKAN` | _OFF_ | Enable experimental [Vulkan](https://en.wikipedia.org/wiki/Vulkan_(API)) backend. Does not require additional dependencies, but can use external Vulkan headers (`VULKAN_INCLUDE_DIRS`). | -| `WITH_TENGINE` | _OFF_ | Enable experimental [Tengine](https://github.com/OAID/Tengine) backend for ARM CPUs. Tengine library must be installed. | # Installation layout {#tutorial_config_reference_install} @@ -566,6 +575,7 @@ Following options can be used to change installation layout for common scenarios | ------ | ------- | ----------- | | `OPENCV_ENABLE_NONFREE` | _OFF_ | Some algorithms included in the library are known to be protected by patents and are disabled by default. | | `OPENCV_FORCE_3RDPARTY_BUILD`| _OFF_ | Enable all `BUILD_` options at once. | +| `OPENCV_IPP_ENABLE_ALL`| _OFF_ | Enable all `OPENCV_IPP_` options at once. | | `ENABLE_CCACHE` | _ON_ (on Unix-like platforms) | Enable [ccache](https://en.wikipedia.org/wiki/Ccache) auto-detection. This tool wraps compiler calls and caches results, can significantly improve re-compilation time. 
| | `ENABLE_PRECOMPILED_HEADERS` | _ON_ (for MSVC) | Enable precompiled headers support. Improves build time. | | `BUILD_DOCS` | _OFF_ | Enable documentation build (_doxygen_, _doxygen_cpp_, _doxygen_python_, _doxygen_javadoc_ targets). [Doxygen](http://www.doxygen.org/index.html) must be installed for C++ documentation build. Python and [BeautifulSoup4](https://en.wikipedia.org/wiki/Beautiful_Soup_(HTML_parser)) must be installed for Python documentation build. Javadoc and Ant must be installed for Java documentation build (part of Java SDK). | diff --git a/modules/3d/src/ap3p.cpp b/modules/3d/src/ap3p.cpp index 34ac9c4323..79da0f13a7 100644 --- a/modules/3d/src/ap3p.cpp +++ b/modules/3d/src/ap3p.cpp @@ -1,5 +1,6 @@ #include "precomp.hpp" #include "ap3p.h" +#include "polynom_solver.h" #include #include @@ -7,67 +8,11 @@ static inline double cbrt(double x) { return (double)cv::cubeRoot((float)x); }; #endif -namespace cv { - -static -void solveQuartic(const double *factors, double *realRoots) -{ - const double &a4 = factors[0]; - const double &a3 = factors[1]; - const double &a2 = factors[2]; - const double &a1 = factors[3]; - const double &a0 = factors[4]; - - double a4_2 = a4 * a4; - double a3_2 = a3 * a3; - double a4_3 = a4_2 * a4; - double a2a4 = a2 * a4; - - double p4 = (8 * a2a4 - 3 * a3_2) / (8 * a4_2); - double q4 = (a3_2 * a3 - 4 * a2a4 * a3 + 8 * a1 * a4_2) / (8 * a4_3); - double r4 = (256 * a0 * a4_3 - 3 * (a3_2 * a3_2) - 64 * a1 * a3 * a4_2 + 16 * a2a4 * a3_2) / (256 * (a4_3 * a4)); - - double p3 = ((p4 * p4) / 12 + r4) / 3; // /=-3 - double q3 = (72 * r4 * p4 - 2 * p4 * p4 * p4 - 27 * q4 * q4) / 432; // /=2 - - double t; // *=2 - std::complex w; - if (q3 >= 0) - w = -std::sqrt(static_cast >(q3 * q3 - p3 * p3 * p3)) - q3; - else - w = std::sqrt(static_cast >(q3 * q3 - p3 * p3 * p3)) - q3; - if (w.imag() == 0.0) { - w.real(std::cbrt(w.real())); - t = 2.0 * (w.real() + p3 / w.real()); - } else { - w = pow(w, 1.0 / 3); - t = 4.0 * w.real(); - } - - 
std::complex sqrt_2m = sqrt(static_cast >(-2 * p4 / 3 + t)); - double B_4A = -a3 / (4 * a4); - double complex1 = 4 * p4 / 3 + t; -#if defined(__clang__) && defined(__arm__) && (__clang_major__ == 3 || __clang_major__ == 4) && !defined(__ANDROID__) - // details: https://github.com/opencv/opencv/issues/11135 - // details: https://github.com/opencv/opencv/issues/11056 - std::complex complex2 = 2 * q4; - complex2 = std::complex(complex2.real() / sqrt_2m.real(), 0); -#else - std::complex complex2 = 2 * q4 / sqrt_2m; -#endif - double sqrt_2m_rh = sqrt_2m.real() / 2; - double sqrt1 = sqrt(-(complex1 + complex2)).real() / 2; - realRoots[0] = B_4A + sqrt_2m_rh + sqrt1; - realRoots[1] = B_4A + sqrt_2m_rh - sqrt1; - double sqrt2 = sqrt(-(complex1 - complex2)).real() / 2; - realRoots[2] = B_4A - sqrt_2m_rh + sqrt2; - realRoots[3] = B_4A - sqrt_2m_rh - sqrt2; -} - -static void polishQuarticRoots(const double *coeffs, double *roots) { +namespace { +void polishQuarticRoots(const double *coeffs, double *roots, int nb_roots) { const int iterations = 2; for (int i = 0; i < iterations; ++i) { - for (int j = 0; j < 4; ++j) { + for (int j = 0; j < nb_roots; ++j) { double error = (((coeffs[0] * roots[j] + coeffs[1]) * roots[j] + coeffs[2]) * roots[j] + coeffs[3]) * roots[j] + coeffs[4]; @@ -124,7 +69,9 @@ inline void mat_mult(const double a[3][3], const double b[3][3], double result[3 result[2][1] = a[2][0] * b[0][1] + a[2][1] * b[1][1] + a[2][2] * b[2][1]; result[2][2] = a[2][0] * b[0][2] + a[2][1] * b[1][2] + a[2][2] * b[2][2]; } +} +namespace cv { void ap3p::init_inverse_parameters() { inv_fx = 1. / fx; inv_fy = 1. 
/ fy; @@ -228,8 +175,9 @@ int ap3p::computePoses(const double featureVectors[3][4], 2 * (g6 * g7 - g1 * g2 - g3 * g4), g7 * g7 - g2 * g2 - g4 * g4}; double s[4]; - solveQuartic(coeffs, s); - polishQuarticRoots(coeffs, s); + int nb_roots = solve_deg4(coeffs[0], coeffs[1], coeffs[2], coeffs[3], coeffs[4], + s[0], s[1], s[2], s[3]); + polishQuarticRoots(coeffs, s, nb_roots); double temp[3]; vect_cross(k1, nl, temp); @@ -255,7 +203,7 @@ int ap3p::computePoses(const double featureVectors[3][4], double reproj_errors[4]; int nb_solutions = 0; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < nb_roots; ++i) { double ctheta1p = s[i]; if (abs(ctheta1p) > 1) continue; diff --git a/modules/3d/src/usac/essential_solver.cpp b/modules/3d/src/usac/essential_solver.cpp index 504fec6ab5..434db6d373 100644 --- a/modules/3d/src/usac/essential_solver.cpp +++ b/modules/3d/src/usac/essential_solver.cpp @@ -239,7 +239,8 @@ public: // (5) Compute the left eigenvectors of the action matrix Eigen::EigenSolver> eigensolver(action_mat_eig); const Eigen::VectorXcd &eigenvalues = eigensolver.eigenvalues(); - const auto * const eig_vecs_ = (double *) eigensolver.eigenvectors().real().data(); + const Eigen::MatrixXcd eigenvectors = eigensolver.eigenvectors(); + const auto * const eig_vecs_ = (double *) eigenvectors.data(); #else Matx A = constraint_mat.colRange(0, 10), B = constraint_mat.colRange(10, 20), eliminated_mat; diff --git a/modules/3d/test/test_affine2d_estimator.cpp b/modules/3d/test/test_affine2d_estimator.cpp index 95f1235105..2282dc3240 100644 --- a/modules/3d/test/test_affine2d_estimator.cpp +++ b/modules/3d/test/test_affine2d_estimator.cpp @@ -115,8 +115,8 @@ TEST_P(EstimateAffine2D, testNPoints) EXPECT_NEAR(0., cvtest::norm(aff_est, aff, NORM_INF), 1e-4); - bool inliers_good = count(inliers.begin(), inliers.end(), 1) == m && - m == accumulate(inliers.begin(), inliers.begin() + m, 0); + bool inliers_good = std::count(inliers.begin(), inliers.end(), 1) == m && + m == 
std::accumulate(inliers.begin(), inliers.begin() + m, 0); EXPECT_TRUE(inliers_good); } diff --git a/modules/3d/test/test_affine3d_estimator.cpp b/modules/3d/test/test_affine3d_estimator.cpp index f5a118da5d..c355605385 100644 --- a/modules/3d/test/test_affine3d_estimator.cpp +++ b/modules/3d/test/test_affine3d_estimator.cpp @@ -161,8 +161,8 @@ bool CV_Affine3D_EstTest::testNPoints() return false; } - bool outl_good = count(outl.begin(), outl.end(), 1) == m && - m == accumulate(outl.begin(), outl.begin() + m, 0); + bool outl_good = std::count(outl.begin(), outl.end(), 1) == m && + m == std::accumulate(outl.begin(), outl.begin() + m, 0); if (!outl_good) { diff --git a/modules/3d/test/test_affine_partial2d_estimator.cpp b/modules/3d/test/test_affine_partial2d_estimator.cpp index 0be25ee7eb..dbbb4da0d9 100644 --- a/modules/3d/test/test_affine_partial2d_estimator.cpp +++ b/modules/3d/test/test_affine_partial2d_estimator.cpp @@ -125,8 +125,8 @@ TEST_P(EstimateAffinePartial2D, testNPoints) EXPECT_NEAR(0., cvtest::norm(aff_est, aff, NORM_INF), 1e-4); - bool inliers_good = count(inliers.begin(), inliers.end(), 1) == m && - m == accumulate(inliers.begin(), inliers.begin() + m, 0); + bool inliers_good = std::count(inliers.begin(), inliers.end(), 1) == m && + m == std::accumulate(inliers.begin(), inliers.begin() + m, 0); EXPECT_TRUE(inliers_good); } diff --git a/modules/3d/test/test_solvepnp_ransac.cpp b/modules/3d/test/test_solvepnp_ransac.cpp index be6f1342a7..b22dff15a9 100644 --- a/modules/3d/test/test_solvepnp_ransac.cpp +++ b/modules/3d/test/test_solvepnp_ransac.cpp @@ -41,6 +41,7 @@ //M*/ #include "test_precomp.hpp" +#include "opencv2/core/utils/logger.hpp" namespace opencv_test { namespace { @@ -2259,4 +2260,65 @@ TEST(Calib3d_SolvePnP, inputShape) } } +bool hasNan(const cv::Mat& mat) +{ + bool has = false; + if (mat.type() == CV_32F) + { + for(int i = 0; i < static_cast(mat.total()); i++) + has |= cvIsNaN(mat.at(i)) != 0; + } + else if (mat.type() == CV_64F) + { + 
for(int i = 0; i < static_cast(mat.total()); i++) + has |= cvIsNaN(mat.at(i)) != 0; + } + else + { + has = true; + CV_LOG_ERROR(NULL, "check hasNan called with unsupported type!"); + } + + return has; +} + +TEST(AP3P, ctheta1p_nan_23607) +{ + // the task is not well defined and may not converge (empty R, t) or should + // converge to some non-NaN solution + const std::array cameraPts = { + cv::Point2d{0.042784865945577621, 0.59844839572906494}, + cv::Point2d{-0.028428621590137482, 0.60354739427566528}, + cv::Point2d{0.0046037044376134872, 0.70674681663513184} + }; + const std::array modelPts = { + cv::Point3d{-0.043258000165224075, 0.020459245890378952, -0.0069921980611979961}, + cv::Point3d{-0.045648999512195587, 0.0029820732306689024, 0.0079000638797879219}, + cv::Point3d{-0.043276999145746231, -0.013622495345771313, 0.0080113131552934647} + }; + + std::vector R, t; + solveP3P(modelPts, cameraPts, Mat::eye(3, 3, CV_64F), Mat(), R, t, SOLVEPNP_AP3P); + + EXPECT_EQ(R.size(), 2ul); + EXPECT_EQ(t.size(), 2ul); + + // Apply each rvec/tvec to the model points and check that they project onto the camera points. 
+ Mat pts = Mat(modelPts).reshape(1, 3); + Mat expected = Mat(cameraPts).reshape(1, 3); + for (size_t i = 0; i < R.size(); ++i) { + EXPECT_TRUE(!hasNan(R[i])); + EXPECT_TRUE(!hasNan(t[i])); + + Mat transform; + cv::Rodrigues(R[i], transform); + Mat res = pts * transform.t(); + for (int j = 0; j < 3; ++j) { + res.row(j) += t[i].reshape(1, 1); + res.row(j) /= res.row(j).at(2); + } + EXPECT_LE(cvtest::norm(res.colRange(0, 2), expected, NORM_INF), 3e-16); + } +} + }} // namespace diff --git a/modules/3d/test/test_translation3d_estimator.cpp b/modules/3d/test/test_translation3d_estimator.cpp index 88ad40e0f8..97c20e5033 100644 --- a/modules/3d/test/test_translation3d_estimator.cpp +++ b/modules/3d/test/test_translation3d_estimator.cpp @@ -91,8 +91,8 @@ TEST(Calib3d_EstimateTranslation3D, testNPoints) << "aff est: " << trans_est << endl << "aff ref: " << trans; - bool outl_good = count(outl.begin(), outl.end(), 1) == m && - m == accumulate(outl.begin(), outl.begin() + m, 0); + bool outl_good = std::count(outl.begin(), outl.end(), 1) == m && + m == std::accumulate(outl.begin(), outl.begin() + m, 0); EXPECT_TRUE(outl_good); } diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 1b3f574275..ba5b61ef5f 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -60,6 +60,26 @@ if(CV_TRACE AND HAVE_ITT) add_definitions(-DOPENCV_WITH_ITT=1) endif() +# https://github.com/opencv/opencv/issues/24145 +if(HAVE_IPP) + OCV_OPTION(OPENCV_IPP_ENABLE_ALL "Enable all OPENCV_IPP_ options at once" OFF) + OCV_OPTION(OPENCV_IPP_MEAN "Enable IPP optimizations for mean (+200Kb in binary size)" OPENCV_IPP_ENABLE_ALL) + OCV_OPTION(OPENCV_IPP_MINMAX "Enable IPP optimizations for minMaxLoc/minMaxIdx (+200Kb in binary size)" OPENCV_IPP_ENABLE_ALL) + OCV_OPTION(OPENCV_IPP_SUM "Enable IPP optimizations for sum (+100Kb in binary size)" OPENCV_IPP_ENABLE_ALL) + + if(OPENCV_IPP_MEAN) + 
ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/mean.dispatch.cpp "OPENCV_IPP_MEAN=1") + endif() + + if(OPENCV_IPP_MINMAX) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/minmax.cpp "OPENCV_IPP_MINMAX=1") + endif() + + if(OPENCV_IPP_SUM) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/sum.dispatch.cpp "OPENCV_IPP_SUM=1") + endif() +endif() + file(GLOB lib_cuda_hdrs "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/cuda/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/cuda/*.h") diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 96cf00a50d..89046d7907 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -1118,6 +1118,13 @@ CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode); */ CV_EXPORTS_W void flipND(InputArray src, OutputArray dst, int axis); +/** @brief Broadcast the given Mat to the given shape. + * @param src input array + * @param shape target shape. Should be a list of CV_32S numbers. Note that negative values are not supported. + * @param dst output array that has the given shape + */ +CV_EXPORTS_W void broadcast(InputArray src, InputArray shape, OutputArray dst); + enum RotateFlags { ROTATE_90_CLOCKWISE = 0, //!(dst)); + } //! copies the GpuMat content to device memory (Non-Blocking call) - CV_WRAP void copyTo(OutputArray dst, Stream& stream) const; + void copyTo(OutputArray dst, Stream& stream) const; + //! bindings overload which copies the GpuMat content to device memory (Non-Blocking call) + CV_WRAP void copyTo(CV_OUT GpuMat& dst, Stream& stream) const { + copyTo(static_cast(dst), stream); + } //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call) - CV_WRAP void copyTo(OutputArray dst, InputArray mask) const; + void copyTo(OutputArray dst, InputArray mask) const; + //! 
bindings overload which copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call) + CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask) const { + copyTo(static_cast(dst), static_cast(mask)); + } //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call) - CV_WRAP void copyTo(OutputArray dst, InputArray mask, Stream& stream) const; + void copyTo(OutputArray dst, InputArray mask, Stream& stream) const; + //! bindings overload which copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call) + CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask, Stream& stream) const { + copyTo(static_cast(dst), static_cast(mask), stream); + } //! sets some of the GpuMat elements to s (Blocking call) CV_WRAP GpuMat& setTo(Scalar s); @@ -222,19 +238,31 @@ public: CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream); //! converts GpuMat to another datatype (Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype) const; + void convertTo(OutputArray dst, int rtype) const; //! converts GpuMat to another datatype (Non-Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, Stream& stream) const; + void convertTo(OutputArray dst, int rtype, Stream& stream) const; + //! bindings overload which converts GpuMat to another datatype (Non-Blocking call) + CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, Stream& stream) const { + convertTo(static_cast(dst), rtype, stream); + } //! converts GpuMat to another datatype with scaling (Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const; + void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const; + //! 
bindings overload which converts GpuMat to another datatype with scaling(Blocking call) + CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha = 1.0, double beta = 0.0) const { + convertTo(static_cast(dst), rtype, alpha, beta); + } //! converts GpuMat to another datatype with scaling (Non-Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const; + void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const; //! converts GpuMat to another datatype with scaling (Non-Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const; + void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const; + //! bindings overload which converts GpuMat to another datatype with scaling (Non-Blocking call) + CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha, double beta, Stream& stream) const { + convertTo(static_cast(dst), rtype, alpha, beta, stream); + } CV_WRAP void assignTo(GpuMat& m, int type = -1) const; diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index 5f8c9afbe3..2ae64ca8e4 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -2014,12 +2014,12 @@ inline v_int32x4 v_trunc(const v_float32x4& a) inline v_int32x4 v_round(const v_float64x2& a) { static const int32x2_t zero = vdup_n_s32(0); - return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero)); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtnq_s64_f64(a.val)), zero)); } inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) { - return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), vmovn_s64(vcvtaq_s64_f64(b.val)))); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtnq_s64_f64(a.val)), vmovn_s64(vcvtnq_s64_f64(b.val)))); } inline v_int32x4 v_floor(const 
v_float64x2& a) diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp index dab82489f8..6c28b44f5b 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp @@ -924,6 +924,9 @@ inline scalartype v_reduce_sum(const _Tpvec& a) \ return (scalartype)v_get0(res); \ } OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float32, v_float32, vfloat32m1_t, float, f32, VTraits::vlanes()) +#if CV_SIMD_SCALABLE_64F +OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float64, v_float64, vfloat64m1_t, double, f64, VTraits::vlanes()) +#endif #define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, vl, red) \ inline scalartype v_reduce_##func(const _Tpvec& a) \ diff --git a/modules/core/include/opencv2/core/opencl/opencl_info.hpp b/modules/core/include/opencv2/core/opencl/opencl_info.hpp index 3ead76e5c4..0f0de893ca 100644 --- a/modules/core/include/opencv2/core/opencl/opencl_info.hpp +++ b/modules/core/include/opencv2/core/opencl/opencl_info.hpp @@ -3,6 +3,7 @@ // of this distribution and at http://opencv.org/license.html. 
#include +#include #include #include diff --git a/modules/core/include/opencv2/core/utils/filesystem.private.hpp b/modules/core/include/opencv2/core/utils/filesystem.private.hpp index c32be15c61..70df64f0d4 100644 --- a/modules/core/include/opencv2/core/utils/filesystem.private.hpp +++ b/modules/core/include/opencv2/core/utils/filesystem.private.hpp @@ -12,7 +12,8 @@ # elif defined WINRT || defined _WIN32_WCE /* not supported */ # elif defined __ANDROID__ || defined __linux__ || defined _WIN32 || \ - defined __FreeBSD__ || defined __bsdi__ || defined __HAIKU__ + defined __FreeBSD__ || defined __bsdi__ || defined __HAIKU__ || \ + defined __GNU__ # define OPENCV_HAVE_FILESYSTEM_SUPPORT 1 # elif defined(__APPLE__) # include diff --git a/modules/core/perf/perf_arithm.cpp b/modules/core/perf/perf_arithm.cpp index 3ac9a24639..872963fc65 100644 --- a/modules/core/perf/perf_arithm.cpp +++ b/modules/core/perf/perf_arithm.cpp @@ -5,8 +5,35 @@ namespace opencv_test { using namespace perf; +using BroadcastTest = perf::TestBaseWithParam, perf::MatType, std::vector>>; typedef Size_MatType BinaryOpTest; +PERF_TEST_P_(BroadcastTest, basic) +{ + std::vector shape_src = get<0>(GetParam()); + int dt_type = get<1>(GetParam()); + std::vector shape_dst = get<2>(GetParam()); + + cv::Mat src(static_cast(shape_src.size()), shape_src.data(), dt_type); + cv::Mat dst(static_cast(shape_dst.size()), shape_dst.data(), dt_type); + + cv::randu(src, -1.f, 1.f); + + TEST_CYCLE() cv::broadcast(src, shape_dst, dst); + + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(/*nothing*/ , BroadcastTest, + testing::Combine( + testing::Values(std::vector{1, 100, 800}, + std::vector{10, 1, 800}, + std::vector{10, 100, 1}), + testing::Values(CV_32FC1), + testing::Values(std::vector{10, 100, 800}) + ) +); + PERF_TEST_P_(BinaryOpTest, min) { Size sz = get<0>(GetParam()); diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 67cc051e0b..9f85ea5f04 100644 --- a/modules/core/src/arithm.cpp 
+++ b/modules/core/src/arithm.cpp @@ -1335,7 +1335,7 @@ struct InRange_SIMD } }; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) template <> struct InRange_SIMD @@ -1344,7 +1344,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = v_uint8::nlanes; + const int width = VTraits::vlanes(); for (; x <= len - width; x += width) { @@ -1352,7 +1352,7 @@ struct InRange_SIMD v_uint8 low = vx_load(src2 + x); v_uint8 high = vx_load(src3 + x); - v_store(dst + x, (values >= low) & (high >= values)); + v_store(dst + x, v_and(v_ge(values, low), v_ge(high, values))); } vx_cleanup(); return x; @@ -1366,7 +1366,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = v_int8::nlanes; + const int width = VTraits::vlanes(); for (; x <= len - width; x += width) { @@ -1374,7 +1374,7 @@ struct InRange_SIMD v_int8 low = vx_load(src2 + x); v_int8 high = vx_load(src3 + x); - v_store((schar*)(dst + x), (values >= low) & (high >= values)); + v_store((schar*)(dst + x), v_and(v_ge(values, low), v_ge(high, values))); } vx_cleanup(); return x; @@ -1388,7 +1388,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = v_uint16::nlanes * 2; + const int width = VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1396,11 +1396,11 @@ struct InRange_SIMD v_uint16 low1 = vx_load(src2 + x); v_uint16 high1 = vx_load(src3 + x); - v_uint16 values2 = vx_load(src1 + x + v_uint16::nlanes); - v_uint16 low2 = vx_load(src2 + x + v_uint16::nlanes); - v_uint16 high2 = vx_load(src3 + x + v_uint16::nlanes); + v_uint16 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_uint16 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_uint16 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_store(dst + x, v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2))); + v_store(dst + x, v_pack(v_and(v_ge(values1, low1), v_ge(high1, values1)), v_and(v_ge(values2, low2), v_ge(high2, values2)))); } 
vx_cleanup(); return x; @@ -1414,7 +1414,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = (int)v_int16::nlanes * 2; + const int width = (int)VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1422,11 +1422,11 @@ struct InRange_SIMD v_int16 low1 = vx_load(src2 + x); v_int16 high1 = vx_load(src3 + x); - v_int16 values2 = vx_load(src1 + x + v_int16::nlanes); - v_int16 low2 = vx_load(src2 + x + v_int16::nlanes); - v_int16 high2 = vx_load(src3 + x + v_int16::nlanes); + v_int16 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_int16 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_int16 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_store((schar*)(dst + x), v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2))); + v_store((schar*)(dst + x), v_pack(v_and(v_ge(values1, low1), v_ge(high1, values1)), v_and(v_ge(values2, low2), v_ge(high2, values2)))); } vx_cleanup(); return x; @@ -1440,7 +1440,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = (int)v_int32::nlanes * 2; + const int width = (int)VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1448,11 +1448,11 @@ struct InRange_SIMD v_int32 low1 = vx_load(src2 + x); v_int32 high1 = vx_load(src3 + x); - v_int32 values2 = vx_load(src1 + x + v_int32::nlanes); - v_int32 low2 = vx_load(src2 + x + v_int32::nlanes); - v_int32 high2 = vx_load(src3 + x + v_int32::nlanes); + v_int32 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_int32 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_int32 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_pack_store(dst + x, v_reinterpret_as_u16(v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2)))); + v_pack_store(dst + x, v_reinterpret_as_u16(v_pack(v_and(v_ge(values1, low1), v_ge(high1, values1)), v_and(v_ge(values2, low2), v_ge(high2, values2))))); } vx_cleanup(); return x; @@ -1466,7 +1466,7 @@ struct InRange_SIMD 
uchar * dst, int len) const { int x = 0; - const int width = (int)v_float32::nlanes * 2; + const int width = (int)VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1474,12 +1474,12 @@ struct InRange_SIMD v_float32 low1 = vx_load(src2 + x); v_float32 high1 = vx_load(src3 + x); - v_float32 values2 = vx_load(src1 + x + v_float32::nlanes); - v_float32 low2 = vx_load(src2 + x + v_float32::nlanes); - v_float32 high2 = vx_load(src3 + x + v_float32::nlanes); + v_float32 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_float32 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_float32 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_pack_store(dst + x, v_pack(v_reinterpret_as_u32(values1 >= low1) & v_reinterpret_as_u32(high1 >= values1), - v_reinterpret_as_u32(values2 >= low2) & v_reinterpret_as_u32(high2 >= values2))); + v_pack_store(dst + x, v_pack(v_and(v_reinterpret_as_u32(v_ge(values1, low1)), v_reinterpret_as_u32(v_ge(high1, values1))), + v_and(v_reinterpret_as_u32(v_ge(values2, low2)), v_reinterpret_as_u32(v_ge(high2, values2))))); } vx_cleanup(); return x; diff --git a/modules/core/src/arithm.simd.hpp b/modules/core/src/arithm.simd.hpp index 20e70e5392..7054b3e6b6 100644 --- a/modules/core/src/arithm.simd.hpp +++ b/modules/core/src/arithm.simd.hpp @@ -215,7 +215,7 @@ template struct op_add { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a + b; } + { return v_add(a, b); } static inline T1 r(T1 a, T1 b) { return c_add(a, b); } }; @@ -225,7 +225,7 @@ template struct op_sub { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a - b; } + { return v_sub(a, b); } static inline T1 r(T1 a, T1 b) { return c_sub(a, b); } }; @@ -262,7 +262,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_int8 r(const v_int8& a, const v_int8& b) { return v_absdiffs(a, b); } #endif @@ -272,7 +272,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD +#if CV_SIMD || 
CV_SIMD_SCALABLE static inline v_int16 r(const v_int16& a, const v_int16& b) { return v_absdiffs(a, b); } #endif @@ -282,7 +282,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_int32 r(const v_int32& a, const v_int32& b) { return v_reinterpret_as_s32(v_absdiff(a, b)); } #endif @@ -295,7 +295,7 @@ template struct op_or { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a | b; } + { return v_or(a, b); } static inline T1 r(T1 a, T1 b) { return a | b; } }; @@ -303,7 +303,7 @@ template struct op_xor { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a ^ b; } + { return v_xor(a, b); } static inline T1 r(T1 a, T1 b) { return a ^ b; } }; @@ -311,7 +311,7 @@ template struct op_and { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a & b; } + { return v_and(a, b); } static inline T1 r(T1 a, T1 b) { return a & b; } }; @@ -320,14 +320,14 @@ struct op_not { // ignored b from loader level static inline Tvec r(const Tvec& a) - { return ~a; } + { return v_not(a); } static inline T1 r(T1 a, T1) { return ~a; } }; //////////////////////////// Loaders ///////////////////////////////// -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE template< template class OP, typename T1, typename Tvec> struct bin_loader @@ -392,13 +392,13 @@ template class OP, typename T1, typename Tv static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, T1* dst, size_t step, int width, int height) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef bin_loader ldr; - enum {wide_step = Tvec::nlanes}; + const int wide_step = VTraits::vlanes(); #if !CV_NEON && CV_SIMD_WIDTH == 16 - enum {wide_step_l = wide_step * 2}; + const int wide_step_l = wide_step * 2; #else - enum {wide_step_l = wide_step}; + const int wide_step_l = wide_step; #endif #endif // CV_SIMD @@ -410,7 +410,7 @@ static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, { int x = 0; - 
#if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE #if !CV_NEON && !CV_MSA if (is_aligned(src1, src2, dst)) { @@ -583,7 +583,7 @@ template struct op_cmplt { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a < b; } + { return v_lt(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a < b); } }; @@ -592,7 +592,7 @@ template struct op_cmple { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a <= b; } + { return v_le(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a <= b); } }; @@ -601,7 +601,7 @@ template struct op_cmpeq { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a == b; } + { return v_eq(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a == b); } }; @@ -610,14 +610,14 @@ template struct op_cmpne { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a != b; } + { return v_ne(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a != b); } }; //////////////////////////// Loaders ///////////////////////////////// -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE // todo: add support for RW alignment & stream template class OP, typename T1, typename Tvec> struct cmp_loader_n @@ -642,10 +642,10 @@ template class OP, typename T1, typename Tv struct cmp_loader_n { typedef OP op; - enum {step = Tvec::nlanes}; static inline void l(const T1* src1, const T1* src2, uchar* dst) { + const int step = VTraits::vlanes(); Tvec c0 = op::r(vx_load(src1), vx_load(src2)); Tvec c1 = op::r(vx_load(src1 + step), vx_load(src2 + step)); v_store(dst, v_pack_b(v_reinterpret_as_u16(c0), v_reinterpret_as_u16(c1))); @@ -656,10 +656,10 @@ template class OP, typename T1, typename Tv struct cmp_loader_n { typedef OP op; - enum {step = Tvec::nlanes}; static inline void l(const T1* src1, const T1* src2, uchar* dst) { + const int step = VTraits::vlanes(); v_uint32 c0 = v_reinterpret_as_u32(op::r(vx_load(src1), vx_load(src2))); v_uint32 c1 = v_reinterpret_as_u32(op::r(vx_load(src1 + step), 
vx_load(src2 + step))); v_uint32 c2 = v_reinterpret_as_u32(op::r(vx_load(src1 + step * 2), vx_load(src2 + step * 2))); @@ -672,10 +672,10 @@ template class OP, typename T1, typename Tv struct cmp_loader_n { typedef OP op; - enum {step = Tvec::nlanes}; static inline void l(const T1* src1, const T1* src2, uchar* dst) { + const int step = VTraits::vlanes(); v_uint64 c0 = v_reinterpret_as_u64(op::r(vx_load(src1), vx_load(src2))); v_uint64 c1 = v_reinterpret_as_u64(op::r(vx_load(src1 + step), vx_load(src2 + step))); v_uint64 c2 = v_reinterpret_as_u64(op::r(vx_load(src1 + step * 2), vx_load(src2 + step * 2))); @@ -697,9 +697,9 @@ template class OP, typename T1, typename Tv static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, uchar* dst, size_t step, int width, int height) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef cmp_loader_n ldr; - enum {wide_step = Tvec::nlanes * sizeof(T1)}; + const int wide_step = VTraits::vlanes() * sizeof(T1); #endif // CV_SIMD step1 /= sizeof(T1); @@ -709,7 +709,7 @@ static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, { int x = 0; - #if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, src2 + x, dst + x); @@ -876,7 +876,7 @@ DEFINE_SIMD_ALL(cmp) //////////////////////////// Loaders /////////////////////////////// -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE // todo: add support for RW alignment & stream template class OP, typename T1, typename T2, typename Tvec> struct scalar_loader_n @@ -1009,10 +1009,10 @@ template class OP, typename T2 struct scalar_loader_n { typedef OP op; - enum {step = v_int32::nlanes}; static inline void l(const int* src1, const int* src2, const T2* scalar, int* dst) { + const int step = VTraits::vlanes(); v_int32 v_src1 = vx_load(src1); v_int32 v_src2 = vx_load(src2); v_int32 v_src1s = vx_load(src1 + step); @@ -1039,6 +1039,7 @@ struct scalar_loader_n static inline void l(const 
int* src1, const T2* scalar, int* dst) { + const int step = VTraits::vlanes(); v_int32 v_src1 = vx_load(src1); v_int32 v_src1s = vx_load(src1 + step); @@ -1064,10 +1065,9 @@ template class OP, typename T2 struct scalar_loader_n { typedef OP op; - enum {step = v_float32::nlanes}; - static inline void l(const float* src1, const float* src2, const T2* scalar, float* dst) { + const int step = VTraits::vlanes(); v_float32 v_src1 = vx_load(src1); v_float32 v_src2 = vx_load(src2); v_float32 v_src1s = vx_load(src1 + step); @@ -1082,6 +1082,7 @@ struct scalar_loader_n static inline void l(const float* src1, const T2* scalar, float* dst) { + const int step = VTraits::vlanes(); v_float32 v_src1 = vx_load(src1); v_float32 v_src1s = vx_load(src1 + step); @@ -1258,10 +1259,10 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste T1* dst, size_t step, int width, int height, const T2* scalar) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef scalar_loader_n ldr; - const int wide_step = sizeof(T1) > sizeof(ushort) ? Tvec::nlanes * 2 : - sizeof(T1) == sizeof(uchar) ? Tvec::nlanes / 2 : Tvec::nlanes; + const int wide_step = sizeof(T1) > sizeof(ushort) ? VTraits::vlanes() * 2 : + sizeof(T1) == sizeof(uchar) ? VTraits::vlanes() / 2 : VTraits::vlanes(); #endif // CV_SIMD step1 /= sizeof(T1); @@ -1272,7 +1273,7 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste { int x = 0; - #if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, src2 + x, scalar, dst + x); @@ -1304,10 +1305,10 @@ template class OP, typename T1 static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int width, int height, const T2* scalar) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef scalar_loader_n ldr; - const int wide_step = sizeof(T1) > sizeof(ushort) ? Tvec::nlanes * 2 : - sizeof(T1) == sizeof(uchar) ? 
Tvec::nlanes / 2 : Tvec::nlanes; + const int wide_step = sizeof(T1) > sizeof(ushort) ? VTraits::vlanes() * 2 : + sizeof(T1) == sizeof(uchar) ? VTraits::vlanes() / 2 : VTraits::vlanes(); #endif // CV_SIMD step1 /= sizeof(T1); @@ -1317,7 +1318,7 @@ static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int { int x = 0; - #if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, scalar, dst + x); @@ -1424,7 +1425,7 @@ template struct op_mul { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a * b; } + { return v_mul(a, b); } static inline T1 r(T1 a, T1 b) { return saturate_cast(a * b); } }; @@ -1432,11 +1433,11 @@ struct op_mul template struct op_mul_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return v_scalar * a * b; + return v_mul(v_scalar , a , b); } #endif static inline T1 r(T1 a, T1 b, const T2* scalar) @@ -1452,7 +1453,7 @@ struct op_mul_scale static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); - return v_scalar * a * b; + return v_mul(v_mul(v_scalar, a), b); } #endif static inline double r(double a, double b, const double* scalar) @@ -1565,7 +1566,7 @@ template struct op_div_f { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a / b; } + { return v_div(a, b); } static inline T1 r(T1 a, T1 b) { return a / b; } }; @@ -1573,16 +1574,16 @@ struct op_div_f template struct op_div_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return a * v_scalar / b; + return v_div(v_mul(a, v_scalar), b); } static inline Tvec pre(const Tvec& denom, const Tvec& res) { - const Tvec v_zero = vx_setall(0); - 
return v_select(denom == v_zero, v_zero, res); + const Tvec v_zero = vx_setall::lane_type>(0); + return v_select(v_eq(denom, v_zero), v_zero, res); } #endif static inline T1 r(T1 a, T1 denom, const T2* scalar) @@ -1595,11 +1596,11 @@ struct op_div_scale template<> struct op_div_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const float* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return a * v_scalar / b; + return v_div(v_mul(a, v_scalar), b); } #endif static inline float r(float a, float denom, const float* scalar) @@ -1613,7 +1614,7 @@ struct op_div_scale static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); - return a * v_scalar / b; + return v_div(v_mul(a, v_scalar), b); } #endif static inline double r(double a, double denom, const double* scalar) @@ -1681,7 +1682,7 @@ DEFINE_SIMD_ALL(div, div_loop) template struct op_add_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_alpha = vx_setall_f32(*scalar); @@ -1714,7 +1715,7 @@ struct op_add_scale template struct op_add_weighted { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalars) { const v_float32 v_alpha = vx_setall_f32(scalars[0]); @@ -1831,16 +1832,16 @@ DEFINE_SIMD_F64(addWeighted, add_weighted_loop_d) template struct op_recip { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return v_scalar / a; + return v_div(v_scalar, a); } static inline Tvec pre(const Tvec& denom, const Tvec& res) { - const Tvec v_zero = vx_setall(0); - return v_select(denom == v_zero, v_zero, res); + const Tvec v_zero = vx_setall::lane_type>(0); + return 
v_select(v_eq(denom, v_zero), v_zero, res); } #endif static inline T1 r(T1 denom, const T2* scalar) @@ -1853,11 +1854,11 @@ struct op_recip template<> struct op_recip { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const float* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return v_scalar / a; + return v_div(v_scalar, a); } #endif static inline float r(float denom, const float* scalar) @@ -1871,7 +1872,7 @@ struct op_recip static inline v_float64 r(const v_float64& a, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); - return v_scalar / a; + return v_div(v_scalar, a); } #endif static inline double r(double denom, const double* scalar) diff --git a/modules/core/src/check.cpp b/modules/core/src/check.cpp index ffd9b302bf..2891f3a2e3 100644 --- a/modules/core/src/check.cpp +++ b/modules/core/src/check.cpp @@ -4,6 +4,8 @@ #include "precomp.hpp" +#include + #include "opencv2/core/check.hpp" namespace cv { diff --git a/modules/core/src/convert.hpp b/modules/core/src/convert.hpp index 3aa7dadac9..9f8e5643d5 100644 --- a/modules/core/src/convert.hpp +++ b/modules/core/src/convert.hpp @@ -11,7 +11,7 @@ namespace cv { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline void vx_load_as(const uchar* ptr, v_float32& a) { a = v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(ptr))); } @@ -78,7 +78,7 @@ static inline void v_store_as(int64_t* ptr, const v_float32& a) v_int64 ia_0, ia_1; v_expand(ia, ia_0, ia_1); v_store(ptr, ia_0); - v_store(ptr + v_int64::nlanes, ia_1); + v_store(ptr + VTraits::vlanes(), ia_1); } static inline void v_store_as(uint64_t* ptr, const v_float32& a) @@ -88,7 +88,7 @@ static inline void v_store_as(uint64_t* ptr, const v_float32& a) ia = v_max(ia, vx_setzero_s32()); v_expand(v_reinterpret_as_u32(ia), ia_0, ia_1); v_store(ptr, ia_0); - v_store(ptr + v_int64::nlanes, ia_1); + v_store(ptr + VTraits::vlanes(), ia_1); } static inline void vx_load_pair_as(const 
uchar* ptr, v_uint16& a, v_uint16& b) @@ -104,7 +104,7 @@ static inline void vx_load_pair_as(const schar* ptr, v_uint16& a, v_uint16& b) } static inline void vx_load_pair_as(const ushort* ptr, v_uint16& a, v_uint16& b) -{ a = vx_load(ptr); b = vx_load(ptr + v_uint16::nlanes); } +{ a = vx_load(ptr); b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const uchar* ptr, v_int16& a, v_int16& b) { @@ -118,7 +118,7 @@ static inline void vx_load_pair_as(const schar* ptr, v_int16& a, v_int16& b) { v_expand(vx_load(ptr), a, b); } static inline void vx_load_pair_as(const short* ptr, v_int16& a, v_int16& b) -{ a = vx_load(ptr); b = vx_load(ptr + v_uint16::nlanes); } +{ a = vx_load(ptr); b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const uchar* ptr, v_int32& a, v_int32& b) { @@ -147,7 +147,7 @@ static inline void vx_load_pair_as(const short* ptr, v_int32& a, v_int32& b) static inline void vx_load_pair_as(const int* ptr, v_int32& a, v_int32& b) { a = vx_load(ptr); - b = vx_load(ptr + v_int32::nlanes); + b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const uchar* ptr, v_float32& a, v_float32& b) @@ -184,14 +184,14 @@ static inline void vx_load_pair_as(const short* ptr, v_float32& a, v_float32& b) static inline void vx_load_pair_as(const int* ptr, v_float32& a, v_float32& b) { - v_int32 ia = vx_load(ptr), ib = vx_load(ptr + v_int32::nlanes); + v_int32 ia = vx_load(ptr), ib = vx_load(ptr + VTraits::vlanes()); a = v_cvt_f32(ia); b = v_cvt_f32(ib); } static inline void vx_load_pair_as(const int64_t* ptr, v_int32& a, v_int32& b) { - const int int64_nlanes = v_int64::nlanes; + const int int64_nlanes = VTraits::vlanes(); a = v_pack(vx_load(ptr), vx_load(ptr + int64_nlanes)); b = v_pack(vx_load(ptr + int64_nlanes*2), vx_load(ptr + int64_nlanes*3)); } @@ -199,7 +199,7 @@ static inline void vx_load_pair_as(const int64_t* ptr, v_int32& a, v_int32& b) static inline void vx_load_pair_as(const int64_t* ptr, 
v_uint64& a, v_uint64& b) { v_int64 z = vx_setzero_s64(); - v_int64 ia = vx_load(ptr), ib = vx_load(ptr + v_int64::nlanes); + v_int64 ia = vx_load(ptr), ib = vx_load(ptr + VTraits::vlanes()); ia &= (ia > z); ib &= (ib > z); a = v_reinterpret_as_u64(ia); @@ -208,7 +208,7 @@ static inline void vx_load_pair_as(const int64_t* ptr, v_uint64& a, v_uint64& b) static inline void vx_load_pair_as(const int64_t* ptr, v_uint32& a, v_uint32& b) { - const int nlanes = v_int64::nlanes; + const int nlanes = VTraits::vlanes(); v_int64 z = vx_setzero_s64(); v_int64 ia0 = vx_load(ptr), ia1 = vx_load(ptr + nlanes); v_int64 ib0 = vx_load(ptr + nlanes*2), ib1 = vx_load(ptr + nlanes*3); @@ -222,8 +222,8 @@ static inline void vx_load_pair_as(const int64_t* ptr, v_uint32& a, v_uint32& b) static inline void vx_load_pair_as(const uint64_t* ptr, v_float32& a, v_float32& b) { - const int nlanes = v_uint64::nlanes; - float buf[v_uint64::nlanes*4]; + const int nlanes = VTraits::vlanes(); + float buf[VTraits::max_nlanes*4]; for (int i = 0; i < nlanes*4; i++) { buf[i] = (float)ptr[i]; } @@ -233,8 +233,8 @@ static inline void vx_load_pair_as(const uint64_t* ptr, v_float32& a, v_float32& static inline void vx_load_pair_as(const int64_t* ptr, v_float32& a, v_float32& b) { - const int nlanes = v_int64::nlanes; - float buf[v_int64::nlanes*4]; + const int nlanes = VTraits::vlanes(); + float buf[VTraits::max_nlanes*4]; for (int i = 0; i < nlanes*4; i++) { buf[i] = (float)ptr[i]; } @@ -277,21 +277,21 @@ static inline void vx_load_pair_as(const int* ptr, v_uint32& a, v_uint32& b) { v_int32 z = vx_setzero_s32(); v_int32 ia = v_max(vx_load(ptr), z); - v_int32 ib = v_max(vx_load(ptr + v_int32::nlanes), z); + v_int32 ib = v_max(vx_load(ptr + VTraits::vlanes()), z); a = v_reinterpret_as_u32(ia); b = v_reinterpret_as_u32(ib); } static inline void vx_load_pair_as(const uint64_t* ptr, v_uint32& a, v_uint32& b) { - const int int64_nlanes = v_int64::nlanes; + const int int64_nlanes = VTraits::vlanes(); a = 
v_pack(vx_load(ptr), vx_load(ptr + int64_nlanes)); b = v_pack(vx_load(ptr + int64_nlanes*2), vx_load(ptr + int64_nlanes*3)); } static inline void vx_load_pair_as(const uint64_t* ptr, v_int32& a, v_int32& b) { - const int int64_nlanes = v_int64::nlanes; + const int int64_nlanes = VTraits::vlanes(); v_uint32 ua = v_pack(vx_load(ptr), vx_load(ptr + int64_nlanes)); v_uint32 ub = v_pack(vx_load(ptr + int64_nlanes*2), vx_load(ptr + int64_nlanes*3)); a = v_reinterpret_as_s32(ua); @@ -299,37 +299,37 @@ static inline void vx_load_pair_as(const uint64_t* ptr, v_int32& a, v_int32& b) } static inline void vx_load_pair_as(const float* ptr, v_float32& a, v_float32& b) -{ a = vx_load(ptr); b = vx_load(ptr + v_float32::nlanes); } +{ a = vx_load(ptr); b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const float16_t* ptr, v_float32& a, v_float32& b) { a = vx_load_expand(ptr); - b = vx_load_expand(ptr + v_float32::nlanes); + b = vx_load_expand(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const bfloat16_t* ptr, v_float32& a, v_float32& b) { a = vx_load_expand(ptr); - b = vx_load_expand(ptr + v_float32::nlanes); + b = vx_load_expand(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const unsigned* ptr, v_uint32& a, v_uint32& b) { a = vx_load(ptr); - b = vx_load(ptr + v_uint32::nlanes); + b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const unsigned* ptr, v_int32& a, v_int32& b) { a = v_reinterpret_as_s32(vx_load(ptr)); - b = v_reinterpret_as_s32(vx_load(ptr + v_uint32::nlanes)); + b = v_reinterpret_as_s32(vx_load(ptr + VTraits::vlanes())); } static inline void vx_load_pair_as(const unsigned* ptr, v_float32& a, v_float32& b) { v_uint32 delta = vx_setall_u32(0x80000000U); v_uint32 ua = vx_load(ptr); - v_uint32 ub = vx_load(ptr + v_uint32::nlanes); + v_uint32 ub = vx_load(ptr + VTraits::vlanes()); v_uint32 mask_a = (ua >= delta) & delta, mask_b = (ub >= delta) & delta; v_float32 fmask_a = 
v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31)) v_float32 fmask_b = v_cvt_f32(v_reinterpret_as_s32(mask_b)); // 0.f or (float)(-(1 << 31)) @@ -353,7 +353,7 @@ static inline void v_store_pair_as(schar* ptr, const v_uint16& a, const v_uint16 } static inline void v_store_pair_as(ushort* ptr, const v_uint16& a, const v_uint16& b) -{ v_store(ptr, a); v_store(ptr + v_uint16::nlanes, b); } +{ v_store(ptr, a); v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(uchar* ptr, const v_int16& a, const v_int16& b) { v_store(ptr, v_pack_u(a, b)); } @@ -362,7 +362,7 @@ static inline void v_store_pair_as(schar* ptr, const v_int16& a, const v_int16& { v_store(ptr, v_pack(a, b)); } static inline void v_store_pair_as(short* ptr, const v_int16& a, const v_int16& b) -{ v_store(ptr, a); v_store(ptr + v_int16::nlanes, b); } +{ v_store(ptr, a); v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(uchar* ptr, const v_int32& a, const v_int32& b) { v_pack_u_store(ptr, v_pack(a, b)); } @@ -379,7 +379,7 @@ static inline void v_store_pair_as(short* ptr, const v_int32& a, const v_int32& static inline void v_store_pair_as(int* ptr, const v_int32& a, const v_int32& b) { v_store(ptr, a); - v_store(ptr + v_int32::nlanes, b); + v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(int64_t* ptr, const v_int32& a, const v_int32& b) @@ -387,7 +387,7 @@ static inline void v_store_pair_as(int64_t* ptr, const v_int32& a, const v_int32 v_int64 q0, q1, q2, q3; v_expand(a, q0, q1); v_expand(b, q2, q3); - const int nlanes = v_int64::nlanes; + const int nlanes = VTraits::vlanes(); v_store(ptr, q0); v_store(ptr + nlanes, q1); v_store(ptr + nlanes*2, q2); @@ -419,11 +419,11 @@ static inline void v_store_pair_as(int* ptr, const v_float32& a, const v_float32 { v_int32 ia = v_round(a), ib = v_round(b); v_store(ptr, ia); - v_store(ptr + v_int32::nlanes, ib); + v_store(ptr + VTraits::vlanes(), ib); } static inline void 
v_store_pair_as(float* ptr, const v_float32& a, const v_float32& b) -{ v_store(ptr, a); v_store(ptr + v_float32::nlanes, b); } +{ v_store(ptr, a); v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(unsigned* ptr, const v_float32& a, const v_float32& b) { @@ -431,7 +431,7 @@ static inline void v_store_pair_as(unsigned* ptr, const v_float32& a, const v_fl v_int32 ia = v_max(v_round(a), z); v_int32 ib = v_max(v_round(b), z); v_store(ptr, v_reinterpret_as_u32(ia)); - v_store(ptr + v_int32::nlanes, v_reinterpret_as_u32(ib)); + v_store(ptr + VTraits::vlanes(), v_reinterpret_as_u32(ib)); } static inline void v_store_pair_as(uchar* ptr, const v_uint32& a, const v_uint32& b) @@ -447,7 +447,7 @@ static inline void v_store_pair_as(ushort* ptr, const v_uint32& a, const v_uint3 static inline void v_store_pair_as(unsigned* ptr, const v_uint32& a, const v_uint32& b) { v_store(ptr, a); - v_store(ptr + v_uint32::nlanes, b); + v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(uint64_t* ptr, const v_uint32& a, const v_uint32& b) @@ -455,7 +455,7 @@ static inline void v_store_pair_as(uint64_t* ptr, const v_uint32& a, const v_uin v_uint64 q0, q1, q2, q3; v_expand(a, q0, q1); v_expand(b, q2, q3); - const int nlanes = v_uint64::nlanes; + const int nlanes = VTraits::vlanes(); v_store(ptr, q0); v_store(ptr + nlanes, q1); v_store(ptr + nlanes*2, q2); @@ -465,28 +465,28 @@ static inline void v_store_pair_as(uint64_t* ptr, const v_uint32& a, const v_uin static inline void v_store_pair_as(uint64_t* ptr, const v_uint64& a, const v_uint64& b) { v_store(ptr, a); - v_store(ptr + v_uint64::nlanes, b); + v_store(ptr + VTraits::vlanes(), b); } -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) static inline void vx_load_as(const uint64_t* ptr, v_float32& a) { v_float64 a_0 = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr))); - v_float64 a_1 = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr + v_uint64::nlanes))); + v_float64 a_1 = 
v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr + VTraits::vlanes()))); a = v_cvt_f32(a_0, a_1); } static inline void vx_load_as(const int64_t* ptr, v_float32& a) { v_float64 a_0 = v_cvt_f64(vx_load(ptr)); - v_float64 a_1 = v_cvt_f64(vx_load(ptr + v_uint64::nlanes)); + v_float64 a_1 = v_cvt_f64(vx_load(ptr + VTraits::vlanes())); a = v_cvt_f32(a_0, a_1); } static inline void vx_load_as(const double* ptr, v_float32& a) { - v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); + v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + VTraits::vlanes()); a = v_cvt_f32(v0, v1); } @@ -516,8 +516,8 @@ static inline void vx_load_pair_as(const bfloat16_t* ptr, v_float64& a, v_float6 static inline void vx_load_pair_as(const double* ptr, v_int32& a, v_int32& b) { - v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); - v_float64 v2 = vx_load(ptr + v_float64::nlanes*2), v3 = vx_load(ptr + v_float64::nlanes*3); + v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + VTraits::vlanes()); + v_float64 v2 = vx_load(ptr + VTraits::vlanes()*2), v3 = vx_load(ptr + VTraits::vlanes()*3); v_int32 iv0 = v_round(v0), iv1 = v_round(v1); v_int32 iv2 = v_round(v2), iv3 = v_round(v3); a = v_combine_low(iv0, iv1); @@ -526,15 +526,15 @@ static inline void vx_load_pair_as(const double* ptr, v_int32& a, v_int32& b) static inline void vx_load_pair_as(const uint64_t* ptr, v_float64& a, v_float64& b) { - const int int64_nlanes = v_int64::nlanes; + const int int64_nlanes = VTraits::vlanes(); a = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr))); b = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr + int64_nlanes))); } static inline void vx_load_pair_as(const double* ptr, v_float32& a, v_float32& b) { - v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); - v_float64 v2 = vx_load(ptr + v_float64::nlanes*2), v3 = vx_load(ptr + v_float64::nlanes*3); + v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + VTraits::vlanes()); + v_float64 v2 = vx_load(ptr + VTraits::vlanes()*2), v3 = vx_load(ptr + 
VTraits::vlanes()*3); a = v_cvt_f32(v0, v1); b = v_cvt_f32(v2, v3); } @@ -584,19 +584,19 @@ static inline void vx_load_pair_as(const float* ptr, v_float64& a, v_float64& b) static inline void vx_load_pair_as(const double* ptr, v_float64& a, v_float64& b) { a = vx_load(ptr); - b = vx_load(ptr + v_float64::nlanes); + b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const int64_t* ptr, v_float64& a, v_float64& b) { a = v_cvt_f64(vx_load(ptr)); - b = v_cvt_f64(vx_load(ptr + v_float64::nlanes)); + b = v_cvt_f64(vx_load(ptr + VTraits::vlanes())); } static inline void vx_load_pair_as(const unsigned* ptr, v_float64& a, v_float64& b) { - const int nlanes = v_uint64::nlanes; - double buf[v_uint64::nlanes*2]; + const int nlanes = VTraits::vlanes(); + double buf[VTraits::max_nlanes*2]; for (int i = 0; i < nlanes*2; i++) buf[i] = (double)ptr[i]; a = vx_load(buf); @@ -607,7 +607,7 @@ static inline void v_store_as(double* ptr, const v_float32& a) { v_float64 fa0 = v_cvt_f64(a), fa1 = v_cvt_f64_high(a); v_store(ptr, fa0); - v_store(ptr + v_float64::nlanes, fa1); + v_store(ptr + VTraits::vlanes(), fa1); } static inline void v_store_pair_as(double* ptr, const v_int32& a, const v_int32& b) @@ -616,9 +616,9 @@ static inline void v_store_pair_as(double* ptr, const v_int32& a, const v_int32& v_float64 fb0 = v_cvt_f64(b), fb1 = v_cvt_f64_high(b); v_store(ptr, fa0); - v_store(ptr + v_float64::nlanes, fa1); - v_store(ptr + v_float64::nlanes*2, fb0); - v_store(ptr + v_float64::nlanes*3, fb1); + v_store(ptr + VTraits::vlanes(), fa1); + v_store(ptr + VTraits::vlanes()*2, fb0); + v_store(ptr + VTraits::vlanes()*3, fb1); } static inline void v_store_pair_as(double* ptr, const v_float32& a, const v_float32& b) @@ -627,15 +627,15 @@ static inline void v_store_pair_as(double* ptr, const v_float32& a, const v_floa v_float64 fb0 = v_cvt_f64(b), fb1 = v_cvt_f64_high(b); v_store(ptr, fa0); - v_store(ptr + v_float64::nlanes, fa1); - v_store(ptr + v_float64::nlanes*2, fb0); - 
v_store(ptr + v_float64::nlanes*3, fb1); + v_store(ptr + VTraits::vlanes(), fa1); + v_store(ptr + VTraits::vlanes()*2, fb0); + v_store(ptr + VTraits::vlanes()*3, fb1); } static inline void v_store_pair_as(double* ptr, const v_float64& a, const v_float64& b) { v_store(ptr, a); - v_store(ptr + v_float64::nlanes, b); + v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(int* ptr, const v_float64& a, const v_float64& b) @@ -662,7 +662,7 @@ static inline void v_store_pair_as(uint64_t* ptr, const v_float64& a, const v_fl v_int64 ia, ib; v_expand(v_round(v_max(a, z), v_max(b, z)), ia, ib); v_store(ptr, v_reinterpret_as_u64(ia)); - v_store(ptr + v_int64::nlanes, v_reinterpret_as_u64(ib)); + v_store(ptr + VTraits::vlanes(), v_reinterpret_as_u64(ib)); } static inline void v_store_pair_as(int64_t* ptr, const v_float64& a, const v_float64& b) @@ -670,7 +670,7 @@ static inline void v_store_pair_as(int64_t* ptr, const v_float64& a, const v_flo v_int64 ia, ib; v_expand(v_round(a, b), ia, ib); v_store(ptr, ia); - v_store(ptr + v_int64::nlanes, ib); + v_store(ptr + VTraits::vlanes(), ib); } static inline void v_store_pair_as(unsigned* ptr, const v_float64& a, const v_float64& b) @@ -744,9 +744,9 @@ static inline void v_store_pair_as(double* ptr, const _Tsvec& a, const _Tsvec& b ptr[i] = (double)buf[i]; } -#endif /////////// CV_SIMD_64F +#endif /////////// CV_SIMD_64F || CV_SIMD_SCALABLE_64F -#endif /////////// CV_SIMD +#endif /////////// CV_SIMD || CV_SIMD_SCALABLE } diff --git a/modules/core/src/convert.simd.hpp b/modules/core/src/convert.simd.hpp index c776918846..84161b2da7 100644 --- a/modules/core/src/convert.simd.hpp +++ b/modules/core/src/convert.simd.hpp @@ -41,8 +41,8 @@ void cvt16f32f( const float16_t* src, float* dst, int len ) { CV_INSTRUMENT_REGION(); int j = 0; -#if CV_SIMD - const int VECSZ = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits::vlanes(); for( ; j < len; j += VECSZ ) { if( j > len - VECSZ ) @@ -62,8 
+62,8 @@ void cvt32f16f( const float* src, float16_t* dst, int len ) { CV_INSTRUMENT_REGION(); int j = 0; -#if CV_SIMD - const int VECSZ = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits::vlanes(); for( ; j < len; j += VECSZ ) { if( j > len - VECSZ ) @@ -83,8 +83,8 @@ void cvt32f16bf( const float* src, bfloat16_t* dst, int len ) { CV_INSTRUMENT_REGION(); int j = 0; -#if CV_SIMD - const int VECSZ = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits::vlanes(); for( ; j < len; j += VECSZ ) { if( j > len - VECSZ ) @@ -153,8 +153,8 @@ cvt_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size ) for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD - const int VECSZ = _Twvec::nlanes*2; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits<_Twvec>::vlanes()*2; for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -182,8 +182,8 @@ cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size ) for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD_64F - const int VECSZ = v_float64::nlanes*2; +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) + const int VECSZ = VTraits::vlanes()*2; for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -213,8 +213,8 @@ cvt1_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size ) for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD - const int VECSZ = _Twvec::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits<_Twvec>::vlanes(); for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) diff --git a/modules/core/src/convert_scale.simd.hpp b/modules/core/src/convert_scale.simd.hpp index f1ee7635e7..e29fe06a9b 100644 --- a/modules/core/src/convert_scale.simd.hpp +++ b/modules/core/src/convert_scale.simd.hpp @@ -22,9 +22,9 @@ template inline void cvtabs_32f( const _Ts* src, 
size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b); - const int VECSZ = v_float32::nlanes*2; + const int VECSZ = VTraits::vlanes()*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -32,7 +32,7 @@ cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -72,9 +72,9 @@ template inline void cvt_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b); - const int VECSZ = v_float32::nlanes*2; + const int VECSZ = VTraits::vlanes()*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -82,7 +82,7 @@ cvt_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -108,9 +108,9 @@ template inline void cvt1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b); - const int VECSZ = v_float32::nlanes; + const int VECSZ = VTraits::vlanes(); #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -118,7 +118,7 @@ cvt1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -143,9 +143,9 @@ template inline void cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t 
dstep, Size size, double a, double b ) { -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) v_float64 va = vx_setall_f64(a), vb = vx_setall_f64(b); - const int VECSZ = v_float64::nlanes*2; + const int VECSZ = VTraits::vlanes()*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -153,7 +153,7 @@ cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 09250b8585..1b14c53ab0 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -171,15 +171,15 @@ copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mste const uchar* src = (const uchar*)_src; uchar* dst = (uchar*)_dst; int x = 0; - #if CV_SIMD + #if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint8 v_zero = vx_setzero_u8(); - for( ; x <= size.width - v_uint8::nlanes; x += v_uint8::nlanes ) + for( ; x <= size.width - VTraits::vlanes(); x += VTraits::vlanes() ) { v_uint8 v_src = vx_load(src + x), v_dst = vx_load(dst + x), - v_nmask = vx_load(mask + x) == v_zero; + v_nmask = v_eq(vx_load(mask + x), v_zero); v_dst = v_select(v_nmask, v_dst, v_src); v_store(dst + x, v_dst); @@ -203,23 +203,23 @@ copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mst const ushort* src = (const ushort*)_src; ushort* dst = (ushort*)_dst; int x = 0; - #if CV_SIMD + #if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint8 v_zero = vx_setzero_u8(); - for( ; x <= size.width - v_uint8::nlanes; x += v_uint8::nlanes ) + for( ; x <= size.width - VTraits::vlanes(); x += VTraits::vlanes() ) { - v_uint16 v_src1 = vx_load(src + x), v_src2 = vx_load(src + x + v_uint16::nlanes), - v_dst1 = vx_load(dst + x), v_dst2 = vx_load(dst + x + v_uint16::nlanes); + v_uint16 v_src1 = vx_load(src + x), v_src2 = vx_load(src + x + 
VTraits::vlanes()), + v_dst1 = vx_load(dst + x), v_dst2 = vx_load(dst + x + VTraits::vlanes()); v_uint8 v_nmask1, v_nmask2; - v_uint8 v_nmask = vx_load(mask + x) == v_zero; + v_uint8 v_nmask = v_eq(vx_load(mask + x), v_zero); v_zip(v_nmask, v_nmask, v_nmask1, v_nmask2); v_dst1 = v_select(v_reinterpret_as_u16(v_nmask1), v_dst1, v_src1); v_dst2 = v_select(v_reinterpret_as_u16(v_nmask2), v_dst2, v_src2); v_store(dst + x, v_dst1); - v_store(dst + x + v_uint16::nlanes, v_dst2); + v_store(dst + x + VTraits::vlanes(), v_dst2); } } vx_cleanup(); diff --git a/modules/core/src/count_non_zero.simd.hpp b/modules/core/src/count_non_zero.simd.hpp index 6994564127..ce7c75aa54 100644 --- a/modules/core/src/count_non_zero.simd.hpp +++ b/modules/core/src/count_non_zero.simd.hpp @@ -32,8 +32,8 @@ static int countNonZero_(const T* src, int len ) static int countNonZero8u( const uchar* src, int len ) { int i=0, nz = 0; -#if CV_SIMD - int len0 = len & -v_uint8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_uint8 v_zero = vx_setzero_u8(); v_uint8 v_one = vx_setall_u8(1); @@ -42,20 +42,20 @@ static int countNonZero8u( const uchar* src, int len ) { v_uint16 v_sum16 = vx_setzero_u16(); int j = i; - while (j < std::min(len0, i + 65280 * v_uint16::nlanes)) + while (j < std::min(len0, i + 65280 * VTraits::vlanes())) { v_uint8 v_sum8 = vx_setzero_u8(); int k = j; - for (; k < std::min(len0, j + 255 * v_uint8::nlanes); k += v_uint8::nlanes) - v_sum8 += v_one & (vx_load(src + k) == v_zero); + for (; k < std::min(len0, j + 255 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_eq(vx_load(src + k), v_zero))); v_uint16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_uint32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -69,8 
+69,8 @@ static int countNonZero8u( const uchar* src, int len ) static int countNonZero16u( const ushort* src, int len ) { int i = 0, nz = 0; -#if CV_SIMD - int len0 = len & -v_int8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_uint16 v_zero = vx_setzero_u16(); v_int8 v_one = vx_setall_s8(1); @@ -79,20 +79,20 @@ static int countNonZero16u( const ushort* src, int len ) { v_int16 v_sum16 = vx_setzero_s16(); int j = i; - while (j < std::min(len0, i + 32766 * v_int16::nlanes)) + while (j < std::min(len0, i + 32766 * VTraits::vlanes())) { v_int8 v_sum8 = vx_setzero_s8(); int k = j; - for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) - v_sum8 += v_one & v_pack(v_reinterpret_as_s16(vx_load(src + k) == v_zero), v_reinterpret_as_s16(vx_load(src + k + v_uint16::nlanes) == v_zero)); + for (; k < std::min(len0, j + 127 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_pack(v_reinterpret_as_s16(v_eq(vx_load(src + k), v_zero)), v_reinterpret_as_s16(v_eq(vx_load(src + k + VTraits::vlanes()), v_zero))))); v_int16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_int32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -104,8 +104,8 @@ static int countNonZero16u( const ushort* src, int len ) static int countNonZero32s( const int* src, int len ) { int i = 0, nz = 0; -#if CV_SIMD - int len0 = len & -v_int8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_int32 v_zero = vx_setzero_s32(); v_int8 v_one = vx_setall_s8(1); @@ -114,23 +114,20 @@ static int countNonZero32s( const int* src, int len ) { v_int16 v_sum16 = vx_setzero_s16(); int j = i; - while (j < std::min(len0, i + 32766 * v_int16::nlanes)) + while (j < std::min(len0, i + 32766 * 
VTraits::vlanes())) { v_int8 v_sum8 = vx_setzero_s8(); int k = j; - for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) - v_sum8 += v_one & v_pack( - v_pack(vx_load(src + k ) == v_zero, vx_load(src + k + v_int32::nlanes) == v_zero), - v_pack(vx_load(src + k + 2*v_int32::nlanes) == v_zero, vx_load(src + k + 3*v_int32::nlanes) == v_zero) - ); + for (; k < std::min(len0, j + 127 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_pack(v_pack(v_eq(vx_load(src + k), v_zero), v_eq(vx_load(src + k + VTraits::vlanes()), v_zero)), v_pack(v_eq(vx_load(src + k + 2 * VTraits::vlanes()), v_zero), v_eq(vx_load(src + k + 3 * VTraits::vlanes()), v_zero))))); v_int16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_int32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -142,8 +139,8 @@ static int countNonZero32s( const int* src, int len ) static int countNonZero32f( const float* src, int len ) { int i = 0, nz = 0; -#if CV_SIMD - int len0 = len & -v_int8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_float32 v_zero = vx_setzero_f32(); v_int8 v_one = vx_setall_s8(1); @@ -152,23 +149,20 @@ static int countNonZero32f( const float* src, int len ) { v_int16 v_sum16 = vx_setzero_s16(); int j = i; - while (j < std::min(len0, i + 32766 * v_int16::nlanes)) + while (j < std::min(len0, i + 32766 * VTraits::vlanes())) { v_int8 v_sum8 = vx_setzero_s8(); int k = j; - for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) - v_sum8 += v_one & v_pack( - v_pack(v_reinterpret_as_s32(vx_load(src + k ) == v_zero), v_reinterpret_as_s32(vx_load(src + k + v_float32::nlanes) == v_zero)), - v_pack(v_reinterpret_as_s32(vx_load(src + k + 2*v_float32::nlanes) == v_zero), 
v_reinterpret_as_s32(vx_load(src + k + 3*v_float32::nlanes) == v_zero)) - ); + for (; k < std::min(len0, j + 127 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_pack(v_pack(v_reinterpret_as_s32(v_eq(vx_load(src + k), v_zero)), v_reinterpret_as_s32(v_eq(vx_load(src + k + VTraits::vlanes()), v_zero))), v_pack(v_reinterpret_as_s32(v_eq(vx_load(src + k + 2 * VTraits::vlanes()), v_zero)), v_reinterpret_as_s32(v_eq(vx_load(src + k + 3 * VTraits::vlanes()), v_zero)))))); v_int16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_int32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -180,21 +174,21 @@ static int countNonZero32f( const float* src, int len ) static int countNonZero64f( const double* src, int len ) { int nz = 0, i = 0; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) v_int64 sum1 = vx_setzero_s64(); v_int64 sum2 = vx_setzero_s64(); v_float64 zero = vx_setzero_f64(); - int step = v_float64::nlanes * 2; + int step = VTraits::vlanes() * 2; int len0 = len & -step; for(i = 0; i < len0; i += step ) { - sum1 += v_reinterpret_as_s64(vx_load(&src[i]) == zero); - sum2 += v_reinterpret_as_s64(vx_load(&src[i + step / 2]) == zero); + sum1 = v_add(sum1, v_reinterpret_as_s64(v_eq(vx_load(&src[i]), zero))); + sum2 = v_add(sum2, v_reinterpret_as_s64(v_eq(vx_load(&src[i + step / 2]), zero))); } // N.B the value is incremented by -1 (0xF...F) for each value - nz = i + (int)v_reduce_sum(sum1 + sum2); + nz = i + (int)v_reduce_sum(v_add(sum1, sum2)); v_cleanup(); #endif return nz + countNonZero_(src + i, len - i); diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index a644fe15a7..43c6d07d58 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -274,22 +274,21 @@ template struct VBLAS { 
int dot(const T*, const T*, int, T*) const { return 0; } int givens(T*, T*, int, T, T) const { return 0; } - int givensx(T*, T*, int, T, T, T*, T*) const { return 0; } }; -#if CV_SIMD +#if CV_SIMD // TODO: enable for CV_SIMD_SCALABLE_64F template<> inline int VBLAS::dot(const float* a, const float* b, int n, float* result) const { - if( n < 2*v_float32::nlanes ) + if( n < 2*VTraits::vlanes() ) return 0; int k = 0; v_float32 s0 = vx_setzero_f32(); - for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float32 a0 = vx_load(a + k); v_float32 b0 = vx_load(b + k); - s0 += a0 * b0; + s0 = v_add(s0, v_mul(a0, b0)); } *result = v_reduce_sum(s0); vx_cleanup(); @@ -299,16 +298,16 @@ template<> inline int VBLAS::dot(const float* a, const float* b, int n, f template<> inline int VBLAS::givens(float* a, float* b, int n, float c, float s) const { - if( n < v_float32::nlanes) + if( n < VTraits::vlanes()) return 0; int k = 0; v_float32 c4 = vx_setall_f32(c), s4 = vx_setall_f32(s); - for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float32 a0 = vx_load(a + k); v_float32 b0 = vx_load(b + k); - v_float32 t0 = (a0 * c4) + (b0 * s4); - v_float32 t1 = (b0 * c4) - (a0 * s4); + v_float32 t0 = v_add(v_mul(a0, c4), v_mul(b0, s4)); + v_float32 t1 = v_sub(v_mul(b0, c4), v_mul(a0, s4)); v_store(a + k, t0); v_store(b + k, t1); } @@ -317,44 +316,19 @@ template<> inline int VBLAS::givens(float* a, float* b, int n, float c, f } -template<> inline int VBLAS::givensx(float* a, float* b, int n, float c, float s, - float* anorm, float* bnorm) const -{ - if( n < v_float32::nlanes) - return 0; - int k = 0; - v_float32 c4 = vx_setall_f32(c), s4 = vx_setall_f32(s); - v_float32 sa = vx_setzero_f32(), sb = vx_setzero_f32(); - for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes ) - { - v_float32 a0 = vx_load(a + k); - v_float32 b0 = vx_load(b + k); - 
v_float32 t0 = (a0 * c4) + (b0 * s4); - v_float32 t1 = (b0 * c4) - (a0 * s4); - v_store(a + k, t0); - v_store(b + k, t1); - sa += t0 + t0; - sb += t1 + t1; - } - *anorm = v_reduce_sum(sa); - *bnorm = v_reduce_sum(sb); - vx_cleanup(); - return k; -} - -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template<> inline int VBLAS::dot(const double* a, const double* b, int n, double* result) const { - if( n < 2*v_float64::nlanes ) + if( n < 2*VTraits::vlanes() ) return 0; int k = 0; v_float64 s0 = vx_setzero_f64(); - for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float64 a0 = vx_load(a + k); v_float64 b0 = vx_load(b + k); - s0 += a0 * b0; + s0 = v_add(s0, v_mul(a0, b0)); } double sbuf[2]; v_store(sbuf, s0); @@ -368,12 +342,12 @@ template<> inline int VBLAS::givens(double* a, double* b, int n, double { int k = 0; v_float64 c2 = vx_setall_f64(c), s2 = vx_setall_f64(s); - for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float64 a0 = vx_load(a + k); v_float64 b0 = vx_load(b + k); - v_float64 t0 = (a0 * c2) + (b0 * s2); - v_float64 t1 = (b0 * c2) - (a0 * s2); + v_float64 t0 = v_add(v_mul(a0, c2), v_mul(b0, s2)); + v_float64 t1 = v_sub(v_mul(b0, c2), v_mul(a0, s2)); v_store(a + k, t0); v_store(b + k, t1); } @@ -382,30 +356,6 @@ template<> inline int VBLAS::givens(double* a, double* b, int n, double } -template<> inline int VBLAS::givensx(double* a, double* b, int n, double c, double s, - double* anorm, double* bnorm) const -{ - int k = 0; - v_float64 c2 = vx_setall_f64(c), s2 = vx_setall_f64(s); - v_float64 sa = vx_setzero_f64(), sb = vx_setzero_f64(); - for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes ) - { - v_float64 a0 = vx_load(a + k); - v_float64 b0 = vx_load(b + k); - v_float64 t0 = (a0 * c2) + (b0 * s2); - v_float64 t1 = (b0 * c2) - (a0 * s2); - v_store(a + k, t0); - v_store(b + k, t1); - sa += 
t0 * t0; - sb += t1 * t1; - } - double abuf[2], bbuf[2]; - v_store(abuf, sa); - v_store(bbuf, sb); - *anorm = abuf[0] + abuf[1]; - *bnorm = bbuf[0] + bbuf[1]; - return k; -} #endif //CV_SIMD_64F #endif //CV_SIMD @@ -916,7 +866,7 @@ double invert( InputArray _src, OutputArray _dst, int method ) #if CV_SIMD128 const float d_32f = (float)d; const v_float32x4 d_vec(d_32f, -d_32f, -d_32f, d_32f); - v_float32x4 s0 = v_load_halves((const float*)srcdata, (const float*)(srcdata + srcstep)) * d_vec;//0123//3120 + v_float32x4 s0 = v_mul(v_load_halves((const float *)srcdata, (const float *)(srcdata + srcstep)), d_vec);//0123//3120 s0 = v_extract<3>(s0, v_combine_low(v_rotate_right<1>(s0), s0)); v_store_low((float*)dstdata, s0); v_store_high((float*)(dstdata + dststep), s0); @@ -942,10 +892,10 @@ double invert( InputArray _src, OutputArray _dst, int method ) d = 1./d; #if CV_SIMD128_64F v_float64x2 det = v_setall_f64(d); - v_float64x2 s0 = v_load((const double*)srcdata) * det; - v_float64x2 s1 = v_load((const double*)(srcdata+srcstep)) * det; + v_float64x2 s0 = v_mul(v_load((const double *)srcdata), det); + v_float64x2 s1 = v_mul(v_load((const double *)(srcdata + srcstep)), det); v_float64x2 sm = v_extract<1>(s1, s0);//30 - v_float64x2 ss = v_setall(0) - v_extract<1>(s0, s1);//12 + v_float64x2 ss = v_sub(v_setall(0), v_extract<1>(s0, s1));//12 v_store((double*)dstdata, v_combine_low(sm, ss));//31 v_store((double*)(dstdata + dststep), v_combine_high(ss, sm));//20 #else diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 05c6d3bd1f..0bec1be6c0 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -614,13 +614,13 @@ void polarToCart( InputArray src1, InputArray src2, { k = 0; -#if CV_SIMD - int cWidth = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int cWidth = VTraits::vlanes(); for( ; k <= len - cWidth; k += cWidth ) { v_float32 v_m = vx_load(mag + k); - v_store(x + k, vx_load(x + k) * v_m); - v_store(y + k, 
vx_load(y + k) * v_m); + v_store(x + k, v_mul(vx_load(x + k), v_m)); + v_store(y + k, v_mul(vx_load(y + k), v_m)); } vx_cleanup(); #endif @@ -741,7 +741,7 @@ struct iPow_SIMD } }; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) template <> struct iPow_SIMD @@ -751,7 +751,7 @@ struct iPow_SIMD int i = 0; v_uint32 v_1 = vx_setall_u32(1u); - for ( ; i <= len - v_uint16::nlanes; i += v_uint16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint32 v_a1 = v_1, v_a2 = v_1; v_uint16 v = vx_load_expand(src + i); @@ -763,16 +763,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_pack_store(dst + i, v); @@ -791,7 +791,7 @@ struct iPow_SIMD int i = 0; v_int32 v_1 = vx_setall_s32(1); - for ( ; i <= len - v_int16::nlanes; i += v_int16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_int32 v_a1 = v_1, v_a2 = v_1; v_int16 v = vx_load_expand(src + i); @@ -803,16 +803,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_pack_store(dst + i, v); @@ -831,7 +831,7 @@ struct iPow_SIMD int i = 0; v_uint32 v_1 = vx_setall_u32(1u); - for ( ; i <= len - v_uint16::nlanes; i += v_uint16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint32 v_a1 = v_1, v_a2 = v_1; v_uint16 v = vx_load(src + i); @@ -843,16 +843,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 
*= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_store(dst + i, v); @@ -871,7 +871,7 @@ struct iPow_SIMD int i = 0; v_int32 v_1 = vx_setall_s32(1); - for ( ; i <= len - v_int16::nlanes; i += v_int16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_int32 v_a1 = v_1, v_a2 = v_1; v_int16 v = vx_load(src + i); @@ -883,16 +883,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_store(dst + i, v); @@ -911,29 +911,29 @@ struct iPow_SIMD int i = 0; v_int32 v_1 = vx_setall_s32(1); - for ( ; i <= len - v_int32::nlanes*2; i += v_int32::nlanes*2) + for ( ; i <= len - VTraits::vlanes()*2; i += VTraits::vlanes()*2) { v_int32 v_a1 = v_1, v_a2 = v_1; - v_int32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_int32::nlanes); + v_int32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits::vlanes()); int p = power; while( p > 1 ) { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v_store(dst + i, v_a1); - v_store(dst + i + v_int32::nlanes, v_a2); + v_store(dst + i + VTraits::vlanes(), v_a2); } vx_cleanup(); @@ -949,34 +949,34 @@ struct iPow_SIMD int i = 0; v_float32 v_1 = vx_setall_f32(1.f); - for ( ; i <= len - v_float32::nlanes*2; i += v_float32::nlanes*2) + for ( ; i <= len - VTraits::vlanes()*2; i += VTraits::vlanes()*2) { v_float32 v_a1 = v_1, v_a2 = v_1; - v_float32 
v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_float32::nlanes); + v_float32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits::vlanes()); int p = std::abs(power); if( power < 0 ) { - v_b1 = v_1 / v_b1; - v_b2 = v_1 / v_b2; + v_b1 = v_div(v_1, v_b1); + v_b2 = v_div(v_1, v_b2); } while( p > 1 ) { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v_store(dst + i, v_a1); - v_store(dst + i + v_float32::nlanes, v_a2); + v_store(dst + i + VTraits::vlanes(), v_a2); } vx_cleanup(); @@ -984,7 +984,7 @@ struct iPow_SIMD } }; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template <> struct iPow_SIMD { @@ -993,34 +993,34 @@ struct iPow_SIMD int i = 0; v_float64 v_1 = vx_setall_f64(1.); - for ( ; i <= len - v_float64::nlanes*2; i += v_float64::nlanes*2) + for ( ; i <= len - VTraits::vlanes()*2; i += VTraits::vlanes()*2) { v_float64 v_a1 = v_1, v_a2 = v_1; - v_float64 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_float64::nlanes); + v_float64 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits::vlanes()); int p = std::abs(power); if( power < 0 ) { - v_b1 = v_1 / v_b1; - v_b2 = v_1 / v_b2; + v_b1 = v_div(v_1, v_b1); + v_b2 = v_div(v_1, v_b2); } while( p > 1 ) { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v_store(dst + i, v_a1); - v_store(dst + i + v_float64::nlanes, v_a2); + v_store(dst + i + VTraits::vlanes(), v_a2); } vx_cleanup(); @@ -1614,7 +1614,7 @@ void patchNaNs( InputOutputArray _a, double _val ) Cv32suf val; val.f = (float)_val; -#if CV_SIMD +#if (CV_SIMD 
|| CV_SIMD_SCALABLE) v_int32 v_mask1 = vx_setall_s32(0x7fffffff), v_mask2 = vx_setall_s32(0x7f800000); v_int32 v_val = vx_setall_s32(val.i); #endif @@ -1624,12 +1624,12 @@ void patchNaNs( InputOutputArray _a, double _val ) int* tptr = ptrs[0]; size_t j = 0; -#if CV_SIMD - size_t cWidth = (size_t)v_int32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + size_t cWidth = (size_t)VTraits::vlanes(); for ( ; j + cWidth <= len; j += cWidth) { v_int32 v_src = vx_load(tptr + j); - v_int32 v_cmp_mask = v_mask2 < (v_src & v_mask1); + v_int32 v_cmp_mask = v_lt(v_mask2, v_and(v_src, v_mask1)); v_int32 v_dst = v_select(v_cmp_mask, v_val, v_src); v_store(tptr + j, v_dst); } diff --git a/modules/core/src/matmul.simd.hpp b/modules/core/src/matmul.simd.hpp index e32096cf71..4ac1e21bb6 100644 --- a/modules/core/src/matmul.simd.hpp +++ b/modules/core/src/matmul.simd.hpp @@ -1454,7 +1454,7 @@ transform_( const T* src, T* dst, const WT* m, int len, int scn, int dcn ) static void transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, int dcn ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) const int BITS = 10, SCALE = 1 << BITS; const float MAX_M = (float)(1 << (15 - BITS)); @@ -1485,7 +1485,7 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in v_int32 m10 = vx_setall_s32(m32[4]); v_int32 m11 = vx_setall_s32(m32[5]); int x = 0; - for (; x <= (len - v_uint8::nlanes) * nChannels; x += v_uint8::nlanes * nChannels) + for (; x <= (len - VTraits::vlanes()) * nChannels; x += VTraits::vlanes() * nChannels) { v_uint8 b, g, r; v_load_deinterleave(src + x, b, g, r); @@ -1499,20 +1499,20 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in v_int32 p1, p3; v_expand(bgl, p0, p2); v_expand(v_reinterpret_as_s16(rl), p1, p3); - dbl = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m01) + p1 * m2 + m3, - v_dotprod(v_reinterpret_as_s16(p2), m01) + p3 * m2 + m3); - dgl = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m45) 
+ p1 * m6 + m7, - v_dotprod(v_reinterpret_as_s16(p2), m45) + p3 * m6 + m7); - drl = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m89) + p1 * m10 + m11, - v_dotprod(v_reinterpret_as_s16(p2), m89) + p3 * m10 + m11); + dbl = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m01), v_mul(p1, m2)), m3), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m01), v_mul(p3, m2)), m3)); + dgl = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m45), v_mul(p1, m6)), m7), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m45), v_mul(p3, m6)), m7)); + drl = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m89), v_mul(p1, m10)), m11), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m89), v_mul(p3, m10)), m11)); v_expand(bgh, p0, p2); v_expand(v_reinterpret_as_s16(rh), p1, p3); - dbh = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m01) + p1 * m2 + m3, - v_dotprod(v_reinterpret_as_s16(p2), m01) + p3 * m2 + m3); - dgh = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m45) + p1 * m6 + m7, - v_dotprod(v_reinterpret_as_s16(p2), m45) + p3 * m6 + m7); - drh = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m89) + p1 * m10 + m11, - v_dotprod(v_reinterpret_as_s16(p2), m89) + p3 * m10 + m11); + dbh = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m01), v_mul(p1, m2)), m3), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m01), v_mul(p3, m2)), m3)); + dgh = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m45), v_mul(p1, m6)), m7), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m45), v_mul(p3, m6)), m7)); + drh = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m89), v_mul(p1, m10)), m11), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m89), v_mul(p3, m10)), m11)); v_store_interleave(dst + x, v_pack_u(dbl, dbh), v_pack_u(dgl, dgh), v_pack_u(drl, drh)); } m32[1] = saturate_cast((m[3] + 0.5f)*SCALE); @@ -1537,7 +1537,7 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in static void 
transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, int dcn ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) if( scn == 3 && dcn == 3 ) { int x = 0; @@ -1555,7 +1555,7 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, v_float32 m10 = vx_setall_f32(m[10]); v_float32 m11 = vx_setall_f32(m[11] - 32768.f); v_int16 delta = vx_setall_s16(-32768); - for (; x <= (len - v_uint16::nlanes)*3; x += v_uint16::nlanes*3) + for (; x <= (len - VTraits::vlanes())*3; x += VTraits::vlanes()*3) { v_uint16 b, g, r; v_load_deinterleave(src + x, b, g, r); @@ -1574,6 +1574,7 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, v_store_interleave(dst + x, v_reinterpret_as_u16(db), v_reinterpret_as_u16(dg), v_reinterpret_as_u16(dr)); } #endif +#if CV_SIMD128 v_float32x4 _m0l(m[0], m[4], m[ 8], 0.f); v_float32x4 _m1l(m[1], m[5], m[ 9], 0.f); v_float32x4 _m2l(m[2], m[6], m[10], 0.f); @@ -1587,6 +1588,7 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, v_store(dst + x, v_rotate_right<1>(v_reinterpret_as_u16(v_add_wrap(v_pack( v_round(v_matmuladd(v_cvt_f32(v_reinterpret_as_s32(v_load_expand(src + x ))), _m0h, _m1h, _m2h, _m3h)), v_round(v_matmuladd(v_cvt_f32(v_reinterpret_as_s32(v_load_expand(src + x + 3))), _m0l, _m1l, _m2l, _m3l))), _delta)))); +#endif //CV_SIMD128 for( ; x < len * 3; x += 3 ) { float v0 = src[x], v1 = src[x + 1], v2 = src[x + 2]; @@ -1606,25 +1608,25 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, static void transform_32f( const float* src, float* dst, const float* m, int len, int scn, int dcn ) { -#if CV_SIMD && !defined(__aarch64__) && !defined(_M_ARM64) +#if (CV_SIMD || CV_SIMD_SCALABLE) && !defined(__aarch64__) && !defined(_M_ARM64) int x = 0; if( scn == 3 && dcn == 3 ) { - int idx[v_float32::nlanes/2]; - for( int i = 0; i < v_float32::nlanes/4; i++ ) + int idx[VTraits::max_nlanes/2]; + for( int i = 0; i < 
VTraits::vlanes()/4; i++ ) { idx[i] = 3*i; - idx[i + v_float32::nlanes/4] = 0; + idx[i + VTraits::vlanes()/4] = 0; } float _m[] = { m[0], m[4], m[ 8], 0.f, m[1], m[5], m[ 9], 0.f, m[2], m[6], m[10], 0.f, m[3], m[7], m[11], 0.f }; - v_float32 m0 = vx_lut_quads(_m , idx + v_float32::nlanes/4); - v_float32 m1 = vx_lut_quads(_m + 4, idx + v_float32::nlanes/4); - v_float32 m2 = vx_lut_quads(_m + 8, idx + v_float32::nlanes/4); - v_float32 m3 = vx_lut_quads(_m + 12, idx + v_float32::nlanes/4); - for( ; x <= len*3 - v_float32::nlanes; x += 3*v_float32::nlanes/4 ) + v_float32 m0 = vx_lut_quads(_m , idx + VTraits::vlanes()/4); + v_float32 m1 = vx_lut_quads(_m + 4, idx + VTraits::vlanes()/4); + v_float32 m2 = vx_lut_quads(_m + 8, idx + VTraits::vlanes()/4); + v_float32 m3 = vx_lut_quads(_m + 12, idx + VTraits::vlanes()/4); + for( ; x <= len*3 - VTraits::vlanes(); x += 3*VTraits::vlanes()/4 ) v_store(dst + x, v_pack_triplets(v_matmuladd(vx_lut_quads(src + x, idx), m0, m1, m2, m3))); for( ; x < len*3; x += 3 ) { @@ -1641,8 +1643,8 @@ transform_32f( const float* src, float* dst, const float* m, int len, int scn, i if( scn == 4 && dcn == 4 ) { #if CV_SIMD_WIDTH > 16 - int idx[v_float32::nlanes/4]; - for( int i = 0; i < v_float32::nlanes/4; i++ ) + int idx[VTraits::max_nlanes/4]; + for( int i = 0; i < VTraits::vlanes()/4; i++ ) idx[i] = 0; float _m[] = { m[4], m[9], m[14], m[19] }; v_float32 m0 = vx_lut_quads(m , idx); @@ -1650,12 +1652,13 @@ transform_32f( const float* src, float* dst, const float* m, int len, int scn, i v_float32 m2 = vx_lut_quads(m+10, idx); v_float32 m3 = vx_lut_quads(m+15, idx); v_float32 m4 = vx_lut_quads(_m, idx); - for( ; x <= len*4 - v_float32::nlanes; x += v_float32::nlanes ) + for( ; x <= len*4 - VTraits::vlanes(); x += VTraits::vlanes() ) { v_float32 v_src = vx_load(src + x); - v_store(dst + x, v_reduce_sum4(v_src * m0, v_src * m1, v_src * m2, v_src * m3) + m4); + v_store(dst + x, v_add(v_reduce_sum4(v_mul(v_src, m0), v_mul(v_src, m1), v_mul(v_src, 
m2), v_mul(v_src, m3)), m4)); } #endif +#if CV_SIMD128 v_float32x4 _m0 = v_load(m ); v_float32x4 _m1 = v_load(m + 5); v_float32x4 _m2 = v_load(m + 10); @@ -1666,6 +1669,17 @@ transform_32f( const float* src, float* dst, const float* m, int len, int scn, i v_float32x4 v_src = v_load(src + x); v_store(dst + x, v_reduce_sum4(v_src * _m0, v_src * _m1, v_src * _m2, v_src * _m3) + _m4); } +#else // CV_SIMD_WIDTH >= 16 && !CV_SIMD128 + for( ; x < len*4; x += 4 ) + { + float v0 = src[x], v1 = src[x+1], v2 = src[x+2], v3 = src[x+3]; + float t0 = saturate_cast(m[0]*v0 + m[1]*v1 + m[ 2]*v2 + m[ 3]*v3 + m[ 4]); + float t1 = saturate_cast(m[5]*v0 + m[6]*v1 + m[ 7]*v2 + m[ 8]*v3 + m[ 9]); + float t2 = saturate_cast(m[10]*v0 + m[11]*v1 + m[12]*v2 + m[13]*v3 + m[14]); + float t3 = saturate_cast(m[15]*v0 + m[16]*v1 + m[17]*v2 + m[18]*v3 + m[19]); + dst[x] = t0; dst[x+1] = t1; dst[x+2] = t2; dst[x+3] = t3; + } +#endif vx_cleanup(); return; } @@ -1936,9 +1950,9 @@ static void scaleAdd_32f(const float* src1, const float* src2, float* dst, { float alpha = *_alpha; int i = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 v_alpha = vx_setall_f32(alpha); - const int cWidth = v_float32::nlanes; + const int cWidth = VTraits::vlanes(); for (; i <= len - cWidth; i += cWidth) v_store(dst + i, v_muladd(vx_load(src1 + i), v_alpha, vx_load(src2 + i))); vx_cleanup(); @@ -1953,9 +1967,9 @@ static void scaleAdd_64f(const double* src1, const double* src2, double* dst, { double alpha = *_alpha; int i = 0; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) v_float64 a2 = vx_setall_f64(alpha); - const int cWidth = v_float64::nlanes; + const int cWidth = VTraits::vlanes(); for (; i <= len - cWidth; i += cWidth) v_store(dst + i, v_muladd(vx_load(src1 + i), a2, vx_load(src2 + i))); vx_cleanup(); @@ -2078,7 +2092,7 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double deltastep = deltastep ? 
4 : 0; } -#if CV_SIMD_64F +#if CV_SIMD128_64F v_float64x2 v_scale = v_setall_f64(scale); #endif @@ -2090,7 +2104,7 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double for( j = i; j <= size.width - 4; j += 4 ) { -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { v_float64x2 s0 = v_setzero_f64(), s1 = v_setzero_f64(); @@ -2150,7 +2164,7 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double for( j = i; j <= size.width - 4; j += 4 ) { -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { v_float64x2 s0 = v_setzero_f64(), s1 = v_setzero_f64(); @@ -2227,7 +2241,7 @@ MulTransposedL(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double double s = 0; const sT *tsrc1 = src + i*srcstep; const sT *tsrc2 = src + j*srcstep; -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { const double *v_tsrc1 = (double *)(tsrc1); @@ -2280,7 +2294,7 @@ MulTransposedL(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double delta_buf[2] = delta_buf[3] = tdelta2[0]; tdelta2 = delta_buf; } -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { const double *v_tsrc2 = (double *)(tsrc2); @@ -2393,14 +2407,14 @@ double dotProd_8u(const uchar* src1, const uchar* src2, int len) double r = 0; int i = 0; -#if CV_SIMD - int len0 = len & -v_uint16::nlanes, blockSize0 = (1 << 15), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 15), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_uint32 v_sum = vx_setzero_u32(); - const int cWidth = v_uint16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth * 2; j += cWidth * 2) @@ -2414,7 +2428,7 @@ double dotProd_8u(const uchar* src1, const uchar* src2, int len) { v_int16 v_src10 = v_reinterpret_as_s16(vx_load_expand(src1 + j)); v_int16 v_src20 = v_reinterpret_as_s16(vx_load_expand(src2 + j)); - v_sum += v_reinterpret_as_u32(v_dotprod_fast(v_src10, v_src20)); + v_sum = v_add(v_sum, v_reinterpret_as_u32(v_dotprod_fast(v_src10, v_src20))); } r += (double)v_reduce_sum(v_sum); @@ -2433,14 +2447,14 @@ double dotProd_8s(const schar* src1, const schar* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_int16::nlanes, blockSize0 = (1 << 14), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 14), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_int32 v_sum = vx_setzero_s32(); - const int cWidth = v_int16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth * 2; j += cWidth * 2) @@ -2473,14 +2487,14 @@ double dotProd_16u(const ushort* src1, const ushort* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_uint16::nlanes, blockSize0 = (1 << 24), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 24), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_uint64 v_sum = vx_setzero_u64(); - const int cWidth = 
v_uint16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth; j += cWidth) @@ -2505,14 +2519,14 @@ double dotProd_16s(const short* src1, const short* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_int16::nlanes, blockSize0 = (1 << 24), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 24), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_int64 v_sum = vx_setzero_s64(); - const int cWidth = v_int16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth; j += cWidth) @@ -2534,10 +2548,10 @@ double dotProd_16s(const short* src1, const short* src2, int len) double dotProd_32s(const int* src1, const int* src2, int len) { -#if CV_SIMD_64F +#if CV_SIMD_64F // TODO: enable for CV_SIMD_SCALABLE_64F double r = .0; int i = 0; - const int step = v_int32::nlanes; + const int step = VTraits::vlanes(); v_float64 v_sum0 = vx_setzero_f64(); #if CV_SIMD_WIDTH == 16 const int wstep = step * 2; @@ -2572,8 +2586,8 @@ double dotProd_32f(const float* src1, const float* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_float32::nlanes, blockSize0 = (1 << 13), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 13), blockSize; while (i < len0) { @@ -2581,7 +2595,7 @@ double dotProd_32f(const float* src1, const float* src2, int len) v_float32 v_sum = vx_setzero_f32(); int j = 0; - int cWidth = v_float32::nlanes; + int cWidth = VTraits::vlanes(); #if CV_ENABLE_UNROLLED v_float32 v_sum1 = vx_setzero_f32(); @@ -2600,7 +2614,7 @@ double dotProd_32f(const float* src1, const float* src2, int len) vx_load(src2 + j + (cWidth * 3)), v_sum3); } - v_sum += v_sum1 + v_sum2 + v_sum3; + v_sum = v_add(v_sum, v_add(v_add(v_sum1, v_sum2), v_sum3)); #endif for (; j <= blockSize - cWidth; j += cWidth) diff --git 
a/modules/core/src/matrix_transform.cpp b/modules/core/src/matrix_transform.cpp index 7f1043fbbe..43bf9be057 100644 --- a/modules/core/src/matrix_transform.cpp +++ b/modules/core/src/matrix_transform.cpp @@ -7,6 +7,7 @@ #include "opencv2/core/detail/dispatch_helper.impl.hpp" #include // std::swap_ranges +#include // std::accumulate namespace cv { @@ -440,7 +441,7 @@ template CV_ALWAYS_INLINE void flipHoriz_double( const static void flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) { -#if CV_SIMD +#if CV_SIMD128 #if CV_STRONG_ALIGNMENT size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep; #endif @@ -563,7 +564,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, } #endif else -#endif // CV_SIMD +#endif // CV_SIMD128 { int i, j, limit = (int)(((size.width + 1)/2)*esz); AutoBuffer _tab(size.width*esz); @@ -596,12 +597,12 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, dst0 += dstep, dst1 -= dstep ) { int i = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) #if CV_STRONG_ALIGNMENT if (isAligned(src0, src1, dst0, dst1)) #endif { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + for (; i <= size.width - VTraits::vlanes(); i += VTraits::vlanes()) { v_int32 t0 = v_reinterpret_as_s32(vx_load(src0 + i)); v_int32 t1 = v_reinterpret_as_s32(vx_load(src1 + i)); @@ -612,7 +613,7 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, #if CV_STRONG_ALIGNMENT else { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + for (; i <= size.width - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint8 t0 = vx_load(src0 + i); v_uint8 t1 = vx_load(src1 + i); @@ -857,6 +858,223 @@ void flipND(InputArray _src, OutputArray _dst, int _axis) flipNDImpl(dst.ptr(), dst.size.p, dst.step.p, axis); } +/* + This function first prepends 1 to each tensor shape to have a common max_ndims dimension, then flatten non-broadcast 
dimensions. +*/ +static bool _flatten_for_broadcast(int narrays, int max_ndims, const int* ndims, const int** orig_shape, + int** flatten_shape, size_t** flatten_step) { + int i, j, k; + + // step 1. + // * make all inputs and the output max_ndims-dimensional. + // * compute proper step's + for (i = max_ndims - 1; i >= 0; i-- ) { + for (k = 0; k < narrays; k++) { + j = ndims[k] - (max_ndims - i); + int sz_i = j >= 0 ? orig_shape[k][j] : 1; + size_t st_i = i == max_ndims - 1 ? 1 : flatten_step[k][i+1] * flatten_shape[k][i+1]; + flatten_shape[k][i] = sz_i; + flatten_step[k][i] = st_i; + if (flatten_shape[k][i] == 0) + return false; + } + } + + // step 2. Let's do the flattening first, + // since we'd need proper values of steps to check continuity. + // this loop is probably the most tricky part + // in the whole implementation of broadcasting. + j = max_ndims-1; + for (i = j - 1; i >= 0; i--) { + bool all_contiguous = true, all_scalars = true, all_consistent = true; + for(k = 0; k < narrays; k++) { + size_t st = flatten_step[k][j] * flatten_shape[k][j]; + bool prev_scalar = flatten_shape[k][j] == 1; + bool scalar = flatten_shape[k][i] == 1; + all_contiguous = all_contiguous && (st == flatten_step[k][i]); + all_scalars = all_scalars && scalar; + all_consistent = all_consistent && (scalar == prev_scalar); + } + if (all_contiguous && (all_consistent || all_scalars)) { + for(k = 0; k < narrays; k++) + flatten_shape[k][j] *= flatten_shape[k][i]; + } else { + j--; + if (i < j) { + for(k = 0; k < narrays; k++) { + flatten_shape[k][j] = flatten_shape[k][i]; + flatten_step[k][j] = flatten_step[k][i]; + } + } + } + } + + // step 3. Set some step's to 0's. + for (i = max_ndims-1; i >= j; i--) { + for (k = 0; k < narrays; k++) + flatten_step[k][i] = flatten_shape[k][i] == 1 ? 
0 : flatten_step[k][i]; + } + for (; i >= 0; i--) { + for (k = 0; k < narrays; k++) { + flatten_step[k][i] = 0; + flatten_shape[k][i] = 1; + } + } + return true; +} + +void broadcast(InputArray _src, InputArray _shape, OutputArray _dst) { + CV_INSTRUMENT_REGION(); + + Mat src = _src.getMat(); + CV_CheckTrue(src.isContinuous(), "broadcast: input array must be contiguous"); + CV_CheckChannelsEQ(src.channels(), 1, "broadcast: input array must be single channel"); + + Mat shape = _shape.getMat(); + CV_CheckTypeEQ(shape.type(), CV_32S, "broadcast: target shape must be of type int32"); + const auto dims_shape = static_cast(shape.total()); + const auto *ptr_shape = shape.ptr(); + + // check valid shape, 1D/0D Mat would fail in the following checks + const auto dims_src = src.dims; + CV_CheckLE(dims_src, dims_shape, + "broadcast: dimension of input array must be less than or equal to dimension of target shape"); + std::vector shape_src{src.size.p, src.size.p + dims_src}; + if (shape_src.size() < static_cast(dims_shape)) { + shape_src.insert(shape_src.begin(), dims_shape - shape_src.size(), 1); + } + for (int i = 0; i < static_cast(shape_src.size()); ++i) { + const auto *shape_target = ptr_shape; + if (shape_src[i] != 1) { + CV_CheckEQ(shape_src[i], shape_target[i], "target shape must be equal to input shape or 1"); + } + } + + // impl + _dst.create(dims_shape, shape.ptr(), src.type()); + Mat dst = _dst.getMat(); + std::vector is_same_shape(dims_shape, 0); + for (int i = 0; i < static_cast(shape_src.size()); ++i) { + if (shape_src[i] == ptr_shape[i]) { + is_same_shape[i] = 1; + } + } + // copy if same shape + if (std::accumulate(is_same_shape.begin(), is_same_shape.end(), 1, std::multiplies()) != 0) { + const auto *p_src = src.ptr(); + auto *p_dst = dst.ptr(); + std::memcpy(p_dst, p_src, dst.total() * dst.elemSize()); + return; + } + // other cases + int max_ndims = std::max(dims_src, dims_shape); + const int all_ndims[2] = {src.dims, dst.dims}; + const int* orig_shapes[2] 
= {src.size.p, dst.size.p}; + cv::AutoBuffer buff(max_ndims * 4); + int* flatten_shapes[2] = {(int*)buff.data(), (int*)(buff.data() + max_ndims)}; + size_t* flatten_steps[2] = {(size_t*)(buff.data() + 2 * max_ndims), (size_t*)(buff.data() + 3 * max_ndims)}; + if (_flatten_for_broadcast(2, max_ndims, all_ndims, orig_shapes, flatten_shapes, flatten_steps)) { + size_t src_dp = flatten_steps[0][max_ndims - 1]; + size_t dst_dp = flatten_steps[1][max_ndims - 1]; + CV_Assert(dst_dp == 1); + CV_Assert(max_ndims >= 2); // >= 3? + size_t rowstep_src = flatten_steps[0][max_ndims - 2]; + size_t rowstep_dst = flatten_steps[1][max_ndims - 2]; + const char* ptr_src = src.ptr(); + char* ptr_dst = dst.ptr(); + size_t esz = src.elemSize(); + int nrows = flatten_shapes[1][max_ndims - 2]; + int ncols = flatten_shapes[1][max_ndims - 1]; + int nplanes = 1; + CV_Check(esz, esz == 1 || esz == 2 || esz == 4 || esz == 8, "broadcast: not supported data type"); + + for (int k = 0; k < max_ndims - 2; k++) { + nplanes *= flatten_shapes[1][k]; + } + for (int plane_idx = 0; plane_idx < nplanes; plane_idx++) { + size_t offset_src = 0, offset_dst = 0; + size_t idx = (size_t)plane_idx; + for (int k = max_ndims - 3; k >= 0; k--) { + size_t prev_idx = idx / flatten_shapes[1][k]; + size_t i_k = (int)(idx - prev_idx * flatten_shapes[1][k]); + offset_src += i_k * flatten_steps[0][k]; + offset_dst += i_k * flatten_steps[1][k]; + idx = prev_idx; + } + + #define OPENCV_CORE_BROADCAST_LOOP(_Tp) \ + for (int i = 0; i < nrows; i++) { \ + const _Tp *ptr_src_ = (const _Tp*)ptr_src + offset_src + rowstep_src * i; \ + _Tp *ptr_dst_ = (_Tp*)ptr_dst + offset_dst + rowstep_dst * i; \ + if (src_dp == 1) { \ + for (int j = 0; j < ncols; j++) { \ + ptr_dst_[j] = ptr_src_[j]; \ + } \ + } else { \ + _Tp x = *ptr_src_; \ + for (int j = 0; j < ncols; j++) { \ + ptr_dst_[j] = x; \ + } \ + } \ + } + + if (esz == 1) { + OPENCV_CORE_BROADCAST_LOOP(int8_t); + } else if (esz == 2) { + OPENCV_CORE_BROADCAST_LOOP(int16_t); + } else 
if (esz == 4) { + OPENCV_CORE_BROADCAST_LOOP(int32_t); + } else if (esz == 8) { + OPENCV_CORE_BROADCAST_LOOP(int64_t); + } else { + CV_Error(cv::Error::StsNotImplemented, ""); + } + #undef OPENCV_CORE_BROADCAST_LOOP + } + } else { + // initial copy (src to dst) + std::vector step_src{src.step.p, src.step.p + dims_src}; + if (step_src.size() < static_cast(dims_shape)) { + step_src.insert(step_src.begin(), dims_shape - step_src.size(), step_src[0]); + } + for (size_t i = 0; i < src.total(); ++i) { + size_t t = i; + size_t src_offset = 0, dst_offset = 0; + for (int j = static_cast(shape_src.size() - 1); j >= 0; --j) { + size_t idx = t / shape_src[j]; + size_t offset = static_cast(t - idx * shape_src[j]); + src_offset += offset * step_src[j]; + dst_offset += offset * dst.step[j]; + t = idx; + } + const auto *p_src = src.ptr(); + auto *p_dst = dst.ptr(); + std::memcpy(p_dst + dst_offset, p_src + src_offset, dst.elemSize()); + } + // broadcast copy (dst inplace) + std::vector cumulative_shape(dims_shape, 1); + int total = static_cast(dst.total()); + for (int i = dims_shape - 1; i >= 0; --i) { + cumulative_shape[i] = static_cast(total / ptr_shape[i]); + total = cumulative_shape[i]; + } + for (int i = dims_shape - 1; i >= 0; --i) { + if (is_same_shape[i] == 1) { + continue; + } + auto step = dst.step[i]; + auto *p_dst = dst.ptr(); + for (int j = 0; j < cumulative_shape[i]; j++) { + for (int k = 0; k < ptr_shape[i] - 1; k++) { + std::memcpy(p_dst + step, p_dst, step); + p_dst += step; + } + p_dst += step; + } + } + } +} + void rotate(InputArray _src, OutputArray _dst, int rotateMode) { CV_Assert(_src.dims() <= 2); diff --git a/modules/core/src/mean.dispatch.cpp b/modules/core/src/mean.dispatch.cpp index 6a5275ab43..0f94e5421a 100644 --- a/modules/core/src/mean.dispatch.cpp +++ b/modules/core/src/mean.dispatch.cpp @@ -8,20 +8,24 @@ #include "opencv2/core/openvx/ovx_defs.hpp" #include "stat.hpp" +#ifndef OPENCV_IPP_MEAN #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define 
CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) +#endif // OPENCV_IPP_MEAN #include "mean.simd.hpp" #include "mean.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content +#ifndef OPENCV_IPP_MEAN #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) +#endif // OPENCV_IPP_MEAN namespace cv { diff --git a/modules/core/src/merge.dispatch.cpp b/modules/core/src/merge.dispatch.cpp index 6b8c2d8135..bd7a936cf9 100644 --- a/modules/core/src/merge.dispatch.cpp +++ b/modules/core/src/merge.dispatch.cpp @@ -121,6 +121,7 @@ void merge(const Mat* mv, size_t n, OutputArray _dst) CV_INSTRUMENT_REGION(); CV_Assert( mv && n > 0 ); + CV_Assert(!mv[0].empty()); int depth = mv[0].depth(); bool allch1 = true; diff --git a/modules/core/src/merge.simd.hpp b/modules/core/src/merge.simd.hpp index ad08dd8879..d67a117c7b 100644 --- a/modules/core/src/merge.simd.hpp +++ b/modules/core/src/merge.simd.hpp @@ -15,7 +15,7 @@ void merge64s(const int64** src, int64* dst, int len, int cn); #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) /* The trick with STORE_UNALIGNED/STORE_ALIGNED_NOCACHE is the following: on IA there are instructions movntps and such to which @@ -38,7 +38,7 @@ void merge64s(const int64** src, int64* dst, int len, int cn); template static void vecmerge_( const T** src, T* dst, int len, int cn ) { - const int VECSZ = VecT::nlanes; + const int VECSZ = VTraits::vlanes(); int i, i0 = 0; const T* src0 = src[0]; const T* src1 = src[1]; @@ -173,8 +173,8 @@ merge_( const T** src, T* dst, int len, int cn ) void merge8u(const uchar** src, uchar* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint8::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif @@ -184,8 
+184,8 @@ void merge8u(const uchar** src, uchar* dst, int len, int cn ) void merge16u(const ushort** src, ushort* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint16::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif @@ -195,8 +195,8 @@ void merge16u(const ushort** src, ushort* dst, int len, int cn ) void merge32s(const int** src, int* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_int32::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif @@ -206,8 +206,8 @@ void merge32s(const int** src, int* dst, int len, int cn ) void merge64s(const int64** src, int64* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_int64::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif diff --git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp index d694d99d3c..d4328f5070 100644 --- a/modules/core/src/minmax.cpp +++ b/modules/core/src/minmax.cpp @@ -11,11 +11,13 @@ #include +#ifndef OPENCV_IPP_MINMAX #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) 
+#endif // OPENCV_IPP_MINMAX #define IPP_DISABLE_MINMAXIDX_MANY_ROWS 1 // see Core_MinMaxIdx.rows_overflow test diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp index 72d6fd9abc..931d6f02b8 100644 --- a/modules/core/src/norm.cpp +++ b/modules/core/src/norm.cpp @@ -63,25 +63,25 @@ int normHamming(const uchar* a, int n, int cellSize) return -1; int i = 0; int result = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_uint64 t = vx_setzero_u64(); if ( cellSize == 2) { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i)); - t += v_popcount(v_reinterpret_as_u64((a0 | (a0 >> 1)) & mask)); + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_and(v_or(a0, v_shr<1>(a0)), mask)))); } } else // cellSize == 4 { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i)); - v_uint16 a1 = a0 | (a0 >> 2); - t += v_popcount(v_reinterpret_as_u64((a1 | (a1 >> 1)) & mask)); + v_uint16 a1 = v_or(a0, v_shr<2>(a0)); + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_and(v_or(a1, v_shr<1>(a1)), mask)))); } } @@ -109,25 +109,25 @@ int normHamming(const uchar* a, const uchar* b, int n, int cellSize) return -1; int i = 0; int result = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_uint64 t = vx_setzero_u64(); if ( cellSize == 2) { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) { - v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i)); - t += v_popcount(v_reinterpret_as_u64((ab0 | (ab0 >> 1)) & mask)); + v_uint16 ab0 = v_reinterpret_as_u16(v_xor(vx_load(a + i), vx_load(b + i))); + t = v_add(t, 
v_popcount(v_reinterpret_as_u64(v_and(v_or(ab0, v_shr<1>(ab0)), mask)))); } } else // cellSize == 4 { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) { - v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i)); - v_uint16 ab1 = ab0 | (ab0 >> 2); - t += v_popcount(v_reinterpret_as_u64((ab1 | (ab1 >> 1)) & mask)); + v_uint16 ab0 = v_reinterpret_as_u16(v_xor(vx_load(a + i), vx_load(b + i))); + v_uint16 ab1 = v_or(ab0, v_shr<2>(ab0)); + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_and(v_or(ab1, v_shr<1>(ab1)), mask)))); } } result += (int)v_reduce_sum(t); @@ -145,21 +145,21 @@ int normHamming(const uchar* a, const uchar* b, int n, int cellSize) float normL2Sqr_(const float* a, const float* b, int n) { int j = 0; float d = 0.f; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32(); v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32(); - for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes) + for (; j <= n - 4 * VTraits::vlanes(); j += 4 * VTraits::vlanes()) { - v_float32 t0 = vx_load(a + j) - vx_load(b + j); - v_float32 t1 = vx_load(a + j + v_float32::nlanes) - vx_load(b + j + v_float32::nlanes); + v_float32 t0 = v_sub(vx_load(a + j), vx_load(b + j)); + v_float32 t1 = v_sub(vx_load(a + j + VTraits::vlanes()), vx_load(b + j + VTraits::vlanes())); v_d0 = v_muladd(t0, t0, v_d0); - v_float32 t2 = vx_load(a + j + 2 * v_float32::nlanes) - vx_load(b + j + 2 * v_float32::nlanes); + v_float32 t2 = v_sub(vx_load(a + j + 2 * VTraits::vlanes()), vx_load(b + j + 2 * VTraits::vlanes())); v_d1 = v_muladd(t1, t1, v_d1); - v_float32 t3 = vx_load(a + j + 3 * v_float32::nlanes) - vx_load(b + j + 3 * v_float32::nlanes); + v_float32 t3 = v_sub(vx_load(a + j + 3 * VTraits::vlanes()), vx_load(b + j + 3 * VTraits::vlanes())); v_d2 = v_muladd(t2, t2, v_d2); v_d3 = v_muladd(t3, t3, 
v_d3); } - d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3); + d = v_reduce_sum(v_add(v_add(v_add(v_d0, v_d1), v_d2), v_d3)); #endif for( ; j < n; j++ ) { @@ -173,17 +173,17 @@ float normL2Sqr_(const float* a, const float* b, int n) float normL1_(const float* a, const float* b, int n) { int j = 0; float d = 0.f; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32(); v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32(); - for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes) + for (; j <= n - 4 * VTraits::vlanes(); j += 4 * VTraits::vlanes()) { - v_d0 += v_absdiff(vx_load(a + j), vx_load(b + j)); - v_d1 += v_absdiff(vx_load(a + j + v_float32::nlanes), vx_load(b + j + v_float32::nlanes)); - v_d2 += v_absdiff(vx_load(a + j + 2 * v_float32::nlanes), vx_load(b + j + 2 * v_float32::nlanes)); - v_d3 += v_absdiff(vx_load(a + j + 3 * v_float32::nlanes), vx_load(b + j + 3 * v_float32::nlanes)); + v_d0 = v_add(v_d0, v_absdiff(vx_load(a + j), vx_load(b + j))); + v_d1 = v_add(v_d1, v_absdiff(vx_load(a + j + VTraits::vlanes()), vx_load(b + j + VTraits::vlanes()))); + v_d2 = v_add(v_d2, v_absdiff(vx_load(a + j + 2 * VTraits::vlanes()), vx_load(b + j + 2 * VTraits::vlanes()))); + v_d3 = v_add(v_d3, v_absdiff(vx_load(a + j + 3 * VTraits::vlanes()), vx_load(b + j + 3 * VTraits::vlanes()))); } - d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3); + d = v_reduce_sum(v_add(v_add(v_add(v_d0, v_d1), v_d2), v_d3)); #endif for( ; j < n; j++ ) d += std::abs(a[j] - b[j]); @@ -193,12 +193,12 @@ float normL1_(const float* a, const float* b, int n) int normL1_(const uchar* a, const uchar* b, int n) { int j = 0, d = 0; -#if CV_SIMD - for (; j <= n - 4 * v_uint8::nlanes; j += 4 * v_uint8::nlanes) +#if (CV_SIMD || CV_SIMD_SCALABLE) + for (; j <= n - 4 * VTraits::vlanes(); j += 4 * VTraits::vlanes()) d += v_reduce_sad(vx_load(a + j), vx_load(b + j)) + - v_reduce_sad(vx_load(a + j + v_uint8::nlanes), vx_load(b + j + v_uint8::nlanes)) + - 
v_reduce_sad(vx_load(a + j + 2 * v_uint8::nlanes), vx_load(b + j + 2 * v_uint8::nlanes)) + - v_reduce_sad(vx_load(a + j + 3 * v_uint8::nlanes), vx_load(b + j + 3 * v_uint8::nlanes)); + v_reduce_sad(vx_load(a + j + VTraits::vlanes()), vx_load(b + j + VTraits::vlanes())) + + v_reduce_sad(vx_load(a + j + 2 * VTraits::vlanes()), vx_load(b + j + 2 * VTraits::vlanes())) + + v_reduce_sad(vx_load(a + j + 3 * VTraits::vlanes()), vx_load(b + j + 3 * VTraits::vlanes())); #endif for( ; j < n; j++ ) d += std::abs(a[j] - b[j]); diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index cf0a6466ea..f39dfcae7d 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -322,16 +322,20 @@ int decodeSimpleFormat( const char* dt ) } -#if defined __i386__ || defined(_M_IX86) || defined __x86_64__ || defined(_M_X64) -#define CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS 1 +#if defined __i386__ || defined(_M_IX86) || defined __x86_64__ || defined(_M_X64) || \ + (defined (__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__) +#define CV_LITTLE_ENDIAN_MEM_ACCESS 1 #else -#define CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS 0 +#define CV_LITTLE_ENDIAN_MEM_ACCESS 0 #endif static inline int readInt(const uchar* p) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - return *(const int*)p; + // On little endian CPUs, both branches produce the same result. On big endian, only the else branch does. +#if CV_LITTLE_ENDIAN_MEM_ACCESS + int val; + memcpy(&val, p, sizeof(val)); + return val; #else int val = (int)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); return val; @@ -340,8 +344,11 @@ static inline int readInt(const uchar* p) static inline double readReal(const uchar* p) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - return *(const double*)p; + // On little endian CPUs, both branches produce the same result. On big endian, only the else branch does. 
+#if CV_LITTLE_ENDIAN_MEM_ACCESS + double val; + memcpy(&val, p, sizeof(val)); + return val; #else unsigned val0 = (unsigned)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); unsigned val1 = (unsigned)(p[4] | (p[5] << 8) | (p[6] << 16) | (p[7] << 24)); @@ -353,9 +360,9 @@ static inline double readReal(const uchar* p) static inline void writeInt(uchar* p, int ival) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - int* ip = (int*)p; - *ip = ival; + // On little endian CPUs, both branches produce the same result. On big endian, only the else branch does. +#if CV_LITTLE_ENDIAN_MEM_ACCESS + memcpy(p, &ival, sizeof(ival)); #else p[0] = (uchar)ival; p[1] = (uchar)(ival >> 8); @@ -366,9 +373,9 @@ static inline void writeInt(uchar* p, int ival) static inline void writeReal(uchar* p, double fval) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - double* fp = (double*)p; - *fp = fval; + // On little endian CPUs, both branches produce the same result. On big endian, only the else branch does. +#if CV_LITTLE_ENDIAN_MEM_ACCESS + memcpy(p, &fval, sizeof(fval)); #else Cv64suf v; v.f = fval; diff --git a/modules/core/src/persistence_xml.cpp b/modules/core/src/persistence_xml.cpp index caba4f5bf0..6141fade2d 100644 --- a/modules/core/src/persistence_xml.cpp +++ b/modules/core/src/persistence_xml.cpp @@ -308,8 +308,8 @@ public: if( !multiline ) { - ptr = fs->resizeWriteBuffer( ptr, len + 9 ); - sprintf( ptr, "", comment ); + ptr = fs->resizeWriteBuffer( ptr, len + 5+4+1 ); + snprintf( ptr, len + 5+4+1, "", comment ); len = (int)strlen(ptr); } else @@ -344,7 +344,7 @@ public: fs->setBufferPtr(ptr); ptr = fs->flush(); } - sprintf( ptr, "-->" ); + strcpy( ptr, "-->" ); fs->setBufferPtr(ptr + 3); fs->flush(); } diff --git a/modules/core/src/split.simd.hpp b/modules/core/src/split.simd.hpp index 25e90c0063..88414161b8 100644 --- a/modules/core/src/split.simd.hpp +++ b/modules/core/src/split.simd.hpp @@ -15,12 +15,12 @@ void split64s(const int64* src, int64** dst, int len, int cn); #ifndef 
CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) // see the comments for vecmerge_ in merge.cpp template static void vecsplit_( const T* src, T** dst, int len, int cn ) { - const int VECSZ = VecT::nlanes; + const int VECSZ = VTraits::vlanes(); int i, i0 = 0; T* dst0 = dst[0]; T* dst1 = dst[1]; @@ -177,8 +177,8 @@ split_( const T* src, T** dst, int len, int cn ) void split8u(const uchar* src, uchar** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint8::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif @@ -188,8 +188,8 @@ void split8u(const uchar* src, uchar** dst, int len, int cn ) void split16u(const ushort* src, ushort** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint16::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif @@ -199,8 +199,8 @@ void split16u(const ushort* src, ushort** dst, int len, int cn ) void split32s(const int* src, int** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint32::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif @@ -210,8 +210,8 @@ void split32s(const int* src, int** dst, int len, int cn ) void split64s(const int64* src, int64** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_int64::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif diff --git a/modules/core/src/stat.simd.hpp b/modules/core/src/stat.simd.hpp index 0592f84794..a5fb05476d 100644 --- a/modules/core/src/stat.simd.hpp +++ b/modules/core/src/stat.simd.hpp @@ -33,11 +33,11 
@@ int normHamming(const uchar* a, int n) int i = 0; int result = 0; -#if CV_SIMD && CV_SIMD_WIDTH > 16 +#if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint64 t = vx_setzero_u64(); - for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) - t += v_popcount(v_reinterpret_as_u64(vx_load(a + i))); + for (; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) + t = v_add(t, v_popcount(v_reinterpret_as_u64(vx_load(a + i)))); result = (int)v_reduce_sum(t); vx_cleanup(); } @@ -56,13 +56,6 @@ int normHamming(const uchar* a, int n) result += CV_POPCNT_U32(*(uint*)(a + i)); } } -#elif CV_SIMD - { - v_uint64x2 t = v_setzero_u64(); - for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) - t += v_popcount(v_reinterpret_as_u64(v_load(a + i))); - result += (int)v_reduce_sum(t); - } #endif #if CV_ENABLE_UNROLLED for(; i <= n - 4; i += 4) @@ -85,11 +78,11 @@ int normHamming(const uchar* a, const uchar* b, int n) int i = 0; int result = 0; -#if CV_SIMD && CV_SIMD_WIDTH > 16 +#if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint64 t = vx_setzero_u64(); - for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) - t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i))); + for (; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_xor(vx_load(a + i), vx_load(b + i))))); result += (int)v_reduce_sum(t); } #endif @@ -107,13 +100,6 @@ int normHamming(const uchar* a, const uchar* b, int n) result += CV_POPCNT_U32(*(uint*)(a + i) ^ *(uint*)(b + i)); } } -#elif CV_SIMD - { - v_uint64x2 t = v_setzero_u64(); - for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) - t += v_popcount(v_reinterpret_as_u64(v_load(a + i) ^ v_load(b + i))); - result += (int)v_reduce_sum(t); - } #endif #if CV_ENABLE_UNROLLED for(; i <= n - 4; i += 4) diff --git a/modules/core/src/sum.dispatch.cpp b/modules/core/src/sum.dispatch.cpp index a1f7d73868..fade948336 100644 --- a/modules/core/src/sum.dispatch.cpp +++ b/modules/core/src/sum.dispatch.cpp @@ -10,11 +10,13 @@ 
#include "sum.simd.hpp" #include "sum.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content +#ifndef OPENCV_IPP_SUM #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) +#endif // OPENCV_IPP_SUM namespace cv { diff --git a/modules/core/src/sum.simd.hpp b/modules/core/src/sum.simd.hpp index 045f40ebed..f790fc733a 100644 --- a/modules/core/src/sum.simd.hpp +++ b/modules/core/src/sum.simd.hpp @@ -22,7 +22,7 @@ struct Sum_SIMD } }; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) template <> struct Sum_SIMD @@ -36,41 +36,41 @@ struct Sum_SIMD int x = 0; v_uint32 v_sum = vx_setzero_u32(); - int len0 = len & -v_uint8::nlanes; + int len0 = len & -VTraits::vlanes(); while (x < len0) { - const int len_tmp = min(x + 256*v_uint16::nlanes, len0); + const int len_tmp = min(x + 256*VTraits::vlanes(), len0); v_uint16 v_sum16 = vx_setzero_u16(); - for (; x < len_tmp; x += v_uint8::nlanes) + for (; x < len_tmp; x += VTraits::vlanes()) { v_uint16 v_src0, v_src1; v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum16 += v_src0 + v_src1; + v_sum16 = v_add(v_sum16, v_add(v_src0, v_src1)); } v_uint32 v_half0, v_half1; v_expand(v_sum16, v_half0, v_half1); - v_sum += v_half0 + v_half1; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); } - if (x <= len - v_uint16::nlanes) + if (x <= len - VTraits::vlanes()) { v_uint32 v_half0, v_half1; v_expand(vx_load_expand(src0 + x), v_half0, v_half1); - v_sum += v_half0 + v_half1; - x += v_uint16::nlanes; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); + x += VTraits::vlanes(); } - if (x <= len - v_uint32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand_q(src0 + x); - x += v_uint32::nlanes; + v_sum = v_add(v_sum, vx_load_expand_q(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_uint32::nlanes]; + uint32_t 
CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_uint32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -91,41 +91,41 @@ struct Sum_SIMD int x = 0; v_int32 v_sum = vx_setzero_s32(); - int len0 = len & -v_int8::nlanes; + int len0 = len & -VTraits::vlanes(); while (x < len0) { - const int len_tmp = min(x + 256*v_int16::nlanes, len0); + const int len_tmp = min(x + 256*VTraits::vlanes(), len0); v_int16 v_sum16 = vx_setzero_s16(); - for (; x < len_tmp; x += v_int8::nlanes) + for (; x < len_tmp; x += VTraits::vlanes()) { v_int16 v_src0, v_src1; v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum16 += v_src0 + v_src1; + v_sum16 = v_add(v_sum16, v_add(v_src0, v_src1)); } v_int32 v_half0, v_half1; v_expand(v_sum16, v_half0, v_half1); - v_sum += v_half0 + v_half1; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); } - if (x <= len - v_int16::nlanes) + if (x <= len - VTraits::vlanes()) { v_int32 v_half0, v_half1; v_expand(vx_load_expand(src0 + x), v_half0, v_half1); - v_sum += v_half0 + v_half1; - x += v_int16::nlanes; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); + x += VTraits::vlanes(); } - if (x <= len - v_int32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand_q(src0 + x); - x += v_int32::nlanes; + v_sum = v_add(v_sum, vx_load_expand_q(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_int32::nlanes]; + int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_int32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -146,25 +146,25 @@ struct Sum_SIMD int x = 0; v_uint32 v_sum = vx_setzero_u32(); - for (; x <= len - v_uint16::nlanes; x += v_uint16::nlanes) + for (; x <= len - VTraits::vlanes(); x += VTraits::vlanes()) { v_uint32 v_src0, v_src1; 
v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum += v_src0 + v_src1; + v_sum = v_add(v_sum, v_add(v_src0, v_src1)); } - if (x <= len - v_uint32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand(src0 + x); - x += v_uint32::nlanes; + v_sum = v_add(v_sum, vx_load_expand(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_uint32::nlanes]; + uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_uint32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -185,25 +185,25 @@ struct Sum_SIMD int x = 0; v_int32 v_sum = vx_setzero_s32(); - for (; x <= len - v_int16::nlanes; x += v_int16::nlanes) + for (; x <= len - VTraits::vlanes(); x += VTraits::vlanes()) { v_int32 v_src0, v_src1; v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum += v_src0 + v_src1; + v_sum = v_add(v_sum, v_add(v_src0, v_src1)); } - if (x <= len - v_int32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand(src0 + x); - x += v_int32::nlanes; + v_sum = v_add(v_sum, vx_load_expand(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_int32::nlanes]; + int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_int32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -212,7 +212,7 @@ struct Sum_SIMD } }; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template <> struct Sum_SIMD { @@ -226,24 +226,24 @@ struct Sum_SIMD v_float64 v_sum0 = vx_setzero_f64(); v_float64 v_sum1 = vx_setzero_f64(); - for (; x <= len - 2 * v_int32::nlanes; x += 2 * v_int32::nlanes) + for (; x <= len - 2 * VTraits::vlanes(); x += 2 * VTraits::vlanes()) { v_int32 v_src0 = vx_load(src0 + x); - v_int32 
v_src1 = vx_load(src0 + x + v_int32::nlanes); - v_sum0 += v_cvt_f64(v_src0) + v_cvt_f64(v_src1); - v_sum1 += v_cvt_f64_high(v_src0) + v_cvt_f64_high(v_src1); + v_int32 v_src1 = vx_load(src0 + x + VTraits::vlanes()); + v_sum0 = v_add(v_sum0, v_add(v_cvt_f64(v_src0), v_cvt_f64(v_src1))); + v_sum1 = v_add(v_sum1, v_add(v_cvt_f64_high(v_src0), v_cvt_f64_high(v_src1))); } #if CV_SIMD256 || CV_SIMD512 - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_float64::nlanes]; - v_store_aligned(ar, v_sum0 + v_sum1); - for (int i = 0; i < v_float64::nlanes; ++i) + double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; + v_store_aligned(ar, v_add(v_sum0, v_sum1)); + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #else - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * v_float64::nlanes]; + double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * VTraits::max_nlanes]; v_store_aligned(ar, v_sum0); - v_store_aligned(ar + v_float64::nlanes, v_sum1); - for (int i = 0; i < 2 * v_float64::nlanes; ++i) + v_store_aligned(ar + VTraits::vlanes(), v_sum1); + for (int i = 0; i < 2 * VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #endif v_cleanup(); @@ -265,24 +265,24 @@ struct Sum_SIMD v_float64 v_sum0 = vx_setzero_f64(); v_float64 v_sum1 = vx_setzero_f64(); - for (; x <= len - 2 * v_float32::nlanes; x += 2 * v_float32::nlanes) + for (; x <= len - 2 * VTraits::vlanes(); x += 2 * VTraits::vlanes()) { v_float32 v_src0 = vx_load(src0 + x); - v_float32 v_src1 = vx_load(src0 + x + v_float32::nlanes); - v_sum0 += v_cvt_f64(v_src0) + v_cvt_f64(v_src1); - v_sum1 += v_cvt_f64_high(v_src0) + v_cvt_f64_high(v_src1); + v_float32 v_src1 = vx_load(src0 + x + VTraits::vlanes()); + v_sum0 = v_add(v_sum0, v_add(v_cvt_f64(v_src0), v_cvt_f64(v_src1))); + v_sum1 = v_add(v_sum1, v_add(v_cvt_f64_high(v_src0), v_cvt_f64_high(v_src1))); } #if CV_SIMD256 || CV_SIMD512 - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_float64::nlanes]; - v_store_aligned(ar, v_sum0 + v_sum1); - for (int i = 0; i < v_float64::nlanes; ++i) 
+ double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; + v_store_aligned(ar, v_add(v_sum0, v_sum1)); + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #else - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * v_float64::nlanes]; + double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * VTraits::max_nlanes]; v_store_aligned(ar, v_sum0); - v_store_aligned(ar + v_float64::nlanes, v_sum1); - for (int i = 0; i < 2 * v_float64::nlanes; ++i) + v_store_aligned(ar + VTraits::vlanes(), v_sum1); + for (int i = 0; i < 2 * VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #endif v_cleanup(); diff --git a/modules/core/src/utils/filesystem.cpp b/modules/core/src/utils/filesystem.cpp index 17004b27dd..5199f6f57b 100644 --- a/modules/core/src/utils/filesystem.cpp +++ b/modules/core/src/utils/filesystem.cpp @@ -34,7 +34,7 @@ #include #include #include -#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ +#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ || defined __GNU__ #include #include #include @@ -343,7 +343,7 @@ private: Impl& operator=(const Impl&); // disabled }; -#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ +#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ || defined __GNU__ struct FileLock::Impl { @@ -457,7 +457,7 @@ cv::String getCacheDirectory(const char* sub_directory_name, const char* configu default_cache_path = "/tmp/"; CV_LOG_WARNING(NULL, "Using world accessible cache directory. 
This may be not secure: " << default_cache_path); } -#elif defined __linux__ || defined __HAIKU__ || defined __FreeBSD__ +#elif defined __linux__ || defined __HAIKU__ || defined __FreeBSD__ || defined __GNU__ // https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html if (default_cache_path.empty()) { diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index c6756f6502..0b4c010bea 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -2292,6 +2292,139 @@ INSTANTIATE_TEST_CASE_P(Arithm, FlipND, testing::Combine( testing::Values(perf::MatType(CV_8UC1), CV_32FC1) )); +TEST(BroadcastTo, basic) { + std::vector shape_src{2, 1}; + std::vector data_src{1, 2}; + Mat src(static_cast(shape_src.size()), shape_src.data(), CV_32SC1, data_src.data()); + + auto get_index = [](const std::vector& shape, size_t cnt) { + std::vector index(shape.size()); + size_t t = cnt; + for (int i = static_cast(shape.size() - 1); i >= 0; --i) { + size_t idx = t / shape[i]; + index[i] = static_cast(t - idx * shape[i]); + t = idx; + } + return index; + }; + + auto fn_verify = [&get_index](const Mat& ref, const Mat& res) { + // check type + EXPECT_EQ(ref.type(), res.type()); + // check shape + EXPECT_EQ(ref.dims, res.dims); + for (int i = 0; i < ref.dims; ++i) { + EXPECT_EQ(ref.size[i], res.size[i]); + } + // check value + std::vector shape{ref.size.p, ref.size.p + ref.dims}; + for (size_t i = 0; i < ref.total(); ++i) { + auto index = get_index(shape, i); + switch (ref.type()) { + case CV_32SC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + case CV_8UC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + case CV_32FC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + default: FAIL() << "Unsupported type: " << ref.type(); + } + } + }; + + { + std::vector shape{4, 2, 3}; + std::vector data_ref{ + 1, 1, 1, // [0, 0, :] + 2, 2, 2, // [0, 1, :] + 1, 1, 
1, // [1, 0, :] + 2, 2, 2, // [1, 1, :] + 1, 1, 1, // [2, 0, :] + 2, 2, 2, // [2, 1, :] + 1, 1, 1, // [3, 0, :] + 2, 2, 2 // [3, 1, :] + }; + Mat ref(static_cast(shape.size()), shape.data(), src.type(), data_ref.data()); + Mat dst; + broadcast(src, shape, dst); + fn_verify(ref, dst); + } + + { + Mat _src; + src.convertTo(_src, CV_8U); + std::vector shape{4, 2, 3}; + std::vector data_ref{ + 1, 1, 1, // [0, 0, :] + 2, 2, 2, // [0, 1, :] + 1, 1, 1, // [1, 0, :] + 2, 2, 2, // [1, 1, :] + 1, 1, 1, // [2, 0, :] + 2, 2, 2, // [2, 1, :] + 1, 1, 1, // [3, 0, :] + 2, 2, 2 // [3, 1, :] + }; + Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + broadcast(_src, shape, dst); + fn_verify(ref, dst); + } + + { + Mat _src; + src.convertTo(_src, CV_32F); + std::vector shape{1, 1, 2, 1}; // {2, 1} + std::vector data_ref{ + 1.f, // [0, 0, 0, 0] + 2.f, // [0, 0, 1, 0] + }; + Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + broadcast(_src, shape, dst); + fn_verify(ref, dst); + } + + { + std::vector _shape_src{2, 3, 4}; + std::vector _data_src{ + 1.f, 2.f, 3.f, 4.f, // [0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [1, 2, :] + }; + Mat _src(static_cast(_shape_src.size()), _shape_src.data(), CV_32FC1, _data_src.data()); + + std::vector shape{2, 1, 2, 3, 4}; + std::vector data_ref{ + 1.f, 2.f, 3.f, 4.f, // [0, 0, 0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [0, 0, 0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [0, 0, 0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [0, 0, 1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [0, 0, 1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [0, 0, 1, 2, :] + + 1.f, 2.f, 3.f, 4.f, // [1, 0, 0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [1, 0, 0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [1, 0, 0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [1, 0, 1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [1, 0, 1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [1, 0, 1, 2, :] + }; + 
Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + broadcast(_src, shape, dst); + fn_verify(ref, dst); + } +} + TEST(Core_minMaxIdx, regression_9207_2) { const int rows = 13; diff --git a/modules/core/test/test_countnonzero.cpp b/modules/core/test/test_countnonzero.cpp index fe14affb9c..41eaceb189 100644 --- a/modules/core/test/test_countnonzero.cpp +++ b/modules/core/test/test_countnonzero.cpp @@ -259,7 +259,7 @@ TEST_P (CountNonZeroND, ndim) const int ONE_SIZE = 5; vector sizes(dims); - fill(sizes.begin(), sizes.end(), ONE_SIZE); + std::fill(sizes.begin(), sizes.end(), ONE_SIZE); Mat data(sizes, CV_MAKETYPE(type, 1)); data = 0; diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 481e6bb1f2..1ece6de82f 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1475,12 +1475,15 @@ template struct TheTest TheTest & test_float_math() { typedef typename V_RegTraits::round_reg Ri; - Data data1, data2, data3; + Data data1, data1_border, data2, data3; + // See https://github.com/opencv/opencv/issues/24213 + data1_border *= 0.5; data1 *= 1.1; data2 += 10; - R a1 = data1, a2 = data2, a3 = data3; + R a1 = data1, a1_border = data1_border, a2 = data2, a3 = data3; Data resB = v_round(a1), + resB_border = v_round(a1_border), resC = v_trunc(a1), resD = v_floor(a1), resE = v_ceil(a1); @@ -1493,6 +1496,7 @@ template struct TheTest { SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(cvRound(data1[i]), resB[i]); + EXPECT_EQ(cvRound(data1_border[i]), resB_border[i]); EXPECT_EQ((typename VTraits::lane_type)data1[i], resC[i]); EXPECT_EQ(cvFloor(data1[i]), resD[i]); EXPECT_EQ(cvCeil(data1[i]), resE[i]); diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 804b78ead2..774e3c7b5a 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -58,11 +58,6 @@ endif() ocv_cmake_hook_append(INIT_MODULE_SOURCES_opencv_dnn 
"${CMAKE_CURRENT_LIST_DIR}/cmake/hooks/INIT_MODULE_SOURCES_opencv_dnn.cmake") -if(HAVE_TENGINE) - ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_TENGINE=1") -endif() - - if(MSVC) add_definitions( -D_CRT_SECURE_NO_WARNINGS=1 ) ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4244 /wd4267 /wd4018 /wd4355 /wd4800 /wd4251 /wd4996 /wd4146 @@ -172,11 +167,6 @@ else() set(sources_options ${sources_options} EXCLUDE_CUDA) endif() -if(HAVE_TENGINE) - list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS}) - list(APPEND libs -Wl,--whole-archive ${TENGINE_LIBRARIES} -Wl,--no-whole-archive) -endif() - if(HAVE_TIMVX) list(APPEND include_dirs ${TIMVX_INCLUDE_DIR}) list(APPEND libs -Wl,--whole-archive ${TIMVX_LIBRARY} -Wl,--no-whole-archive) @@ -237,6 +227,10 @@ if(TARGET ocv.3rdparty.openvino AND OPENCV_DNN_OPENVINO) endif() endif() +set(OPENCV_DNN_BACKEND_DEFAULT "" CACHE STRING "Default backend used by the DNN module (DNN_BACKEND_OPENCV if empty)") +if(OPENCV_DNN_BACKEND_DEFAULT) + ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/dnn_params.cpp" "OPENCV_DNN_BACKEND_DEFAULT=${OPENCV_DNN_BACKEND_DEFAULT}") +endif() ocv_install_used_external_targets(${libs} ${dnn_runtime_libs}) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index d61f7191bc..2bd3f790b4 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -69,9 +69,7 @@ CV__DNN_INLINE_NS_BEGIN */ enum Backend { - //! DNN_BACKEND_DEFAULT equals to DNN_BACKEND_INFERENCE_ENGINE if - //! OpenCV is built with Intel OpenVINO or - //! DNN_BACKEND_OPENCV otherwise. + //! DNN_BACKEND_DEFAULT equals to OPENCV_DNN_BACKEND_DEFAULT, which can be defined using CMake or a configuration parameter DNN_BACKEND_DEFAULT = 0, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE, //!< Intel OpenVINO computational backend @@ -688,9 +686,6 @@ CV__DNN_INLINE_NS_BEGIN * @brief Ask network to use specific computation backend where it supported. 
* @param[in] backendId backend identifier. * @see Backend - * - * If OpenCV is compiled with Intel's Inference Engine library, DNN_BACKEND_DEFAULT - * means DNN_BACKEND_INFERENCE_ENGINE. Otherwise it equals to DNN_BACKEND_OPENCV. */ CV_WRAP void setPreferableBackend(int backendId); diff --git a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py index 5c91aae56f..a06c02ad2d 100644 --- a/modules/dnn/misc/python/test/test_dnn.py +++ b/modules/dnn/misc/python/test/test_dnn.py @@ -191,10 +191,10 @@ class dnn_test(NewOpenCVTests): def test_model(self): img_path = self.find_dnn_file("dnn/street.png") - weights = self.find_dnn_file("dnn/MobileNetSSD_deploy.caffemodel", required=False) - config = self.find_dnn_file("dnn/MobileNetSSD_deploy.prototxt", required=False) + weights = self.find_dnn_file("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", required=False) + config = self.find_dnn_file("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", required=False) if weights is None or config is None: - raise unittest.SkipTest("Missing DNN test files (dnn/MobileNetSSD_deploy.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") + raise unittest.SkipTest("Missing DNN test files (dnn/MobileNetSSD_deploy_19e3ec3.{prototxt/caffemodel}). 
Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") frame = cv.imread(img_path) model = cv.dnn_DetectionModel(weights, config) diff --git a/modules/dnn/perf/perf_caffe.cpp b/modules/dnn/perf/perf_caffe.cpp index 370f06dba2..f1ba26afcc 100644 --- a/modules/dnn/perf/perf_caffe.cpp +++ b/modules/dnn/perf/perf_caffe.cpp @@ -101,8 +101,8 @@ PERF_TEST(SqueezeNet_v1_1_caffe, CaffePerfTest) PERF_TEST(MobileNet_SSD, CaffePerfTest) { - caffe::Net* net = initNet("dnn/MobileNetSSD_deploy.prototxt", - "dnn/MobileNetSSD_deploy.caffemodel"); + caffe::Net* net = initNet("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", + "dnn/MobileNetSSD_deploy_19e3ec3.caffemodel"); TEST_CYCLE() net->Forward(); SANITY_CHECK_NOTHING(); } diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp index 261bc5c3ca..3020dbea66 100644 --- a/modules/dnn/perf/perf_layer.cpp +++ b/modules/dnn/perf/perf_layer.cpp @@ -678,7 +678,6 @@ PERF_TEST_P_(Layer_FullyConnected, fc) lp.set("axis", input.dims - 1); lp.set("is_matmul", weights.dims > 2); lp.set("bias_term", false); - lp.set("transB", true); lp.set("num_output", (int)weights.total(0, weights.dims - 1)); lp.blobs.resize(1, weights); diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index cfbb45b173..7f852e8f7b 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -141,7 +141,7 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); - processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", "", + processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", "", Mat(cv::Size(300, 300), CV_32FC3)); } diff --git a/modules/dnn/src/dnn_params.cpp b/modules/dnn/src/dnn_params.cpp index 86a43db757..a76f4cd512 100644 --- a/modules/dnn/src/dnn_params.cpp +++ b/modules/dnn/src/dnn_params.cpp @@ -36,7 +36,11 @@ bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES() int 
getParam_DNN_BACKEND_DEFAULT() { static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT", +#ifdef OPENCV_DNN_BACKEND_DEFAULT + (size_t)OPENCV_DNN_BACKEND_DEFAULT +#else (size_t)DNN_BACKEND_OPENCV +#endif ); return PARAM_DNN_BACKEND_DEFAULT; } diff --git a/modules/dnn/src/dnn_utils.cpp b/modules/dnn/src/dnn_utils.cpp index 18c7e975eb..d4d7dda008 100644 --- a/modules/dnn/src/dnn_utils.cpp +++ b/modules/dnn/src/dnn_utils.cpp @@ -5,6 +5,7 @@ #include "precomp.hpp" #include +#include namespace cv { @@ -100,15 +101,29 @@ void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, con images_.getMatVector(images); CV_Assert(!images.empty()); - int nch = images[0].channels(); - Scalar scalefactor = param.scalefactor; - if (param.ddepth == CV_8U) { - CV_Assert(scalefactor == Scalar::all(1.0) && "Scaling is not supported for CV_8U blob depth"); + CV_Assert(param.scalefactor == Scalar::all(1.0) && "Scaling is not supported for CV_8U blob depth"); CV_Assert(param.mean == Scalar() && "Mean subtraction is not supported for CV_8U blob depth"); } + int nch = images[0].channels(); + Scalar scalefactor = param.scalefactor; + Scalar mean = param.mean; + + if (param.swapRB) + { + if (nch > 2) + { + std::swap(mean[0], mean[2]); + std::swap(scalefactor[0], scalefactor[2]); + } + else + { + CV_LOG_WARNING(NULL, "Red/blue color swapping requires at least three image channels."); + } + } + for (size_t i = 0; i < images.size(); i++) { Size imgSize = images[i].size(); @@ -126,34 +141,26 @@ void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, con size); images[i] = images[i](crop); } + else if (param.paddingmode == DNN_PMODE_LETTERBOX) + { + float resizeFactor = std::min(size.width / (float)imgSize.width, + size.height / (float)imgSize.height); + int rh = int(imgSize.height * resizeFactor); + int rw = int(imgSize.width * resizeFactor); + resize(images[i], images[i], Size(rw, rh), INTER_LINEAR); + + 
int top = (size.height - rh)/2; + int bottom = size.height - top - rh; + int left = (size.width - rw)/2; + int right = size.width - left - rw; + copyMakeBorder(images[i], images[i], top, bottom, left, right, BORDER_CONSTANT); + } else { - if (param.paddingmode == DNN_PMODE_LETTERBOX) - { - float resizeFactor = std::min(size.width / (float)imgSize.width, - size.height / (float)imgSize.height); - int rh = int(imgSize.height * resizeFactor); - int rw = int(imgSize.width * resizeFactor); - resize(images[i], images[i], Size(rw, rh), INTER_LINEAR); - - int top = (size.height - rh)/2; - int bottom = size.height - top - rh; - int left = (size.width - rw)/2; - int right = size.width - left - rw; - copyMakeBorder(images[i], images[i], top, bottom, left, right, BORDER_CONSTANT); - } - else - resize(images[i], images[i], size, 0, 0, INTER_LINEAR); + resize(images[i], images[i], size, 0, 0, INTER_LINEAR); } } - Scalar mean = param.mean; - if (param.swapRB) - { - std::swap(mean[0], mean[2]); - std::swap(scalefactor[0], scalefactor[2]); - } - if (images[i].depth() == CV_8U && param.ddepth == CV_32F) images[i].convertTo(images[i], CV_32F); @@ -220,18 +227,22 @@ void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, con CV_Assert(image.depth() == blob_.depth()); CV_Assert(image.channels() == image0.channels()); CV_Assert(image.size() == image0.size()); - if (param.swapRB) + if (nch > 2 && param.swapRB) { Mat tmpRB; cvtColor(image, tmpRB, COLOR_BGR2RGB); tmpRB.copyTo(Mat(tmpRB.rows, tmpRB.cols, subMatType, blob.ptr((int)i, 0))); } else + { image.copyTo(Mat(image.rows, image.cols, subMatType, blob.ptr((int)i, 0))); + } } } else + { CV_Error(Error::StsUnsupportedFormat, "Unsupported data layout in blobFromImagesWithParams function."); + } } void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_) diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 140d4b0d2f..f9341febb5 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ 
b/modules/dnn/src/ie_ngraph.cpp @@ -383,11 +383,17 @@ public: #endif // OpenVINO >= 2022.1 -InfEngineNgraphNode::InfEngineNgraphNode(std::shared_ptr&& _node) - : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(std::move(_node)) {} +InfEngineNgraphNode::InfEngineNgraphNode(ngraph::Output&& _node) + : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(std::move(_node)) { + CV_Assert(node.get_node()); + CV_Assert(node.get_node_shared_ptr()); +} -InfEngineNgraphNode::InfEngineNgraphNode(const std::shared_ptr& _node) - : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(_node) {} +InfEngineNgraphNode::InfEngineNgraphNode(const ngraph::Output& _node) + : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(_node) { + CV_Assert(node.get_node()); + CV_Assert(node.get_node_shared_ptr()); +} InfEngineNgraphNode::InfEngineNgraphNode(const std::vector >& nodes, Ptr& cvLayer_, std::vector& inputs, @@ -420,7 +426,7 @@ InfEngineNgraphNode::InfEngineNgraphNode(const std::vector >& n } void InfEngineNgraphNode::setName(const std::string& name) { - node->set_friendly_name(name); + node.get_node()->set_friendly_name(name); } InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl) @@ -441,8 +447,7 @@ InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEn void InfEngineNgraphNet::addOutput(const Ptr& node) { CV_Assert(node); - CV_Assert(node->node); - const std::string& name = node->node->get_friendly_name(); + const std::string& name = node->node.get_node()->get_friendly_name(); requestedOutputs.insert({name, node.get()}); } @@ -458,7 +463,7 @@ void InfEngineNgraphNet::createNet(Target targetId) { CV_Assert(output_node_it->second); auto out = std::make_shared(output_node_it->second->node); #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) - out->set_friendly_name(output_node_it->first + (output_node_it->second->node->get_output_size() == 1 ? 
"" : ".0")); + out->set_friendly_name(output_node_it->first + (output_node_it->second->node.get_node()->get_output_size() == 1 ? "" : ".0")); #endif outs.push_back(out); } diff --git a/modules/dnn/src/ie_ngraph.hpp b/modules/dnn/src/ie_ngraph.hpp index 7bb0ac09df..cc8f53ca5c 100644 --- a/modules/dnn/src/ie_ngraph.hpp +++ b/modules/dnn/src/ie_ngraph.hpp @@ -93,13 +93,13 @@ public: std::vector& inputs, std::vector& outputs, std::vector& internals); - InfEngineNgraphNode(std::shared_ptr&& _node); - InfEngineNgraphNode(const std::shared_ptr& _node); + InfEngineNgraphNode(ngraph::Output&& _node); + InfEngineNgraphNode(const ngraph::Output& _node); void setName(const std::string& name); // Inference Engine network object that allows to obtain the outputs of this layer. - std::shared_ptr node; + ngraph::Output node; Ptr net; Ptr cvLayer; }; diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index b90ee934ef..1d95096e60 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -457,7 +457,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { auto ieInpNode = nodes[0].dynamicCast()->node; - std::vector shape(ieInpNode->get_shape().size(), 1); + std::vector shape(ieInpNode.get_shape().size(), 1); shape[1] = weights_.total(); auto weight = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), weights_.data); auto bias = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), bias_.data); diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp index 3095e2d6c9..16de23b15e 100644 --- a/modules/dnn/src/layers/blank_layer.cpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -148,7 +148,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { - auto& ieInpNode = nodes[0].dynamicCast()->node; + auto ieInpNode = 
nodes[0].dynamicCast()->node; ngraph::OutputVector inp{ieInpNode}; auto blank = std::make_shared(inp, 0); return Ptr(new InfEngineNgraphNode(blank)); diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index 6bd3dcdea5..a5af16f32e 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -392,7 +392,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { - const int numDims = nodes[0].dynamicCast()->node->get_shape().size(); + const int numDims = nodes[0].dynamicCast()->node.get_shape().size(); const int cAxis = normalize_axis(axis, numDims); std::vector maxDims(numDims, 0); @@ -403,7 +403,7 @@ public: auto inp = nodes[i].dynamicCast()->node; inp_nodes.push_back(inp); - std::vector inpShape = inp->get_shape(); + std::vector inpShape = inp.get_shape(); for (int i = 0; i < numDims; ++i) maxDims[i] = std::max(maxDims[i], inpShape[i]); } diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 2787d64880..d6e0aba1c6 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -62,9 +62,6 @@ #include "opencl_kernels_dnn.hpp" using namespace cv::dnn::ocl4dnn; #endif -#ifdef HAVE_TENGINE -#include "../tengine4dnn/include/tengine_graph_convolution.hpp" -#endif #ifdef HAVE_CUDA #include "../cuda4dnn/primitives/convolution.hpp" @@ -267,10 +264,6 @@ public: float power; #endif -#ifdef HAVE_TENGINE - teng_graph_t tengine_graph; -#endif - #ifdef HAVE_CUDA cuda4dnn::ConvolutionConfiguration::FusionMode cudaFusionMode; cuda4dnn::ConvolutionConfiguration::ActivationType cudaActType; @@ -289,20 +282,8 @@ public: #ifdef HAVE_CUDA cudaFusionMode = cuda4dnn::ConvolutionConfiguration::FusionMode::NONE; cudaActType = cuda4dnn::ConvolutionConfiguration::ActivationType::IDENTITY; -#endif -#ifdef HAVE_TENGINE - tengine_graph=NULL; #endif } -#ifdef 
HAVE_TENGINE - ~ConvolutionLayerImpl() - { - if(NULL != tengine_graph ) - { - tengine_release(tengine_graph); - } - } -#endif MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE { @@ -466,13 +447,6 @@ public: for(int i = 0; i < numOutput; i++ ) biasvec[i] = biasMat.at(i); } -#ifdef HAVE_TENGINE - if(NULL != tengine_graph ) - { - tengine_release(tengine_graph); - tengine_graph = NULL ; - } -#endif #ifdef HAVE_OPENCL convolutionOp.release(); #endif @@ -848,13 +822,13 @@ public: CV_Assert(!blobs.empty()); CV_Assert_N(inputs.size() >= 1, nodes.size() >= 1); auto& ieInpNode = nodes[0].dynamicCast()->node; - std::vector dims = ieInpNode->get_shape(); + std::vector dims = ieInpNode.get_shape(); CV_Check(dims.size(), dims.size() >= 3 && dims.size() <= 5, ""); - std::shared_ptr ieWeights = nodes.size() > 1 ? nodes[1].dynamicCast()->node : nullptr; + ngraph::Output ieWeights; if (nodes.size() > 1) - CV_Assert(ieWeights); // dynamic_cast should not fail + ieWeights = nodes[1].dynamicCast()->node; const int inpCn = dims[1]; - const int inpGroupCn = nodes.size() > 1 ? ieWeights->get_shape()[1] : blobs[0].size[1]; + const int inpGroupCn = nodes.size() > 1 ? ieWeights.get_shape()[1] : blobs[0].size[1]; const int group = inpCn / inpGroupCn; std::vector kernel_shape; @@ -1095,7 +1069,7 @@ public: config.pads = pads; config.stride = stride; config.dilation = dilation; - if (inputs[0].dims != 4 && inputs[0].dims != umat_blobs[0].dims) + if (inputs[0].dims != 4 && inputs[0].dims != (blobs.empty() ? umat_blobs[0].dims : blobs[0].dims)) { static bool bypassCheck = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_CONVOLUTION_IGNORE_INPUT_DIMS_4_CHECK", false); if (!bypassCheck) @@ -1107,7 +1081,7 @@ public: return false; } } - config.group = inputs[0].size[1] / umat_blobs[0].size[1]; + config.group = inputs[0].size[1] / (blobs.empty() ? 
umat_blobs[0].size[1] : blobs[0].size[1]); if (config.group < 1) // config.group == 0 causes div by zero in ocl4dnn code { CV_LOG_WARNING(NULL, "DNN/OpenCL: Unsupported config.group=" << config.group @@ -1305,65 +1279,6 @@ public: } } -#ifdef HAVE_TENGINE - bool tengine_ret = false; - - std::vector teng_in, teng_out; - inputs_arr.getMatVector(teng_in); - outputs_arr.getMatVector(teng_out); - - int inch = teng_in[0].size[1]; // inch - int in_h = teng_in[0].size[2]; // in_h - int in_w = teng_in[0].size[3]; // in_w - - int out_b = teng_out[0].size[0]; // out batch size - int outch = teng_out[0].size[1]; // outch - int out_h = teng_out[0].size[2]; // out_h - int out_w = teng_out[0].size[3]; // out_w - - float *input_ = teng_in[0].ptr(); - float *output_ = teng_out[0].ptr(); - float *kernel_ = weightsMat.ptr(); - float *teg_bias = &biasvec[0]; - - int nstripes = std::max(getNumThreads(), 1); - - /* tengine_init will run when first time. */ - if(NULL == tengine_graph) - { - // pads_begin: 0 - pad_top, 1 - pad_left - // pads_end: 0 - pad_bottom, 1 - pad_right - // pad_h0: pad_top, pad_h1: pad_bottom - // pad_w0: pad_left, pad_w1: pad_right - tengine_graph = tengine_init(name.c_str(), input_, inch, ngroups, in_h, in_w, - output_, out_b, outch, out_h, out_w, - kernel_, kernel_size.size(), kernel.height, kernel.width, - teg_bias, stride.height, stride.width, - pads_begin[0], pads_end[0], pads_begin[1], pads_end[1], dilation.height, dilation.width, - weightsMat.step1(), padMode, tengine_graph, nstripes); - // printf("Init(%s): input=%p(%d %d %d %d ),output=%p(%d %d %d %d ),kernel=%p(%ld %d %d ), bias=%p ," - // "stride(%d %d), pad(%d %d %d %d), dilation(%d %d) ,weightsMat=%ld, padMode=%s ,tengine_graph = %p \n", - // name.c_str(),input_, inch, ngroups, in_h, in_w, - // output_, out_b, outch, out_h, out_w, - // kernel_, kernel_size.size(), kernel.height, kernel.width, - // teg_bias, stride.height, stride.width, - // pads_begin[0], pads_end[0], pads_begin[1], pads_end[1], 
dilation.height, dilation.width, - // weightsMat.step1(), padMode.c_str() ,tengine_graph); - } - if(NULL != tengine_graph) - { - tengine_ret = tengine_forward(tengine_graph); - } - /* activation */ - if((true == tengine_ret) && activ ) - { - int out_cstep = out_h * out_w; // out_cstep - - ActivationLayer* activ_ = activ.get(); - activ_->forwardSlice(output_, output_, out_cstep, out_cstep, 0, outch); - } - if(false == tengine_ret) -#endif { int nstripes = std::max(getNumThreads(), 1); int conv_dim = CONV_2D; diff --git a/modules/dnn/src/layers/cpu_kernels/convolution.hpp b/modules/dnn/src/layers/cpu_kernels/convolution.hpp index 22ef9a8575..5effdc2d0c 100644 --- a/modules/dnn/src/layers/cpu_kernels/convolution.hpp +++ b/modules/dnn/src/layers/cpu_kernels/convolution.hpp @@ -14,7 +14,7 @@ #define CONV_NR_FP32 28 // The FP16 can only be supported by ARM64 and with FP16 FMA supported. -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC // check FP16 FMA. +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && CV_FP16 // check FP16 FMA. 
#define CONV_ARM_FP16 1 #endif diff --git a/modules/dnn/src/layers/crop_and_resize_layer.cpp b/modules/dnn/src/layers/crop_and_resize_layer.cpp index eb8822870f..a6f58f8983 100644 --- a/modules/dnn/src/layers/crop_and_resize_layer.cpp +++ b/modules/dnn/src/layers/crop_and_resize_layer.cpp @@ -133,7 +133,7 @@ public: auto input = nodes[0].dynamicCast()->node; auto rois = nodes[1].dynamicCast()->node; - auto rois_shape = rois->get_shape(); + auto rois_shape = rois.get_shape(); std::vector dims(rois_shape.begin(), rois_shape.end()), offsets(4, 0); offsets[3] = 2; dims[3] = 7; diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 3bcd53f95c..4247511879 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -490,7 +490,7 @@ struct ReLUFunctor : public BaseFunctor #endif #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { if (slope) { auto param = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &slope); @@ -674,7 +674,7 @@ struct ReLU6Functor : public BaseFunctor #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node, minValue, maxValue); } @@ -796,7 +796,7 @@ struct BaseDefaultFunctor : public BaseFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { CV_Error(Error::StsNotImplemented, ""); } @@ -929,7 +929,7 @@ struct TanHFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node); } @@ -998,7 +998,7 @@ struct SwishFunctor : public 
BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto sigmoid = std::make_shared(node); return std::make_shared(node, sigmoid); @@ -1074,7 +1074,7 @@ struct MishFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { float one = 1.0f; auto constant = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &one); @@ -1157,7 +1157,7 @@ struct SigmoidFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node); } @@ -1237,7 +1237,7 @@ struct ELUFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node, alpha); } @@ -1307,7 +1307,7 @@ struct AbsValFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { float coeff = -0.999999f; // float coeff = preferableTarget == DNN_TARGET_MYRIAD ? 
-0.999f : -0.999999f; @@ -1603,7 +1603,7 @@ struct SqrtFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node); } @@ -2329,7 +2329,7 @@ struct PowerFunctor : public BaseFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto scale_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &scale); @@ -2439,7 +2439,7 @@ struct ExpFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto scale_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &normScale); @@ -2598,7 +2598,7 @@ struct ChannelsPReLUFunctor : public BaseFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { const size_t numChannels = scale.total(); auto slope = std::make_shared(ngraph::element::f32, ngraph::Shape{numChannels}, scale.data); @@ -2678,7 +2678,7 @@ struct PReLUFunctor : public ChannelsPReLUFunctor } #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto shape = getShape(scale); auto slope = std::make_shared(ngraph::element::f32, shape, scale.ptr()); diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index 8ed1b799eb..49b3c02de3 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -896,12 +896,14 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { + CV_Assert(nodes.size() 
>= 2); auto curr_node = nodes[0].dynamicCast()->node; if (!coeffs.empty()) { auto coeff = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &coeffs[0]); curr_node = std::make_shared(curr_node, coeff, ngraph::op::AutoBroadcastType::NUMPY); } + std::shared_ptr res; for (size_t i = 1; i < nodes.size(); i++) { auto next_node = nodes[i].dynamicCast()->node; @@ -910,15 +912,16 @@ public: next_node = std::make_shared(next_node, coeff, ngraph::op::AutoBroadcastType::NUMPY); } switch (op) { - case SUM: curr_node = std::make_shared(curr_node, next_node); break; - case PROD: curr_node = std::make_shared(curr_node, next_node); break; - case DIV: curr_node = std::make_shared(curr_node, next_node); break; - case MAX: curr_node = std::make_shared(curr_node, next_node); break; - case MIN: curr_node = std::make_shared(curr_node, next_node); break; + case SUM: res = std::make_shared(curr_node, next_node); break; + case PROD: res = std::make_shared(curr_node, next_node); break; + case DIV: res = std::make_shared(curr_node, next_node); break; + case MAX: res = std::make_shared(curr_node, next_node); break; + case MIN: res = std::make_shared(curr_node, next_node); break; default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation"); } + curr_node = res; } - return Ptr(new InfEngineNgraphNode(curr_node)); + return Ptr(new InfEngineNgraphNode(res)); } #endif // HAVE_DNN_NGRAPH diff --git a/modules/dnn/src/layers/flatten_layer.cpp b/modules/dnn/src/layers/flatten_layer.cpp index 6a502af7e9..9ff3bec38b 100644 --- a/modules/dnn/src/layers/flatten_layer.cpp +++ b/modules/dnn/src/layers/flatten_layer.cpp @@ -209,7 +209,7 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; - std::vector dims = ieInpNode->get_shape(); + std::vector dims = ieInpNode.get_shape(); int numAxes = dims.size(); int startAxis = normalize_axis(_startAxis, numAxes); diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp 
b/modules/dnn/src/layers/fully_connected_layer.cpp index 9cdb31023c..f03af7c1fb 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -803,7 +803,7 @@ public: } else { - std::vector shape(1 + normalize_axis(axis, ieInpNode->get_shape().size()), 0); + std::vector shape(1 + normalize_axis(axis, ieInpNode.get_shape().size()), 0); shape[shape.size() - 1] = -1; auto inp = std::make_shared( ieInpNode, diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index 61c2224e36..f8de64cb32 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -480,7 +480,7 @@ public: if (type != SPATIAL_NRM) { axes = {1}; } else { - axes.resize(ieInpNode->get_shape().size() - 2); + axes.resize(ieInpNode.get_shape().size() - 2); std::iota(axes.begin(), axes.end(), 2); } auto ngraph_axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes.size()}, axes.data()); diff --git a/modules/dnn/src/layers/max_unpooling_layer.cpp b/modules/dnn/src/layers/max_unpooling_layer.cpp index 6a599408e1..7ed6c64ae8 100644 --- a/modules/dnn/src/layers/max_unpooling_layer.cpp +++ b/modules/dnn/src/layers/max_unpooling_layer.cpp @@ -194,7 +194,7 @@ public: std::vector inpShapes(nodes.size()); std::vector outShapes, internals; for (int i = 0; i < nodes.size(); ++i) { - std::vector shape = nodes[i].dynamicCast()->node->get_shape(); + std::vector shape = nodes[i].dynamicCast()->node.get_shape(); inpShapes[i] = std::vector(shape.begin(), shape.end()); } getMemoryShapes(inpShapes, 1, outShapes, internals); @@ -213,7 +213,7 @@ public: std::make_shared(ngraph::element::i32, ngraph::Shape{1}, &newShape), true ); - if (indices->get_element_type() != ngraph::element::i32 && indices->get_element_type() != ngraph::element::i64) { + if (indices.get_element_type() != ngraph::element::i32 && indices.get_element_type() != ngraph::element::i64) { indices = std::make_shared(indices, 
ngraph::element::i64); } diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp index dc23656b7a..aae53fa327 100644 --- a/modules/dnn/src/layers/mvn_layer.cpp +++ b/modules/dnn/src/layers/mvn_layer.cpp @@ -390,7 +390,7 @@ public: auto mvn = std::make_shared(ieInpNode, acrossChannels, normVariance, eps); #else int64_t start_axis = acrossChannels ? 1 : 2; - std::vector axes_v(ieInpNode->get_shape().size() - start_axis); + std::vector axes_v(ieInpNode.get_shape().size() - start_axis); std::iota(axes_v.begin(), axes_v.end(), start_axis); auto axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes_v.size()}, axes_v.data()); auto mvn = std::make_shared(ieInpNode, axes, normVariance, eps, ngraph::op::MVNEpsMode::INSIDE_SQRT); diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp index fadbf58244..8572eee995 100644 --- a/modules/dnn/src/layers/nary_eltwise_layers.cpp +++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp @@ -900,12 +900,12 @@ public: auto& inp0 = nodes[0].dynamicCast()->node; auto& inp1 = nodes[1].dynamicCast()->node; - if (inp0->get_element_type() != inp1->get_element_type()) { + if (inp0.get_element_type() != inp1.get_element_type()) { auto dtype = preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD ? 
ngraph::element::f16 : ngraph::element::f32; - if (inp0->get_element_type() != dtype) + if (inp0.get_element_type() != dtype) inp0 = std::make_shared(inp0, dtype); - if (inp1->get_element_type() != dtype) + if (inp1.get_element_type() != dtype) inp1 = std::make_shared(inp1, dtype); } diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp index f0ad6e6f61..431eeab82d 100644 --- a/modules/dnn/src/layers/normalize_bbox_layer.cpp +++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp @@ -273,21 +273,21 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; - const size_t batch = ieInpNode->get_shape()[0]; - const size_t numChannels = ieInpNode->get_shape()[1]; + const size_t batch = ieInpNode.get_shape()[0]; + const size_t numChannels = ieInpNode.get_shape()[1]; std::vector axes_data; if (!acrossSpatial) { axes_data.push_back(1); } else { - axes_data.resize(ieInpNode->get_shape().size() - 1); + axes_data.resize(ieInpNode.get_shape().size() - 1); std::iota(axes_data.begin(), axes_data.end(), 1); } auto axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes_data.size()}, axes_data); auto norm = std::make_shared(ieInpNode, axes, epsilon, ngraph::op::EpsMode::ADD); CV_Assert(blobs.empty() || numChannels == blobs[0].total()); - std::vector shape(ieInpNode->get_shape().size(), 1); + std::vector shape(ieInpNode.get_shape().size(), 1); shape[0] = blobs.empty() ? 
1 : batch; shape[1] = numChannels; if (!blobs.empty()) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 5caaa36ba0..a75382d8a5 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -209,7 +209,8 @@ public: #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()); + return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()) && + (!computeMaxIdx || INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1)); } #endif if (backendId == DNN_BACKEND_OPENCV) @@ -600,7 +601,7 @@ public: return Ptr(new InfEngineNgraphNode(ave_pool)); } else if (type == SUM) { - ngraph::Shape inpShape = ieInpNode->get_shape(); + ngraph::Shape inpShape = ieInpNode.get_shape(); CV_Assert(inpShape.size() == 2 + kernel_size.size()); std::vector axes; for (size_t i = 0; i < kernel_size.size(); i++) @@ -615,10 +616,14 @@ public: else if (type == MAX) { std::shared_ptr max_pool; if (computeMaxIdx) { +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) std::vector dilations(kernel_size.size(), 1); max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), ngraph::Strides(dilations), ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), rounding_type, pad_type); +#else + CV_Error(Error::StsNotImplemented, "OpenVINO MaxPool with indices"); +#endif } else { max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp index e9edcf1547..2f2a33cc6f 100644 --- a/modules/dnn/src/layers/proposal_layer.cpp +++ b/modules/dnn/src/layers/proposal_layer.cpp @@ -366,10 +366,10 @@ public: auto& class_logits = 
nodes[1].dynamicCast()->node; auto& image_shape = nodes[2].dynamicCast()->node; - CV_Assert_N(image_shape->get_shape().size() == 2, image_shape->get_shape().front() == 1); + CV_Assert_N(image_shape.get_shape().size() == 2, image_shape.get_shape().front() == 1); auto shape = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, - std::vector{(int64_t)image_shape->get_shape().back()}); + std::vector{(int64_t)image_shape.get_shape().back()}); auto reshape = std::make_shared(image_shape, shape, true); auto proposal = std::make_shared(class_probs, class_logits, reshape, attr); diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp index 7ab8cdd93f..49952b4c83 100644 --- a/modules/dnn/src/layers/region_layer.cpp +++ b/modules/dnn/src/layers/region_layer.cpp @@ -466,7 +466,7 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& input = nodes[0].dynamicCast()->node; - auto parent_shape = input->get_shape(); + auto parent_shape = input.get_shape(); int64_t b = parent_shape[0]; int64_t h = parent_shape[1]; int64_t w = parent_shape[2]; @@ -567,7 +567,7 @@ public: int hNorm, wNorm; if (nodes.size() > 1) { - auto node_1_shape = nodes[1].dynamicCast()->node->get_shape(); + auto node_1_shape = nodes[1].dynamicCast()->node.get_shape(); hNorm = node_1_shape[2]; wNorm = node_1_shape[3]; } diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index 607adb8aa1..fe27748319 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -443,7 +443,7 @@ public: std::vector shape = {outHeight, outWidth}; auto out_shape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, shape.data()); - auto& input_shape = ieInpNode->get_shape(); + auto& input_shape = ieInpNode.get_shape(); CV_Assert_N(input_shape[2] != 0, input_shape[3] != 0); std::vector scales = {static_cast(outHeight) / input_shape[2], static_cast(outWidth) / input_shape[3]}; auto scales_shape = 
std::make_shared(ngraph::element::f32, ngraph::Shape{2}, scales.data()); diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 5338ab2215..2a4e1a05d5 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -331,34 +331,36 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { auto ieInpNode0 = nodes[0].dynamicCast()->node; - auto ieInpNode1 = nodes.size() > 1 ? nodes[1].dynamicCast()->node : nullptr; + ngraph::Output ieInpNode1; + if (nodes.size() > 1) + ieInpNode1 = nodes[1].dynamicCast()->node; size_t numChannels = 1; if (blobs.empty()) - for (const size_t& dim : ieInpNode1->get_shape()) + for (const size_t& dim : ieInpNode1.get_shape()) numChannels *= dim; else numChannels = blobs[0].total(); - std::vector shape(ieInpNode0->get_shape().size(), 1); + std::vector shape(ieInpNode0.get_shape().size(), 1); int cAxis = normalize_axis(axis, shape.size()); shape[cAxis] = numChannels; - auto node = ieInpNode0; + std::shared_ptr node; if (hasWeights) { - auto weight = blobs.empty() ? ieInpNode1 : + ngraph::Output weight = blobs.empty() ? ieInpNode1 : std::make_shared(ngraph::element::f32, ngraph::Shape(shape), blobs[0].data); #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2) - node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); + node = std::make_shared(ieInpNode0, weight, ngraph::op::AutoBroadcastType::NUMPY); #else - node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); + node = std::make_shared(ieInpNode0, weight, ngraph::op::AutoBroadcastType::NUMPY); #endif } if (hasBias || !hasWeights) { - std::shared_ptr bias; + ngraph::Output bias; if (hasBias) { bias = blobs.empty() ? 
ieInpNode1 : diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index d3675e23a5..c44d18182e 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -759,7 +759,7 @@ public: { CV_Assert_N(nodes.size() <= 2); auto& ieInpNode = nodes[0].dynamicCast()->node; - CV_Assert(finalSliceRanges[0].size() == ieInpNode->get_shape().size()); + CV_Assert(finalSliceRanges[0].size() == ieInpNode.get_shape().size()); std::vector offsets, dims; for (int i = 0; i < finalSliceRanges[0].size(); ++i) diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index b74f2b6791..faab6a565f 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -385,7 +385,7 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; - int axis = normalize_axis(axisRaw, ieInpNode->get_shape().size()); + int axis = normalize_axis(axisRaw, ieInpNode.get_shape().size()); auto softmax = std::make_shared(ieInpNode, axis); if (logSoftMax) return Ptr(new InfEngineNgraphNode(std::make_shared(softmax))); diff --git a/modules/dnn/src/net_impl_fuse.cpp b/modules/dnn/src/net_impl_fuse.cpp index 4570d2b360..dfa542bd41 100644 --- a/modules/dnn/src/net_impl_fuse.cpp +++ b/modules/dnn/src/net_impl_fuse.cpp @@ -210,7 +210,7 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) if (!nextData->params.has("operation") || toLowerCase(nextData->params.get("operation")) != "add") { CV_LOG_DEBUG(NULL, "DNN/CPU: fusion with NaryEltwise or Eltwise Layer operation is not supported: " - << nextData->params.get("operation")); + << toLowerCase(nextData->params.get("operation", "sum"))); break; } diff --git a/modules/dnn/src/net_openvino.cpp b/modules/dnn/src/net_openvino.cpp index e974ce34a3..c274f44a87 100644 --- a/modules/dnn/src/net_openvino.cpp +++ b/modules/dnn/src/net_openvino.cpp @@ -252,7 +252,7 @@ void 
NetImplOpenVINO::addNgraphOutputs(LayerData& ld) CV_Assert(!ieInpNode->net.empty()); if (layerNet != ieInpNode->net) { - CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node->get_friendly_name()); + CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node.get_node()->get_friendly_name()); ieInpNode->net->addOutput(ieInpNode); } } @@ -321,8 +321,10 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) return; } +#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2022_1) bool supportsCPUFallback = !isArmComputePlugin() && (preferableTarget == DNN_TARGET_CPU || openvino::checkTarget(DNN_TARGET_CPU)); +#endif // Build Inference Engine networks from sets of layers that support this // backend. Split a whole model on several Inference Engine networks if @@ -341,6 +343,10 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) bool fused = ld.skip; Ptr layer = ld.layerInstance; +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) + if (ld.id == 0) + continue; +#else if (!fused && !layer->supportBackend(preferableBackend)) { CV_LOG_DEBUG(NULL, "DNN/IE: NOT supported!"); @@ -355,17 +361,6 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) } } - // TODO: fix these workarounds - if (preferableTarget == DNN_TARGET_MYRIAD || - preferableTarget == DNN_TARGET_HDDL || - preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Concat"; - - if (preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Power"; - if (preferableTarget == DNN_TARGET_OPENCL) customizable &= ld.type != "Eltwise"; @@ -390,6 +385,7 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) continue; } } +#endif ld.skip = true; // Initially skip all Inference Engine supported layers. // Create a new network if one of inputs from different Inference Engine graph. 
@@ -478,7 +474,7 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) int oid = ld.inputBlobsId[i].oid; auto ieInpNode = inputNodes[i].dynamicCast(); - const auto& ngraph_input_node = ieInpNode->node; + const auto& ngraph_input_node = ieInpNode->node.get_node_shared_ptr(); CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")"); if ((oid == 0 && ngraph_input_node->get_output_size() == 1) || lid == 0) @@ -498,10 +494,7 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) } CV_CheckLT((size_t)oid, ngraph_input_node->get_output_size(), ""); #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - // FIXIT refactor ".initNgraph()" API to use Output - // WA: use Concat to emulate Identity operation with requested output port - auto oid_node = std::make_shared(ngraph::OutputVector { ngraph_input_node->output(oid) }, 0); - inputNodes[i] = Ptr(new InfEngineNgraphNode(oid_node)); + inputNodes[i] = new InfEngineNgraphNode(ngraph_input_node->output(oid)); #elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3) inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid))); #else @@ -556,6 +549,36 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) addNgraphOutputs(ld); } + // User may choose to return only intermediate blobs but not network's result (see Test_TFLite.max_unpooling) + // Such layers should not be skipped when forwardLayer is called. 
+ // Also, perform a sanity check that there is no double inferred networks (a single skip=false per unique net instance) + std::set> uniqueNets; + if (!blobsToKeep_.empty()) + { + LayerPin latestLayerPin = getLatestLayerPin(blobsToKeep_); + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData& ld = it->second; + auto iter = ld.backendNodes.find(preferableBackend); + if (iter == ld.backendNodes.end()) + continue; + + Ptr& node = iter->second; + if (node.empty()) + continue; + + Ptr ieNode = node.dynamicCast(); + if (ieNode.empty()) + continue; + + if (ld.id == latestLayerPin.lid) { + ld.skip = false; + uniqueNets.insert(ieNode->net); + break; + } + } + } + // Initialize all networks. for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) { @@ -578,9 +601,15 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) { ieNode->net->addOutput(ieNode); ieNode->net->createNet((Target)preferableTarget); - ld.skip = false; + if (uniqueNets.find(ieNode->net) == uniqueNets.end()) { + ld.skip = false; + uniqueNets.insert(ieNode->net); + } } } +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) + CV_Assert(uniqueNets.size() == 1); +#endif } diff --git a/modules/dnn/src/op_halide.cpp b/modules/dnn/src/op_halide.cpp index 653de36146..db1a72278e 100644 --- a/modules/dnn/src/op_halide.cpp +++ b/modules/dnn/src/op_halide.cpp @@ -14,6 +14,7 @@ #include "halide_scheduler.hpp" #include +#include #endif // HAVE_HALIDE namespace cv { diff --git a/modules/dnn/src/opencl/gemm_buffer.cl b/modules/dnn/src/opencl/gemm_buffer.cl index b345983aee..70028b0eec 100644 --- a/modules/dnn/src/opencl/gemm_buffer.cl +++ b/modules/dnn/src/opencl/gemm_buffer.cl @@ -453,14 +453,14 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( int w; for(int b_tile = 0; b_tile < K; b_tile += SLM_BLOCK) { barrier(CLK_LOCAL_MEM_FENCE); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(0, K, local_index))), 0, (__local 
float *)(slm_brow + mad24(0, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(1, K, local_index))), 0, (__local float *)(slm_brow + mad24(1, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(2, K, local_index))), 0, (__local float *)(slm_brow + mad24(2, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(3, K, local_index))), 0, (__local float *)(slm_brow + mad24(3, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(4, K, local_index))), 0, (__local float *)(slm_brow + mad24(4, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(5, K, local_index))), 0, (__local float *)(slm_brow + mad24(5, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(6, K, local_index))), 0, (__local float *)(slm_brow + mad24(6, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(7, K, local_index))), 0, (__local float *)(slm_brow + mad24(7, SLM_BLOCK, local_index))); + vstore8(vload8(0, src1_read0 + mad24(0, K, local_index)), 0, slm_brow + mad24(0, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(1, K, local_index)), 0, slm_brow + mad24(1, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(2, K, local_index)), 0, slm_brow + mad24(2, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(3, K, local_index)), 0, slm_brow + mad24(3, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(4, K, local_index)), 0, slm_brow + mad24(4, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(5, K, local_index)), 0, slm_brow + mad24(5, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(6, K, local_index)), 0, slm_brow + mad24(6, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(7, K, local_index)), 0, slm_brow + mad24(7, SLM_BLOCK, local_index)); barrier(CLK_LOCAL_MEM_FENCE); 
slm_brow0 = slm_brow + local_x * (TILE_K / 8); @@ -469,17 +469,17 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( while( w + TILE_K <= end_w ) { Dtype8 arow; - brow0 = as_half8(vload4(0, (__local float *)(slm_brow0 + 0 * SLM_BLOCK))); - brow1 = as_half8(vload4(0, (__local float *)(slm_brow0 + 1 * SLM_BLOCK))); - brow2 = as_half8(vload4(0, (__local float *)(slm_brow0 + 2 * SLM_BLOCK))); - brow3 = as_half8(vload4(0, (__local float *)(slm_brow0 + 3 * SLM_BLOCK))); - brow4 = as_half8(vload4(0, (__local float *)(slm_brow0 + 4 * SLM_BLOCK))); - brow5 = as_half8(vload4(0, (__local float *)(slm_brow0 + 5 * SLM_BLOCK))); - brow6 = as_half8(vload4(0, (__local float *)(slm_brow0 + 6 * SLM_BLOCK))); - brow7 = as_half8(vload4(0, (__local float *)(slm_brow0 + 7 * SLM_BLOCK))); + brow0 = vload8(0, slm_brow0 + 0 * SLM_BLOCK); + brow1 = vload8(0, slm_brow0 + 1 * SLM_BLOCK); + brow2 = vload8(0, slm_brow0 + 2 * SLM_BLOCK); + brow3 = vload8(0, slm_brow0 + 3 * SLM_BLOCK); + brow4 = vload8(0, slm_brow0 + 4 * SLM_BLOCK); + brow5 = vload8(0, slm_brow0 + 5 * SLM_BLOCK); + brow6 = vload8(0, slm_brow0 + 6 * SLM_BLOCK); + brow7 = vload8(0, slm_brow0 + 7 * SLM_BLOCK); #define MM_DOT_PRODUCT( _row, _dot ) \ - arow = as_half8(vload4(0, (__global float *)(src0_read + _row * K))); \ + arow = vload8(0, src0_read + _row * K); \ _dot = mad( (Dtype8)(arow.s0), (Dtype8)(brow0.s0, brow1.s0, brow2.s0, brow3.s0, brow4.s0, brow5.s0, brow6.s0, brow7.s0), _dot ); \ _dot = mad( (Dtype8)(arow.s1), (Dtype8)(brow0.s1, brow1.s1, brow2.s1, brow3.s1, brow4.s1, brow5.s1, brow6.s1, brow7.s1), _dot ); \ _dot = mad( (Dtype8)(arow.s2), (Dtype8)(brow0.s2, brow1.s2, brow2.s2, brow3.s2, brow4.s2, brow5.s2, brow6.s2, brow7.s2), _dot ); \ @@ -510,7 +510,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( Dtype8 arow; #define READ_BROW(_brow, _row) \ - _brow = as_half8(vload4(0, (__local float *)(slm_brow0 + _row * SLM_BLOCK))); \ + _brow = vload8(0, slm_brow0 + _row * SLM_BLOCK); \ _brow.s0 = (mad24(local_x, 8, w) < K) ? 
_brow.s0 : 0.0f; \ _brow.s1 = (mad24(local_x, 8, w + 1) < K) ? _brow.s1 : 0.0f; \ _brow.s2 = (mad24(local_x, 8, w + 2) < K) ? _brow.s2 : 0.0f; \ @@ -532,7 +532,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( #undef READ_BROW #define MM_DOT_PRODUCT( _row, _dot ) \ - arow = as_half8(vload4(0, (__global float *)(src0_read + _row * K))); \ + arow = vload8(0, src0_read + _row * K); \ arow.s0 = (mad24(local_x, 8, w) < K) ? arow.s0 : 0.0f; \ arow.s1 = (mad24(local_x, 8, w + 1) < K) ? arow.s1 : 0.0f; \ arow.s2 = (mad24(local_x, 8, w + 2) < K) ? arow.s2 : 0.0f; \ diff --git a/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp b/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp deleted file mode 100644 index 8ec99c9685..0000000000 --- a/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -/* - * Copyright (c) 2020, OPEN AI LAB - * Author: qtang@openailab.com - */ - -#ifndef TENGINE_GRAPH_CONVOLUTION_HPP -#define TENGINE_GRAPH_CONVOLUTION_HPP - -#define FLOAT_TO_REALSIZE (4) -#ifdef HAVE_TENGINE - -#include "tengine_c_api.h" - -namespace cv -{ -namespace dnn -{ -// pad_h0: pad_top -// pad_h1: pad_bottom -// pad_w0: pad_left -// pad_w1: pad_right -teng_graph_t tengine_init(const char* name , float* input_, int inch, int group, int in_h, int in_w, - float *output_, int out_b, int outch, int out_h, int out_w, - float *kernel_,int kernel_s , int kernel_h, int kernel_w, - float *teg_bias, int stride_h, int stride_w, - int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w, - size_t wstep, const std::string padMode , teng_graph_t& graph, int nstripes) ; - -bool tengine_forward(teng_graph_t& graph) ; -bool tengine_release(teng_graph_t& graph) ; -} -} -#endif -#endif /* TENGINE_GRAPH_CONVOLUTION_HPP */ \ No newline at end of file diff --git a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp deleted file mode 100644 index d35937006c..0000000000 --- a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Copyright (c) 2020, OPEN AI LAB - * Author: qtang@openailab.com - */ - -#include "../../precomp.hpp" -#include -#include - -#include -#include - -#include "../include/tengine_graph_convolution.hpp" - -#ifdef HAVE_TENGINE - -#include "tengine_c_api.h" - - -namespace cv -{ -namespace dnn -{ -static int create_input_node(teng_graph_t graph, const char* node_name, int inch, int in_h, int in_w) -{ - node_t node = teng_create_graph_node(graph, node_name, "InputOp"); - tensor_t tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32); - teng_set_node_output_tensor(node, 0, tensor, TENSOR_TYPE_INPUT); - - int dims[4] = {1, inch, in_h, in_w}; - teng_set_tensor_shape(tensor, dims, 4); - - teng_release_graph_tensor(tensor); - teng_release_graph_node(node); - - return 0; -} - -static int create_conv_node(teng_graph_t graph, const char* node_name, const char* input_name, int in_h, int in_w, int out_h, int out_w, - int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h0, int pad_h1, int pad_w0, int pad_w1, int inch, int outch, int group, - int dilation_h, int dilation_w, int activation, std::string padMode) -{ - node_t conv_node = teng_create_graph_node(graph, node_name, "Convolution"); - tensor_t input_tensor = teng_get_graph_tensor(graph, input_name); - - if (input_tensor == NULL) - { - CV_LOG_WARNING(NULL,"Tengine: input_tensor is NULL." 
); - return -1; - } - - teng_set_node_input_tensor(conv_node, 0, input_tensor); - teng_release_graph_tensor(input_tensor); - - /* output */ - tensor_t output_tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32); - - teng_set_node_output_tensor(conv_node, 0, output_tensor, TENSOR_TYPE_VAR); - teng_release_graph_tensor(output_tensor); - - /* weight */ - std::string weight_name(node_name); - weight_name += "/weight"; - - node_t w_node = teng_create_graph_node(graph, weight_name.c_str(), "Const"); - tensor_t w_tensor = teng_create_graph_tensor(graph, weight_name.c_str(), TENGINE_DT_FP32); - teng_set_node_output_tensor(w_node, 0, w_tensor, TENSOR_TYPE_CONST); - teng_set_node_input_tensor(conv_node, 1, w_tensor); - int w_dims[] = {outch, inch / group, kernel_h, kernel_w}; - - teng_set_tensor_shape(w_tensor, w_dims, 4); - - teng_release_graph_node(w_node); - teng_release_graph_tensor(w_tensor); - - /* bias */ - std::string bias_name(node_name); - bias_name += "/bias"; - - node_t b_node = teng_create_graph_node(graph, bias_name.c_str(), "Const"); - tensor_t b_tensor = teng_create_graph_tensor(graph, bias_name.c_str(), TENGINE_DT_FP32); - teng_set_node_output_tensor(b_node, 0, b_tensor, TENSOR_TYPE_CONST); - int b_dims[] = {outch}; - - teng_set_tensor_shape(b_tensor, b_dims, 1); - - teng_set_node_input_tensor(conv_node, 2, b_tensor); - teng_release_graph_node(b_node); - teng_release_graph_tensor(b_tensor); - - if (!padMode.empty()) - { - if (padMode == "SAME") - { - int out_h_temp = (in_h-kernel_h + 2*pad_h0)/stride_h + 1; - int out_w_temp = (in_w-kernel_w + 2*pad_w0)/stride_w + 1; - - if (out_h_temp < out_h) - pad_h1 += 1; - if (out_w_temp < out_w) - pad_w1 += 1; - } - } - - /* attr */ - teng_set_node_attr_int(conv_node, "kernel_h", &kernel_h); - teng_set_node_attr_int(conv_node, "kernel_w", &kernel_w); - teng_set_node_attr_int(conv_node, "stride_h", &stride_h); - teng_set_node_attr_int(conv_node, "stride_w", &stride_w); - teng_set_node_attr_int(conv_node, 
"pad_h0", &pad_h0); - teng_set_node_attr_int(conv_node, "pad_w0", &pad_w0); - teng_set_node_attr_int(conv_node, "pad_h1", &pad_h1); - teng_set_node_attr_int(conv_node, "pad_w1", &pad_w1); - teng_set_node_attr_int(conv_node, "output_channel", &outch); - teng_set_node_attr_int(conv_node, "input_channel", &inch); - teng_set_node_attr_int(conv_node, "group", &group); - teng_set_node_attr_int(conv_node, "dilation_h", &dilation_h); - teng_set_node_attr_int(conv_node, "dilation_w", &dilation_w); - // set_node_attr_int(conv_node, "activation", &activation); - - teng_release_graph_node(conv_node); - - return 0; -} - -static teng_graph_t create_conv_graph(const char* layer_name, float* input_data, int inch, int group, int in_h, int in_w, - float* output_data, int outch, int out_h, int out_w, - int kernel_h, int kernel_w, - int stride_h,int stride_w, - int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w, int activation, - float* teg_weight, float* teg_bias, std::string padMode, int nstripes) -{ - node_t conv_node = NULL; - - tensor_t input_tensor = NULL; - tensor_t output_tensor = NULL; - tensor_t weight_tensor = NULL; - tensor_t bias_tensor = NULL; - - /* create graph for convolution */ - int in_size = in_h * in_w * inch; - int out_size = out_h * out_w * outch; - int weight_size = outch * (inch / group) * kernel_w * kernel_h; - int bias_size = outch; - - int buf_size = 0; - int input_num = 0; - - /* create graph */ - teng_graph_t graph = teng_create_graph(NULL, NULL, NULL); - bool ok = true; - - if(graph == NULL) - { - CV_LOG_WARNING(NULL,"Tengine: create_graph failed." ); - ok = false; - } - - const char* input_name = "data"; - const char* conv_name = layer_name; - - if (ok && create_input_node(graph, input_name, inch, in_h, in_w) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: create_input_node failed." 
); - ok = false; - } - - if (ok && create_conv_node(graph, conv_name, input_name, in_h, in_w, out_h, out_w, kernel_h, kernel_w, - stride_h, stride_w, pad_h0, pad_h1, pad_w0, pad_w1, inch, outch, group, dilation_h, dilation_w, activation, padMode) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: create conv node failed." ); - ok = false; - } - - /* set input/output node */ - const char* inputs_name[] = {input_name}; - const char* outputs_name[] = {conv_name}; - - if (ok && teng_set_graph_input_node(graph, inputs_name, sizeof(inputs_name) / sizeof(char*)) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: set inputs failed." ); - ok = false; - } - - if (ok && teng_set_graph_output_node(graph, outputs_name, sizeof(outputs_name) / sizeof(char*)) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: set outputs failed." ); - ok = false; - } - - /* set input data */ - if (ok) - { - input_tensor = teng_get_graph_input_tensor(graph, 0, 0); - buf_size = teng_get_tensor_buffer_size(input_tensor); - if (buf_size != in_size * FLOAT_TO_REALSIZE) - { - CV_LOG_WARNING(NULL,"Tengine: Input data size check failed."); - ok = false; - } - } - - if (ok) - { - teng_set_tensor_buffer(input_tensor, (float *)input_data, buf_size); - teng_release_graph_tensor(input_tensor); - - /* create convolution node */ - /* set weight node */ - conv_node = teng_get_graph_node(graph, conv_name); - weight_tensor = teng_get_node_input_tensor(conv_node, 1); - buf_size = teng_get_tensor_buffer_size(weight_tensor); - - if (buf_size != weight_size * FLOAT_TO_REALSIZE) - { - CV_LOG_WARNING(NULL,"Tengine: Input weight size check failed."); - ok = false; - } - } - - if (ok) - { - teng_set_tensor_buffer(weight_tensor, teg_weight, buf_size); - - /* set bias node */ - input_num = teng_get_node_input_number(conv_node); - if (input_num > 2) - { - bias_tensor = teng_get_node_input_tensor(conv_node, 2); - buf_size = teng_get_tensor_buffer_size(bias_tensor); - if (buf_size != bias_size * FLOAT_TO_REALSIZE) - { - CV_LOG_WARNING(NULL,"Tengine: Input 
bias size check failed."); - ok = false; - } - else teng_set_tensor_buffer(bias_tensor, teg_bias, buf_size); - } - } - - /* prerun */ - if (ok && teng_prerun_graph_multithread(graph, TENGINE_CLUSTER_BIG, nstripes) < 0) - { - CV_LOG_WARNING(NULL, "Tengine: prerun_graph failed."); - ok = false; - } - - if (ok) - { - /* set output data */ - output_tensor = teng_get_node_output_tensor(conv_node, 0); - int ret = teng_set_tensor_buffer(output_tensor, output_data, out_size * FLOAT_TO_REALSIZE); - if(ret) - { - CV_LOG_WARNING(NULL,"Tengine: Set output tensor buffer failed." ); - ok = false; - } - } - - if (false == ok) - { - teng_destroy_graph(graph) ; - return NULL ; - } - return graph; -} -static bool tengine_init_flag = false; -teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int group, int in_h, int in_w, - float *output_, int out_b, int outch, int out_h, int out_w, - float *kernel_, int kernel_s ,int kernel_h, int kernel_w, - float *teg_bias, int stride_h, int stride_w, - int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w, - size_t wstep, const std::string padMode, teng_graph_t &graph, int nstripes) -{ - std::vector teg_weight_vec; - float *teg_weight = NULL; - int kernel_inwh = (inch / group) * kernel_w * kernel_h; - // Do not using the activation fuse mode, just convolution only. 
- int activation = -1; - - if (!(kernel_s == 2 && kernel_h == kernel_w - && dilation_h == dilation_w && stride_h == stride_w - && out_b == 1 && pad_h0 < 10 && pad_h1 < 10 && pad_w0 < 10 && pad_w1 < 10)) // just for Conv2D - { - // printf("return : just for Conv2D\n"); - return NULL; - } - - { - /* printf("Tengine(%s): input (1 x %d x %d x %d),output (%d x %d x %d x %d), kernel (%d x %d), stride (%d x %d), dilation (%d x %d), pad (%d x %d).\n", - layer_name, inch, in_h, in_w, - out_b, outch, out_h, out_w, - kernel_w, kernel_h, - stride_w, stride_h, - dilation_w, dilation_h, - pad_h0, pad_h1, pad_w0, pad_w1); - */ - // weight - if (kernel_inwh != wstep) - { - teg_weight_vec.resize(kernel_inwh * outch); - teg_weight = &teg_weight_vec[0]; - for (int i=0; i > Reproducibility_MobileNet_SSD; TEST_P(Reproducibility_MobileNet_SSD, Accuracy) { - const string proto = findDataFile("dnn/MobileNetSSD_deploy.prototxt", false); - const string model = findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false); + const string proto = findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", false); + const string model = findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", false); Net net = readNetFromCaffe(proto, model); int backendId = get<0>(GetParam()); int targetId = get<1>(GetParam()); @@ -731,7 +731,7 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif - double scoreDiff = 0.0, iouDiff = 0.0; + double scoreDiff = 0.001, iouDiff = 0.03; #if defined(INF_ENGINE_RELEASE) if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); @@ -779,7 +779,7 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf) 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176); - double scoreDiff = 0.0, iouDiff = 0.0; + double scoreDiff = 0.003, iouDiff = 0.07; if 
(backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { scoreDiff = 0.02; iouDiff = 0.13; diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index d8a16d3efa..12e62c754a 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -407,15 +407,16 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, MaxPooling, Combine( //////////////////////////////////////////////////////////////////////////////// // Fully-connected //////////////////////////////////////////////////////////////////////////////// -typedef TestWithParam > > FullyConnected; +typedef TestWithParam > > FullyConnected; TEST_P(FullyConnected, Accuracy) { - int inChannels = get<0>(GetParam()); - Size inSize = get<1>(GetParam()); - int outChannels = get<2>(GetParam()); - bool hasBias = get<3>(GetParam()); - Backend backendId = get<0>(get<4>(GetParam())); - Target targetId = get<1>(get<4>(GetParam())); + int batch = get<0>(GetParam()); + int inChannels = get<1>(GetParam()); + Size inSize = get<2>(GetParam()); + int outChannels = get<3>(GetParam()); + bool hasBias = get<4>(GetParam()); + Backend backendId = get<0>(get<5>(GetParam())); + Target targetId = get<1>(get<5>(GetParam())); #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && (targetId == DNN_TARGET_OPENCL_FP16 || @@ -424,6 +425,13 @@ TEST_P(FullyConnected, Accuracy) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); } #endif + // https://github.com/openvinotoolkit/openvino/issues/19436 + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL_FP16 && batch == 16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2023000000) + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL && batch == 16) + 
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL); +#endif Mat weights(outChannels, inChannels * inSize.height * inSize.width, CV_32F); randu(weights, -1.0f, 1.0f); @@ -439,7 +447,7 @@ TEST_P(FullyConnected, Accuracy) lp.type = "InnerProduct"; lp.name = "testLayer"; - int sz[] = {1, inChannels, inSize.height, inSize.width}; + int sz[] = {batch, inChannels, inSize.height, inSize.width}; Mat input(4, &sz[0], CV_32F); double l1 = 0.0; @@ -453,11 +461,13 @@ TEST_P(FullyConnected, Accuracy) if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL_FP16) { l1 = 0.01; + if (INF_ENGINE_VER_MAJOR_GE(2023000000)) + lInf = 0.016; } if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL) { l1 = 5e-3; - lInf = 7e-3; + lInf = INF_ENGINE_VER_MAJOR_GE(2023000000) ? 0.016 : 7e-3; } #endif if (targetId == DNN_TARGET_CUDA_FP16) @@ -467,6 +477,7 @@ TEST_P(FullyConnected, Accuracy) } INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, FullyConnected, Combine( +/*batch*/ Values(1, 2, 4, 8, 16), /*in channels*/ Values(3, 4), /*in size*/ Values(Size(5, 4), Size(4, 5), Size(1, 1)), /*out channels*/ Values(3, 4), diff --git a/modules/dnn/test/test_int8_layers.cpp b/modules/dnn/test/test_int8_layers.cpp index 8b3cd01f29..caba112516 100644 --- a/modules/dnn/test/test_int8_layers.cpp +++ b/modules/dnn/test/test_int8_layers.cpp @@ -878,14 +878,14 @@ TEST_P(Test_Int8_nets, MobileNet_SSD) if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel()) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); - Net net = readNetFromCaffe(findDataFile("dnn/MobileNetSSD_deploy.prototxt", false), - findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false)); + Net net = readNetFromCaffe(findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", false), + findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", false)); Mat inp = imread(_tf("street.png")); Mat blob = blobFromImage(inp, 1.0 / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); Mat ref = 
blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy")); - float confThreshold = FLT_MIN, scoreDiff = 0.059, iouDiff = 0.11; + float confThreshold = FLT_MIN, scoreDiff = 0.084, iouDiff = 0.43; testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); } diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 4ee3e013cb..0c5fb28c5d 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -120,6 +120,28 @@ TEST(blobFromImageWithParams_4ch, letter_box) EXPECT_EQ(0, cvtest::norm(targetBlob, blob, NORM_INF)); } +TEST(blobFromImagesWithParams_4ch, multi_image) +{ + Mat img(10, 10, CV_8UC4, cv::Scalar(0, 1, 2, 3)); + Scalar scalefactor(0.1, 0.2, 0.3, 0.4); + + Image2BlobParams param; + param.scalefactor = scalefactor; + param.datalayout = DNN_LAYOUT_NHWC; + + Mat blobs = blobFromImagesWithParams(std::vector { img, 2*img }, param); + vector ranges; + ranges.push_back(Range(0, 1)); + ranges.push_back(Range(0, blobs.size[1])); + ranges.push_back(Range(0, blobs.size[2])); + ranges.push_back(Range(0, blobs.size[3])); + Mat blob0 = blobs(ranges); + ranges[0] = Range(1, 2); + Mat blob1 = blobs(ranges); + + EXPECT_EQ(0, cvtest::norm(2*blob0, blob1, NORM_INF)); +} + TEST(readNet, Regression) { Net net = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt"), diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index a19923bf28..59b51c4bc0 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -490,8 +490,8 @@ TEST_P(Test_Model, DetectionMobilenetSSD) refBoxes.emplace_back(left, top, width, height); } - std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false); - std::string config_file = _tf("MobileNetSSD_deploy.prototxt"); + std::string weights_file = _tf("MobileNetSSD_deploy_19e3ec3.caffemodel", false); + std::string config_file = _tf("MobileNetSSD_deploy_19e3ec3.prototxt"); Scalar mean = Scalar(127.5, 127.5, 127.5); double scale = 1.0 / 127.5; @@ -511,7 
+511,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD) } else if (target == DNN_TARGET_CUDA_FP16) { - scoreDiff = 0.0021; + scoreDiff = 0.0028; iouDiff = 1e-2; } float confThreshold = FLT_MIN; @@ -595,8 +595,8 @@ TEST_P(Test_Model, Detection_normalized) std::vector refConfidences = {0.999222f}; std::vector refBoxes = {Rect2d(0, 4, 227, 222)}; - std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false); - std::string config_file = _tf("MobileNetSSD_deploy.prototxt"); + std::string weights_file = _tf("MobileNetSSD_deploy_19e3ec3.caffemodel", false); + std::string config_file = _tf("MobileNetSSD_deploy_19e3ec3.prototxt"); Scalar mean = Scalar(127.5, 127.5, 127.5); double scale = 1.0 / 127.5; diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp index 19b3f3a94a..4f3a8b4a96 100644 --- a/modules/dnn/test/test_tflite_importer.cpp +++ b/modules/dnn/test/test_tflite_importer.cpp @@ -128,6 +128,11 @@ TEST_P(Test_TFLite, max_unpooling) if (backend == DNN_BACKEND_CUDA) applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) { if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); @@ -152,14 +157,7 @@ TEST_P(Test_TFLite, max_unpooling) net.setInput(input); std::vector > outs; - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - // TODO: seems like a bug with a retrieving intermediate tensors - net.forward(outs, {"conv2d_transpose_4", "p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); - outs.erase(outs.begin()); - } - else { - 
net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); - } + net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); ASSERT_EQ(outs.size(), 4); ASSERT_EQ(outs[0].size(), 1); diff --git a/modules/features2d/3rdparty/mscr/chi_table.h b/modules/features2d/3rdparty/mscr/chi_table.h new file mode 100644 index 0000000000..c0e9bae046 --- /dev/null +++ b/modules/features2d/3rdparty/mscr/chi_table.h @@ -0,0 +1,135 @@ +/* +** +** License Agreement +** For chi_table.h +** +** Copyright (C) 2007 Per-Erik Forssen, all rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** * Redistribution's of source code must retain the above copyright notice, +** this list of conditions and the following disclaimer. +** +** * Redistribution's in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** +** * The name of the copyright holders may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** This software is provided by the copyright holders and contributors "as is" and +** any express or implied warranties, including, but not limited to, the implied +** warranties of merchantability and fitness for a particular purpose are disclaimed. 
+** In no event shall the Intel Corporation or contributors be liable for any direct, +** indirect, incidental, special, exemplary, or consequential damages +** (including, but not limited to, procurement of substitute goods or services; +** loss of use, data, or profits; or business interruption) however caused +** and on any theory of liability, whether in contract, strict liability, +** or tort (including negligence or otherwise) arising in any way out of +** the use of this software, even if advised of the possibility of such damage. +** +** Content origin: http://users.isy.liu.se/cvl/perfo/software/chi_table.h +*/ +#define TABLE_SIZE 400 + +static double chitab3[]={0, 0.0150057, 0.0239478, 0.0315227, + 0.0383427, 0.0446605, 0.0506115, 0.0562786, + 0.0617174, 0.0669672, 0.0720573, 0.0770099, + 0.081843, 0.0865705, 0.0912043, 0.0957541, + 0.100228, 0.104633, 0.108976, 0.113261, + 0.117493, 0.121676, 0.125814, 0.12991, + 0.133967, 0.137987, 0.141974, 0.145929, + 0.149853, 0.15375, 0.15762, 0.161466, + 0.165287, 0.169087, 0.172866, 0.176625, + 0.180365, 0.184088, 0.187794, 0.191483, + 0.195158, 0.198819, 0.202466, 0.2061, + 0.209722, 0.213332, 0.216932, 0.220521, + 0.2241, 0.22767, 0.231231, 0.234783, + 0.238328, 0.241865, 0.245395, 0.248918, + 0.252435, 0.255947, 0.259452, 0.262952, + 0.266448, 0.269939, 0.273425, 0.276908, + 0.280386, 0.283862, 0.287334, 0.290803, + 0.29427, 0.297734, 0.301197, 0.304657, + 0.308115, 0.311573, 0.315028, 0.318483, + 0.321937, 0.32539, 0.328843, 0.332296, + 0.335749, 0.339201, 0.342654, 0.346108, + 0.349562, 0.353017, 0.356473, 0.35993, + 0.363389, 0.366849, 0.37031, 0.373774, + 0.377239, 0.380706, 0.384176, 0.387648, + 0.391123, 0.3946, 0.39808, 0.401563, + 0.405049, 0.408539, 0.412032, 0.415528, + 0.419028, 0.422531, 0.426039, 0.429551, + 0.433066, 0.436586, 0.440111, 0.44364, + 0.447173, 0.450712, 0.454255, 0.457803, + 0.461356, 0.464915, 0.468479, 0.472049, + 0.475624, 0.479205, 0.482792, 0.486384, + 0.489983, 0.493588, 0.4972, 
0.500818, + 0.504442, 0.508073, 0.511711, 0.515356, + 0.519008, 0.522667, 0.526334, 0.530008, + 0.533689, 0.537378, 0.541075, 0.54478, + 0.548492, 0.552213, 0.555942, 0.55968, + 0.563425, 0.56718, 0.570943, 0.574715, + 0.578497, 0.582287, 0.586086, 0.589895, + 0.593713, 0.597541, 0.601379, 0.605227, + 0.609084, 0.612952, 0.61683, 0.620718, + 0.624617, 0.628526, 0.632447, 0.636378, + 0.64032, 0.644274, 0.648239, 0.652215, + 0.656203, 0.660203, 0.664215, 0.668238, + 0.672274, 0.676323, 0.680384, 0.684457, + 0.688543, 0.692643, 0.696755, 0.700881, + 0.70502, 0.709172, 0.713339, 0.717519, + 0.721714, 0.725922, 0.730145, 0.734383, + 0.738636, 0.742903, 0.747185, 0.751483, + 0.755796, 0.760125, 0.76447, 0.768831, + 0.773208, 0.777601, 0.782011, 0.786438, + 0.790882, 0.795343, 0.799821, 0.804318, + 0.808831, 0.813363, 0.817913, 0.822482, + 0.827069, 0.831676, 0.836301, 0.840946, + 0.84561, 0.850295, 0.854999, 0.859724, + 0.864469, 0.869235, 0.874022, 0.878831, + 0.883661, 0.888513, 0.893387, 0.898284, + 0.903204, 0.908146, 0.913112, 0.918101, + 0.923114, 0.928152, 0.933214, 0.938301, + 0.943413, 0.94855, 0.953713, 0.958903, + 0.964119, 0.969361, 0.974631, 0.979929, + 0.985254, 0.990608, 0.99599, 1.0014, + 1.00684, 1.01231, 1.01781, 1.02335, + 1.02891, 1.0345, 1.04013, 1.04579, + 1.05148, 1.05721, 1.06296, 1.06876, + 1.07459, 1.08045, 1.08635, 1.09228, + 1.09826, 1.10427, 1.11032, 1.1164, + 1.12253, 1.1287, 1.1349, 1.14115, + 1.14744, 1.15377, 1.16015, 1.16656, + 1.17303, 1.17954, 1.18609, 1.19269, + 1.19934, 1.20603, 1.21278, 1.21958, + 1.22642, 1.23332, 1.24027, 1.24727, + 1.25433, 1.26144, 1.26861, 1.27584, + 1.28312, 1.29047, 1.29787, 1.30534, + 1.31287, 1.32046, 1.32812, 1.33585, + 1.34364, 1.3515, 1.35943, 1.36744, + 1.37551, 1.38367, 1.39189, 1.4002, + 1.40859, 1.41705, 1.42561, 1.43424, + 1.44296, 1.45177, 1.46068, 1.46967, + 1.47876, 1.48795, 1.49723, 1.50662, + 1.51611, 1.52571, 1.53541, 1.54523, + 1.55517, 1.56522, 1.57539, 1.58568, + 1.59611, 1.60666, 1.61735, 
1.62817, + 1.63914, 1.65025, 1.66152, 1.67293, + 1.68451, 1.69625, 1.70815, 1.72023, + 1.73249, 1.74494, 1.75757, 1.77041, + 1.78344, 1.79669, 1.81016, 1.82385, + 1.83777, 1.85194, 1.86635, 1.88103, + 1.89598, 1.91121, 1.92674, 1.94257, + 1.95871, 1.97519, 1.99201, 2.0092, + 2.02676, 2.04471, 2.06309, 2.08189, + 2.10115, 2.12089, 2.14114, 2.16192, + 2.18326, 2.2052, 2.22777, 2.25101, + 2.27496, 2.29966, 2.32518, 2.35156, + 2.37886, 2.40717, 2.43655, 2.46709, + 2.49889, 2.53206, 2.56673, 2.60305, + 2.64117, 2.6813, 2.72367, 2.76854, + 2.81623, 2.86714, 2.92173, 2.98059, + 3.04446, 3.1143, 3.19135, 3.27731, + 3.37455, 3.48653, 3.61862, 3.77982, + 3.98692, 4.2776, 4.77167, 133.333 }; diff --git a/modules/features2d/3rdparty/mscr/chi_table_LICENSE.txt b/modules/features2d/3rdparty/mscr/chi_table_LICENSE.txt new file mode 100644 index 0000000000..66b272dd2d --- /dev/null +++ b/modules/features2d/3rdparty/mscr/chi_table_LICENSE.txt @@ -0,0 +1,28 @@ + License Agreement + For chi_table.h + +Copyright (C) 2007 Per-Erik Forssen, all rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistribution's of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistribution's in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * The name of the copyright holders may not be used to endorse or promote products + derived from this software without specific prior written permission. + +This software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are disclaimed. 
+In no event shall the Intel Corporation or contributors be liable for any direct, +indirect, incidental, special, exemplary, or consequential damages +(including, but not limited to, procurement of substitute goods or services; +loss of use, data, or profits; or business interruption) however caused +and on any theory of liability, whether in contract, strict liability, +or tort (including negligence or otherwise) arising in any way out of +the use of this software, even if advised of the possibility of such damage. diff --git a/modules/features2d/CMakeLists.txt b/modules/features2d/CMakeLists.txt index a586d4606e..91fea8bcc8 100644 --- a/modules/features2d/CMakeLists.txt +++ b/modules/features2d/CMakeLists.txt @@ -7,3 +7,5 @@ if(DEBUG_opencv_features2d) list(APPEND debug_modules opencv_highgui) endif() ocv_define_module(features2d opencv_imgproc ${debug_modules} OPTIONAL opencv_flann WRAP java objc python js) + +ocv_install_3rdparty_licenses(mscr "${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/mscr/chi_table_LICENSE.txt") diff --git a/modules/features2d/src/mser.cpp b/modules/features2d/src/mser.cpp index 39bcbf6938..3cada4ec75 100644 --- a/modules/features2d/src/mser.cpp +++ b/modules/features2d/src/mser.cpp @@ -30,18 +30,23 @@ * OpenCV functions for MSER extraction * * 1. there are two different implementation of MSER, one for gray image, one for color image - * 2. the gray image algorithm is taken from: Linear Time Maximally Stable Extremal Regions; + * 2. the gray image algorithm is taken from: + * Linear Time Maximally Stable Extremal Regions; * the paper claims to be faster than union-find method; * it actually get 1.5~2m/s on my centrino L7200 1.2GHz laptop. - * 3. the color image algorithm is taken from: Maximally Stable Colour Regions for Recognition and Match; + * 3. 
the color image algorithm is taken from: + * Maximally Stable Colour Regions for Recognition and Match; * it should be much slower than gray image method ( 3~4 times ); - * the chi_table.h file is taken directly from paper's source code which is distributed under permissive BSD-like license: http://users.isy.liu.se/cvl/perfo/software/chi_table.h + * the chi_table.h file is taken directly from the paper's source code: + * http://users.isy.liu.se/cvl/perfo/software/chi_table.h + * license (BSD-like) is located in the file: 3rdparty/mscr/chi_table_LICENSE.txt * 4. though the name is *contours*, the result actually is a list of point set. */ #include "precomp.hpp" #include "opencv2/imgproc/imgproc_c.h" #include +#include "../3rdparty/mscr/chi_table.h" namespace cv { @@ -613,113 +618,6 @@ the color MSER has not been completely refactored yet. We leave it mostly as-is, with just enough changes to convert C structures to C++ ones and add support for color images into MSER_Impl::detectAndLabel. 
*/ - -const int TABLE_SIZE = 400; - -static const float chitab3[]= -{ - 0.f, 0.0150057f, 0.0239478f, 0.0315227f, - 0.0383427f, 0.0446605f, 0.0506115f, 0.0562786f, - 0.0617174f, 0.0669672f, 0.0720573f, 0.0770099f, - 0.081843f, 0.0865705f, 0.0912043f, 0.0957541f, - 0.100228f, 0.104633f, 0.108976f, 0.113261f, - 0.117493f, 0.121676f, 0.125814f, 0.12991f, - 0.133967f, 0.137987f, 0.141974f, 0.145929f, - 0.149853f, 0.15375f, 0.15762f, 0.161466f, - 0.165287f, 0.169087f, 0.172866f, 0.176625f, - 0.180365f, 0.184088f, 0.187794f, 0.191483f, - 0.195158f, 0.198819f, 0.202466f, 0.2061f, - 0.209722f, 0.213332f, 0.216932f, 0.220521f, - 0.2241f, 0.22767f, 0.231231f, 0.234783f, - 0.238328f, 0.241865f, 0.245395f, 0.248918f, - 0.252435f, 0.255947f, 0.259452f, 0.262952f, - 0.266448f, 0.269939f, 0.273425f, 0.276908f, - 0.280386f, 0.283862f, 0.287334f, 0.290803f, - 0.29427f, 0.297734f, 0.301197f, 0.304657f, - 0.308115f, 0.311573f, 0.315028f, 0.318483f, - 0.321937f, 0.32539f, 0.328843f, 0.332296f, - 0.335749f, 0.339201f, 0.342654f, 0.346108f, - 0.349562f, 0.353017f, 0.356473f, 0.35993f, - 0.363389f, 0.366849f, 0.37031f, 0.373774f, - 0.377239f, 0.380706f, 0.384176f, 0.387648f, - 0.391123f, 0.3946f, 0.39808f, 0.401563f, - 0.405049f, 0.408539f, 0.412032f, 0.415528f, - 0.419028f, 0.422531f, 0.426039f, 0.429551f, - 0.433066f, 0.436586f, 0.440111f, 0.44364f, - 0.447173f, 0.450712f, 0.454255f, 0.457803f, - 0.461356f, 0.464915f, 0.468479f, 0.472049f, - 0.475624f, 0.479205f, 0.482792f, 0.486384f, - 0.489983f, 0.493588f, 0.4972f, 0.500818f, - 0.504442f, 0.508073f, 0.511711f, 0.515356f, - 0.519008f, 0.522667f, 0.526334f, 0.530008f, - 0.533689f, 0.537378f, 0.541075f, 0.54478f, - 0.548492f, 0.552213f, 0.555942f, 0.55968f, - 0.563425f, 0.56718f, 0.570943f, 0.574715f, - 0.578497f, 0.582287f, 0.586086f, 0.589895f, - 0.593713f, 0.597541f, 0.601379f, 0.605227f, - 0.609084f, 0.612952f, 0.61683f, 0.620718f, - 0.624617f, 0.628526f, 0.632447f, 0.636378f, - 0.64032f, 0.644274f, 0.648239f, 0.652215f, - 0.656203f, 
0.660203f, 0.664215f, 0.668238f, - 0.672274f, 0.676323f, 0.680384f, 0.684457f, - 0.688543f, 0.692643f, 0.696755f, 0.700881f, - 0.70502f, 0.709172f, 0.713339f, 0.717519f, - 0.721714f, 0.725922f, 0.730145f, 0.734383f, - 0.738636f, 0.742903f, 0.747185f, 0.751483f, - 0.755796f, 0.760125f, 0.76447f, 0.768831f, - 0.773208f, 0.777601f, 0.782011f, 0.786438f, - 0.790882f, 0.795343f, 0.799821f, 0.804318f, - 0.808831f, 0.813363f, 0.817913f, 0.822482f, - 0.827069f, 0.831676f, 0.836301f, 0.840946f, - 0.84561f, 0.850295f, 0.854999f, 0.859724f, - 0.864469f, 0.869235f, 0.874022f, 0.878831f, - 0.883661f, 0.888513f, 0.893387f, 0.898284f, - 0.903204f, 0.908146f, 0.913112f, 0.918101f, - 0.923114f, 0.928152f, 0.933214f, 0.938301f, - 0.943413f, 0.94855f, 0.953713f, 0.958903f, - 0.964119f, 0.969361f, 0.974631f, 0.979929f, - 0.985254f, 0.990608f, 0.99599f, 1.0014f, - 1.00684f, 1.01231f, 1.01781f, 1.02335f, - 1.02891f, 1.0345f, 1.04013f, 1.04579f, - 1.05148f, 1.05721f, 1.06296f, 1.06876f, - 1.07459f, 1.08045f, 1.08635f, 1.09228f, - 1.09826f, 1.10427f, 1.11032f, 1.1164f, - 1.12253f, 1.1287f, 1.1349f, 1.14115f, - 1.14744f, 1.15377f, 1.16015f, 1.16656f, - 1.17303f, 1.17954f, 1.18609f, 1.19269f, - 1.19934f, 1.20603f, 1.21278f, 1.21958f, - 1.22642f, 1.23332f, 1.24027f, 1.24727f, - 1.25433f, 1.26144f, 1.26861f, 1.27584f, - 1.28312f, 1.29047f, 1.29787f, 1.30534f, - 1.31287f, 1.32046f, 1.32812f, 1.33585f, - 1.34364f, 1.3515f, 1.35943f, 1.36744f, - 1.37551f, 1.38367f, 1.39189f, 1.4002f, - 1.40859f, 1.41705f, 1.42561f, 1.43424f, - 1.44296f, 1.45177f, 1.46068f, 1.46967f, - 1.47876f, 1.48795f, 1.49723f, 1.50662f, - 1.51611f, 1.52571f, 1.53541f, 1.54523f, - 1.55517f, 1.56522f, 1.57539f, 1.58568f, - 1.59611f, 1.60666f, 1.61735f, 1.62817f, - 1.63914f, 1.65025f, 1.66152f, 1.67293f, - 1.68451f, 1.69625f, 1.70815f, 1.72023f, - 1.73249f, 1.74494f, 1.75757f, 1.77041f, - 1.78344f, 1.79669f, 1.81016f, 1.82385f, - 1.83777f, 1.85194f, 1.86635f, 1.88103f, - 1.89598f, 1.91121f, 1.92674f, 1.94257f, - 1.95871f, 
1.97519f, 1.99201f, 2.0092f, - 2.02676f, 2.04471f, 2.06309f, 2.08189f, - 2.10115f, 2.12089f, 2.14114f, 2.16192f, - 2.18326f, 2.2052f, 2.22777f, 2.25101f, - 2.27496f, 2.29966f, 2.32518f, 2.35156f, - 2.37886f, 2.40717f, 2.43655f, 2.46709f, - 2.49889f, 2.53206f, 2.56673f, 2.60305f, - 2.64117f, 2.6813f, 2.72367f, 2.76854f, - 2.81623f, 2.86714f, 2.92173f, 2.98059f, - 3.04446f, 3.1143f, 3.19135f, 3.27731f, - 3.37455f, 3.48653f, 3.61862f, 3.77982f, - 3.98692f, 4.2776f, 4.77167f, 133.333f -}; - struct MSCRNode; struct TempMSCR diff --git a/modules/features2d/test/test_descriptors_regression.cpp b/modules/features2d/test/test_descriptors_regression.cpp index 0258fea0f3..e44edb0769 100644 --- a/modules/features2d/test/test_descriptors_regression.cpp +++ b/modules/features2d/test/test_descriptors_regression.cpp @@ -142,7 +142,7 @@ TEST_P(DescriptorImage, no_crash) { vector fnames; glob(cvtest::TS::ptr()->get_data_path() + pattern, fnames, false); - sort(fnames.begin(), fnames.end()); + std::sort(fnames.begin(), fnames.end()); Ptr akaze_mldb = AKAZE::create(AKAZE::DESCRIPTOR_MLDB); Ptr akaze_mldb_upright = AKAZE::create(AKAZE::DESCRIPTOR_MLDB_UPRIGHT); diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index 9ecbb6d514..46ea208221 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -190,6 +190,9 @@ set(gapi_srcs src/backends/ov/bindings_ov.cpp src/backends/python/gpythonbackend.cpp + # Queue Streaming source + src/streaming/queue_source.cpp + # OpenVPL Streaming source src/streaming/onevpl/source.cpp src/streaming/onevpl/source_priv.cpp diff --git a/modules/gapi/cmake/DownloadADE.cmake b/modules/gapi/cmake/DownloadADE.cmake index e22c4f1a32..26407f4fef 100644 --- a/modules/gapi/cmake/DownloadADE.cmake +++ b/modules/gapi/cmake/DownloadADE.cmake @@ -1,7 +1,7 @@ set(ade_src_dir "${OpenCV_BINARY_DIR}/3rdparty/ade") -set(ade_filename "v0.1.2a.zip") -set(ade_subdir "ade-0.1.2a") -set(ade_md5 "fa4b3e25167319cb0fa9432ef8281945") 
+set(ade_filename "v0.1.2b.zip") +set(ade_subdir "ade-0.1.2b") +set(ade_md5 "4f93a0844dfc463c617d83b09011819a") ocv_download(FILENAME ${ade_filename} HASH ${ade_md5} URL diff --git a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp index b56175788f..a1703a52cb 100644 --- a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp +++ b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp @@ -141,8 +141,10 @@ namespace detail template struct GTypeOf > { using type = cv::GArray; }; template struct GTypeOf { using type = cv::GOpaque;}; template<> struct GTypeOf { using type = cv::GFrame; }; - // FIXME: This is not quite correct since IStreamSource may produce not only Mat but also Scalar - // and vector data. TODO: Extend the type dispatching on these types too. + + // FIXME: This is not quite correct since IStreamSource may + // produce not only Mat but also MediaFrame, Scalar and vector + // data. TODO: Extend the type dispatching on these types too. template<> struct GTypeOf { using type = cv::GMat;}; template using g_type_of_t = typename GTypeOf::type; diff --git a/modules/gapi/include/opencv2/gapi/streaming/queue_source.hpp b/modules/gapi/include/opencv2/gapi/streaming/queue_source.hpp new file mode 100644 index 0000000000..bd385ed16e --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/streaming/queue_source.hpp @@ -0,0 +1,67 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2023 Intel Corporation + +#ifndef OPENCV_GAPI_STREAMING_QUEUE_SOURCE_HPP +#define OPENCV_GAPI_STREAMING_QUEUE_SOURCE_HPP + +#include // shared_ptr +#include // is_base_of + +#include // GRunArgs +#include // GMetaArg + all descr_of +#include // IStreamSource + +namespace cv { +namespace gapi { +namespace wip { +struct Data; // fwd-declare to avoid circular? 
header dependencies + +class GAPI_EXPORTS QueueSourceBase: public cv::gapi::wip::IStreamSource { + class Priv; + std::shared_ptr m_priv; + // FIXME: Need to understand how it works with IStreamSource's shared_from_this + // Can we avoid having too many shared_ptrs here? + +public: + explicit QueueSourceBase(const cv::GMetaArg &m); + void push(Data &&data); + virtual bool pull(Data &data) override; + virtual void halt() override; + virtual GMetaArg descr_of() const override; + virtual ~QueueSourceBase() = default; +}; + +/** + * @brief Queued streaming pipeline source. + * + */ +template +class QueueSource final: public QueueSourceBase +{ +public: + using Meta = decltype(cv::descr_of(T{})); + explicit QueueSource(Meta m) : QueueSourceBase(GMetaArg{m}) { + } + void push(T t) { + QueueSourceBase::push(Data{t}); + } +}; + +class GAPI_EXPORTS QueueInput { + std::vector > m_sources; + +public: + explicit QueueInput(const cv::GMetaArgs &args); + + void push(cv::GRunArgs &&ins); + operator cv::GRunArgs(); +}; + +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_STREAMING_QUEUE_SOURCE_HPP diff --git a/modules/gapi/include/opencv2/gapi/streaming/source.hpp b/modules/gapi/include/opencv2/gapi/streaming/source.hpp index 6597cad8f8..267469ad1b 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/source.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/source.hpp @@ -16,7 +16,7 @@ namespace cv { namespace gapi { namespace wip { - struct Data; // "forward-declaration" of GRunArg +struct Data; // forward-declaration of Data to avoid circular dependencies /** * @brief Abstract streaming pipeline source. @@ -43,6 +43,11 @@ public: Ptr ptr() { return shared_from_this(); } virtual bool pull(Data &data) = 0; virtual GMetaArg descr_of() const = 0; + virtual void halt() { + // Do nothing by default to maintain compatibility with the existing sources...
+ // In fact needs to be decorated atop of the child classes to maintain the behavior + // FIXME: Make it mandatory in OpenCV 5.0 + }; virtual ~IStreamSource() = default; }; diff --git a/modules/gapi/misc/python/test/test_gapi_infer.py b/modules/gapi/misc/python/test/test_gapi_infer.py index 8ecc957e41..d075651e87 100644 --- a/modules/gapi/misc/python/test/test_gapi_infer.py +++ b/modules/gapi/misc/python/test/test_gapi_infer.py @@ -38,8 +38,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -73,8 +73,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -112,8 +112,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', 
[os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' rois = [(10, 15, 62, 62), (23, 50, 62, 62), (14, 100, 62, 62), (80, 50, 62, 62)] @@ -161,8 +161,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' rois = [(10, 15, 62, 62), (23, 50, 62, 62), (14, 100, 62, 62), (80, 50, 62, 62)] @@ -211,8 +211,8 @@ try: return root_path = '/omz_intel_models/intel/person-detection-retail-0013/FP32/person-detection-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) img_path = self.find_file('gpu/lbpcascade/er.png', [os.environ.get('OPENCV_TEST_DATA_PATH')]) device_id = 'CPU' img = cv.resize(cv.imread(img_path), (544, 320)) @@ -270,8 +270,8 @@ try: return root_path = '/omz_intel_models/intel/person-detection-retail-0013/FP32/person-detection-retail-0013' - 
model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) img_path = self.find_file('gpu/lbpcascade/er.png', [os.environ.get('OPENCV_TEST_DATA_PATH')]) device_id = 'CPU' img = cv.resize(cv.imread(img_path), (544, 320)) diff --git a/modules/gapi/misc/python/test/test_gapi_infer_ov.py b/modules/gapi/misc/python/test/test_gapi_infer_ov.py index b4022b6e2d..f48ec96369 100644 --- a/modules/gapi/misc/python/test/test_gapi_infer_ov.py +++ b/modules/gapi/misc/python/test/test_gapi_infer_ov.py @@ -86,8 +86,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -119,8 +119,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', 
[os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -148,8 +148,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path1 = self.find_file('cv/face/david1.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -190,8 +190,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) diff --git a/modules/gapi/src/api/gbackend.cpp b/modules/gapi/src/api/gbackend.cpp index efbe17a305..46c8dc1640 100644 --- a/modules/gapi/src/api/gbackend.cpp +++ b/modules/gapi/src/api/gbackend.cpp @@ -36,7 +36,6 @@ cv::gapi::GBackend::Priv::compile(const ade::Graph&, { // ...and this method is here for the same reason! 
GAPI_Error("InternalError"); - return {}; } std::unique_ptr @@ -224,7 +223,6 @@ void bindOutArg(Mag& mag, const RcDesc &rc, const GRunArgP &arg, HandleRMat hand default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -256,7 +254,6 @@ void resetInternalData(Mag& mag, const Data &d) default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -284,7 +281,6 @@ cv::GRunArg getArg(const Mag& mag, const RcDesc &ref) mag.meta().at(ref.id)); default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -327,7 +323,6 @@ cv::GRunArgP getObjPtr(Mag& mag, const RcDesc &rc, bool is_umat) default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -359,7 +354,6 @@ void writeBack(const Mag& mag, const RcDesc &rc, GRunArgP &g_arg) default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } diff --git a/modules/gapi/src/compiler/gislandmodel.hpp b/modules/gapi/src/compiler/gislandmodel.hpp index 3a1a8d5ab9..ade13a6f33 100644 --- a/modules/gapi/src/compiler/gislandmodel.hpp +++ b/modules/gapi/src/compiler/gislandmodel.hpp @@ -192,6 +192,7 @@ class GIslandEmitter public: // Obtain next value from the emitter virtual bool pull(GRunArg &) = 0; + virtual void halt() = 0; virtual ~GIslandEmitter() = default; }; diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp index 124b27f39c..6a397faca6 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.cpp +++ b/modules/gapi/src/executor/gstreamingexecutor.cpp @@ -41,6 +41,10 @@ using namespace cv::gimpl::stream; class VideoEmitter final: public cv::gimpl::GIslandEmitter { cv::gapi::wip::IStreamSource::Ptr src; + virtual void halt() override { + src->halt(); + } + virtual bool pull(cv::GRunArg &arg) override { // FIXME: probably we can maintain a pool of (then) pre-allocated // buffers to avoid runtime allocations. 
@@ -62,6 +66,10 @@ public: class ConstEmitter final: public cv::gimpl::GIslandEmitter { cv::GRunArg m_arg; + virtual void halt() override { + // Not used here, but in fact can be used. + } + virtual bool pull(cv::GRunArg &arg) override { arg = const_cast(m_arg); // FIXME: variant workaround return true; @@ -1918,6 +1926,11 @@ void cv::gimpl::GStreamingExecutor::stop() for (auto &q : m_emitter_queues) { q.push(stream::Cmd{stream::Stop{}}); } + // Also kindly ask emitter object to halt to break the blocking src->pull() + // loop + for (auto &nh : m_emitters) { + m_gim.metadata(nh).get().object->halt(); + } // Pull messages from the final queue to ensure completion Cmd cmd; diff --git a/modules/gapi/src/streaming/queue_source.cpp b/modules/gapi/src/streaming/queue_source.cpp new file mode 100644 index 0000000000..59fde09c44 --- /dev/null +++ b/modules/gapi/src/streaming/queue_source.cpp @@ -0,0 +1,98 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2023 Intel Corporation + +#include +#include + +#include + +#include +#include + +#include "executor/conc_queue.hpp" + +namespace cv { +namespace gapi { +namespace wip { + +class QueueSourceBase::Priv { +public: + explicit Priv(const cv::GMetaArg &meta) { + m = meta; + halted = false; + } + + cv::GMetaArg m; + cv::gapi::own::concurrent_bounded_queue q; + int64_t c = 0; + std::atomic halted; +}; + +QueueSourceBase::QueueSourceBase(const cv::GMetaArg &m) + : m_priv(new Priv(m)) { +} + +void QueueSourceBase::push(Data &&data) { + + // Tag data with seq_id/ts + const auto now = std::chrono::system_clock::now(); + const auto dur = std::chrono::duration_cast + (now.time_since_epoch()); + data.meta[cv::gapi::streaming::meta_tag::timestamp] = int64_t{dur.count()}; + data.meta[cv::gapi::streaming::meta_tag::seq_id] = int64_t{m_priv->c++}; + + m_priv->q.push(data); +} + +bool QueueSourceBase::pull(Data &data) { + m_priv->q.pop(data); + + if (m_priv->halted) { + return false; + } + return true; +} + +void QueueSourceBase::halt() { + m_priv->halted.store(true); + m_priv->q.push(cv::GRunArg{}); +} + +cv::GMetaArg QueueSourceBase::descr_of() const { + return m_priv->m; +} + +QueueInput::QueueInput(const cv::GMetaArgs &args) { + for (auto &&m : args) { + m_sources.emplace_back(new cv::gapi::wip::QueueSourceBase(m)); + } +} + +void QueueInput::push(cv::GRunArgs &&args) { + GAPI_Assert(m_sources.size() == args.size()); + for (auto && it : ade::util::zip(ade::util::toRange(m_sources), + ade::util::toRange(args))) + { + auto &src = std::get<0>(it); + auto &obj = std::get<1>(it); + + Data d; + d = obj; + src->push(std::move(d)); + } +} + +QueueInput::operator cv::GRunArgs () { + cv::GRunArgs args; + for (auto &&s : m_sources) { + args.push_back(s->ptr()); + } + return args; +} + +} // wip +} // gapi +} // cv diff --git a/modules/gapi/test/infer/gapi_infer_ie_test.cpp b/modules/gapi/test/infer/gapi_infer_ie_test.cpp index 58e37040e8..92de39abfa 100644 --- 
a/modules/gapi/test/infer/gapi_infer_ie_test.cpp +++ b/modules/gapi/test/infer/gapi_infer_ie_test.cpp @@ -187,8 +187,8 @@ std::string compileAgeGenderBlob(const std::string& device) { cv::gapi::ie::detail::ParamDesc params; const std::string model_name = "age-gender-recognition-retail-0013"; const std::string output = model_name + ".blob"; - params.model_path = findDataFile(SUBDIR + model_name + ".xml"); - params.weights_path = findDataFile(SUBDIR + model_name + ".bin"); + params.model_path = findDataFile(SUBDIR + model_name + ".xml", false); + params.weights_path = findDataFile(SUBDIR + model_name + ".bin", false); params.device_id = device; compileBlob(params, output, IE::Precision::U8); return output; @@ -205,8 +205,8 @@ TEST(TestAgeGenderIE, InferBasicTensor) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. 
@@ -256,8 +256,8 @@ TEST(TestAgeGenderIE, InferBasicImage) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // FIXME: Ideally it should be an image from disk @@ -334,8 +334,8 @@ struct InferWithReshape: public ::testing::Test { reshape_dims = {1, 3, 70, 70}; initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; @@ -432,8 +432,8 @@ struct ROIList: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // FIXME: it must be cv::imread(findDataFile("../dnn/grace_hopper_227.png", false)); @@ -505,8 +505,8 @@ struct ROIListNV12: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + 
"age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -585,8 +585,8 @@ struct SingleROI: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // FIXME: it must be cv::imread(findDataFile("../dnn/grace_hopper_227.png", false)); @@ -644,8 +644,8 @@ struct SingleROINV12: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -809,8 +809,8 @@ TEST(TestAgeGenderIE, GenericInfer) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Mat in_mat(cv::Size(320, 240), CV_8UC3); @@ -859,8 +859,8 @@ TEST(TestAgeGenderIE, InvalidConfigGeneric) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + 
"age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; // Configure & run G-API @@ -885,8 +885,8 @@ TEST(TestAgeGenderIE, CPUConfigGeneric) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; // Configure & run G-API @@ -912,8 +912,8 @@ TEST(TestAgeGenderIE, InvalidConfig) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; using AGInfo = std::tuple; @@ -937,8 +937,8 @@ TEST(TestAgeGenderIE, CPUConfig) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; using AGInfo = std::tuple; @@ -1017,8 +1017,8 @@ 
TEST(TestAgeGenderIE, MediaInputNV12) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1082,8 +1082,8 @@ TEST(TestAgeGenderIE, MediaInputBGR) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1134,8 +1134,8 @@ TEST(InferROI, MediaInputBGR) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1196,8 +1196,8 @@ TEST(InferROI, MediaInputNV12) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = 
findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1587,8 +1587,8 @@ TEST(Infer, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1654,8 +1654,8 @@ TEST(InferROI, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1732,8 +1732,8 @@ TEST(InferList, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. 
@@ -1821,8 +1821,8 @@ TEST(Infer2, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1911,8 +1911,8 @@ TEST(InferEmptyList, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1965,8 +1965,8 @@ TEST(Infer2EmptyList, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. 
@@ -2294,8 +2294,8 @@ struct LimitedSourceInfer: public ::testing::Test { GStreamingCompiled compileStreaming(int nireq) { cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; auto pp = cv::gapi::ie::Params { @@ -2348,8 +2348,8 @@ TEST(TestAgeGenderIE, InferWithBatch) constexpr int batch_size = 4; cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Mat in_mat({batch_size, 3, 62, 62}, CV_8U); @@ -3091,8 +3091,8 @@ struct AgeGenderInferTest: public ::testing::Test { void SetUp() { initDLDTDataPath(); - m_params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - m_params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + m_params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + m_params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); m_params.device_id = "CPU"; m_plugin = cv::gimpl::ie::wrap::getPlugin(m_params); @@ -3191,8 +3191,8 @@ TEST(TestAgeGenderIE, InferTensorWithPreproc) { initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + 
"age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. diff --git a/modules/gapi/test/infer/gapi_infer_ov_tests.cpp b/modules/gapi/test/infer/gapi_infer_ov_tests.cpp index 09b54c1a46..abce82b329 100644 --- a/modules/gapi/test/infer/gapi_infer_ov_tests.cpp +++ b/modules/gapi/test/infer/gapi_infer_ov_tests.cpp @@ -255,8 +255,8 @@ private: struct BaseAgeGenderOV: public ::testing::Test { BaseAgeGenderOV() { initDLDTDataPath(); - xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); device = "CPU"; blob_path = "age-gender-recognition-retail-0013.blob"; } diff --git a/modules/gapi/test/streaming/gapi_streaming_queue_source_tests.cpp b/modules/gapi/test/streaming/gapi_streaming_queue_source_tests.cpp new file mode 100644 index 0000000000..093e654715 --- /dev/null +++ b/modules/gapi/test/streaming/gapi_streaming_queue_source_tests.cpp @@ -0,0 +1,127 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2023 Intel Corporation + + +#include "../test_precomp.hpp" + +#include +#include +#include + +namespace opencv_test +{ + +TEST(GAPI_Streaming_Queue_Source, SmokeTest) { + // This is more like an example on G-API Queue Source + + cv::GMat in; + cv::GMat out = in + 1; + cv::GStreamingCompiled comp = cv::GComputation(in, out).compileStreaming(); + + // Queue source needs to know format information to maintain contracts + auto src = std::make_shared > + (cv::GMatDesc{CV_8U, 1, cv::Size{128, 128}}); + + comp.setSource(cv::gin(src->ptr())); + comp.start(); + + // It is perfectly legal to start a pipeline at this point - the source was passed. + // Now we can push data through the source and get the pipeline results. + + cv::Mat eye = cv::Mat::eye(cv::Size{128, 128}, CV_8UC1); + src->push(eye); // Push I (identity matrix) + src->push(eye*2); // Push I*2 + + // Now its time to pop. The data could be already processed at this point. + // Note the queue source queues are unbounded to avoid deadlocks + + cv::Mat result; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye + 1, result, NORM_INF)); + + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye*2 + 1, result, NORM_INF)); +} + +TEST(GAPI_Streaming_Queue_Source, Mixed) { + // Mixing a regular "live" source (which runs on its own) with a + // manually controlled queue source may make a little sense, but + // is perfectly legal and possible. + + cv::GMat in1; + cv::GMat in2; + cv::GMat out = in2 - in1; + cv::GStreamingCompiled comp = cv::GComputation(in1, in2, out).compileStreaming(); + + // Queue source needs to know format information to maintain contracts + auto src1 = std::make_shared > + (cv::GMatDesc{CV_8U, 3, cv::Size{768, 576}}); + + std::shared_ptr src2; + auto path = findDataFile("cv/video/768x576.avi"); + try { + src2 = cv::gapi::wip::make_src(path); + } catch(...) 
{ + throw SkipTestException("Video file can not be opened"); + } + + comp.setSource(cv::gin(src1->ptr(), src2)); // FIXME: quite inconsistent + comp.start(); + + cv::Mat eye = cv::Mat::eye(cv::Size{768, 576}, CV_8UC3); + src1->push(eye); // Push I (identity matrix) + src1->push(eye); // Push I (again) + + cv::Mat ref, result; + cv::VideoCapture cap(path); + + cap >> ref; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(ref - eye, result, NORM_INF)); + + cap >> ref; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(ref - eye, result, NORM_INF)); +} + +TEST(GAPI_Streaming_Queue_Input, SmokeTest) { + + // Queue Input: a tiny wrapper atop of multiple queue sources. + // Allows users to pass all input data at once. + + cv::GMat in1; + cv::GScalar in2; + cv::GMat out = in1 + in2; + cv::GStreamingCompiled comp = cv::GComputation(cv::GIn(in1, in2), cv::GOut(out)) + .compileStreaming(); + + // FIXME: This API is too raw + cv::gapi::wip::QueueInput input({ + cv::GMetaArg{ cv::GMatDesc{CV_8U, 1, cv::Size{64,64} } }, + cv::GMetaArg{ cv::empty_scalar_desc() } + }); + comp.setSource(input); // Implicit conversion allows it to be passed as-is. 
+ comp.start(); + + // Push data via queue input + cv::Mat eye = cv::Mat::eye(cv::Size{64, 64}, CV_8UC1); + input.push(cv::gin(eye, cv::Scalar(1))); + input.push(cv::gin(eye, cv::Scalar(2))); + input.push(cv::gin(eye, cv::Scalar(3))); + + // Pop data and validate + cv::Mat result; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye+1, result, NORM_INF)); + + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye+2, result, NORM_INF)); + + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye+3, result, NORM_INF)); +} + +} // namespace opencv_test diff --git a/modules/highgui/src/window_cocoa.mm b/modules/highgui/src/window_cocoa.mm index a4b62f2717..8dc640fdd6 100644 --- a/modules/highgui/src/window_cocoa.mm +++ b/modules/highgui/src/window_cocoa.mm @@ -184,6 +184,9 @@ void destroyWindowImpl( const char* name) //cout << "destroyWindowImpl" << endl; CVWindow *window = cvGetWindow(name); if(window) { + if ([window styleMask] & NSFullScreenWindowMask) { + [window toggleFullScreen:nil]; + } [window close]; [windows removeObjectForKey:[NSString stringWithFormat:@"%s", name]]; } @@ -668,7 +671,11 @@ double cvGetModeWindow_COCOA( const char* name ) void cvSetModeWindow_COCOA( const char* name, double prop_value ) { CVWindow *window = nil; + +#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7 NSDictionary *fullscreenOptions = nil; +#endif + NSAutoreleasePool* localpool = nil; CV_FUNCNAME( "cvSetModeWindow_COCOA" ); @@ -692,6 +699,31 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) localpool = [[NSAutoreleasePool alloc] init]; +#if MAC_OS_X_VERSION_MAX_ALLOWED > MAC_OS_X_VERSION_10_6 + if ( ([window styleMask] & NSFullScreenWindowMask) && prop_value==cv::WINDOW_NORMAL ) + { + [window toggleFullScreen:nil]; + + window.status=cv::WINDOW_NORMAL; + } + else if( !([window styleMask] & NSFullScreenWindowMask) && prop_value==cv::WINDOW_FULLSCREEN ) + { + [window 
setCollectionBehavior:NSWindowCollectionBehaviorFullScreenPrimary]; + + NSScreen* screen = [window screen]; + + NSRect frame = [screen frame]; + [window setFrame:frame display:YES]; + + [window setContentSize:frame.size]; + + [window toggleFullScreen:nil]; + + [window setFrameTopLeftPoint: frame.origin]; + + window.status=cv::WINDOW_FULLSCREEN; + } +#else fullscreenOptions = [NSDictionary dictionaryWithObject:[NSNumber numberWithBool:YES] forKey:NSFullScreenModeSetting]; if ( [[window contentView] isInFullScreenMode] && prop_value==cv::WINDOW_NORMAL ) { @@ -703,7 +735,7 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) [[window contentView] enterFullScreenMode:[NSScreen mainScreen] withOptions:fullscreenOptions]; window.status=cv::WINDOW_FULLSCREEN; } - +#endif [localpool drain]; __END__; @@ -777,7 +809,7 @@ void cvSetPropTopmost_COCOA( const char* name, const bool topmost ) CV_ERROR( CV_StsNullPtr, "NULL window" ); } - if ([[window contentView] isInFullScreenMode]) + if (([window styleMask] & NSFullScreenWindowMask)) { EXIT; } diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp index ed21f3f14c..4febee36db 100644 --- a/modules/imgcodecs/src/grfmt_tiff.cpp +++ b/modules/imgcodecs/src/grfmt_tiff.cpp @@ -245,7 +245,7 @@ bool TiffDecoder::readHeader() if (!tif) { // TIFFOpen() mode flags are different to fopen(). A 'b' in mode "rb" has no effect when reading. - // http://www.remotesensing.org/libtiff/man/TIFFOpen.3tiff.html + // http://www.simplesystems.org/libtiff/functions/TIFFOpen.html if ( !m_buf.empty() ) { m_buf_pos = 0; @@ -1118,7 +1118,7 @@ public: TIFF* open () { // do NOT put "wb" as the mode, because the b means "big endian" mode, not "binary" mode. 
- // http://www.remotesensing.org/libtiff/man/TIFFOpen.3tiff.html + // http://www.simplesystems.org/libtiff/functions/TIFFOpen.html return TIFFClientOpen( "", "w", reinterpret_cast(this), &TiffEncoderBufHelper::read, &TiffEncoderBufHelper::write, &TiffEncoderBufHelper::seek, &TiffEncoderBufHelper::close, &TiffEncoderBufHelper::size, @@ -1200,7 +1200,7 @@ static bool readParam(const std::vector& params, int key, int& value) bool TiffEncoder::writeLibTiff( const std::vector& img_vec, const std::vector& params) { // do NOT put "wb" as the mode, because the b means "big endian" mode, not "binary" mode. - // http://www.remotesensing.org/libtiff/man/TIFFOpen.3tiff.html + // http://www.simplesystems.org/libtiff/functions/TIFFOpen.html TIFF* tif = NULL; TiffEncoderBufHelper buf_helper(m_buf); diff --git a/modules/imgproc/CMakeLists.txt b/modules/imgproc/CMakeLists.txt index d85b95ed26..1c033c96fd 100644 --- a/modules/imgproc/CMakeLists.txt +++ b/modules/imgproc/CMakeLists.txt @@ -14,10 +14,12 @@ ocv_define_module(imgproc opencv_core WRAP java objc python js) ocv_module_include_directories(opencv_imgproc ${ZLIB_INCLUDE_DIRS}) -ocv_check_environment_variables(OPENCV_IPP_GAUSSIAN_BLUR) -option(OPENCV_IPP_GAUSSIAN_BLUR "Enable IPP optimizations for GaussianBlur (+8Mb in binary size)" OFF) -if(OPENCV_IPP_GAUSSIAN_BLUR) - ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/smooth.dispatch.cpp "ENABLE_IPP_GAUSSIAN_BLUR=1") +if(HAVE_IPP) + # OPENCV_IPP_ENABLE_ALL is defined in modules/core/CMakeList.txt + OCV_OPTION(OPENCV_IPP_GAUSSIAN_BLUR "Enable IPP optimizations for GaussianBlur (+8Mb in binary size)" OPENCV_IPP_ENABLE_ALL) + if(OPENCV_IPP_GAUSSIAN_BLUR) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/smooth.dispatch.cpp "ENABLE_IPP_GAUSSIAN_BLUR=1") + endif() endif() set(UNIFONT_MD5 "fb79cf5b4f4c89414f1233f14c2eb273") diff --git a/modules/imgproc/src/distransform.cpp b/modules/imgproc/src/distransform.cpp index 
57940935d4..adb0359c07 100755 --- a/modules/imgproc/src/distransform.cpp +++ b/modules/imgproc/src/distransform.cpp @@ -78,7 +78,7 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met const uchar* src = _src.ptr(); int* temp = _temp.ptr(); - float* dist = _dist.ptr(); + float* dist = _dist.ptr(_dist.rows - 1); int srcstep = (int)(_src.step/sizeof(src[0])); int step = (int)(_temp.step/sizeof(temp[0])); int dststep = (int)(_dist.step/sizeof(dist[0])); @@ -87,11 +87,10 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met initTopBottom( _temp, BORDER ); // forward pass + unsigned int* tmp = (unsigned int*)(temp + BORDER*step) + BORDER; + const uchar* s = src; for( i = 0; i < size.height; i++ ) { - const uchar* s = src + i*srcstep; - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - for( j = 0; j < BORDER; j++ ) tmp[-j-1] = tmp[size.width + j] = INIT_DIST0; @@ -111,13 +110,15 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met tmp[j] = t0; } } + tmp += step; + s += srcstep; } // backward pass + float* d = (float*)dist; for( i = size.height - 1; i >= 0; i-- ) { - float* d = (float*)(dist + i*dststep); - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; + tmp -= step; for( j = size.width - 1; j >= 0; j-- ) { @@ -137,6 +138,7 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met t0 = (t0 > DIST_MAX) ? 
DIST_MAX : t0; d[j] = (float)(t0 * scale); } + d -= dststep; } } @@ -153,7 +155,7 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met const uchar* src = _src.ptr(); int* temp = _temp.ptr(); - float* dist = _dist.ptr(); + float* dist = _dist.ptr(_dist.rows - 1); int srcstep = (int)(_src.step/sizeof(src[0])); int step = (int)(_temp.step/sizeof(temp[0])); int dststep = (int)(_dist.step/sizeof(dist[0])); @@ -162,11 +164,10 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met initTopBottom( _temp, BORDER ); // forward pass + unsigned int* tmp = (unsigned int*)(temp + BORDER*step) + BORDER; + const uchar* s = src; for( i = 0; i < size.height; i++ ) { - const uchar* s = src + i*srcstep; - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - for( j = 0; j < BORDER; j++ ) tmp[-j-1] = tmp[size.width + j] = INIT_DIST0; @@ -194,13 +195,15 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met tmp[j] = t0; } } + tmp += step; + s += srcstep; } // backward pass + float* d = (float*)dist; for( i = size.height - 1; i >= 0; i-- ) { - float* d = (float*)(dist + i*dststep); - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; + tmp -= step; for( j = size.width - 1; j >= 0; j-- ) { @@ -228,6 +231,7 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met t0 = (t0 > DIST_MAX) ? 
DIST_MAX : t0; d[j] = (float)(t0 * scale); } + d -= dststep; } } @@ -245,7 +249,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, const uchar* src = _src.ptr(); int* temp = _temp.ptr(); - float* dist = _dist.ptr(); + float* dist = _dist.ptr(_dist.rows - 1); int* labels = _labels.ptr(); int srcstep = (int)(_src.step/sizeof(src[0])); int step = (int)(_temp.step/sizeof(temp[0])); @@ -256,12 +260,11 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, initTopBottom( _temp, BORDER ); // forward pass + const uchar* s = src; + unsigned int* tmp = (unsigned int*)(temp + BORDER*step) + BORDER; + int* lls = (int*)labels; for( i = 0; i < size.height; i++ ) { - const uchar* s = src + i*srcstep; - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - int* lls = (int*)(labels + i*lstep); - for( j = 0; j < BORDER; j++ ) tmp[-j-1] = tmp[size.width + j] = INIT_DIST0; @@ -330,14 +333,17 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, lls[j] = l0; } } + s += srcstep; + tmp += step; + lls += lstep; } // backward pass + float* d = (float*)dist; for( i = size.height - 1; i >= 0; i-- ) { - float* d = (float*)(dist + i*dststep); - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - int* lls = (int*)(labels + i*lstep); + tmp -= step; + lls -= lstep; for( j = size.width - 1; j >= 0; j-- ) { @@ -399,6 +405,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, t0 = (t0 > DIST_MAX) ? 
DIST_MAX : t0; d[j] = (float)(t0 * scale); } + d -= dststep; } } diff --git a/modules/imgproc/src/geometry.cpp b/modules/imgproc/src/geometry.cpp index 9536514b7d..3a40caecf1 100644 --- a/modules/imgproc/src/geometry.cpp +++ b/modules/imgproc/src/geometry.cpp @@ -269,17 +269,16 @@ static LineSegmentIntersection parallelInt( Point2f a, Point2f b, Point2f c, Poi static LineSegmentIntersection intersectLineSegments( Point2f a, Point2f b, Point2f c, Point2f d, Point2f& p, Point2f& q ) { - double denom = a.x * (double)(d.y - c.y) + b.x * (double)(c.y - d.y) + - d.x * (double)(b.y - a.y) + c.x * (double)(a.y - b.y); + double denom = (a.x - b.x) * (double)(d.y - c.y) - (a.y - b.y) * (double)(d.x - c.x); // If denom is zero, then segments are parallel: handle separately. if( denom == 0. ) return parallelInt(a, b, c, d, p, q); - double num = a.x * (double)(d.y - c.y) + c.x * (double)(a.y - d.y) + d.x * (double)(c.y - a.y); + double num = (d.y - a.y) * (double)(a.x - c.x) + (a.x - d.x) * (double)(a.y - c.y); double s = num / denom; - num = a.x * (double)(b.y - c.y) + b.x * (double)(c.y - a.y) + c.x * (double)(a.y - b.y); + num = (b.y - a.y) * (double)(a.x - c.x) + (c.y - a.y) * (double)(b.x - a.x); double t = num / denom; p.x = (float)(a.x + s*(b.x - a.x)); diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 2164639127..a1441fd44f 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -2679,8 +2679,13 @@ static bool ipp_warpAffine( InputArray _src, OutputArray _dst, int interpolation } return true; +#else + CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(interpolation); + CV_UNUSED(borderType); CV_UNUSED(_M); CV_UNUSED(flags); + return false; #endif } + #endif namespace hal { diff --git a/modules/imgproc/test/test_distancetransform.cpp b/modules/imgproc/test/test_distancetransform.cpp index e8b9a8cb06..b9d480e524 100644 --- a/modules/imgproc/test/test_distancetransform.cpp +++ 
b/modules/imgproc/test/test_distancetransform.cpp @@ -104,4 +104,28 @@ TEST(Imgproc_DistanceTransform, large_square_22732) EXPECT_EQ(0, nerrs) << "reference distance map is different from computed one at " << nerrs << " pixels\n"; } +BIGDATA_TEST(Imgproc_DistanceTransform, issue_23895_3x3) +{ + Mat src = Mat::zeros(50000, 50000, CV_8U), dist; + distanceTransform(src.col(0), dist, DIST_L2, DIST_MASK_3); + int nz = countNonZero(dist); + EXPECT_EQ(nz, 0); +} + +BIGDATA_TEST(Imgproc_DistanceTransform, issue_23895_5x5) +{ + Mat src = Mat::zeros(50000, 50000, CV_8U), dist; + distanceTransform(src.col(0), dist, DIST_L2, DIST_MASK_5); + int nz = countNonZero(dist); + EXPECT_EQ(nz, 0); +} + +BIGDATA_TEST(Imgproc_DistanceTransform, issue_23895_5x5_labels) +{ + Mat src = Mat::zeros(50000, 50000, CV_8U), dist, labels; + distanceTransform(src.col(0), dist, labels, DIST_L2, DIST_MASK_5); + int nz = countNonZero(dist); + EXPECT_EQ(nz, 0); +} + }} // namespace diff --git a/modules/js/test/test_core.js b/modules/js/test/test_core.js new file mode 100644 index 0000000000..14d4ffe72b --- /dev/null +++ b/modules/js/test/test_core.js @@ -0,0 +1,41 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +if (typeof module !== 'undefined' && module.exports) { + // The environment is Node.js + var cv = require('./opencv.js'); // eslint-disable-line no-var +} + +QUnit.module('Core', {}); + +QUnit.test('test_LUT', function(assert) { + // test LUT + { + let src = cv.matFromArray(3, 3, cv.CV_8UC1, [255, 128, 0, 0, 128, 255, 1, 2, 254]); + let lutTable = []; + for (let i = 0; i < 256; i++) + { + lutTable[i] = 255 - i; + } + let lut = cv.matFromArray(1, 256, cv.CV_8UC1, lutTable); + let dst = new cv.Mat(); + + cv.LUT(src, lut, dst); + + //console.log(dst.data); + assert.equal(dst.ucharAt(0), 0); + assert.equal(dst.ucharAt(1), 127); + assert.equal(dst.ucharAt(2), 255); + assert.equal(dst.ucharAt(3), 255); + assert.equal(dst.ucharAt(4), 127); + assert.equal(dst.ucharAt(5), 0); + assert.equal(dst.ucharAt(6), 254); + assert.equal(dst.ucharAt(7), 253); + assert.equal(dst.ucharAt(8), 1); + + src.delete(); + lut.delete(); + dst.delete(); + } +}); diff --git a/modules/js/test/test_mat.js b/modules/js/test/test_mat.js index 409ed1b123..fd3611cd2c 100644 --- a/modules/js/test/test_mat.js +++ b/modules/js/test/test_mat.js @@ -73,7 +73,7 @@ if (typeof module !== 'undefined' && module.exports) { var cv = require('./opencv.js'); // eslint-disable-line no-var } -QUnit.module('Core', {}); +QUnit.module('CoreMat', {}); QUnit.test('test_mat_creation', function(assert) { // Mat constructors. 
diff --git a/modules/js/test/tests.html b/modules/js/test/tests.html index de64ca7a29..b20013ec63 100644 --- a/modules/js/test/tests.html +++ b/modules/js/test/tests.html @@ -52,12 +52,12 @@ if (window.cv instanceof Promise) { window.cv.then((target) => { window.cv = target; - //console.log(cv.getBuildInformation()); + console.log(cv.getBuildInformation()); QUnit.start(); }) } else { // for backward compatible - // console.log(cv.getBuildInformation()); + console.log(cv.getBuildInformation()); QUnit.start(); } }, @@ -108,6 +108,7 @@ + diff --git a/modules/js/test/tests.js b/modules/js/test/tests.js index f3156f6ea0..74a4b87e45 100644 --- a/modules/js/test/tests.js +++ b/modules/js/test/tests.js @@ -44,10 +44,15 @@ testrunner.options.maxBlockDuration = 20000; // cause opencv_js.js need time to testrunner.run( { code: 'opencv.js', - tests: ['test_mat.js', 'test_utils.js', 'test_imgproc.js', - 'test_objdetect.js', 'test_video.js', 'test_features2d.js', + tests: ['test_mat.js', + 'test_utils.js', + 'test_core.js', + 'test_imgproc.js', + 'test_objdetect.js', + 'test_video.js', + 'test_features2d.js', 'test_photo.js', - 'test_calib3d.js' + 'test_calib3d.js', ], }, function(err, report) { diff --git a/modules/objdetect/misc/python/test/test_objdetect_aruco.py b/modules/objdetect/misc/python/test/test_objdetect_aruco.py index d63a19cd2f..dda58b6460 100644 --- a/modules/objdetect/misc/python/test/test_objdetect_aruco.py +++ b/modules/objdetect/misc/python/test/test_objdetect_aruco.py @@ -186,6 +186,39 @@ class aruco_objdetect_test(NewOpenCVTests): self.assertEqual((1, 4, 2), refine_corners[0].shape) np.testing.assert_array_equal(corners, refine_corners) + def test_charuco_refine(self): + aruco_dict = cv.aruco.getPredefinedDictionary(cv.aruco.DICT_6X6_50) + board_size = (3, 4) + board = cv.aruco.CharucoBoard(board_size, 1., .7, aruco_dict) + aruco_detector = cv.aruco.ArucoDetector(aruco_dict) + charuco_detector = cv.aruco.CharucoDetector(board) + cell_size = 100 + image = 
board.generateImage((cell_size*board_size[0], cell_size*board_size[1])) + camera = np.array([[1, 0, 0.5], + [0, 1, 0.5], + [0, 0, 1]]) + dist = np.array([0, 0, 0, 0, 0], dtype=np.float32).reshape(1, -1) + + # generate gold corners of the ArUco markers for the test + gold_corners = np.array(board.getObjPoints())[:, :, 0:2]*cell_size + + # detect corners + markerCorners, markerIds, _ = aruco_detector.detectMarkers(image) + + # test refine + rejected = [markerCorners[-1]] + markerCorners, markerIds = markerCorners[:-1], markerIds[:-1] + markerCorners, markerIds, _, _ = aruco_detector.refineDetectedMarkers(image, board, markerCorners, markerIds, + rejected, cameraMatrix=camera, distCoeffs=dist) + + charucoCorners, charucoIds, _, _ = charuco_detector.detectBoard(image, markerCorners=markerCorners, + markerIds=markerIds) + self.assertEqual(len(charucoIds), 6) + self.assertEqual(len(markerIds), 6) + + for i, id in enumerate(markerIds.reshape(-1)): + np.testing.assert_allclose(gold_corners[id], markerCorners[i].reshape(4, 2), 0.01, 1.) + def test_write_read_dictionary(self): try: aruco_dict = cv.aruco.getPredefinedDictionary(cv.aruco.DICT_5X5_50) diff --git a/modules/objdetect/src/aruco/aruco_detector.cpp b/modules/objdetect/src/aruco/aruco_detector.cpp index 4b3af1b2c7..a62ca10faa 100644 --- a/modules/objdetect/src/aruco/aruco_detector.cpp +++ b/modules/objdetect/src/aruco/aruco_detector.cpp @@ -1000,7 +1000,13 @@ static inline void _projectUndetectedMarkers(const Board &board, InputOutputArra OutputArray undetectedMarkersIds) { Mat rvec, tvec; // first estimate board pose with the current avaible markers Mat objPoints, imgPoints; // object and image points for the solvePnP function - board.matchImagePoints(detectedCorners, detectedIds, objPoints, imgPoints); + // To refine corners of ArUco markers the function refineDetectedMarkers() find an aruco markers pose from 3D-2D point correspondences. + // To find 3D-2D point correspondences uses matchImagePoints(). 
+ // The method matchImagePoints() works with ArUco corners (in Board/GridBoard cases) or with ChArUco corners (in CharucoBoard case). + // To refine corners of ArUco markers we need to work with ArUco corners only, for all board types. + // To call matchImagePoints() with ArUco corners for all boards we need to call matchImagePoints() from base class Board. + // The method matchImagePoints() is implemented in the Pimpl, so we need to create a temporary Board object to call the base method. + Board(board.getObjPoints(), board.getDictionary(), board.getIds()).matchImagePoints(detectedCorners, detectedIds, objPoints, imgPoints); if (objPoints.total() < 4ull) // at least one marker from board so rvec and tvec are valid return; solvePnP(objPoints, imgPoints, cameraMatrix, distCoeffs, rvec, tvec); diff --git a/modules/objdetect/src/aruco/aruco_dictionary.cpp b/modules/objdetect/src/aruco/aruco_dictionary.cpp index f73cea3357..3d5f9b1bfd 100644 --- a/modules/objdetect/src/aruco/aruco_dictionary.cpp +++ b/modules/objdetect/src/aruco/aruco_dictionary.cpp @@ -355,6 +355,7 @@ static int _getSelfDistance(const Mat &marker) { Dictionary extendDictionary(int nMarkers, int markerSize, const Dictionary &baseDictionary, int randomSeed) { + CV_Assert(nMarkers > 0); RNG rng((uint64)(randomSeed)); Dictionary out = Dictionary(Mat(), markerSize); @@ -370,7 +371,7 @@ Dictionary extendDictionary(int nMarkers, int markerSize, const Dictionary &base // if baseDictionary is provided, calculate its intermarker distance if(baseDictionary.bytesList.rows > 0) { CV_Assert(baseDictionary.markerSize == markerSize); - out.bytesList = baseDictionary.bytesList.clone(); + out.bytesList = baseDictionary.bytesList.rowRange(0, min(nMarkers, baseDictionary.bytesList.rows)).clone(); int minDistance = markerSize * markerSize + 1; for(int i = 0; i < out.bytesList.rows; i++) { diff --git a/modules/objdetect/src/qrcode.cpp b/modules/objdetect/src/qrcode.cpp index 0e32a2dea3..5e424fca9c 100644 --- a/modules/objdetect/src/qrcode.cpp
+++ b/modules/objdetect/src/qrcode.cpp @@ -68,19 +68,14 @@ static void updatePointsResult(OutputArray points_, const vector& point static Point2f intersectionLines(Point2f a1, Point2f a2, Point2f b1, Point2f b2) { + // Try to solve a two lines intersection (a1, a2) and (b1, b2) as a system of equations: + // a2 + u * (a1 - a2) = b2 + v * (b1 - b2) const float divisor = (a1.x - a2.x) * (b1.y - b2.y) - (a1.y - a2.y) * (b1.x - b2.x); const float eps = 0.001f; if (abs(divisor) < eps) return a2; - Point2f result_square_angle( - ((a1.x * a2.y - a1.y * a2.x) * (b1.x - b2.x) - - (b1.x * b2.y - b1.y * b2.x) * (a1.x - a2.x)) / - divisor, - ((a1.x * a2.y - a1.y * a2.x) * (b1.y - b2.y) - - (b1.x * b2.y - b1.y * b2.x) * (a1.y - a2.y)) / - divisor - ); - return result_square_angle; + const float u = ((b2.x - a2.x) * (b1.y - b2.y) + (b1.x - b2.x) * (a2.y - b2.y)) / divisor; + return a2 + u * (a1 - a2); } // / | b @@ -1254,14 +1249,14 @@ bool QRDecode::computeSidesPoints(const vector &result_integer_hull) { if (points.front().x > points.back().x) { - reverse(points.begin(), points.end()); + std::reverse(points.begin(), points.end()); } } else { if (points.front().y > points.back().y) { - reverse(points.begin(), points.end()); + std::reverse(points.begin(), points.end()); } } if (points.empty()) @@ -1637,7 +1632,7 @@ bool QRDecode::findPatternsVerticesPoints(vector > &patterns_verti } if ((int)min_angle_pnts_indexes.size() == num_vertices) { break; } } - sort(min_angle_pnts_indexes.begin(), min_angle_pnts_indexes.end()); + std::sort(min_angle_pnts_indexes.begin(), min_angle_pnts_indexes.end()); vector contour_vertices_points; @@ -1766,11 +1761,11 @@ bool QRDecode::findTempPatternsAddingPoints(vector } if (abs(p1.x - p2.x) > abs(p1.y - p2.y)) { - sort(points.begin(), points.end(), sortPointsByX()); + std::sort(points.begin(), points.end(), sortPointsByX()); } else { - sort(points.begin(), points.end(), sortPointsByY()); + std::sort(points.begin(), points.end(), sortPointsByY()); } 
temp_patterns_add_points.push_back(std::pair >(idx_curved_side,points)); @@ -1914,11 +1909,11 @@ void QRDecode::completeAndSortSides() Point p2 = it->second.back(); if (abs(p1.x - p2.x) > abs(p1.y - p2.y)) { - sort(it->second.begin(), it->second.end(), sortPointsByX()); + std::sort(it->second.begin(), it->second.end(), sortPointsByX()); } else { - sort(it->second.begin(), it->second.end(), sortPointsByY()); + std::sort(it->second.begin(), it->second.end(), sortPointsByY()); } } } @@ -2080,8 +2075,8 @@ bool QRDecode::divideIntoEvenSegments(vector > &segments_points) Point2f segment_start = segments_points[i][j]; Point2f segment_end = segments_points[i][j + 1]; vector::iterator it_start, it_end, it; - it_start = find(spline_lines[i].begin(), spline_lines[i].end(), segment_start); - it_end = find(spline_lines[i].begin(), spline_lines[i].end(), segment_end); + it_start = std::find(spline_lines[i].begin(), spline_lines[i].end(), segment_start); + it_end = std::find(spline_lines[i].begin(), spline_lines[i].end(), segment_end); float max_dist_to_line = 0.0; for (it = it_start; it != it_end; it++) { diff --git a/modules/objdetect/test/test_boarddetection.cpp b/modules/objdetect/test/test_boarddetection.cpp index e47e6c3cb6..0c99e6de61 100644 --- a/modules/objdetect/test/test_boarddetection.cpp +++ b/modules/objdetect/test/test_boarddetection.cpp @@ -318,4 +318,12 @@ TEST(CV_ArucoGenerateBoard, regression_1226) { }); } +TEST(CV_ArucoDictionary, extendDictionary) { + aruco::Dictionary base_dictionary = aruco::getPredefinedDictionary(aruco::DICT_4X4_250); + aruco::Dictionary custom_dictionary = aruco::extendDictionary(150, 4, base_dictionary); + + ASSERT_EQ(custom_dictionary.bytesList.rows, 150); + ASSERT_EQ(cv::norm(custom_dictionary.bytesList, base_dictionary.bytesList.rowRange(0, 150)), 0.); +} + }} // namespace diff --git a/modules/objdetect/test/test_cascadeandhog.cpp b/modules/objdetect/test/test_cascadeandhog.cpp index 4151b899e3..0a68bd9bb3 100644 --- 
a/modules/objdetect/test/test_cascadeandhog.cpp +++ b/modules/objdetect/test/test_cascadeandhog.cpp @@ -355,7 +355,7 @@ int CV_DetectorTest::validate( int detectorIdx, vector >& objects ) map[minIdx] = 1; } } - noPair += (int)count_if( map.begin(), map.end(), isZero ); + noPair += (int)std::count_if( map.begin(), map.end(), isZero ); totalNoPair += noPair; /*if( noPair > cvRound(valRects.size()*eps.noPair)+1 ) diff --git a/modules/objdetect/test/test_qrcode_encode.cpp b/modules/objdetect/test/test_qrcode_encode.cpp index 14900c3078..1005793269 100644 --- a/modules/objdetect/test/test_qrcode_encode.cpp +++ b/modules/objdetect/test/test_qrcode_encode.cpp @@ -264,7 +264,8 @@ TEST(Objdetect_QRCode_Encode_Decode, regression) int true_capacity = establishCapacity(mode, version, cur_capacity); std::string input_info = symbol_set; - std::random_shuffle(input_info.begin(),input_info.end()); + std::mt19937 rand_gen {1}; + std::shuffle(input_info.begin(), input_info.end(), rand_gen); int count = 0; if((int)input_info.length() > true_capacity) { @@ -390,15 +391,8 @@ TEST(Objdetect_QRCode_Encode_Decode_Structured_Append, DISABLED_regression) std::string symbol_set = config["symbols_set"]; std::string input_info = symbol_set; -#if defined CV_CXX11 - // std::random_shuffle is deprecated since C++11 and removed in C++17. - // Use manually constructed RNG with a fixed seed and std::shuffle instead. 
std::mt19937 rand_gen {1}; std::shuffle(input_info.begin(), input_info.end(), rand_gen); -#else - SeededRandFunctor<1> rand_gen; - std::random_shuffle(input_info.begin(), input_info.end(), rand_gen); -#endif for (int j = min_stuctures_num; j < max_stuctures_num; j++) { QRCodeEncoder::Params params; diff --git a/modules/python/test/test_cuda.py b/modules/python/test/test_cuda.py index 851a23e880..c886342832 100644 --- a/modules/python/test/test_cuda.py +++ b/modules/python/test/test_cuda.py @@ -70,6 +70,74 @@ class cuda_test(NewOpenCVTests): self.assertTrue(cuMat.step == 0) self.assertTrue(cuMat.size() == (0, 0)) + def test_cuda_convertTo(self): + # setup + npMat_8UC4 = (np.random.random((128, 128, 4)) * 255).astype(np.uint8) + npMat_32FC4 = npMat_8UC4.astype(np.single) + new_type = cv.CV_32FC4 + + # sync + # in/out + cuMat_8UC4 = cv.cuda_GpuMat(npMat_8UC4) + cuMat_32FC4 = cv.cuda_GpuMat(cuMat_8UC4.size(), new_type) + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, cuMat_32FC4) + self.assertTrue(cuMat_32FC4.cudaPtr() == cuMat_32FC4_out.cudaPtr()) + npMat_32FC4_out = cuMat_32FC4.download() + self.assertTrue(np.array_equal(npMat_32FC4, npMat_32FC4_out)) + # out + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type) + npMat_32FC4_out = cuMat_32FC4.download() + self.assertTrue(np.array_equal(npMat_32FC4, npMat_32FC4_out)) + + # async + stream = cv.cuda.Stream() + cuMat_32FC4 = cv.cuda_GpuMat(cuMat_8UC4.size(), new_type) + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, cuMat_32FC4) + # in/out + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, 1, 0, stream, cuMat_32FC4) + self.assertTrue(cuMat_32FC4.cudaPtr() == cuMat_32FC4_out.cudaPtr()) + npMat_32FC4_out = cuMat_32FC4.download(stream) + stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_32FC4, npMat_32FC4_out)) + # out + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, 1, 0, stream) + npMat_32FC4_out = cuMat_32FC4.download(stream) + stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_32FC4, 
npMat_32FC4_out)) + + def test_cuda_copyTo(self): + # setup + npMat_8UC4 = (np.random.random((128, 128, 4)) * 255).astype(np.uint8) + + # sync + # in/out + cuMat_8UC4 = cv.cuda_GpuMat(npMat_8UC4) + cuMat_8UC4_dst = cv.cuda_GpuMat(cuMat_8UC4.size(), cuMat_8UC4.type()) + cuMat_8UC4_out = cuMat_8UC4.copyTo(cuMat_8UC4_dst) + self.assertTrue(cuMat_8UC4_out.cudaPtr() == cuMat_8UC4_dst.cudaPtr()) + npMat_8UC4_out = cuMat_8UC4_out.download() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + # out + cuMat_8UC4_out = cuMat_8UC4.copyTo() + npMat_8UC4_out = cuMat_8UC4_out.download() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + + # async + stream = cv.cuda.Stream() + # in/out + cuMat_8UC4 = cv.cuda_GpuMat(npMat_8UC4) + cuMat_8UC4_dst = cv.cuda_GpuMat(cuMat_8UC4.size(), cuMat_8UC4.type()) + cuMat_8UC4_out = cuMat_8UC4.copyTo(cuMat_8UC4_dst, stream) + self.assertTrue(cuMat_8UC4_out.cudaPtr() == cuMat_8UC4_dst.cudaPtr()) + npMat_8UC4_out = cuMat_8UC4_dst.download(stream) + stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + # out + cuMat_8UC4_out = cuMat_8UC4.copyTo(stream) + npMat_8UC4_out = cuMat_8UC4_out.download(stream) + stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + def test_cuda_denoising(self): self.assertEqual(True, hasattr(cv.cuda, 'fastNlMeansDenoising')) self.assertEqual(True, hasattr(cv.cuda, 'fastNlMeansDenoisingColored')) diff --git a/modules/stereo/test/test_stereomatching.cpp b/modules/stereo/test/test_stereomatching.cpp index 02d1823d2d..c17d92292a 100644 --- a/modules/stereo/test/test_stereomatching.cpp +++ b/modules/stereo/test/test_stereomatching.cpp @@ -740,8 +740,8 @@ public: CV_StereoBMTest() { name = "stereobm"; - fill(rmsEps.begin(), rmsEps.end(), 0.4f); - fill(fracEps.begin(), fracEps.end(), 0.022f); + std::fill(rmsEps.begin(), rmsEps.end(), 0.4f); + std::fill(fracEps.begin(), fracEps.end(), 0.022f); } protected: @@ -866,8 +866,8 @@ public:
CV_StereoSGBMTest() { name = "stereosgbm"; - fill(rmsEps.begin(), rmsEps.end(), 0.25f); - fill(fracEps.begin(), fracEps.end(), 0.01f); + std::fill(rmsEps.begin(), rmsEps.end(), 0.25f); + std::fill(fracEps.begin(), fracEps.end(), 0.01f); } protected: diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index 442fa08ec5..7bc3934891 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -134,7 +134,7 @@ enum VideoCaptureAPIs { */ enum VideoCaptureProperties { CAP_PROP_POS_MSEC =0, //!< Current position of the video file in milliseconds. - CAP_PROP_POS_FRAMES =1, //!< 0-based index of the frame to be decoded/captured next. + CAP_PROP_POS_FRAMES =1, //!< 0-based index of the frame to be decoded/captured next. When the index i is set in RAW mode (CAP_PROP_FORMAT == -1) this will seek to the key frame k, where k <= i. CAP_PROP_POS_AVI_RATIO =2, //!< Relative position of the video file: 0=start of the film, 1=end of the film. CAP_PROP_FRAME_WIDTH =3, //!< Width of the frames in the video stream. CAP_PROP_FRAME_HEIGHT =4, //!< Height of the frames in the video stream. @@ -1030,6 +1030,9 @@ public: - Most codecs are lossy. If you want lossless video file you need to use a lossless codecs (eg. FFMPEG FFV1, Huffman HFYU, Lagarith LAGS, etc...) - If FFMPEG is enabled, using `codec=0; fps=0;` you can create an uncompressed (raw) video file. + - If FFMPEG is used, we allow frames of odd width or height, but in this case we truncate + the rightmost column/the bottom row. Probably, this should be handled more elegantly, + but some internal functions inside FFMPEG swscale require even width/height. 
*/ CV_WRAP VideoWriter(const String& filename, int fourcc, double fps, Size frameSize, bool isColor = true); diff --git a/modules/videoio/src/cap_dshow.cpp b/modules/videoio/src/cap_dshow.cpp index d6b2b95545..21af06a147 100644 --- a/modules/videoio/src/cap_dshow.cpp +++ b/modules/videoio/src/cap_dshow.cpp @@ -2771,7 +2771,7 @@ int videoInput::start(int deviceID, videoDevice *VD){ if(customSize){ DebugPrintOut("SETUP: Default Format is set to %ix%i\n", currentWidth, currentHeight); - if (strcmp("OBS Virtual Camera", VD->nDeviceName) == 0) + if (strcmp("OBS Virtual Camera", VD->nDeviceName) == 0 || strcmp("Streamlabs Desktop Virtual Webcam", VD->nDeviceName) == 0) { // OBS Virtual Camera always returns S_OK on SetFormat(), even if it doesn't support // the actual format. So we have to choose a format that it supports manually, e.g. NV12. diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 982bc5c87d..e4431b323e 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -580,6 +580,7 @@ struct CvCapture_FFMPEG bool processRawPacket(); bool rawMode; bool rawModeInitialized; + bool rawSeek; bool convertRGB; AVPacket packet_filtered; #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100) @@ -633,6 +634,7 @@ void CvCapture_FFMPEG::init() rawMode = false; rawModeInitialized = false; + rawSeek = false; convertRGB = true; memset(&packet_filtered, 0, sizeof(packet_filtered)); av_init_packet(&packet_filtered); @@ -1051,33 +1053,35 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& return false; } } - if (params.has(CAP_PROP_HW_ACCELERATION)) - { - va_type = params.get(CAP_PROP_HW_ACCELERATION); + if(!rawMode) { + if (params.has(CAP_PROP_HW_ACCELERATION)) + { + va_type = params.get(CAP_PROP_HW_ACCELERATION); #if !USE_AV_HW_CODECS - if (va_type != VIDEO_ACCELERATION_NONE && va_type != VIDEO_ACCELERATION_ANY) - { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: 
FFmpeg backend is build without acceleration support. Can't handle CAP_PROP_HW_ACCELERATION parameter. Bailout"); - return false; - } + if (va_type != VIDEO_ACCELERATION_NONE && va_type != VIDEO_ACCELERATION_ANY) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: FFmpeg backend is build without acceleration support. Can't handle CAP_PROP_HW_ACCELERATION parameter. Bailout"); + return false; + } #endif - } - if (params.has(CAP_PROP_HW_DEVICE)) - { - hw_device = params.get(CAP_PROP_HW_DEVICE); - if (va_type == VIDEO_ACCELERATION_NONE && hw_device != -1) - { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE without requested H/W acceleration. Bailout"); - return false; } - if (va_type == VIDEO_ACCELERATION_ANY && hw_device != -1) + if (params.has(CAP_PROP_HW_DEVICE)) { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE with 'ANY' H/W acceleration. Bailout"); - return false; + hw_device = params.get(CAP_PROP_HW_DEVICE); + if (va_type == VIDEO_ACCELERATION_NONE && hw_device != -1) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE without requested H/W acceleration. Bailout"); + return false; + } + if (va_type == VIDEO_ACCELERATION_ANY && hw_device != -1) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE with 'ANY' H/W acceleration. 
Bailout"); + return false; + } + } + if (params.has(CAP_PROP_HW_ACCELERATION_USE_OPENCL)) { + use_opencl = params.get(CAP_PROP_HW_ACCELERATION_USE_OPENCL); } - } - if (params.has(CAP_PROP_HW_ACCELERATION_USE_OPENCL)) { - use_opencl = params.get(CAP_PROP_HW_ACCELERATION_USE_OPENCL); } #if USE_AV_INTERRUPT_CALLBACK if (params.has(CAP_PROP_OPEN_TIMEOUT_MSEC)) @@ -1153,6 +1157,23 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& CV_LOG_WARNING(NULL, "Unable to read codec parameters from stream (" << _opencv_ffmpeg_get_error_string(err) << ")"); goto exit_func; } + + if (rawMode) { + video_stream = av_find_best_stream(ic, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); + if (video_stream < 0) { + close(); + return false; + } + video_st = ic->streams[video_stream]; +#ifndef CV_FFMPEG_CODECPAR + frame.height = video_st->codec->height; + frame.width = video_st->codec->width; +#else + frame.height = video_st->codecpar->height; + frame.width = video_st->codecpar->width; +#endif + return true; + } for(i = 0; i < ic->nb_streams; i++) { #ifndef CV_FFMPEG_CODECPAR @@ -1440,6 +1461,10 @@ bool CvCapture_FFMPEG::processRawPacket() bool CvCapture_FFMPEG::grabFrame() { + if (rawSeek) { + rawSeek = false; + return true; + } bool valid = false; static const size_t max_read_attempts = cv::utils::getConfigurationParameterSizeT("OPENCV_FFMPEG_READ_ATTEMPTS", 4096); @@ -1447,7 +1472,7 @@ bool CvCapture_FFMPEG::grabFrame() size_t cur_read_attempts = 0; size_t cur_decode_attempts = 0; - if( !ic || !video_st || !context ) return false; + if( !ic || !video_st || (!rawMode && !context) ) return false; if( ic->streams[video_stream]->nb_frames > 0 && frame_number > ic->streams[video_stream]->nb_frames ) @@ -1464,7 +1489,7 @@ bool CvCapture_FFMPEG::grabFrame() #if USE_AV_SEND_FRAME_API // check if we can receive frame from previously decoded packet - valid = avcodec_receive_frame(context, picture) >= 0; + valid = rawMode ? 
false : avcodec_receive_frame(context, picture) >= 0; #endif // get the next frame @@ -1548,12 +1573,16 @@ bool CvCapture_FFMPEG::grabFrame() } if (valid) { - if( picture_pts == AV_NOPTS_VALUE_ ) - picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts; - frame_number++; + if (picture_pts == AV_NOPTS_VALUE_) { + if (!rawMode) + picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts; + else + picture_pts = packet.pts != AV_NOPTS_VALUE_ && packet.pts != 0 ? packet.pts : packet.dts; + frame_number++; + } } - if (!rawMode && valid && first_frame_number < 0) + if (valid && first_frame_number < 0) first_frame_number = dts_to_frame_number(picture_pts); #if USE_AV_INTERRUPT_CALLBACK @@ -1567,7 +1596,7 @@ bool CvCapture_FFMPEG::grabFrame() bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, int* width, int* height, int* cn, int* depth) { - if (!video_st || !context) + if (!video_st || (!rawMode && !context)) return false; if (rawMode || flag == extraDataIdx) @@ -1735,7 +1764,7 @@ static inline double getCodecIdFourcc(const AVCodecID codec_id) double CvCapture_FFMPEG::getProperty( int property_id ) const { - if( !video_st || !context ) return 0; + if( !video_st || (!rawMode && !context) ) return 0; switch( property_id ) { @@ -1814,7 +1843,8 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const //ic->start_time_realtime is in microseconds return ((double)ic->start_time_realtime); case CAP_PROP_N_THREADS: - return static_cast(context->thread_count); + if (!rawMode) + return static_cast(context->thread_count); default: break; } @@ -1910,9 +1940,11 @@ void CvCapture_FFMPEG::get_rotation_angle() void CvCapture_FFMPEG::seek(int64_t _frame_number) { - CV_Assert(context); + if (!rawMode) { + CV_Assert(context); + } _frame_number = std::min(_frame_number, 
get_total_frames()); - int delta = 16; + int delta = !rawMode ? 16 : 0; // if we have not grabbed a single frame before first seek, let's read the first frame // and get some valuable information during the process @@ -1927,7 +1959,8 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number) double time_base = r2d(ic->streams[video_stream]->time_base); time_stamp += (int64_t)(sec / time_base + 0.5); if (get_total_frames() > 1) av_seek_frame(ic, video_stream, time_stamp, AVSEEK_FLAG_BACKWARD); - avcodec_flush_buffers(context); + if(!rawMode) + avcodec_flush_buffers(context); if( _frame_number > 0 ) { grabFrame(); @@ -1935,6 +1968,10 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number) if( _frame_number > 1 ) { frame_number = dts_to_frame_number(picture_pts) - first_frame_number; + if (rawMode) { + rawSeek = true; + break; + } //printf("_frame_number = %d, frame_number = %d, delta = %d\n", // (int)_frame_number, (int)frame_number, delta); diff --git a/modules/videoio/src/cap_gstreamer.cpp b/modules/videoio/src/cap_gstreamer.cpp index fc031d2b5f..305d527ce9 100644 --- a/modules/videoio/src/cap_gstreamer.cpp +++ b/modules/videoio/src/cap_gstreamer.cpp @@ -2825,8 +2825,6 @@ CvResult CV_API_CALL cv_capture_open_with_params( if (!handle) return CV_ERROR_FAIL; *handle = NULL; - if (!filename) - return CV_ERROR_FAIL; GStreamerCapture *cap = 0; try { diff --git a/modules/videoio/src/cap_mjpeg_encoder.cpp b/modules/videoio/src/cap_mjpeg_encoder.cpp index efac4093ae..2e7452cf17 100644 --- a/modules/videoio/src/cap_mjpeg_encoder.cpp +++ b/modules/videoio/src/cap_mjpeg_encoder.cpp @@ -268,7 +268,7 @@ public: m_buffer_list[0].finish(); m_data_len = m_buffer_list[0].get_len(); - m_last_bit_len = m_buffer_list[0].get_bits_free() ? 32 - m_buffer_list[0].get_bits_free() : 0; + m_last_bit_len = 32 - m_buffer_list[0].get_bits_free(); return m_buffer_list[0].get_data(); } @@ -331,9 +331,14 @@ public: } //bits == 0 means that last element shouldn't be used. 
- m_output_buffer[m_data_len++] = currval; - - m_last_bit_len = -bits; + if (bits != 0) { + m_output_buffer[m_data_len++] = currval; + m_last_bit_len = -bits; + } + else + { + m_last_bit_len = 32; + } return &m_output_buffer[0]; } @@ -1167,8 +1172,6 @@ public: fdct_qtab(_fdct_qtab), cat_table(_cat_table) { -#if 0 // disable parallel processing due to buffer overrun bug: https://github.com/opencv/opencv/issues/19634 - //empirically found value. if number of pixels is less than that value there is no sense to parallelize it. const int min_pixels_count = 96*96; @@ -1194,12 +1197,6 @@ public: stripes_count = std::min(stripes_count, max_stripes); -#else - if (nstripes > 1) - CV_LOG_ONCE_WARNING(NULL, "VIDEOIO/MJPEG: parallel processing is disabled: https://github.com/opencv/opencv/issues/19634"); - stripes_count = 1; -#endif - m_buffer_list.allocate_buffers(stripes_count, (height*width*2)/stripes_count); } diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index 78eefc34a3..4b234b8cae 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -1159,7 +1159,12 @@ bool CvCapture_MSMF::configureVideoOutput(MediaType newType, cv::uint32_t outFor { initStream(dwVideoStreamIndex, nativeFormat); } - return initStream(dwVideoStreamIndex, newFormat); + if (!initStream(dwVideoStreamIndex, newFormat)) + { + return false; + } + outputVideoFormat = outFormat; + return true; } bool CvCapture_MSMF::configureOutput() @@ -2719,8 +2724,6 @@ CvResult CV_API_CALL cv_capture_open_with_params( if (!handle) return CV_ERROR_FAIL; *handle = NULL; - if (!filename) - return CV_ERROR_FAIL; CaptureT* cap = 0; try { diff --git a/modules/videoio/src/cap_v4l.cpp b/modules/videoio/src/cap_v4l.cpp index 905c79e42f..5b282f1966 100644 --- a/modules/videoio/src/cap_v4l.cpp +++ b/modules/videoio/src/cap_v4l.cpp @@ -2155,6 +2155,7 @@ bool CvCaptureCAM_V4L::setProperty( int property_id, double _value ) }else{ convert_rgb = false; releaseFrame(); + 
v4l2_create_frame(); return true; } case cv::CAP_PROP_FOURCC: diff --git a/modules/videoio/test/test_camera.cpp b/modules/videoio/test/test_camera.cpp index fc269959c3..8b0f0efe83 100644 --- a/modules/videoio/test/test_camera.cpp +++ b/modules/videoio/test/test_camera.cpp @@ -119,6 +119,21 @@ TEST(DISABLED_videoio_camera, v4l_read_mjpg) capture.release(); } +TEST(DISABLED_videoio_camera, msmf_read_yuyv) +{ + VideoCapture capture(CAP_MSMF); + ASSERT_TRUE(capture.isOpened()); + ASSERT_TRUE(capture.set(CAP_PROP_FOURCC, VideoWriter::fourcc('Y', 'U', 'Y', 'V'))); + std::cout << "Camera 0 via " << capture.getBackendName() << " backend" << std::endl; + std::cout << "Frame width: " << capture.get(CAP_PROP_FRAME_WIDTH) << std::endl; + std::cout << " height: " << capture.get(CAP_PROP_FRAME_HEIGHT) << std::endl; + std::cout << "Capturing FPS: " << capture.get(CAP_PROP_FPS) << std::endl; + int fourcc = (int)capture.get(CAP_PROP_FOURCC); + std::cout << "FOURCC code: " << cv::format("0x%8x", fourcc) << std::endl; + test_readFrames(capture); + capture.release(); +} + TEST(DISABLED_videoio_camera, v4l_open_mjpg) { VideoCapture capture; diff --git a/modules/videoio/test/test_ffmpeg.cpp b/modules/videoio/test/test_ffmpeg.cpp index 35d425d5c1..0496b8c369 100644 --- a/modules/videoio/test/test_ffmpeg.cpp +++ b/modules/videoio/test/test_ffmpeg.cpp @@ -476,6 +476,16 @@ static void ffmpeg_check_read_raw(VideoCapture& cap) EXPECT_EQ(CV_8UC1, data.type()) << "CV_8UC1 != " << typeToString(data.type()); EXPECT_TRUE(data.rows == 1 || data.cols == 1) << data.size; EXPECT_EQ((size_t)37118, data.total()); + +#ifndef WIN32 + // 12 is the nearest key frame to frame 18 + EXPECT_TRUE(cap.set(CAP_PROP_POS_FRAMES, 18.)); + EXPECT_EQ(cap.get(CAP_PROP_POS_FRAMES), 12.); + cap >> data; + EXPECT_EQ(CV_8UC1, data.type()) << "CV_8UC1 != " << typeToString(data.type()); + EXPECT_TRUE(data.rows == 1 || data.cols == 1) << data.size; + EXPECT_EQ((size_t)8726, data.total()); +#endif } TEST(videoio_ffmpeg,
ffmpeg_check_extra_data) @@ -506,6 +516,16 @@ TEST(videoio_ffmpeg, open_with_property) CAP_PROP_FORMAT, -1 // demux only })); + // confirm properties are returned without initializing AVCodecContext + EXPECT_EQ(cap.get(CAP_PROP_FORMAT), -1); + EXPECT_EQ(static_cast(cap.get(CAP_PROP_FOURCC)), fourccFromString("FMP4")); +#ifndef WIN32 + EXPECT_EQ(cap.get(CAP_PROP_N_THREADS), 0.0); +#endif + EXPECT_EQ(cap.get(CAP_PROP_FRAME_HEIGHT), 384.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_WIDTH), 672.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_COUNT), 125); + EXPECT_EQ(cap.get(CAP_PROP_FPS), 24.0); ffmpeg_check_read_raw(cap); } @@ -519,6 +539,16 @@ TEST(videoio_ffmpeg, create_with_property) CAP_PROP_FORMAT, -1 // demux only }); + // confirm properties are returned without initializing AVCodecContext + EXPECT_TRUE(cap.get(CAP_PROP_FORMAT) == -1); + EXPECT_EQ(static_cast(cap.get(CAP_PROP_FOURCC)), fourccFromString("FMP4")); +#ifndef WIN32 + EXPECT_EQ(cap.get(CAP_PROP_N_THREADS), 0.0); +#endif + EXPECT_EQ(cap.get(CAP_PROP_FRAME_HEIGHT), 384.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_WIDTH), 672.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_COUNT), 125); + EXPECT_EQ(cap.get(CAP_PROP_FPS), 24.0); ffmpeg_check_read_raw(cap); } diff --git a/platforms/js/opencv_js.config.py b/platforms/js/opencv_js.config.py index 66add88456..12f6254801 100644 --- a/platforms/js/opencv_js.config.py +++ b/platforms/js/opencv_js.config.py @@ -9,6 +9,7 @@ core = { 'perspectiveTransform', 'polarToCart', 'pow', 'randn', 'randu', 'reduce', 'repeat', 'rotate', 'setIdentity', 'setRNGSeed', 'solve', 'solvePoly', 'split', 'sqrt', 'subtract', 'trace', 'transform', 'transpose', 'vconcat', 'setLogLevel', 'getLogLevel', + 'LUT', ], 'Algorithm': [], } diff --git a/samples/dnn/fast_neural_style.py b/samples/dnn/fast_neural_style.py index 912c2f0832..43b8b121d6 100644 --- a/samples/dnn/fast_neural_style.py +++ b/samples/dnn/fast_neural_style.py @@ -5,15 +5,15 @@ import argparse parser = argparse.ArgumentParser( description='This script is 
used to run style transfer models from ' - 'https://github.com/jcjohnson/fast-neural-style using OpenCV') + 'https://github.com/onnx/models/tree/main/vision/style_transfer/fast_neural_style using OpenCV') parser.add_argument('--input', help='Path to image or video. Skip to capture frames from camera') -parser.add_argument('--model', help='Path to .t7 model') +parser.add_argument('--model', help='Path to .onnx model') parser.add_argument('--width', default=-1, type=int, help='Resize input to specific width.') parser.add_argument('--height', default=-1, type=int, help='Resize input to specific height.') parser.add_argument('--median_filter', default=0, type=int, help='Kernel size of postprocessing blurring.') args = parser.parse_args() -net = cv.dnn.readNetFromTorch(cv.samples.findFile(args.model)) +net = cv.dnn.readNet(cv.samples.findFile(args.model)) net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) if args.input: @@ -31,16 +31,12 @@ while cv.waitKey(1) < 0: inWidth = args.width if args.width != -1 else frame.shape[1] inHeight = args.height if args.height != -1 else frame.shape[0] inp = cv.dnn.blobFromImage(frame, 1.0, (inWidth, inHeight), - (103.939, 116.779, 123.68), swapRB=False, crop=False) + swapRB=True, crop=False) net.setInput(inp) out = net.forward() out = out.reshape(3, out.shape[2], out.shape[3]) - out[0] += 103.939 - out[1] += 116.779 - out[2] += 123.68 - out /= 255 out = out.transpose(1, 2, 0) t, _ = net.getPerfProfile() @@ -50,4 +46,7 @@ while cv.waitKey(1) < 0: if args.median_filter: out = cv.medianBlur(out, args.median_filter) + out = np.clip(out, 0, 255) + out = out.astype(np.uint8) + cv.imshow('Styled image', out) diff --git a/samples/dnn/js_face_recognition.html b/samples/dnn/js_face_recognition.html index e5a3669a4f..95254ecd3a 100644 --- a/samples/dnn/js_face_recognition.html +++ b/samples/dnn/js_face_recognition.html @@ -40,7 +40,7 @@ function detectFaces(img) { //! 
[Get 128 floating points feature vector] function face2vec(face) { - var blob = cv.blobFromImage(face, 1.0 / 255, {width: 96, height: 96}, [0, 0, 0, 0], true, false) + var blob = cv.blobFromImage(face, 1.0, {width: 112, height: 112}, [0, 0, 0, 0], true, false) netRecogn.setInput(blob); var vec = netRecogn.forward(); blob.delete(); @@ -71,15 +71,15 @@ function loadModels(callback) { var utils = new Utils(''); var proto = 'https://raw.githubusercontent.com/opencv/opencv/5.x/samples/dnn/face_detector/deploy_lowres.prototxt'; var weights = 'https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel'; - var recognModel = 'https://raw.githubusercontent.com/pyannote/pyannote-data/master/openface.nn4.small2.v1.t7'; + var recognModel = 'https://media.githubusercontent.com/media/opencv/opencv_zoo/main/models/face_recognition_sface/face_recognition_sface_2021dec.onnx'; utils.createFileFromUrl('face_detector.prototxt', proto, () => { document.getElementById('status').innerHTML = 'Downloading face_detector.caffemodel'; utils.createFileFromUrl('face_detector.caffemodel', weights, () => { document.getElementById('status').innerHTML = 'Downloading OpenFace model'; - utils.createFileFromUrl('face_recognition.t7', recognModel, () => { + utils.createFileFromUrl('face_recognition_sface_2021dec.onnx', recognModel, () => { document.getElementById('status').innerHTML = ''; netDet = cv.readNetFromCaffe('face_detector.prototxt', 'face_detector.caffemodel'); - netRecogn = cv.readNetFromTorch('face_recognition.t7'); + netRecogn = cv.readNet('face_recognition_sface_2021dec.onnx'); callback(); }); }); @@ -121,8 +121,8 @@ function main() { persons[name] = face2vec(face).clone(); var canvas = document.createElement("canvas"); - canvas.setAttribute("width", 96); - canvas.setAttribute("height", 96); + canvas.setAttribute("width", 112); + canvas.setAttribute("height", 112); var cell = 
document.getElementById("targetImgs").insertCell(0); cell.appendChild(canvas); diff --git a/samples/python/tst_scene_render.py b/samples/python/tst_scene_render.py index 9d09ea7b9e..c3eb69ef9c 100644 --- a/samples/python/tst_scene_render.py +++ b/samples/python/tst_scene_render.py @@ -25,7 +25,7 @@ class TestSceneRender(): if bgImg is not None: self.sceneBg = bgImg.copy() else: - self.sceneBg = np.zeros(defaultSize, defaultSize, np.uint8) + self.sceneBg = np.zeros((defaultSize, defaultSize,3), np.uint8) self.w = self.sceneBg.shape[0] self.h = self.sceneBg.shape[1] @@ -85,7 +85,7 @@ class TestSceneRender(): img[self.currentCenter[0]:self.currentCenter[0]+self.foreground.shape[0], self.currentCenter[1]:self.currentCenter[1]+self.foreground.shape[1]] = self.foreground else: - self.currentRect = self.initialRect + np.int( 30*cos(self.time*self.speed) + 50*sin(self.time*self.speed)) + self.currentRect = self.initialRect + int( 30*cos(self.time*self.speed) + 50*sin(self.time*self.speed)) if self.deformation: self.currentRect[1:3] += int(self.h/20*cos(self.time)) cv.fillConvexPoly(img, self.currentRect, (0, 0, 255))