mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge branch 4.x
This commit is contained in:
commit
decf6538a2
@ -1455,8 +1455,8 @@ if(WITH_WEBP OR HAVE_WEBP)
|
||||
endif()
|
||||
|
||||
if(WITH_AVIF OR HAVE_AVIF)
|
||||
if(AVIF_VERSION)
|
||||
status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY} (ver ${AVIF_VERSION})" ELSE "NO")
|
||||
if(libavif_VERSION)
|
||||
status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY} (ver ${libavif_VERSION})" ELSE "NO")
|
||||
else()
|
||||
status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY}" ELSE "NO")
|
||||
endif()
|
||||
@ -1852,6 +1852,7 @@ if(BUILD_opencv_python3)
|
||||
else()
|
||||
status(" Libraries:" HAVE_opencv_python3 THEN "${PYTHON3_LIBRARIES}" ELSE NO)
|
||||
endif()
|
||||
status(" Limited API:" PYTHON3_LIMITED_API THEN "YES (ver ${PYTHON3_LIMITED_API_VERSION})" ELSE NO)
|
||||
status(" numpy:" PYTHON3_NUMPY_INCLUDE_DIRS THEN "${PYTHON3_NUMPY_INCLUDE_DIRS} (ver ${PYTHON3_NUMPY_VERSION})" ELSE "NO (Python3 wrappers can not be generated)")
|
||||
status(" install path:" HAVE_opencv_python3 THEN "${__INSTALL_PATH_PYTHON3}" ELSE "-")
|
||||
endif()
|
||||
|
@ -1,8 +1,5 @@
|
||||
## OpenCV: Open Source Computer Vision Library
|
||||
|
||||
### Keep OpenCV Free
|
||||
|
||||
OpenCV is raising funds to keep the library free for everyone, and we need the support of the entire community to do it. [Donate to OpenCV on IndieGoGo](http://igg.me/at/opencv5) before the campaign ends on December 16 to show your support.
|
||||
|
||||
### Resources
|
||||
|
||||
@ -13,6 +10,7 @@ OpenCV is raising funds to keep the library free for everyone, and we need the s
|
||||
* previous forum (read only): <http://answers.opencv.org>
|
||||
* Issue tracking: <https://github.com/opencv/opencv/issues>
|
||||
* Additional OpenCV functionality: <https://github.com/opencv/opencv_contrib>
|
||||
* Donate to OpenCV: <https://opencv.org/support/>
|
||||
|
||||
|
||||
### Contributing
|
||||
|
@ -484,7 +484,6 @@ macro(ocv_check_compiler_optimization OPT)
|
||||
endmacro()
|
||||
|
||||
macro(ocv_cpu_aarch64_baseline_merge_feature_options FEATURE_NAME_LIST FLAG_STRING COMMON_OPTION)
|
||||
if(NOT MSVC)
|
||||
unset(_POSTFIX)
|
||||
# Check each feature option
|
||||
foreach(OPT IN LISTS ${FEATURE_NAME_LIST})
|
||||
@ -499,7 +498,6 @@ macro(ocv_cpu_aarch64_baseline_merge_feature_options FEATURE_NAME_LIST FLAG_STRI
|
||||
if(NOT "x${_POSTFIX}" STREQUAL "x")
|
||||
set(${FLAG_STRING} "${${FLAG_STRING}} ${COMMON_OPTION}${_POSTFIX}")
|
||||
endif()
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
|
||||
@ -596,11 +594,13 @@ foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
|
||||
endforeach()
|
||||
|
||||
if(AARCH64)
|
||||
if(NOT MSVC)
|
||||
# Define the list of NEON options to check
|
||||
set(NEON_OPTIONS_LIST NEON_DOTPROD NEON_FP16 NEON_BF16)
|
||||
set(BASE_ARCHITECTURE "-march=armv8.2-a")
|
||||
ocv_cpu_aarch64_baseline_merge_feature_options(NEON_OPTIONS_LIST CPU_BASELINE_FLAGS ${BASE_ARCHITECTURE})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
foreach(OPT ${CPU_BASELINE_REQUIRE})
|
||||
if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
|
||||
|
@ -1,13 +1,6 @@
|
||||
if("${CMAKE_CXX_COMPILER};${CMAKE_C_COMPILER};${CMAKE_CXX_COMPILER_LAUNCHER}" MATCHES "ccache")
|
||||
set(CMAKE_COMPILER_IS_CCACHE 1) # TODO: FIXIT Avoid setting of CMAKE_ variables
|
||||
set(OPENCV_COMPILER_IS_CCACHE 1)
|
||||
endif()
|
||||
function(access_CMAKE_COMPILER_IS_CCACHE)
|
||||
if(NOT OPENCV_SUPPRESS_DEPRECATIONS)
|
||||
message(WARNING "DEPRECATED: CMAKE_COMPILER_IS_CCACHE is replaced to OPENCV_COMPILER_IS_CCACHE.")
|
||||
endif()
|
||||
endfunction()
|
||||
variable_watch(CMAKE_COMPILER_IS_CCACHE access_CMAKE_COMPILER_IS_CCACHE)
|
||||
if(ENABLE_CCACHE AND NOT OPENCV_COMPILER_IS_CCACHE)
|
||||
# This works fine with Unix Makefiles and Ninja generators
|
||||
find_host_program(CCACHE_PROGRAM ccache)
|
||||
@ -391,7 +384,7 @@ endif()
|
||||
|
||||
# Apply "-Wl,--no-undefined" linker flags: https://github.com/opencv/opencv/pull/21347
|
||||
if(NOT OPENCV_SKIP_LINK_NO_UNDEFINED)
|
||||
if(UNIX AND (NOT APPLE OR NOT CMAKE_VERSION VERSION_LESS "3.2"))
|
||||
if(UNIX AND ((NOT APPLE OR NOT CMAKE_VERSION VERSION_LESS "3.2") AND NOT CMAKE_SYSTEM_NAME MATCHES "OpenBSD"))
|
||||
set(_option "-Wl,--no-undefined")
|
||||
set(_saved_CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${_option}") # requires CMake 3.2+ and CMP0056
|
||||
|
@ -136,11 +136,11 @@ macro(ocv_check_windows_crt_linkage)
|
||||
cmake_policy(GET CMP0091 MSVC_RUNTIME_SET_BY_ABSTRACTION)
|
||||
if(MSVC_RUNTIME_SET_BY_ABSTRACTION STREQUAL "NEW")
|
||||
if(NOT BUILD_SHARED_LIBS AND BUILD_WITH_STATIC_CRT)
|
||||
set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} " /MT")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG} " /MTd")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} " /MD")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG} " /MDd")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MD")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MDd")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
@ -270,6 +270,18 @@ find_python("${OPENCV_PYTHON3_VERSION}" "${MIN_VER_PYTHON3}" PYTHON3_LIBRARY PYT
|
||||
PYTHON3_INCLUDE_DIR PYTHON3_INCLUDE_DIR2 PYTHON3_PACKAGES_PATH
|
||||
PYTHON3_NUMPY_INCLUDE_DIRS PYTHON3_NUMPY_VERSION)
|
||||
|
||||
# Problem in numpy >=1.15 <1.17
|
||||
OCV_OPTION(PYTHON3_LIMITED_API "Build with Python Limited API (not available with numpy >=1.15 <1.17)" NO
|
||||
VISIBLE_IF PYTHON3_NUMPY_VERSION VERSION_LESS "1.15" OR NOT PYTHON3_NUMPY_VERSION VERSION_LESS "1.17")
|
||||
if(PYTHON3_LIMITED_API)
|
||||
set(_default_ver "0x03060000")
|
||||
if(PYTHON3_VERSION_STRING VERSION_LESS "3.6")
|
||||
# fix for older pythons
|
||||
set(_default_ver "0x030${PYTHON3_VERSION_MINOR}0000")
|
||||
endif()
|
||||
set(PYTHON3_LIMITED_API_VERSION ${_default_ver} CACHE STRING "Minimal Python version for Limited API")
|
||||
endif()
|
||||
|
||||
if(PYTHON_DEFAULT_EXECUTABLE)
|
||||
set(PYTHON_DEFAULT_AVAILABLE "TRUE")
|
||||
elseif(PYTHON3_EXECUTABLE AND PYTHON3INTERP_FOUND)
|
||||
|
@ -57,6 +57,18 @@ if(CANN_INSTALL_DIR)
|
||||
set(HAVE_CANN OFF)
|
||||
return()
|
||||
endif()
|
||||
|
||||
# * libacl_dvpp_mpi.so
|
||||
set(libacl_dvpp_mpi "${CANN_INSTALL_DIR}/lib64")
|
||||
find_library(found_libacldvppmpi NAMES acl_dvpp_mpi PATHS ${libacl_dvpp_mpi} NO_DEFAULT_PATH)
|
||||
if(found_libacldvppmpi)
|
||||
set(libacl_dvpp_mpi ${found_libacldvppmpi})
|
||||
message(STATUS "CANN: libacl_dvpp_mpi.so is found at ${libacl_dvpp_mpi}")
|
||||
else()
|
||||
message(STATUS "CANN: Missing libacl_dvpp_mpi.so. Turning off HAVE_CANN")
|
||||
set(HAVE_CANN OFF)
|
||||
return()
|
||||
endif()
|
||||
# * libgraph.so
|
||||
set(lib_graph "${CANN_INSTALL_DIR}/compiler/lib64")
|
||||
find_library(found_lib_graph NAMES graph PATHS ${lib_graph} NO_DEFAULT_PATH)
|
||||
@ -105,6 +117,7 @@ if(CANN_INSTALL_DIR)
|
||||
list(APPEND libs_cann ${lib_opsproto})
|
||||
list(APPEND libs_cann ${lib_graph})
|
||||
list(APPEND libs_cann ${lib_ge_compiler})
|
||||
list(APPEND libs_cann ${libacl_dvpp_mpi})
|
||||
|
||||
# * lib_graph_base.so
|
||||
if(NOT CANN_VERSION_BELOW_6_3_ALPHA002)
|
||||
|
@ -89,15 +89,11 @@ else()
|
||||
ocv_update(OPENCV_ANDROID_NAMESPACE_DECLARATION "")
|
||||
endif()
|
||||
|
||||
# set android gradle java version in build.gradle and set aidl config
|
||||
if(NOT (ANDROID_GRADLE_PLUGIN_VERSION VERSION_LESS "8.0.0"))
|
||||
# AGP-8.0 requires a minimum JDK version of JDK17
|
||||
ocv_update(ANDROID_GRADLE_JAVA_VERSION_INIT "17")
|
||||
# Enable aidl configuration for OpenCV compile with AGP-8.0
|
||||
ocv_update(ANDROID_GRADLE_BUILD_FEATURE_AIDL "buildFeatures { aidl true }")
|
||||
else()
|
||||
ocv_update(ANDROID_GRADLE_JAVA_VERSION_INIT "1_8")
|
||||
ocv_update(ANDROID_GRADLE_BUILD_FEATURE_AIDL "")
|
||||
endif()
|
||||
|
||||
set(ANDROID_GRADLE_JAVA_VERSION "${ANDROID_GRADLE_JAVA_VERSION_INIT}" CACHE STRING "Android Gradle Java version")
|
||||
|
@ -9,6 +9,9 @@ How to use the OpenCV parallel_for_ to parallelize your code {#tutorial_how_to_u
|
||||
| -: | :- |
|
||||
| Compatibility | OpenCV >= 3.0 |
|
||||
|
||||
|
||||
@note See also C++ lambda usage with parallel for in [tutorial](@ref tutorial_how_to_use_OpenCV_parallel_for_new).
|
||||
|
||||
Goal
|
||||
----
|
||||
|
||||
@ -20,7 +23,7 @@ If you want more information about multithreading, you will have to refer to a r
|
||||
to remain simple.
|
||||
|
||||
Precondition
|
||||
----
|
||||
------------
|
||||
|
||||
The first precondition is to have OpenCV built with a parallel framework.
|
||||
In OpenCV 3.2, the following parallel frameworks are available in that order:
|
||||
@ -50,7 +53,7 @@ We will use the example of drawing a Mandelbrot set to show how from a regular s
|
||||
the code to parallelize the computation.
|
||||
|
||||
Theory
|
||||
-----------
|
||||
------
|
||||
|
||||
The Mandelbrot set definition has been named in tribute to the mathematician Benoit Mandelbrot by the mathematician
|
||||
Adrien Douady. It has been famous outside of the mathematics field as the image representation is an example of a
|
||||
@ -69,7 +72,7 @@ Here, we will just introduce the formula to draw the Mandelbrot set (from the me
|
||||
> \f[\limsup_{n\to\infty}|z_{n+1}|\leqslant2\f]
|
||||
|
||||
Pseudocode
|
||||
-----------
|
||||
----------
|
||||
|
||||
A simple algorithm to generate a representation of the Mandelbrot set is called the
|
||||
["escape time algorithm"](https://en.wikipedia.org/wiki/Mandelbrot_set#Escape_time_algorithm).
|
||||
@ -110,10 +113,10 @@ On this figure, we recall that the real part of a complex number is on the x-axi
|
||||
You can see that the whole shape can be repeatedly visible if we zoom at particular locations.
|
||||
|
||||
Implementation
|
||||
-----------
|
||||
--------------
|
||||
|
||||
Escape time algorithm implementation
|
||||
--------------------------
|
||||
------------------------------------
|
||||
|
||||
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-escape-time-algorithm
|
||||
|
||||
@ -121,7 +124,7 @@ Here, we used the [`std::complex`](http://en.cppreference.com/w/cpp/numeric/comp
|
||||
complex number. This function performs the test to check if the pixel is in set or not and returns the "escaped" iteration.
|
||||
|
||||
Sequential Mandelbrot implementation
|
||||
--------------------------
|
||||
------------------------------------
|
||||
|
||||
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-sequential
|
||||
|
||||
@ -149,7 +152,7 @@ The green curve corresponds to a simple linear scale transformation, the blue on
|
||||
and you can observe how the lowest values will be boosted when looking at the slope at these positions.
|
||||
|
||||
Parallel Mandelbrot implementation
|
||||
--------------------------
|
||||
----------------------------------
|
||||
|
||||
When looking at the sequential implementation, we can notice that each pixel is computed independently. To optimize the
|
||||
computation, we can perform multiple pixel calculations in parallel, by exploiting the multi-core architecture of modern
|
||||
@ -181,7 +184,7 @@ C++ 11 standard allows to simplify the parallel implementation by get rid of the
|
||||
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-parallel-call-cxx11
|
||||
|
||||
Results
|
||||
-----------
|
||||
-------
|
||||
|
||||
You can find the full tutorial code [here](https://github.com/opencv/opencv/blob/5.x/samples/cpp/tutorial_code/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.cpp).
|
||||
The performance of the parallel implementation depends on the type of CPU you have. For instance, on 4 cores / 8 threads
|
||||
|
@ -18,7 +18,7 @@ This tutorial assumes you have the following installed and configured:
|
||||
- Android Studio
|
||||
- JDK
|
||||
- Android SDK and NDK
|
||||
- OpenCV for Android SDK from official [release page on Github](https://github.com/opencv/opencv/releases)
|
||||
- Optional: OpenCV for Android SDK from official [release page on Github](https://github.com/opencv/opencv/releases)
|
||||
or [SourceForge](https://sourceforge.net/projects/opencvlibrary/). Advanced: as an alternative, the SDK may be
|
||||
built from source code by [instruction on wiki](https://github.com/opencv/opencv/wiki/Custom-OpenCV-Android-SDK-and-AAR-package-build).
|
||||
|
||||
@ -26,8 +26,9 @@ If you need help with anything of the above, you may refer to our @ref tutorial_
|
||||
|
||||
If you encounter any error after thoroughly following these steps, feel free to contact us via OpenCV [forum](https://forum.opencv.org). We'll do our best to help you out.
|
||||
|
||||
Hello OpenCV sample
|
||||
-------------------
|
||||
|
||||
Hello OpenCV sample with SDK
|
||||
----------------------------
|
||||
|
||||
In this section we're gonna create a simple app that does nothing but OpenCV loading. In next section we'll extend it to support camera.
|
||||
|
||||
@ -75,11 +76,10 @@ In addition to this instruction you can use some video guide, for example [this
|
||||
@endcode
|
||||
The fix was found [here](https://stackoverflow.com/questions/73225714/import-opencv-sdk-to-android-studio-chipmunk)
|
||||
|
||||
6. OpenCV project uses `aidl` and `buildConfig` features. Please enable them in
|
||||
6. OpenCV project uses `buildConfig` feature. Please enable it in
|
||||
`MyApplication/OpenCV/build.gradle` file to `android` block:
|
||||
@code{.gradle}
|
||||
buildFeatures{
|
||||
aidl true
|
||||
buildConfig true
|
||||
}
|
||||
|
||||
@ -115,6 +115,43 @@ In addition to this instruction you can use some video guide, for example [this
|
||||
|
||||

|
||||
|
||||
Hello OpenCV sample with Maven Central
|
||||
--------------------------------------
|
||||
|
||||
Since OpenCV 4.9.0 OpenCV for Android package is available with Maven Central and may be installed
|
||||
automatically as Gradle dependency. In this section we're gonna create a simple app that does nothing
|
||||
but OpenCV loading with Maven Central.
|
||||
|
||||
1. Open Android Studio and create empty project by choosing ***Empty Views Activity***
|
||||
|
||||

|
||||
|
||||
2. Setup the project:
|
||||
- Choose ***Java*** language
|
||||
- Choose ***Groovy DSL*** build configuration language
|
||||
- Choose ***Minimum SDK*** with the version number not less than OpenCV supports. For 4.9.0 the minimal SDK version is 21.
|
||||
|
||||

|
||||
|
||||
3. Edit `build.gradle` and add OpenCV library to Dependencies list like this:
|
||||
@code{.gradle}
|
||||
dependencies {
|
||||
implementation 'org.opencv:opencv:4.9.0'
|
||||
}
|
||||
@endcode
|
||||
`4.9.0` may be replaced by any version available as [official release](https://central.sonatype.com/artifact/org.opencv/opencv).
|
||||
|
||||
4. Before using any OpenCV function you have to load the library first. If your application includes other
|
||||
OpenCV-dependent native libraries you should load them ***after*** OpenCV initialization. Add the following
|
||||
code to load the library at app start:
|
||||
@snippet samples/android/tutorial-1-camerapreview/src/org/opencv/samples/tutorial1/Tutorial1Activity.java ocv_loader_init
|
||||
Like this:
|
||||

|
||||
|
||||
5. Choose a device to check the sample on and run the code by pressing `run` button
|
||||
|
||||

|
||||
|
||||
Camera view sample
|
||||
------------------
|
||||
|
||||
|
@ -378,6 +378,9 @@ our OpenCV library that we use in our projects. Start up a command window and en
|
||||
|
||||
setx OpenCV_DIR D:\OpenCV\build\x64\vc16 (suggested for Visual Studio 2019 - 64 bit Windows)
|
||||
setx OpenCV_DIR D:\OpenCV\build\x86\vc16 (suggested for Visual Studio 2019 - 32 bit Windows)
|
||||
|
||||
setx OpenCV_DIR D:\OpenCV\build\x64\vc17 (suggested for Visual Studio 2022 - 64 bit Windows)
|
||||
setx OpenCV_DIR D:\OpenCV\build\x86\vc17 (suggested for Visual Studio 2022 - 32 bit Windows)
|
||||
@endcode
|
||||
Here the directory is where you have your OpenCV binaries (*extracted* or *built*). You can have
|
||||
different platform (e.g. x64 instead of x86) or compiler type, so substitute appropriate value.
|
||||
|
@ -7,10 +7,8 @@
|
||||
|
||||
#include <opencv2/core/mat.hpp>
|
||||
|
||||
#ifdef CV_CXX11
|
||||
//#include <future>
|
||||
#include <chrono>
|
||||
#endif
|
||||
|
||||
namespace cv {
|
||||
|
||||
@ -69,7 +67,6 @@ public:
|
||||
|
||||
CV_WRAP bool valid() const CV_NOEXCEPT;
|
||||
|
||||
#ifdef CV_CXX11
|
||||
inline AsyncArray(AsyncArray&& o) { p = o.p; o.p = NULL; }
|
||||
inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
|
||||
|
||||
@ -89,7 +86,6 @@ public:
|
||||
std::future<Mat> getFutureMat() const;
|
||||
std::future<UMat> getFutureUMat() const;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
// PImpl
|
||||
|
@ -147,7 +147,7 @@
|
||||
#endif
|
||||
|
||||
#if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
|
||||
# include<riscv-vector.h>
|
||||
# include<riscv_vector.h>
|
||||
# define CV_RVV071 1
|
||||
#endif
|
||||
|
||||
|
@ -476,6 +476,8 @@ Cv64suf;
|
||||
#define CV_WRAP_MAPPABLE(mappable)
|
||||
#define CV_WRAP_PHANTOM(phantom_header)
|
||||
#define CV_WRAP_DEFAULT(val)
|
||||
/* Indicates that the function parameter has filesystem path semantic */
|
||||
#define CV_WRAP_FILE_PATH
|
||||
|
||||
/****************************************************************************************\
|
||||
* Matrix type (Mat) *
|
||||
@ -755,89 +757,44 @@ __CV_ENUM_FLAGS_BITWISE_XOR_EQ (EnumType, EnumType)
|
||||
#endif
|
||||
|
||||
|
||||
/****************************************************************************************\
|
||||
* CV_NODISCARD attribute (deprecated, GCC only) *
|
||||
* DONT USE: use instead the standard CV_NODISCARD_STD macro above *
|
||||
* this legacy method silently fails to issue warning until some version *
|
||||
* after gcc 6.3.0. Yet with gcc 7+ you can use the above standard method *
|
||||
* which makes this method useless. Don't use it. *
|
||||
* @deprecated use instead CV_NODISCARD_STD *
|
||||
\****************************************************************************************/
|
||||
#ifndef CV_NODISCARD
|
||||
# if defined(__GNUC__)
|
||||
# define CV_NODISCARD __attribute__((__warn_unused_result__))
|
||||
# elif defined(__clang__) && defined(__has_attribute)
|
||||
# if __has_attribute(__warn_unused_result__)
|
||||
# define CV_NODISCARD __attribute__((__warn_unused_result__))
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
#ifndef CV_NODISCARD
|
||||
# define CV_NODISCARD /* nothing by default */
|
||||
#endif
|
||||
|
||||
|
||||
/****************************************************************************************\
|
||||
* C++ 11 *
|
||||
\****************************************************************************************/
|
||||
#ifndef CV_CXX11
|
||||
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)
|
||||
# define CV_CXX11 1
|
||||
# endif
|
||||
#else
|
||||
# if CV_CXX11 == 0
|
||||
# undef CV_CXX11
|
||||
# endif
|
||||
#endif
|
||||
#ifndef CV_CXX11
|
||||
#ifdef __cplusplus
|
||||
// MSVC was stuck at __cplusplus == 199711L for a long time, even where it supports C++11,
|
||||
// so check _MSC_VER instead. See:
|
||||
// <https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus>
|
||||
# if defined(_MSC_VER)
|
||||
# if _MSC_VER < 1800
|
||||
# error "OpenCV 4.x+ requires enabled C++11 support"
|
||||
# endif
|
||||
# elif __cplusplus < 201103L
|
||||
# error "OpenCV 4.x+ requires enabled C++11 support"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef CV_CXX11
|
||||
# define CV_CXX11 1
|
||||
#endif
|
||||
|
||||
#define CV_CXX_MOVE_SEMANTICS 1
|
||||
#define CV_CXX_MOVE(x) std::move(x)
|
||||
#define CV_CXX_STD_ARRAY 1
|
||||
#include <array>
|
||||
#ifndef CV_OVERRIDE
|
||||
# define CV_OVERRIDE override
|
||||
#endif
|
||||
|
||||
#ifndef CV_FINAL
|
||||
# define CV_FINAL final
|
||||
#endif
|
||||
|
||||
#ifndef CV_NOEXCEPT
|
||||
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
|
||||
# define CV_NOEXCEPT noexcept
|
||||
#endif
|
||||
#endif
|
||||
#ifndef CV_NOEXCEPT
|
||||
# define CV_NOEXCEPT
|
||||
#endif
|
||||
|
||||
#ifndef CV_CONSTEXPR
|
||||
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
|
||||
# define CV_CONSTEXPR constexpr
|
||||
#endif
|
||||
#endif
|
||||
#ifndef CV_CONSTEXPR
|
||||
# define CV_CONSTEXPR
|
||||
#endif
|
||||
|
||||
// Integer types portability
|
||||
#ifdef OPENCV_STDINT_HEADER
|
||||
#include OPENCV_STDINT_HEADER
|
||||
#elif defined(__cplusplus)
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */
|
||||
namespace cv {
|
||||
typedef signed char int8_t;
|
||||
typedef unsigned char uint8_t;
|
||||
typedef signed short int16_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef signed int int32_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef signed __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
}
|
||||
#elif defined(_MSC_VER) || __cplusplus >= 201103L
|
||||
#ifdef __cplusplus
|
||||
#include <cstdint>
|
||||
namespace cv {
|
||||
using std::int8_t;
|
||||
@ -849,19 +806,6 @@ using std::uint32_t;
|
||||
using std::int64_t;
|
||||
using std::uint64_t;
|
||||
}
|
||||
#else
|
||||
#include <stdint.h>
|
||||
namespace cv {
|
||||
typedef ::int8_t int8_t;
|
||||
typedef ::uint8_t uint8_t;
|
||||
typedef ::int16_t int16_t;
|
||||
typedef ::uint16_t uint16_t;
|
||||
typedef ::int32_t int32_t;
|
||||
typedef ::uint32_t uint32_t;
|
||||
typedef ::int64_t int64_t;
|
||||
typedef ::uint64_t uint64_t;
|
||||
}
|
||||
#endif
|
||||
#else // pure C
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
@ -52,10 +52,8 @@ public:
|
||||
*/
|
||||
void setException(const cv::Exception& exception);
|
||||
|
||||
#ifdef CV_CXX11
|
||||
explicit AsyncPromise(AsyncPromise&& o) { p = o.p; o.p = NULL; }
|
||||
AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
|
||||
#endif
|
||||
|
||||
|
||||
// PImpl
|
||||
|
@ -8,14 +8,8 @@
|
||||
#ifndef CV__EXCEPTION_PTR
|
||||
# if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2
|
||||
# define CV__EXCEPTION_PTR 0 // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938
|
||||
# elif defined(CV_CXX11)
|
||||
# else
|
||||
# define CV__EXCEPTION_PTR 1
|
||||
# elif defined(_MSC_VER)
|
||||
# define CV__EXCEPTION_PTR (_MSC_VER >= 1600)
|
||||
# elif defined(__clang__)
|
||||
# define CV__EXCEPTION_PTR 0 // C++11 only (see above)
|
||||
# elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__)
|
||||
# define CV__EXCEPTION_PTR (__GXX_EXPERIMENTAL_CXX0X__ > 0)
|
||||
# endif
|
||||
#endif
|
||||
#ifndef CV__EXCEPTION_PTR
|
||||
|
@ -61,8 +61,7 @@
|
||||
#endif
|
||||
|
||||
#if !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT)
|
||||
#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 \
|
||||
&& defined(CV_CXX11) && defined(CV_CXX_STD_ARRAY)
|
||||
#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
|
||||
#include <unsupported/Eigen/CXX11/Tensor>
|
||||
#define OPENCV_EIGEN_TENSOR_SUPPORT 1
|
||||
#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -53,6 +53,7 @@
|
||||
|
||||
#include "opencv2/core/bufferpool.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
namespace cv
|
||||
|
@ -386,10 +386,8 @@ public:
|
||||
static Vec randn(_Tp a, _Tp b);
|
||||
static Vec randu(_Tp a, _Tp b);
|
||||
static Vec zeros();
|
||||
#ifdef CV_CXX11
|
||||
static Vec diag(_Tp alpha) = delete;
|
||||
static Vec eye() = delete;
|
||||
#endif
|
||||
|
||||
//! per-element multiplication
|
||||
Vec mul(const Vec<_Tp, cn>& v) const;
|
||||
@ -412,9 +410,7 @@ public:
|
||||
const _Tp& operator ()(int i) const;
|
||||
_Tp& operator ()(int i);
|
||||
|
||||
#ifdef CV_CXX11
|
||||
Vec<_Tp, cn>& operator=(const Vec<_Tp, cn>& rhs) = default;
|
||||
#endif
|
||||
|
||||
Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp);
|
||||
Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp);
|
||||
|
@ -28,7 +28,7 @@
|
||||
#define OPENCV_CORE_QUATERNION_INL_HPP
|
||||
|
||||
#ifndef OPENCV_CORE_QUATERNION_HPP
|
||||
#erorr This is not a standalone header. Include quaternion.hpp instead.
|
||||
#error This is not a standalone header. Include quaternion.hpp instead.
|
||||
#endif
|
||||
|
||||
//@cond IGNORE
|
||||
|
@ -9,8 +9,6 @@
|
||||
|
||||
//#define OPENCV_DISABLE_ALLOCATOR_STATS
|
||||
|
||||
#ifdef CV_CXX11
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE
|
||||
@ -26,14 +24,6 @@
|
||||
#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE long long
|
||||
#endif
|
||||
|
||||
#else // CV_CXX11
|
||||
|
||||
#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE
|
||||
#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE int // CV_XADD supports int only
|
||||
#endif
|
||||
|
||||
#endif // CV_CXX11
|
||||
|
||||
namespace cv { namespace utils {
|
||||
|
||||
#ifdef CV__ALLOCATOR_STATS_LOG
|
||||
@ -59,7 +49,7 @@ public:
|
||||
void onAllocate(size_t /*sz*/) {}
|
||||
void onFree(size_t /*sz*/) {}
|
||||
|
||||
#elif defined(CV_CXX11)
|
||||
#else
|
||||
|
||||
protected:
|
||||
typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t;
|
||||
@ -104,49 +94,7 @@ public:
|
||||
#endif
|
||||
curr -= (counter_t)sz;
|
||||
}
|
||||
|
||||
#else // non C++11
|
||||
|
||||
protected:
|
||||
typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t;
|
||||
volatile counter_t curr, total, total_allocs, peak; // overflow is possible, CV_XADD operates with 'int' only
|
||||
public:
|
||||
AllocatorStatistics()
|
||||
: curr(0), total(0), total_allocs(0), peak(0)
|
||||
{}
|
||||
~AllocatorStatistics() CV_OVERRIDE {}
|
||||
|
||||
uint64_t getCurrentUsage() const CV_OVERRIDE { return (uint64_t)curr; }
|
||||
uint64_t getTotalUsage() const CV_OVERRIDE { return (uint64_t)total; }
|
||||
uint64_t getNumberOfAllocations() const CV_OVERRIDE { return (uint64_t)total_allocs; }
|
||||
uint64_t getPeakUsage() const CV_OVERRIDE { return (uint64_t)peak; }
|
||||
|
||||
void resetPeakUsage() CV_OVERRIDE { peak = curr; }
|
||||
|
||||
// Controller interface
|
||||
void onAllocate(size_t sz)
|
||||
{
|
||||
#ifdef CV__ALLOCATOR_STATS_LOG
|
||||
CV__ALLOCATOR_STATS_LOG(cv::format("allocate: %lld (curr=%lld)", (long long int)sz, (long long int)curr));
|
||||
#endif
|
||||
|
||||
counter_t new_curr = (counter_t)CV_XADD(&curr, (counter_t)sz) + (counter_t)sz;
|
||||
|
||||
peak = std::max((counter_t)peak, new_curr); // non-thread safe
|
||||
|
||||
//CV_XADD(&total, (uint64_t)sz); // overflow with int, non-reliable...
|
||||
total += sz;
|
||||
|
||||
CV_XADD(&total_allocs, (counter_t)1);
|
||||
}
|
||||
void onFree(size_t sz)
|
||||
{
|
||||
#ifdef CV__ALLOCATOR_STATS_LOG
|
||||
CV__ALLOCATOR_STATS_LOG(cv::format("free: %lld (curr=%lld)", (long long int)sz, (long long int)curr));
|
||||
#endif
|
||||
CV_XADD(&curr, (counter_t)-sz);
|
||||
}
|
||||
#endif
|
||||
#endif // OPENCV_DISABLE_ALLOCATOR_STATS
|
||||
};
|
||||
|
||||
#ifdef CV__ALLOCATOR_STATS_LOG
|
||||
|
@ -3,7 +3,6 @@
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "precomp.hpp"
|
||||
//#undef CV_CXX11 // debug non C++11 mode
|
||||
#include "opencv2/core/async.hpp"
|
||||
#include "opencv2/core/detail/async_promise.hpp"
|
||||
|
||||
@ -16,11 +15,9 @@
|
||||
|
||||
#ifndef OPENCV_DISABLE_THREAD_SUPPORT
|
||||
|
||||
#ifdef CV_CXX11
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <chrono>
|
||||
#endif
|
||||
|
||||
namespace cv {
|
||||
|
||||
@ -37,12 +34,8 @@ struct AsyncArray::Impl
|
||||
void releasePromise() CV_NOEXCEPT { CV_XADD(&refcount_promise, -1); if(1 == CV_XADD(&refcount, -1)) delete this; } \
|
||||
int refcount_promise;
|
||||
|
||||
#ifdef CV_CXX11
|
||||
mutable std::mutex mtx;
|
||||
mutable std::condition_variable cond_var;
|
||||
#else
|
||||
mutable cv::Mutex mtx;
|
||||
#endif
|
||||
|
||||
mutable bool has_result; // Mat, UMat or exception
|
||||
|
||||
@ -88,11 +81,7 @@ struct AsyncArray::Impl
|
||||
if (!wait_for(timeoutNs))
|
||||
return false;
|
||||
}
|
||||
#ifdef CV_CXX11
|
||||
std::unique_lock<std::mutex> lock(mtx);
|
||||
#else
|
||||
cv::AutoLock lock(mtx);
|
||||
#endif
|
||||
if (has_result)
|
||||
{
|
||||
if (!result_mat.empty())
|
||||
@ -145,7 +134,6 @@ struct AsyncArray::Impl
|
||||
if (timeoutNs == 0)
|
||||
return has_result;
|
||||
CV_LOG_INFO(NULL, "Waiting for async result ...");
|
||||
#ifdef CV_CXX11
|
||||
std::unique_lock<std::mutex> lock(mtx);
|
||||
const auto cond_pred = [&]{ return has_result == true; };
|
||||
if (timeoutNs > 0)
|
||||
@ -156,9 +144,6 @@ struct AsyncArray::Impl
|
||||
CV_Assert(has_result);
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "OpenCV has been built without async waiting support (C++11 is required)");
|
||||
#endif
|
||||
}
|
||||
|
||||
AsyncArray getArrayResult()
|
||||
@ -175,11 +160,7 @@ struct AsyncArray::Impl
|
||||
{
|
||||
if (future_is_returned && refcount_future == 0)
|
||||
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
|
||||
#ifdef CV_CXX11
|
||||
std::unique_lock<std::mutex> lock(mtx);
|
||||
#else
|
||||
cv::AutoLock lock(mtx);
|
||||
#endif
|
||||
CV_Assert(!has_result);
|
||||
int k = value.kind();
|
||||
if (k == _InputArray::UMAT)
|
||||
@ -193,9 +174,7 @@ struct AsyncArray::Impl
|
||||
value.copyTo(*result_mat.get());
|
||||
}
|
||||
has_result = true;
|
||||
#ifdef CV_CXX11
|
||||
cond_var.notify_all();
|
||||
#endif
|
||||
}
|
||||
|
||||
#if CV__EXCEPTION_PTR
|
||||
@ -203,18 +182,12 @@ struct AsyncArray::Impl
|
||||
{
|
||||
if (future_is_returned && refcount_future == 0)
|
||||
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
|
||||
#ifdef CV_CXX11
|
||||
std::unique_lock<std::mutex> lock(mtx);
|
||||
#else
|
||||
cv::AutoLock lock(mtx);
|
||||
#endif
|
||||
CV_Assert(!has_result);
|
||||
has_exception = true;
|
||||
exception = e;
|
||||
has_result = true;
|
||||
#ifdef CV_CXX11
|
||||
cond_var.notify_all();
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -222,18 +195,12 @@ struct AsyncArray::Impl
|
||||
{
|
||||
if (future_is_returned && refcount_future == 0)
|
||||
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
|
||||
#ifdef CV_CXX11
|
||||
std::unique_lock<std::mutex> lock(mtx);
|
||||
#else
|
||||
cv::AutoLock lock(mtx);
|
||||
#endif
|
||||
CV_Assert(!has_result);
|
||||
has_exception = true;
|
||||
cv_exception = e;
|
||||
has_result = true;
|
||||
#ifdef CV_CXX11
|
||||
cond_var.notify_all();
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1952,12 +1952,7 @@ void _OutputArray::move(UMat& u) const
|
||||
int k = kind();
|
||||
if (k == UMAT)
|
||||
{
|
||||
#ifdef CV_CXX11
|
||||
*(UMat*)obj = std::move(u);
|
||||
#else
|
||||
*(UMat*)obj = u;
|
||||
u.release();
|
||||
#endif
|
||||
}
|
||||
else if (k == MAT)
|
||||
{
|
||||
@ -1992,12 +1987,7 @@ void _OutputArray::move(Mat& m) const
|
||||
}
|
||||
else if (k == MAT)
|
||||
{
|
||||
#ifdef CV_CXX11
|
||||
*(Mat*)obj = std::move(m);
|
||||
#else
|
||||
*(Mat*)obj = m;
|
||||
m.release();
|
||||
#endif
|
||||
}
|
||||
else if (k == MATX)
|
||||
{
|
||||
|
@ -912,8 +912,7 @@ int getNumberOfCPUs_()
|
||||
* the minimum most value as it has high probability of being right and safe.
|
||||
* Return 1 if we get 0 or not found on all methods.
|
||||
*/
|
||||
#if defined CV_CXX11 \
|
||||
&& !defined(__MINGW32__) /* not implemented (2020-03) */ \
|
||||
#if !defined(__MINGW32__) /* not implemented (2020-03) */
|
||||
|
||||
/*
|
||||
* Check for this standard C++11 way, we do not return directly because
|
||||
|
@ -120,11 +120,15 @@ void* allocSingletonNewBuffer(size_t size) { return malloc(size); }
|
||||
#include <cstdlib> // std::abort
|
||||
#endif
|
||||
|
||||
#if defined __ANDROID__ || defined __unix__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __HAIKU__ || defined __Fuchsia__
|
||||
#if defined __ANDROID__ || defined __unix__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __HAIKU__ || defined __Fuchsia__ || defined __QNX__
|
||||
# include <unistd.h>
|
||||
# include <fcntl.h>
|
||||
#if defined __QNX__
|
||||
# include <sys/elf.h>
|
||||
# include <sys/auxv.h>
|
||||
using Elf64_auxv_t = auxv64_t;
|
||||
# include <elfdefinitions.h>
|
||||
const uint64_t AT_HWCAP = NT_GNU_HWCAP;
|
||||
#else
|
||||
# include <elf.h>
|
||||
#endif
|
||||
@ -251,7 +255,7 @@ std::wstring GetTempFileNameWinRT(std::wstring prefix)
|
||||
#include "omp.h"
|
||||
#endif
|
||||
|
||||
#if defined __unix__ || defined __APPLE__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ || defined __GLIBC__ || defined __HAIKU__
|
||||
#if defined __unix__ || defined __APPLE__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __GLIBC__ || defined __HAIKU__
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
@ -301,9 +305,7 @@ DECLARE_CV_CPUID_X86
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined CV_CXX11
|
||||
#include <chrono>
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -562,7 +564,7 @@ struct HWFeatures
|
||||
}
|
||||
#endif // CV_CPUID_X86
|
||||
|
||||
#if defined __ANDROID__ || defined __linux__ || defined __FreeBSD__ || defined __QNX__
|
||||
#if defined __ANDROID__ || defined __linux__ || defined __QNX__
|
||||
#ifdef __aarch64__
|
||||
have[CV_CPU_NEON] = true;
|
||||
have[CV_CPU_FP16] = true;
|
||||
@ -581,10 +583,12 @@ struct HWFeatures
|
||||
have[CV_CPU_NEON_DOTPROD] = (auxv.a_un.a_val & (1 << 20)) != 0; // HWCAP_ASIMDDP
|
||||
have[CV_CPU_NEON_FP16] = (auxv.a_un.a_val & (1 << 10)) != 0; // HWCAP_ASIMDHP
|
||||
}
|
||||
#if defined(AT_HWCAP2)
|
||||
else if (auxv.a_type == AT_HWCAP2)
|
||||
{
|
||||
have[CV_CPU_NEON_BF16] = (auxv.a_un.a_val & (1 << 14)) != 0; // HWCAP2_BF16
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
close(cpufile);
|
||||
@ -611,7 +615,7 @@ struct HWFeatures
|
||||
CV_LOG_INFO(NULL, "- FP16 instructions is NOT enabled via build flags");
|
||||
#endif
|
||||
#endif
|
||||
#elif defined __arm__ && !defined __FreeBSD__
|
||||
#elif defined __arm__
|
||||
int cpufile = open("/proc/self/auxv", O_RDONLY);
|
||||
|
||||
if (cpufile >= 0)
|
||||
@ -903,50 +907,15 @@ bool useOptimized(void)
|
||||
|
||||
int64 getTickCount(void)
|
||||
{
|
||||
#if defined CV_CXX11
|
||||
std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
|
||||
return (int64)now.time_since_epoch().count();
|
||||
#elif defined _WIN32 || defined WINCE
|
||||
LARGE_INTEGER counter;
|
||||
QueryPerformanceCounter( &counter );
|
||||
return (int64)counter.QuadPart;
|
||||
#elif defined __MACH__ && defined __APPLE__
|
||||
return (int64)mach_absolute_time();
|
||||
#elif defined __unix__
|
||||
struct timespec tp;
|
||||
clock_gettime(CLOCK_MONOTONIC, &tp);
|
||||
return (int64)tp.tv_sec*1000000000 + tp.tv_nsec;
|
||||
#else
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return (int64)tv.tv_sec*1000000 + tv.tv_usec;
|
||||
#endif
|
||||
}
|
||||
|
||||
double getTickFrequency(void)
|
||||
{
|
||||
#if defined CV_CXX11
|
||||
using clock_period_t = std::chrono::steady_clock::duration::period;
|
||||
double clock_freq = clock_period_t::den / clock_period_t::num;
|
||||
return clock_freq;
|
||||
#elif defined _WIN32 || defined WINCE
|
||||
LARGE_INTEGER freq;
|
||||
QueryPerformanceFrequency(&freq);
|
||||
return (double)freq.QuadPart;
|
||||
#elif defined __MACH__ && defined __APPLE__
|
||||
static double freq = 0;
|
||||
if( freq == 0 )
|
||||
{
|
||||
mach_timebase_info_data_t sTimebaseInfo;
|
||||
mach_timebase_info(&sTimebaseInfo);
|
||||
freq = sTimebaseInfo.denom*1e9/sTimebaseInfo.numer;
|
||||
}
|
||||
return freq;
|
||||
#elif defined __unix__
|
||||
return 1e9;
|
||||
#else
|
||||
return 1e6;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__ || defined __ppc__)
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
#include <opencv2/core/bindings_utils.hpp>
|
||||
|
||||
#if defined(CV_CXX11) && !defined(OPENCV_DISABLE_THREAD_SUPPORT)
|
||||
#if !defined(OPENCV_DISABLE_THREAD_SUPPORT)
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#endif
|
||||
@ -85,7 +85,7 @@ TEST(Core_Async, LikePythonTest)
|
||||
}
|
||||
|
||||
|
||||
#if defined(CV_CXX11) && !defined(OPENCV_DISABLE_THREAD_SUPPORT)
|
||||
#if !defined(OPENCV_DISABLE_THREAD_SUPPORT)
|
||||
|
||||
TEST(Core_Async, AsyncThread_Simple)
|
||||
{
|
||||
|
@ -8,10 +8,8 @@
|
||||
|
||||
#include <opencv2/core/utils/fp_control_utils.hpp>
|
||||
|
||||
#ifdef CV_CXX11
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
namespace opencv_test { namespace {
|
||||
|
||||
@ -282,9 +280,7 @@ public:
|
||||
// FP state is not supported
|
||||
// no checks
|
||||
}
|
||||
#ifdef CV_CXX11
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
#endif
|
||||
}
|
||||
|
||||
cv::details::FPDenormalsModeState base_state;
|
||||
|
@ -4,6 +4,8 @@
|
||||
#ifndef __OPENCV_TEST_PRECOMP_HPP__
|
||||
#define __OPENCV_TEST_PRECOMP_HPP__
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "opencv2/ts.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
#include "opencv2/core/private.hpp"
|
||||
|
@ -4,9 +4,7 @@
|
||||
|
||||
// This is .hpp file included from test_utils.cpp
|
||||
|
||||
#ifdef CV_CXX11
|
||||
#include <thread> // std::thread
|
||||
#endif
|
||||
|
||||
#include "opencv2/core/utils/tls.hpp"
|
||||
|
||||
@ -34,8 +32,6 @@ public:
|
||||
int TLSReporter::g_last_id = 0;
|
||||
int TLSReporter::g_allocated = 0;
|
||||
|
||||
#ifdef CV_CXX11
|
||||
|
||||
template<typename T>
|
||||
static void callNThreadsWithTLS(int N, TLSData<T>& tls)
|
||||
{
|
||||
@ -129,6 +125,4 @@ static void testTLSAccumulator(bool detachFirst)
|
||||
TEST(Core_TLS, AccumulatorHoldData_detachData) { testTLSAccumulator(true); }
|
||||
TEST(Core_TLS, AccumulatorHoldData_gather) { testTLSAccumulator(false); }
|
||||
|
||||
#endif
|
||||
|
||||
}} // namespace
|
||||
|
@ -1183,6 +1183,11 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
static Ptr<AttentionLayer> create(const LayerParams ¶ms);
|
||||
};
|
||||
|
||||
class CV_EXPORTS GroupNormLayer : public Layer {
|
||||
public:
|
||||
static Ptr<GroupNormLayer> create(const LayerParams ¶ms);
|
||||
};
|
||||
|
||||
//! @}
|
||||
//! @}
|
||||
CV__DNN_INLINE_NS_END
|
||||
|
@ -444,7 +444,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
|
||||
* backend.
|
||||
*/
|
||||
CV_WRAP static Net readFromModelOptimizer(const String& xml, const String& bin);
|
||||
CV_WRAP static Net readFromModelOptimizer(CV_WRAP_FILE_PATH const String& xml, CV_WRAP_FILE_PATH const String& bin);
|
||||
|
||||
/** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR).
|
||||
* @param[in] bufferModelConfig buffer with model's configuration.
|
||||
@ -477,7 +477,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* @param path path to output file with .dot extension
|
||||
* @see dump()
|
||||
*/
|
||||
CV_WRAP void dumpToFile(const String& path);
|
||||
CV_WRAP void dumpToFile(CV_WRAP_FILE_PATH const String& path);
|
||||
/** @brief Adds new layer to the net.
|
||||
* @param name unique name of the adding layer.
|
||||
* @param type typename of the adding layer (type must be registered in LayerRegister).
|
||||
@ -839,7 +839,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* @param darknetModel path to the .weights file with learned network.
|
||||
* @returns Network object that is ready to do forward; throws an exception in failure cases.
|
||||
*/
|
||||
CV_EXPORTS_W Net readNetFromDarknet(const String &cfgFile, const String &darknetModel = String());
|
||||
CV_EXPORTS_W Net readNetFromDarknet(CV_WRAP_FILE_PATH const String &cfgFile, CV_WRAP_FILE_PATH const String &darknetModel = String());
|
||||
|
||||
/** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
|
||||
* @param bufferCfg A buffer containing the content of the .cfg file with a text description of the network architecture.
|
||||
@ -864,7 +864,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* @param caffeModel path to the .caffemodel file with learned network.
|
||||
* @returns Net object.
|
||||
*/
|
||||
CV_EXPORTS_W Net readNetFromCaffe(const String &prototxt, const String &caffeModel = String());
|
||||
CV_EXPORTS_W Net readNetFromCaffe(CV_WRAP_FILE_PATH const String &prototxt, CV_WRAP_FILE_PATH const String &caffeModel = String());
|
||||
|
||||
/** @brief Reads a network model stored in Caffe model in memory.
|
||||
* @param bufferProto buffer containing the content of the .prototxt file
|
||||
@ -893,7 +893,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* let us make it more flexible.
|
||||
* @returns Net object.
|
||||
*/
|
||||
CV_EXPORTS_W Net readNetFromTensorflow(const String &model, const String &config = String());
|
||||
CV_EXPORTS_W Net readNetFromTensorflow(CV_WRAP_FILE_PATH const String &model, CV_WRAP_FILE_PATH const String &config = String());
|
||||
|
||||
/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
|
||||
* @param bufferModel buffer containing the content of the pb file
|
||||
@ -918,7 +918,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* @param model path to the .tflite file with binary flatbuffers description of the network architecture
|
||||
* @returns Net object.
|
||||
*/
|
||||
CV_EXPORTS_W Net readNetFromTFLite(const String &model);
|
||||
CV_EXPORTS_W Net readNetFromTFLite(CV_WRAP_FILE_PATH const String &model);
|
||||
|
||||
/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/lite">TFLite</a> framework's format.
|
||||
* @param bufferModel buffer containing the content of the tflite file
|
||||
@ -957,7 +957,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* or @ref readNetFromDarknet. An order of @p model and @p config
|
||||
* arguments does not matter.
|
||||
*/
|
||||
CV_EXPORTS_W Net readNet(const String& model, const String& config = "", const String& framework = "");
|
||||
CV_EXPORTS_W Net readNet(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "", const String& framework = "");
|
||||
|
||||
/**
|
||||
* @brief Read deep learning network represented in one of the supported formats.
|
||||
@ -979,7 +979,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* backend.
|
||||
*/
|
||||
CV_EXPORTS_W
|
||||
Net readNetFromModelOptimizer(const String &xml, const String &bin = "");
|
||||
Net readNetFromModelOptimizer(CV_WRAP_FILE_PATH const String &xml, CV_WRAP_FILE_PATH const String &bin = "");
|
||||
|
||||
/** @brief Load a network from Intel's Model Optimizer intermediate representation.
|
||||
* @param[in] bufferModelConfig Buffer contains XML configuration with network's topology.
|
||||
@ -1008,7 +1008,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* @param onnxFile path to the .onnx file with text description of the network architecture.
|
||||
* @returns Network object that is ready to do forward; throws an exception in failure cases.
|
||||
*/
|
||||
CV_EXPORTS_W Net readNetFromONNX(const String &onnxFile);
|
||||
CV_EXPORTS_W Net readNetFromONNX(CV_WRAP_FILE_PATH const String &onnxFile);
|
||||
|
||||
/** @brief Reads a network model from <a href="https://onnx.ai/">ONNX</a>
|
||||
* in-memory buffer.
|
||||
@ -1031,7 +1031,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* @param path to the .pb file with input tensor.
|
||||
* @returns Mat.
|
||||
*/
|
||||
CV_EXPORTS_W Mat readTensorFromONNX(const String& path);
|
||||
CV_EXPORTS_W Mat readTensorFromONNX(CV_WRAP_FILE_PATH const String& path);
|
||||
|
||||
/** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center,
|
||||
* subtract @p mean values, scales values by @p scalefactor, swap Blue and Red channels.
|
||||
@ -1204,7 +1204,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* is taken from NVidia's Caffe fork: https://github.com/NVIDIA/caffe.
|
||||
* So the resulting model may be used there.
|
||||
*/
|
||||
CV_EXPORTS_W void shrinkCaffeModel(const String& src, const String& dst,
|
||||
CV_EXPORTS_W void shrinkCaffeModel(CV_WRAP_FILE_PATH const String& src, CV_WRAP_FILE_PATH const String& dst,
|
||||
const std::vector<String>& layersTypes = std::vector<String>());
|
||||
|
||||
/** @brief Create a text representation for a binary network stored in protocol buffer format.
|
||||
@ -1213,7 +1213,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
*
|
||||
* @note To reduce output file size, trained weights are not included.
|
||||
*/
|
||||
CV_EXPORTS_W void writeTextGraph(const String& model, const String& output);
|
||||
CV_EXPORTS_W void writeTextGraph(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& output);
|
||||
|
||||
/** @brief Performs non maximum suppression given boxes and corresponding scores.
|
||||
|
||||
@ -1318,7 +1318,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* @param[in] model Binary file contains trained weights.
|
||||
* @param[in] config Text file contains network configuration.
|
||||
*/
|
||||
CV_WRAP Model(const String& model, const String& config = "");
|
||||
CV_WRAP Model(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
|
||||
|
||||
/**
|
||||
* @brief Create model from deep learning network.
|
||||
@ -1423,7 +1423,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* @param[in] model Binary file contains trained weights.
|
||||
* @param[in] config Text file contains network configuration.
|
||||
*/
|
||||
CV_WRAP ClassificationModel(const String& model, const String& config = "");
|
||||
CV_WRAP ClassificationModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
|
||||
|
||||
/**
|
||||
* @brief Create model from deep learning network.
|
||||
@ -1473,7 +1473,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* @param[in] model Binary file contains trained weights.
|
||||
* @param[in] config Text file contains network configuration.
|
||||
*/
|
||||
CV_WRAP KeypointsModel(const String& model, const String& config = "");
|
||||
CV_WRAP KeypointsModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
|
||||
|
||||
/**
|
||||
* @brief Create model from deep learning network.
|
||||
@ -1505,7 +1505,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* @param[in] model Binary file contains trained weights.
|
||||
* @param[in] config Text file contains network configuration.
|
||||
*/
|
||||
CV_WRAP SegmentationModel(const String& model, const String& config = "");
|
||||
CV_WRAP SegmentationModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
|
||||
|
||||
/**
|
||||
* @brief Create model from deep learning network.
|
||||
@ -1536,7 +1536,7 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
* @param[in] model Binary file contains trained weights.
|
||||
* @param[in] config Text file contains network configuration.
|
||||
*/
|
||||
CV_WRAP DetectionModel(const String& model, const String& config = "");
|
||||
CV_WRAP DetectionModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
|
||||
|
||||
/**
|
||||
* @brief Create model from deep learning network.
|
||||
@ -1602,7 +1602,7 @@ public:
|
||||
* @param[in] config Text file contains network configuration
|
||||
*/
|
||||
CV_WRAP inline
|
||||
TextRecognitionModel(const std::string& model, const std::string& config = "")
|
||||
TextRecognitionModel(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "")
|
||||
: TextRecognitionModel(readNet(model, config)) { /* nothing */ }
|
||||
|
||||
/**
|
||||
@ -1757,7 +1757,7 @@ public:
|
||||
* @param[in] config Text file contains network configuration.
|
||||
*/
|
||||
CV_WRAP inline
|
||||
TextDetectionModel_EAST(const std::string& model, const std::string& config = "")
|
||||
TextDetectionModel_EAST(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "")
|
||||
: TextDetectionModel_EAST(readNet(model, config)) { /* nothing */ }
|
||||
|
||||
/**
|
||||
@ -1818,7 +1818,7 @@ public:
|
||||
* @param[in] config Text file contains network configuration.
|
||||
*/
|
||||
CV_WRAP inline
|
||||
TextDetectionModel_DB(const std::string& model, const std::string& config = "")
|
||||
TextDetectionModel_DB(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "")
|
||||
: TextDetectionModel_DB(readNet(model, config)) { /* nothing */ }
|
||||
|
||||
CV_WRAP TextDetectionModel_DB& setBinaryThreshold(float binaryThreshold);
|
||||
|
@ -258,22 +258,21 @@ PERF_TEST_P_(Layer_Slice, FastNeuralStyle_eccv16)
|
||||
test_slice<4>(inputShape, begin, end);
|
||||
}
|
||||
|
||||
struct Layer_Scatter : public TestBaseWithParam<tuple<Backend, Target> >
|
||||
{
|
||||
void test_layer(const std::vector<int>& shape, const String reduction = "none", int axis = 0)
|
||||
{
|
||||
int backendId = get<0>(GetParam());
|
||||
int targetId = get<1>(GetParam());
|
||||
using Layer_Scatter = TestBaseWithParam<tuple<std::vector<int>, std::string, int, tuple<Backend, Target>>>;
|
||||
PERF_TEST_P_(Layer_Scatter, scatter) {
|
||||
std::vector<int> shape = get<0>(GetParam());
|
||||
std::string reduction = get<1>(GetParam());
|
||||
int axis = get<2>(GetParam());
|
||||
int backend_id = get<0>(get<3>(GetParam()));
|
||||
int target_id = get<1>(get<3>(GetParam()));
|
||||
|
||||
Mat data(shape, CV_32FC1);
|
||||
Mat indices(shape, CV_32FC1);
|
||||
Mat updates(shape, CV_32FC1);
|
||||
|
||||
Scalar mean = 0.f;
|
||||
Scalar std = 1.f;
|
||||
randn(data, mean, std);
|
||||
randn(data, 0.f, 1.f);
|
||||
randu(indices, 0, shape[axis]);
|
||||
randn(updates, mean, std);
|
||||
randn(updates, 0.f, 1.f);
|
||||
|
||||
indices.convertTo(indices, CV_32SC1, 1, -1);
|
||||
|
||||
@ -291,20 +290,18 @@ struct Layer_Scatter : public TestBaseWithParam<tuple<Backend, Target> >
|
||||
|
||||
// warmup
|
||||
{
|
||||
std::vector<String> inpNames(3);
|
||||
inpNames[0] = "data";
|
||||
inpNames[1] = "indices";
|
||||
inpNames[2] = "updates";
|
||||
net.setInputsNames(inpNames);
|
||||
net.setInput(data, inpNames[0]);
|
||||
net.setInput(indices, inpNames[1]);
|
||||
net.setInput(updates, inpNames[2]);
|
||||
std::vector<String> input_names{"data", "indices", "updates"};
|
||||
net.setInputsNames(input_names);
|
||||
net.setInput(data, input_names[0]);
|
||||
net.setInput(indices, input_names[1]);
|
||||
net.setInput(updates, input_names[2]);
|
||||
|
||||
net.setPreferableBackend(backendId);
|
||||
net.setPreferableTarget(targetId);
|
||||
net.setPreferableBackend(backend_id);
|
||||
net.setPreferableTarget(target_id);
|
||||
Mat out = net.forward();
|
||||
}
|
||||
|
||||
// perf
|
||||
TEST_CYCLE()
|
||||
{
|
||||
Mat res = net.forward();
|
||||
@ -313,28 +310,26 @@ struct Layer_Scatter : public TestBaseWithParam<tuple<Backend, Target> >
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
|
||||
int N = 8;
|
||||
int C = 256;
|
||||
int H = 128;
|
||||
int W = 100;
|
||||
};
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, Combine(
|
||||
Values(std::vector<int>{2, 128, 64, 50}),
|
||||
Values(std::string("none"), std::string("add")),
|
||||
Values(0), // use Values(0, 1, 2, 3) for more details
|
||||
dnnBackendsAndTargets(/* withInferenceEngine= */ false,
|
||||
/* withHalide= */ false,
|
||||
/* withCpuOCV= */ true,
|
||||
/* withVkCom= */ false,
|
||||
/* withCUDA= */ false,
|
||||
/* withNgraph= */ false,
|
||||
/* withWebnn= */ false,
|
||||
/* withCann= */ false) // only test on CPU
|
||||
));
|
||||
|
||||
PERF_TEST_P_(Layer_Scatter, DISABLED_Scatter)
|
||||
{
|
||||
test_layer({N, C, H, W});
|
||||
}
|
||||
|
||||
PERF_TEST_P_(Layer_Scatter, DISABLED_Scatter_add)
|
||||
{
|
||||
test_layer({N, C, H, W}, "add");
|
||||
}
|
||||
|
||||
struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
||||
{
|
||||
void test_layer(const std::vector<int>& shape, const String reduction = "none")
|
||||
{
|
||||
int backendId = get<0>(GetParam());
|
||||
int targetId = get<1>(GetParam());
|
||||
using Layer_ScatterND = TestBaseWithParam<tuple<std::vector<int>, std::string, tuple<Backend, Target>>>;
|
||||
PERF_TEST_P_(Layer_ScatterND, scatterND) {
|
||||
std::vector<int> shape = get<0>(GetParam());
|
||||
std::string reduction = get<1>(GetParam());
|
||||
int backend_id = get<0>(get<2>(GetParam()));
|
||||
int target_id = get<1>(get<2>(GetParam()));
|
||||
|
||||
std::vector<int> indices_shape(shape);
|
||||
indices_shape.push_back(int(shape.size()));
|
||||
@ -342,12 +337,10 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
||||
Mat indices(indices_shape, CV_32FC1);
|
||||
Mat updates(shape, CV_32FC1);
|
||||
|
||||
Scalar mean = 0.f;
|
||||
Scalar std = 1.f;
|
||||
randn(data, mean, std);
|
||||
randn(updates, mean, std);
|
||||
randn(data, 0.f, 1.f);
|
||||
randn(updates, 0.f, 1.f);
|
||||
|
||||
// initialize the indices with index tuples like [0...N, 0...C, 0...H, 0...W]
|
||||
// Create indices such that indices[n_i, c_j, h_k, w_l, :4] = [i, j, k, l]
|
||||
std::vector<int> current_index_tuple(shape.size());
|
||||
int total = data.total();
|
||||
std::vector<int> indices_step;
|
||||
@ -357,6 +350,7 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
||||
indices_step.push_back(step);
|
||||
}
|
||||
int t, j, idx, offset_at_idx, offset;
|
||||
auto *indices_ptr = indices.ptr<float>();
|
||||
for (int i = 0; i < total; i++)
|
||||
{
|
||||
t = i;
|
||||
@ -373,7 +367,7 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
||||
offset += current_index_tuple[j] * indices_step[j];
|
||||
|
||||
for (j = 0; j < shape.size(); j++)
|
||||
indices.at<float>(offset + j) = current_index_tuple[j];
|
||||
indices_ptr[offset + j] = current_index_tuple[j];
|
||||
}
|
||||
|
||||
Net net;
|
||||
@ -389,17 +383,14 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
||||
|
||||
// warmup
|
||||
{
|
||||
std::vector<String> inpNames(3);
|
||||
inpNames[0] = "data";
|
||||
inpNames[1] = "indices";
|
||||
inpNames[2] = "updates";
|
||||
net.setInputsNames(inpNames);
|
||||
net.setInput(data, inpNames[0]);
|
||||
net.setInput(indices, inpNames[1]);
|
||||
net.setInput(updates, inpNames[2]);
|
||||
std::vector<String> input_names{"data", "indices", "updates"};
|
||||
net.setInputsNames(input_names);
|
||||
net.setInput(data, input_names[0]);
|
||||
net.setInput(indices, input_names[1]);
|
||||
net.setInput(updates, input_names[2]);
|
||||
|
||||
net.setPreferableBackend(backendId);
|
||||
net.setPreferableTarget(targetId);
|
||||
net.setPreferableBackend(backend_id);
|
||||
net.setPreferableTarget(target_id);
|
||||
Mat out = net.forward();
|
||||
}
|
||||
|
||||
@ -411,21 +402,18 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
|
||||
int N = 8;
|
||||
int C = 256;
|
||||
int H = 128;
|
||||
int W = 100;
|
||||
};
|
||||
|
||||
PERF_TEST_P_(Layer_ScatterND, DISABLED_ScatterND)
|
||||
{
|
||||
test_layer({N, C, H ,W});
|
||||
}
|
||||
|
||||
PERF_TEST_P_(Layer_ScatterND, DISABLED_ScatterND_add)
|
||||
{
|
||||
test_layer({N, C, H , W}, "add");
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, Combine(
|
||||
Values(std::vector<int>{2, 128, 64, 50}),
|
||||
Values(std::string("none"), std::string("add")),
|
||||
dnnBackendsAndTargets(/* withInferenceEngine= */ false,
|
||||
/* withHalide= */ false,
|
||||
/* withCpuOCV= */ true,
|
||||
/* withVkCom= */ false,
|
||||
/* withCUDA= */ false,
|
||||
/* withNgraph= */ false,
|
||||
/* withWebnn= */ false,
|
||||
/* withCann= */ false) // only test on CPU
|
||||
));
|
||||
|
||||
struct Layer_LayerNorm : public TestBaseWithParam<tuple<Backend, Target> >
|
||||
{
|
||||
@ -795,19 +783,77 @@ PERF_TEST_P_(Layer_Attention, VisionTransformer) {
|
||||
test_layer({1, 197, 768}, {768, 768, 768}, 12);
|
||||
}
|
||||
|
||||
struct Layer_GroupNorm : public TestBaseWithParam<tuple<Backend, Target> >
|
||||
{
|
||||
void test_layer(const std::vector<int>& x_shape, int num_groups)
|
||||
{
|
||||
int backendId = get<0>(GetParam());
|
||||
int targetId = get<1>(GetParam());
|
||||
|
||||
Mat x(x_shape, CV_32FC1);
|
||||
Mat scale(x_shape[1], 1, CV_32FC1);
|
||||
Mat b(x_shape[1], 1, CV_32FC1);
|
||||
|
||||
randu(x, 0.f, 1.f);
|
||||
randu(scale, 0.f, 1.f);
|
||||
randu(b, 0.f, 1.f);
|
||||
|
||||
Net net;
|
||||
LayerParams lp;
|
||||
lp.type = "GroupNormalization";
|
||||
lp.name = "testLayer";
|
||||
lp.set("num_groups", num_groups);
|
||||
|
||||
int id = net.addLayerToPrev(lp.name, lp.type, lp);
|
||||
net.connect(0, 0, id, 0);
|
||||
net.connect(0, 1, id, 1);
|
||||
net.connect(0, 2, id, 2);
|
||||
|
||||
// warmup
|
||||
{
|
||||
std::vector<String> inpNames{"x", "scale", "b"};
|
||||
net.setInputsNames(inpNames);
|
||||
net.setInput(x, inpNames[0]);
|
||||
net.setInput(scale, inpNames[1]);
|
||||
net.setInput(b, inpNames[2]);
|
||||
|
||||
net.setPreferableBackend(backendId);
|
||||
net.setPreferableTarget(targetId);
|
||||
Mat out = net.forward();
|
||||
}
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
Mat res = net.forward();
|
||||
}
|
||||
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
|
||||
int N = 2;
|
||||
int C = 64;
|
||||
int H = 180;
|
||||
int W = 240;
|
||||
int num_groups = 16;
|
||||
};
|
||||
|
||||
PERF_TEST_P_(Layer_GroupNorm, GroupNorm)
|
||||
{
|
||||
test_layer({N, C, H, W}, num_groups);
|
||||
}
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||
#ifdef HAVE_CUDA
|
||||
INSTANTIATE_TEST_CASE_P(CUDA, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)));
|
||||
#endif
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_GatherElements, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_InstanceNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_Attention, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_GroupNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||
|
||||
typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;
|
||||
PERF_TEST_P_(Layer_FullyConnected, fc)
|
||||
|
@ -132,8 +132,23 @@ void eltwise_op(const Stream& stream, TensorSpan<T> output, TensorView<T> x, Ten
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(is_shape_compatible(output, x));
|
||||
CV_Assert(is_shape_compatible(output, y));
|
||||
auto inShape1 = x.shape_as_vector();
|
||||
auto inShape2 = y.shape_as_vector();
|
||||
auto outShape = output.shape_as_vector();
|
||||
|
||||
std::size_t x_ndims = inShape1.size(), y_ndims = inShape2.size();
|
||||
if (x_ndims >= y_ndims) {
|
||||
for (std::size_t i = 0; i < (x_ndims - y_ndims); i++) {
|
||||
inShape2.insert(inShape2.begin(), 1);
|
||||
}
|
||||
} else {
|
||||
for (std::size_t i = 0; i < (y_ndims - x_ndims); i++) {
|
||||
inShape1.insert(inShape1.begin(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
CV_Assert(is_shape_compatible1(outShape, inShape1));
|
||||
CV_Assert(is_shape_compatible1(outShape, inShape2));
|
||||
|
||||
/* matching singleton axes in both input tensors can be eliminated
|
||||
*
|
||||
@ -148,20 +163,21 @@ void eltwise_op(const Stream& stream, TensorSpan<T> output, TensorView<T> x, Ten
|
||||
* x: [1, 256, 32, 32] -> [256, 32, 32]
|
||||
* y: [1, 256, 1, 1] -> [256, 1, 1]
|
||||
*/
|
||||
for (int r = 0; r < output.rank(); r++)
|
||||
{
|
||||
while (x.rank() > r && y.rank() > r && x.get_axis_size(r) == 1 && y.get_axis_size(r) == 1) {
|
||||
CV_Assert(output.get_axis_size(r) == 1);
|
||||
|
||||
x.squeeze(r);
|
||||
y.squeeze(r);
|
||||
output.squeeze(r);
|
||||
int eliminate_times = 0;
|
||||
for (std::size_t i = 0; i < outShape.size(); i++) {
|
||||
if (inShape1[i] == 1 && inShape2[i] == 1 && outShape[i] == 1 && i != (outShape.size() - 1)) {
|
||||
eliminate_times++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (eliminate_times > 0) {
|
||||
for (int i = 0; i < eliminate_times; i++) {
|
||||
inShape1.erase(inShape1.begin());
|
||||
inShape2.erase(inShape2.begin());
|
||||
outShape.erase(outShape.begin());
|
||||
}
|
||||
}
|
||||
|
||||
auto inShape1 = x.shape_as_vector();
|
||||
auto inShape2 = y.shape_as_vector();
|
||||
auto outShape = output.shape_as_vector();
|
||||
|
||||
/* contiguous axes that do not broadcast can be merged into one axis
|
||||
*
|
||||
@ -324,7 +340,19 @@ void eltwise_sub_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
|
||||
eltwise_op<T, SubFunctor<T>>(stream, output, x, y);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void eltwise_mod_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
|
||||
eltwise_op<T, ModFunctor<T>>(stream, output, x, y);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void eltwise_fmod_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
|
||||
eltwise_op<T, FModFunctor<T>>(stream, output, x, y);
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void eltwise_mod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
template void eltwise_fmod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
template void eltwise_sub_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
template void eltwise_div_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
template void eltwise_prod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
@ -333,6 +361,8 @@ void eltwise_sub_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
|
||||
template void eltwise_max_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
template void eltwise_min_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
#endif
|
||||
template void eltwise_mod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||
template void eltwise_fmod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||
template void eltwise_sub_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||
template void eltwise_div_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||
template void eltwise_prod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||
|
@ -799,6 +799,40 @@ struct ReciprocalFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct ModFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() {}
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE ModFunctor() { }
|
||||
CUDA4DNN_DEVICE ModFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T x, T y) {
|
||||
int res = (int)x % (int)y;
|
||||
T zero = T(0);
|
||||
if ((res > (int)zero && y < zero) || (res < (int)zero && y > zero)) {
|
||||
res += (int)y;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct FModFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() {}
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE FModFunctor() { }
|
||||
CUDA4DNN_DEVICE FModFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T x, T y) {
|
||||
using csl::device::fmod;
|
||||
return fmod(x, y);
|
||||
}
|
||||
};
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */
|
||||
|
@ -36,6 +36,13 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace de
|
||||
template <> inline __device__ float min(float x, float y) { return fminf(x, y); }
|
||||
template <> inline __device__ double min(double x, double y) { return fmin(x, y); }
|
||||
|
||||
/* Remainder of x / y.
 *
 * The primary template uses operator%; the specializations below forward to the
 * floating-point math functions (half is computed in float).
 */
template <class T> __device__ T fmod(T x, T y) { return x % y; }
template <> inline __device__ float fmod(float x, float y) { return fmodf(x, y); }
// The call must be qualified: an unqualified fmod(x, y) here finds only this namespace's
// template (which hides ::fmod), i.e. this very specialization, and would recurse forever.
template <> inline __device__ double fmod(double x, double y) { return ::fmod(x, y); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ half fmod(half x, half y) { return fmodf((float)x, (float)y); }
#endif
|
||||
|
||||
template <class T> __device__ T log1p(T val);
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template <> inline __device__ __half log1p(__half val) { return hlog(__half(1) + val); }
|
||||
|
@ -78,6 +78,18 @@ namespace raw {
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void normalize_mean_variance_groupwise(Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> inv_stddev, size_type inner_size, size_type C, size_type num_groups, size_type group_size) {
|
||||
for (auto idx : grid_stride_range(output.size())) {
|
||||
const index_type outer_idx = idx / inner_size;
|
||||
const index_type c = outer_idx % C;
|
||||
const index_type group_idx = outer_idx / group_size;
|
||||
auto s = static_cast<float>(scale[c]) * inv_stddev[group_idx];
|
||||
auto b = static_cast<float>(bias[c]);
|
||||
output[idx] = (static_cast<float>(input[idx]) - means[group_idx]) * s + b;
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void normalize_mean_variance_layernorm(Span<T> output, View<T> input, View<T> scale, View<float> means, View<float> inv_stddev, size_type inner_size) {
|
||||
for (auto idx : grid_stride_range(output.size())) {
|
||||
@ -191,6 +203,24 @@ template void normalize_mean_variance_channelwise(const Stream&, Span<__half> /*
|
||||
#endif
|
||||
template void normalize_mean_variance_channelwise(const Stream&, Span<float> /*output*/, View<float> /*input*/, View<float> /*scale*/, View<float> /*bias*/, View<float> /*means*/, View<float> /*inv_stddev*/, std::size_t, std::size_t);
|
||||
|
||||
template <class T>
|
||||
void normalize_mean_variance_groupwise(const Stream& stream, Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> inv_stddev, std::size_t inner_size, std::size_t C, std::size_t num_groups, std::size_t group_size)
|
||||
{
|
||||
CV_Assert(input.size() == output.size());
|
||||
CV_Assert(input.size() / inner_size == means.size() * group_size);
|
||||
CV_Assert(means.size() == inv_stddev.size());
|
||||
|
||||
auto kernel = raw::normalize_mean_variance_groupwise<T>;
|
||||
auto policy = make_policy(kernel, output.size(), 0, stream);
|
||||
launch_kernel(kernel, policy, output, input, scale, bias, means, inv_stddev, inner_size, C, num_groups, group_size);
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void normalize_mean_variance_groupwise(const Stream&, Span<__half> /*output*/, View<__half> /*input*/, View<__half> /*scale*/, View<__half> /*bias*/, View<float> /*means*/, View<float> /*inv_stddev*/, std::size_t, std::size_t, std::size_t, std::size_t);
|
||||
#endif
|
||||
template void normalize_mean_variance_groupwise(const Stream&, Span<float> /*output*/, View<float> /*input*/, View<float> /*scale*/, View<float> /*bias*/, View<float> /*means*/, View<float> /*inv_stddev*/, std::size_t, std::size_t, std::size_t, std::size_t);
|
||||
|
||||
|
||||
template <class T>
|
||||
void normalize_mean_variance_layernorm(const Stream& stream, Span<T> output, View<T> input, View<T> scale, View<float> means, View<float> inv_stddev, std::size_t inner_size)
|
||||
{
|
||||
|
@ -1262,6 +1262,23 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
|
||||
return true;
|
||||
}
|
||||
|
||||
/** returns true if both shapes have the same number of axes and, on every axis, the two
 *  extents are equal or at least one of them is 1
 */
template <typename ShapeType>
bool is_shape_compatible1(const ShapeType &x_shape, const ShapeType &y_shape) noexcept {
    if (x_shape.size() != y_shape.size())
        return false;

    const std::size_t rank = x_shape.size();
    for (std::size_t axis = 0; axis < rank; ++axis) {
        const auto x_extent = x_shape[axis];
        const auto y_extent = y_shape[axis];

        const bool compatible = (x_extent == y_extent) || (x_extent == 1) || (y_extent == 1);
        if (!compatible)
            return false;
    }

    return true;
}
|
||||
|
||||
/** returns the rank to which the given tensor can be squeezed to */
|
||||
template <class TensorType>
|
||||
std::size_t get_effective_rank(const TensorType& x) noexcept {
|
||||
|
@ -33,6 +33,12 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
template <class T>
|
||||
void eltwise_sub_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
|
||||
|
||||
template <class T>
|
||||
void eltwise_mod_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
|
||||
|
||||
template <class T>
|
||||
void eltwise_fmod_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ELTWISE_OPS_HPP */
|
||||
|
@ -35,6 +35,10 @@ void normalize_mean_variance_layernorm(const csl::Stream &stream, csl::Span<T> o
|
||||
template <class T>
|
||||
void normalize_mean_variance_layernorm(const csl::Stream &stream, csl::Span<T> output, csl::View<T> input, csl::View<T> scale, csl::View<T> bias, csl::View<float> means, csl::View<float> inv_stddev, std::size_t inner_size);
|
||||
|
||||
template <class T>
|
||||
void normalize_mean_variance_groupwise(const csl::Stream &stream, csl::Span<T> output, csl::View<T> input, csl::View<T> scale, csl::View<T> bias, csl::View<float> means, csl::View<float> inv_stddev, std::size_t inner_size, std::size_t C, std::size_t num_groups, std::size_t group_size);
|
||||
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_MVN_HPP */
|
||||
|
@ -28,6 +28,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
DIV,
|
||||
MIN,
|
||||
SUB,
|
||||
MOD,
|
||||
FMOD,
|
||||
};
|
||||
|
||||
class EltwiseOpBase : public CUDABackendNode {
|
||||
@ -90,6 +92,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
kernels::eltwise_sum_coeff_2<T>(stream, output, coeffs[0], input_x, coeffs[1], input_y);
|
||||
break;
|
||||
case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, input_x, input_y); break;
|
||||
case EltwiseOpType::MOD: kernels::eltwise_mod_2<T>(stream, output, input_x, input_y); break;
|
||||
case EltwiseOpType::FMOD: kernels::eltwise_fmod_2<T>(stream, output, input_x, input_y); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -122,6 +126,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
}
|
||||
break;
|
||||
case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, output, input); break;
|
||||
case EltwiseOpType::MOD: kernels::eltwise_mod_2<T>(stream, output, output, input); break;
|
||||
case EltwiseOpType::FMOD: kernels::eltwise_fmod_2<T>(stream, output, output, input); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
87
modules/dnn/src/cuda4dnn/primitives/group_norm.hpp
Normal file
87
modules/dnn/src/cuda4dnn/primitives/group_norm.hpp
Normal file
@ -0,0 +1,87 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_GROUP_NORM_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_GROUP_NORM_HPP
|
||||
|
||||
#include "../../op_cuda.hpp"
|
||||
|
||||
#include "../csl/stream.hpp"
|
||||
#include "../csl/span.hpp"
|
||||
#include "../csl/tensor.hpp"
|
||||
#include "../csl/workspace.hpp"
|
||||
|
||||
#include "../kernels/fill_copy.hpp"
|
||||
#include "../kernels/mvn.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
|
||||
template <class T>
|
||||
class GroupNormOp final : public CUDABackendNode {
|
||||
public:
|
||||
using wrapper_type = GetCUDABackendWrapperType<T>;
|
||||
|
||||
GroupNormOp(csl::Stream stream_, float epsilon_, size_t loops, size_t num_groups)
|
||||
: stream(std::move(stream_)), epsilon(epsilon_), num_groups(num_groups) {
|
||||
csl::WorkspaceBuilder builder;
|
||||
builder.require<float>(loops * num_groups); // mean and stdev for each group
|
||||
builder.require<float>(loops * num_groups);
|
||||
scratch_mem_in_bytes = builder.required_workspace_size();
|
||||
}
|
||||
|
||||
void forward(const std::vector<cv::Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<cv::Ptr<BackendWrapper>>& outputs,
|
||||
csl::Workspace& workspace) override {
|
||||
auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
|
||||
auto scale_wrapper = inputs[1].dynamicCast<wrapper_type>();
|
||||
auto bias_wrapper = inputs[2].dynamicCast<wrapper_type>();
|
||||
|
||||
auto input = input_wrapper->getView();
|
||||
auto scale = scale_wrapper->getView();
|
||||
auto bias = bias_wrapper->getView();
|
||||
|
||||
auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
|
||||
auto output = output_wrapper->getSpan();
|
||||
|
||||
auto C = input.get_axis_size(1);
|
||||
auto loops = input.size_range(0, 2);
|
||||
auto norm_size = input.size_range(2, input.rank());
|
||||
auto num_groups = this->num_groups;
|
||||
auto group_size = C / num_groups;
|
||||
if (norm_size == 1) {
|
||||
kernels::fill<T>(stream, output, 0.f);
|
||||
return;
|
||||
} else {
|
||||
auto ws_allocator = csl::WorkspaceAllocator(workspace);
|
||||
|
||||
auto mean = ws_allocator.get_span<float>(loops / group_size);
|
||||
kernels::fill<float>(stream, mean, 0.f);
|
||||
|
||||
auto stdev = ws_allocator.get_span<float>(loops / group_size);
|
||||
kernels::fill<float>(stream, stdev, 0.f);
|
||||
|
||||
kernels::reduce_mean_sqr_sum<T>(stream, mean, stdev, input, norm_size * group_size);
|
||||
kernels::compute_normalization_scale(stream, stdev, mean, stdev, norm_size * group_size, epsilon);
|
||||
kernels::normalize_mean_variance_groupwise<T>(stream, output, input, scale, bias, mean, stdev, norm_size, C, num_groups, group_size);
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }
|
||||
|
||||
private:
|
||||
csl::Stream stream;
|
||||
float epsilon;
|
||||
std::size_t num_groups;
|
||||
std::size_t scratch_mem_in_bytes;
|
||||
};
|
||||
|
||||
}}} // cv::dnn::cuda4dnn
|
||||
|
||||
#endif // OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_GROUP_NORM_HPP
|
@ -163,6 +163,7 @@ void initializeLayerFactory()
|
||||
CV_DNN_REGISTER_LAYER_CLASS(Expand, ExpandLayer);
|
||||
CV_DNN_REGISTER_LAYER_CLASS(InstanceNormalization, InstanceNormLayer);
|
||||
CV_DNN_REGISTER_LAYER_CLASS(Attention, AttentionLayer);
|
||||
CV_DNN_REGISTER_LAYER_CLASS(GroupNormalization, GroupNormLayer);
|
||||
|
||||
CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
|
||||
CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
|
||||
|
@ -969,6 +969,13 @@ public:
|
||||
stride_h, stride_w, dilation_h, dilation_w, pad_t, pad_l,
|
||||
biasptr, multptr, inptr_, height, width, outptr_, out_d, outH, outW, inpZp, outZp);
|
||||
else
|
||||
#endif
|
||||
#if CV_RVP052
|
||||
if(isConv2D)
|
||||
opt_RVP052::fastDepthwiseConv(wptr, kernel_h, kernel_w,
|
||||
stride_h, stride_w, dilation_h, dilation_w, pad_t, pad_l,
|
||||
biasptr, multptr, inptr_, height, width, outptr_, out_d, outH, outW, inpZp, outZp);
|
||||
else
|
||||
#endif
|
||||
{
|
||||
const int8_t w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2],
|
||||
@ -1348,6 +1355,12 @@ public:
|
||||
opt_LASX::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0,
|
||||
outShape, bsz, vsz, vsz_a, outZp, multptr, cn0 == 0, cn1 == inpCn);
|
||||
else
|
||||
#endif
|
||||
#if CV_RVP052
|
||||
if(isConv2D)
|
||||
opt_RVP052::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0,
|
||||
outShape, bsz, vsz, vsz_a, outZp, multptr, cn0 == 0, cn1 == inpCn);
|
||||
else
|
||||
#endif
|
||||
for( int i = 0; i < outCn; i += 2 )
|
||||
{
|
||||
|
@ -302,6 +302,11 @@ public:
|
||||
if( useLASX )
|
||||
opt_LASX::fastGEMM1T( sptr, wptr, wstep, biasptr, multptr, dptr, nw, vecsize, outZp );
|
||||
else
|
||||
#endif
|
||||
#if CV_RVP052
|
||||
if( 1 )
|
||||
opt_RVP052::fastGEMM1T( sptr, wptr, wstep, biasptr, multptr, dptr, nw, vecsize, outZp );
|
||||
else
|
||||
#endif
|
||||
{
|
||||
int i = 0;
|
||||
|
@ -13,6 +13,8 @@
|
||||
#include "int8layers/layers_common.simd_declarations.hpp"
|
||||
#undef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
#include "./layers_rvp052.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "../ocl4dnn/include/ocl4dnn.hpp"
|
||||
#endif
|
||||
|
221
modules/dnn/src/int8layers/layers_rvp052.cpp
Normal file
221
modules/dnn/src/int8layers/layers_rvp052.cpp
Normal file
@ -0,0 +1,221 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "./layers_rvp052.hpp"
|
||||
|
||||
#if CV_RVP052
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
namespace opt_RVP052 {
|
||||
|
||||
void fastConv(const int8_t *weights, size_t wstep, const int *bias,
|
||||
const int8_t *rowbuf, int *output, const int *outShape,
|
||||
int blockSize, int vecsize, int vecsize_aligned, int outZp,
|
||||
const float *multiplier, bool initOutput, bool finalOutput)
|
||||
{
|
||||
int outCn = outShape[1];
|
||||
size_t outPlaneSize = outShape[2] * outShape[3];
|
||||
for (int i = 0; i < outCn; i += 2)
|
||||
{
|
||||
const int8_t *wptr0 = weights + i * wstep;
|
||||
const int8_t *wptr1 = wptr0 + wstep;
|
||||
int *outptr0 = output + i * outPlaneSize;
|
||||
int *outptr1 = outptr0 + outPlaneSize;
|
||||
int bias0 = bias[i], bias1 = bias[i + 1];
|
||||
float mult0 = multiplier[i], mult1 = multiplier[i + 1];
|
||||
|
||||
if (i + 1 >= outCn)
|
||||
{
|
||||
wptr1 = wptr0;
|
||||
outptr1 = outptr0;
|
||||
bias1 = bias0;
|
||||
mult1 = mult0;
|
||||
}
|
||||
int j = 0;
|
||||
for (; j < blockSize; j++)
|
||||
{
|
||||
const int8_t *rptr = rowbuf + j * vecsize_aligned;
|
||||
int s00 = initOutput ? bias0 : outptr0[j];
|
||||
int s10 = initOutput ? bias1 : outptr1[j];
|
||||
|
||||
int32x2_t vsx0 = {s00, s10};
|
||||
|
||||
for (int k = 0; k < vecsize; k += 4)
|
||||
{
|
||||
int8x4_t vrptr[2] = {*(int8x4_t*)(rptr + k), *(int8x4_t*)(rptr + k)};
|
||||
int8x4_t vwptr[2] = {*(int8x4_t*)(wptr0 + k), *(int8x4_t*)(wptr1 + k)};
|
||||
vsx0 = __nds__v_smaqa(vsx0, *(int8x8_t*)vwptr, *(int8x8_t*)vrptr);
|
||||
}
|
||||
|
||||
if (finalOutput)
|
||||
{
|
||||
vsx0[0] = outZp + (int)std::round(vsx0[0] * mult0);
|
||||
vsx0[1] = outZp + (int)std::round(vsx0[1] * mult1);
|
||||
vsx0 = __nds__v_sclip32(vsx0, 7);
|
||||
}
|
||||
|
||||
outptr0[j] = vsx0[0];
|
||||
outptr1[j] = vsx0[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void fastDepthwiseConv(const int8_t *wptr,
|
||||
int kernel_h, int kernel_w,
|
||||
int stride_h, int stride_w,
|
||||
int dilation_h, int dilation_w,
|
||||
int pad_t, int pad_l,
|
||||
const int *biasptr, const float *multptr,
|
||||
const int8_t *inptr_,
|
||||
int height, int width,
|
||||
int *outptr_,
|
||||
int out_d, int outH, int outW,
|
||||
int inpZp, int outZp)
|
||||
{
|
||||
const int8_t w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2],
|
||||
w10 = wptr[3], w11 = wptr[4], w12 = wptr[5],
|
||||
w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8];
|
||||
int outW1 = min(outW, (width - dilation_w * (kernel_w - 1) + pad_l) / stride_w);
|
||||
int bias = biasptr[out_d], biasCopy;
|
||||
float mult = multptr[out_d];
|
||||
|
||||
for (int out_i = 0; out_i < outH; out_i++)
|
||||
{
|
||||
int in_i = out_i * stride_h - pad_t, out_j = 0;
|
||||
const int8_t *imgptr0 = inptr_ + in_i * width;
|
||||
const int8_t *imgptr1 = imgptr0 + dilation_h * width;
|
||||
const int8_t *imgptr2 = imgptr0 + (dilation_h * 2) * width;
|
||||
int8_t w00 = w00_, w01 = w01_, w02 = w02_;
|
||||
int8_t w20 = w20_, w21 = w21_, w22 = w22_;
|
||||
int out;
|
||||
biasCopy = bias;
|
||||
|
||||
if (in_i < 0)
|
||||
{
|
||||
biasCopy += inpZp * (w00 + w01 + w02);
|
||||
w00 = w01 = w02 = 0;
|
||||
imgptr0 = imgptr1;
|
||||
}
|
||||
else if (in_i + dilation_h * (kernel_h - 1) >= height)
|
||||
{
|
||||
biasCopy += inpZp * (w20 + w21 + w22);
|
||||
w20 = w21 = w22 = 0;
|
||||
imgptr2 = imgptr1;
|
||||
}
|
||||
int *outptr = outptr_ + out_i * outW;
|
||||
if (pad_l > 0)
|
||||
{
|
||||
out = (int)imgptr0[0] * w01 + (int)imgptr0[dilation_w] * w02 +
|
||||
(int)imgptr1[0] * w11 + (int)imgptr1[dilation_w] * w12 +
|
||||
(int)imgptr2[0] * w21 + (int)imgptr2[dilation_w] * w22 +
|
||||
biasCopy + inpZp * (w00 + w10 + w20);
|
||||
outptr[0] = __nds__sclip32(outZp + (int)std::round(out * mult), 7);
|
||||
out_j = 1;
|
||||
}
|
||||
|
||||
int8x8_t vwx0 = (int8x8_t){w00, w10, w20, 0, w00, w10, w20, 0};
|
||||
int8x8_t vwx1 = (int8x8_t){w01, w11, w21, 0, w01, w11, w21, 0};
|
||||
int8x8_t vwx2 = (int8x8_t){w02, w12, w22, 0, w02, w12, w22, 0};
|
||||
int8x8_t vimgx0, vimgx1, vimgx2;
|
||||
int32x2_t vout = {0, 0};
|
||||
for (; out_j < outW1; out_j+=2)
|
||||
{
|
||||
int in_j = out_j * stride_w - pad_l;
|
||||
vimgx0 = (int8x8_t){imgptr0[in_j], imgptr1[in_j], imgptr2[in_j], 0,
|
||||
imgptr0[in_j + stride_w], imgptr1[in_j + stride_w], imgptr2[in_j + stride_w], 0};
|
||||
vimgx1 = (int8x8_t){imgptr0[in_j + dilation_w], imgptr1[in_j + dilation_w], imgptr2[in_j + dilation_w], 0,
|
||||
imgptr0[in_j + dilation_w + stride_w], imgptr1[in_j + dilation_w + stride_w], imgptr2[in_j + dilation_w + stride_w], 0};
|
||||
vimgx2 = (int8x8_t){imgptr0[in_j + dilation_w * 2], imgptr1[in_j + dilation_w * 2], imgptr2[in_j + dilation_w * 2], 0,
|
||||
imgptr0[in_j + dilation_w * 2 + stride_w], imgptr1[in_j + dilation_w * 2 + stride_w], imgptr2[in_j + dilation_w * 2 + stride_w], 0};
|
||||
|
||||
vout = (int32x2_t){biasCopy, biasCopy};
|
||||
vout = __nds__v_smaqa(vout, vwx0, vimgx0);
|
||||
vout = __nds__v_smaqa(vout, vwx1, vimgx1);
|
||||
vout = __nds__v_smaqa(vout, vwx2, vimgx2);
|
||||
|
||||
outptr[out_j] = __nds__sclip32(outZp + (int)std::round(vout[0] * mult), 7);
|
||||
outptr[out_j + 1] = __nds__sclip32(outZp + (int)std::round(vout[1] * mult), 7);
|
||||
}
|
||||
|
||||
while (out_j > outW1) out_j--;
|
||||
|
||||
for (; out_j < outW; out_j++)
|
||||
{
|
||||
int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w * 2;
|
||||
int s0 = 1, s1 = 1, s2 = 1;
|
||||
if (in_j0 >= width)
|
||||
{
|
||||
in_j0 = 0;
|
||||
s0 = 0;
|
||||
biasCopy += inpZp * (w00 + w10 + w20);
|
||||
}
|
||||
if (in_j1 >= width)
|
||||
{
|
||||
in_j1 = 0;
|
||||
s1 = 0;
|
||||
biasCopy += inpZp * (w01 + w11 + w21);
|
||||
}
|
||||
if (in_j2 >= width)
|
||||
{
|
||||
in_j2 = 0;
|
||||
s2 = 0;
|
||||
biasCopy += inpZp * (w02 + w12 + w22);
|
||||
}
|
||||
out = (int)imgptr0[in_j0] * w00 * s0 + (int)imgptr0[in_j1] * w01 * s1 + (int)imgptr0[in_j2] * w02 * s2 +
|
||||
(int)imgptr1[in_j0] * w10 * s0 + (int)imgptr1[in_j1] * w11 * s1 + (int)imgptr1[in_j2] * w12 * s2 +
|
||||
(int)imgptr2[in_j0] * w20 * s0 + (int)imgptr2[in_j1] * w21 * s1 + (int)imgptr2[in_j2] * w22 * s2 + biasCopy;
|
||||
outptr[out_j] = __nds__sclip32(outZp + (int)std::round(out * mult), 7);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// dst = vec * weights^t + bias
|
||||
void fastGEMM1T( const int8_t* vec, const int8_t* weights,
|
||||
size_t wstep, const int* bias, const float* multiplier,
|
||||
int* dst, int nvecs, int vecsize, int outZp )
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
for( ; i <= nvecs - 2; i += 2 )
|
||||
{
|
||||
const int8_t* wptr0 = weights + i * wstep;
|
||||
const int8_t* wptr1 = weights + (i + 1) * wstep;
|
||||
|
||||
int32x2_t vs0 = *(int32x2_t*)(bias + i);
|
||||
|
||||
for( int k = 0; k < vecsize; k += 4 )
|
||||
{
|
||||
int8x4_t vvec[2] = {*(int8x4_t*)(vec + k), *(int8x4_t*)(vec + k)};
|
||||
int8x4_t vwptr[2] = {*(int8x4_t*)(wptr0 + k), *(int8x4_t*)(wptr1 + k)};
|
||||
vs0 = __nds__v_smaqa(vs0, *(int8x8_t*)vwptr, *(int8x8_t*)vvec);
|
||||
}
|
||||
|
||||
int32x2_t vdst = {(int)std::round(vs0[0] * multiplier[i]), (int)std::round(vs0[1] * multiplier[i + 1])};
|
||||
|
||||
vdst = __nds__v_sclip32(vdst + outZp, 7);
|
||||
|
||||
*(int32x2_t*)(dst + i) = vdst;
|
||||
}
|
||||
|
||||
for( ; i < nvecs; i++ )
|
||||
{
|
||||
const int8_t* wptr = weights + i * wstep;
|
||||
int s0 = bias[i];
|
||||
|
||||
for( int k = 0; k < vecsize; k += 4 )
|
||||
{
|
||||
int8x4_t vvec[2] = {*(int8x4_t*)(vec + k), 0};
|
||||
int8x4_t vwptr[2] = {*(int8x4_t*)(wptr + k), 0};
|
||||
s0 = __nds__smaqa(s0, *(unsigned long*)vwptr, *(unsigned long*)vvec);
|
||||
}
|
||||
|
||||
dst[i] = __nds__sclip32(outZp + (int)std::round(s0 * multiplier[i]), 7);
|
||||
}
|
||||
}
|
||||
|
||||
}}} // namespace
|
||||
|
||||
#endif
|
36
modules/dnn/src/int8layers/layers_rvp052.hpp
Normal file
36
modules/dnn/src/int8layers/layers_rvp052.hpp
Normal file
@ -0,0 +1,36 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_INT8LAYERS_LAYERS_RVP052_HPP
#define OPENCV_DNN_SRC_INT8LAYERS_LAYERS_RVP052_HPP

// CV_RVP052 is 1 when compiling for a RISC-V target with the DSP extension and the Andes
// toolchain (the three macros tested below), and 0 otherwise. The opt_RVP052 kernels are
// only declared when it is 1.
#if defined(__riscv) && defined(__riscv_dsp) && defined(__ANDES)
# include <stddef.h>   // size_t
# include <stdint.h>   // int8_t
# include <nds_intrinsic.h>
# define CV_RVP052 1

namespace cv {
namespace dnn {
namespace opt_RVP052 {

// Int8 convolution kernel; see layers_rvp052.cpp for the implementation.
void fastConv( const int8_t* weights, size_t wstep, const int* bias,
               const int8_t* rowbuf, int* output, const int* outShape,
               int blockSize, int vecsize, int vecsize_aligned, int outZp,
               const float* multiplier, bool initOutput, bool finalOutput );

// Int8 depthwise convolution kernel for one output channel (out_d).
void fastDepthwiseConv( const int8_t* wptr,
                        int kernel_h, int kernel_w,
                        int stride_h, int stride_w,
                        int dilation_h, int dilation_w,
                        int pad_t, int pad_l,
                        const int* biasptr, const float* multptr,
                        const int8_t* inptr_,
                        int height, int width,
                        int* outptr_,
                        int out_d, int outH, int outW,
                        int inpZp, int outZp );

// dst = vec * weights^t + bias
void fastGEMM1T( const int8_t* vec, const int8_t* weights,
                 size_t wstep, const int* bias, const float* multiplier,
                 int* dst, int nvecs, int vecsize, int outZp );

}}}

#else
# define CV_RVP052 0
#endif

#endif // OPENCV_DNN_SRC_INT8LAYERS_LAYERS_RVP052_HPP
|
@ -338,7 +338,7 @@ int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _outpu
|
||||
}
|
||||
#if CV_TRY_AVX2
|
||||
if (conv->useAVX2)
|
||||
opt_AVX::winofunc_AtXA_8x8_F32((float *)out_wbuf + ((k - k0)*CONV_WINO_IBLOCK + (block_id - block_id0))*CONV_WINO_AREA, CONV_WINO_SIZE,
|
||||
opt_AVX2::winofunc_AtXA_8x8_F32((float *)out_wbuf + ((k - k0)*CONV_WINO_IBLOCK + (block_id - block_id0))*CONV_WINO_AREA, CONV_WINO_SIZE,
|
||||
bpptr, outstep, outptr, outstep, biasv, minval, maxval, ifMinMaxAct);
|
||||
else
|
||||
#endif
|
||||
|
@ -385,7 +385,7 @@ void fastGemmBatch(bool trans_a, bool trans_b,
|
||||
const auto shape_b = shape(B);
|
||||
const auto shape_c = shape(C);
|
||||
CV_CheckGE(shape_a.size(), static_cast<size_t>(2), "DNN/fastGemmBatch: A must be n-dimensional (n >= 2)");
|
||||
CV_CheckEQ(shape_b.size(), static_cast<size_t>(2), "DNN/fastGemmBatch: B must be n-dimensional (n >= 2)");
|
||||
CV_CheckGE(shape_b.size(), static_cast<size_t>(2), "DNN/fastGemmBatch: B must be n-dimensional (n >= 2)");
|
||||
|
||||
const float *a = A.ptr<const float>();
|
||||
const float *b = B.ptr<const float>();
|
||||
|
@ -158,4 +158,51 @@ void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &o
|
||||
parallel_for_(Range(0, loops), fn, nstripes);
|
||||
}
|
||||
|
||||
void fastNormGroup(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon, size_t num_groups) {
|
||||
const auto input_shape = shape(input);
|
||||
size_t N = input_shape[0], C = input_shape[1];
|
||||
CV_CheckEQ(scale.total(), bias.total(), "fastNormGroup: scale and bias should have the same shape");
|
||||
CV_CheckEQ(scale.total(), C, "fastNormGroup: scale should be a 1d tensor and match the channel of input");
|
||||
CV_CheckGE(input.dims, 3, "fastNormGroup: input dimension >= 3");
|
||||
|
||||
size_t channels_per_group = C / num_groups;
|
||||
size_t loops = N * num_groups;
|
||||
size_t norm_size = static_cast<size_t>(total(input_shape, 2) * channels_per_group);
|
||||
size_t step = norm_size / channels_per_group;
|
||||
float inv_norm_size = 1.0 / norm_size;
|
||||
|
||||
auto fn = [&](const Range &r) {
|
||||
const auto *input_data = input.ptr<const float>();
|
||||
const auto *scale_data = scale.ptr<const float>();
|
||||
const auto *bias_data = bias.ptr<const float>();
|
||||
auto *output_data = output.ptr<float>();
|
||||
|
||||
for (int i = r.start; i < r.end; i++) {
|
||||
const auto *x = input_data + norm_size * i;
|
||||
auto *y = output_data + norm_size * i;
|
||||
|
||||
float mean = 0.f, mean_square = 0.f;
|
||||
for (int j = 0; j < norm_size; j++) {
|
||||
float v = x[j];
|
||||
mean += v;
|
||||
mean_square += v * v;
|
||||
}
|
||||
|
||||
mean *= inv_norm_size;
|
||||
mean_square = std::sqrt(std::max(0.f, mean_square * inv_norm_size - mean * mean) + epsilon);
|
||||
float inv_stdev = 1.f / mean_square;
|
||||
|
||||
size_t group_idx = i % num_groups * channels_per_group;
|
||||
for (size_t j = 0; j < norm_size; j++) {
|
||||
size_t c = group_idx + (j / step);
|
||||
float s = scale_data[c] * inv_stdev, b = bias_data[c];
|
||||
y[j] = s * (x[j] - mean) + b;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
double nstripes = loops * norm_size * (1 / 1024.0);
|
||||
parallel_for_(Range(0, loops), fn, nstripes);
|
||||
}
|
||||
|
||||
}} // cv::dnn
|
||||
|
@ -21,6 +21,9 @@ void fastNorm(const Mat &input, const Mat &scale, const Mat &bias, Mat &output,
|
||||
// Channel-wise Normalization speedup by multi-threading. Scale and bias should have the same shape (C). Input should have dimension >= 3.
|
||||
void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon);
|
||||
|
||||
// Group-wise Normalization speedup by multi-threading. Scale and bias should have the same shape (C). Input should have dimension >= 3.
|
||||
void fastNormGroup(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon, size_t num_groups);
|
||||
|
||||
}} // cv::dnn
|
||||
|
||||
#endif // OPENCV_DNN_FAST_NORM_HPP
|
||||
|
@ -1299,7 +1299,6 @@ Mat LayerEinsumImpl::batchwiseMatMul(
|
||||
const Mat& input2,
|
||||
const MatShape& input2ShapeOverride)
|
||||
{
|
||||
|
||||
// Sanity checks before the actual MatMul
|
||||
CV_CheckType(input1.type(), input2.type(), "Data types of the inputs must match for MatMul");
|
||||
CV_CheckEQ(input1ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
|
||||
@ -1312,59 +1311,21 @@ Mat LayerEinsumImpl::batchwiseMatMul(
|
||||
int K = input1ShapeOverride[2];
|
||||
int N = input2ShapeOverride[2];
|
||||
|
||||
std::vector<Mat> output;
|
||||
Mat reshapedInput1 = input1;
|
||||
Mat reshapedInput2 = input2;
|
||||
|
||||
Mat output;
|
||||
if (batches > 1)
|
||||
{
|
||||
Mat reshapedInput1 = input1;
|
||||
Mat reshapedInput2 = input2;
|
||||
// create tmpout with type like input1
|
||||
output = Mat({batches, M, N}, input1.type());
|
||||
|
||||
// input1 should of size MxK
|
||||
// check if input1 needs reshape, if need reshape
|
||||
if (input1.size[0] != M || input1.size[1] != K)
|
||||
{
|
||||
int shape[] = {batches, M, K};
|
||||
reshapedInput1 = input1.reshape(1, 3, shape);
|
||||
}
|
||||
|
||||
// input2 should be of size KxN
|
||||
// check if input2 needs reshape, if needs reshape
|
||||
if (input2.size[0] != K || input2.size[1] != N)
|
||||
{
|
||||
int shape[] = {batches, K, N};
|
||||
reshapedInput2 = input2.reshape(1, 3, shape);
|
||||
}
|
||||
|
||||
for (size_t i=0; i < batches; i++)
|
||||
{
|
||||
std::vector<Range> ranges1 = {cv::Range(i, i+1)};
|
||||
for (int j = 1; j < reshapedInput1.dims; j++)
|
||||
ranges1.emplace_back(cv::Range::all());
|
||||
|
||||
Mat part1 = reshapedInput1(ranges1);
|
||||
int shape[] = {M, K};
|
||||
part1 = part1.reshape(1, sizeof(shape)/sizeof(shape[0]), shape);
|
||||
|
||||
std::vector<Range> ranges2 = {cv::Range(i, i+1)};
|
||||
for (int j = 1; j < reshapedInput2.dims; j++)
|
||||
ranges2.emplace_back(cv::Range::all());
|
||||
|
||||
Mat part2 = reshapedInput2(ranges2);
|
||||
int shape2[] = {K, N};
|
||||
part2 = part2.reshape(1, sizeof(shape2)/sizeof(shape2[0]), shape2);
|
||||
|
||||
Mat tmp_output(M, N, part1.type());
|
||||
fastGemm(false, false, 1.0, part1, part2, 0.0, tmp_output, opt);
|
||||
int newShape[] = {1, M, N};
|
||||
tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
|
||||
|
||||
output.emplace_back(tmp_output);
|
||||
}
|
||||
reshapedInput2 = reshapedInput2.reshape(1, input2ShapeOverride);
|
||||
reshapedInput1 = reshapedInput1.reshape(1, input1ShapeOverride);
|
||||
|
||||
fastGemmBatch(false, false, 1.0, reshapedInput1, reshapedInput2, 0.0, output, opt);
|
||||
} else {
|
||||
|
||||
Mat reshapedInput1 = input1;
|
||||
Mat reshapedInput2 = input2;
|
||||
|
||||
// input1 should of size MxK
|
||||
// check if input1 needs reshape, if need reshape
|
||||
if (input1.dims > 2 || input1.size[0] != M || (input1.dims > 1 && input1.size[1] != K) || input1.dims == 1)
|
||||
@ -1381,23 +1342,12 @@ Mat LayerEinsumImpl::batchwiseMatMul(
|
||||
reshapedInput2 = input2.reshape(1, 2, shape2);
|
||||
}
|
||||
|
||||
Mat tmp_output(M, N, reshapedInput1.type());
|
||||
fastGemm(false, false, 1.0, reshapedInput1, reshapedInput2, 0.0, tmp_output, opt);
|
||||
|
||||
int newShape[] = {1, M, N};
|
||||
tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
|
||||
output.emplace_back(tmp_output);
|
||||
output = Mat(M, N, reshapedInput1.type());
|
||||
fastGemm(false, false, 1.0, reshapedInput1, reshapedInput2, 0.0, output, opt);
|
||||
|
||||
output = output.reshape(1, {1, M, N});
|
||||
}
|
||||
|
||||
int outputDim[] = {static_cast<int>(output.size()), M, N};
|
||||
Mat output_buffer = Mat::zeros(3, outputDim, CV_32F);
|
||||
|
||||
for (size_t i = 0; i < output.size(); i++) {
|
||||
Mat output_slice = output_buffer.row(i);
|
||||
output[i].copyTo(output_slice);
|
||||
}
|
||||
return output_buffer;
|
||||
return output;
|
||||
};
|
||||
Ptr<EinsumLayer> EinsumLayer::create(const LayerParams& params)
|
||||
{
|
||||
|
@ -453,13 +453,6 @@ public:
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!use_half && bias && (outerSize > 1))
|
||||
{
|
||||
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
|
||||
UMat& biases = umat_blobs[1];
|
||||
cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret) return true;
|
||||
|
190
modules/dnn/src/layers/group_norm_layer.cpp
Normal file
190
modules/dnn/src/layers/group_norm_layer.cpp
Normal file
@ -0,0 +1,190 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
#include "./cpu_kernels/fast_norm.hpp"
|
||||
|
||||
// CUDA backend
|
||||
#include "../op_cuda.hpp"
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/group_norm.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
// OpenCL backend
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "../ocl4dnn/include/math_functions.hpp"
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
#endif
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
|
||||
// https://github.com/onnx/onnx/blob/main/docs/Operators.md#GroupNormalization
|
||||
class GroupNormLayerImpl CV_FINAL : public GroupNormLayer {
|
||||
public:
|
||||
GroupNormLayerImpl(const LayerParams ¶ms) {
|
||||
setParamsFrom(params);
|
||||
|
||||
epsilon = params.get<float>("epsilon", 1e-5);
|
||||
num_groups = params.get<int>("num_groups");
|
||||
}
|
||||
|
||||
// Tells the caller whether this layer can run on the given backend:
// only the default OpenCV backend and the CUDA backend are accepted.
virtual bool supportBackend(int backendId) CV_OVERRIDE {
    switch (backendId) {
        case DNN_BACKEND_OPENCV:
        case DNN_BACKEND_CUDA:
            return true;
        default:
            return false;
    }
}
|
||||
|
||||
// Validates the input shapes and reports the output shape.
//   inputs[0] - data tensor, at least 3 dimensions, channels on axis 1
//   inputs[1] - scale, must hold exactly C elements in total
//   inputs[2] - bias, must hold exactly C elements in total
// The single output has the same shape as inputs[0]. Returns false.
bool getMemoryShapes(const std::vector<MatShape> &inputs,
                     const int requiredOutputs,
                     std::vector<MatShape> &outputs,
                     std::vector<MatShape> &internals) const CV_OVERRIDE {
    // inputs[1] and inputs[2] are indexed below, so make sure they exist.
    CV_CheckGE(inputs.size(), static_cast<size_t>(3), "DNN/GroupNorm: input, scale and bias are required");

    const auto &input = inputs[0];
    const auto &scale = inputs[1];
    const auto &bias = inputs[2];
    CV_CheckGE(input.size(), static_cast<size_t>(3), "DNN/GroupNorm: input dimension >= 3 is required");

    int C = input[1];
    // Only the total element count is compared, so e.g. a {C, 1} shaped scale is accepted as well.
    int scale_dim = std::accumulate(scale.begin(), scale.end(), 1, std::multiplies<int>());
    CV_CheckEQ(scale_dim, C, "DNN/GroupNorm: scale must be a 1d tensor and match the channel of input");
    int bias_dim = std::accumulate(bias.begin(), bias.end(), 1, std::multiplies<int>());
    CV_CheckEQ(bias_dim, C, "DNN/GroupNorm: bias must be a 1d tensor and match the channel of input");

    outputs.assign(1, inputs[0]);
    return false;
}
|
||||
|
||||
// CPU forward pass. Inputs are expected in the order {data, scale, bias};
// the normalized result is written into the first output blob.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE {
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    // CV_16S blobs are not handled here; hand them over to the generic fallback.
    const bool needs_fallback = (inputs_arr.depth() == CV_16S);
    if (needs_fallback) {
        forward_fallback(inputs_arr, outputs_arr, internals_arr);
        return;
    }

    std::vector<Mat> in_blobs;
    std::vector<Mat> out_blobs;
    inputs_arr.getMatVector(in_blobs);
    outputs_arr.getMatVector(out_blobs);

    fastNormGroup(in_blobs[0], in_blobs[1], in_blobs[2], out_blobs[0], epsilon, num_groups);
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
// OpenCL forward pass. Inputs are expected in the order {data, scale, bias}.
// The data tensor is treated as `loops` rows of `norm_size` elements, one row per
// (sample, group) pair. Returns false whenever a step cannot be executed.
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) {
    std::vector<UMat> in_umats, out_umats;
    inputs_.getUMatVector(in_umats);
    outputs_.getUMatVector(out_umats);

    const UMat &input = in_umats[0];
    const UMat &scale = in_umats[1];
    const UMat &bias = in_umats[2];
    UMat &output = out_umats[0];

    const auto input_shape = shape(input);
    size_t N = input_shape[0];
    size_t C = input_shape[1];
    size_t num_groups = this->num_groups;
    size_t channels_per_group = C / num_groups;
    size_t loops = N * num_groups;
    size_t norm_size = static_cast<size_t>(total(input_shape, 2)) * channels_per_group;
    float inv_norm_size = 1.f / norm_size;

    // no fp16 support
    if (input.depth() == CV_16S)
        return false;

    const String base_opts = " -DT=float -DT4=float4 -Dconvert_T=convert_float4";

    // Scratch buffers: a column of ones for the GEMV reductions, per-row results,
    // and a full-size intermediate written by the calc_mean kernel.
    UMat one = UMat::ones(norm_size, 1, CV_32F);
    UMat mean(loops, 1, CV_32F);
    UMat mean_square(loops, 1, CV_32F);
    UMat tmp(loops, norm_size, CV_32F);

    // Calculate mean
    if (!ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, loops, norm_size, inv_norm_size,
                                     input, 0, one, 0, 0.f, mean, 0))
        return false;

    // Pick the widest vector width (8, 4 or 1) that evenly divides a row.
    int num_vector = 1;
    if (norm_size % 8 == 0)
        num_vector = 8;
    else if (norm_size % 4 == 0)
        num_vector = 4;

    size_t global[] = {loops, static_cast<size_t>(norm_size / num_vector)};
    const String build_opt = format(" -DNUM=%d", num_vector) + base_opts;

    // Calculate mean_square
    // (presumably calc_mean stores the squared deviation from the mean into tmp — TODO confirm against mvn.cl)
    const String mean_square_kernel_name = format("calc_mean%d", num_vector);
    ocl::Kernel mean_square_kernel(mean_square_kernel_name.c_str(), ocl::dnn::mvn_oclsrc, build_opt + " -DKERNEL_MEAN");
    if (mean_square_kernel.empty())
        return false;

    int arg = 0;
    mean_square_kernel.set(arg++, ocl::KernelArg::PtrReadOnly(input));
    mean_square_kernel.set(arg++, (int)loops);
    mean_square_kernel.set(arg++, (int)norm_size);
    mean_square_kernel.set(arg++, ocl::KernelArg::PtrReadOnly(mean));
    mean_square_kernel.set(arg++, ocl::KernelArg::PtrWriteOnly(tmp));
    if (!mean_square_kernel.run(2, global, NULL, false))
        return false;

    if (!ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, loops, norm_size, inv_norm_size,
                                     tmp, 0, one, 0, 0.f, mean_square, 0))
        return false;

    // Calculate group norm: output = scale * (x - mean) / sqrt(var + eps) + bias
    const String mvn_group_kernel_name = format("mvn_group%d", num_vector);
    const String mvn_group_opts = build_opt + " -DNORM_VARIANCE -DKERNEL_MVN_GROUP";
    ocl::Kernel mvn_group_kernel(mvn_group_kernel_name.c_str(), ocl::dnn::mvn_oclsrc, mvn_group_opts);
    if (mvn_group_kernel.empty())
        return false;

    // Argument order must match the MVN_GROUP kernel signature.
    arg = 0;
    mvn_group_kernel.set(arg++, ocl::KernelArg::PtrReadOnly(input));
    mvn_group_kernel.set(arg++, (int)loops);
    mvn_group_kernel.set(arg++, (int)norm_size);
    mvn_group_kernel.set(arg++, (float)epsilon);
    mvn_group_kernel.set(arg++, ocl::KernelArg::PtrReadOnly(mean));
    mvn_group_kernel.set(arg++, ocl::KernelArg::PtrReadOnly(mean_square));
    mvn_group_kernel.set(arg++, ocl::KernelArg::PtrReadOnly(scale));
    mvn_group_kernel.set(arg++, ocl::KernelArg::PtrReadOnly(bias));
    mvn_group_kernel.set(arg++, (int)C);
    mvn_group_kernel.set(arg++, (int)num_groups);
    mvn_group_kernel.set(arg++, (float)0.f);
    mvn_group_kernel.set(arg++, ocl::KernelArg::PtrWriteOnly(output));

    return mvn_group_kernel.run(2, global, NULL, false);
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
// Creates the CUDA backend node for this layer. The number of normalization
// rows handed to the CUDA primitive is N * num_groups, where N is the first
// dimension of the first input.
Ptr<BackendNode> initCUDA(void *context_,
                          const std::vector<Ptr<BackendWrapper>>& inputs,
                          const std::vector<Ptr<BackendWrapper>>& outputs) override {
    auto cuda_context = reinterpret_cast<csl::CSLContext*>(context_);

    auto data_shape = inputs[0].dynamicCast<CUDABackendWrapper>()->getShape();
    size_t batch = data_shape[0];
    size_t groups = this->num_groups;
    size_t loops = batch * groups;

    return make_cuda_node<cuda4dnn::GroupNormOp>(preferableTarget, std::move(cuda_context->stream), epsilon, loops, groups);
}
|
||||
#endif // HAVE_CUDA
|
||||
|
||||
private:
|
||||
float epsilon;
|
||||
size_t num_groups;
|
||||
};
|
||||
|
||||
// Factory: builds the GroupNorm layer implementation from the given layer parameters.
Ptr<GroupNormLayer> GroupNormLayer::create(const LayerParams &params) {
    return Ptr<GroupNormLayer>(new GroupNormLayerImpl(params));
}
|
||||
|
||||
}} // cv::dnn
|
@ -24,6 +24,16 @@ namespace cv
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
namespace {
|
||||
// Integer modulo whose non-zero result takes the sign of the divisor `y`.
// The built-in `%` yields a remainder with the sign of the dividend, so the
// remainder is shifted by `y` whenever its sign disagrees with the divisor's.
static int _mod(int x, int y) {
    const int remainder = x % y;
    const bool sign_mismatch = (remainder < 0 && y > 0) || (remainder > 0 && y < 0);
    return sign_mismatch ? remainder + y : remainder;
}
|
||||
}
|
||||
|
||||
class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer
|
||||
{
|
||||
public:
|
||||
@ -42,7 +52,8 @@ public:
|
||||
MAX,
|
||||
MEAN,
|
||||
MIN,
|
||||
MOD,
|
||||
MOD, // Integer Mod. Reminder's sign = Divisor's sign.
|
||||
FMOD, // Floating-point Mod. Reminder's sign = Dividend's sign.
|
||||
PROD,
|
||||
SUB,
|
||||
SUM,
|
||||
@ -79,6 +90,8 @@ public:
|
||||
op = OPERATION::MIN;
|
||||
else if (operation == "mod")
|
||||
op = OPERATION::MOD;
|
||||
else if (operation == "fmod")
|
||||
op = OPERATION::FMOD;
|
||||
else if (operation == "mul")
|
||||
op = OPERATION::PROD;
|
||||
else if (operation == "sub")
|
||||
@ -106,18 +119,21 @@ public:
|
||||
#ifdef HAVE_CANN
|
||||
if (backendId == DNN_BACKEND_CANN)
|
||||
return op == OPERATION::ADD || op == OPERATION::PROD || op == OPERATION::SUB ||
|
||||
op == OPERATION::DIV || op == OPERATION::MAX || op == OPERATION::MIN;
|
||||
op == OPERATION::DIV || op == OPERATION::MAX || op == OPERATION::MIN ||
|
||||
op == OPERATION::MOD || op == OPERATION::FMOD;
|
||||
#endif
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
return (op == OPERATION::ADD ||
|
||||
op == OPERATION::PROD ||
|
||||
op == OPERATION::GREATER_EQUAL ||
|
||||
op == OPERATION::LESS_EQUAL
|
||||
op == OPERATION::LESS_EQUAL ||
|
||||
op == OPERATION::MOD ||
|
||||
op == OPERATION::FMOD
|
||||
);
|
||||
if (backendId == DNN_BACKEND_CUDA) {
|
||||
return op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM ||
|
||||
op == OPERATION::PROD || op == OPERATION::DIV || op == OPERATION::ADD ||
|
||||
op == OPERATION::SUB;
|
||||
op == OPERATION::SUB || op == OPERATION::MOD || op == OPERATION::FMOD;
|
||||
}
|
||||
return backendId == DNN_BACKEND_OPENCV;
|
||||
}
|
||||
@ -707,10 +723,16 @@ public:
|
||||
}
|
||||
case OPERATION::MOD:
|
||||
{
|
||||
auto mod = [](const uint8_t &a, const uint8_t &b) { return a % b; };
|
||||
auto mod = [] (const T &a, const T &b) { return static_cast<T>(_mod(int(a), int(b))); };
|
||||
binary_forward<T>(mod, std::forward<Args>(args)...);
|
||||
break;
|
||||
}
|
||||
case OPERATION::FMOD:
|
||||
{
|
||||
auto fmod = [](const T &a, const T &b) { return std::fmod(a, b); };
|
||||
binary_forward<T>(fmod, std::forward<Args>(args)...);
|
||||
break;
|
||||
}
|
||||
case OPERATION::PROD:
|
||||
{
|
||||
auto prod = [](const T &a, const T &b) { return a * b; };
|
||||
@ -782,9 +804,8 @@ public:
|
||||
opDispatch<int32_t>(std::forward<Args>(args)...);
|
||||
break;
|
||||
case CV_32F:
|
||||
CV_Assert(op != OPERATION::BITSHIFT && op != OPERATION::MOD &&
|
||||
op != OPERATION::AND && op != OPERATION::OR &&
|
||||
op != OPERATION::XOR);
|
||||
CV_Assert(op != OPERATION::BITSHIFT && op != OPERATION::AND &&
|
||||
op != OPERATION::OR && op != OPERATION::XOR);
|
||||
opDispatch<float>(std::forward<Args>(args)...);
|
||||
break;
|
||||
default:
|
||||
@ -801,19 +822,6 @@ public:
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
auto input_0_shape = inputs[0].dynamicCast<CUDABackendWrapper>()->getShape();
|
||||
for (int i = 1; i < inputs.size(); i++)
|
||||
{
|
||||
auto input_i_shape = inputs[i].dynamicCast<CUDABackendWrapper>()->getShape();
|
||||
if (input_0_shape.size() != input_i_shape.size())
|
||||
return Ptr<BackendNode>();
|
||||
// check if the shape can be supported by `eltwise_ops.cu`, or return the default BackendNode
|
||||
for (int j = 0; j < input_0_shape.size(); j++)
|
||||
if (input_0_shape[j] != input_i_shape[j] &&
|
||||
input_0_shape[j] != 1 && input_i_shape[j] != 1)
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
cuda4dnn::EltwiseOpType op_ = cuda4dnn::EltwiseOpType::SUM;
|
||||
switch (op) {
|
||||
case OPERATION::MAX:
|
||||
@ -837,6 +845,12 @@ public:
|
||||
case OPERATION::SUB:
|
||||
op_ = cuda4dnn::EltwiseOpType::SUB;
|
||||
break;
|
||||
case OPERATION::MOD:
|
||||
op_ = cuda4dnn::EltwiseOpType::MOD;
|
||||
break;
|
||||
case OPERATION::FMOD:
|
||||
op_ = cuda4dnn::EltwiseOpType::FMOD;
|
||||
break;
|
||||
default: return Ptr<BackendNode>(); // return empty cuda_node if the EltwiseOpType is unsupported type.
|
||||
};
|
||||
|
||||
@ -881,6 +895,8 @@ public:
|
||||
BUILD_CANN_ELTWISE_OP(OPERATION::DIV, Xdivy, name);
|
||||
BUILD_CANN_ELTWISE_OP(OPERATION::MAX, Maximum, name);
|
||||
BUILD_CANN_ELTWISE_OP(OPERATION::MIN, Minimum, name);
|
||||
BUILD_CANN_ELTWISE_OP(OPERATION::MOD, Mod, name);
|
||||
BUILD_CANN_ELTWISE_OP(OPERATION::FMOD, Mod, name);
|
||||
#undef BUILD_CANN_ELTWISE_OP
|
||||
default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
|
||||
}
|
||||
@ -927,6 +943,16 @@ public:
|
||||
node = std::make_shared<ngraph::op::v1::GreaterEqual>(inp0, inp1);
|
||||
else if (op == OPERATION::LESS_EQUAL)
|
||||
node = std::make_shared<ngraph::op::v1::LessEqual>(inp0, inp1);
|
||||
// Ideally we should do this but int32 internal blobs are converted to float32 data type in inference.
|
||||
// TODO: Remove data type conversion when we have type inference.
|
||||
else if (op == OPERATION::MOD) {
|
||||
auto inp0_i64 = std::make_shared<ngraph::op::Convert>(inp0, ngraph::element::i64);
|
||||
auto inp1_i64 = std::make_shared<ngraph::op::Convert>(inp1, ngraph::element::i64);
|
||||
auto mod = std::make_shared<ngraph::op::v1::FloorMod>(inp0_i64, inp1_i64);
|
||||
node = std::make_shared<ngraph::op::Convert>(mod, ngraph::element::f32);
|
||||
}
|
||||
else if (op == OPERATION::FMOD)
|
||||
node = std::make_shared<ngraph::op::v1::Mod>(inp0, inp1);
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "Operation is not implemented for nGraph backend");
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(node));
|
||||
|
@ -74,6 +74,11 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S) {
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
@ -89,49 +94,59 @@ public:
|
||||
// NOTE: This impl does not check whether indices have duplicate entries.
|
||||
// The last duplicate entry will overwrite the previous.
|
||||
template<typename T, typename Functor>
|
||||
void forward_impl(const Functor& rd, const Mat& data, const Mat& indices, const Mat& updates, Mat& out)
|
||||
{
|
||||
data.copyTo(out);
|
||||
void forward_impl(const Functor &reduce_operation, const Mat &input_mat, const Mat &indices_mat, const Mat &updates_mat, Mat& output_mat) {
|
||||
input_mat.copyTo(output_mat);
|
||||
|
||||
const int* shape = data.size.p;
|
||||
const size_t* step = data.step.p;
|
||||
const auto &input_mat_shape = shape(input_mat);
|
||||
std::vector<size_t> input_mat_step(input_mat_shape.size());
|
||||
for (int i = 0; i < input_mat.dims; i++) {
|
||||
input_mat_step[i] = static_cast<size_t>(input_mat.step.p[i] / sizeof(T));
|
||||
}
|
||||
|
||||
const int ind_ndims = indices.dims;
|
||||
const int* ind_shape = indices.size.p;
|
||||
const T* p_indices = indices.ptr<const T>();
|
||||
const int indices_mat_ndims = indices_mat.dims;
|
||||
const auto &indices_mat_shape = shape(indices_mat);
|
||||
|
||||
const int upd_ndims = updates.dims;
|
||||
const int* upd_shape = updates.size.p;
|
||||
const T* p_updates = updates.ptr<const T>();
|
||||
const int updates_mat_ndims = updates_mat.dims;
|
||||
const auto &updates_mat_shape = shape(updates_mat);
|
||||
|
||||
T* p_out = out.ptr<T>();
|
||||
|
||||
int k = ind_shape[ind_ndims - 1]; // last dim of indices
|
||||
size_t total = (size_t)(indices.total() / k);
|
||||
int indices_last_dim = indices_mat_shape[indices_mat_ndims - 1]; // last dim of indices
|
||||
|
||||
size_t updates_size = 1;
|
||||
for (int i = ind_ndims - 1; i < upd_ndims; i++)
|
||||
updates_size *= upd_shape[i];
|
||||
for (int i = indices_mat_ndims - 1; i < updates_mat_ndims; i++)
|
||||
updates_size *= updates_mat_shape[i];
|
||||
|
||||
size_t inp_start_offset = 0;
|
||||
size_t ind_start_offset = 0;
|
||||
size_t upd_start_offset = 0;
|
||||
for (size_t i = 0; i < total; i++, ind_start_offset += k, upd_start_offset += updates_size)
|
||||
{
|
||||
const T* tmp_p_indices = p_indices + ind_start_offset;
|
||||
inp_start_offset = 0;
|
||||
for (int j = 0; j < k; j++)
|
||||
{
|
||||
CV_Assert(tmp_p_indices[j] < shape[j] && tmp_p_indices[j] > -shape[j]);
|
||||
inp_start_offset += (((int)tmp_p_indices[j] + shape[j]) % shape[j]) * step[j];
|
||||
}
|
||||
inp_start_offset /= sizeof(T);
|
||||
auto fn = [&](const Range &r) {
|
||||
size_t input_offset = 0,
|
||||
indices_offset = r.start * indices_last_dim,
|
||||
updates_offset = r.start * updates_size;
|
||||
for (int i = r.start; i < r.end; i++) {
|
||||
const T* indices = indices_mat.ptr<const T>();
|
||||
const T* updates = updates_mat.ptr<const T>();
|
||||
T* output = output_mat.ptr<T>();
|
||||
|
||||
const T* tmp_p_updates = p_updates + upd_start_offset;
|
||||
T* tmp_p_out = p_out + inp_start_offset;
|
||||
for (int j = 0; j < updates_size; j++)
|
||||
tmp_p_out[j] = rd(tmp_p_out[j], tmp_p_updates[j]);
|
||||
input_offset = 0;
|
||||
indices += indices_offset;
|
||||
for (int j = 0; j < indices_last_dim; j++) {
|
||||
int index = static_cast<int>(*(indices + j));
|
||||
index = (index + input_mat_shape[j]) % input_mat_shape[j];
|
||||
CV_Assert(index < input_mat_shape[j] && index >= 0);
|
||||
input_offset += index * input_mat_step[j];
|
||||
}
|
||||
|
||||
updates += updates_offset;
|
||||
output += input_offset;
|
||||
for (int j = 0; j < updates_size; j++) {
|
||||
output[j] = reduce_operation(output[j], updates[j]);
|
||||
}
|
||||
|
||||
indices_offset += indices_last_dim;
|
||||
updates_offset += updates_size;
|
||||
}
|
||||
};
|
||||
|
||||
size_t total = (size_t)(indices_mat.total() / indices_last_dim);
|
||||
double nstripes = (size_t)total * (indices_last_dim + updates_size) * (1 / 1024.0);
|
||||
parallel_for_(Range(0, total), fn, nstripes);
|
||||
}
|
||||
|
||||
template<typename... Args>
|
||||
|
@ -68,6 +68,11 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S) {
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
@ -81,59 +86,62 @@ public:
|
||||
}
|
||||
|
||||
template<typename T, typename Functor>
|
||||
void forward_impl(const Functor& rd, const Mat& data, const Mat& indices, const Mat& updates, Mat& out)
|
||||
{
|
||||
data.copyTo(out);
|
||||
void forward_impl(const Functor &reduce_operation, const Mat &input_mat, const Mat &indices_mat, const Mat &updates_mat, Mat &output_mat) {
|
||||
input_mat.copyTo(output_mat);
|
||||
|
||||
const int ndims = data.dims;
|
||||
const int* shape = data.size.p;
|
||||
const size_t* step = data.step.p;
|
||||
const int ndims = input_mat.dims;
|
||||
|
||||
const int* ind_shape = indices.size.p;
|
||||
const size_t* ind_step = indices.step.p;
|
||||
const auto &input_mat_shape = shape(input_mat);
|
||||
std::vector<size_t> input_mat_step(ndims);
|
||||
|
||||
size_t inp_offset = 0;
|
||||
size_t ind_offset = 0;
|
||||
const T* p_index = indices.ptr<const T>();
|
||||
const T* p_update = updates.ptr<const T>();
|
||||
T* p_out = out.ptr<T>();
|
||||
const auto &indices_mat_shape = shape(indices_mat);
|
||||
std::vector<size_t> indices_mat_step(ndims);
|
||||
|
||||
size_t total = indices.total();
|
||||
for (int i = 0; i < ndims; i++) {
|
||||
input_mat_step[i] = static_cast<size_t>(input_mat.step.p[i] / sizeof(T));
|
||||
indices_mat_step[i] = static_cast<size_t>(indices_mat.step.p[i] / sizeof(T));
|
||||
}
|
||||
|
||||
int j, offset_at_idx, index;
|
||||
size_t t, idx;
|
||||
for (size_t i = 0; i < total; i++)
|
||||
{
|
||||
t = i;
|
||||
inp_offset = 0;
|
||||
ind_offset = 0;
|
||||
int offset_at_axis = 0;
|
||||
for (j = ndims - 1; j >= 0; j--)
|
||||
{
|
||||
idx = t / ind_shape[j];
|
||||
offset_at_idx = (int)(t - idx * ind_shape[j]);
|
||||
ind_offset += offset_at_idx * ind_step[j];
|
||||
inp_offset += offset_at_idx * step[j];
|
||||
t = idx;
|
||||
if (j == axis)
|
||||
{
|
||||
offset_at_axis = offset_at_idx * step[j];
|
||||
auto fn = [&](const Range &r) {
|
||||
size_t input_offset = 0, indices_offset = 0;
|
||||
|
||||
int indices_index, index;
|
||||
size_t axis_offset, tmp_index, j_index;
|
||||
for (int i = r.start; i < r.end; i++) {
|
||||
const T* indices = indices_mat.ptr<const T>();
|
||||
const T* updates = updates_mat.ptr<const T>();
|
||||
T* output = output_mat.ptr<T>();
|
||||
|
||||
input_offset = 0;
|
||||
indices_offset = 0;
|
||||
indices_index = i;
|
||||
axis_offset = 0;
|
||||
for (int j = ndims - 1; j >= 0; j--) {
|
||||
tmp_index = indices_index / indices_mat_shape[j];
|
||||
j_index = (size_t)(indices_index - tmp_index * indices_mat_shape[j]);
|
||||
input_offset += j_index * input_mat_step[j];
|
||||
indices_offset += j_index * indices_mat_step[j];
|
||||
indices_index = tmp_index;
|
||||
if (j == axis) {
|
||||
axis_offset = j_index * input_mat_step[j];
|
||||
}
|
||||
}
|
||||
ind_offset /= sizeof(T);
|
||||
|
||||
// get index and overwrite current indices
|
||||
const T* tmp_p_index = p_index + ind_offset;
|
||||
index = (int)(*tmp_p_index);
|
||||
CV_Assert(index < shape[axis] && index > -shape[axis]);
|
||||
index = static_cast<int>(*(indices + indices_offset));
|
||||
index = (index + input_mat_shape[axis]) % input_mat_shape[axis];
|
||||
CV_Assert(index < input_mat_shape[axis] && index >= 0);
|
||||
input_offset = input_offset - axis_offset + index * input_mat_step[axis];
|
||||
|
||||
inp_offset = inp_offset - offset_at_axis + ((index + shape[axis]) % shape[axis]) * step[axis];
|
||||
inp_offset /= sizeof(T);
|
||||
|
||||
const T* tmp_p_update = p_update + ind_offset;
|
||||
T* tmp_p_out = p_out + inp_offset;
|
||||
*tmp_p_out = rd(*tmp_p_out, *tmp_p_update);
|
||||
updates += indices_offset;
|
||||
output += input_offset;
|
||||
*output = reduce_operation(*output, *updates);
|
||||
}
|
||||
};
|
||||
|
||||
size_t total = indices_mat.total();
|
||||
double nstripes = (size_t)total * ndims * (1 / 1024.0);
|
||||
parallel_for_(Range(0, total), fn, nstripes);
|
||||
}
|
||||
|
||||
template<typename... Args>
|
||||
|
@ -901,7 +901,6 @@ AsyncArray Net::Impl::forwardAsync(const String& outputName)
|
||||
CV_Assert(!empty());
|
||||
FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
|
||||
|
||||
#ifdef CV_CXX11
|
||||
String layerName = outputName;
|
||||
|
||||
if (layerName.empty())
|
||||
@ -922,9 +921,6 @@ AsyncArray Net::Impl::forwardAsync(const String& outputName)
|
||||
isAsync = false;
|
||||
|
||||
return getBlobAsync(layerName);
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires build with enabled C++11");
|
||||
#endif // CV_CXX11
|
||||
}
|
||||
|
||||
|
||||
|
@ -265,11 +265,9 @@ struct Net::Impl : public detail::NetImplBase
|
||||
|
||||
Mat getBlob(String outputName) const;
|
||||
|
||||
#ifdef CV_CXX11
|
||||
virtual AsyncArray getBlobAsync(const LayerPin& pin);
|
||||
|
||||
AsyncArray getBlobAsync(String outputName);
|
||||
#endif // CV_CXX11
|
||||
|
||||
string dump(bool forceAllocation = false) const;
|
||||
|
||||
|
@ -728,6 +728,10 @@ void Net::Impl::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
|
||||
if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
|
||||
break;
|
||||
#ifdef HAVE_CUDA
|
||||
/* Risk: Not every operation in "NaryEltwise" is supported in the CUDA backend. There is a chance
|
||||
that Concat's output is filled with data in both host and device, leading to data missing.
|
||||
See https://github.com/opencv/opencv/issues/24721 for more details.
|
||||
*/
|
||||
if (preferableBackend == DNN_BACKEND_CUDA &&
|
||||
(inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false ||
|
||||
(inp_i_data->layerInstance->type != "Convolution" &&
|
||||
|
@ -97,8 +97,8 @@ bool OCL4DNNInnerProduct<Dtype>::Forward(const UMat& bottom,
|
||||
max_image_size);
|
||||
}
|
||||
|
||||
if (use_half_ && bias_term_)
|
||||
{
|
||||
if (bias_term_) {
|
||||
if (use_half_) {
|
||||
UMat biasOneMat = UMat::ones(M_, 1, CV_32F);
|
||||
UMat newbias, tmpTop;
|
||||
|
||||
@ -106,6 +106,10 @@ bool OCL4DNNInnerProduct<Dtype>::Forward(const UMat& bottom,
|
||||
convertFp16(top, tmpTop);
|
||||
cv::gemm(biasOneMat, newbias, 1, tmpTop, 1, tmpTop, 0);
|
||||
convertFp16(tmpTop, top);
|
||||
} else {
|
||||
UMat biasOnesMat = UMat::ones(M_, 1, CV_32F);
|
||||
cv::gemm(biasOnesMat, bias, 1, top, 1, top, 0);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -86,6 +86,7 @@ public:
|
||||
int getTensorShapeSize(int node_id, int node_input_id) {
|
||||
const auto node = getNode(node_id);
|
||||
const auto &input_name = node->getInputName(node_input_id);
|
||||
// try to get from value_info
|
||||
for (int i = 0; i < net.value_info_size(); i++) {
|
||||
const auto value_info = net.value_info(i);
|
||||
if (value_info.name() == input_name) {
|
||||
@ -97,6 +98,18 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
// try to get from input
|
||||
for (int i = 0; i < net.input_size(); i++) {
|
||||
const auto input = net.input(i);
|
||||
if (input.name() == input_name) {
|
||||
if (input.has_type() && input.type().has_tensor_type() &&
|
||||
input.type().tensor_type().has_shape()) {
|
||||
return input.type().tensor_type().shape().dim_size();
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -660,6 +673,10 @@ private:
|
||||
[Input] -> LayerNorm -> [Output]
|
||||
\
|
||||
[weight], [bias]
|
||||
|
||||
Note: axes of ReduceMean must be:
|
||||
- last element is the axis of last dimension (-1 or (input_ndims - 1))
|
||||
- a list of adjacent axes, e.g. [1, 2, 3, ..., input_ndims - 1]
|
||||
*/
|
||||
class LayerNormSubGraph : public Subgraph
|
||||
{
|
||||
@ -683,19 +700,22 @@ public:
|
||||
setFusedNode("LayerNormalization", input);
|
||||
}
|
||||
|
||||
static float extractAxis(const Ptr<ImportGraphWrapper>& net, int node_id)
|
||||
static std::vector<int64_t> extractAxis(const Ptr<ImportGraphWrapper>& net, int node_id)
|
||||
{
|
||||
// TODO: consider ReduceMean-18 which has axes as one of the inputs instead of attributes
|
||||
Ptr<ImportNodeWrapper> mean_ptr = net->getNode(node_id);
|
||||
opencv_onnx::NodeProto* mean_node = mean_ptr.dynamicCast<ONNXNodeWrapper>()->node;
|
||||
int axis_ = -1;
|
||||
std::vector<int64_t> axes;
|
||||
for (int i = 0; i < mean_node->attribute_size(); i++)
|
||||
{
|
||||
opencv_onnx::AttributeProto attr = mean_node->attribute(i);
|
||||
if (attr.name() != "axes")
|
||||
continue;
|
||||
axis_ = static_cast<int>(attr.ints(0));
|
||||
for (int j = 0; j < attr.ints_size(); j++) {
|
||||
axes.push_back(attr.ints(j));
|
||||
}
|
||||
return axis_;
|
||||
}
|
||||
return axes;
|
||||
}
|
||||
|
||||
virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
|
||||
@ -707,11 +727,31 @@ public:
|
||||
if (pow_exp - 2 > 1e-5) // not pow(2)
|
||||
return false;
|
||||
|
||||
int axis_mean1 = extractAxis(net, matchedNodesIds[mean]);
|
||||
int axis_mean2 = extractAxis(net, matchedNodesIds[mean1]);
|
||||
if (axis_mean1 != axis_mean2)
|
||||
std::vector<int64_t> axes = extractAxis(net, matchedNodesIds[mean]);
|
||||
// check whether it is -1 or last_axis or [axis, ..., last_axis]
|
||||
int64_t input_ndims = static_cast<int64_t>(net.dynamicCast<ONNXGraphWrapper>()->getTensorShapeSize(matchedNodesIds[mean], 0));
|
||||
if (input_ndims == -1) {
|
||||
return false; // input shape unknown
|
||||
}
|
||||
// assume that axes are sorted in ascending order, e.g. [0, 1, 2, 3] or [-3, -2, -1]
|
||||
if (axes.back() != -1 && axes.back() != (input_ndims - 1)) {
|
||||
return false;
|
||||
axis = axis_mean1;
|
||||
}
|
||||
for (size_t i = 0; i < axes.size() - 1; i++) {
|
||||
if (axes[i] - axes[i + 1] != -1) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int64_t> axes1 = extractAxis(net, matchedNodesIds[mean1]);
|
||||
if (axes.size() != axes1.size())
|
||||
return false;
|
||||
for (size_t i = 0; i < axes.size(); i++) {
|
||||
if (((axes[i] + input_ndims) % input_ndims) != ((axes1[i] + input_ndims) % input_ndims)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
axis = axes[0];
|
||||
|
||||
epsilon = extractConstant(net, matchedNodesIds[add], 1).at<float>(0);
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
|
||||
#ifdef HAVE_PROTOBUF
|
||||
|
||||
#include <array>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
@ -2619,6 +2620,7 @@ void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::Node
|
||||
|
||||
// Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1
|
||||
int axis = layerParams.get<int>("axis", 1);
|
||||
axis = normalize_axis(axis, inputShape.size());
|
||||
for (size_t i = 0; i < inputs.size(); ++i)
|
||||
{
|
||||
inputShape[axis] = inputs[i].dims == (int)inputShape.size() ? inputs[i].size[axis] : 1;
|
||||
@ -2831,6 +2833,11 @@ void ONNXImporter::parseElementWise(LayerParams& layerParams, const opencv_onnx:
|
||||
|
||||
layerParams.type = "NaryEltwise";
|
||||
layerParams.set("operation", toLowerCase(node_proto.op_type()));
|
||||
if (node_proto.op_type() == "Mod") {
|
||||
if (layerParams.get<int>("fmod", 0)) {
|
||||
layerParams.set("operation", "fmod");
|
||||
};
|
||||
}
|
||||
|
||||
// element-wise layers that can have >=1 inputs but actually have one input
|
||||
if (node_proto.input_size() == 1 && (op_type == "max" || op_type == "min" || op_type == "mean" || op_type == "sum"))
|
||||
@ -4004,10 +4011,11 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
|
||||
dispatch["ScatterElements"] = dispatch["Scatter"] = dispatch["ScatterND"] = &ONNXImporter::parseScatter;
|
||||
dispatch["Tile"] = &ONNXImporter::parseTile;
|
||||
dispatch["LayerNormalization"] = &ONNXImporter::parseLayerNorm;
|
||||
dispatch["GroupNormalization"] = &ONNXImporter::parseInstanceNormalization;
|
||||
|
||||
dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = dispatch["Pow"] = dispatch["Add"] =
|
||||
dispatch["Sub"] = dispatch["Mul"] = dispatch["Div"] = dispatch["GreaterOrEqual"] =
|
||||
dispatch["LessOrEqual"] = &ONNXImporter::parseElementWise;
|
||||
dispatch["LessOrEqual"] = dispatch["Mod"] = &ONNXImporter::parseElementWise;
|
||||
|
||||
dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseElementWise;
|
||||
dispatch["Where"] = &ONNXImporter::parseElementWise;
|
||||
|
@ -54,6 +54,7 @@
|
||||
#define vec_type Dtype8
|
||||
#define CALC_MEAN calc_mean8
|
||||
#define MVN mvn8
|
||||
#define MVN_GROUP mvn_group8
|
||||
#define MEAN_FUSE mean_fuse8
|
||||
#define MVN_FUSE mvn_fuse8
|
||||
#elif NUM == 4
|
||||
@ -62,6 +63,7 @@
|
||||
#define vec_type Dtype4
|
||||
#define CALC_MEAN calc_mean4
|
||||
#define MVN mvn4
|
||||
#define MVN_GROUP mvn_group4
|
||||
#define MEAN_FUSE mean_fuse4
|
||||
#define MVN_FUSE mvn_fuse4
|
||||
#elif NUM == 1
|
||||
@ -70,6 +72,7 @@
|
||||
#define vec_type Dtype
|
||||
#define CALC_MEAN calc_mean1
|
||||
#define MVN mvn1
|
||||
#define MVN_GROUP mvn_group1
|
||||
#define MEAN_FUSE mean_fuse1
|
||||
#define MVN_FUSE mvn_fuse1
|
||||
#endif
|
||||
@ -150,6 +153,54 @@ __kernel void MVN(__global const Dtype* src,
|
||||
store(dst_vec, dst, index);
|
||||
}
|
||||
|
||||
#elif defined KERNEL_MVN_GROUP
|
||||
|
||||
// Normalizes one NUM-wide vector of `src` and applies a per-channel
// scale/shift, writing the result to the same position in `dst`.
//
// Work-item mapping:
//   get_global_id(0) -> row index x
//   get_global_id(1) -> vector index within the row (first column y = id * NUM)
//
// Parameters:
//   src, dst    input / output buffers addressed as rows x cols
//   rows, cols  extent of the 2D view; work-items outside it return early
//   eps         added to dev[x] before the square root (NORM_VARIANCE only)
//   mean, dev   per-row statistics, indexed by x
//   weight,bias per-channel scale and shift, indexed by channel_index
//   channels    total channel count
//   num_groups  number of groups the channels are split into
//   relu_slope  negative-side slope, used only when FUSE_RELU is defined
//
// NOTE(review): a single weight/bias pair is applied to the whole vector, so
// this presumably relies on a NUM-wide vector never straddling two channels --
// confirm against the host-side choice of NUM.
__kernel void MVN_GROUP(__global const Dtype* src,
                        const int rows,
                        const int cols,
                        const Dtype eps,
                        __global const Dtype* mean,
                        __global const Dtype* dev,
                        __global const Dtype* weight,
                        __global const Dtype* bias,
                        const int channels,
                        const int num_groups,
                        const float relu_slope,
                        __global Dtype* dst)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * NUM;
    int index = x * cols + y;

    if (x >= rows || y >= cols)
        return;

    // Channels per group.
    int group_size = channels / num_groups;
    // Number of elements each channel contributes to one row. The previous code
    // read an undeclared `norm_size` here; the row length `cols` is used instead
    // (presumably the same quantity -- confirm against the host-side launch code).
    int step = cols / group_size;
    // Row x belongs to group (x % num_groups); y / step selects the channel
    // inside that group.
    int channel_index = x % num_groups * group_size + y / step;

    Dtype mean_val = mean[x];
    Dtype dev_val = dev[x];
    Dtype alpha;
#ifdef NORM_VARIANCE
    alpha = 1 / sqrt(eps + dev_val);
#else
    alpha = 1;
#endif

    Dtype w = weight[channel_index], b = bias[channel_index];

    // (src - mean) * alpha * w + b
    vec_type src_vec = load(src, index) - (vec_type)mean_val;
    vec_type dst_vec = src_vec * alpha;
    dst_vec = dst_vec * w + (vec_type)b;

#ifdef FUSE_RELU
    // Leaky ReLU: keep positive lanes, scale the others by relu_slope.
    vec_type new_val = dst_vec * relu_slope;
    dst_vec = select(new_val, dst_vec, dst_vec > (vec_type)0.f);
#endif

    store(dst_vec, dst, index);
}
|
||||
|
||||
#elif defined KERNEL_MEAN_FUSE
|
||||
|
||||
__kernel void MEAN_FUSE(__global const T * A,
|
||||
|
@ -95,6 +95,12 @@ public:
|
||||
Net net;
|
||||
};
|
||||
|
||||
// Runs the YOLOv8n ONNX model through processNet() with a 640x640 input and
// the "output0" output layer, then checks that neither the IE nor the CUDA
// backend fell back to another implementation. Registered with the DISABLED_
// prefix.
TEST_P(DNNTestNetwork, DISABLED_YOLOv8n)
{
    const std::string weightsPath = "dnn/onnx/models/yolov8n.onnx";
    const std::string protoPath = "";  // processNet() is given an empty proto path
    const Size inputSize(640, 640);

    processNet(weightsPath, protoPath, inputSize, "output0");

    expectNoFallbacksFromIE(net);
    expectNoFallbacksFromCUDA(net);
}
|
||||
|
||||
TEST_P(DNNTestNetwork, AlexNet)
|
||||
{
|
||||
applyTestTag(CV_TEST_TAG_MEMORY_1GB);
|
||||
@ -1454,6 +1460,71 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Backends, Eltwise, testing::Combine(
|
||||
dnnBackendsAndTargets()
|
||||
));
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Element-wise layers
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
using NaryEltwiseConcat = TestWithParam<tuple<std::vector<int>, tuple<Backend, Target>>>;
|
||||
TEST_P(NaryEltwiseConcat, Accuracy) {
|
||||
auto param = GetParam();
|
||||
std::vector<int> input_shape = get<0>(param);
|
||||
auto backend_id = get<0>(get<1>(param));
|
||||
auto target_id = get<1>(get<1>(param));
|
||||
|
||||
/* Build the following net:
|
||||
|
||||
<1x4x84>
|
||||
/
|
||||
[Input] -+-> Mul(B<1x84>) -> Concat(axis=1) -> [Output]
|
||||
| |
|
||||
+-> Sigmoid ----------+
|
||||
|
||||
*/
|
||||
Net net;
|
||||
|
||||
std::vector<int> mul_B_shape(input_shape.size() - 1, 1);
|
||||
mul_B_shape.back() = input_shape.back();
|
||||
Mat mul_B(mul_B_shape, CV_32FC1);
|
||||
randn(mul_B, 0.f, 1.f);
|
||||
LayerParams mul_B_lp;
|
||||
mul_B_lp.name = "mul_B";
|
||||
mul_B_lp.type = "Const";
|
||||
mul_B_lp.blobs.push_back(mul_B);
|
||||
int id_mul_B = net.addLayer(mul_B_lp.name, mul_B_lp.type, mul_B_lp);
|
||||
|
||||
LayerParams mul_lp;
|
||||
mul_lp.name = "mul";
|
||||
mul_lp.type = "NaryEltwise";
|
||||
mul_lp.set("operation", "mul");
|
||||
int id_mul = net.addLayer(mul_lp.name, mul_lp.type, mul_lp);
|
||||
net.connect(0, 0, id_mul, 0);
|
||||
net.connect(id_mul_B, 0, id_mul, 1);
|
||||
|
||||
LayerParams sigmoid_lp;
|
||||
sigmoid_lp.name = "sigmoid";
|
||||
sigmoid_lp.type = "Sigmoid";
|
||||
int id_sigmoid = net.addLayer(sigmoid_lp.name, sigmoid_lp.type, sigmoid_lp);
|
||||
net.connect(0, 0, id_sigmoid, 0);
|
||||
|
||||
LayerParams concat_lp;
|
||||
concat_lp.name = "concat";
|
||||
concat_lp.type = "Concat";
|
||||
concat_lp.set("axis", 1);
|
||||
int id_concat = net.addLayer(concat_lp.name, concat_lp.type, concat_lp);
|
||||
net.connect(id_mul, 0, id_concat, 0);
|
||||
net.connect(id_sigmoid, 0, id_concat, 1);
|
||||
|
||||
// Run test
|
||||
Mat input(input_shape, CV_32FC1);
|
||||
testLayer(input, net, backend_id, target_id, false);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Backends, NaryEltwiseConcat, testing::Combine(
|
||||
testing::Values(std::vector<int>{1, 4, 84}),
|
||||
dnnBackendsAndTargets())
|
||||
);
|
||||
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_layers_backends, dnnBackendsAndTargets());
|
||||
|
||||
}} // namespace
|
||||
|
@ -47,6 +47,10 @@ TEST_F(Test_Graph_Simplifier, LayerNormSubGraph) {
|
||||
test("layer_norm_expanded_with_initializers", "LayerNormalization");
|
||||
}
|
||||
|
||||
// Runs the fixture's test() helper on the "layer_norm_no_fusion" model with the
// layer types "NaryEltwise", "Reduce" and "Sqrt" as the expected list.
TEST_F(Test_Graph_Simplifier, LayerNormNoFusionSubGraph)
{
    const std::vector<std::string> expectedLayerTypes{"NaryEltwise", "Reduce", "Sqrt"};
    test("layer_norm_no_fusion", expectedLayerTypes);
}
|
||||
|
||||
TEST_F(Test_Graph_Simplifier, ResizeSubgraph) {
|
||||
/* Test for 6 subgraphs:
|
||||
- GatherCastSubgraph
|
||||
|
@ -2050,7 +2050,7 @@ private:
|
||||
net.setPreferableTarget(target);
|
||||
|
||||
Mat re;
|
||||
ASSERT_NO_THROW(re = net.forward()); // runtime error
|
||||
re = net.forward();
|
||||
auto ptr_re = (float *) re.data;
|
||||
for (int i = 0; i < re.total(); i++)
|
||||
if (op == "sum"){
|
||||
|
@ -1033,14 +1033,10 @@ TEST_P(Test_two_inputs, basic)
|
||||
randu(firstInp, 0, 100);
|
||||
randu(secondInp, 0, 100);
|
||||
|
||||
#ifndef CV_CXX11
|
||||
std::vector<String> input_names;
|
||||
input_names.push_back("data");
|
||||
input_names.push_back("second_input");
|
||||
net.setInputsNames(input_names);
|
||||
#else
|
||||
net.setInputsNames({"data", "second_input"});
|
||||
#endif
|
||||
net.setInput(firstInp, "data", kScale);
|
||||
net.setInput(secondInp, "second_input", kScaleInv);
|
||||
net.setPreferableBackend(backendId);
|
||||
|
@ -311,6 +311,8 @@ static const TestCase testConformanceConfig[] = {
|
||||
{"test_gridsample_nearest", 2, 1},
|
||||
{"test_gridsample_reflection_padding", 2, 1},
|
||||
{"test_gridsample_zeros_padding", 2, 1},
|
||||
{"test_group_normalization_epsilon", 3, 1},
|
||||
{"test_group_normalization_example", 3, 1},
|
||||
{"test_gru_batchwise", 3, 2},
|
||||
{"test_gru_defaults", 3, 1},
|
||||
{"test_gru_seq_length", 4, 1},
|
||||
|
@ -736,6 +736,10 @@ CASE(test_gridsample_reflection_padding)
|
||||
// no filter
|
||||
CASE(test_gridsample_zeros_padding)
|
||||
// no filter
|
||||
CASE(test_group_normalization_epsilon)
|
||||
// no filter
|
||||
CASE(test_group_normalization_example)
|
||||
// no filter
|
||||
CASE(test_gru_batchwise)
|
||||
// no filter
|
||||
CASE(test_gru_defaults)
|
||||
@ -1056,10 +1060,25 @@ CASE(test_mod_int64_fmod)
|
||||
// no filter
|
||||
CASE(test_mod_mixed_sign_float16)
|
||||
// no filter
|
||||
if (target == DNN_TARGET_OPENCL)
|
||||
{
|
||||
default_l1 = 0.0011; // Expected: (normL1) <= (l1), actual: 0.00104141 vs 1e-05
|
||||
default_lInf = 0.0016; // Expected: (normInf) <= (lInf), actual: 0.00156212 vs 0.0001
|
||||
}
|
||||
CASE(test_mod_mixed_sign_float32)
|
||||
// no filter
|
||||
if (target == DNN_TARGET_OPENCL)
|
||||
{
|
||||
default_l1 = 0.0011; // Expected: (normL1) <= (l1), actual: 0.00104141 vs 1e-05
|
||||
default_lInf = 0.0016; // Expected: (normInf) <= (lInf), actual: 0.00156212 vs 0.0001
|
||||
}
|
||||
CASE(test_mod_mixed_sign_float64)
|
||||
// no filter
|
||||
if (target == DNN_TARGET_OPENCL)
|
||||
{
|
||||
default_l1 = 0.0011; // Expected: (normL1) <= (l1), actual: 0.00104167 vs 1e-05
|
||||
default_lInf = 0.0016; // Expected: (normInf) <= (lInf), actual: 0.00156251 vs 0.0001
|
||||
}
|
||||
CASE(test_mod_mixed_sign_int16)
|
||||
// no filter
|
||||
CASE(test_mod_mixed_sign_int32)
|
||||
|
@ -41,7 +41,7 @@
|
||||
"test_cast_STRING_to_FLOAT",
|
||||
"test_castlike_FLOAT_to_STRING_expanded",
|
||||
"test_castlike_STRING_to_FLOAT_expanded",
|
||||
"test_concat_1d_axis_negative_1",
|
||||
"test_concat_1d_axis_negative_1", // 1d support is required
|
||||
"test_div_uint8", // output type mismatch
|
||||
"test_maxpool_2d_dilations",
|
||||
"test_maxpool_2d_same_lower",
|
||||
|
@ -210,9 +210,6 @@
|
||||
"test_min_uint8",
|
||||
"test_mod_broadcast",
|
||||
"test_mod_int64_fmod",
|
||||
"test_mod_mixed_sign_float16",
|
||||
"test_mod_mixed_sign_float32",
|
||||
"test_mod_mixed_sign_float64",
|
||||
"test_mod_mixed_sign_int16",
|
||||
"test_mod_mixed_sign_int32",
|
||||
"test_mod_mixed_sign_int64",
|
||||
|
@ -2673,24 +2673,36 @@ void yoloPostProcessing(
|
||||
cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
|
||||
}
|
||||
|
||||
// each row is [cx, cy, w, h, conf_obj, conf_class1, ..., conf_class80]
|
||||
if (test_name == "yolonas"){
|
||||
// outs contains 2 elements of shape [1, 8400, 80] and [1, 8400, 4]. Concat them to get [1, 8400, 84]
|
||||
Mat concat_out;
|
||||
// squeeze the first dimension
|
||||
outs[0] = outs[0].reshape(1, outs[0].size[1]);
|
||||
outs[1] = outs[1].reshape(1, outs[1].size[1]);
|
||||
cv::hconcat(outs[1], outs[0], concat_out);
|
||||
outs[0] = concat_out;
|
||||
// remove the second element
|
||||
outs.pop_back();
|
||||
// unsqueeze the first dimension
|
||||
outs[0] = outs[0].reshape(0, std::vector<int>{1, 8400, 84});
|
||||
}
|
||||
|
||||
for (auto preds : outs){
|
||||
|
||||
preds = preds.reshape(1, preds.size[1]); // [1, 8400, 85] -> [8400, 85]
|
||||
|
||||
for (int i = 0; i < preds.rows; ++i)
|
||||
{
|
||||
// filter out non objects
|
||||
float obj_conf = (test_name != "yolov8") ? preds.at<float>(i, 4) : 1.0f;
|
||||
// filter out non object
|
||||
float obj_conf = (test_name == "yolov8" || test_name == "yolonas") ? 1.0f : preds.at<float>(i, 4) ;
|
||||
if (obj_conf < conf_threshold)
|
||||
continue;
|
||||
|
||||
Mat scores = preds.row(i).colRange((test_name != "yolov8") ? 5 : 4, preds.cols);
|
||||
Mat scores = preds.row(i).colRange((test_name == "yolov8" || test_name == "yolonas") ? 4 : 5, preds.cols);
|
||||
double conf;
|
||||
Point maxLoc;
|
||||
minMaxLoc(scores, 0, &conf, 0, &maxLoc);
|
||||
|
||||
conf = (test_name != "yolov8") ? conf * obj_conf : conf;
|
||||
conf = (test_name == "yolov8" || test_name == "yolonas") ? conf : conf * obj_conf;
|
||||
if (conf < conf_threshold)
|
||||
continue;
|
||||
|
||||
@ -2701,9 +2713,14 @@ void yoloPostProcessing(
|
||||
double w = det[2];
|
||||
double h = det[3];
|
||||
|
||||
// std::cout << "cx: " << cx << " cy: " << cy << " w: " << w << " h: " << h << " conf: " << conf << " idx: " << maxLoc.x << std::endl;
|
||||
// [x1, y1, x2, y2]
|
||||
if (test_name == "yolonas"){
|
||||
boxes.push_back(Rect2d(cx, cy, w, h));
|
||||
} else {
|
||||
boxes.push_back(Rect2d(cx - 0.5 * w, cy - 0.5 * h,
|
||||
cx + 0.5 * w, cy + 0.5 * h));
|
||||
}
|
||||
classIds.push_back(maxLoc.x);
|
||||
confidences.push_back(conf);
|
||||
}
|
||||
@ -2758,6 +2775,41 @@ TEST_P(Test_ONNX_nets, YOLOX)
|
||||
1.0e-4, 1.0e-4);
|
||||
}
|
||||
|
||||
// Runs the YOLO-NAS (small) ONNX model through testYOLO() and compares its
// detections with stored reference class ids, scores and boxes.
// Model information: https://dl.opencv.org/models/yolo-nas/Readme.md
TEST_P(Test_ONNX_nets, YOLONas)
{
    const std::string modelPath = _tf("models/yolo_nas_s.onnx", false);

    // Reference detections; boxes are given as [x1, y1, x2, y2].
    const std::vector<int> expectedClassIds{1, 16, 7};
    const std::vector<float> expectedScores{0.9720f, 0.9283f, 0.8990f};
    const std::vector<Rect2d> expectedBoxes{
        Rect2d(105.516, 173.696, 471.323, 430.433),
        Rect2d(109.241, 263.406, 259.872, 531.858),
        Rect2d(390.153, 142.492, 574.932, 222.709)
    };

    // Pre-processing: scale by 1/255, 640x640 letterbox with border value 114,
    // zero mean, no R/B swap, CV_32F NCHW blob.
    const Size inputSize(640, 640);
    Image2BlobParams blobParams(
        Scalar::all(1/255.0),
        inputSize,
        Scalar::all(0),
        false,
        CV_32F,
        DNN_LAYOUT_NCHW,
        DNN_PMODE_LETTERBOX,
        Scalar::all(114)
    );

    const float confThreshold = 0.50;
    const float iouThreshold = 0.50;

    testYOLO(
        modelPath, expectedClassIds, expectedScores, expectedBoxes,
        blobParams, confThreshold, iouThreshold,
        1.0e-4, 1.0e-4, "yolonas");
}
|
||||
|
||||
TEST_P(Test_ONNX_nets, YOLOv8)
|
||||
{
|
||||
std::string weightPath = _tf("models/yolov8n.onnx", false);
|
||||
@ -2804,7 +2856,7 @@ TEST_P(Test_ONNX_nets, YOLOv7)
|
||||
CV_TEST_TAG_DEBUG_VERYLONG
|
||||
);
|
||||
|
||||
std::string weightPath = _tf("models/yolov7_not_simplified.onnx", false);
|
||||
std::string weightPath = _tf("models/yolov7.onnx", false);
|
||||
// Reference, which is collected with input size of 640x640
|
||||
std::vector<int> refClassIds{1, 16, 7};
|
||||
std::vector<float> refScores{0.9614331f, 0.9589417f, 0.8679074f};
|
||||
@ -3031,6 +3083,10 @@ TEST_P(Test_ONNX_nets, VitTrack) {
|
||||
normAssert(ref_output3, outputs[2], "VitTrack output3");
|
||||
}
|
||||
|
||||
// Runs the "layer_norm_no_fusion" ONNX model through testONNXModels().
TEST_P(Test_ONNX_layers, LayerNormNoFusion)
{
    const char* const modelName = "layer_norm_no_fusion";
    testONNXModels(modelName);
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());
|
||||
|
||||
}} // namespace
|
||||
|
@ -2,7 +2,7 @@
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
@ -10,7 +10,7 @@
|
||||
// (cv::gapi::ie::backend() is still there and is defined always)
|
||||
#include "backends/ie/giebackend.hpp"
|
||||
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
|
||||
|
||||
#if INF_ENGINE_RELEASE <= 2019010000
|
||||
# error G-API IE module supports only OpenVINO IE >= 2019 R1
|
||||
|
@ -2,7 +2,7 @@
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
|
||||
#ifndef OPENCV_GAPI_GIEBACKEND_HPP
|
||||
#define OPENCV_GAPI_GIEBACKEND_HPP
|
||||
@ -10,7 +10,7 @@
|
||||
// Include anyway - cv::gapi::ie::backend() still needs to be defined
|
||||
#include "opencv2/gapi/infer/ie.hpp"
|
||||
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
|
||||
|
||||
#include <ade/util/algorithm.hpp> // type_list_index
|
||||
#include <condition_variable>
|
||||
|
@ -2,9 +2,9 @@
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include "../test_precomp.hpp"
|
||||
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
|
||||
|
||||
#include <stdexcept>
|
||||
#include <mutex>
|
||||
|
@ -1662,14 +1662,14 @@ CvWindow::CvWindow(QString name, int arg2)
|
||||
|
||||
//Now attach everything
|
||||
if (myToolBar)
|
||||
myGlobalLayout->addWidget(myToolBar, Qt::AlignCenter);
|
||||
myGlobalLayout->addWidget(myToolBar, 0, Qt::AlignLeft);
|
||||
|
||||
myGlobalLayout->addWidget(myView->getWidget(), Qt::AlignCenter);
|
||||
myGlobalLayout->addWidget(myView->getWidget(), 0, Qt::AlignCenter);
|
||||
|
||||
myGlobalLayout->addLayout(myBarLayout, Qt::AlignCenter);
|
||||
myGlobalLayout->addLayout(myBarLayout);
|
||||
|
||||
if (myStatusBar)
|
||||
myGlobalLayout->addWidget(myStatusBar, Qt::AlignCenter);
|
||||
myGlobalLayout->addWidget(myStatusBar, 0, Qt::AlignLeft);
|
||||
|
||||
setLayout(myGlobalLayout);
|
||||
show();
|
||||
@ -2079,7 +2079,6 @@ void CvWindow::createStatusBar()
|
||||
{
|
||||
myStatusBar = new QStatusBar(this);
|
||||
myStatusBar->setSizeGripEnabled(false);
|
||||
myStatusBar->setFixedHeight(20);
|
||||
myStatusBar->setMinimumWidth(1);
|
||||
myStatusBar_msg = new QLabel;
|
||||
|
||||
|
@ -409,7 +409,9 @@ bool JpegDecoder::readData( Mat& img )
|
||||
{
|
||||
jpeg_decompress_struct* cinfo = &((JpegState*)m_state)->cinfo;
|
||||
JpegErrorMgr* jerr = &((JpegState*)m_state)->jerr;
|
||||
#ifndef JCS_EXTENSIONS
|
||||
JSAMPARRAY buffer = 0;
|
||||
#endif
|
||||
|
||||
if( setjmp( jerr->setjmp_buffer ) == 0 )
|
||||
{
|
||||
@ -429,6 +431,18 @@ bool JpegDecoder::readData( Mat& img )
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef JCS_EXTENSIONS
|
||||
if( color )
|
||||
{
|
||||
cinfo->out_color_space = JCS_EXT_BGR;
|
||||
cinfo->out_color_components = 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
cinfo->out_color_space = JCS_GRAYSCALE;
|
||||
cinfo->out_color_components = 1;
|
||||
}
|
||||
#else
|
||||
if( color )
|
||||
{
|
||||
if( cinfo->num_components != 4 )
|
||||
@ -455,6 +469,7 @@ bool JpegDecoder::readData( Mat& img )
|
||||
cinfo->out_color_components = 4;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Check for Exif marker APP1
|
||||
jpeg_saved_marker_ptr exif_marker = NULL;
|
||||
@ -481,12 +496,17 @@ bool JpegDecoder::readData( Mat& img )
|
||||
|
||||
jpeg_start_decompress( cinfo );
|
||||
|
||||
#ifndef JCS_EXTENSIONS
|
||||
buffer = (*cinfo->mem->alloc_sarray)((j_common_ptr)cinfo,
|
||||
JPOOL_IMAGE, m_width*4, 1 );
|
||||
#endif
|
||||
|
||||
uchar* data = img.ptr();
|
||||
for( ; m_height--; data += step )
|
||||
{
|
||||
#ifdef JCS_EXTENSIONS
|
||||
jpeg_read_scanlines( cinfo, &data, 1 );
|
||||
#else
|
||||
jpeg_read_scanlines( cinfo, buffer, 1 );
|
||||
if( color )
|
||||
{
|
||||
@ -502,6 +522,7 @@ bool JpegDecoder::readData( Mat& img )
|
||||
else
|
||||
icvCvt_CMYK2Gray_8u_C4C1R( buffer[0], 0, data, 0, Size(m_width,1) );
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
result = true;
|
||||
@ -593,8 +614,11 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
|
||||
int width = img.cols, height = img.rows;
|
||||
|
||||
std::vector<uchar> out_buf(1 << 12);
|
||||
|
||||
#ifndef JCS_EXTENSIONS
|
||||
AutoBuffer<uchar> _buffer;
|
||||
uchar* buffer;
|
||||
#endif
|
||||
|
||||
struct jpeg_compress_struct cinfo;
|
||||
JpegErrorMgr jerr;
|
||||
@ -629,8 +653,15 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
|
||||
|
||||
int _channels = img.channels();
|
||||
int channels = _channels > 1 ? 3 : 1;
|
||||
|
||||
#ifdef JCS_EXTENSIONS
|
||||
cinfo.input_components = _channels;
|
||||
cinfo.in_color_space = _channels == 3 ? JCS_EXT_BGR
|
||||
: _channels == 4 ? JCS_EXT_BGRX : JCS_GRAYSCALE;
|
||||
#else
|
||||
cinfo.input_components = channels;
|
||||
cinfo.in_color_space = channels > 1 ? JCS_RGB : JCS_GRAYSCALE;
|
||||
#endif
|
||||
|
||||
int quality = 95;
|
||||
int progressive = 0;
|
||||
@ -746,14 +777,17 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
|
||||
|
||||
jpeg_start_compress( &cinfo, TRUE );
|
||||
|
||||
#ifndef JCS_EXTENSIONS
|
||||
if( channels > 1 )
|
||||
_buffer.allocate(width*channels);
|
||||
buffer = _buffer.data();
|
||||
#endif
|
||||
|
||||
for( int y = 0; y < height; y++ )
|
||||
{
|
||||
uchar *data = img.data + img.step*y, *ptr = data;
|
||||
|
||||
#ifndef JCS_EXTENSIONS
|
||||
if( _channels == 3 )
|
||||
{
|
||||
icvCvt_BGR2RGB_8u_C3R( data, 0, buffer, 0, Size(width,1) );
|
||||
@ -764,6 +798,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
|
||||
icvCvt_BGRA2BGR_8u_C4C3R( data, 0, buffer, 0, Size(width,1), 2 );
|
||||
ptr = buffer;
|
||||
}
|
||||
#endif
|
||||
|
||||
jpeg_write_scanlines( &cinfo, &ptr, 1 );
|
||||
}
|
||||
|
@ -210,15 +210,8 @@ struct ImageCodecInitializer
|
||||
static
|
||||
ImageCodecInitializer& getCodecs()
|
||||
{
|
||||
#ifdef CV_CXX11
|
||||
static ImageCodecInitializer g_codecs;
|
||||
return g_codecs;
|
||||
#else
|
||||
// C++98 doesn't guarantee correctness of multi-threaded initialization of static global variables
|
||||
// (memory leak here is not critical, use C++11 to avoid that)
|
||||
static ImageCodecInitializer* g_codecs = new ImageCodecInitializer();
|
||||
return *g_codecs;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -166,7 +166,7 @@ TEST_P(Imgcodecs_Avif_Image_EncodeDecodeSuite, imencode_imdecode) {
|
||||
cv::Exception);
|
||||
return;
|
||||
}
|
||||
bool result;
|
||||
bool result = true;
|
||||
EXPECT_NO_THROW(
|
||||
result = cv::imencode(".avif", img_original, buf, encoding_params_););
|
||||
EXPECT_TRUE(result);
|
||||
|
@ -4490,7 +4490,7 @@ An example using applyColorMap function
|
||||
|
||||
/** @brief Applies a GNU Octave/MATLAB equivalent colormap on a given image.
|
||||
|
||||
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3.
|
||||
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3. If CV_8UC3, then the CV_8UC1 image is generated internally using cv::COLOR_BGR2GRAY.
|
||||
@param dst The result is the colormapped source image. Note: Mat::create is called on dst.
|
||||
@param colormap The colormap to apply, see #ColormapTypes
|
||||
*/
|
||||
@ -4498,8 +4498,8 @@ CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, int colormap);
|
||||
|
||||
/** @brief Applies a user colormap on a given image.
|
||||
|
||||
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3.
|
||||
@param dst The result is the colormapped source image. Note: Mat::create is called on dst.
|
||||
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3. If CV_8UC3, then the CV_8UC1 image is generated internally using cv::COLOR_BGR2GRAY.
|
||||
@param dst The result is the colormapped source image of the same number of channels as userColor. Note: Mat::create is called on dst.
|
||||
@param userColor The colormap to apply of type CV_8UC1 or CV_8UC3 and size 256
|
||||
*/
|
||||
CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, InputArray userColor);
|
||||
|
@ -17,7 +17,7 @@ ocv_add_module(java BINDINGS opencv_core opencv_imgproc PRIVATE_REQUIRED opencv_
|
||||
|
||||
include(${CMAKE_CURRENT_SOURCE_DIR}/common.cmake)
|
||||
|
||||
# UTILITY: glob specific sources and append them to list (type is in H, CPP, JAVA, AIDL)
|
||||
# UTILITY: glob specific sources and append them to list (type is in H, CPP, JAVA)
|
||||
macro(glob_more_specific_sources _type _root _output)
|
||||
unset(_masks)
|
||||
if(${_type} STREQUAL "H")
|
||||
@ -26,8 +26,6 @@ macro(glob_more_specific_sources _type _root _output)
|
||||
set(_masks "${_root}/cpp/*.cpp")
|
||||
elseif(${_type} STREQUAL "JAVA")
|
||||
set(_masks "${_root}/java/*.java" "${_root}/java/*.java.in")
|
||||
elseif(${_type} STREQUAL "AIDL")
|
||||
set(_masks "${_root}/java/*.aidl")
|
||||
endif()
|
||||
if (_masks)
|
||||
file(GLOB _result ${_masks})
|
||||
|
@ -42,7 +42,6 @@ android {
|
||||
main {
|
||||
jniLibs.srcDirs = ['../../jni']
|
||||
java.srcDirs = ['src'] // TODO Use original files instead of copied into build directory
|
||||
aidl.srcDirs = ['src']
|
||||
res.srcDirs = ['@OpenCV_SOURCE_DIR@/modules/java/android_sdk/android_gradle_lib/res']
|
||||
manifest.srcFile 'AndroidManifest.xml'
|
||||
}
|
||||
|
@ -121,8 +121,6 @@ android {
|
||||
targetCompatibility JavaVersion.VERSION_@ANDROID_GRADLE_JAVA_VERSION_INIT@
|
||||
}
|
||||
|
||||
@ANDROID_GRADLE_BUILD_FEATURE_AIDL@
|
||||
|
||||
buildTypes {
|
||||
debug {
|
||||
packagingOptions {
|
||||
@ -139,7 +137,6 @@ android {
|
||||
}
|
||||
|
||||
buildFeatures {
|
||||
aidl true
|
||||
prefabPublishing true
|
||||
buildConfig true
|
||||
}
|
||||
@ -153,7 +150,6 @@ android {
|
||||
main {
|
||||
jniLibs.srcDirs = ['native/libs']
|
||||
java.srcDirs = ['java/src']
|
||||
aidl.srcDirs = ['java/src']
|
||||
res.srcDirs = ['java/res']
|
||||
manifest.srcFile 'java/AndroidManifest.xml'
|
||||
}
|
||||
|
@ -46,6 +46,7 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
||||
protected ImageReader mImageReader;
|
||||
protected int mPreviewFormat = ImageFormat.YUV_420_888;
|
||||
protected int mRequestTemplate = CameraDevice.TEMPLATE_PREVIEW;
|
||||
private int mFrameRotation;
|
||||
|
||||
protected CameraDevice mCameraDevice;
|
||||
protected CameraCaptureSession mCaptureSession;
|
||||
@ -86,8 +87,8 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
||||
}
|
||||
}
|
||||
|
||||
protected boolean initializeCamera() {
|
||||
Log.i(LOGTAG, "initializeCamera");
|
||||
protected boolean selectCamera() {
|
||||
Log.i(LOGTAG, "selectCamera");
|
||||
CameraManager manager = (CameraManager) getContext().getSystemService(Context.CAMERA_SERVICE);
|
||||
try {
|
||||
String camList[] = manager.getCameraIdList();
|
||||
@ -110,14 +111,10 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (mCameraID != null) {
|
||||
Log.i(LOGTAG, "Opening camera: " + mCameraID);
|
||||
manager.openCamera(mCameraID, mStateCallback, mBackgroundHandler);
|
||||
} else { // make JavaCamera2View behaves in the same way as JavaCameraView
|
||||
Log.i(LOGTAG, "Trying to open camera with the value (" + mCameraIndex + ")");
|
||||
if (mCameraID == null) { // make JavaCamera2View behaves in the same way as JavaCameraView
|
||||
Log.i(LOGTAG, "Selecting camera by index (" + mCameraIndex + ")");
|
||||
if (mCameraIndex < camList.length) {
|
||||
mCameraID = camList[mCameraIndex];
|
||||
manager.openCamera(mCameraID, mStateCallback, mBackgroundHandler);
|
||||
} else {
|
||||
// CAMERA_DISCONNECTED is used when the camera id is no longer valid
|
||||
throw new CameraAccessException(CameraAccessException.CAMERA_DISCONNECTED);
|
||||
@ -125,11 +122,11 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
||||
}
|
||||
return true;
|
||||
} catch (CameraAccessException e) {
|
||||
Log.e(LOGTAG, "OpenCamera - Camera Access Exception", e);
|
||||
Log.e(LOGTAG, "selectCamera - Camera Access Exception", e);
|
||||
} catch (IllegalArgumentException e) {
|
||||
Log.e(LOGTAG, "OpenCamera - Illegal Argument Exception", e);
|
||||
Log.e(LOGTAG, "selectCamera - Illegal Argument Exception", e);
|
||||
} catch (SecurityException e) {
|
||||
Log.e(LOGTAG, "OpenCamera - Security Exception", e);
|
||||
Log.e(LOGTAG, "selectCamera - Security Exception", e);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -204,6 +201,7 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
||||
mImageReader.setOnImageAvailableListener(new ImageReader.OnImageAvailableListener() {
|
||||
@Override
|
||||
public void onImageAvailable(ImageReader reader) {
|
||||
|
||||
Image image = reader.acquireLatestImage();
|
||||
if (image == null)
|
||||
return;
|
||||
@ -213,8 +211,9 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
||||
assert (planes.length == 3);
|
||||
assert (image.getFormat() == mPreviewFormat);
|
||||
|
||||
JavaCamera2Frame tempFrame = new JavaCamera2Frame(image);
|
||||
RotatedCameraFrame tempFrame = new RotatedCameraFrame(new JavaCamera2Frame(image), mFrameRotation);
|
||||
deliverAndDrawFrame(tempFrame);
|
||||
tempFrame.mFrame.release();
|
||||
tempFrame.release();
|
||||
image.close();
|
||||
}
|
||||
@ -303,11 +302,22 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
||||
protected boolean connectCamera(int width, int height) {
|
||||
Log.i(LOGTAG, "setCameraPreviewSize(" + width + "x" + height + ")");
|
||||
startBackgroundThread();
|
||||
initializeCamera();
|
||||
selectCamera();
|
||||
try {
|
||||
CameraManager manager = (CameraManager) getContext().getSystemService(Context.CAMERA_SERVICE);
|
||||
CameraCharacteristics characteristics = manager.getCameraCharacteristics(mCameraID);
|
||||
mFrameRotation = getFrameRotation(
|
||||
characteristics.get(CameraCharacteristics.LENS_FACING) == CameraCharacteristics.LENS_FACING_FRONT,
|
||||
characteristics.get(CameraCharacteristics.SENSOR_ORIENTATION));
|
||||
|
||||
boolean needReconfig = calcPreviewSize(width, height);
|
||||
if (mFrameRotation % 180 == 0) {
|
||||
mFrameWidth = mPreviewSize.getWidth();
|
||||
mFrameHeight = mPreviewSize.getHeight();
|
||||
} else {
|
||||
mFrameWidth = mPreviewSize.getHeight();
|
||||
mFrameHeight = mPreviewSize.getWidth();
|
||||
}
|
||||
|
||||
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
|
||||
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
|
||||
@ -322,12 +332,16 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
||||
mCaptureSession.close();
|
||||
mCaptureSession = null;
|
||||
}
|
||||
createCameraPreviewSession();
|
||||
}
|
||||
|
||||
if (mFpsMeter != null) {
|
||||
mFpsMeter.setResolution(mFrameWidth, mFrameHeight);
|
||||
}
|
||||
|
||||
Log.i(LOGTAG, "Opening camera: " + mCameraID);
|
||||
manager.openCamera(mCameraID, mStateCallback, mBackgroundHandler);
|
||||
} catch (CameraAccessException e) {
|
||||
Log.e(LOGTAG, "OpenCamera - Camera Access Exception", e);
|
||||
} catch (RuntimeException e) {
|
||||
throw new RuntimeException("Interrupted while setCameraPreviewSize.", e);
|
||||
}
|
||||
@ -442,6 +456,7 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
||||
mGray = new Mat();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void release() {
|
||||
mRgba.release();
|
||||
mGray.release();
|
||||
|
@ -10,6 +10,7 @@ import org.opencv.videoio.VideoCapture;
|
||||
import org.opencv.videoio.VideoWriter;
|
||||
|
||||
import android.content.Context;
|
||||
import android.hardware.Camera;
|
||||
import android.util.AttributeSet;
|
||||
import android.util.Log;
|
||||
import android.view.ViewGroup.LayoutParams;
|
||||
@ -25,7 +26,7 @@ public class NativeCameraView extends CameraBridgeViewBase {
|
||||
private Thread mThread;
|
||||
|
||||
protected VideoCapture mCamera;
|
||||
protected NativeCameraFrame mFrame;
|
||||
protected RotatedCameraFrame mFrame;
|
||||
|
||||
public NativeCameraView(Context context, int cameraId) {
|
||||
super(context, cameraId);
|
||||
@ -89,28 +90,65 @@ public class NativeCameraView extends CameraBridgeViewBase {
|
||||
|
||||
private boolean initializeCamera(int width, int height) {
|
||||
synchronized (this) {
|
||||
|
||||
if (mCameraIndex == -1) {
|
||||
Camera.CameraInfo cameraInfo = new Camera.CameraInfo();
|
||||
int localCameraIndex = mCameraIndex;
|
||||
if (mCameraIndex == CAMERA_ID_ANY) {
|
||||
Log.d(TAG, "Try to open default camera");
|
||||
mCamera = new VideoCapture(0, Videoio.CAP_ANDROID);
|
||||
} else {
|
||||
Log.d(TAG, "Try to open camera with index " + mCameraIndex);
|
||||
mCamera = new VideoCapture(mCameraIndex, Videoio.CAP_ANDROID);
|
||||
localCameraIndex = 0;
|
||||
} else if (mCameraIndex == CAMERA_ID_BACK) {
|
||||
Log.i(TAG, "Trying to open back camera");
|
||||
for (int camIdx = 0; camIdx < Camera.getNumberOfCameras(); ++camIdx) {
|
||||
Camera.getCameraInfo( camIdx, cameraInfo );
|
||||
if (cameraInfo.facing == Camera.CameraInfo.CAMERA_FACING_BACK) {
|
||||
localCameraIndex = camIdx;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (mCameraIndex == CAMERA_ID_FRONT) {
|
||||
Log.i(TAG, "Trying to open front camera");
|
||||
for (int camIdx = 0; camIdx < Camera.getNumberOfCameras(); ++camIdx) {
|
||||
Camera.getCameraInfo( camIdx, cameraInfo );
|
||||
if (cameraInfo.facing == Camera.CameraInfo.CAMERA_FACING_FRONT) {
|
||||
localCameraIndex = camIdx;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (localCameraIndex == CAMERA_ID_BACK) {
|
||||
Log.e(TAG, "Back camera not found!");
|
||||
return false;
|
||||
} else if (localCameraIndex == CAMERA_ID_FRONT) {
|
||||
Log.e(TAG, "Front camera not found!");
|
||||
return false;
|
||||
}
|
||||
|
||||
Log.d(TAG, "Try to open camera with index " + localCameraIndex);
|
||||
mCamera = new VideoCapture(localCameraIndex, Videoio.CAP_ANDROID);
|
||||
|
||||
if (mCamera == null)
|
||||
return false;
|
||||
|
||||
if (mCamera.isOpened() == false)
|
||||
return false;
|
||||
|
||||
mFrame = new NativeCameraFrame(mCamera);
|
||||
if (mCameraIndex != CAMERA_ID_BACK && mCameraIndex != CAMERA_ID_FRONT)
|
||||
Camera.getCameraInfo(localCameraIndex, cameraInfo);
|
||||
int frameRotation = getFrameRotation(
|
||||
cameraInfo.facing == Camera.CameraInfo.CAMERA_FACING_FRONT,
|
||||
cameraInfo.orientation);
|
||||
|
||||
mFrame = new RotatedCameraFrame(new NativeCameraFrame(mCamera), frameRotation);
|
||||
|
||||
mCamera.set(Videoio.CAP_PROP_FRAME_WIDTH, width);
|
||||
mCamera.set(Videoio.CAP_PROP_FRAME_HEIGHT, height);
|
||||
|
||||
if (frameRotation % 180 == 0) {
|
||||
mFrameWidth = (int) mCamera.get(Videoio.CAP_PROP_FRAME_WIDTH);
|
||||
mFrameHeight = (int) mCamera.get(Videoio.CAP_PROP_FRAME_HEIGHT);
|
||||
} else {
|
||||
mFrameWidth = (int) mCamera.get(Videoio.CAP_PROP_FRAME_HEIGHT);
|
||||
mFrameHeight = (int) mCamera.get(Videoio.CAP_PROP_FRAME_WIDTH);
|
||||
}
|
||||
|
||||
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
|
||||
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
|
||||
@ -131,7 +169,10 @@ public class NativeCameraView extends CameraBridgeViewBase {
|
||||
|
||||
private void releaseCamera() {
|
||||
synchronized (this) {
|
||||
if (mFrame != null) mFrame.release();
|
||||
if (mFrame != null) {
|
||||
mFrame.mFrame.release();
|
||||
mFrame.release();
|
||||
}
|
||||
if (mCamera != null) mCamera.release();
|
||||
}
|
||||
}
|
||||
@ -162,6 +203,7 @@ public class NativeCameraView extends CameraBridgeViewBase {
|
||||
mBgr = new Mat();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void release() {
|
||||
if (mGray != null) mGray.release();
|
||||
if (mRgba != null) mRgba.release();
|
||||
|
@ -4,6 +4,7 @@ import java.util.List;
|
||||
|
||||
import org.opencv.BuildConfig;
|
||||
import org.opencv.R;
|
||||
import org.opencv.core.Core;
|
||||
import org.opencv.core.Mat;
|
||||
import org.opencv.core.Size;
|
||||
|
||||
@ -17,8 +18,10 @@ import android.graphics.Canvas;
|
||||
import android.graphics.Rect;
|
||||
import android.util.AttributeSet;
|
||||
import android.util.Log;
|
||||
import android.view.Surface;
|
||||
import android.view.SurfaceHolder;
|
||||
import android.view.SurfaceView;
|
||||
import android.view.WindowManager;
|
||||
|
||||
/**
|
||||
* This is a basic class, implementing the interaction with Camera and OpenCV library.
|
||||
@ -189,8 +192,93 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac
|
||||
* This method returns single channel gray scale Mat with frame
|
||||
*/
|
||||
public Mat gray();
|
||||
|
||||
public void release();
|
||||
};
|
||||
|
||||
public class RotatedCameraFrame implements CvCameraViewFrame {
|
||||
@Override
|
||||
public Mat gray() {
|
||||
if (mRotation != 0) {
|
||||
Core.rotate(mFrame.gray(), mGrayRotated, getCvRotationCode(mRotation));
|
||||
return mGrayRotated;
|
||||
} else {
|
||||
return mFrame.gray();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Mat rgba() {
|
||||
if (mRotation != 0) {
|
||||
Core.rotate(mFrame.rgba(), mRgbaRotated, getCvRotationCode(mRotation));
|
||||
return mRgbaRotated;
|
||||
} else {
|
||||
return mFrame.rgba();
|
||||
}
|
||||
}
|
||||
|
||||
private int getCvRotationCode(int degrees) {
|
||||
if (degrees == 90) {
|
||||
return Core.ROTATE_90_CLOCKWISE;
|
||||
} else if (degrees == 180) {
|
||||
return Core.ROTATE_180;
|
||||
} else {
|
||||
return Core.ROTATE_90_COUNTERCLOCKWISE;
|
||||
}
|
||||
}
|
||||
|
||||
public RotatedCameraFrame(CvCameraViewFrame frame, int rotation) {
|
||||
super();
|
||||
mFrame = frame;
|
||||
mRgbaRotated = new Mat();
|
||||
mGrayRotated = new Mat();
|
||||
mRotation = rotation;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void release() {
|
||||
mRgbaRotated.release();
|
||||
mGrayRotated.release();
|
||||
}
|
||||
|
||||
public CvCameraViewFrame mFrame;
|
||||
private Mat mRgbaRotated;
|
||||
private Mat mGrayRotated;
|
||||
private int mRotation;
|
||||
};
|
||||
|
||||
/**
|
||||
* Calculates how to rotate camera frame to match current screen orientation
|
||||
*/
|
||||
protected int getFrameRotation(boolean cameraFacingFront, int cameraSensorOrientation) {
|
||||
WindowManager windowManager = (WindowManager) getContext().getSystemService(Context.WINDOW_SERVICE);
|
||||
int screenOrientation = windowManager.getDefaultDisplay().getRotation();
|
||||
int screenRotation = 0;
|
||||
switch (screenOrientation) {
|
||||
case Surface.ROTATION_0:
|
||||
screenRotation = 0;
|
||||
break;
|
||||
case Surface.ROTATION_90:
|
||||
screenRotation = 90;
|
||||
break;
|
||||
case Surface.ROTATION_180:
|
||||
screenRotation = 180;
|
||||
break;
|
||||
case Surface.ROTATION_270:
|
||||
screenRotation = 270;
|
||||
break;
|
||||
}
|
||||
|
||||
int frameRotation;
|
||||
if (cameraFacingFront) {
|
||||
frameRotation = (cameraSensorOrientation + screenRotation) % 360;
|
||||
} else {
|
||||
frameRotation = (cameraSensorOrientation - screenRotation + 360) % 360;
|
||||
}
|
||||
|
||||
return frameRotation;
|
||||
}
|
||||
|
||||
public void surfaceChanged(SurfaceHolder arg0, int arg1, int arg2, int arg3) {
|
||||
Log.d(TAG, "call surfaceChanged event");
|
||||
synchronized(mSyncObject) {
|
||||
|
@ -10,9 +10,12 @@ import android.hardware.Camera.PreviewCallback;
|
||||
import android.os.Build;
|
||||
import android.util.AttributeSet;
|
||||
import android.util.Log;
|
||||
import android.view.Surface;
|
||||
import android.view.ViewGroup.LayoutParams;
|
||||
import android.view.WindowManager;
|
||||
|
||||
import org.opencv.BuildConfig;
|
||||
import org.opencv.core.Core;
|
||||
import org.opencv.core.CvType;
|
||||
import org.opencv.core.Mat;
|
||||
import org.opencv.core.Size;
|
||||
@ -39,7 +42,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
||||
private boolean mStopThread;
|
||||
|
||||
protected Camera mCamera;
|
||||
protected JavaCameraFrame[] mCameraFrame;
|
||||
protected RotatedCameraFrame[] mCameraFrame;
|
||||
private SurfaceTexture mSurfaceTexture;
|
||||
private int mPreviewFormat = ImageFormat.NV21;
|
||||
|
||||
@ -71,29 +74,21 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
||||
boolean result = true;
|
||||
synchronized (this) {
|
||||
mCamera = null;
|
||||
int cameraId = -1;
|
||||
|
||||
if (mCameraIndex == CAMERA_ID_ANY) {
|
||||
Log.d(TAG, "Trying to open camera with old open()");
|
||||
try {
|
||||
mCamera = Camera.open();
|
||||
}
|
||||
catch (Exception e){
|
||||
Log.e(TAG, "Camera is not available (in use or does not exist): " + e.getLocalizedMessage());
|
||||
}
|
||||
|
||||
if(mCamera == null && Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
|
||||
boolean connected = false;
|
||||
for (int camIdx = 0; camIdx < Camera.getNumberOfCameras(); ++camIdx) {
|
||||
Log.d(TAG, "Trying to open camera with new open(" + Integer.valueOf(camIdx) + ")");
|
||||
try {
|
||||
mCamera = Camera.open(camIdx);
|
||||
connected = true;
|
||||
cameraId = camIdx;
|
||||
} catch (RuntimeException e) {
|
||||
Log.e(TAG, "Camera #" + camIdx + "failed to open: " + e.getLocalizedMessage());
|
||||
}
|
||||
if (connected) break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
|
||||
int localCameraIndex = mCameraIndex;
|
||||
@ -126,6 +121,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
||||
Log.d(TAG, "Trying to open camera with new open(" + Integer.valueOf(localCameraIndex) + ")");
|
||||
try {
|
||||
mCamera = Camera.open(localCameraIndex);
|
||||
cameraId = localCameraIndex;
|
||||
} catch (RuntimeException e) {
|
||||
Log.e(TAG, "Camera #" + localCameraIndex + "failed to open: " + e.getLocalizedMessage());
|
||||
}
|
||||
@ -136,6 +132,11 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
||||
if (mCamera == null)
|
||||
return false;
|
||||
|
||||
android.hardware.Camera.CameraInfo info = new android.hardware.Camera.CameraInfo();
|
||||
android.hardware.Camera.getCameraInfo(cameraId, info);
|
||||
int frameRotation = getFrameRotation(
|
||||
info.facing == Camera.CameraInfo.CAMERA_FACING_FRONT,
|
||||
info.orientation);
|
||||
/* Now set camera parameters */
|
||||
try {
|
||||
Camera.Parameters params = mCamera.getParameters();
|
||||
@ -176,8 +177,16 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
||||
mCamera.setParameters(params);
|
||||
params = mCamera.getParameters();
|
||||
|
||||
int rawFrameWidth = params.getPreviewSize().width;
|
||||
int rawFrameHeight = params.getPreviewSize().height;
|
||||
|
||||
if (frameRotation % 180 == 0) {
|
||||
mFrameWidth = params.getPreviewSize().width;
|
||||
mFrameHeight = params.getPreviewSize().height;
|
||||
} else {
|
||||
mFrameWidth = params.getPreviewSize().height;
|
||||
mFrameHeight = params.getPreviewSize().width;
|
||||
}
|
||||
|
||||
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
|
||||
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
|
||||
@ -196,14 +205,14 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
||||
mCamera.setPreviewCallbackWithBuffer(this);
|
||||
|
||||
mFrameChain = new Mat[2];
|
||||
mFrameChain[0] = new Mat(mFrameHeight + (mFrameHeight/2), mFrameWidth, CvType.CV_8UC1);
|
||||
mFrameChain[1] = new Mat(mFrameHeight + (mFrameHeight/2), mFrameWidth, CvType.CV_8UC1);
|
||||
mFrameChain[0] = new Mat(rawFrameHeight + (rawFrameHeight/2), rawFrameWidth, CvType.CV_8UC1);
|
||||
mFrameChain[1] = new Mat(rawFrameHeight + (rawFrameHeight/2), rawFrameWidth, CvType.CV_8UC1);
|
||||
|
||||
AllocateCache();
|
||||
|
||||
mCameraFrame = new JavaCameraFrame[2];
|
||||
mCameraFrame[0] = new JavaCameraFrame(mFrameChain[0], mFrameWidth, mFrameHeight);
|
||||
mCameraFrame[1] = new JavaCameraFrame(mFrameChain[1], mFrameWidth, mFrameHeight);
|
||||
mCameraFrame = new RotatedCameraFrame[2];
|
||||
mCameraFrame[0] = new RotatedCameraFrame(new JavaCameraFrame(mFrameChain[0], rawFrameWidth, rawFrameHeight), frameRotation);
|
||||
mCameraFrame[1] = new RotatedCameraFrame(new JavaCameraFrame(mFrameChain[1], rawFrameWidth, rawFrameHeight), frameRotation);
|
||||
|
||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.HONEYCOMB) {
|
||||
mSurfaceTexture = new SurfaceTexture(MAGIC_TEXTURE_ID);
|
||||
@ -240,7 +249,9 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
||||
mFrameChain[1].release();
|
||||
}
|
||||
if (mCameraFrame != null) {
|
||||
mCameraFrame[0].mFrame.release();
|
||||
mCameraFrame[0].release();
|
||||
mCameraFrame[1].mFrame.release();
|
||||
mCameraFrame[1].release();
|
||||
}
|
||||
}
|
||||
@ -336,6 +347,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
||||
mRgba = new Mat();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void release() {
|
||||
mRgba.release();
|
||||
}
|
||||
|
@ -1254,13 +1254,13 @@ JNIEXPORT void JNICALL Java_org_opencv_%(module)s_%(j_cls)s_delete
|
||||
def copy_java_files(java_files_dir, java_base_path, default_package_path='org/opencv/'):
|
||||
global total_files, updated_files
|
||||
java_files = []
|
||||
re_filter = re.compile(r'^.+\.(java|aidl|kt)(.in)?$')
|
||||
re_filter = re.compile(r'^.+\.(java|kt)(.in)?$')
|
||||
for root, dirnames, filenames in os.walk(java_files_dir):
|
||||
java_files += [os.path.join(root, filename) for filename in filenames if re_filter.match(filename)]
|
||||
java_files = [f.replace('\\', '/') for f in java_files]
|
||||
|
||||
re_package = re.compile(r'^package +(.+);')
|
||||
re_prefix = re.compile(r'^.+[\+/]([^\+]+).(java|aidl|kt)(.in)?$')
|
||||
re_prefix = re.compile(r'^.+[\+/]([^\+]+).(java|kt)(.in)?$')
|
||||
for java_file in java_files:
|
||||
src = checkFileRemap(java_file)
|
||||
with open(src, 'r') as f:
|
||||
|
@ -27,7 +27,7 @@ public:
|
||||
* @param prototxt_path prototxt file path for the super resolution model
|
||||
* @param model_path model file path for the super resolution model
|
||||
*/
|
||||
CV_WRAP BarcodeDetector(const std::string &prototxt_path, const std::string &model_path);
|
||||
CV_WRAP BarcodeDetector(CV_WRAP_FILE_PATH const std::string &prototxt_path, CV_WRAP_FILE_PATH const std::string &model_path);
|
||||
~BarcodeDetector();
|
||||
|
||||
/** @brief Decodes barcode in image once it's found by the detect() method.
|
||||
|
@ -82,8 +82,8 @@ public:
|
||||
* @param backend_id the id of backend
|
||||
* @param target_id the id of target device
|
||||
*/
|
||||
CV_WRAP static Ptr<FaceDetectorYN> create(const String& model,
|
||||
const String& config,
|
||||
CV_WRAP static Ptr<FaceDetectorYN> create(CV_WRAP_FILE_PATH const String& model,
|
||||
CV_WRAP_FILE_PATH const String& config,
|
||||
const Size& input_size,
|
||||
float score_threshold = 0.9f,
|
||||
float nms_threshold = 0.3f,
|
||||
@ -154,7 +154,7 @@ public:
|
||||
* @param backend_id the id of backend
|
||||
* @param target_id the id of target device
|
||||
*/
|
||||
CV_WRAP static Ptr<FaceRecognizerSF> create(const String& model, const String& config, int backend_id = 0, int target_id = 0);
|
||||
CV_WRAP static Ptr<FaceRecognizerSF> create(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config, int backend_id = 0, int target_id = 0);
|
||||
};
|
||||
|
||||
//! @}
|
||||
|
@ -483,39 +483,44 @@ void CharucoBoardImpl::generateImage(Size outSize, OutputArray img, int marginSi
|
||||
Mat noMarginsImg =
|
||||
out.colRange(marginSize, out.cols - marginSize).rowRange(marginSize, out.rows - marginSize);
|
||||
|
||||
double totalLengthX, totalLengthY;
|
||||
totalLengthX = squareLength * size.width;
|
||||
totalLengthY = squareLength * size.height;
|
||||
|
||||
// proportional transformation
|
||||
double xReduction = totalLengthX / double(noMarginsImg.cols);
|
||||
double yReduction = totalLengthY / double(noMarginsImg.rows);
|
||||
// the size of the chessboard square depends on the location of the chessboard
|
||||
float pixInSquare = 0.f;
|
||||
// the size of the chessboard in pixels
|
||||
Size pixInChessboard(noMarginsImg.cols, noMarginsImg.rows);
|
||||
|
||||
// determine the zone where the chessboard is placed
|
||||
Mat chessboardZoneImg;
|
||||
if(xReduction > yReduction) {
|
||||
int nRows = int(totalLengthY / xReduction);
|
||||
int rowsMargins = (noMarginsImg.rows - nRows) / 2;
|
||||
chessboardZoneImg = noMarginsImg.rowRange(rowsMargins, noMarginsImg.rows - rowsMargins);
|
||||
} else {
|
||||
int nCols = int(totalLengthX / yReduction);
|
||||
int colsMargins = (noMarginsImg.cols - nCols) / 2;
|
||||
chessboardZoneImg = noMarginsImg.colRange(colsMargins, noMarginsImg.cols - colsMargins);
|
||||
float pixInSquareX = (float)noMarginsImg.cols / (float)size.width;
|
||||
float pixInSquareY = (float)noMarginsImg.rows / (float)size.height;
|
||||
Point startChessboard(0, 0);
|
||||
if (pixInSquareX <= pixInSquareY) {
|
||||
// the width of "noMarginsImg" image determines the dimensions of the chessboard
|
||||
pixInSquare = pixInSquareX;
|
||||
pixInChessboard.height = cvRound(pixInSquare*size.height);
|
||||
int rowsMargin = (noMarginsImg.rows - pixInChessboard.height) / 2;
|
||||
startChessboard.y = rowsMargin;
|
||||
}
|
||||
else {
|
||||
// the height of "noMarginsImg" image determines the dimensions of the chessboard
|
||||
pixInSquare = pixInSquareY;
|
||||
pixInChessboard.width = cvRound(pixInSquare*size.width);
|
||||
int colsMargin = (noMarginsImg.cols - pixInChessboard.width) / 2;
|
||||
startChessboard.x = colsMargin;
|
||||
}
|
||||
// determine the zone where the chessboard is located
|
||||
Mat chessboardZoneImg = noMarginsImg(Rect(startChessboard, pixInChessboard));
|
||||
|
||||
// determine the margins to draw only the markers
|
||||
// take the minimum just to be sure
|
||||
double squareSizePixels = min(double(chessboardZoneImg.cols) / double(size.width),
|
||||
double(chessboardZoneImg.rows) / double(size.height));
|
||||
// marker size in pixels
|
||||
const float pixInMarker = markerLength/squareLength*pixInSquare;
|
||||
// the size of the marker margin in pixels
|
||||
const float pixInMarginMarker = 0.5f*(pixInSquare - pixInMarker);
|
||||
|
||||
double diffSquareMarkerLength = (squareLength - markerLength) / 2;
|
||||
int diffSquareMarkerLengthPixels =
|
||||
int(diffSquareMarkerLength * squareSizePixels / squareLength);
|
||||
// determine the zone where the aruco markers are located
|
||||
int endArucoX = cvRound(pixInSquare*(size.width-1)+pixInMarginMarker+pixInMarker);
|
||||
int endArucoY = cvRound(pixInSquare*(size.height-1)+pixInMarginMarker+pixInMarker);
|
||||
Mat arucoZone = chessboardZoneImg(Range(cvRound(pixInMarginMarker), endArucoY), Range(cvRound(pixInMarginMarker), endArucoX));
|
||||
|
||||
// draw markers
|
||||
Mat markersImg;
|
||||
Board::Impl::generateImage(chessboardZoneImg.size(), markersImg, diffSquareMarkerLengthPixels, borderBits);
|
||||
markersImg.copyTo(chessboardZoneImg);
|
||||
Board::Impl::generateImage(arucoZone.size(), arucoZone, 0, borderBits);
|
||||
|
||||
// now draw black squares
|
||||
for(int y = 0; y < size.height; y++) {
|
||||
@ -527,12 +532,11 @@ void CharucoBoardImpl::generateImage(Size outSize, OutputArray img, int marginSi
|
||||
if(y % 2 != x % 2) continue; // white corner, dont do anything
|
||||
}
|
||||
|
||||
double startX, startY;
|
||||
startX = squareSizePixels * double(x);
|
||||
startY = squareSizePixels * double(y);
|
||||
float startX = pixInSquare * float(x);
|
||||
float startY = pixInSquare * float(y);
|
||||
|
||||
Mat squareZone = chessboardZoneImg.rowRange(int(startY), int(startY + squareSizePixels))
|
||||
.colRange(int(startX), int(startX + squareSizePixels));
|
||||
Mat squareZone = chessboardZoneImg(Range(cvRound(startY), cvRound(startY + pixInSquare)),
|
||||
Range(cvRound(startX), cvRound(startX + pixInSquare)));
|
||||
|
||||
squareZone.setTo(0);
|
||||
}
|
||||
|
@ -684,7 +684,7 @@ struct ArucoDetector::ArucoDetectorImpl {
|
||||
contours.clear();
|
||||
|
||||
// sort candidates from big to small
|
||||
std::sort(candidateTree.begin(), candidateTree.end());
|
||||
std::stable_sort(candidateTree.begin(), candidateTree.end());
|
||||
// group index for each candidate
|
||||
vector<int> groupId(candidateTree.size(), -1);
|
||||
vector<vector<size_t> > groupedCandidates;
|
||||
@ -728,11 +728,11 @@ struct ArucoDetector::ArucoDetectorImpl {
|
||||
|
||||
for (vector<size_t>& grouped : groupedCandidates) {
|
||||
if (detectorParams.detectInvertedMarker) // if detectInvertedMarker choose smallest contours
|
||||
std::sort(grouped.begin(), grouped.end(), [](const size_t &a, const size_t &b) {
|
||||
std::stable_sort(grouped.begin(), grouped.end(), [](const size_t &a, const size_t &b) {
|
||||
return a > b;
|
||||
});
|
||||
else // if detectInvertedMarker==false choose largest contours
|
||||
std::sort(grouped.begin(), grouped.end());
|
||||
std::stable_sort(grouped.begin(), grouped.end());
|
||||
size_t currId = grouped[0];
|
||||
isSelectedContours[currId] = true;
|
||||
for (size_t i = 1ull; i < grouped.size(); i++) {
|
||||
@ -780,7 +780,7 @@ struct ArucoDetector::ArucoDetectorImpl {
|
||||
vector<int> idsTmp(ncandidates, -1);
|
||||
vector<int> rotated(ncandidates, 0);
|
||||
vector<uint8_t> validCandidates(ncandidates, 0);
|
||||
vector<bool> was(ncandidates, false);
|
||||
vector<uint8_t> was(ncandidates, false);
|
||||
bool checkCloseContours = true;
|
||||
|
||||
int maxDepth = 0;
|
||||
|
@ -52,5 +52,7 @@
|
||||
#include "opencv2/core/private.hpp"
|
||||
|
||||
#include <numeric>
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#endif
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "quirc.h"
|
||||
#endif
|
||||
|
||||
#include <array>
|
||||
#include <limits>
|
||||
#include <cmath>
|
||||
#include <queue>
|
||||
|
@ -771,6 +771,57 @@ TEST_P(CharucoBoard, testWrongSizeDetection)
|
||||
ASSERT_TRUE(detectedCharucoIds.empty());
|
||||
}
|
||||
|
||||
// Generates a 7x4 ChArUco board image and checks that the 2x2 pixel window
// centred on every chessboard corner shows one of the two possible
// black/white checker patterns.
TEST(CharucoBoardGenerate, issue_24806)
{
    aruco::Dictionary dict = aruco::getPredefinedDictionary(aruco::DICT_4X4_1000);
    const float squareLength = 13.f, markerLength = 10.f;
    const Size boardSize(7, 4);
    const aruco::CharucoBoard board(boardSize, squareLength, markerLength, dict);
    const int marginSize = 24;
    Mat boardImg;

    // generate chessboard image
    board.generateImage(Size(400, 300), boardImg, marginSize);
    // Precondition: the width of the image determines the dimensions of the chessboard in this test
    ASSERT_LE((float)(boardImg.cols) / (float)boardSize.width,
              (float)(boardImg.rows) / (float)boardSize.height);

    // prepare data for chessboard image test
    Mat noMarginsImg = boardImg(Range(marginSize, boardImg.rows - marginSize),
                                Range(marginSize, boardImg.cols - marginSize));
    const float pixInSquare = (float)(noMarginsImg.cols) / (float)boardSize.width;

    Size pixInChessboard(cvRound(pixInSquare*boardSize.width), cvRound(pixInSquare*boardSize.height));
    const Point startChessboard((noMarginsImg.cols - pixInChessboard.width) / 2,
                                (noMarginsImg.rows - pixInChessboard.height) / 2);
    Mat chessboardZoneImg = noMarginsImg(Rect(startChessboard, pixInChessboard));

    // B - black pixel, W - white pixel
    // chessboard corner 1:
    // B W
    // W B
    Mat goldCorner1 = (Mat_<uint8_t>(2, 2) <<
                       0, 255,
                       255, 0);
    // B - black pixel, W - white pixel
    // chessboard corner 2:
    // W B
    // B W
    Mat goldCorner2 = (Mat_<uint8_t>(2, 2) <<
                       255, 0,
                       0, 255);

    // test chessboard corners in generated image
    for (const Point3f& p: board.getChessboardCorners()) {
        // corner position in pixels inside the chessboard zone
        Point2f chessCorner(pixInSquare*(p.x/squareLength),
                            pixInSquare*(p.y/squareLength));
        // 2x2 window whose centre is the corner
        Mat winCorner = chessboardZoneImg(Rect(Point(cvRound(chessCorner.x) - 1, cvRound(chessCorner.y) - 1), Size(2, 2)));
        const bool eq = (cv::countNonZero(goldCorner1 != winCorner) == 0) ||
                        (cv::countNonZero(goldCorner2 != winCorner) == 0);
        ASSERT_TRUE(eq) << "unexpected pixel pattern around chessboard corner ("
                        << chessCorner.x << ", " << chessCorner.y << ")";
    }
    // TODO: fix aruco generateImage and add test aruco corners for generated image
}
|
||||
|
||||
// Temporary disabled in https://github.com/opencv/opencv/pull/24338
|
||||
// 5.x version produces corners with a different shape than 4.x (32F_C2 instead of 2x 32FC1)
|
||||
TEST(Charuco, DISABLED_testSeveralBoardsWithCustomIds)
|
||||
|
@ -7,10 +7,6 @@
|
||||
#include "opencv2/ts.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
|
||||
#if defined CV_CXX11
|
||||
#include <random>
|
||||
#else
|
||||
#include <cstdlib>
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -5,16 +5,6 @@
|
||||
#include "test_precomp.hpp"
|
||||
namespace opencv_test { namespace {
|
||||
|
||||
#if !defined CV_CXX11
|
||||
// Wrapper for generating seeded random number via std::rand.
|
||||
template<unsigned Seed>
|
||||
class SeededRandFunctor {
|
||||
public:
|
||||
SeededRandFunctor() { std::srand(Seed); }
|
||||
int operator()(int i) { return std::rand() % (i + 1); }
|
||||
};
|
||||
#endif
|
||||
|
||||
std::string encode_qrcode_images_name[] = {
|
||||
"version1_mode1.png", "version1_mode2.png", "version1_mode4.png",
|
||||
"version2_mode1.png", "version2_mode2.png", "version2_mode4.png",
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user