mirror of
https://github.com/opencv/opencv.git
synced 2025-06-10 19:24:07 +08:00
Merge branch 4.x
This commit is contained in:
commit
decf6538a2
@ -1455,8 +1455,8 @@ if(WITH_WEBP OR HAVE_WEBP)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_AVIF OR HAVE_AVIF)
|
if(WITH_AVIF OR HAVE_AVIF)
|
||||||
if(AVIF_VERSION)
|
if(libavif_VERSION)
|
||||||
status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY} (ver ${AVIF_VERSION})" ELSE "NO")
|
status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY} (ver ${libavif_VERSION})" ELSE "NO")
|
||||||
else()
|
else()
|
||||||
status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY}" ELSE "NO")
|
status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY}" ELSE "NO")
|
||||||
endif()
|
endif()
|
||||||
@ -1852,6 +1852,7 @@ if(BUILD_opencv_python3)
|
|||||||
else()
|
else()
|
||||||
status(" Libraries:" HAVE_opencv_python3 THEN "${PYTHON3_LIBRARIES}" ELSE NO)
|
status(" Libraries:" HAVE_opencv_python3 THEN "${PYTHON3_LIBRARIES}" ELSE NO)
|
||||||
endif()
|
endif()
|
||||||
|
status(" Limited API:" PYTHON3_LIMITED_API THEN "YES (ver ${PYTHON3_LIMITED_API_VERSION})" ELSE NO)
|
||||||
status(" numpy:" PYTHON3_NUMPY_INCLUDE_DIRS THEN "${PYTHON3_NUMPY_INCLUDE_DIRS} (ver ${PYTHON3_NUMPY_VERSION})" ELSE "NO (Python3 wrappers can not be generated)")
|
status(" numpy:" PYTHON3_NUMPY_INCLUDE_DIRS THEN "${PYTHON3_NUMPY_INCLUDE_DIRS} (ver ${PYTHON3_NUMPY_VERSION})" ELSE "NO (Python3 wrappers can not be generated)")
|
||||||
status(" install path:" HAVE_opencv_python3 THEN "${__INSTALL_PATH_PYTHON3}" ELSE "-")
|
status(" install path:" HAVE_opencv_python3 THEN "${__INSTALL_PATH_PYTHON3}" ELSE "-")
|
||||||
endif()
|
endif()
|
||||||
|
@ -1,8 +1,5 @@
|
|||||||
## OpenCV: Open Source Computer Vision Library
|
## OpenCV: Open Source Computer Vision Library
|
||||||
|
|
||||||
### Keep OpenCV Free
|
|
||||||
|
|
||||||
OpenCV is raising funds to keep the library free for everyone, and we need the support of the entire community to do it. [Donate to OpenCV on IndieGoGo](http://igg.me/at/opencv5) before the campaign ends on December 16 to show your support.
|
|
||||||
|
|
||||||
### Resources
|
### Resources
|
||||||
|
|
||||||
@ -13,6 +10,7 @@ OpenCV is raising funds to keep the library free for everyone, and we need the s
|
|||||||
* previous forum (read only): <http://answers.opencv.org>
|
* previous forum (read only): <http://answers.opencv.org>
|
||||||
* Issue tracking: <https://github.com/opencv/opencv/issues>
|
* Issue tracking: <https://github.com/opencv/opencv/issues>
|
||||||
* Additional OpenCV functionality: <https://github.com/opencv/opencv_contrib>
|
* Additional OpenCV functionality: <https://github.com/opencv/opencv_contrib>
|
||||||
|
* Donate to OpenCV: <https://opencv.org/support/>
|
||||||
|
|
||||||
|
|
||||||
### Contributing
|
### Contributing
|
||||||
|
@ -484,7 +484,6 @@ macro(ocv_check_compiler_optimization OPT)
|
|||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
macro(ocv_cpu_aarch64_baseline_merge_feature_options FEATURE_NAME_LIST FLAG_STRING COMMON_OPTION)
|
macro(ocv_cpu_aarch64_baseline_merge_feature_options FEATURE_NAME_LIST FLAG_STRING COMMON_OPTION)
|
||||||
if(NOT MSVC)
|
|
||||||
unset(_POSTFIX)
|
unset(_POSTFIX)
|
||||||
# Check each feature option
|
# Check each feature option
|
||||||
foreach(OPT IN LISTS ${FEATURE_NAME_LIST})
|
foreach(OPT IN LISTS ${FEATURE_NAME_LIST})
|
||||||
@ -499,7 +498,6 @@ macro(ocv_cpu_aarch64_baseline_merge_feature_options FEATURE_NAME_LIST FLAG_STRI
|
|||||||
if(NOT "x${_POSTFIX}" STREQUAL "x")
|
if(NOT "x${_POSTFIX}" STREQUAL "x")
|
||||||
set(${FLAG_STRING} "${${FLAG_STRING}} ${COMMON_OPTION}${_POSTFIX}")
|
set(${FLAG_STRING} "${${FLAG_STRING}} ${COMMON_OPTION}${_POSTFIX}")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
|
foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
|
||||||
@ -596,10 +594,12 @@ foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
|
|||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
if(AARCH64)
|
if(AARCH64)
|
||||||
|
if(NOT MSVC)
|
||||||
# Define the list of NEON options to check
|
# Define the list of NEON options to check
|
||||||
set(NEON_OPTIONS_LIST NEON_DOTPROD NEON_FP16 NEON_BF16)
|
set(NEON_OPTIONS_LIST NEON_DOTPROD NEON_FP16 NEON_BF16)
|
||||||
set(BASE_ARCHITECTURE "-march=armv8.2-a")
|
set(BASE_ARCHITECTURE "-march=armv8.2-a")
|
||||||
ocv_cpu_aarch64_baseline_merge_feature_options(NEON_OPTIONS_LIST CPU_BASELINE_FLAGS ${BASE_ARCHITECTURE})
|
ocv_cpu_aarch64_baseline_merge_feature_options(NEON_OPTIONS_LIST CPU_BASELINE_FLAGS ${BASE_ARCHITECTURE})
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
foreach(OPT ${CPU_BASELINE_REQUIRE})
|
foreach(OPT ${CPU_BASELINE_REQUIRE})
|
||||||
|
@ -1,13 +1,6 @@
|
|||||||
if("${CMAKE_CXX_COMPILER};${CMAKE_C_COMPILER};${CMAKE_CXX_COMPILER_LAUNCHER}" MATCHES "ccache")
|
if("${CMAKE_CXX_COMPILER};${CMAKE_C_COMPILER};${CMAKE_CXX_COMPILER_LAUNCHER}" MATCHES "ccache")
|
||||||
set(CMAKE_COMPILER_IS_CCACHE 1) # TODO: FIXIT Avoid setting of CMAKE_ variables
|
|
||||||
set(OPENCV_COMPILER_IS_CCACHE 1)
|
set(OPENCV_COMPILER_IS_CCACHE 1)
|
||||||
endif()
|
endif()
|
||||||
function(access_CMAKE_COMPILER_IS_CCACHE)
|
|
||||||
if(NOT OPENCV_SUPPRESS_DEPRECATIONS)
|
|
||||||
message(WARNING "DEPRECATED: CMAKE_COMPILER_IS_CCACHE is replaced to OPENCV_COMPILER_IS_CCACHE.")
|
|
||||||
endif()
|
|
||||||
endfunction()
|
|
||||||
variable_watch(CMAKE_COMPILER_IS_CCACHE access_CMAKE_COMPILER_IS_CCACHE)
|
|
||||||
if(ENABLE_CCACHE AND NOT OPENCV_COMPILER_IS_CCACHE)
|
if(ENABLE_CCACHE AND NOT OPENCV_COMPILER_IS_CCACHE)
|
||||||
# This works fine with Unix Makefiles and Ninja generators
|
# This works fine with Unix Makefiles and Ninja generators
|
||||||
find_host_program(CCACHE_PROGRAM ccache)
|
find_host_program(CCACHE_PROGRAM ccache)
|
||||||
@ -391,7 +384,7 @@ endif()
|
|||||||
|
|
||||||
# Apply "-Wl,--no-undefined" linker flags: https://github.com/opencv/opencv/pull/21347
|
# Apply "-Wl,--no-undefined" linker flags: https://github.com/opencv/opencv/pull/21347
|
||||||
if(NOT OPENCV_SKIP_LINK_NO_UNDEFINED)
|
if(NOT OPENCV_SKIP_LINK_NO_UNDEFINED)
|
||||||
if(UNIX AND (NOT APPLE OR NOT CMAKE_VERSION VERSION_LESS "3.2"))
|
if(UNIX AND ((NOT APPLE OR NOT CMAKE_VERSION VERSION_LESS "3.2") AND NOT CMAKE_SYSTEM_NAME MATCHES "OpenBSD"))
|
||||||
set(_option "-Wl,--no-undefined")
|
set(_option "-Wl,--no-undefined")
|
||||||
set(_saved_CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
|
set(_saved_CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
|
||||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${_option}") # requires CMake 3.2+ and CMP0056
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${_option}") # requires CMake 3.2+ and CMP0056
|
||||||
|
@ -136,11 +136,11 @@ macro(ocv_check_windows_crt_linkage)
|
|||||||
cmake_policy(GET CMP0091 MSVC_RUNTIME_SET_BY_ABSTRACTION)
|
cmake_policy(GET CMP0091 MSVC_RUNTIME_SET_BY_ABSTRACTION)
|
||||||
if(MSVC_RUNTIME_SET_BY_ABSTRACTION STREQUAL "NEW")
|
if(MSVC_RUNTIME_SET_BY_ABSTRACTION STREQUAL "NEW")
|
||||||
if(NOT BUILD_SHARED_LIBS AND BUILD_WITH_STATIC_CRT)
|
if(NOT BUILD_SHARED_LIBS AND BUILD_WITH_STATIC_CRT)
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} " /MT")
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
|
||||||
set(CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG} " /MTd")
|
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
|
||||||
else()
|
else()
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} " /MD")
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MD")
|
||||||
set(CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG} " /MDd")
|
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MDd")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
@ -270,6 +270,18 @@ find_python("${OPENCV_PYTHON3_VERSION}" "${MIN_VER_PYTHON3}" PYTHON3_LIBRARY PYT
|
|||||||
PYTHON3_INCLUDE_DIR PYTHON3_INCLUDE_DIR2 PYTHON3_PACKAGES_PATH
|
PYTHON3_INCLUDE_DIR PYTHON3_INCLUDE_DIR2 PYTHON3_PACKAGES_PATH
|
||||||
PYTHON3_NUMPY_INCLUDE_DIRS PYTHON3_NUMPY_VERSION)
|
PYTHON3_NUMPY_INCLUDE_DIRS PYTHON3_NUMPY_VERSION)
|
||||||
|
|
||||||
|
# Problem in numpy >=1.15 <1.17
|
||||||
|
OCV_OPTION(PYTHON3_LIMITED_API "Build with Python Limited API (not available with numpy >=1.15 <1.17)" NO
|
||||||
|
VISIBLE_IF PYTHON3_NUMPY_VERSION VERSION_LESS "1.15" OR NOT PYTHON3_NUMPY_VERSION VERSION_LESS "1.17")
|
||||||
|
if(PYTHON3_LIMITED_API)
|
||||||
|
set(_default_ver "0x03060000")
|
||||||
|
if(PYTHON3_VERSION_STRING VERSION_LESS "3.6")
|
||||||
|
# fix for older pythons
|
||||||
|
set(_default_ver "0x030${PYTHON3_VERSION_MINOR}0000")
|
||||||
|
endif()
|
||||||
|
set(PYTHON3_LIMITED_API_VERSION ${_default_ver} CACHE STRING "Minimal Python version for Limited API")
|
||||||
|
endif()
|
||||||
|
|
||||||
if(PYTHON_DEFAULT_EXECUTABLE)
|
if(PYTHON_DEFAULT_EXECUTABLE)
|
||||||
set(PYTHON_DEFAULT_AVAILABLE "TRUE")
|
set(PYTHON_DEFAULT_AVAILABLE "TRUE")
|
||||||
elseif(PYTHON3_EXECUTABLE AND PYTHON3INTERP_FOUND)
|
elseif(PYTHON3_EXECUTABLE AND PYTHON3INTERP_FOUND)
|
||||||
|
@ -57,6 +57,18 @@ if(CANN_INSTALL_DIR)
|
|||||||
set(HAVE_CANN OFF)
|
set(HAVE_CANN OFF)
|
||||||
return()
|
return()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# * libacl_dvpp_mpi.so
|
||||||
|
set(libacl_dvpp_mpi "${CANN_INSTALL_DIR}/lib64")
|
||||||
|
find_library(found_libacldvppmpi NAMES acl_dvpp_mpi PATHS ${libacl_dvpp_mpi} NO_DEFAULT_PATH)
|
||||||
|
if(found_libacldvppmpi)
|
||||||
|
set(libacl_dvpp_mpi ${found_libacldvppmpi})
|
||||||
|
message(STATUS "CANN: libacl_dvpp_mpi.so is found at ${libacl_dvpp_mpi}")
|
||||||
|
else()
|
||||||
|
message(STATUS "CANN: Missing libacl_dvpp_mpi.so. Turning off HAVE_CANN")
|
||||||
|
set(HAVE_CANN OFF)
|
||||||
|
return()
|
||||||
|
endif()
|
||||||
# * libgraph.so
|
# * libgraph.so
|
||||||
set(lib_graph "${CANN_INSTALL_DIR}/compiler/lib64")
|
set(lib_graph "${CANN_INSTALL_DIR}/compiler/lib64")
|
||||||
find_library(found_lib_graph NAMES graph PATHS ${lib_graph} NO_DEFAULT_PATH)
|
find_library(found_lib_graph NAMES graph PATHS ${lib_graph} NO_DEFAULT_PATH)
|
||||||
@ -105,6 +117,7 @@ if(CANN_INSTALL_DIR)
|
|||||||
list(APPEND libs_cann ${lib_opsproto})
|
list(APPEND libs_cann ${lib_opsproto})
|
||||||
list(APPEND libs_cann ${lib_graph})
|
list(APPEND libs_cann ${lib_graph})
|
||||||
list(APPEND libs_cann ${lib_ge_compiler})
|
list(APPEND libs_cann ${lib_ge_compiler})
|
||||||
|
list(APPEND libs_cann ${libacl_dvpp_mpi})
|
||||||
|
|
||||||
# * lib_graph_base.so
|
# * lib_graph_base.so
|
||||||
if(NOT CANN_VERSION_BELOW_6_3_ALPHA002)
|
if(NOT CANN_VERSION_BELOW_6_3_ALPHA002)
|
||||||
|
@ -89,15 +89,11 @@ else()
|
|||||||
ocv_update(OPENCV_ANDROID_NAMESPACE_DECLARATION "")
|
ocv_update(OPENCV_ANDROID_NAMESPACE_DECLARATION "")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# set android gradle java version in build.gradle and set aidl config
|
|
||||||
if(NOT (ANDROID_GRADLE_PLUGIN_VERSION VERSION_LESS "8.0.0"))
|
if(NOT (ANDROID_GRADLE_PLUGIN_VERSION VERSION_LESS "8.0.0"))
|
||||||
# AGP-8.0 requires a minimum JDK version of JDK17
|
# AGP-8.0 requires a minimum JDK version of JDK17
|
||||||
ocv_update(ANDROID_GRADLE_JAVA_VERSION_INIT "17")
|
ocv_update(ANDROID_GRADLE_JAVA_VERSION_INIT "17")
|
||||||
# Enable aidl configuration for OpenCV compile with AGP-8.0
|
|
||||||
ocv_update(ANDROID_GRADLE_BUILD_FEATURE_AIDL "buildFeatures { aidl true }")
|
|
||||||
else()
|
else()
|
||||||
ocv_update(ANDROID_GRADLE_JAVA_VERSION_INIT "1_8")
|
ocv_update(ANDROID_GRADLE_JAVA_VERSION_INIT "1_8")
|
||||||
ocv_update(ANDROID_GRADLE_BUILD_FEATURE_AIDL "")
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(ANDROID_GRADLE_JAVA_VERSION "${ANDROID_GRADLE_JAVA_VERSION_INIT}" CACHE STRING "Android Gradle Java version")
|
set(ANDROID_GRADLE_JAVA_VERSION "${ANDROID_GRADLE_JAVA_VERSION_INIT}" CACHE STRING "Android Gradle Java version")
|
||||||
|
@ -9,6 +9,9 @@ How to use the OpenCV parallel_for_ to parallelize your code {#tutorial_how_to_u
|
|||||||
| -: | :- |
|
| -: | :- |
|
||||||
| Compatibility | OpenCV >= 3.0 |
|
| Compatibility | OpenCV >= 3.0 |
|
||||||
|
|
||||||
|
|
||||||
|
@note See also C++ lambda usage with parallel for in this [tutorial](@ref tutorial_how_to_use_OpenCV_parallel_for_new).
|
||||||
|
|
||||||
Goal
|
Goal
|
||||||
----
|
----
|
||||||
|
|
||||||
@ -20,7 +23,7 @@ If you want more information about multithreading, you will have to refer to a r
|
|||||||
to remain simple.
|
to remain simple.
|
||||||
|
|
||||||
Precondition
|
Precondition
|
||||||
----
|
------------
|
||||||
|
|
||||||
The first precondition is to have OpenCV built with a parallel framework.
|
The first precondition is to have OpenCV built with a parallel framework.
|
||||||
In OpenCV 3.2, the following parallel frameworks are available in that order:
|
In OpenCV 3.2, the following parallel frameworks are available in that order:
|
||||||
@ -50,7 +53,7 @@ We will use the example of drawing a Mandelbrot set to show how from a regular s
|
|||||||
the code to parallelize the computation.
|
the code to parallelize the computation.
|
||||||
|
|
||||||
Theory
|
Theory
|
||||||
-----------
|
------
|
||||||
|
|
||||||
The Mandelbrot set definition has been named in tribute to the mathematician Benoit Mandelbrot by the mathematician
|
The Mandelbrot set definition has been named in tribute to the mathematician Benoit Mandelbrot by the mathematician
|
||||||
Adrien Douady. It has been famous outside of the mathematics field as the image representation is an example of a
|
Adrien Douady. It has been famous outside of the mathematics field as the image representation is an example of a
|
||||||
@ -69,7 +72,7 @@ Here, we will just introduce the formula to draw the Mandelbrot set (from the me
|
|||||||
> \f[\limsup_{n\to\infty}|z_{n+1}|\leqslant2\f]
|
> \f[\limsup_{n\to\infty}|z_{n+1}|\leqslant2\f]
|
||||||
|
|
||||||
Pseudocode
|
Pseudocode
|
||||||
-----------
|
----------
|
||||||
|
|
||||||
A simple algorithm to generate a representation of the Mandelbrot set is called the
|
A simple algorithm to generate a representation of the Mandelbrot set is called the
|
||||||
["escape time algorithm"](https://en.wikipedia.org/wiki/Mandelbrot_set#Escape_time_algorithm).
|
["escape time algorithm"](https://en.wikipedia.org/wiki/Mandelbrot_set#Escape_time_algorithm).
|
||||||
@ -110,10 +113,10 @@ On this figure, we recall that the real part of a complex number is on the x-axi
|
|||||||
You can see that the whole shape can be repeatedly visible if we zoom at particular locations.
|
You can see that the whole shape can be repeatedly visible if we zoom at particular locations.
|
||||||
|
|
||||||
Implementation
|
Implementation
|
||||||
-----------
|
--------------
|
||||||
|
|
||||||
Escape time algorithm implementation
|
Escape time algorithm implementation
|
||||||
--------------------------
|
------------------------------------
|
||||||
|
|
||||||
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-escape-time-algorithm
|
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-escape-time-algorithm
|
||||||
|
|
||||||
@ -121,7 +124,7 @@ Here, we used the [`std::complex`](http://en.cppreference.com/w/cpp/numeric/comp
|
|||||||
complex number. This function performs the test to check if the pixel is in set or not and returns the "escaped" iteration.
|
complex number. This function performs the test to check if the pixel is in set or not and returns the "escaped" iteration.
|
||||||
|
|
||||||
Sequential Mandelbrot implementation
|
Sequential Mandelbrot implementation
|
||||||
--------------------------
|
------------------------------------
|
||||||
|
|
||||||
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-sequential
|
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-sequential
|
||||||
|
|
||||||
@ -149,7 +152,7 @@ The green curve corresponds to a simple linear scale transformation, the blue on
|
|||||||
and you can observe how the lowest values will be boosted when looking at the slope at these positions.
|
and you can observe how the lowest values will be boosted when looking at the slope at these positions.
|
||||||
|
|
||||||
Parallel Mandelbrot implementation
|
Parallel Mandelbrot implementation
|
||||||
--------------------------
|
----------------------------------
|
||||||
|
|
||||||
When looking at the sequential implementation, we can notice that each pixel is computed independently. To optimize the
|
When looking at the sequential implementation, we can notice that each pixel is computed independently. To optimize the
|
||||||
computation, we can perform multiple pixel calculations in parallel, by exploiting the multi-core architecture of modern
|
computation, we can perform multiple pixel calculations in parallel, by exploiting the multi-core architecture of modern
|
||||||
@ -181,7 +184,7 @@ C++ 11 standard allows to simplify the parallel implementation by get rid of the
|
|||||||
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-parallel-call-cxx11
|
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-parallel-call-cxx11
|
||||||
|
|
||||||
Results
|
Results
|
||||||
-----------
|
-------
|
||||||
|
|
||||||
You can find the full tutorial code [here](https://github.com/opencv/opencv/blob/5.x/samples/cpp/tutorial_code/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.cpp).
|
You can find the full tutorial code [here](https://github.com/opencv/opencv/blob/5.x/samples/cpp/tutorial_code/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.cpp).
|
||||||
The performance of the parallel implementation depends on the type of CPU you have. For instance, on 4 cores / 8 threads
|
The performance of the parallel implementation depends on the type of CPU you have. For instance, on 4 cores / 8 threads
|
||||||
|
@ -18,7 +18,7 @@ This tutorial assumes you have the following installed and configured:
|
|||||||
- Android Studio
|
- Android Studio
|
||||||
- JDK
|
- JDK
|
||||||
- Android SDK and NDK
|
- Android SDK and NDK
|
||||||
- OpenCV for Android SDK from official [release page on Github](https://github.com/opencv/opencv/releases)
|
- Optional: OpenCV for Android SDK from official [release page on Github](https://github.com/opencv/opencv/releases)
|
||||||
or [SourceForge](https://sourceforge.net/projects/opencvlibrary/). Advanced: as alternative the SDK may be
|
or [SourceForge](https://sourceforge.net/projects/opencvlibrary/). Advanced: as alternative the SDK may be
|
||||||
built from source code by [instruction on wiki](https://github.com/opencv/opencv/wiki/Custom-OpenCV-Android-SDK-and-AAR-package-build).
|
built from source code by [instruction on wiki](https://github.com/opencv/opencv/wiki/Custom-OpenCV-Android-SDK-and-AAR-package-build).
|
||||||
|
|
||||||
@ -26,8 +26,9 @@ If you need help with anything of the above, you may refer to our @ref tutorial_
|
|||||||
|
|
||||||
If you encounter any error after thoroughly following these steps, feel free to contact us via OpenCV [forum](https://forum.opencv.org). We'll do our best to help you out.
|
If you encounter any error after thoroughly following these steps, feel free to contact us via OpenCV [forum](https://forum.opencv.org). We'll do our best to help you out.
|
||||||
|
|
||||||
Hello OpenCV sample
|
|
||||||
-------------------
|
Hello OpenCV sample with SDK
|
||||||
|
----------------------------
|
||||||
|
|
||||||
In this section we're gonna create a simple app that does nothing but OpenCV loading. In next section we'll extend it to support camera.
|
In this section we're gonna create a simple app that does nothing but OpenCV loading. In next section we'll extend it to support camera.
|
||||||
|
|
||||||
@ -75,11 +76,10 @@ In addition to this instruction you can use some video guide, for example [this
|
|||||||
@endcode
|
@endcode
|
||||||
The fix was found [here](https://stackoverflow.com/questions/73225714/import-opencv-sdk-to-android-studio-chipmunk)
|
The fix was found [here](https://stackoverflow.com/questions/73225714/import-opencv-sdk-to-android-studio-chipmunk)
|
||||||
|
|
||||||
6. OpenCV project uses `aidl` and `buildConfig` features. Please enable them in
|
6. OpenCV project uses `buildConfig` feature. Please enable it in
|
||||||
`MyApplication/OpenCV/build.gradle` file to `android` block:
|
`MyApplication/OpenCV/build.gradle` file to `android` block:
|
||||||
@code{.gradle}
|
@code{.gradle}
|
||||||
buildFeatures{
|
buildFeatures{
|
||||||
aidl true
|
|
||||||
buildConfig true
|
buildConfig true
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -115,6 +115,43 @@ In addition to this instruction you can use some video guide, for example [this
|
|||||||
|
|
||||||

|

|
||||||
|
|
||||||
|
Hello OpenCV sample with Maven Central
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
Since OpenCV 4.9.0, the OpenCV for Android package is available on Maven Central and may be installed
|
||||||
|
automatically as a Gradle dependency. In this section we're gonna create a simple app that does nothing
|
||||||
|
but OpenCV loading with Maven Central.
|
||||||
|
|
||||||
|
1. Open Android Studio and create empty project by choosing ***Empty Views Activity***
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
2. Setup the project:
|
||||||
|
- Choose ***Java*** language
|
||||||
|
- Choose ***Groovy DSL*** build configuration language
|
||||||
|
- Choose ***Minimum SDK*** with a version number not lower than the one OpenCV supports. For 4.9.0 the minimal SDK version is 21.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
3. Edit `build.gradle` and add OpenCV library to Dependencies list like this:
|
||||||
|
@code{.gradle}
|
||||||
|
dependencies {
|
||||||
|
implementation 'org.opencv:opencv:4.9.0'
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
`4.9.0` may be replaced by any version available as [official release](https://central.sonatype.com/artifact/org.opencv/opencv).
|
||||||
|
|
||||||
|
4. Before using any OpenCV function you have to load the library first. If your application includes other
|
||||||
|
OpenCV-dependent native libraries you should load them ***after*** OpenCV initialization. Add the following
|
||||||
|
code to load the library at app start:
|
||||||
|
@snippet samples/android/tutorial-1-camerapreview/src/org/opencv/samples/tutorial1/Tutorial1Activity.java ocv_loader_init
|
||||||
|
Like this:
|
||||||
|

|
||||||
|
|
||||||
|
5. Choose a device to check the sample on and run the code by pressing `run` button
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
Camera view sample
|
Camera view sample
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
|
@ -378,6 +378,9 @@ our OpenCV library that we use in our projects. Start up a command window and en
|
|||||||
|
|
||||||
setx OpenCV_DIR D:\OpenCV\build\x64\vc16 (suggested for Visual Studio 2019 - 64 bit Windows)
|
setx OpenCV_DIR D:\OpenCV\build\x64\vc16 (suggested for Visual Studio 2019 - 64 bit Windows)
|
||||||
setx OpenCV_DIR D:\OpenCV\build\x86\vc16 (suggested for Visual Studio 2019 - 32 bit Windows)
|
setx OpenCV_DIR D:\OpenCV\build\x86\vc16 (suggested for Visual Studio 2019 - 32 bit Windows)
|
||||||
|
|
||||||
|
setx OpenCV_DIR D:\OpenCV\build\x64\vc17 (suggested for Visual Studio 2022 - 64 bit Windows)
|
||||||
|
setx OpenCV_DIR D:\OpenCV\build\x86\vc17 (suggested for Visual Studio 2022 - 32 bit Windows)
|
||||||
@endcode
|
@endcode
|
||||||
Here the directory is where you have your OpenCV binaries (*extracted* or *built*). You can have
|
Here the directory is where you have your OpenCV binaries (*extracted* or *built*). You can have
|
||||||
different platform (e.g. x64 instead of x86) or compiler type, so substitute appropriate value.
|
different platform (e.g. x64 instead of x86) or compiler type, so substitute appropriate value.
|
||||||
|
@ -7,10 +7,8 @@
|
|||||||
|
|
||||||
#include <opencv2/core/mat.hpp>
|
#include <opencv2/core/mat.hpp>
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
//#include <future>
|
//#include <future>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace cv {
|
namespace cv {
|
||||||
|
|
||||||
@ -69,7 +67,6 @@ public:
|
|||||||
|
|
||||||
CV_WRAP bool valid() const CV_NOEXCEPT;
|
CV_WRAP bool valid() const CV_NOEXCEPT;
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
inline AsyncArray(AsyncArray&& o) { p = o.p; o.p = NULL; }
|
inline AsyncArray(AsyncArray&& o) { p = o.p; o.p = NULL; }
|
||||||
inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
|
inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
|
||||||
|
|
||||||
@ -89,7 +86,6 @@ public:
|
|||||||
std::future<Mat> getFutureMat() const;
|
std::future<Mat> getFutureMat() const;
|
||||||
std::future<UMat> getFutureUMat() const;
|
std::future<UMat> getFutureUMat() const;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
// PImpl
|
// PImpl
|
||||||
|
@ -147,7 +147,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
|
#if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
|
||||||
# include<riscv-vector.h>
|
# include<riscv_vector.h>
|
||||||
# define CV_RVV071 1
|
# define CV_RVV071 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -476,6 +476,8 @@ Cv64suf;
|
|||||||
#define CV_WRAP_MAPPABLE(mappable)
|
#define CV_WRAP_MAPPABLE(mappable)
|
||||||
#define CV_WRAP_PHANTOM(phantom_header)
|
#define CV_WRAP_PHANTOM(phantom_header)
|
||||||
#define CV_WRAP_DEFAULT(val)
|
#define CV_WRAP_DEFAULT(val)
|
||||||
|
/* Indicates that the function parameter has filesystem path semantics */
|
||||||
|
#define CV_WRAP_FILE_PATH
|
||||||
|
|
||||||
/****************************************************************************************\
|
/****************************************************************************************\
|
||||||
* Matrix type (Mat) *
|
* Matrix type (Mat) *
|
||||||
@ -755,89 +757,44 @@ __CV_ENUM_FLAGS_BITWISE_XOR_EQ (EnumType, EnumType)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/****************************************************************************************\
|
|
||||||
* CV_NODISCARD attribute (deprecated, GCC only) *
|
|
||||||
* DONT USE: use instead the standard CV_NODISCARD_STD macro above *
|
|
||||||
* this legacy method silently fails to issue warning until some version *
|
|
||||||
* after gcc 6.3.0. Yet with gcc 7+ you can use the above standard method *
|
|
||||||
* which makes this method useless. Don't use it. *
|
|
||||||
* @deprecated use instead CV_NODISCARD_STD *
|
|
||||||
\****************************************************************************************/
|
|
||||||
#ifndef CV_NODISCARD
|
|
||||||
# if defined(__GNUC__)
|
|
||||||
# define CV_NODISCARD __attribute__((__warn_unused_result__))
|
|
||||||
# elif defined(__clang__) && defined(__has_attribute)
|
|
||||||
# if __has_attribute(__warn_unused_result__)
|
|
||||||
# define CV_NODISCARD __attribute__((__warn_unused_result__))
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
#ifndef CV_NODISCARD
|
|
||||||
# define CV_NODISCARD /* nothing by default */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/****************************************************************************************\
|
/****************************************************************************************\
|
||||||
* C++ 11 *
|
* C++ 11 *
|
||||||
\****************************************************************************************/
|
\****************************************************************************************/
|
||||||
#ifndef CV_CXX11
|
#ifdef __cplusplus
|
||||||
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)
|
// MSVC was stuck at __cplusplus == 199711L for a long time, even where it supports C++11,
|
||||||
# define CV_CXX11 1
|
// so check _MSC_VER instead. See:
|
||||||
# endif
|
// <https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus>
|
||||||
#else
|
# if defined(_MSC_VER)
|
||||||
# if CV_CXX11 == 0
|
# if _MSC_VER < 1800
|
||||||
# undef CV_CXX11
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
#ifndef CV_CXX11
|
|
||||||
# error "OpenCV 4.x+ requires enabled C++11 support"
|
# error "OpenCV 4.x+ requires enabled C++11 support"
|
||||||
|
# endif
|
||||||
|
# elif __cplusplus < 201103L
|
||||||
|
# error "OpenCV 4.x+ requires enabled C++11 support"
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_CXX11
|
||||||
|
# define CV_CXX11 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define CV_CXX_MOVE_SEMANTICS 1
|
|
||||||
#define CV_CXX_MOVE(x) std::move(x)
|
|
||||||
#define CV_CXX_STD_ARRAY 1
|
|
||||||
#include <array>
|
|
||||||
#ifndef CV_OVERRIDE
|
#ifndef CV_OVERRIDE
|
||||||
# define CV_OVERRIDE override
|
# define CV_OVERRIDE override
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef CV_FINAL
|
#ifndef CV_FINAL
|
||||||
# define CV_FINAL final
|
# define CV_FINAL final
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef CV_NOEXCEPT
|
#ifndef CV_NOEXCEPT
|
||||||
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
|
|
||||||
# define CV_NOEXCEPT noexcept
|
# define CV_NOEXCEPT noexcept
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
#ifndef CV_NOEXCEPT
|
|
||||||
# define CV_NOEXCEPT
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef CV_CONSTEXPR
|
#ifndef CV_CONSTEXPR
|
||||||
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
|
|
||||||
# define CV_CONSTEXPR constexpr
|
# define CV_CONSTEXPR constexpr
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
#ifndef CV_CONSTEXPR
|
|
||||||
# define CV_CONSTEXPR
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Integer types portability
|
// Integer types portability
|
||||||
#ifdef OPENCV_STDINT_HEADER
|
#ifdef __cplusplus
|
||||||
#include OPENCV_STDINT_HEADER
|
|
||||||
#elif defined(__cplusplus)
|
|
||||||
#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */
|
|
||||||
namespace cv {
|
|
||||||
typedef signed char int8_t;
|
|
||||||
typedef unsigned char uint8_t;
|
|
||||||
typedef signed short int16_t;
|
|
||||||
typedef unsigned short uint16_t;
|
|
||||||
typedef signed int int32_t;
|
|
||||||
typedef unsigned int uint32_t;
|
|
||||||
typedef signed __int64 int64_t;
|
|
||||||
typedef unsigned __int64 uint64_t;
|
|
||||||
}
|
|
||||||
#elif defined(_MSC_VER) || __cplusplus >= 201103L
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
namespace cv {
|
namespace cv {
|
||||||
using std::int8_t;
|
using std::int8_t;
|
||||||
@ -849,19 +806,6 @@ using std::uint32_t;
|
|||||||
using std::int64_t;
|
using std::int64_t;
|
||||||
using std::uint64_t;
|
using std::uint64_t;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
#include <stdint.h>
|
|
||||||
namespace cv {
|
|
||||||
typedef ::int8_t int8_t;
|
|
||||||
typedef ::uint8_t uint8_t;
|
|
||||||
typedef ::int16_t int16_t;
|
|
||||||
typedef ::uint16_t uint16_t;
|
|
||||||
typedef ::int32_t int32_t;
|
|
||||||
typedef ::uint32_t uint32_t;
|
|
||||||
typedef ::int64_t int64_t;
|
|
||||||
typedef ::uint64_t uint64_t;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#else // pure C
|
#else // pure C
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#endif
|
#endif
|
||||||
|
@ -52,10 +52,8 @@ public:
|
|||||||
*/
|
*/
|
||||||
void setException(const cv::Exception& exception);
|
void setException(const cv::Exception& exception);
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
explicit AsyncPromise(AsyncPromise&& o) { p = o.p; o.p = NULL; }
|
explicit AsyncPromise(AsyncPromise&& o) { p = o.p; o.p = NULL; }
|
||||||
AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
|
AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
// PImpl
|
// PImpl
|
||||||
|
@ -8,14 +8,8 @@
|
|||||||
#ifndef CV__EXCEPTION_PTR
|
#ifndef CV__EXCEPTION_PTR
|
||||||
# if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2
|
# if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2
|
||||||
# define CV__EXCEPTION_PTR 0 // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938
|
# define CV__EXCEPTION_PTR 0 // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938
|
||||||
# elif defined(CV_CXX11)
|
# else
|
||||||
# define CV__EXCEPTION_PTR 1
|
# define CV__EXCEPTION_PTR 1
|
||||||
# elif defined(_MSC_VER)
|
|
||||||
# define CV__EXCEPTION_PTR (_MSC_VER >= 1600)
|
|
||||||
# elif defined(__clang__)
|
|
||||||
# define CV__EXCEPTION_PTR 0 // C++11 only (see above)
|
|
||||||
# elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__)
|
|
||||||
# define CV__EXCEPTION_PTR (__GXX_EXPERIMENTAL_CXX0X__ > 0)
|
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
#ifndef CV__EXCEPTION_PTR
|
#ifndef CV__EXCEPTION_PTR
|
||||||
|
@ -61,8 +61,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT)
|
#if !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT)
|
||||||
#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 \
|
#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
|
||||||
&& defined(CV_CXX11) && defined(CV_CXX_STD_ARRAY)
|
|
||||||
#include <unsupported/Eigen/CXX11/Tensor>
|
#include <unsupported/Eigen/CXX11/Tensor>
|
||||||
#define OPENCV_EIGEN_TENSOR_SUPPORT 1
|
#define OPENCV_EIGEN_TENSOR_SUPPORT 1
|
||||||
#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
|
#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -53,6 +53,7 @@
|
|||||||
|
|
||||||
#include "opencv2/core/bufferpool.hpp"
|
#include "opencv2/core/bufferpool.hpp"
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
|
@ -386,10 +386,8 @@ public:
|
|||||||
static Vec randn(_Tp a, _Tp b);
|
static Vec randn(_Tp a, _Tp b);
|
||||||
static Vec randu(_Tp a, _Tp b);
|
static Vec randu(_Tp a, _Tp b);
|
||||||
static Vec zeros();
|
static Vec zeros();
|
||||||
#ifdef CV_CXX11
|
|
||||||
static Vec diag(_Tp alpha) = delete;
|
static Vec diag(_Tp alpha) = delete;
|
||||||
static Vec eye() = delete;
|
static Vec eye() = delete;
|
||||||
#endif
|
|
||||||
|
|
||||||
//! per-element multiplication
|
//! per-element multiplication
|
||||||
Vec mul(const Vec<_Tp, cn>& v) const;
|
Vec mul(const Vec<_Tp, cn>& v) const;
|
||||||
@ -412,9 +410,7 @@ public:
|
|||||||
const _Tp& operator ()(int i) const;
|
const _Tp& operator ()(int i) const;
|
||||||
_Tp& operator ()(int i);
|
_Tp& operator ()(int i);
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
Vec<_Tp, cn>& operator=(const Vec<_Tp, cn>& rhs) = default;
|
Vec<_Tp, cn>& operator=(const Vec<_Tp, cn>& rhs) = default;
|
||||||
#endif
|
|
||||||
|
|
||||||
Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp);
|
Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp);
|
||||||
Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp);
|
Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp);
|
||||||
|
@ -28,7 +28,7 @@
|
|||||||
#define OPENCV_CORE_QUATERNION_INL_HPP
|
#define OPENCV_CORE_QUATERNION_INL_HPP
|
||||||
|
|
||||||
#ifndef OPENCV_CORE_QUATERNION_HPP
|
#ifndef OPENCV_CORE_QUATERNION_HPP
|
||||||
#erorr This is not a standalone header. Include quaternion.hpp instead.
|
#error This is not a standalone header. Include quaternion.hpp instead.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//@cond IGNORE
|
//@cond IGNORE
|
||||||
|
@ -9,8 +9,6 @@
|
|||||||
|
|
||||||
//#define OPENCV_DISABLE_ALLOCATOR_STATS
|
//#define OPENCV_DISABLE_ALLOCATOR_STATS
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
|
||||||
#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE
|
#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE
|
||||||
@ -26,14 +24,6 @@
|
|||||||
#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE long long
|
#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE long long
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#else // CV_CXX11
|
|
||||||
|
|
||||||
#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE
|
|
||||||
#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE int // CV_XADD supports int only
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // CV_CXX11
|
|
||||||
|
|
||||||
namespace cv { namespace utils {
|
namespace cv { namespace utils {
|
||||||
|
|
||||||
#ifdef CV__ALLOCATOR_STATS_LOG
|
#ifdef CV__ALLOCATOR_STATS_LOG
|
||||||
@ -59,7 +49,7 @@ public:
|
|||||||
void onAllocate(size_t /*sz*/) {}
|
void onAllocate(size_t /*sz*/) {}
|
||||||
void onFree(size_t /*sz*/) {}
|
void onFree(size_t /*sz*/) {}
|
||||||
|
|
||||||
#elif defined(CV_CXX11)
|
#else
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t;
|
typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t;
|
||||||
@ -104,49 +94,7 @@ public:
|
|||||||
#endif
|
#endif
|
||||||
curr -= (counter_t)sz;
|
curr -= (counter_t)sz;
|
||||||
}
|
}
|
||||||
|
#endif // OPENCV_DISABLE_ALLOCATOR_STATS
|
||||||
#else // non C++11
|
|
||||||
|
|
||||||
protected:
|
|
||||||
typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t;
|
|
||||||
volatile counter_t curr, total, total_allocs, peak; // overflow is possible, CV_XADD operates with 'int' only
|
|
||||||
public:
|
|
||||||
AllocatorStatistics()
|
|
||||||
: curr(0), total(0), total_allocs(0), peak(0)
|
|
||||||
{}
|
|
||||||
~AllocatorStatistics() CV_OVERRIDE {}
|
|
||||||
|
|
||||||
uint64_t getCurrentUsage() const CV_OVERRIDE { return (uint64_t)curr; }
|
|
||||||
uint64_t getTotalUsage() const CV_OVERRIDE { return (uint64_t)total; }
|
|
||||||
uint64_t getNumberOfAllocations() const CV_OVERRIDE { return (uint64_t)total_allocs; }
|
|
||||||
uint64_t getPeakUsage() const CV_OVERRIDE { return (uint64_t)peak; }
|
|
||||||
|
|
||||||
void resetPeakUsage() CV_OVERRIDE { peak = curr; }
|
|
||||||
|
|
||||||
// Controller interface
|
|
||||||
void onAllocate(size_t sz)
|
|
||||||
{
|
|
||||||
#ifdef CV__ALLOCATOR_STATS_LOG
|
|
||||||
CV__ALLOCATOR_STATS_LOG(cv::format("allocate: %lld (curr=%lld)", (long long int)sz, (long long int)curr));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
counter_t new_curr = (counter_t)CV_XADD(&curr, (counter_t)sz) + (counter_t)sz;
|
|
||||||
|
|
||||||
peak = std::max((counter_t)peak, new_curr); // non-thread safe
|
|
||||||
|
|
||||||
//CV_XADD(&total, (uint64_t)sz); // overflow with int, non-reliable...
|
|
||||||
total += sz;
|
|
||||||
|
|
||||||
CV_XADD(&total_allocs, (counter_t)1);
|
|
||||||
}
|
|
||||||
void onFree(size_t sz)
|
|
||||||
{
|
|
||||||
#ifdef CV__ALLOCATOR_STATS_LOG
|
|
||||||
CV__ALLOCATOR_STATS_LOG(cv::format("free: %lld (curr=%lld)", (long long int)sz, (long long int)curr));
|
|
||||||
#endif
|
|
||||||
CV_XADD(&curr, (counter_t)-sz);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef CV__ALLOCATOR_STATS_LOG
|
#ifdef CV__ALLOCATOR_STATS_LOG
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
// of this distribution and at http://opencv.org/license.html.
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
//#undef CV_CXX11 // debug non C++11 mode
|
|
||||||
#include "opencv2/core/async.hpp"
|
#include "opencv2/core/async.hpp"
|
||||||
#include "opencv2/core/detail/async_promise.hpp"
|
#include "opencv2/core/detail/async_promise.hpp"
|
||||||
|
|
||||||
@ -16,11 +15,9 @@
|
|||||||
|
|
||||||
#ifndef OPENCV_DISABLE_THREAD_SUPPORT
|
#ifndef OPENCV_DISABLE_THREAD_SUPPORT
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace cv {
|
namespace cv {
|
||||||
|
|
||||||
@ -37,12 +34,8 @@ struct AsyncArray::Impl
|
|||||||
void releasePromise() CV_NOEXCEPT { CV_XADD(&refcount_promise, -1); if(1 == CV_XADD(&refcount, -1)) delete this; } \
|
void releasePromise() CV_NOEXCEPT { CV_XADD(&refcount_promise, -1); if(1 == CV_XADD(&refcount, -1)) delete this; } \
|
||||||
int refcount_promise;
|
int refcount_promise;
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
mutable std::mutex mtx;
|
mutable std::mutex mtx;
|
||||||
mutable std::condition_variable cond_var;
|
mutable std::condition_variable cond_var;
|
||||||
#else
|
|
||||||
mutable cv::Mutex mtx;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
mutable bool has_result; // Mat, UMat or exception
|
mutable bool has_result; // Mat, UMat or exception
|
||||||
|
|
||||||
@ -88,11 +81,7 @@ struct AsyncArray::Impl
|
|||||||
if (!wait_for(timeoutNs))
|
if (!wait_for(timeoutNs))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
#ifdef CV_CXX11
|
|
||||||
std::unique_lock<std::mutex> lock(mtx);
|
std::unique_lock<std::mutex> lock(mtx);
|
||||||
#else
|
|
||||||
cv::AutoLock lock(mtx);
|
|
||||||
#endif
|
|
||||||
if (has_result)
|
if (has_result)
|
||||||
{
|
{
|
||||||
if (!result_mat.empty())
|
if (!result_mat.empty())
|
||||||
@ -145,7 +134,6 @@ struct AsyncArray::Impl
|
|||||||
if (timeoutNs == 0)
|
if (timeoutNs == 0)
|
||||||
return has_result;
|
return has_result;
|
||||||
CV_LOG_INFO(NULL, "Waiting for async result ...");
|
CV_LOG_INFO(NULL, "Waiting for async result ...");
|
||||||
#ifdef CV_CXX11
|
|
||||||
std::unique_lock<std::mutex> lock(mtx);
|
std::unique_lock<std::mutex> lock(mtx);
|
||||||
const auto cond_pred = [&]{ return has_result == true; };
|
const auto cond_pred = [&]{ return has_result == true; };
|
||||||
if (timeoutNs > 0)
|
if (timeoutNs > 0)
|
||||||
@ -156,9 +144,6 @@ struct AsyncArray::Impl
|
|||||||
CV_Assert(has_result);
|
CV_Assert(has_result);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
CV_Error(Error::StsNotImplemented, "OpenCV has been built without async waiting support (C++11 is required)");
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
AsyncArray getArrayResult()
|
AsyncArray getArrayResult()
|
||||||
@ -175,11 +160,7 @@ struct AsyncArray::Impl
|
|||||||
{
|
{
|
||||||
if (future_is_returned && refcount_future == 0)
|
if (future_is_returned && refcount_future == 0)
|
||||||
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
|
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
|
||||||
#ifdef CV_CXX11
|
|
||||||
std::unique_lock<std::mutex> lock(mtx);
|
std::unique_lock<std::mutex> lock(mtx);
|
||||||
#else
|
|
||||||
cv::AutoLock lock(mtx);
|
|
||||||
#endif
|
|
||||||
CV_Assert(!has_result);
|
CV_Assert(!has_result);
|
||||||
int k = value.kind();
|
int k = value.kind();
|
||||||
if (k == _InputArray::UMAT)
|
if (k == _InputArray::UMAT)
|
||||||
@ -193,9 +174,7 @@ struct AsyncArray::Impl
|
|||||||
value.copyTo(*result_mat.get());
|
value.copyTo(*result_mat.get());
|
||||||
}
|
}
|
||||||
has_result = true;
|
has_result = true;
|
||||||
#ifdef CV_CXX11
|
|
||||||
cond_var.notify_all();
|
cond_var.notify_all();
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CV__EXCEPTION_PTR
|
#if CV__EXCEPTION_PTR
|
||||||
@ -203,18 +182,12 @@ struct AsyncArray::Impl
|
|||||||
{
|
{
|
||||||
if (future_is_returned && refcount_future == 0)
|
if (future_is_returned && refcount_future == 0)
|
||||||
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
|
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
|
||||||
#ifdef CV_CXX11
|
|
||||||
std::unique_lock<std::mutex> lock(mtx);
|
std::unique_lock<std::mutex> lock(mtx);
|
||||||
#else
|
|
||||||
cv::AutoLock lock(mtx);
|
|
||||||
#endif
|
|
||||||
CV_Assert(!has_result);
|
CV_Assert(!has_result);
|
||||||
has_exception = true;
|
has_exception = true;
|
||||||
exception = e;
|
exception = e;
|
||||||
has_result = true;
|
has_result = true;
|
||||||
#ifdef CV_CXX11
|
|
||||||
cond_var.notify_all();
|
cond_var.notify_all();
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -222,18 +195,12 @@ struct AsyncArray::Impl
|
|||||||
{
|
{
|
||||||
if (future_is_returned && refcount_future == 0)
|
if (future_is_returned && refcount_future == 0)
|
||||||
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
|
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
|
||||||
#ifdef CV_CXX11
|
|
||||||
std::unique_lock<std::mutex> lock(mtx);
|
std::unique_lock<std::mutex> lock(mtx);
|
||||||
#else
|
|
||||||
cv::AutoLock lock(mtx);
|
|
||||||
#endif
|
|
||||||
CV_Assert(!has_result);
|
CV_Assert(!has_result);
|
||||||
has_exception = true;
|
has_exception = true;
|
||||||
cv_exception = e;
|
cv_exception = e;
|
||||||
has_result = true;
|
has_result = true;
|
||||||
#ifdef CV_CXX11
|
|
||||||
cond_var.notify_all();
|
cond_var.notify_all();
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1952,12 +1952,7 @@ void _OutputArray::move(UMat& u) const
|
|||||||
int k = kind();
|
int k = kind();
|
||||||
if (k == UMAT)
|
if (k == UMAT)
|
||||||
{
|
{
|
||||||
#ifdef CV_CXX11
|
|
||||||
*(UMat*)obj = std::move(u);
|
*(UMat*)obj = std::move(u);
|
||||||
#else
|
|
||||||
*(UMat*)obj = u;
|
|
||||||
u.release();
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
else if (k == MAT)
|
else if (k == MAT)
|
||||||
{
|
{
|
||||||
@ -1992,12 +1987,7 @@ void _OutputArray::move(Mat& m) const
|
|||||||
}
|
}
|
||||||
else if (k == MAT)
|
else if (k == MAT)
|
||||||
{
|
{
|
||||||
#ifdef CV_CXX11
|
|
||||||
*(Mat*)obj = std::move(m);
|
*(Mat*)obj = std::move(m);
|
||||||
#else
|
|
||||||
*(Mat*)obj = m;
|
|
||||||
m.release();
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
else if (k == MATX)
|
else if (k == MATX)
|
||||||
{
|
{
|
||||||
|
@ -912,8 +912,7 @@ int getNumberOfCPUs_()
|
|||||||
* the minimum most value as it has high probablity of being right and safe.
|
* the minimum most value as it has high probablity of being right and safe.
|
||||||
* Return 1 if we get 0 or not found on all methods.
|
* Return 1 if we get 0 or not found on all methods.
|
||||||
*/
|
*/
|
||||||
#if defined CV_CXX11 \
|
#if !defined(__MINGW32__) /* not implemented (2020-03) */
|
||||||
&& !defined(__MINGW32__) /* not implemented (2020-03) */ \
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for this standard C++11 way, we do not return directly because
|
* Check for this standard C++11 way, we do not return directly because
|
||||||
|
@ -120,11 +120,15 @@ void* allocSingletonNewBuffer(size_t size) { return malloc(size); }
|
|||||||
#include <cstdlib> // std::abort
|
#include <cstdlib> // std::abort
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined __ANDROID__ || defined __unix__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __HAIKU__ || defined __Fuchsia__
|
#if defined __ANDROID__ || defined __unix__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __HAIKU__ || defined __Fuchsia__ || defined __QNX__
|
||||||
# include <unistd.h>
|
# include <unistd.h>
|
||||||
# include <fcntl.h>
|
# include <fcntl.h>
|
||||||
#if defined __QNX__
|
#if defined __QNX__
|
||||||
# include <sys/elf.h>
|
# include <sys/elf.h>
|
||||||
|
# include <sys/auxv.h>
|
||||||
|
using Elf64_auxv_t = auxv64_t;
|
||||||
|
# include <elfdefinitions.h>
|
||||||
|
const uint64_t AT_HWCAP = NT_GNU_HWCAP;
|
||||||
#else
|
#else
|
||||||
# include <elf.h>
|
# include <elf.h>
|
||||||
#endif
|
#endif
|
||||||
@ -251,7 +255,7 @@ std::wstring GetTempFileNameWinRT(std::wstring prefix)
|
|||||||
#include "omp.h"
|
#include "omp.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined __unix__ || defined __APPLE__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ || defined __GLIBC__ || defined __HAIKU__
|
#if defined __unix__ || defined __APPLE__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __GLIBC__ || defined __HAIKU__
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
@ -301,9 +305,7 @@ DECLARE_CV_CPUID_X86
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined CV_CXX11
|
#include <chrono>
|
||||||
#include <chrono>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
@ -562,7 +564,7 @@ struct HWFeatures
|
|||||||
}
|
}
|
||||||
#endif // CV_CPUID_X86
|
#endif // CV_CPUID_X86
|
||||||
|
|
||||||
#if defined __ANDROID__ || defined __linux__ || defined __FreeBSD__ || defined __QNX__
|
#if defined __ANDROID__ || defined __linux__ || defined __QNX__
|
||||||
#ifdef __aarch64__
|
#ifdef __aarch64__
|
||||||
have[CV_CPU_NEON] = true;
|
have[CV_CPU_NEON] = true;
|
||||||
have[CV_CPU_FP16] = true;
|
have[CV_CPU_FP16] = true;
|
||||||
@ -581,10 +583,12 @@ struct HWFeatures
|
|||||||
have[CV_CPU_NEON_DOTPROD] = (auxv.a_un.a_val & (1 << 20)) != 0; // HWCAP_ASIMDDP
|
have[CV_CPU_NEON_DOTPROD] = (auxv.a_un.a_val & (1 << 20)) != 0; // HWCAP_ASIMDDP
|
||||||
have[CV_CPU_NEON_FP16] = (auxv.a_un.a_val & (1 << 10)) != 0; // HWCAP_ASIMDHP
|
have[CV_CPU_NEON_FP16] = (auxv.a_un.a_val & (1 << 10)) != 0; // HWCAP_ASIMDHP
|
||||||
}
|
}
|
||||||
|
#if defined(AT_HWCAP2)
|
||||||
else if (auxv.a_type == AT_HWCAP2)
|
else if (auxv.a_type == AT_HWCAP2)
|
||||||
{
|
{
|
||||||
have[CV_CPU_NEON_BF16] = (auxv.a_un.a_val & (1 << 14)) != 0; // HWCAP2_BF16
|
have[CV_CPU_NEON_BF16] = (auxv.a_un.a_val & (1 << 14)) != 0; // HWCAP2_BF16
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
close(cpufile);
|
close(cpufile);
|
||||||
@ -611,7 +615,7 @@ struct HWFeatures
|
|||||||
CV_LOG_INFO(NULL, "- FP16 instructions is NOT enabled via build flags");
|
CV_LOG_INFO(NULL, "- FP16 instructions is NOT enabled via build flags");
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#elif defined __arm__ && !defined __FreeBSD__
|
#elif defined __arm__
|
||||||
int cpufile = open("/proc/self/auxv", O_RDONLY);
|
int cpufile = open("/proc/self/auxv", O_RDONLY);
|
||||||
|
|
||||||
if (cpufile >= 0)
|
if (cpufile >= 0)
|
||||||
@ -903,50 +907,15 @@ bool useOptimized(void)
|
|||||||
|
|
||||||
int64 getTickCount(void)
|
int64 getTickCount(void)
|
||||||
{
|
{
|
||||||
#if defined CV_CXX11
|
|
||||||
std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
|
std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
|
||||||
return (int64)now.time_since_epoch().count();
|
return (int64)now.time_since_epoch().count();
|
||||||
#elif defined _WIN32 || defined WINCE
|
|
||||||
LARGE_INTEGER counter;
|
|
||||||
QueryPerformanceCounter( &counter );
|
|
||||||
return (int64)counter.QuadPart;
|
|
||||||
#elif defined __MACH__ && defined __APPLE__
|
|
||||||
return (int64)mach_absolute_time();
|
|
||||||
#elif defined __unix__
|
|
||||||
struct timespec tp;
|
|
||||||
clock_gettime(CLOCK_MONOTONIC, &tp);
|
|
||||||
return (int64)tp.tv_sec*1000000000 + tp.tv_nsec;
|
|
||||||
#else
|
|
||||||
struct timeval tv;
|
|
||||||
gettimeofday(&tv, NULL);
|
|
||||||
return (int64)tv.tv_sec*1000000 + tv.tv_usec;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
double getTickFrequency(void)
|
double getTickFrequency(void)
|
||||||
{
|
{
|
||||||
#if defined CV_CXX11
|
|
||||||
using clock_period_t = std::chrono::steady_clock::duration::period;
|
using clock_period_t = std::chrono::steady_clock::duration::period;
|
||||||
double clock_freq = clock_period_t::den / clock_period_t::num;
|
double clock_freq = clock_period_t::den / clock_period_t::num;
|
||||||
return clock_freq;
|
return clock_freq;
|
||||||
#elif defined _WIN32 || defined WINCE
|
|
||||||
LARGE_INTEGER freq;
|
|
||||||
QueryPerformanceFrequency(&freq);
|
|
||||||
return (double)freq.QuadPart;
|
|
||||||
#elif defined __MACH__ && defined __APPLE__
|
|
||||||
static double freq = 0;
|
|
||||||
if( freq == 0 )
|
|
||||||
{
|
|
||||||
mach_timebase_info_data_t sTimebaseInfo;
|
|
||||||
mach_timebase_info(&sTimebaseInfo);
|
|
||||||
freq = sTimebaseInfo.denom*1e9/sTimebaseInfo.numer;
|
|
||||||
}
|
|
||||||
return freq;
|
|
||||||
#elif defined __unix__
|
|
||||||
return 1e9;
|
|
||||||
#else
|
|
||||||
return 1e6;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__ || defined __ppc__)
|
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__ || defined __ppc__)
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include <opencv2/core/bindings_utils.hpp>
|
#include <opencv2/core/bindings_utils.hpp>
|
||||||
|
|
||||||
#if defined(CV_CXX11) && !defined(OPENCV_DISABLE_THREAD_SUPPORT)
|
#if !defined(OPENCV_DISABLE_THREAD_SUPPORT)
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#endif
|
#endif
|
||||||
@ -85,7 +85,7 @@ TEST(Core_Async, LikePythonTest)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#if defined(CV_CXX11) && !defined(OPENCV_DISABLE_THREAD_SUPPORT)
|
#if !defined(OPENCV_DISABLE_THREAD_SUPPORT)
|
||||||
|
|
||||||
TEST(Core_Async, AsyncThread_Simple)
|
TEST(Core_Async, AsyncThread_Simple)
|
||||||
{
|
{
|
||||||
|
@ -8,10 +8,8 @@
|
|||||||
|
|
||||||
#include <opencv2/core/utils/fp_control_utils.hpp>
|
#include <opencv2/core/utils/fp_control_utils.hpp>
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace opencv_test { namespace {
|
namespace opencv_test { namespace {
|
||||||
|
|
||||||
@ -282,9 +280,7 @@ public:
|
|||||||
// FP state is not supported
|
// FP state is not supported
|
||||||
// no checks
|
// no checks
|
||||||
}
|
}
|
||||||
#ifdef CV_CXX11
|
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cv::details::FPDenormalsModeState base_state;
|
cv::details::FPDenormalsModeState base_state;
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
#ifndef __OPENCV_TEST_PRECOMP_HPP__
|
#ifndef __OPENCV_TEST_PRECOMP_HPP__
|
||||||
#define __OPENCV_TEST_PRECOMP_HPP__
|
#define __OPENCV_TEST_PRECOMP_HPP__
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
|
||||||
#include "opencv2/ts.hpp"
|
#include "opencv2/ts.hpp"
|
||||||
#include "opencv2/ts/ocl_test.hpp"
|
#include "opencv2/ts/ocl_test.hpp"
|
||||||
#include "opencv2/core/private.hpp"
|
#include "opencv2/core/private.hpp"
|
||||||
|
@ -4,9 +4,7 @@
|
|||||||
|
|
||||||
// This is .hpp file included from test_utils.cpp
|
// This is .hpp file included from test_utils.cpp
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
#include <thread> // std::thread
|
#include <thread> // std::thread
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "opencv2/core/utils/tls.hpp"
|
#include "opencv2/core/utils/tls.hpp"
|
||||||
|
|
||||||
@ -34,8 +32,6 @@ public:
|
|||||||
int TLSReporter::g_last_id = 0;
|
int TLSReporter::g_last_id = 0;
|
||||||
int TLSReporter::g_allocated = 0;
|
int TLSReporter::g_allocated = 0;
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static void callNThreadsWithTLS(int N, TLSData<T>& tls)
|
static void callNThreadsWithTLS(int N, TLSData<T>& tls)
|
||||||
{
|
{
|
||||||
@ -129,6 +125,4 @@ static void testTLSAccumulator(bool detachFirst)
|
|||||||
TEST(Core_TLS, AccumulatorHoldData_detachData) { testTLSAccumulator(true); }
|
TEST(Core_TLS, AccumulatorHoldData_detachData) { testTLSAccumulator(true); }
|
||||||
TEST(Core_TLS, AccumulatorHoldData_gather) { testTLSAccumulator(false); }
|
TEST(Core_TLS, AccumulatorHoldData_gather) { testTLSAccumulator(false); }
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}} // namespace
|
}} // namespace
|
||||||
|
@ -1183,6 +1183,11 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
static Ptr<AttentionLayer> create(const LayerParams ¶ms);
|
static Ptr<AttentionLayer> create(const LayerParams ¶ms);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class CV_EXPORTS GroupNormLayer : public Layer {
|
||||||
|
public:
|
||||||
|
static Ptr<GroupNormLayer> create(const LayerParams ¶ms);
|
||||||
|
};
|
||||||
|
|
||||||
//! @}
|
//! @}
|
||||||
//! @}
|
//! @}
|
||||||
CV__DNN_INLINE_NS_END
|
CV__DNN_INLINE_NS_END
|
||||||
|
@ -444,7 +444,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
|
* Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
|
||||||
* backend.
|
* backend.
|
||||||
*/
|
*/
|
||||||
CV_WRAP static Net readFromModelOptimizer(const String& xml, const String& bin);
|
CV_WRAP static Net readFromModelOptimizer(CV_WRAP_FILE_PATH const String& xml, CV_WRAP_FILE_PATH const String& bin);
|
||||||
|
|
||||||
/** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR).
|
/** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR).
|
||||||
* @param[in] bufferModelConfig buffer with model's configuration.
|
* @param[in] bufferModelConfig buffer with model's configuration.
|
||||||
@ -477,7 +477,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param path path to output file with .dot extension
|
* @param path path to output file with .dot extension
|
||||||
* @see dump()
|
* @see dump()
|
||||||
*/
|
*/
|
||||||
CV_WRAP void dumpToFile(const String& path);
|
CV_WRAP void dumpToFile(CV_WRAP_FILE_PATH const String& path);
|
||||||
/** @brief Adds new layer to the net.
|
/** @brief Adds new layer to the net.
|
||||||
* @param name unique name of the adding layer.
|
* @param name unique name of the adding layer.
|
||||||
* @param type typename of the adding layer (type must be registered in LayerRegister).
|
* @param type typename of the adding layer (type must be registered in LayerRegister).
|
||||||
@ -839,7 +839,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param darknetModel path to the .weights file with learned network.
|
* @param darknetModel path to the .weights file with learned network.
|
||||||
* @returns Network object that ready to do forward, throw an exception in failure cases.
|
* @returns Network object that ready to do forward, throw an exception in failure cases.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W Net readNetFromDarknet(const String &cfgFile, const String &darknetModel = String());
|
CV_EXPORTS_W Net readNetFromDarknet(CV_WRAP_FILE_PATH const String &cfgFile, CV_WRAP_FILE_PATH const String &darknetModel = String());
|
||||||
|
|
||||||
/** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
|
/** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
|
||||||
* @param bufferCfg A buffer contains a content of .cfg file with text description of the network architecture.
|
* @param bufferCfg A buffer contains a content of .cfg file with text description of the network architecture.
|
||||||
@ -864,7 +864,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param caffeModel path to the .caffemodel file with learned network.
|
* @param caffeModel path to the .caffemodel file with learned network.
|
||||||
* @returns Net object.
|
* @returns Net object.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W Net readNetFromCaffe(const String &prototxt, const String &caffeModel = String());
|
CV_EXPORTS_W Net readNetFromCaffe(CV_WRAP_FILE_PATH const String &prototxt, CV_WRAP_FILE_PATH const String &caffeModel = String());
|
||||||
|
|
||||||
/** @brief Reads a network model stored in Caffe model in memory.
|
/** @brief Reads a network model stored in Caffe model in memory.
|
||||||
* @param bufferProto buffer containing the content of the .prototxt file
|
* @param bufferProto buffer containing the content of the .prototxt file
|
||||||
@ -893,7 +893,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* let us make it more flexible.
|
* let us make it more flexible.
|
||||||
* @returns Net object.
|
* @returns Net object.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W Net readNetFromTensorflow(const String &model, const String &config = String());
|
CV_EXPORTS_W Net readNetFromTensorflow(CV_WRAP_FILE_PATH const String &model, CV_WRAP_FILE_PATH const String &config = String());
|
||||||
|
|
||||||
/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
|
/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
|
||||||
* @param bufferModel buffer containing the content of the pb file
|
* @param bufferModel buffer containing the content of the pb file
|
||||||
@ -918,7 +918,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param model path to the .tflite file with binary flatbuffers description of the network architecture
|
* @param model path to the .tflite file with binary flatbuffers description of the network architecture
|
||||||
* @returns Net object.
|
* @returns Net object.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W Net readNetFromTFLite(const String &model);
|
CV_EXPORTS_W Net readNetFromTFLite(CV_WRAP_FILE_PATH const String &model);
|
||||||
|
|
||||||
/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/lite">TFLite</a> framework's format.
|
/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/lite">TFLite</a> framework's format.
|
||||||
* @param bufferModel buffer containing the content of the tflite file
|
* @param bufferModel buffer containing the content of the tflite file
|
||||||
@ -957,7 +957,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* or @ref readNetFromDarknet. An order of @p model and @p config
|
* or @ref readNetFromDarknet. An order of @p model and @p config
|
||||||
* arguments does not matter.
|
* arguments does not matter.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W Net readNet(const String& model, const String& config = "", const String& framework = "");
|
CV_EXPORTS_W Net readNet(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "", const String& framework = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Read deep learning network represented in one of the supported formats.
|
* @brief Read deep learning network represented in one of the supported formats.
|
||||||
@ -979,7 +979,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* backend.
|
* backend.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W
|
CV_EXPORTS_W
|
||||||
Net readNetFromModelOptimizer(const String &xml, const String &bin = "");
|
Net readNetFromModelOptimizer(CV_WRAP_FILE_PATH const String &xml, CV_WRAP_FILE_PATH const String &bin = "");
|
||||||
|
|
||||||
/** @brief Load a network from Intel's Model Optimizer intermediate representation.
|
/** @brief Load a network from Intel's Model Optimizer intermediate representation.
|
||||||
* @param[in] bufferModelConfig Buffer contains XML configuration with network's topology.
|
* @param[in] bufferModelConfig Buffer contains XML configuration with network's topology.
|
||||||
@ -1008,7 +1008,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param onnxFile path to the .onnx file with text description of the network architecture.
|
* @param onnxFile path to the .onnx file with text description of the network architecture.
|
||||||
* @returns Network object that ready to do forward, throw an exception in failure cases.
|
* @returns Network object that ready to do forward, throw an exception in failure cases.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W Net readNetFromONNX(const String &onnxFile);
|
CV_EXPORTS_W Net readNetFromONNX(CV_WRAP_FILE_PATH const String &onnxFile);
|
||||||
|
|
||||||
/** @brief Reads a network model from <a href="https://onnx.ai/">ONNX</a>
|
/** @brief Reads a network model from <a href="https://onnx.ai/">ONNX</a>
|
||||||
* in-memory buffer.
|
* in-memory buffer.
|
||||||
@ -1031,7 +1031,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param path to the .pb file with input tensor.
|
* @param path to the .pb file with input tensor.
|
||||||
* @returns Mat.
|
* @returns Mat.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W Mat readTensorFromONNX(const String& path);
|
CV_EXPORTS_W Mat readTensorFromONNX(CV_WRAP_FILE_PATH const String& path);
|
||||||
|
|
||||||
/** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center,
|
/** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center,
|
||||||
* subtract @p mean values, scales values by @p scalefactor, swap Blue and Red channels.
|
* subtract @p mean values, scales values by @p scalefactor, swap Blue and Red channels.
|
||||||
@ -1204,7 +1204,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* is taken from NVidia's Caffe fork: https://github.com/NVIDIA/caffe.
|
* is taken from NVidia's Caffe fork: https://github.com/NVIDIA/caffe.
|
||||||
* So the resulting model may be used there.
|
* So the resulting model may be used there.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W void shrinkCaffeModel(const String& src, const String& dst,
|
CV_EXPORTS_W void shrinkCaffeModel(CV_WRAP_FILE_PATH const String& src, CV_WRAP_FILE_PATH const String& dst,
|
||||||
const std::vector<String>& layersTypes = std::vector<String>());
|
const std::vector<String>& layersTypes = std::vector<String>());
|
||||||
|
|
||||||
/** @brief Create a text representation for a binary network stored in protocol buffer format.
|
/** @brief Create a text representation for a binary network stored in protocol buffer format.
|
||||||
@ -1213,7 +1213,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
*
|
*
|
||||||
* @note To reduce output file size, trained weights are not included.
|
* @note To reduce output file size, trained weights are not included.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W void writeTextGraph(const String& model, const String& output);
|
CV_EXPORTS_W void writeTextGraph(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& output);
|
||||||
|
|
||||||
/** @brief Performs non maximum suppression given boxes and corresponding scores.
|
/** @brief Performs non maximum suppression given boxes and corresponding scores.
|
||||||
|
|
||||||
@ -1318,7 +1318,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param[in] model Binary file contains trained weights.
|
* @param[in] model Binary file contains trained weights.
|
||||||
* @param[in] config Text file contains network configuration.
|
* @param[in] config Text file contains network configuration.
|
||||||
*/
|
*/
|
||||||
CV_WRAP Model(const String& model, const String& config = "");
|
CV_WRAP Model(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Create model from deep learning network.
|
* @brief Create model from deep learning network.
|
||||||
@ -1423,7 +1423,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param[in] model Binary file contains trained weights.
|
* @param[in] model Binary file contains trained weights.
|
||||||
* @param[in] config Text file contains network configuration.
|
* @param[in] config Text file contains network configuration.
|
||||||
*/
|
*/
|
||||||
CV_WRAP ClassificationModel(const String& model, const String& config = "");
|
CV_WRAP ClassificationModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Create model from deep learning network.
|
* @brief Create model from deep learning network.
|
||||||
@ -1473,7 +1473,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param[in] model Binary file contains trained weights.
|
* @param[in] model Binary file contains trained weights.
|
||||||
* @param[in] config Text file contains network configuration.
|
* @param[in] config Text file contains network configuration.
|
||||||
*/
|
*/
|
||||||
CV_WRAP KeypointsModel(const String& model, const String& config = "");
|
CV_WRAP KeypointsModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Create model from deep learning network.
|
* @brief Create model from deep learning network.
|
||||||
@ -1505,7 +1505,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param[in] model Binary file contains trained weights.
|
* @param[in] model Binary file contains trained weights.
|
||||||
* @param[in] config Text file contains network configuration.
|
* @param[in] config Text file contains network configuration.
|
||||||
*/
|
*/
|
||||||
CV_WRAP SegmentationModel(const String& model, const String& config = "");
|
CV_WRAP SegmentationModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Create model from deep learning network.
|
* @brief Create model from deep learning network.
|
||||||
@ -1536,7 +1536,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param[in] model Binary file contains trained weights.
|
* @param[in] model Binary file contains trained weights.
|
||||||
* @param[in] config Text file contains network configuration.
|
* @param[in] config Text file contains network configuration.
|
||||||
*/
|
*/
|
||||||
CV_WRAP DetectionModel(const String& model, const String& config = "");
|
CV_WRAP DetectionModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Create model from deep learning network.
|
* @brief Create model from deep learning network.
|
||||||
@ -1602,7 +1602,7 @@ public:
|
|||||||
* @param[in] config Text file contains network configuration
|
* @param[in] config Text file contains network configuration
|
||||||
*/
|
*/
|
||||||
CV_WRAP inline
|
CV_WRAP inline
|
||||||
TextRecognitionModel(const std::string& model, const std::string& config = "")
|
TextRecognitionModel(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "")
|
||||||
: TextRecognitionModel(readNet(model, config)) { /* nothing */ }
|
: TextRecognitionModel(readNet(model, config)) { /* nothing */ }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1757,7 +1757,7 @@ public:
|
|||||||
* @param[in] config Text file contains network configuration.
|
* @param[in] config Text file contains network configuration.
|
||||||
*/
|
*/
|
||||||
CV_WRAP inline
|
CV_WRAP inline
|
||||||
TextDetectionModel_EAST(const std::string& model, const std::string& config = "")
|
TextDetectionModel_EAST(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "")
|
||||||
: TextDetectionModel_EAST(readNet(model, config)) { /* nothing */ }
|
: TextDetectionModel_EAST(readNet(model, config)) { /* nothing */ }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1818,7 +1818,7 @@ public:
|
|||||||
* @param[in] config Text file contains network configuration.
|
* @param[in] config Text file contains network configuration.
|
||||||
*/
|
*/
|
||||||
CV_WRAP inline
|
CV_WRAP inline
|
||||||
TextDetectionModel_DB(const std::string& model, const std::string& config = "")
|
TextDetectionModel_DB(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "")
|
||||||
: TextDetectionModel_DB(readNet(model, config)) { /* nothing */ }
|
: TextDetectionModel_DB(readNet(model, config)) { /* nothing */ }
|
||||||
|
|
||||||
CV_WRAP TextDetectionModel_DB& setBinaryThreshold(float binaryThreshold);
|
CV_WRAP TextDetectionModel_DB& setBinaryThreshold(float binaryThreshold);
|
||||||
|
@ -258,22 +258,21 @@ PERF_TEST_P_(Layer_Slice, FastNeuralStyle_eccv16)
|
|||||||
test_slice<4>(inputShape, begin, end);
|
test_slice<4>(inputShape, begin, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Layer_Scatter : public TestBaseWithParam<tuple<Backend, Target> >
|
using Layer_Scatter = TestBaseWithParam<tuple<std::vector<int>, std::string, int, tuple<Backend, Target>>>;
|
||||||
{
|
PERF_TEST_P_(Layer_Scatter, scatter) {
|
||||||
void test_layer(const std::vector<int>& shape, const String reduction = "none", int axis = 0)
|
std::vector<int> shape = get<0>(GetParam());
|
||||||
{
|
std::string reduction = get<1>(GetParam());
|
||||||
int backendId = get<0>(GetParam());
|
int axis = get<2>(GetParam());
|
||||||
int targetId = get<1>(GetParam());
|
int backend_id = get<0>(get<3>(GetParam()));
|
||||||
|
int target_id = get<1>(get<3>(GetParam()));
|
||||||
|
|
||||||
Mat data(shape, CV_32FC1);
|
Mat data(shape, CV_32FC1);
|
||||||
Mat indices(shape, CV_32FC1);
|
Mat indices(shape, CV_32FC1);
|
||||||
Mat updates(shape, CV_32FC1);
|
Mat updates(shape, CV_32FC1);
|
||||||
|
|
||||||
Scalar mean = 0.f;
|
randn(data, 0.f, 1.f);
|
||||||
Scalar std = 1.f;
|
|
||||||
randn(data, mean, std);
|
|
||||||
randu(indices, 0, shape[axis]);
|
randu(indices, 0, shape[axis]);
|
||||||
randn(updates, mean, std);
|
randn(updates, 0.f, 1.f);
|
||||||
|
|
||||||
indices.convertTo(indices, CV_32SC1, 1, -1);
|
indices.convertTo(indices, CV_32SC1, 1, -1);
|
||||||
|
|
||||||
@ -291,50 +290,46 @@ struct Layer_Scatter : public TestBaseWithParam<tuple<Backend, Target> >
|
|||||||
|
|
||||||
// warmup
|
// warmup
|
||||||
{
|
{
|
||||||
std::vector<String> inpNames(3);
|
std::vector<String> input_names{"data", "indices", "updates"};
|
||||||
inpNames[0] = "data";
|
net.setInputsNames(input_names);
|
||||||
inpNames[1] = "indices";
|
net.setInput(data, input_names[0]);
|
||||||
inpNames[2] = "updates";
|
net.setInput(indices, input_names[1]);
|
||||||
net.setInputsNames(inpNames);
|
net.setInput(updates, input_names[2]);
|
||||||
net.setInput(data, inpNames[0]);
|
|
||||||
net.setInput(indices, inpNames[1]);
|
|
||||||
net.setInput(updates, inpNames[2]);
|
|
||||||
|
|
||||||
net.setPreferableBackend(backendId);
|
net.setPreferableBackend(backend_id);
|
||||||
net.setPreferableTarget(targetId);
|
net.setPreferableTarget(target_id);
|
||||||
Mat out = net.forward();
|
Mat out = net.forward();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// perf
|
||||||
TEST_CYCLE()
|
TEST_CYCLE()
|
||||||
{
|
{
|
||||||
Mat res = net.forward();
|
Mat res = net.forward();
|
||||||
}
|
}
|
||||||
|
|
||||||
SANITY_CHECK_NOTHING();
|
SANITY_CHECK_NOTHING();
|
||||||
}
|
|
||||||
|
|
||||||
int N = 8;
|
|
||||||
int C = 256;
|
|
||||||
int H = 128;
|
|
||||||
int W = 100;
|
|
||||||
};
|
|
||||||
|
|
||||||
PERF_TEST_P_(Layer_Scatter, DISABLED_Scatter)
|
|
||||||
{
|
|
||||||
test_layer({N, C, H, W});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PERF_TEST_P_(Layer_Scatter, DISABLED_Scatter_add)
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, Combine(
|
||||||
{
|
Values(std::vector<int>{2, 128, 64, 50}),
|
||||||
test_layer({N, C, H, W}, "add");
|
Values(std::string("none"), std::string("add")),
|
||||||
}
|
Values(0), // use Values(0, 1, 2, 3) for more details
|
||||||
|
dnnBackendsAndTargets(/* withInferenceEngine= */ false,
|
||||||
|
/* withHalide= */ false,
|
||||||
|
/* withCpuOCV= */ true,
|
||||||
|
/* withVkCom= */ false,
|
||||||
|
/* withCUDA= */ false,
|
||||||
|
/* withNgraph= */ false,
|
||||||
|
/* withWebnn= */ false,
|
||||||
|
/* withCann= */ false) // only test on CPU
|
||||||
|
));
|
||||||
|
|
||||||
struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
using Layer_ScatterND = TestBaseWithParam<tuple<std::vector<int>, std::string, tuple<Backend, Target>>>;
|
||||||
{
|
PERF_TEST_P_(Layer_ScatterND, scatterND) {
|
||||||
void test_layer(const std::vector<int>& shape, const String reduction = "none")
|
std::vector<int> shape = get<0>(GetParam());
|
||||||
{
|
std::string reduction = get<1>(GetParam());
|
||||||
int backendId = get<0>(GetParam());
|
int backend_id = get<0>(get<2>(GetParam()));
|
||||||
int targetId = get<1>(GetParam());
|
int target_id = get<1>(get<2>(GetParam()));
|
||||||
|
|
||||||
std::vector<int> indices_shape(shape);
|
std::vector<int> indices_shape(shape);
|
||||||
indices_shape.push_back(int(shape.size()));
|
indices_shape.push_back(int(shape.size()));
|
||||||
@ -342,12 +337,10 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
|||||||
Mat indices(indices_shape, CV_32FC1);
|
Mat indices(indices_shape, CV_32FC1);
|
||||||
Mat updates(shape, CV_32FC1);
|
Mat updates(shape, CV_32FC1);
|
||||||
|
|
||||||
Scalar mean = 0.f;
|
randn(data, 0.f, 1.f);
|
||||||
Scalar std = 1.f;
|
randn(updates, 0.f, 1.f);
|
||||||
randn(data, mean, std);
|
|
||||||
randn(updates, mean, std);
|
|
||||||
|
|
||||||
// initialize the indices with index tuples like [0...N, 0...C, 0...H, 0...W]
|
// Create indices such that indices[n_i, c_j, h_k, w_l, :4] = [i, j, k, l]
|
||||||
std::vector<int> current_index_tuple(shape.size());
|
std::vector<int> current_index_tuple(shape.size());
|
||||||
int total = data.total();
|
int total = data.total();
|
||||||
std::vector<int> indices_step;
|
std::vector<int> indices_step;
|
||||||
@ -357,6 +350,7 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
|||||||
indices_step.push_back(step);
|
indices_step.push_back(step);
|
||||||
}
|
}
|
||||||
int t, j, idx, offset_at_idx, offset;
|
int t, j, idx, offset_at_idx, offset;
|
||||||
|
auto *indices_ptr = indices.ptr<float>();
|
||||||
for (int i = 0; i < total; i++)
|
for (int i = 0; i < total; i++)
|
||||||
{
|
{
|
||||||
t = i;
|
t = i;
|
||||||
@ -373,7 +367,7 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
|||||||
offset += current_index_tuple[j] * indices_step[j];
|
offset += current_index_tuple[j] * indices_step[j];
|
||||||
|
|
||||||
for (j = 0; j < shape.size(); j++)
|
for (j = 0; j < shape.size(); j++)
|
||||||
indices.at<float>(offset + j) = current_index_tuple[j];
|
indices_ptr[offset + j] = current_index_tuple[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
Net net;
|
Net net;
|
||||||
@ -389,17 +383,14 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
|||||||
|
|
||||||
// warmup
|
// warmup
|
||||||
{
|
{
|
||||||
std::vector<String> inpNames(3);
|
std::vector<String> input_names{"data", "indices", "updates"};
|
||||||
inpNames[0] = "data";
|
net.setInputsNames(input_names);
|
||||||
inpNames[1] = "indices";
|
net.setInput(data, input_names[0]);
|
||||||
inpNames[2] = "updates";
|
net.setInput(indices, input_names[1]);
|
||||||
net.setInputsNames(inpNames);
|
net.setInput(updates, input_names[2]);
|
||||||
net.setInput(data, inpNames[0]);
|
|
||||||
net.setInput(indices, inpNames[1]);
|
|
||||||
net.setInput(updates, inpNames[2]);
|
|
||||||
|
|
||||||
net.setPreferableBackend(backendId);
|
net.setPreferableBackend(backend_id);
|
||||||
net.setPreferableTarget(targetId);
|
net.setPreferableTarget(target_id);
|
||||||
Mat out = net.forward();
|
Mat out = net.forward();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -409,23 +400,20 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
|
|||||||
}
|
}
|
||||||
|
|
||||||
SANITY_CHECK_NOTHING();
|
SANITY_CHECK_NOTHING();
|
||||||
}
|
|
||||||
|
|
||||||
int N = 8;
|
|
||||||
int C = 256;
|
|
||||||
int H = 128;
|
|
||||||
int W = 100;
|
|
||||||
};
|
|
||||||
|
|
||||||
PERF_TEST_P_(Layer_ScatterND, DISABLED_ScatterND)
|
|
||||||
{
|
|
||||||
test_layer({N, C, H ,W});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PERF_TEST_P_(Layer_ScatterND, DISABLED_ScatterND_add)
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, Combine(
|
||||||
{
|
Values(std::vector<int>{2, 128, 64, 50}),
|
||||||
test_layer({N, C, H , W}, "add");
|
Values(std::string("none"), std::string("add")),
|
||||||
}
|
dnnBackendsAndTargets(/* withInferenceEngine= */ false,
|
||||||
|
/* withHalide= */ false,
|
||||||
|
/* withCpuOCV= */ true,
|
||||||
|
/* withVkCom= */ false,
|
||||||
|
/* withCUDA= */ false,
|
||||||
|
/* withNgraph= */ false,
|
||||||
|
/* withWebnn= */ false,
|
||||||
|
/* withCann= */ false) // only test on CPU
|
||||||
|
));
|
||||||
|
|
||||||
struct Layer_LayerNorm : public TestBaseWithParam<tuple<Backend, Target> >
|
struct Layer_LayerNorm : public TestBaseWithParam<tuple<Backend, Target> >
|
||||||
{
|
{
|
||||||
@ -795,19 +783,77 @@ PERF_TEST_P_(Layer_Attention, VisionTransformer) {
|
|||||||
test_layer({1, 197, 768}, {768, 768, 768}, 12);
|
test_layer({1, 197, 768}, {768, 768, 768}, 12);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Perf fixture for the "GroupNormalization" DNN layer, parameterized by (backend, target).
struct Layer_GroupNorm : public TestBaseWithParam<tuple<Backend, Target> >
{
    // Builds a one-layer network fed by three inputs (x, scale, b), runs a single
    // warm-up forward pass and then measures repeated forward passes.
    //   x_shape    - shape of the input blob; scale and b get x_shape[1] rows each
    //   num_groups - value stored in the layer's "num_groups" parameter
    void test_layer(const std::vector<int>& x_shape, int num_groups)
    {
        int backendId = get<0>(GetParam());
        int targetId = get<1>(GetParam());

        const int rows = x_shape[1];
        Mat x(x_shape, CV_32FC1);
        Mat scale(rows, 1, CV_32FC1);
        Mat b(rows, 1, CV_32FC1);

        // Fill order is kept as x, scale, b so the random sequence is unchanged.
        randu(x, 0.f, 1.f);
        randu(scale, 0.f, 1.f);
        randu(b, 0.f, 1.f);

        LayerParams lp;
        lp.name = "testLayer";
        lp.type = "GroupNormalization";
        lp.set("num_groups", num_groups);

        Net net;
        int id = net.addLayerToPrev(lp.name, lp.type, lp);
        // Wire network input ports 0..2 to the layer's input ports 0..2.
        for (int port = 0; port < 3; port++)
        {
            net.connect(0, port, id, port);
        }

        // warmup
        {
            std::vector<String> inpNames{"x", "scale", "b"};
            net.setInputsNames(inpNames);
            net.setInput(x, inpNames[0]);
            net.setInput(scale, inpNames[1]);
            net.setInput(b, inpNames[2]);

            net.setPreferableBackend(backendId);
            net.setPreferableTarget(targetId);
            Mat out = net.forward();
        }

        // perf
        TEST_CYCLE()
        {
            Mat res = net.forward();
        }

        SANITY_CHECK_NOTHING();
    }

    // Default problem size used by the test cases built on this fixture.
    int N = 2;
    int C = 64;
    int H = 180;
    int W = 240;
    int num_groups = 16;
};
|
||||||
|
|
||||||
|
// Runs the GroupNormalization perf case on the fixture's default {N, C, H, W} shape.
PERF_TEST_P_(Layer_GroupNorm, GroupNorm)
{
    const std::vector<int> input_shape{N, C, H, W};
    test_layer(input_shape, num_groups);
}
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||||
#ifdef HAVE_CUDA
|
#ifdef HAVE_CUDA
|
||||||
INSTANTIATE_TEST_CASE_P(CUDA, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)));
|
INSTANTIATE_TEST_CASE_P(CUDA, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)));
|
||||||
#endif
|
#endif
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_GatherElements, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_GatherElements, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_InstanceNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_InstanceNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_Attention, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_Attention, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||||
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_GroupNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||||
|
|
||||||
typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;
|
typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;
|
||||||
PERF_TEST_P_(Layer_FullyConnected, fc)
|
PERF_TEST_P_(Layer_FullyConnected, fc)
|
||||||
|
@ -132,8 +132,23 @@ void eltwise_op(const Stream& stream, TensorSpan<T> output, TensorView<T> x, Ten
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
CV_Assert(is_shape_compatible(output, x));
|
auto inShape1 = x.shape_as_vector();
|
||||||
CV_Assert(is_shape_compatible(output, y));
|
auto inShape2 = y.shape_as_vector();
|
||||||
|
auto outShape = output.shape_as_vector();
|
||||||
|
|
||||||
|
std::size_t x_ndims = inShape1.size(), y_ndims = inShape2.size();
|
||||||
|
if (x_ndims >= y_ndims) {
|
||||||
|
for (std::size_t i = 0; i < (x_ndims - y_ndims); i++) {
|
||||||
|
inShape2.insert(inShape2.begin(), 1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (std::size_t i = 0; i < (y_ndims - x_ndims); i++) {
|
||||||
|
inShape1.insert(inShape1.begin(), 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_Assert(is_shape_compatible1(outShape, inShape1));
|
||||||
|
CV_Assert(is_shape_compatible1(outShape, inShape2));
|
||||||
|
|
||||||
/* matching singleton axes in both input tensors can be eliminated
|
/* matching singleton axes in both input tensors can be eliminated
|
||||||
*
|
*
|
||||||
@ -148,20 +163,21 @@ void eltwise_op(const Stream& stream, TensorSpan<T> output, TensorView<T> x, Ten
|
|||||||
* x: [1, 256, 32, 32] -> [256, 32, 32]
|
* x: [1, 256, 32, 32] -> [256, 32, 32]
|
||||||
* y: [1, 256, 1, 1] -> [256, 1, 1]
|
* y: [1, 256, 1, 1] -> [256, 1, 1]
|
||||||
*/
|
*/
|
||||||
for (int r = 0; r < output.rank(); r++)
|
int eliminate_times = 0;
|
||||||
{
|
for (std::size_t i = 0; i < outShape.size(); i++) {
|
||||||
while (x.rank() > r && y.rank() > r && x.get_axis_size(r) == 1 && y.get_axis_size(r) == 1) {
|
if (inShape1[i] == 1 && inShape2[i] == 1 && outShape[i] == 1 && i != (outShape.size() - 1)) {
|
||||||
CV_Assert(output.get_axis_size(r) == 1);
|
eliminate_times++;
|
||||||
|
} else {
|
||||||
x.squeeze(r);
|
break;
|
||||||
y.squeeze(r);
|
}
|
||||||
output.squeeze(r);
|
}
|
||||||
|
if (eliminate_times > 0) {
|
||||||
|
for (int i = 0; i < eliminate_times; i++) {
|
||||||
|
inShape1.erase(inShape1.begin());
|
||||||
|
inShape2.erase(inShape2.begin());
|
||||||
|
outShape.erase(outShape.begin());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto inShape1 = x.shape_as_vector();
|
|
||||||
auto inShape2 = y.shape_as_vector();
|
|
||||||
auto outShape = output.shape_as_vector();
|
|
||||||
|
|
||||||
/* contiguous axes that do not broadcast can be merged into one axis
|
/* contiguous axes that do not broadcast can be merged into one axis
|
||||||
*
|
*
|
||||||
@ -324,7 +340,19 @@ void eltwise_sub_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
|
|||||||
eltwise_op<T, SubFunctor<T>>(stream, output, x, y);
|
eltwise_op<T, SubFunctor<T>>(stream, output, x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void eltwise_mod_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
|
||||||
|
eltwise_op<T, ModFunctor<T>>(stream, output, x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void eltwise_fmod_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
|
||||||
|
eltwise_op<T, FModFunctor<T>>(stream, output, x, y);
|
||||||
|
}
|
||||||
|
|
||||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||||
|
template void eltwise_mod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||||
|
template void eltwise_fmod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||||
template void eltwise_sub_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
template void eltwise_sub_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||||
template void eltwise_div_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
template void eltwise_div_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||||
template void eltwise_prod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
template void eltwise_prod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||||
@ -333,6 +361,8 @@ void eltwise_sub_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
|
|||||||
template void eltwise_max_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
template void eltwise_max_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||||
template void eltwise_min_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
template void eltwise_min_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||||
#endif
|
#endif
|
||||||
|
template void eltwise_mod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||||
|
template void eltwise_fmod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||||
template void eltwise_sub_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
template void eltwise_sub_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||||
template void eltwise_div_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
template void eltwise_div_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||||
template void eltwise_prod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
template void eltwise_prod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||||
|
@ -799,6 +799,40 @@ struct ReciprocalFunctor {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
struct ModFunctor {
|
||||||
|
struct Params {
|
||||||
|
CUDA4DNN_HOST_DEVICE Params() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
CUDA4DNN_DEVICE ModFunctor() { }
|
||||||
|
CUDA4DNN_DEVICE ModFunctor(const Params& params) { }
|
||||||
|
|
||||||
|
CUDA4DNN_DEVICE T operator()(T x, T y) {
|
||||||
|
int res = (int)x % (int)y;
|
||||||
|
T zero = T(0);
|
||||||
|
if ((res > (int)zero && y < zero) || (res < (int)zero && y > zero)) {
|
||||||
|
res += (int)y;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
struct FModFunctor {
|
||||||
|
struct Params {
|
||||||
|
CUDA4DNN_HOST_DEVICE Params() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
CUDA4DNN_DEVICE FModFunctor() { }
|
||||||
|
CUDA4DNN_DEVICE FModFunctor(const Params& params) { }
|
||||||
|
|
||||||
|
CUDA4DNN_DEVICE T operator()(T x, T y) {
|
||||||
|
using csl::device::fmod;
|
||||||
|
return fmod(x, y);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||||
|
|
||||||
#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */
|
#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */
|
||||||
|
@ -36,6 +36,13 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace de
|
|||||||
template <> inline __device__ float min(float x, float y) { return fminf(x, y); }
|
template <> inline __device__ float min(float x, float y) { return fminf(x, y); }
|
||||||
template <> inline __device__ double min(double x, double y) { return fmin(x, y); }
|
template <> inline __device__ double min(double x, double y) { return fmin(x, y); }
|
||||||
|
|
||||||
|
// Device-side remainder helper. The generic version uses operator%; the
// floating-point specializations forward to the matching math-library routine.
template <class T> __device__ T fmod(T x, T y) { return x % y; }
template <> inline __device__ float fmod(float x, float y) { return fmodf(x, y); }
// The call must be qualified: an unqualified fmod(x, y) here finds only this
// namespace's template and would resolve back to this very specialization,
// recursing without end instead of reaching the math-library function.
template <> inline __device__ double fmod(double x, double y) { return ::fmod(x, y); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
// Half precision is computed in float and converted back on return.
template <> inline __device__ half fmod(half x, half y) { return fmodf((float)x, (float)y); }
#endif
|
||||||
|
|
||||||
template <class T> __device__ T log1p(T val);
|
template <class T> __device__ T log1p(T val);
|
||||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||||
template <> inline __device__ __half log1p(__half val) { return hlog(__half(1) + val); }
|
template <> inline __device__ __half log1p(__half val) { return hlog(__half(1) + val); }
|
||||||
|
@ -78,6 +78,18 @@ namespace raw {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
__global__ void normalize_mean_variance_groupwise(Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> inv_stddev, size_type inner_size, size_type C, size_type num_groups, size_type group_size) {
|
||||||
|
for (auto idx : grid_stride_range(output.size())) {
|
||||||
|
const index_type outer_idx = idx / inner_size;
|
||||||
|
const index_type c = outer_idx % C;
|
||||||
|
const index_type group_idx = outer_idx / group_size;
|
||||||
|
auto s = static_cast<float>(scale[c]) * inv_stddev[group_idx];
|
||||||
|
auto b = static_cast<float>(bias[c]);
|
||||||
|
output[idx] = (static_cast<float>(input[idx]) - means[group_idx]) * s + b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
__global__ void normalize_mean_variance_layernorm(Span<T> output, View<T> input, View<T> scale, View<float> means, View<float> inv_stddev, size_type inner_size) {
|
__global__ void normalize_mean_variance_layernorm(Span<T> output, View<T> input, View<T> scale, View<float> means, View<float> inv_stddev, size_type inner_size) {
|
||||||
for (auto idx : grid_stride_range(output.size())) {
|
for (auto idx : grid_stride_range(output.size())) {
|
||||||
@ -191,6 +203,24 @@ template void normalize_mean_variance_channelwise(const Stream&, Span<__half> /*
|
|||||||
#endif
|
#endif
|
||||||
template void normalize_mean_variance_channelwise(const Stream&, Span<float> /*output*/, View<float> /*input*/, View<float> /*scale*/, View<float> /*bias*/, View<float> /*means*/, View<float> /*inv_stddev*/, std::size_t, std::size_t);
|
template void normalize_mean_variance_channelwise(const Stream&, Span<float> /*output*/, View<float> /*input*/, View<float> /*scale*/, View<float> /*bias*/, View<float> /*means*/, View<float> /*inv_stddev*/, std::size_t, std::size_t);
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void normalize_mean_variance_groupwise(const Stream& stream, Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> inv_stddev, std::size_t inner_size, std::size_t C, std::size_t num_groups, std::size_t group_size)
|
||||||
|
{
|
||||||
|
CV_Assert(input.size() == output.size());
|
||||||
|
CV_Assert(input.size() / inner_size == means.size() * group_size);
|
||||||
|
CV_Assert(means.size() == inv_stddev.size());
|
||||||
|
|
||||||
|
auto kernel = raw::normalize_mean_variance_groupwise<T>;
|
||||||
|
auto policy = make_policy(kernel, output.size(), 0, stream);
|
||||||
|
launch_kernel(kernel, policy, output, input, scale, bias, means, inv_stddev, inner_size, C, num_groups, group_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||||
|
template void normalize_mean_variance_groupwise(const Stream&, Span<__half> /*output*/, View<__half> /*input*/, View<__half> /*scale*/, View<__half> /*bias*/, View<float> /*means*/, View<float> /*inv_stddev*/, std::size_t, std::size_t, std::size_t, std::size_t);
|
||||||
|
#endif
|
||||||
|
template void normalize_mean_variance_groupwise(const Stream&, Span<float> /*output*/, View<float> /*input*/, View<float> /*scale*/, View<float> /*bias*/, View<float> /*means*/, View<float> /*inv_stddev*/, std::size_t, std::size_t, std::size_t, std::size_t);
|
||||||
|
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void normalize_mean_variance_layernorm(const Stream& stream, Span<T> output, View<T> input, View<T> scale, View<float> means, View<float> inv_stddev, std::size_t inner_size)
|
void normalize_mean_variance_layernorm(const Stream& stream, Span<T> output, View<T> input, View<T> scale, View<float> means, View<float> inv_stddev, std::size_t inner_size)
|
||||||
{
|
{
|
||||||
|
@ -1262,6 +1262,23 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Checks whether two shapes of the SAME rank are broadcast-compatible.
 *
 * Two shapes are compatible when they have the same number of axes and, for
 * every axis, the extents are either equal or at least one of them is 1.
 *
 * @param x_shape  first shape (any indexable container exposing `size()`)
 * @param y_shape  second shape
 * @return true if the shapes are compatible, false otherwise (including rank mismatch)
 */
template <typename ShapeType>
bool is_shape_compatible1(const ShapeType &x_shape, const ShapeType &y_shape) noexcept {
    const auto x_ndims = x_shape.size(), y_ndims = y_shape.size();

    if (x_ndims != y_ndims) {
        return false;
    }

    // Index with the container's own size type so the loop counter is never
    // compared against a value of different signedness.
    for (decltype(x_shape.size()) i = 0; i < x_ndims; i++) {
        if (x_shape[i] != y_shape[i] && x_shape[i] != 1 && y_shape[i] != 1) {
            return false;
        }
    }

    return true;
}
|
||||||
|
|
||||||
/** returns the rank to which the given tensor can be squeezed to */
|
/** returns the rank to which the given tensor can be squeezed to */
|
||||||
template <class TensorType>
|
template <class TensorType>
|
||||||
std::size_t get_effective_rank(const TensorType& x) noexcept {
|
std::size_t get_effective_rank(const TensorType& x) noexcept {
|
||||||
|
@ -33,6 +33,12 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
|||||||
template <class T>
|
template <class T>
|
||||||
void eltwise_sub_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
|
void eltwise_sub_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void eltwise_mod_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void eltwise_fmod_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
|
||||||
|
|
||||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||||
|
|
||||||
#endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ELTWISE_OPS_HPP */
|
#endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ELTWISE_OPS_HPP */
|
||||||
|
@ -35,6 +35,10 @@ void normalize_mean_variance_layernorm(const csl::Stream &stream, csl::Span<T> o
|
|||||||
template <class T>
|
template <class T>
|
||||||
void normalize_mean_variance_layernorm(const csl::Stream &stream, csl::Span<T> output, csl::View<T> input, csl::View<T> scale, csl::View<T> bias, csl::View<float> means, csl::View<float> inv_stddev, std::size_t inner_size);
|
void normalize_mean_variance_layernorm(const csl::Stream &stream, csl::Span<T> output, csl::View<T> input, csl::View<T> scale, csl::View<T> bias, csl::View<float> means, csl::View<float> inv_stddev, std::size_t inner_size);
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void normalize_mean_variance_groupwise(const csl::Stream &stream, csl::Span<T> output, csl::View<T> input, csl::View<T> scale, csl::View<T> bias, csl::View<float> means, csl::View<float> inv_stddev, std::size_t inner_size, std::size_t C, std::size_t num_groups, std::size_t group_size);
|
||||||
|
|
||||||
|
|
||||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||||
|
|
||||||
#endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_MVN_HPP */
|
#endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_MVN_HPP */
|
||||||
|
@ -28,6 +28,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
|||||||
DIV,
|
DIV,
|
||||||
MIN,
|
MIN,
|
||||||
SUB,
|
SUB,
|
||||||
|
MOD,
|
||||||
|
FMOD,
|
||||||
};
|
};
|
||||||
|
|
||||||
class EltwiseOpBase : public CUDABackendNode {
|
class EltwiseOpBase : public CUDABackendNode {
|
||||||
@ -90,6 +92,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
|||||||
kernels::eltwise_sum_coeff_2<T>(stream, output, coeffs[0], input_x, coeffs[1], input_y);
|
kernels::eltwise_sum_coeff_2<T>(stream, output, coeffs[0], input_x, coeffs[1], input_y);
|
||||||
break;
|
break;
|
||||||
case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, input_x, input_y); break;
|
case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, input_x, input_y); break;
|
||||||
|
case EltwiseOpType::MOD: kernels::eltwise_mod_2<T>(stream, output, input_x, input_y); break;
|
||||||
|
case EltwiseOpType::FMOD: kernels::eltwise_fmod_2<T>(stream, output, input_x, input_y); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -122,6 +126,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, output, input); break;
|
case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, output, input); break;
|
||||||
|
case EltwiseOpType::MOD: kernels::eltwise_mod_2<T>(stream, output, output, input); break;
|
||||||
|
case EltwiseOpType::FMOD: kernels::eltwise_fmod_2<T>(stream, output, output, input); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
87
modules/dnn/src/cuda4dnn/primitives/group_norm.hpp
Normal file
87
modules/dnn/src/cuda4dnn/primitives/group_norm.hpp
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_GROUP_NORM_HPP
|
||||||
|
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_GROUP_NORM_HPP
|
||||||
|
|
||||||
|
#include "../../op_cuda.hpp"
|
||||||
|
|
||||||
|
#include "../csl/stream.hpp"
|
||||||
|
#include "../csl/span.hpp"
|
||||||
|
#include "../csl/tensor.hpp"
|
||||||
|
#include "../csl/workspace.hpp"
|
||||||
|
|
||||||
|
#include "../kernels/fill_copy.hpp"
|
||||||
|
#include "../kernels/mvn.hpp"
|
||||||
|
|
||||||
|
#include <opencv2/core.hpp>
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <vector>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace cv { namespace dnn { namespace cuda4dnn {
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
class GroupNormOp final : public CUDABackendNode {
|
||||||
|
public:
|
||||||
|
using wrapper_type = GetCUDABackendWrapperType<T>;
|
||||||
|
|
||||||
|
GroupNormOp(csl::Stream stream_, float epsilon_, size_t loops, size_t num_groups)
|
||||||
|
: stream(std::move(stream_)), epsilon(epsilon_), num_groups(num_groups) {
|
||||||
|
csl::WorkspaceBuilder builder;
|
||||||
|
builder.require<float>(loops * num_groups); // mean and stdev for each group
|
||||||
|
builder.require<float>(loops * num_groups);
|
||||||
|
scratch_mem_in_bytes = builder.required_workspace_size();
|
||||||
|
}
|
||||||
|
|
||||||
|
void forward(const std::vector<cv::Ptr<BackendWrapper>>& inputs,
|
||||||
|
const std::vector<cv::Ptr<BackendWrapper>>& outputs,
|
||||||
|
csl::Workspace& workspace) override {
|
||||||
|
auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
|
||||||
|
auto scale_wrapper = inputs[1].dynamicCast<wrapper_type>();
|
||||||
|
auto bias_wrapper = inputs[2].dynamicCast<wrapper_type>();
|
||||||
|
|
||||||
|
auto input = input_wrapper->getView();
|
||||||
|
auto scale = scale_wrapper->getView();
|
||||||
|
auto bias = bias_wrapper->getView();
|
||||||
|
|
||||||
|
auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
|
||||||
|
auto output = output_wrapper->getSpan();
|
||||||
|
|
||||||
|
auto C = input.get_axis_size(1);
|
||||||
|
auto loops = input.size_range(0, 2);
|
||||||
|
auto norm_size = input.size_range(2, input.rank());
|
||||||
|
auto num_groups = this->num_groups;
|
||||||
|
auto group_size = C / num_groups;
|
||||||
|
if (norm_size == 1) {
|
||||||
|
kernels::fill<T>(stream, output, 0.f);
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
auto ws_allocator = csl::WorkspaceAllocator(workspace);
|
||||||
|
|
||||||
|
auto mean = ws_allocator.get_span<float>(loops / group_size);
|
||||||
|
kernels::fill<float>(stream, mean, 0.f);
|
||||||
|
|
||||||
|
auto stdev = ws_allocator.get_span<float>(loops / group_size);
|
||||||
|
kernels::fill<float>(stream, stdev, 0.f);
|
||||||
|
|
||||||
|
kernels::reduce_mean_sqr_sum<T>(stream, mean, stdev, input, norm_size * group_size);
|
||||||
|
kernels::compute_normalization_scale(stream, stdev, mean, stdev, norm_size * group_size, epsilon);
|
||||||
|
kernels::normalize_mean_variance_groupwise<T>(stream, output, input, scale, bias, mean, stdev, norm_size, C, num_groups, group_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
csl::Stream stream;
|
||||||
|
float epsilon;
|
||||||
|
std::size_t num_groups;
|
||||||
|
std::size_t scratch_mem_in_bytes;
|
||||||
|
};
|
||||||
|
|
||||||
|
}}} // cv::dnn::cuda4dnn
|
||||||
|
|
||||||
|
#endif // OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_GROUP_NORM_HPP
|
@ -163,6 +163,7 @@ void initializeLayerFactory()
|
|||||||
CV_DNN_REGISTER_LAYER_CLASS(Expand, ExpandLayer);
|
CV_DNN_REGISTER_LAYER_CLASS(Expand, ExpandLayer);
|
||||||
CV_DNN_REGISTER_LAYER_CLASS(InstanceNormalization, InstanceNormLayer);
|
CV_DNN_REGISTER_LAYER_CLASS(InstanceNormalization, InstanceNormLayer);
|
||||||
CV_DNN_REGISTER_LAYER_CLASS(Attention, AttentionLayer);
|
CV_DNN_REGISTER_LAYER_CLASS(Attention, AttentionLayer);
|
||||||
|
CV_DNN_REGISTER_LAYER_CLASS(GroupNormalization, GroupNormLayer);
|
||||||
|
|
||||||
CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
|
CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
|
||||||
CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
|
CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
|
||||||
|
@ -969,6 +969,13 @@ public:
|
|||||||
stride_h, stride_w, dilation_h, dilation_w, pad_t, pad_l,
|
stride_h, stride_w, dilation_h, dilation_w, pad_t, pad_l,
|
||||||
biasptr, multptr, inptr_, height, width, outptr_, out_d, outH, outW, inpZp, outZp);
|
biasptr, multptr, inptr_, height, width, outptr_, out_d, outH, outW, inpZp, outZp);
|
||||||
else
|
else
|
||||||
|
#endif
|
||||||
|
#if CV_RVP052
|
||||||
|
if(isConv2D)
|
||||||
|
opt_RVP052::fastDepthwiseConv(wptr, kernel_h, kernel_w,
|
||||||
|
stride_h, stride_w, dilation_h, dilation_w, pad_t, pad_l,
|
||||||
|
biasptr, multptr, inptr_, height, width, outptr_, out_d, outH, outW, inpZp, outZp);
|
||||||
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
const int8_t w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2],
|
const int8_t w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2],
|
||||||
@ -1348,6 +1355,12 @@ public:
|
|||||||
opt_LASX::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0,
|
opt_LASX::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0,
|
||||||
outShape, bsz, vsz, vsz_a, outZp, multptr, cn0 == 0, cn1 == inpCn);
|
outShape, bsz, vsz, vsz_a, outZp, multptr, cn0 == 0, cn1 == inpCn);
|
||||||
else
|
else
|
||||||
|
#endif
|
||||||
|
#if CV_RVP052
|
||||||
|
if(isConv2D)
|
||||||
|
opt_RVP052::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0,
|
||||||
|
outShape, bsz, vsz, vsz_a, outZp, multptr, cn0 == 0, cn1 == inpCn);
|
||||||
|
else
|
||||||
#endif
|
#endif
|
||||||
for( int i = 0; i < outCn; i += 2 )
|
for( int i = 0; i < outCn; i += 2 )
|
||||||
{
|
{
|
||||||
|
@ -302,6 +302,11 @@ public:
|
|||||||
if( useLASX )
|
if( useLASX )
|
||||||
opt_LASX::fastGEMM1T( sptr, wptr, wstep, biasptr, multptr, dptr, nw, vecsize, outZp );
|
opt_LASX::fastGEMM1T( sptr, wptr, wstep, biasptr, multptr, dptr, nw, vecsize, outZp );
|
||||||
else
|
else
|
||||||
|
#endif
|
||||||
|
#if CV_RVP052
|
||||||
|
if( 1 )
|
||||||
|
opt_RVP052::fastGEMM1T( sptr, wptr, wstep, biasptr, multptr, dptr, nw, vecsize, outZp );
|
||||||
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
@ -13,6 +13,8 @@
|
|||||||
#include "int8layers/layers_common.simd_declarations.hpp"
|
#include "int8layers/layers_common.simd_declarations.hpp"
|
||||||
#undef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
#undef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||||
|
|
||||||
|
#include "./layers_rvp052.hpp"
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
#include "../ocl4dnn/include/ocl4dnn.hpp"
|
#include "../ocl4dnn/include/ocl4dnn.hpp"
|
||||||
#endif
|
#endif
|
||||||
|
221
modules/dnn/src/int8layers/layers_rvp052.cpp
Normal file
221
modules/dnn/src/int8layers/layers_rvp052.cpp
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#include "../precomp.hpp"
|
||||||
|
#include "./layers_rvp052.hpp"
|
||||||
|
|
||||||
|
#if CV_RVP052
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
namespace dnn {
|
||||||
|
namespace opt_RVP052 {
|
||||||
|
|
||||||
|
void fastConv(const int8_t *weights, size_t wstep, const int *bias,
|
||||||
|
const int8_t *rowbuf, int *output, const int *outShape,
|
||||||
|
int blockSize, int vecsize, int vecsize_aligned, int outZp,
|
||||||
|
const float *multiplier, bool initOutput, bool finalOutput)
|
||||||
|
{
|
||||||
|
int outCn = outShape[1];
|
||||||
|
size_t outPlaneSize = outShape[2] * outShape[3];
|
||||||
|
for (int i = 0; i < outCn; i += 2)
|
||||||
|
{
|
||||||
|
const int8_t *wptr0 = weights + i * wstep;
|
||||||
|
const int8_t *wptr1 = wptr0 + wstep;
|
||||||
|
int *outptr0 = output + i * outPlaneSize;
|
||||||
|
int *outptr1 = outptr0 + outPlaneSize;
|
||||||
|
int bias0 = bias[i], bias1 = bias[i + 1];
|
||||||
|
float mult0 = multiplier[i], mult1 = multiplier[i + 1];
|
||||||
|
|
||||||
|
if (i + 1 >= outCn)
|
||||||
|
{
|
||||||
|
wptr1 = wptr0;
|
||||||
|
outptr1 = outptr0;
|
||||||
|
bias1 = bias0;
|
||||||
|
mult1 = mult0;
|
||||||
|
}
|
||||||
|
int j = 0;
|
||||||
|
for (; j < blockSize; j++)
|
||||||
|
{
|
||||||
|
const int8_t *rptr = rowbuf + j * vecsize_aligned;
|
||||||
|
int s00 = initOutput ? bias0 : outptr0[j];
|
||||||
|
int s10 = initOutput ? bias1 : outptr1[j];
|
||||||
|
|
||||||
|
int32x2_t vsx0 = {s00, s10};
|
||||||
|
|
||||||
|
for (int k = 0; k < vecsize; k += 4)
|
||||||
|
{
|
||||||
|
int8x4_t vrptr[2] = {*(int8x4_t*)(rptr + k), *(int8x4_t*)(rptr + k)};
|
||||||
|
int8x4_t vwptr[2] = {*(int8x4_t*)(wptr0 + k), *(int8x4_t*)(wptr1 + k)};
|
||||||
|
vsx0 = __nds__v_smaqa(vsx0, *(int8x8_t*)vwptr, *(int8x8_t*)vrptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (finalOutput)
|
||||||
|
{
|
||||||
|
vsx0[0] = outZp + (int)std::round(vsx0[0] * mult0);
|
||||||
|
vsx0[1] = outZp + (int)std::round(vsx0[1] * mult1);
|
||||||
|
vsx0 = __nds__v_sclip32(vsx0, 7);
|
||||||
|
}
|
||||||
|
|
||||||
|
outptr0[j] = vsx0[0];
|
||||||
|
outptr1[j] = vsx0[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void fastDepthwiseConv(const int8_t *wptr,
|
||||||
|
int kernel_h, int kernel_w,
|
||||||
|
int stride_h, int stride_w,
|
||||||
|
int dilation_h, int dilation_w,
|
||||||
|
int pad_t, int pad_l,
|
||||||
|
const int *biasptr, const float *multptr,
|
||||||
|
const int8_t *inptr_,
|
||||||
|
int height, int width,
|
||||||
|
int *outptr_,
|
||||||
|
int out_d, int outH, int outW,
|
||||||
|
int inpZp, int outZp)
|
||||||
|
{
|
||||||
|
const int8_t w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2],
|
||||||
|
w10 = wptr[3], w11 = wptr[4], w12 = wptr[5],
|
||||||
|
w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8];
|
||||||
|
int outW1 = min(outW, (width - dilation_w * (kernel_w - 1) + pad_l) / stride_w);
|
||||||
|
int bias = biasptr[out_d], biasCopy;
|
||||||
|
float mult = multptr[out_d];
|
||||||
|
|
||||||
|
for (int out_i = 0; out_i < outH; out_i++)
|
||||||
|
{
|
||||||
|
int in_i = out_i * stride_h - pad_t, out_j = 0;
|
||||||
|
const int8_t *imgptr0 = inptr_ + in_i * width;
|
||||||
|
const int8_t *imgptr1 = imgptr0 + dilation_h * width;
|
||||||
|
const int8_t *imgptr2 = imgptr0 + (dilation_h * 2) * width;
|
||||||
|
int8_t w00 = w00_, w01 = w01_, w02 = w02_;
|
||||||
|
int8_t w20 = w20_, w21 = w21_, w22 = w22_;
|
||||||
|
int out;
|
||||||
|
biasCopy = bias;
|
||||||
|
|
||||||
|
if (in_i < 0)
|
||||||
|
{
|
||||||
|
biasCopy += inpZp * (w00 + w01 + w02);
|
||||||
|
w00 = w01 = w02 = 0;
|
||||||
|
imgptr0 = imgptr1;
|
||||||
|
}
|
||||||
|
else if (in_i + dilation_h * (kernel_h - 1) >= height)
|
||||||
|
{
|
||||||
|
biasCopy += inpZp * (w20 + w21 + w22);
|
||||||
|
w20 = w21 = w22 = 0;
|
||||||
|
imgptr2 = imgptr1;
|
||||||
|
}
|
||||||
|
int *outptr = outptr_ + out_i * outW;
|
||||||
|
if (pad_l > 0)
|
||||||
|
{
|
||||||
|
out = (int)imgptr0[0] * w01 + (int)imgptr0[dilation_w] * w02 +
|
||||||
|
(int)imgptr1[0] * w11 + (int)imgptr1[dilation_w] * w12 +
|
||||||
|
(int)imgptr2[0] * w21 + (int)imgptr2[dilation_w] * w22 +
|
||||||
|
biasCopy + inpZp * (w00 + w10 + w20);
|
||||||
|
outptr[0] = __nds__sclip32(outZp + (int)std::round(out * mult), 7);
|
||||||
|
out_j = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int8x8_t vwx0 = (int8x8_t){w00, w10, w20, 0, w00, w10, w20, 0};
|
||||||
|
int8x8_t vwx1 = (int8x8_t){w01, w11, w21, 0, w01, w11, w21, 0};
|
||||||
|
int8x8_t vwx2 = (int8x8_t){w02, w12, w22, 0, w02, w12, w22, 0};
|
||||||
|
int8x8_t vimgx0, vimgx1, vimgx2;
|
||||||
|
int32x2_t vout = {0, 0};
|
||||||
|
for (; out_j < outW1; out_j+=2)
|
||||||
|
{
|
||||||
|
int in_j = out_j * stride_w - pad_l;
|
||||||
|
vimgx0 = (int8x8_t){imgptr0[in_j], imgptr1[in_j], imgptr2[in_j], 0,
|
||||||
|
imgptr0[in_j + stride_w], imgptr1[in_j + stride_w], imgptr2[in_j + stride_w], 0};
|
||||||
|
vimgx1 = (int8x8_t){imgptr0[in_j + dilation_w], imgptr1[in_j + dilation_w], imgptr2[in_j + dilation_w], 0,
|
||||||
|
imgptr0[in_j + dilation_w + stride_w], imgptr1[in_j + dilation_w + stride_w], imgptr2[in_j + dilation_w + stride_w], 0};
|
||||||
|
vimgx2 = (int8x8_t){imgptr0[in_j + dilation_w * 2], imgptr1[in_j + dilation_w * 2], imgptr2[in_j + dilation_w * 2], 0,
|
||||||
|
imgptr0[in_j + dilation_w * 2 + stride_w], imgptr1[in_j + dilation_w * 2 + stride_w], imgptr2[in_j + dilation_w * 2 + stride_w], 0};
|
||||||
|
|
||||||
|
vout = (int32x2_t){biasCopy, biasCopy};
|
||||||
|
vout = __nds__v_smaqa(vout, vwx0, vimgx0);
|
||||||
|
vout = __nds__v_smaqa(vout, vwx1, vimgx1);
|
||||||
|
vout = __nds__v_smaqa(vout, vwx2, vimgx2);
|
||||||
|
|
||||||
|
outptr[out_j] = __nds__sclip32(outZp + (int)std::round(vout[0] * mult), 7);
|
||||||
|
outptr[out_j + 1] = __nds__sclip32(outZp + (int)std::round(vout[1] * mult), 7);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (out_j > outW1) out_j--;
|
||||||
|
|
||||||
|
for (; out_j < outW; out_j++)
|
||||||
|
{
|
||||||
|
int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w * 2;
|
||||||
|
int s0 = 1, s1 = 1, s2 = 1;
|
||||||
|
if (in_j0 >= width)
|
||||||
|
{
|
||||||
|
in_j0 = 0;
|
||||||
|
s0 = 0;
|
||||||
|
biasCopy += inpZp * (w00 + w10 + w20);
|
||||||
|
}
|
||||||
|
if (in_j1 >= width)
|
||||||
|
{
|
||||||
|
in_j1 = 0;
|
||||||
|
s1 = 0;
|
||||||
|
biasCopy += inpZp * (w01 + w11 + w21);
|
||||||
|
}
|
||||||
|
if (in_j2 >= width)
|
||||||
|
{
|
||||||
|
in_j2 = 0;
|
||||||
|
s2 = 0;
|
||||||
|
biasCopy += inpZp * (w02 + w12 + w22);
|
||||||
|
}
|
||||||
|
out = (int)imgptr0[in_j0] * w00 * s0 + (int)imgptr0[in_j1] * w01 * s1 + (int)imgptr0[in_j2] * w02 * s2 +
|
||||||
|
(int)imgptr1[in_j0] * w10 * s0 + (int)imgptr1[in_j1] * w11 * s1 + (int)imgptr1[in_j2] * w12 * s2 +
|
||||||
|
(int)imgptr2[in_j0] * w20 * s0 + (int)imgptr2[in_j1] * w21 * s1 + (int)imgptr2[in_j2] * w22 * s2 + biasCopy;
|
||||||
|
outptr[out_j] = __nds__sclip32(outZp + (int)std::round(out * mult), 7);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// dst = vec * weights^t + bias
|
||||||
|
void fastGEMM1T( const int8_t* vec, const int8_t* weights,
|
||||||
|
size_t wstep, const int* bias, const float* multiplier,
|
||||||
|
int* dst, int nvecs, int vecsize, int outZp )
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
|
for( ; i <= nvecs - 2; i += 2 )
|
||||||
|
{
|
||||||
|
const int8_t* wptr0 = weights + i * wstep;
|
||||||
|
const int8_t* wptr1 = weights + (i + 1) * wstep;
|
||||||
|
|
||||||
|
int32x2_t vs0 = *(int32x2_t*)(bias + i);
|
||||||
|
|
||||||
|
for( int k = 0; k < vecsize; k += 4 )
|
||||||
|
{
|
||||||
|
int8x4_t vvec[2] = {*(int8x4_t*)(vec + k), *(int8x4_t*)(vec + k)};
|
||||||
|
int8x4_t vwptr[2] = {*(int8x4_t*)(wptr0 + k), *(int8x4_t*)(wptr1 + k)};
|
||||||
|
vs0 = __nds__v_smaqa(vs0, *(int8x8_t*)vwptr, *(int8x8_t*)vvec);
|
||||||
|
}
|
||||||
|
|
||||||
|
int32x2_t vdst = {(int)std::round(vs0[0] * multiplier[i]), (int)std::round(vs0[1] * multiplier[i + 1])};
|
||||||
|
|
||||||
|
vdst = __nds__v_sclip32(vdst + outZp, 7);
|
||||||
|
|
||||||
|
*(int32x2_t*)(dst + i) = vdst;
|
||||||
|
}
|
||||||
|
|
||||||
|
for( ; i < nvecs; i++ )
|
||||||
|
{
|
||||||
|
const int8_t* wptr = weights + i * wstep;
|
||||||
|
int s0 = bias[i];
|
||||||
|
|
||||||
|
for( int k = 0; k < vecsize; k += 4 )
|
||||||
|
{
|
||||||
|
int8x4_t vvec[2] = {*(int8x4_t*)(vec + k), 0};
|
||||||
|
int8x4_t vwptr[2] = {*(int8x4_t*)(wptr + k), 0};
|
||||||
|
s0 = __nds__smaqa(s0, *(unsigned long*)vwptr, *(unsigned long*)vvec);
|
||||||
|
}
|
||||||
|
|
||||||
|
dst[i] = __nds__sclip32(outZp + (int)std::round(s0 * multiplier[i]), 7);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}}} // namespace
|
||||||
|
|
||||||
|
#endif
|
36
modules/dnn/src/int8layers/layers_rvp052.hpp
Normal file
36
modules/dnn/src/int8layers/layers_rvp052.hpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Declarations of the int8 kernels specialised for the Andes RVP 0.5.2 DSP
// extension. Defines CV_RVP052 to 1 when the toolchain targets that extension
// (and declares the kernels), or to 0 otherwise so callers can test it with #if.
//
// The guard makes repeated inclusion harmless; this header is pulled in both by
// the common int8 layer header and by its own implementation file.
#ifndef OPENCV_DNN_SRC_INT8LAYERS_LAYERS_RVP052_HPP
#define OPENCV_DNN_SRC_INT8LAYERS_LAYERS_RVP052_HPP

#if defined(__riscv) && defined(__riscv_dsp) && defined(__ANDES)
# include <nds_intrinsic.h>
# include <stddef.h>  // size_t
# include <stdint.h>  // int8_t
# define CV_RVP052 1

namespace cv {
namespace dnn {
namespace opt_RVP052 {

// int8 convolution inner kernel, two output channels per step.
void fastConv( const int8_t* weights, size_t wstep, const int* bias,
               const int8_t* rowbuf, int* output, const int* outShape,
               int blockSize, int vecsize, int vecsize_aligned, int outZp,
               const float* multiplier, bool initOutput, bool finalOutput );

// int8 3x3 depthwise convolution of a single channel plane.
void fastDepthwiseConv( const int8_t* wptr,
                        int kernel_h, int kernel_w,
                        int stride_h, int stride_w,
                        int dilation_h, int dilation_w,
                        int pad_t, int pad_l,
                        const int* biasptr, const float* multptr,
                        const int8_t* inptr_,
                        int height, int width,
                        int* outptr_,
                        int out_d, int outH, int outW,
                        int inpZp, int outZp );

// dst = vec * weights^t + bias (int8 inputs, requantized int32 outputs).
void fastGEMM1T( const int8_t* vec, const int8_t* weights,
                 size_t wstep, const int* bias, const float* multiplier,
                 int* dst, int nvecs, int vecsize, int outZp );

}}}

#else
# define CV_RVP052 0
#endif

#endif // OPENCV_DNN_SRC_INT8LAYERS_LAYERS_RVP052_HPP
|
@ -338,7 +338,7 @@ int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _outpu
|
|||||||
}
|
}
|
||||||
#if CV_TRY_AVX2
|
#if CV_TRY_AVX2
|
||||||
if (conv->useAVX2)
|
if (conv->useAVX2)
|
||||||
opt_AVX::winofunc_AtXA_8x8_F32((float *)out_wbuf + ((k - k0)*CONV_WINO_IBLOCK + (block_id - block_id0))*CONV_WINO_AREA, CONV_WINO_SIZE,
|
opt_AVX2::winofunc_AtXA_8x8_F32((float *)out_wbuf + ((k - k0)*CONV_WINO_IBLOCK + (block_id - block_id0))*CONV_WINO_AREA, CONV_WINO_SIZE,
|
||||||
bpptr, outstep, outptr, outstep, biasv, minval, maxval, ifMinMaxAct);
|
bpptr, outstep, outptr, outstep, biasv, minval, maxval, ifMinMaxAct);
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
@ -385,7 +385,7 @@ void fastGemmBatch(bool trans_a, bool trans_b,
|
|||||||
const auto shape_b = shape(B);
|
const auto shape_b = shape(B);
|
||||||
const auto shape_c = shape(C);
|
const auto shape_c = shape(C);
|
||||||
CV_CheckGE(shape_a.size(), static_cast<size_t>(2), "DNN/fastGemmBatch: A must be n-dimensional (n >= 2)");
|
CV_CheckGE(shape_a.size(), static_cast<size_t>(2), "DNN/fastGemmBatch: A must be n-dimensional (n >= 2)");
|
||||||
CV_CheckEQ(shape_b.size(), static_cast<size_t>(2), "DNN/fastGemmBatch: B must be n-dimensional (n >= 2)");
|
CV_CheckGE(shape_b.size(), static_cast<size_t>(2), "DNN/fastGemmBatch: B must be n-dimensional (n >= 2)");
|
||||||
|
|
||||||
const float *a = A.ptr<const float>();
|
const float *a = A.ptr<const float>();
|
||||||
const float *b = B.ptr<const float>();
|
const float *b = B.ptr<const float>();
|
||||||
|
@ -158,4 +158,51 @@ void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &o
|
|||||||
parallel_for_(Range(0, loops), fn, nstripes);
|
parallel_for_(Range(0, loops), fn, nstripes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void fastNormGroup(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon, size_t num_groups) {
|
||||||
|
const auto input_shape = shape(input);
|
||||||
|
size_t N = input_shape[0], C = input_shape[1];
|
||||||
|
CV_CheckEQ(scale.total(), bias.total(), "fastNormGroup: scale and bias should have the same shape");
|
||||||
|
CV_CheckEQ(scale.total(), C, "fastNormGroup: scale should be a 1d tensor and match the channel of input");
|
||||||
|
CV_CheckGE(input.dims, 3, "fastNormGroup: input dimension >= 3");
|
||||||
|
|
||||||
|
size_t channels_per_group = C / num_groups;
|
||||||
|
size_t loops = N * num_groups;
|
||||||
|
size_t norm_size = static_cast<size_t>(total(input_shape, 2) * channels_per_group);
|
||||||
|
size_t step = norm_size / channels_per_group;
|
||||||
|
float inv_norm_size = 1.0 / norm_size;
|
||||||
|
|
||||||
|
auto fn = [&](const Range &r) {
|
||||||
|
const auto *input_data = input.ptr<const float>();
|
||||||
|
const auto *scale_data = scale.ptr<const float>();
|
||||||
|
const auto *bias_data = bias.ptr<const float>();
|
||||||
|
auto *output_data = output.ptr<float>();
|
||||||
|
|
||||||
|
for (int i = r.start; i < r.end; i++) {
|
||||||
|
const auto *x = input_data + norm_size * i;
|
||||||
|
auto *y = output_data + norm_size * i;
|
||||||
|
|
||||||
|
float mean = 0.f, mean_square = 0.f;
|
||||||
|
for (int j = 0; j < norm_size; j++) {
|
||||||
|
float v = x[j];
|
||||||
|
mean += v;
|
||||||
|
mean_square += v * v;
|
||||||
|
}
|
||||||
|
|
||||||
|
mean *= inv_norm_size;
|
||||||
|
mean_square = std::sqrt(std::max(0.f, mean_square * inv_norm_size - mean * mean) + epsilon);
|
||||||
|
float inv_stdev = 1.f / mean_square;
|
||||||
|
|
||||||
|
size_t group_idx = i % num_groups * channels_per_group;
|
||||||
|
for (size_t j = 0; j < norm_size; j++) {
|
||||||
|
size_t c = group_idx + (j / step);
|
||||||
|
float s = scale_data[c] * inv_stdev, b = bias_data[c];
|
||||||
|
y[j] = s * (x[j] - mean) + b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
double nstripes = loops * norm_size * (1 / 1024.0);
|
||||||
|
parallel_for_(Range(0, loops), fn, nstripes);
|
||||||
|
}
|
||||||
|
|
||||||
}} // cv::dnn
|
}} // cv::dnn
|
||||||
|
@ -21,6 +21,9 @@ void fastNorm(const Mat &input, const Mat &scale, const Mat &bias, Mat &output,
|
|||||||
// Channel-wise Normalization speedup by multi-threading. Scale and bias should have the same shape (C). Input should have dimension >= 3.
|
// Channel-wise Normalization speedup by multi-threading. Scale and bias should have the same shape (C). Input should have dimension >= 3.
|
||||||
void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon);
|
void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon);
|
||||||
|
|
||||||
|
// Group-wise Normalization speedup by multi-threading. Scale and bias should have the same shape (C). Input should have dimension >= 3.
|
||||||
|
void fastNormGroup(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon, size_t num_groups);
|
||||||
|
|
||||||
}} // cv::dnn
|
}} // cv::dnn
|
||||||
|
|
||||||
#endif // OPENCV_DNN_FAST_NORM_HPP
|
#endif // OPENCV_DNN_FAST_NORM_HPP
|
||||||
|
@ -1299,7 +1299,6 @@ Mat LayerEinsumImpl::batchwiseMatMul(
|
|||||||
const Mat& input2,
|
const Mat& input2,
|
||||||
const MatShape& input2ShapeOverride)
|
const MatShape& input2ShapeOverride)
|
||||||
{
|
{
|
||||||
|
|
||||||
// Sanity checks before the actual MatMul
|
// Sanity checks before the actual MatMul
|
||||||
CV_CheckType(input1.type(), input2.type(), "Data types of the inputs must match for MatMul");
|
CV_CheckType(input1.type(), input2.type(), "Data types of the inputs must match for MatMul");
|
||||||
CV_CheckEQ(input1ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
|
CV_CheckEQ(input1ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
|
||||||
@ -1312,59 +1311,21 @@ Mat LayerEinsumImpl::batchwiseMatMul(
|
|||||||
int K = input1ShapeOverride[2];
|
int K = input1ShapeOverride[2];
|
||||||
int N = input2ShapeOverride[2];
|
int N = input2ShapeOverride[2];
|
||||||
|
|
||||||
std::vector<Mat> output;
|
Mat reshapedInput1 = input1;
|
||||||
|
Mat reshapedInput2 = input2;
|
||||||
|
|
||||||
|
Mat output;
|
||||||
if (batches > 1)
|
if (batches > 1)
|
||||||
{
|
{
|
||||||
Mat reshapedInput1 = input1;
|
// create tmpout with type like input1
|
||||||
Mat reshapedInput2 = input2;
|
output = Mat({batches, M, N}, input1.type());
|
||||||
|
|
||||||
// input1 should of size MxK
|
reshapedInput2 = reshapedInput2.reshape(1, input2ShapeOverride);
|
||||||
// check if input1 needs reshape, if need reshape
|
reshapedInput1 = reshapedInput1.reshape(1, input1ShapeOverride);
|
||||||
if (input1.size[0] != M || input1.size[1] != K)
|
|
||||||
{
|
|
||||||
int shape[] = {batches, M, K};
|
|
||||||
reshapedInput1 = input1.reshape(1, 3, shape);
|
|
||||||
}
|
|
||||||
|
|
||||||
// input2 should be of size KxN
|
|
||||||
// check if input2 needs reshape, if needs reshape
|
|
||||||
if (input2.size[0] != K || input2.size[1] != N)
|
|
||||||
{
|
|
||||||
int shape[] = {batches, K, N};
|
|
||||||
reshapedInput2 = input2.reshape(1, 3, shape);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i=0; i < batches; i++)
|
|
||||||
{
|
|
||||||
std::vector<Range> ranges1 = {cv::Range(i, i+1)};
|
|
||||||
for (int j = 1; j < reshapedInput1.dims; j++)
|
|
||||||
ranges1.emplace_back(cv::Range::all());
|
|
||||||
|
|
||||||
Mat part1 = reshapedInput1(ranges1);
|
|
||||||
int shape[] = {M, K};
|
|
||||||
part1 = part1.reshape(1, sizeof(shape)/sizeof(shape[0]), shape);
|
|
||||||
|
|
||||||
std::vector<Range> ranges2 = {cv::Range(i, i+1)};
|
|
||||||
for (int j = 1; j < reshapedInput2.dims; j++)
|
|
||||||
ranges2.emplace_back(cv::Range::all());
|
|
||||||
|
|
||||||
Mat part2 = reshapedInput2(ranges2);
|
|
||||||
int shape2[] = {K, N};
|
|
||||||
part2 = part2.reshape(1, sizeof(shape2)/sizeof(shape2[0]), shape2);
|
|
||||||
|
|
||||||
Mat tmp_output(M, N, part1.type());
|
|
||||||
fastGemm(false, false, 1.0, part1, part2, 0.0, tmp_output, opt);
|
|
||||||
int newShape[] = {1, M, N};
|
|
||||||
tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
|
|
||||||
|
|
||||||
output.emplace_back(tmp_output);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
fastGemmBatch(false, false, 1.0, reshapedInput1, reshapedInput2, 0.0, output, opt);
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
Mat reshapedInput1 = input1;
|
|
||||||
Mat reshapedInput2 = input2;
|
|
||||||
|
|
||||||
// input1 should of size MxK
|
// input1 should of size MxK
|
||||||
// check if input1 needs reshape, if need reshape
|
// check if input1 needs reshape, if need reshape
|
||||||
if (input1.dims > 2 || input1.size[0] != M || (input1.dims > 1 && input1.size[1] != K) || input1.dims == 1)
|
if (input1.dims > 2 || input1.size[0] != M || (input1.dims > 1 && input1.size[1] != K) || input1.dims == 1)
|
||||||
@ -1381,23 +1342,12 @@ Mat LayerEinsumImpl::batchwiseMatMul(
|
|||||||
reshapedInput2 = input2.reshape(1, 2, shape2);
|
reshapedInput2 = input2.reshape(1, 2, shape2);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat tmp_output(M, N, reshapedInput1.type());
|
output = Mat(M, N, reshapedInput1.type());
|
||||||
fastGemm(false, false, 1.0, reshapedInput1, reshapedInput2, 0.0, tmp_output, opt);
|
fastGemm(false, false, 1.0, reshapedInput1, reshapedInput2, 0.0, output, opt);
|
||||||
|
|
||||||
int newShape[] = {1, M, N};
|
|
||||||
tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
|
|
||||||
output.emplace_back(tmp_output);
|
|
||||||
|
|
||||||
|
output = output.reshape(1, {1, M, N});
|
||||||
}
|
}
|
||||||
|
return output;
|
||||||
int outputDim[] = {static_cast<int>(output.size()), M, N};
|
|
||||||
Mat output_buffer = Mat::zeros(3, outputDim, CV_32F);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < output.size(); i++) {
|
|
||||||
Mat output_slice = output_buffer.row(i);
|
|
||||||
output[i].copyTo(output_slice);
|
|
||||||
}
|
|
||||||
return output_buffer;
|
|
||||||
};
|
};
|
||||||
Ptr<EinsumLayer> EinsumLayer::create(const LayerParams& params)
|
Ptr<EinsumLayer> EinsumLayer::create(const LayerParams& params)
|
||||||
{
|
{
|
||||||
|
@ -453,13 +453,6 @@ public:
|
|||||||
ret = false;
|
ret = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!use_half && bias && (outerSize > 1))
|
|
||||||
{
|
|
||||||
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
|
|
||||||
UMat& biases = umat_blobs[1];
|
|
||||||
cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret) return true;
|
if (ret) return true;
|
||||||
|
190
modules/dnn/src/layers/group_norm_layer.cpp
Normal file
190
modules/dnn/src/layers/group_norm_layer.cpp
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#include "../precomp.hpp"
|
||||||
|
#include <opencv2/dnn/shape_utils.hpp>
|
||||||
|
#include "./cpu_kernels/fast_norm.hpp"
|
||||||
|
|
||||||
|
// CUDA backend
|
||||||
|
#include "../op_cuda.hpp"
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
#include "../cuda4dnn/primitives/group_norm.hpp"
|
||||||
|
using namespace cv::dnn::cuda4dnn;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// OpenCL backend
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
#include "../ocl4dnn/include/math_functions.hpp"
|
||||||
|
#include "opencl_kernels_dnn.hpp"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
namespace dnn {
|
||||||
|
|
||||||
|
// https://github.com/onnx/onnx/blob/main/docs/Operators.md#GroupNormalization
|
||||||
|
// Group normalization layer.
//
// Inputs (all passed as layer inputs, in this order):
//   0: input  - tensor with at least 3 dimensions; dimension 1 is the channel count C
//   1: scale  - tensor with exactly C elements
//   2: bias   - tensor with exactly C elements
// Output: one tensor with the same shape as the input.
//
// Parameters:
//   epsilon    - value forwarded to the normalization kernels (default 1e-5)
//   num_groups - number of channel groups; must be positive and divide C
class GroupNormLayerImpl CV_FINAL : public GroupNormLayer {
public:
    GroupNormLayerImpl(const LayerParams &params) {
        setParamsFrom(params);

        epsilon = params.get<float>("epsilon", 1e-5);

        // Read as int first so that a non-positive value is rejected instead of
        // silently wrapping around when stored in the unsigned member.
        const int groups = params.get<int>("num_groups");
        CV_CheckGT(groups, 0, "DNN/GroupNorm: num_groups must be positive");
        num_groups = static_cast<size_t>(groups);
    }

    // Only the default OpenCV backend and the CUDA backend are supported.
    virtual bool supportBackend(int backendId) CV_OVERRIDE {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA;
    }

    // Validates the input/scale/bias shapes and reports a single output with
    // the same shape as the input. Always returns false.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE {
        // All three inputs are indexed below, so make sure they exist first.
        CV_CheckGE(inputs.size(), static_cast<size_t>(3), "DNN/GroupNorm: input, scale and bias are required");

        const auto &input = inputs[0];
        const auto &scale = inputs[1];
        const auto &bias = inputs[2];
        CV_CheckGE(input.size(), static_cast<size_t>(3), "DNN/GroupNorm: input dimension >= 3 is required");

        int C = input[1];
        // Channels are split evenly between groups (C / num_groups channels each),
        // so C has to be a multiple of num_groups.
        CV_CheckEQ(static_cast<size_t>(C) % num_groups, static_cast<size_t>(0), "DNN/GroupNorm: number of channels must be divisible by num_groups");

        int scale_dim = std::accumulate(scale.begin(), scale.end(), 1, std::multiplies<int>());
        CV_CheckEQ(scale_dim, C, "DNN/GroupNorm: scale must be a 1d tensor and match the channel of input");
        int bias_dim = std::accumulate(bias.begin(), bias.end(), 1, std::multiplies<int>());
        CV_CheckEQ(bias_dim, C, "DNN/GroupNorm: bias must be a 1d tensor and match the channel of input");

        outputs.assign(1, inputs[0]);
        return false;
    }

    // CPU forward pass: delegates to fastNormGroup(); CV_16S inputs are routed
    // to forward_fallback() instead.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        if (inputs_arr.depth() == CV_16S) {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        const auto& input = inputs[0];
        const auto& scale = inputs[1];
        const auto& bias = inputs[2];

        fastNormGroup(input, scale, bias, outputs[0], epsilon, num_groups);
    }

#ifdef HAVE_OPENCL
    // OpenCL forward pass. Returns false when the input is CV_16S or when any
    // kernel cannot be built or run, true otherwise.
    // NOTE(review): forward() in this class does not dispatch to this method;
    // confirm whether a CV_OCL_RUN hook is intended.
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);

        const auto &input = inputs[0], &scale = inputs[1], &bias = inputs[2];
        auto &output = outputs[0];

        // Treat the input as a (loops x norm_size) matrix: one row per
        // (sample, group) pair, holding every element of that group.
        const auto input_shape = shape(input);
        size_t N = input_shape[0], C = input_shape[1];
        size_t num_groups = this->num_groups;
        size_t channels_per_group = C / num_groups;
        size_t loops = N * num_groups, norm_size = static_cast<size_t>(total(input_shape, 2)) * channels_per_group;
        float inv_norm_size = 1.f / norm_size;

        // no fp16 support
        if (input.depth() == CV_16S) {
            return false;
        }

        String base_opts = format(" -DT=float -DT4=float4 -Dconvert_T=convert_float4");

        // Calculate mean: GEMV of the input against a vector of ones, scaled by 1 / norm_size
        UMat one = UMat::ones(norm_size, 1, CV_32F);
        UMat mean = UMat(loops, 1, CV_32F);
        UMat mean_square = UMat(loops, 1, CV_32F);
        UMat tmp = UMat(loops, norm_size, CV_32F);
        bool ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, loops, norm_size, inv_norm_size,
                                               input, 0, one, 0, 0.f, mean, 0);
        if (!ret) {
            return false;
        }

        // Calculate mean_square
        // Pick the widest vector width (8, 4 or 1) that evenly divides norm_size.
        int num_vector = (norm_size % 8 == 0) ? 8 : ((norm_size % 4 == 0) ? 4 : 1);
        size_t global[] = {loops, static_cast<size_t>(norm_size / num_vector)};
        String build_opt = format(" -DNUM=%d", num_vector) + base_opts;
        String mean_square_kernel_name = format("calc_mean%d", num_vector);
        ocl::Kernel mean_square_kernel(mean_square_kernel_name.c_str(), ocl::dnn::mvn_oclsrc, build_opt + " -DKERNEL_MEAN");
        if (mean_square_kernel.empty()) {
            return false;
        }
        mean_square_kernel.set(0, ocl::KernelArg::PtrReadOnly(input));
        mean_square_kernel.set(1, (int)loops);
        mean_square_kernel.set(2, (int)norm_size);
        mean_square_kernel.set(3, ocl::KernelArg::PtrReadOnly(mean));
        mean_square_kernel.set(4, ocl::KernelArg::PtrWriteOnly(tmp));
        ret = mean_square_kernel.run(2, global, NULL, false);
        if (!ret) {
            return false;
        }
        // Average the per-element kernel output (presumably squared deviations
        // from the mean - confirm against the mvn OpenCL source) per row.
        ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, loops, norm_size, inv_norm_size,
                                          tmp, 0, one, 0, 0.f, mean_square, 0);
        if (!ret) {
            return false;
        }

        // Calculate group norm: output = scale * (x - mean) / sqrt(var + eps) + bias
        String mvn_group_kernel_name = format("mvn_group%d", num_vector);
        build_opt += " -DNORM_VARIANCE -DKERNEL_MVN_GROUP";
        ocl::Kernel mvn_group_kernel(mvn_group_kernel_name.c_str(), ocl::dnn::mvn_oclsrc, build_opt);
        if (mvn_group_kernel.empty()) {
            return false;
        }
        mvn_group_kernel.set(0, ocl::KernelArg::PtrReadOnly(input));
        mvn_group_kernel.set(1, (int)loops);
        mvn_group_kernel.set(2, (int)norm_size);
        mvn_group_kernel.set(3, (float)epsilon);
        mvn_group_kernel.set(4, ocl::KernelArg::PtrReadOnly(mean));
        mvn_group_kernel.set(5, ocl::KernelArg::PtrReadOnly(mean_square));
        mvn_group_kernel.set(6, ocl::KernelArg::PtrReadOnly(scale));
        mvn_group_kernel.set(7, ocl::KernelArg::PtrReadOnly(bias));
        mvn_group_kernel.set(8, (int)C);
        mvn_group_kernel.set(9, (int)num_groups);
        mvn_group_kernel.set(10, (float)0.f);
        mvn_group_kernel.set(11, ocl::KernelArg::PtrWriteOnly(output));
        ret = mvn_group_kernel.run(2, global, NULL, false);
        if (!ret) {
            return false;
        }

        return true;
    }
#endif

#ifdef HAVE_CUDA
    // Creates the CUDA node. `loops` is the number of (sample, group) pairs,
    // i.e. N * num_groups, taken from the first input's shape.
    Ptr<BackendNode> initCUDA(void *context_,
                              const std::vector<Ptr<BackendWrapper>>& inputs,
                              const std::vector<Ptr<BackendWrapper>>& outputs) override {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
        auto input_shape = input_wrapper->getShape();
        size_t N = input_shape[0];
        size_t num_groups = this->num_groups;
        size_t loops = N * num_groups;

        return make_cuda_node<cuda4dnn::GroupNormOp>(preferableTarget, std::move(context->stream), epsilon, loops, num_groups);
    }
#endif // HAVE_CUDA

private:
    float epsilon;      // "epsilon" layer parameter
    size_t num_groups;  // "num_groups" layer parameter, validated to be > 0
};
|
||||||
|
|
||||||
|
// Factory: builds the group normalization layer implementation from `params`.
Ptr<GroupNormLayer> GroupNormLayer::create(const LayerParams &params) {
    return Ptr<GroupNormLayer>(new GroupNormLayerImpl(params));
}
|
||||||
|
|
||||||
|
}} // cv::dnn
|
@ -24,6 +24,16 @@ namespace cv
|
|||||||
namespace dnn
|
namespace dnn
|
||||||
{
|
{
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
// Floored integer modulo: a non-zero result takes the sign of the divisor `y`
// (the built-in `%` yields a result with the sign of the dividend `x`).
//
// Divisors for which `x % y` is undefined behaviour in C++ are short-circuited:
//   y == 0  -> returns 0 (division by zero);
//   y == -1 -> returns 0 (the mathematically correct result, and it avoids
//              the INT_MIN % -1 overflow).
static int _mod(int x, int y) {
    if (y == 0 || y == -1) {
        return 0;
    }
    int res = x % y;
    // Shift the truncated remainder into the divisor's sign range.
    if ((res < 0 && y > 0) || (res > 0 && y < 0)) {
        res += y;
    }
    return res;
}
|
||||||
|
}
|
||||||
|
|
||||||
class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer
|
class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -42,7 +52,8 @@ public:
|
|||||||
MAX,
|
MAX,
|
||||||
MEAN,
|
MEAN,
|
||||||
MIN,
|
MIN,
|
||||||
MOD,
|
MOD, // Integer Mod. Remainder's sign = Divisor's sign.
|
||||||
|
FMOD, // Floating-point Mod. Remainder's sign = Dividend's sign.
|
||||||
PROD,
|
PROD,
|
||||||
SUB,
|
SUB,
|
||||||
SUM,
|
SUM,
|
||||||
@ -79,6 +90,8 @@ public:
|
|||||||
op = OPERATION::MIN;
|
op = OPERATION::MIN;
|
||||||
else if (operation == "mod")
|
else if (operation == "mod")
|
||||||
op = OPERATION::MOD;
|
op = OPERATION::MOD;
|
||||||
|
else if (operation == "fmod")
|
||||||
|
op = OPERATION::FMOD;
|
||||||
else if (operation == "mul")
|
else if (operation == "mul")
|
||||||
op = OPERATION::PROD;
|
op = OPERATION::PROD;
|
||||||
else if (operation == "sub")
|
else if (operation == "sub")
|
||||||
@ -106,18 +119,21 @@ public:
|
|||||||
#ifdef HAVE_CANN
|
#ifdef HAVE_CANN
|
||||||
if (backendId == DNN_BACKEND_CANN)
|
if (backendId == DNN_BACKEND_CANN)
|
||||||
return op == OPERATION::ADD || op == OPERATION::PROD || op == OPERATION::SUB ||
|
return op == OPERATION::ADD || op == OPERATION::PROD || op == OPERATION::SUB ||
|
||||||
op == OPERATION::DIV || op == OPERATION::MAX || op == OPERATION::MIN;
|
op == OPERATION::DIV || op == OPERATION::MAX || op == OPERATION::MIN ||
|
||||||
|
op == OPERATION::MOD || op == OPERATION::FMOD;
|
||||||
#endif
|
#endif
|
||||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||||
return (op == OPERATION::ADD ||
|
return (op == OPERATION::ADD ||
|
||||||
op == OPERATION::PROD ||
|
op == OPERATION::PROD ||
|
||||||
op == OPERATION::GREATER_EQUAL ||
|
op == OPERATION::GREATER_EQUAL ||
|
||||||
op == OPERATION::LESS_EQUAL
|
op == OPERATION::LESS_EQUAL ||
|
||||||
|
op == OPERATION::MOD ||
|
||||||
|
op == OPERATION::FMOD
|
||||||
);
|
);
|
||||||
if (backendId == DNN_BACKEND_CUDA) {
|
if (backendId == DNN_BACKEND_CUDA) {
|
||||||
return op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM ||
|
return op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM ||
|
||||||
op == OPERATION::PROD || op == OPERATION::DIV || op == OPERATION::ADD ||
|
op == OPERATION::PROD || op == OPERATION::DIV || op == OPERATION::ADD ||
|
||||||
op == OPERATION::SUB;
|
op == OPERATION::SUB || op == OPERATION::MOD || op == OPERATION::FMOD;
|
||||||
}
|
}
|
||||||
return backendId == DNN_BACKEND_OPENCV;
|
return backendId == DNN_BACKEND_OPENCV;
|
||||||
}
|
}
|
||||||
@ -707,10 +723,16 @@ public:
|
|||||||
}
|
}
|
||||||
case OPERATION::MOD:
|
case OPERATION::MOD:
|
||||||
{
|
{
|
||||||
auto mod = [](const uint8_t &a, const uint8_t &b) { return a % b; };
|
auto mod = [] (const T &a, const T &b) { return static_cast<T>(_mod(int(a), int(b))); };
|
||||||
binary_forward<T>(mod, std::forward<Args>(args)...);
|
binary_forward<T>(mod, std::forward<Args>(args)...);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case OPERATION::FMOD:
|
||||||
|
{
|
||||||
|
auto fmod = [](const T &a, const T &b) { return std::fmod(a, b); };
|
||||||
|
binary_forward<T>(fmod, std::forward<Args>(args)...);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case OPERATION::PROD:
|
case OPERATION::PROD:
|
||||||
{
|
{
|
||||||
auto prod = [](const T &a, const T &b) { return a * b; };
|
auto prod = [](const T &a, const T &b) { return a * b; };
|
||||||
@ -782,9 +804,8 @@ public:
|
|||||||
opDispatch<int32_t>(std::forward<Args>(args)...);
|
opDispatch<int32_t>(std::forward<Args>(args)...);
|
||||||
break;
|
break;
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
CV_Assert(op != OPERATION::BITSHIFT && op != OPERATION::MOD &&
|
CV_Assert(op != OPERATION::BITSHIFT && op != OPERATION::AND &&
|
||||||
op != OPERATION::AND && op != OPERATION::OR &&
|
op != OPERATION::OR && op != OPERATION::XOR);
|
||||||
op != OPERATION::XOR);
|
|
||||||
opDispatch<float>(std::forward<Args>(args)...);
|
opDispatch<float>(std::forward<Args>(args)...);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -801,19 +822,6 @@ public:
|
|||||||
{
|
{
|
||||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||||
|
|
||||||
auto input_0_shape = inputs[0].dynamicCast<CUDABackendWrapper>()->getShape();
|
|
||||||
for (int i = 1; i < inputs.size(); i++)
|
|
||||||
{
|
|
||||||
auto input_i_shape = inputs[i].dynamicCast<CUDABackendWrapper>()->getShape();
|
|
||||||
if (input_0_shape.size() != input_i_shape.size())
|
|
||||||
return Ptr<BackendNode>();
|
|
||||||
// check if the shape can be supported by `eltwise_ops.cu`, or return the default BackendNode
|
|
||||||
for (int j = 0; j < input_0_shape.size(); j++)
|
|
||||||
if (input_0_shape[j] != input_i_shape[j] &&
|
|
||||||
input_0_shape[j] != 1 && input_i_shape[j] != 1)
|
|
||||||
return Ptr<BackendNode>();
|
|
||||||
}
|
|
||||||
|
|
||||||
cuda4dnn::EltwiseOpType op_ = cuda4dnn::EltwiseOpType::SUM;
|
cuda4dnn::EltwiseOpType op_ = cuda4dnn::EltwiseOpType::SUM;
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case OPERATION::MAX:
|
case OPERATION::MAX:
|
||||||
@ -837,6 +845,12 @@ public:
|
|||||||
case OPERATION::SUB:
|
case OPERATION::SUB:
|
||||||
op_ = cuda4dnn::EltwiseOpType::SUB;
|
op_ = cuda4dnn::EltwiseOpType::SUB;
|
||||||
break;
|
break;
|
||||||
|
case OPERATION::MOD:
|
||||||
|
op_ = cuda4dnn::EltwiseOpType::MOD;
|
||||||
|
break;
|
||||||
|
case OPERATION::FMOD:
|
||||||
|
op_ = cuda4dnn::EltwiseOpType::FMOD;
|
||||||
|
break;
|
||||||
default: return Ptr<BackendNode>(); // return empty cuda_node if the EltwiseOpType is unsupported type.
|
default: return Ptr<BackendNode>(); // return empty cuda_node if the EltwiseOpType is unsupported type.
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -881,6 +895,8 @@ public:
|
|||||||
BUILD_CANN_ELTWISE_OP(OPERATION::DIV, Xdivy, name);
|
BUILD_CANN_ELTWISE_OP(OPERATION::DIV, Xdivy, name);
|
||||||
BUILD_CANN_ELTWISE_OP(OPERATION::MAX, Maximum, name);
|
BUILD_CANN_ELTWISE_OP(OPERATION::MAX, Maximum, name);
|
||||||
BUILD_CANN_ELTWISE_OP(OPERATION::MIN, Minimum, name);
|
BUILD_CANN_ELTWISE_OP(OPERATION::MIN, Minimum, name);
|
||||||
|
BUILD_CANN_ELTWISE_OP(OPERATION::MOD, Mod, name);
|
||||||
|
BUILD_CANN_ELTWISE_OP(OPERATION::FMOD, Mod, name);
|
||||||
#undef BUILD_CANN_ELTWISE_OP
|
#undef BUILD_CANN_ELTWISE_OP
|
||||||
default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
|
default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
|
||||||
}
|
}
|
||||||
@ -927,6 +943,16 @@ public:
|
|||||||
node = std::make_shared<ngraph::op::v1::GreaterEqual>(inp0, inp1);
|
node = std::make_shared<ngraph::op::v1::GreaterEqual>(inp0, inp1);
|
||||||
else if (op == OPERATION::LESS_EQUAL)
|
else if (op == OPERATION::LESS_EQUAL)
|
||||||
node = std::make_shared<ngraph::op::v1::LessEqual>(inp0, inp1);
|
node = std::make_shared<ngraph::op::v1::LessEqual>(inp0, inp1);
|
||||||
|
// Ideally we should do this but int32 internal blobs are converted to float32 data type in inference.
|
||||||
|
// TODO: Remove data type conversion when we have type inference.
|
||||||
|
else if (op == OPERATION::MOD) {
|
||||||
|
auto inp0_i64 = std::make_shared<ngraph::op::Convert>(inp0, ngraph::element::i64);
|
||||||
|
auto inp1_i64 = std::make_shared<ngraph::op::Convert>(inp1, ngraph::element::i64);
|
||||||
|
auto mod = std::make_shared<ngraph::op::v1::FloorMod>(inp0_i64, inp1_i64);
|
||||||
|
node = std::make_shared<ngraph::op::Convert>(mod, ngraph::element::f32);
|
||||||
|
}
|
||||||
|
else if (op == OPERATION::FMOD)
|
||||||
|
node = std::make_shared<ngraph::op::v1::Mod>(inp0, inp1);
|
||||||
else
|
else
|
||||||
CV_Error(Error::StsNotImplemented, "Operation is not implemented for nGraph backend");
|
CV_Error(Error::StsNotImplemented, "Operation is not implemented for nGraph backend");
|
||||||
return Ptr<BackendNode>(new InfEngineNgraphNode(node));
|
return Ptr<BackendNode>(new InfEngineNgraphNode(node));
|
||||||
|
@ -74,6 +74,11 @@ public:
|
|||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
if (inputs_arr.depth() == CV_16S) {
|
||||||
|
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<Mat> inputs, outputs;
|
std::vector<Mat> inputs, outputs;
|
||||||
inputs_arr.getMatVector(inputs);
|
inputs_arr.getMatVector(inputs);
|
||||||
outputs_arr.getMatVector(outputs);
|
outputs_arr.getMatVector(outputs);
|
||||||
@ -89,49 +94,59 @@ public:
|
|||||||
// NOTE: This impl does not check whether indices have duplicate entries.
|
// NOTE: This impl does not check whether indices have duplicate entries.
|
||||||
// The last duplicate entry will overwrite the previous.
|
// The last duplicate entry will overwrite the previous.
|
||||||
template<typename T, typename Functor>
|
template<typename T, typename Functor>
|
||||||
void forward_impl(const Functor& rd, const Mat& data, const Mat& indices, const Mat& updates, Mat& out)
|
void forward_impl(const Functor &reduce_operation, const Mat &input_mat, const Mat &indices_mat, const Mat &updates_mat, Mat& output_mat) {
|
||||||
{
|
input_mat.copyTo(output_mat);
|
||||||
data.copyTo(out);
|
|
||||||
|
|
||||||
const int* shape = data.size.p;
|
const auto &input_mat_shape = shape(input_mat);
|
||||||
const size_t* step = data.step.p;
|
std::vector<size_t> input_mat_step(input_mat_shape.size());
|
||||||
|
for (int i = 0; i < input_mat.dims; i++) {
|
||||||
|
input_mat_step[i] = static_cast<size_t>(input_mat.step.p[i] / sizeof(T));
|
||||||
|
}
|
||||||
|
|
||||||
const int ind_ndims = indices.dims;
|
const int indices_mat_ndims = indices_mat.dims;
|
||||||
const int* ind_shape = indices.size.p;
|
const auto &indices_mat_shape = shape(indices_mat);
|
||||||
const T* p_indices = indices.ptr<const T>();
|
|
||||||
|
|
||||||
const int upd_ndims = updates.dims;
|
const int updates_mat_ndims = updates_mat.dims;
|
||||||
const int* upd_shape = updates.size.p;
|
const auto &updates_mat_shape = shape(updates_mat);
|
||||||
const T* p_updates = updates.ptr<const T>();
|
|
||||||
|
|
||||||
T* p_out = out.ptr<T>();
|
int indices_last_dim = indices_mat_shape[indices_mat_ndims - 1]; // last dim of indices
|
||||||
|
|
||||||
int k = ind_shape[ind_ndims - 1]; // last dim of indices
|
|
||||||
size_t total = (size_t)(indices.total() / k);
|
|
||||||
|
|
||||||
size_t updates_size = 1;
|
size_t updates_size = 1;
|
||||||
for (int i = ind_ndims - 1; i < upd_ndims; i++)
|
for (int i = indices_mat_ndims - 1; i < updates_mat_ndims; i++)
|
||||||
updates_size *= upd_shape[i];
|
updates_size *= updates_mat_shape[i];
|
||||||
|
|
||||||
size_t inp_start_offset = 0;
|
auto fn = [&](const Range &r) {
|
||||||
size_t ind_start_offset = 0;
|
size_t input_offset = 0,
|
||||||
size_t upd_start_offset = 0;
|
indices_offset = r.start * indices_last_dim,
|
||||||
for (size_t i = 0; i < total; i++, ind_start_offset += k, upd_start_offset += updates_size)
|
updates_offset = r.start * updates_size;
|
||||||
{
|
for (int i = r.start; i < r.end; i++) {
|
||||||
const T* tmp_p_indices = p_indices + ind_start_offset;
|
const T* indices = indices_mat.ptr<const T>();
|
||||||
inp_start_offset = 0;
|
const T* updates = updates_mat.ptr<const T>();
|
||||||
for (int j = 0; j < k; j++)
|
T* output = output_mat.ptr<T>();
|
||||||
{
|
|
||||||
CV_Assert(tmp_p_indices[j] < shape[j] && tmp_p_indices[j] > -shape[j]);
|
|
||||||
inp_start_offset += (((int)tmp_p_indices[j] + shape[j]) % shape[j]) * step[j];
|
|
||||||
}
|
|
||||||
inp_start_offset /= sizeof(T);
|
|
||||||
|
|
||||||
const T* tmp_p_updates = p_updates + upd_start_offset;
|
input_offset = 0;
|
||||||
T* tmp_p_out = p_out + inp_start_offset;
|
indices += indices_offset;
|
||||||
for (int j = 0; j < updates_size; j++)
|
for (int j = 0; j < indices_last_dim; j++) {
|
||||||
tmp_p_out[j] = rd(tmp_p_out[j], tmp_p_updates[j]);
|
int index = static_cast<int>(*(indices + j));
|
||||||
|
index = (index + input_mat_shape[j]) % input_mat_shape[j];
|
||||||
|
CV_Assert(index < input_mat_shape[j] && index >= 0);
|
||||||
|
input_offset += index * input_mat_step[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
updates += updates_offset;
|
||||||
|
output += input_offset;
|
||||||
|
for (int j = 0; j < updates_size; j++) {
|
||||||
|
output[j] = reduce_operation(output[j], updates[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
indices_offset += indices_last_dim;
|
||||||
|
updates_offset += updates_size;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
size_t total = (size_t)(indices_mat.total() / indices_last_dim);
|
||||||
|
double nstripes = (size_t)total * (indices_last_dim + updates_size) * (1 / 1024.0);
|
||||||
|
parallel_for_(Range(0, total), fn, nstripes);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename... Args>
|
template<typename... Args>
|
||||||
|
@ -68,6 +68,11 @@ public:
|
|||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
if (inputs_arr.depth() == CV_16S) {
|
||||||
|
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<Mat> inputs, outputs;
|
std::vector<Mat> inputs, outputs;
|
||||||
inputs_arr.getMatVector(inputs);
|
inputs_arr.getMatVector(inputs);
|
||||||
outputs_arr.getMatVector(outputs);
|
outputs_arr.getMatVector(outputs);
|
||||||
@ -81,59 +86,62 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T, typename Functor>
|
template<typename T, typename Functor>
|
||||||
void forward_impl(const Functor& rd, const Mat& data, const Mat& indices, const Mat& updates, Mat& out)
|
void forward_impl(const Functor &reduce_operation, const Mat &input_mat, const Mat &indices_mat, const Mat &updates_mat, Mat &output_mat) {
|
||||||
{
|
input_mat.copyTo(output_mat);
|
||||||
data.copyTo(out);
|
|
||||||
|
|
||||||
const int ndims = data.dims;
|
const int ndims = input_mat.dims;
|
||||||
const int* shape = data.size.p;
|
|
||||||
const size_t* step = data.step.p;
|
|
||||||
|
|
||||||
const int* ind_shape = indices.size.p;
|
const auto &input_mat_shape = shape(input_mat);
|
||||||
const size_t* ind_step = indices.step.p;
|
std::vector<size_t> input_mat_step(ndims);
|
||||||
|
|
||||||
size_t inp_offset = 0;
|
const auto &indices_mat_shape = shape(indices_mat);
|
||||||
size_t ind_offset = 0;
|
std::vector<size_t> indices_mat_step(ndims);
|
||||||
const T* p_index = indices.ptr<const T>();
|
|
||||||
const T* p_update = updates.ptr<const T>();
|
|
||||||
T* p_out = out.ptr<T>();
|
|
||||||
|
|
||||||
size_t total = indices.total();
|
for (int i = 0; i < ndims; i++) {
|
||||||
|
input_mat_step[i] = static_cast<size_t>(input_mat.step.p[i] / sizeof(T));
|
||||||
|
indices_mat_step[i] = static_cast<size_t>(indices_mat.step.p[i] / sizeof(T));
|
||||||
|
}
|
||||||
|
|
||||||
int j, offset_at_idx, index;
|
auto fn = [&](const Range &r) {
|
||||||
size_t t, idx;
|
size_t input_offset = 0, indices_offset = 0;
|
||||||
for (size_t i = 0; i < total; i++)
|
|
||||||
{
|
int indices_index, index;
|
||||||
t = i;
|
size_t axis_offset, tmp_index, j_index;
|
||||||
inp_offset = 0;
|
for (int i = r.start; i < r.end; i++) {
|
||||||
ind_offset = 0;
|
const T* indices = indices_mat.ptr<const T>();
|
||||||
int offset_at_axis = 0;
|
const T* updates = updates_mat.ptr<const T>();
|
||||||
for (j = ndims - 1; j >= 0; j--)
|
T* output = output_mat.ptr<T>();
|
||||||
{
|
|
||||||
idx = t / ind_shape[j];
|
input_offset = 0;
|
||||||
offset_at_idx = (int)(t - idx * ind_shape[j]);
|
indices_offset = 0;
|
||||||
ind_offset += offset_at_idx * ind_step[j];
|
indices_index = i;
|
||||||
inp_offset += offset_at_idx * step[j];
|
axis_offset = 0;
|
||||||
t = idx;
|
for (int j = ndims - 1; j >= 0; j--) {
|
||||||
if (j == axis)
|
tmp_index = indices_index / indices_mat_shape[j];
|
||||||
{
|
j_index = (size_t)(indices_index - tmp_index * indices_mat_shape[j]);
|
||||||
offset_at_axis = offset_at_idx * step[j];
|
input_offset += j_index * input_mat_step[j];
|
||||||
|
indices_offset += j_index * indices_mat_step[j];
|
||||||
|
indices_index = tmp_index;
|
||||||
|
if (j == axis) {
|
||||||
|
axis_offset = j_index * input_mat_step[j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ind_offset /= sizeof(T);
|
|
||||||
|
|
||||||
// get index and overwrite current indices
|
// get index and overwrite current indices
|
||||||
const T* tmp_p_index = p_index + ind_offset;
|
index = static_cast<int>(*(indices + indices_offset));
|
||||||
index = (int)(*tmp_p_index);
|
index = (index + input_mat_shape[axis]) % input_mat_shape[axis];
|
||||||
CV_Assert(index < shape[axis] && index > -shape[axis]);
|
CV_Assert(index < input_mat_shape[axis] && index >= 0);
|
||||||
|
input_offset = input_offset - axis_offset + index * input_mat_step[axis];
|
||||||
|
|
||||||
inp_offset = inp_offset - offset_at_axis + ((index + shape[axis]) % shape[axis]) * step[axis];
|
updates += indices_offset;
|
||||||
inp_offset /= sizeof(T);
|
output += input_offset;
|
||||||
|
*output = reduce_operation(*output, *updates);
|
||||||
const T* tmp_p_update = p_update + ind_offset;
|
|
||||||
T* tmp_p_out = p_out + inp_offset;
|
|
||||||
*tmp_p_out = rd(*tmp_p_out, *tmp_p_update);
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
size_t total = indices_mat.total();
|
||||||
|
double nstripes = (size_t)total * ndims * (1 / 1024.0);
|
||||||
|
parallel_for_(Range(0, total), fn, nstripes);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename... Args>
|
template<typename... Args>
|
||||||
|
@ -901,7 +901,6 @@ AsyncArray Net::Impl::forwardAsync(const String& outputName)
|
|||||||
CV_Assert(!empty());
|
CV_Assert(!empty());
|
||||||
FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
|
FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
String layerName = outputName;
|
String layerName = outputName;
|
||||||
|
|
||||||
if (layerName.empty())
|
if (layerName.empty())
|
||||||
@ -922,9 +921,6 @@ AsyncArray Net::Impl::forwardAsync(const String& outputName)
|
|||||||
isAsync = false;
|
isAsync = false;
|
||||||
|
|
||||||
return getBlobAsync(layerName);
|
return getBlobAsync(layerName);
|
||||||
#else
|
|
||||||
CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires build with enabled C++11");
|
|
||||||
#endif // CV_CXX11
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -265,11 +265,9 @@ struct Net::Impl : public detail::NetImplBase
|
|||||||
|
|
||||||
Mat getBlob(String outputName) const;
|
Mat getBlob(String outputName) const;
|
||||||
|
|
||||||
#ifdef CV_CXX11
|
|
||||||
virtual AsyncArray getBlobAsync(const LayerPin& pin);
|
virtual AsyncArray getBlobAsync(const LayerPin& pin);
|
||||||
|
|
||||||
AsyncArray getBlobAsync(String outputName);
|
AsyncArray getBlobAsync(String outputName);
|
||||||
#endif // CV_CXX11
|
|
||||||
|
|
||||||
string dump(bool forceAllocation = false) const;
|
string dump(bool forceAllocation = false) const;
|
||||||
|
|
||||||
|
@ -728,6 +728,10 @@ void Net::Impl::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
|
|||||||
if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
|
if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
|
||||||
break;
|
break;
|
||||||
#ifdef HAVE_CUDA
|
#ifdef HAVE_CUDA
|
||||||
|
/* Risk: Not every operation in "NaryEltwise" is supported in the CUDA backend. There is a chance
|
||||||
|
that Concat's output is filled with data in both host and device, leading to data missing.
|
||||||
|
See https://github.com/opencv/opencv/issues/24721 for more details.
|
||||||
|
*/
|
||||||
if (preferableBackend == DNN_BACKEND_CUDA &&
|
if (preferableBackend == DNN_BACKEND_CUDA &&
|
||||||
(inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false ||
|
(inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false ||
|
||||||
(inp_i_data->layerInstance->type != "Convolution" &&
|
(inp_i_data->layerInstance->type != "Convolution" &&
|
||||||
|
@ -97,8 +97,8 @@ bool OCL4DNNInnerProduct<Dtype>::Forward(const UMat& bottom,
|
|||||||
max_image_size);
|
max_image_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (use_half_ && bias_term_)
|
if (bias_term_) {
|
||||||
{
|
if (use_half_) {
|
||||||
UMat biasOneMat = UMat::ones(M_, 1, CV_32F);
|
UMat biasOneMat = UMat::ones(M_, 1, CV_32F);
|
||||||
UMat newbias, tmpTop;
|
UMat newbias, tmpTop;
|
||||||
|
|
||||||
@ -106,6 +106,10 @@ bool OCL4DNNInnerProduct<Dtype>::Forward(const UMat& bottom,
|
|||||||
convertFp16(top, tmpTop);
|
convertFp16(top, tmpTop);
|
||||||
cv::gemm(biasOneMat, newbias, 1, tmpTop, 1, tmpTop, 0);
|
cv::gemm(biasOneMat, newbias, 1, tmpTop, 1, tmpTop, 0);
|
||||||
convertFp16(tmpTop, top);
|
convertFp16(tmpTop, top);
|
||||||
|
} else {
|
||||||
|
UMat biasOnesMat = UMat::ones(M_, 1, CV_32F);
|
||||||
|
cv::gemm(biasOnesMat, bias, 1, top, 1, top, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -86,6 +86,7 @@ public:
|
|||||||
int getTensorShapeSize(int node_id, int node_input_id) {
|
int getTensorShapeSize(int node_id, int node_input_id) {
|
||||||
const auto node = getNode(node_id);
|
const auto node = getNode(node_id);
|
||||||
const auto &input_name = node->getInputName(node_input_id);
|
const auto &input_name = node->getInputName(node_input_id);
|
||||||
|
// try to get from value_info
|
||||||
for (int i = 0; i < net.value_info_size(); i++) {
|
for (int i = 0; i < net.value_info_size(); i++) {
|
||||||
const auto value_info = net.value_info(i);
|
const auto value_info = net.value_info(i);
|
||||||
if (value_info.name() == input_name) {
|
if (value_info.name() == input_name) {
|
||||||
@ -97,6 +98,18 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// try to get from input
|
||||||
|
for (int i = 0; i < net.input_size(); i++) {
|
||||||
|
const auto input = net.input(i);
|
||||||
|
if (input.name() == input_name) {
|
||||||
|
if (input.has_type() && input.type().has_tensor_type() &&
|
||||||
|
input.type().tensor_type().has_shape()) {
|
||||||
|
return input.type().tensor_type().shape().dim_size();
|
||||||
|
} else {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -660,6 +673,10 @@ private:
|
|||||||
[Input] -> LayerNorm -> [Output]
|
[Input] -> LayerNorm -> [Output]
|
||||||
\
|
\
|
||||||
[weight], [bias]
|
[weight], [bias]
|
||||||
|
|
||||||
|
Note: axes of ReduceMean must be:
|
||||||
|
- last element is the axis of last dimension (-1 or (input_ndims - 1))
|
||||||
|
- a list of adjacent axes, e.g. [1, 2, 3, ..., input_ndims - 1]
|
||||||
*/
|
*/
|
||||||
class LayerNormSubGraph : public Subgraph
|
class LayerNormSubGraph : public Subgraph
|
||||||
{
|
{
|
||||||
@ -683,19 +700,22 @@ public:
|
|||||||
setFusedNode("LayerNormalization", input);
|
setFusedNode("LayerNormalization", input);
|
||||||
}
|
}
|
||||||
|
|
||||||
static float extractAxis(const Ptr<ImportGraphWrapper>& net, int node_id)
|
static std::vector<int64_t> extractAxis(const Ptr<ImportGraphWrapper>& net, int node_id)
|
||||||
{
|
{
|
||||||
|
// TODO: consider ReduceMean-18 which has axes as one of the inputs instead of attributes
|
||||||
Ptr<ImportNodeWrapper> mean_ptr = net->getNode(node_id);
|
Ptr<ImportNodeWrapper> mean_ptr = net->getNode(node_id);
|
||||||
opencv_onnx::NodeProto* mean_node = mean_ptr.dynamicCast<ONNXNodeWrapper>()->node;
|
opencv_onnx::NodeProto* mean_node = mean_ptr.dynamicCast<ONNXNodeWrapper>()->node;
|
||||||
int axis_ = -1;
|
std::vector<int64_t> axes;
|
||||||
for (int i = 0; i < mean_node->attribute_size(); i++)
|
for (int i = 0; i < mean_node->attribute_size(); i++)
|
||||||
{
|
{
|
||||||
opencv_onnx::AttributeProto attr = mean_node->attribute(i);
|
opencv_onnx::AttributeProto attr = mean_node->attribute(i);
|
||||||
if (attr.name() != "axes")
|
if (attr.name() != "axes")
|
||||||
continue;
|
continue;
|
||||||
axis_ = static_cast<int>(attr.ints(0));
|
for (int j = 0; j < attr.ints_size(); j++) {
|
||||||
|
axes.push_back(attr.ints(j));
|
||||||
}
|
}
|
||||||
return axis_;
|
}
|
||||||
|
return axes;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
|
virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
|
||||||
@ -707,11 +727,31 @@ public:
|
|||||||
if (pow_exp - 2 > 1e-5) // not pow(2)
|
if (pow_exp - 2 > 1e-5) // not pow(2)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int axis_mean1 = extractAxis(net, matchedNodesIds[mean]);
|
std::vector<int64_t> axes = extractAxis(net, matchedNodesIds[mean]);
|
||||||
int axis_mean2 = extractAxis(net, matchedNodesIds[mean1]);
|
// check whether it is -1 or last_axis or [axis, ..., last_axis]
|
||||||
if (axis_mean1 != axis_mean2)
|
int64_t input_ndims = static_cast<int64_t>(net.dynamicCast<ONNXGraphWrapper>()->getTensorShapeSize(matchedNodesIds[mean], 0));
|
||||||
|
if (input_ndims == -1) {
|
||||||
|
return false; // input shape unknown
|
||||||
|
}
|
||||||
|
// assume that axes are sorted in ascending order, e.g. [0, 1, 2, 3] or [-3, -2, -1]
|
||||||
|
if (axes.back() != -1 && axes.back() != (input_ndims - 1)) {
|
||||||
return false;
|
return false;
|
||||||
axis = axis_mean1;
|
}
|
||||||
|
for (size_t i = 0; i < axes.size() - 1; i++) {
|
||||||
|
if (axes[i] - axes[i + 1] != -1) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<int64_t> axes1 = extractAxis(net, matchedNodesIds[mean1]);
|
||||||
|
if (axes.size() != axes1.size())
|
||||||
|
return false;
|
||||||
|
for (size_t i = 0; i < axes.size(); i++) {
|
||||||
|
if (((axes[i] + input_ndims) % input_ndims) != ((axes1[i] + input_ndims) % input_ndims)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
axis = axes[0];
|
||||||
|
|
||||||
epsilon = extractConstant(net, matchedNodesIds[add], 1).at<float>(0);
|
epsilon = extractConstant(net, matchedNodesIds[add], 1).at<float>(0);
|
||||||
|
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
#ifdef HAVE_PROTOBUF
|
#ifdef HAVE_PROTOBUF
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -2619,6 +2620,7 @@ void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::Node
|
|||||||
|
|
||||||
// Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1
|
// Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1
|
||||||
int axis = layerParams.get<int>("axis", 1);
|
int axis = layerParams.get<int>("axis", 1);
|
||||||
|
axis = normalize_axis(axis, inputShape.size());
|
||||||
for (size_t i = 0; i < inputs.size(); ++i)
|
for (size_t i = 0; i < inputs.size(); ++i)
|
||||||
{
|
{
|
||||||
inputShape[axis] = inputs[i].dims == (int)inputShape.size() ? inputs[i].size[axis] : 1;
|
inputShape[axis] = inputs[i].dims == (int)inputShape.size() ? inputs[i].size[axis] : 1;
|
||||||
@ -2831,6 +2833,11 @@ void ONNXImporter::parseElementWise(LayerParams& layerParams, const opencv_onnx:
|
|||||||
|
|
||||||
layerParams.type = "NaryEltwise";
|
layerParams.type = "NaryEltwise";
|
||||||
layerParams.set("operation", toLowerCase(node_proto.op_type()));
|
layerParams.set("operation", toLowerCase(node_proto.op_type()));
|
||||||
|
if (node_proto.op_type() == "Mod") {
|
||||||
|
if (layerParams.get<int>("fmod", 0)) {
|
||||||
|
layerParams.set("operation", "fmod");
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// element-wise layers that can have >=1 inputs but actually have one input
|
// element-wise layers that can have >=1 inputs but actually have one input
|
||||||
if (node_proto.input_size() == 1 && (op_type == "max" || op_type == "min" || op_type == "mean" || op_type == "sum"))
|
if (node_proto.input_size() == 1 && (op_type == "max" || op_type == "min" || op_type == "mean" || op_type == "sum"))
|
||||||
@ -4004,10 +4011,11 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
|
|||||||
dispatch["ScatterElements"] = dispatch["Scatter"] = dispatch["ScatterND"] = &ONNXImporter::parseScatter;
|
dispatch["ScatterElements"] = dispatch["Scatter"] = dispatch["ScatterND"] = &ONNXImporter::parseScatter;
|
||||||
dispatch["Tile"] = &ONNXImporter::parseTile;
|
dispatch["Tile"] = &ONNXImporter::parseTile;
|
||||||
dispatch["LayerNormalization"] = &ONNXImporter::parseLayerNorm;
|
dispatch["LayerNormalization"] = &ONNXImporter::parseLayerNorm;
|
||||||
|
dispatch["GroupNormalization"] = &ONNXImporter::parseInstanceNormalization;
|
||||||
|
|
||||||
dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = dispatch["Pow"] = dispatch["Add"] =
|
dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = dispatch["Pow"] = dispatch["Add"] =
|
||||||
dispatch["Sub"] = dispatch["Mul"] = dispatch["Div"] = dispatch["GreaterOrEqual"] =
|
dispatch["Sub"] = dispatch["Mul"] = dispatch["Div"] = dispatch["GreaterOrEqual"] =
|
||||||
dispatch["LessOrEqual"] = &ONNXImporter::parseElementWise;
|
dispatch["LessOrEqual"] = dispatch["Mod"] = &ONNXImporter::parseElementWise;
|
||||||
|
|
||||||
dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseElementWise;
|
dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseElementWise;
|
||||||
dispatch["Where"] = &ONNXImporter::parseElementWise;
|
dispatch["Where"] = &ONNXImporter::parseElementWise;
|
||||||
|
@ -54,6 +54,7 @@
|
|||||||
#define vec_type Dtype8
|
#define vec_type Dtype8
|
||||||
#define CALC_MEAN calc_mean8
|
#define CALC_MEAN calc_mean8
|
||||||
#define MVN mvn8
|
#define MVN mvn8
|
||||||
|
#define MVN_GROUP mvn_group8
|
||||||
#define MEAN_FUSE mean_fuse8
|
#define MEAN_FUSE mean_fuse8
|
||||||
#define MVN_FUSE mvn_fuse8
|
#define MVN_FUSE mvn_fuse8
|
||||||
#elif NUM == 4
|
#elif NUM == 4
|
||||||
@ -62,6 +63,7 @@
|
|||||||
#define vec_type Dtype4
|
#define vec_type Dtype4
|
||||||
#define CALC_MEAN calc_mean4
|
#define CALC_MEAN calc_mean4
|
||||||
#define MVN mvn4
|
#define MVN mvn4
|
||||||
|
#define MVN_GROUP mvn_group4
|
||||||
#define MEAN_FUSE mean_fuse4
|
#define MEAN_FUSE mean_fuse4
|
||||||
#define MVN_FUSE mvn_fuse4
|
#define MVN_FUSE mvn_fuse4
|
||||||
#elif NUM == 1
|
#elif NUM == 1
|
||||||
@ -70,6 +72,7 @@
|
|||||||
#define vec_type Dtype
|
#define vec_type Dtype
|
||||||
#define CALC_MEAN calc_mean1
|
#define CALC_MEAN calc_mean1
|
||||||
#define MVN mvn1
|
#define MVN mvn1
|
||||||
|
#define MVN_GROUP mvn_group1
|
||||||
#define MEAN_FUSE mean_fuse1
|
#define MEAN_FUSE mean_fuse1
|
||||||
#define MVN_FUSE mvn_fuse1
|
#define MVN_FUSE mvn_fuse1
|
||||||
#endif
|
#endif
|
||||||
@ -150,6 +153,54 @@ __kernel void MVN(__global const Dtype* src,
|
|||||||
store(dst_vec, dst, index);
|
store(dst_vec, dst, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined KERNEL_MVN_GROUP
|
||||||
|
|
||||||
|
__kernel void MVN_GROUP(__global const Dtype* src,
|
||||||
|
const int rows,
|
||||||
|
const int cols,
|
||||||
|
const Dtype eps,
|
||||||
|
__global const Dtype* mean,
|
||||||
|
__global const Dtype* dev,
|
||||||
|
__global const Dtype* weight,
|
||||||
|
__global const Dtype* bias,
|
||||||
|
const int channels,
|
||||||
|
const int num_groups,
|
||||||
|
const float relu_slope,
|
||||||
|
__global Dtype* dst)
|
||||||
|
{
|
||||||
|
int x = get_global_id(0);
|
||||||
|
int y = get_global_id(1) * NUM;
|
||||||
|
int index = x * cols + y;
|
||||||
|
|
||||||
|
if (x >= rows || y >= cols)
|
||||||
|
return;
|
||||||
|
|
||||||
|
int group_size = channels / num_groups;
|
||||||
|
int step = norm_size / group_size;
|
||||||
|
int channel_index = x % num_groups * group_size + y / step
|
||||||
|
Dtype mean_val = mean[x];
|
||||||
|
Dtype dev_val = dev[x];
|
||||||
|
Dtype alpha;
|
||||||
|
#ifdef NORM_VARIANCE
|
||||||
|
alpha = 1 / sqrt(eps + dev_val);
|
||||||
|
#else
|
||||||
|
alpha = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Dtype w = weight[channel_index], b = bias[channel_index];
|
||||||
|
|
||||||
|
vec_type src_vec = load(src, index) - (vec_type)mean_val;
|
||||||
|
vec_type dst_vec = src_vec * alpha;
|
||||||
|
dst_vec = dst_vec * w + (vec_type)b;
|
||||||
|
|
||||||
|
#ifdef FUSE_RELU
|
||||||
|
vec_type new_val = dst_vec * relu_slope;
|
||||||
|
dst_vec = select(new_val, dst_vec, dst_vec > (vec_type)0.f);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
store(dst_vec, dst, index);
|
||||||
|
}
|
||||||
|
|
||||||
#elif defined KERNEL_MEAN_FUSE
|
#elif defined KERNEL_MEAN_FUSE
|
||||||
|
|
||||||
__kernel void MEAN_FUSE(__global const T * A,
|
__kernel void MEAN_FUSE(__global const T * A,
|
||||||
|
@ -95,6 +95,12 @@ public:
|
|||||||
Net net;
|
Net net;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
TEST_P(DNNTestNetwork, DISABLED_YOLOv8n) {
|
||||||
|
processNet("dnn/onnx/models/yolov8n.onnx", "", Size(640, 640), "output0");
|
||||||
|
expectNoFallbacksFromIE(net);
|
||||||
|
expectNoFallbacksFromCUDA(net);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_P(DNNTestNetwork, AlexNet)
|
TEST_P(DNNTestNetwork, AlexNet)
|
||||||
{
|
{
|
||||||
applyTestTag(CV_TEST_TAG_MEMORY_1GB);
|
applyTestTag(CV_TEST_TAG_MEMORY_1GB);
|
||||||
@ -1454,6 +1460,71 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Backends, Eltwise, testing::Combine(
|
|||||||
dnnBackendsAndTargets()
|
dnnBackendsAndTargets()
|
||||||
));
|
));
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Element-wise layers
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
using NaryEltwiseConcat = TestWithParam<tuple<std::vector<int>, tuple<Backend, Target>>>;
|
||||||
|
TEST_P(NaryEltwiseConcat, Accuracy) {
|
||||||
|
auto param = GetParam();
|
||||||
|
std::vector<int> input_shape = get<0>(param);
|
||||||
|
auto backend_id = get<0>(get<1>(param));
|
||||||
|
auto target_id = get<1>(get<1>(param));
|
||||||
|
|
||||||
|
/* Build the following net:
|
||||||
|
|
||||||
|
<1x4x84>
|
||||||
|
/
|
||||||
|
[Input] -+-> Mul(B<1x84>) -> Concat(axis=1) -> [Output]
|
||||||
|
| |
|
||||||
|
+-> Sigmoid ----------+
|
||||||
|
|
||||||
|
*/
|
||||||
|
Net net;
|
||||||
|
|
||||||
|
std::vector<int> mul_B_shape(input_shape.size() - 1, 1);
|
||||||
|
mul_B_shape.back() = input_shape.back();
|
||||||
|
Mat mul_B(mul_B_shape, CV_32FC1);
|
||||||
|
randn(mul_B, 0.f, 1.f);
|
||||||
|
LayerParams mul_B_lp;
|
||||||
|
mul_B_lp.name = "mul_B";
|
||||||
|
mul_B_lp.type = "Const";
|
||||||
|
mul_B_lp.blobs.push_back(mul_B);
|
||||||
|
int id_mul_B = net.addLayer(mul_B_lp.name, mul_B_lp.type, mul_B_lp);
|
||||||
|
|
||||||
|
LayerParams mul_lp;
|
||||||
|
mul_lp.name = "mul";
|
||||||
|
mul_lp.type = "NaryEltwise";
|
||||||
|
mul_lp.set("operation", "mul");
|
||||||
|
int id_mul = net.addLayer(mul_lp.name, mul_lp.type, mul_lp);
|
||||||
|
net.connect(0, 0, id_mul, 0);
|
||||||
|
net.connect(id_mul_B, 0, id_mul, 1);
|
||||||
|
|
||||||
|
LayerParams sigmoid_lp;
|
||||||
|
sigmoid_lp.name = "sigmoid";
|
||||||
|
sigmoid_lp.type = "Sigmoid";
|
||||||
|
int id_sigmoid = net.addLayer(sigmoid_lp.name, sigmoid_lp.type, sigmoid_lp);
|
||||||
|
net.connect(0, 0, id_sigmoid, 0);
|
||||||
|
|
||||||
|
LayerParams concat_lp;
|
||||||
|
concat_lp.name = "concat";
|
||||||
|
concat_lp.type = "Concat";
|
||||||
|
concat_lp.set("axis", 1);
|
||||||
|
int id_concat = net.addLayer(concat_lp.name, concat_lp.type, concat_lp);
|
||||||
|
net.connect(id_mul, 0, id_concat, 0);
|
||||||
|
net.connect(id_sigmoid, 0, id_concat, 1);
|
||||||
|
|
||||||
|
// Run test
|
||||||
|
Mat input(input_shape, CV_32FC1);
|
||||||
|
testLayer(input, net, backend_id, target_id, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(Layer_Test_Backends, NaryEltwiseConcat, testing::Combine(
|
||||||
|
testing::Values(std::vector<int>{1, 4, 84}),
|
||||||
|
dnnBackendsAndTargets())
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_layers_backends, dnnBackendsAndTargets());
|
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_layers_backends, dnnBackendsAndTargets());
|
||||||
|
|
||||||
}} // namespace
|
}} // namespace
|
||||||
|
@ -47,6 +47,10 @@ TEST_F(Test_Graph_Simplifier, LayerNormSubGraph) {
|
|||||||
test("layer_norm_expanded_with_initializers", "LayerNormalization");
|
test("layer_norm_expanded_with_initializers", "LayerNormalization");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(Test_Graph_Simplifier, LayerNormNoFusionSubGraph) {
|
||||||
|
test("layer_norm_no_fusion", std::vector<std::string>{"NaryEltwise", "Reduce", "Sqrt"});
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(Test_Graph_Simplifier, ResizeSubgraph) {
|
TEST_F(Test_Graph_Simplifier, ResizeSubgraph) {
|
||||||
/* Test for 6 subgraphs:
|
/* Test for 6 subgraphs:
|
||||||
- GatherCastSubgraph
|
- GatherCastSubgraph
|
||||||
|
@ -2050,7 +2050,7 @@ private:
|
|||||||
net.setPreferableTarget(target);
|
net.setPreferableTarget(target);
|
||||||
|
|
||||||
Mat re;
|
Mat re;
|
||||||
ASSERT_NO_THROW(re = net.forward()); // runtime error
|
re = net.forward();
|
||||||
auto ptr_re = (float *) re.data;
|
auto ptr_re = (float *) re.data;
|
||||||
for (int i = 0; i < re.total(); i++)
|
for (int i = 0; i < re.total(); i++)
|
||||||
if (op == "sum"){
|
if (op == "sum"){
|
||||||
|
@ -1033,14 +1033,10 @@ TEST_P(Test_two_inputs, basic)
|
|||||||
randu(firstInp, 0, 100);
|
randu(firstInp, 0, 100);
|
||||||
randu(secondInp, 0, 100);
|
randu(secondInp, 0, 100);
|
||||||
|
|
||||||
#ifndef CV_CXX11
|
|
||||||
std::vector<String> input_names;
|
std::vector<String> input_names;
|
||||||
input_names.push_back("data");
|
input_names.push_back("data");
|
||||||
input_names.push_back("second_input");
|
input_names.push_back("second_input");
|
||||||
net.setInputsNames(input_names);
|
net.setInputsNames(input_names);
|
||||||
#else
|
|
||||||
net.setInputsNames({"data", "second_input"});
|
|
||||||
#endif
|
|
||||||
net.setInput(firstInp, "data", kScale);
|
net.setInput(firstInp, "data", kScale);
|
||||||
net.setInput(secondInp, "second_input", kScaleInv);
|
net.setInput(secondInp, "second_input", kScaleInv);
|
||||||
net.setPreferableBackend(backendId);
|
net.setPreferableBackend(backendId);
|
||||||
|
@ -311,6 +311,8 @@ static const TestCase testConformanceConfig[] = {
|
|||||||
{"test_gridsample_nearest", 2, 1},
|
{"test_gridsample_nearest", 2, 1},
|
||||||
{"test_gridsample_reflection_padding", 2, 1},
|
{"test_gridsample_reflection_padding", 2, 1},
|
||||||
{"test_gridsample_zeros_padding", 2, 1},
|
{"test_gridsample_zeros_padding", 2, 1},
|
||||||
|
{"test_group_normalization_epsilon", 3, 1},
|
||||||
|
{"test_group_normalization_example", 3, 1},
|
||||||
{"test_gru_batchwise", 3, 2},
|
{"test_gru_batchwise", 3, 2},
|
||||||
{"test_gru_defaults", 3, 1},
|
{"test_gru_defaults", 3, 1},
|
||||||
{"test_gru_seq_length", 4, 1},
|
{"test_gru_seq_length", 4, 1},
|
||||||
|
@ -736,6 +736,10 @@ CASE(test_gridsample_reflection_padding)
|
|||||||
// no filter
|
// no filter
|
||||||
CASE(test_gridsample_zeros_padding)
|
CASE(test_gridsample_zeros_padding)
|
||||||
// no filter
|
// no filter
|
||||||
|
CASE(test_group_normalization_epsilon)
|
||||||
|
// no filter
|
||||||
|
CASE(test_group_normalization_example)
|
||||||
|
// no filter
|
||||||
CASE(test_gru_batchwise)
|
CASE(test_gru_batchwise)
|
||||||
// no filter
|
// no filter
|
||||||
CASE(test_gru_defaults)
|
CASE(test_gru_defaults)
|
||||||
@ -1056,10 +1060,25 @@ CASE(test_mod_int64_fmod)
|
|||||||
// no filter
|
// no filter
|
||||||
CASE(test_mod_mixed_sign_float16)
|
CASE(test_mod_mixed_sign_float16)
|
||||||
// no filter
|
// no filter
|
||||||
|
if (target == DNN_TARGET_OPENCL)
|
||||||
|
{
|
||||||
|
default_l1 = 0.0011; // Expected: (normL1) <= (l1), actual: 0.00104141 vs 1e-05
|
||||||
|
default_lInf = 0.0016; // Expected: (normInf) <= (lInf), actual: 0.00156212 vs 0.0001
|
||||||
|
}
|
||||||
CASE(test_mod_mixed_sign_float32)
|
CASE(test_mod_mixed_sign_float32)
|
||||||
// no filter
|
// no filter
|
||||||
|
if (target == DNN_TARGET_OPENCL)
|
||||||
|
{
|
||||||
|
default_l1 = 0.0011; // Expected: (normL1) <= (l1), actual: 0.00104141 vs 1e-05
|
||||||
|
default_lInf = 0.0016; // Expected: (normInf) <= (lInf), actual: 0.00156212 vs 0.0001
|
||||||
|
}
|
||||||
CASE(test_mod_mixed_sign_float64)
|
CASE(test_mod_mixed_sign_float64)
|
||||||
// no filter
|
// no filter
|
||||||
|
if (target == DNN_TARGET_OPENCL)
|
||||||
|
{
|
||||||
|
default_l1 = 0.0011; // Expected: (normL1) <= (l1), actual: 0.00104167 vs 1e-05
|
||||||
|
default_lInf = 0.0016; // Expected: (normInf) <= (lInf), actual: 0.00156251 vs 0.0001
|
||||||
|
}
|
||||||
CASE(test_mod_mixed_sign_int16)
|
CASE(test_mod_mixed_sign_int16)
|
||||||
// no filter
|
// no filter
|
||||||
CASE(test_mod_mixed_sign_int32)
|
CASE(test_mod_mixed_sign_int32)
|
||||||
|
@ -41,7 +41,7 @@
|
|||||||
"test_cast_STRING_to_FLOAT",
|
"test_cast_STRING_to_FLOAT",
|
||||||
"test_castlike_FLOAT_to_STRING_expanded",
|
"test_castlike_FLOAT_to_STRING_expanded",
|
||||||
"test_castlike_STRING_to_FLOAT_expanded",
|
"test_castlike_STRING_to_FLOAT_expanded",
|
||||||
"test_concat_1d_axis_negative_1",
|
"test_concat_1d_axis_negative_1", // 1d support is required
|
||||||
"test_div_uint8", // output type mismatch
|
"test_div_uint8", // output type mismatch
|
||||||
"test_maxpool_2d_dilations",
|
"test_maxpool_2d_dilations",
|
||||||
"test_maxpool_2d_same_lower",
|
"test_maxpool_2d_same_lower",
|
||||||
|
@ -210,9 +210,6 @@
|
|||||||
"test_min_uint8",
|
"test_min_uint8",
|
||||||
"test_mod_broadcast",
|
"test_mod_broadcast",
|
||||||
"test_mod_int64_fmod",
|
"test_mod_int64_fmod",
|
||||||
"test_mod_mixed_sign_float16",
|
|
||||||
"test_mod_mixed_sign_float32",
|
|
||||||
"test_mod_mixed_sign_float64",
|
|
||||||
"test_mod_mixed_sign_int16",
|
"test_mod_mixed_sign_int16",
|
||||||
"test_mod_mixed_sign_int32",
|
"test_mod_mixed_sign_int32",
|
||||||
"test_mod_mixed_sign_int64",
|
"test_mod_mixed_sign_int64",
|
||||||
|
@ -2673,24 +2673,36 @@ void yoloPostProcessing(
|
|||||||
cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
|
cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// each row is [cx, cy, w, h, conf_obj, conf_class1, ..., conf_class80]
|
if (test_name == "yolonas"){
|
||||||
|
// outs contains 2 elemets of shape [1, 8400, 80] and [1, 8400, 4]. Concat them to get [1, 8400, 84]
|
||||||
|
Mat concat_out;
|
||||||
|
// squeeze the first dimension
|
||||||
|
outs[0] = outs[0].reshape(1, outs[0].size[1]);
|
||||||
|
outs[1] = outs[1].reshape(1, outs[1].size[1]);
|
||||||
|
cv::hconcat(outs[1], outs[0], concat_out);
|
||||||
|
outs[0] = concat_out;
|
||||||
|
// remove the second element
|
||||||
|
outs.pop_back();
|
||||||
|
// unsqueeze the first dimension
|
||||||
|
outs[0] = outs[0].reshape(0, std::vector<int>{1, 8400, 84});
|
||||||
|
}
|
||||||
|
|
||||||
for (auto preds : outs){
|
for (auto preds : outs){
|
||||||
|
|
||||||
preds = preds.reshape(1, preds.size[1]); // [1, 8400, 85] -> [8400, 85]
|
preds = preds.reshape(1, preds.size[1]); // [1, 8400, 85] -> [8400, 85]
|
||||||
|
|
||||||
for (int i = 0; i < preds.rows; ++i)
|
for (int i = 0; i < preds.rows; ++i)
|
||||||
{
|
{
|
||||||
// filter out non objects
|
// filter out non object
|
||||||
float obj_conf = (test_name != "yolov8") ? preds.at<float>(i, 4) : 1.0f;
|
float obj_conf = (test_name == "yolov8" || test_name == "yolonas") ? 1.0f : preds.at<float>(i, 4) ;
|
||||||
if (obj_conf < conf_threshold)
|
if (obj_conf < conf_threshold)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
Mat scores = preds.row(i).colRange((test_name != "yolov8") ? 5 : 4, preds.cols);
|
Mat scores = preds.row(i).colRange((test_name == "yolov8" || test_name == "yolonas") ? 4 : 5, preds.cols);
|
||||||
double conf;
|
double conf;
|
||||||
Point maxLoc;
|
Point maxLoc;
|
||||||
minMaxLoc(scores, 0, &conf, 0, &maxLoc);
|
minMaxLoc(scores, 0, &conf, 0, &maxLoc);
|
||||||
|
|
||||||
conf = (test_name != "yolov8") ? conf * obj_conf : conf;
|
conf = (test_name == "yolov8" || test_name == "yolonas") ? conf : conf * obj_conf;
|
||||||
if (conf < conf_threshold)
|
if (conf < conf_threshold)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@ -2701,9 +2713,14 @@ void yoloPostProcessing(
|
|||||||
double w = det[2];
|
double w = det[2];
|
||||||
double h = det[3];
|
double h = det[3];
|
||||||
|
|
||||||
|
// std::cout << "cx: " << cx << " cy: " << cy << " w: " << w << " h: " << h << " conf: " << conf << " idx: " << maxLoc.x << std::endl;
|
||||||
// [x1, y1, x2, y2]
|
// [x1, y1, x2, y2]
|
||||||
|
if (test_name == "yolonas"){
|
||||||
|
boxes.push_back(Rect2d(cx, cy, w, h));
|
||||||
|
} else {
|
||||||
boxes.push_back(Rect2d(cx - 0.5 * w, cy - 0.5 * h,
|
boxes.push_back(Rect2d(cx - 0.5 * w, cy - 0.5 * h,
|
||||||
cx + 0.5 * w, cy + 0.5 * h));
|
cx + 0.5 * w, cy + 0.5 * h));
|
||||||
|
}
|
||||||
classIds.push_back(maxLoc.x);
|
classIds.push_back(maxLoc.x);
|
||||||
confidences.push_back(conf);
|
confidences.push_back(conf);
|
||||||
}
|
}
|
||||||
@ -2758,6 +2775,41 @@ TEST_P(Test_ONNX_nets, YOLOX)
|
|||||||
1.0e-4, 1.0e-4);
|
1.0e-4, 1.0e-4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(Test_ONNX_nets, YOLONas)
|
||||||
|
{
|
||||||
|
// model information: https://dl.opencv.org/models/yolo-nas/Readme.md
|
||||||
|
std::string weightPath = _tf("models/yolo_nas_s.onnx", false);
|
||||||
|
|
||||||
|
Size targetSize{640, 640};
|
||||||
|
float conf_threshold = 0.50;
|
||||||
|
float iou_threshold = 0.50;
|
||||||
|
|
||||||
|
std::vector<int> refClassIds{1, 16, 7};
|
||||||
|
std::vector<float> refScores{0.9720f, 0.9283f, 0.8990f};
|
||||||
|
// [x1, y1, x2, y2]
|
||||||
|
std::vector<Rect2d> refBoxes{
|
||||||
|
Rect2d(105.516, 173.696, 471.323, 430.433),
|
||||||
|
Rect2d(109.241, 263.406, 259.872, 531.858),
|
||||||
|
Rect2d(390.153, 142.492, 574.932, 222.709)
|
||||||
|
};
|
||||||
|
|
||||||
|
Image2BlobParams imgParams(
|
||||||
|
Scalar::all(1/255.0),
|
||||||
|
targetSize,
|
||||||
|
Scalar::all(0),
|
||||||
|
false,
|
||||||
|
CV_32F,
|
||||||
|
DNN_LAYOUT_NCHW,
|
||||||
|
DNN_PMODE_LETTERBOX,
|
||||||
|
Scalar::all(114)
|
||||||
|
);
|
||||||
|
|
||||||
|
testYOLO(
|
||||||
|
weightPath, refClassIds, refScores, refBoxes,
|
||||||
|
imgParams, conf_threshold, iou_threshold,
|
||||||
|
1.0e-4, 1.0e-4, "yolonas");
|
||||||
|
}
|
||||||
|
|
||||||
TEST_P(Test_ONNX_nets, YOLOv8)
|
TEST_P(Test_ONNX_nets, YOLOv8)
|
||||||
{
|
{
|
||||||
std::string weightPath = _tf("models/yolov8n.onnx", false);
|
std::string weightPath = _tf("models/yolov8n.onnx", false);
|
||||||
@ -2804,7 +2856,7 @@ TEST_P(Test_ONNX_nets, YOLOv7)
|
|||||||
CV_TEST_TAG_DEBUG_VERYLONG
|
CV_TEST_TAG_DEBUG_VERYLONG
|
||||||
);
|
);
|
||||||
|
|
||||||
std::string weightPath = _tf("models/yolov7_not_simplified.onnx", false);
|
std::string weightPath = _tf("models/yolov7.onnx", false);
|
||||||
// Reference, which is collected with input size of 640x640
|
// Reference, which is collected with input size of 640x640
|
||||||
std::vector<int> refClassIds{1, 16, 7};
|
std::vector<int> refClassIds{1, 16, 7};
|
||||||
std::vector<float> refScores{0.9614331f, 0.9589417f, 0.8679074f};
|
std::vector<float> refScores{0.9614331f, 0.9589417f, 0.8679074f};
|
||||||
@ -3031,6 +3083,10 @@ TEST_P(Test_ONNX_nets, VitTrack) {
|
|||||||
normAssert(ref_output3, outputs[2], "VitTrack output3");
|
normAssert(ref_output3, outputs[2], "VitTrack output3");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(Test_ONNX_layers, LayerNormNoFusion) {
|
||||||
|
testONNXModels("layer_norm_no_fusion");
|
||||||
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());
|
INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());
|
||||||
|
|
||||||
}} // namespace
|
}} // namespace
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
// of this distribution and at http://opencv.org/license.html.
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
//
|
//
|
||||||
// Copyright (C) 2018-2023 Intel Corporation
|
// Copyright (C) 2018-2024 Intel Corporation
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
|
||||||
@ -10,7 +10,7 @@
|
|||||||
// (cv::gapi::ie::backend() is still there and is defined always)
|
// (cv::gapi::ie::backend() is still there and is defined always)
|
||||||
#include "backends/ie/giebackend.hpp"
|
#include "backends/ie/giebackend.hpp"
|
||||||
|
|
||||||
#ifdef HAVE_INF_ENGINE
|
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
|
||||||
|
|
||||||
#if INF_ENGINE_RELEASE <= 2019010000
|
#if INF_ENGINE_RELEASE <= 2019010000
|
||||||
# error G-API IE module supports only OpenVINO IE >= 2019 R1
|
# error G-API IE module supports only OpenVINO IE >= 2019 R1
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
// of this distribution and at http://opencv.org/license.html.
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
//
|
//
|
||||||
// Copyright (C) 2018-2020 Intel Corporation
|
// Copyright (C) 2018-2024 Intel Corporation
|
||||||
|
|
||||||
#ifndef OPENCV_GAPI_GIEBACKEND_HPP
|
#ifndef OPENCV_GAPI_GIEBACKEND_HPP
|
||||||
#define OPENCV_GAPI_GIEBACKEND_HPP
|
#define OPENCV_GAPI_GIEBACKEND_HPP
|
||||||
@ -10,7 +10,7 @@
|
|||||||
// Include anyway - cv::gapi::ie::backend() still needs to be defined
|
// Include anyway - cv::gapi::ie::backend() still needs to be defined
|
||||||
#include "opencv2/gapi/infer/ie.hpp"
|
#include "opencv2/gapi/infer/ie.hpp"
|
||||||
|
|
||||||
#ifdef HAVE_INF_ENGINE
|
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
|
||||||
|
|
||||||
#include <ade/util/algorithm.hpp> // type_list_index
|
#include <ade/util/algorithm.hpp> // type_list_index
|
||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
|
@ -2,9 +2,9 @@
|
|||||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
// of this distribution and at http://opencv.org/license.html.
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
//
|
//
|
||||||
// Copyright (C) 2020 Intel Corporation
|
// Copyright (C) 2020-2024 Intel Corporation
|
||||||
|
|
||||||
#ifdef HAVE_INF_ENGINE
|
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
#include "../test_precomp.hpp"
|
#include "../test_precomp.hpp"
|
||||||
|
|
||||||
#ifdef HAVE_INF_ENGINE
|
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
|
||||||
|
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
@ -1662,14 +1662,14 @@ CvWindow::CvWindow(QString name, int arg2)
|
|||||||
|
|
||||||
//Now attach everything
|
//Now attach everything
|
||||||
if (myToolBar)
|
if (myToolBar)
|
||||||
myGlobalLayout->addWidget(myToolBar, Qt::AlignCenter);
|
myGlobalLayout->addWidget(myToolBar, 0, Qt::AlignLeft);
|
||||||
|
|
||||||
myGlobalLayout->addWidget(myView->getWidget(), Qt::AlignCenter);
|
myGlobalLayout->addWidget(myView->getWidget(), 0, Qt::AlignCenter);
|
||||||
|
|
||||||
myGlobalLayout->addLayout(myBarLayout, Qt::AlignCenter);
|
myGlobalLayout->addLayout(myBarLayout);
|
||||||
|
|
||||||
if (myStatusBar)
|
if (myStatusBar)
|
||||||
myGlobalLayout->addWidget(myStatusBar, Qt::AlignCenter);
|
myGlobalLayout->addWidget(myStatusBar, 0, Qt::AlignLeft);
|
||||||
|
|
||||||
setLayout(myGlobalLayout);
|
setLayout(myGlobalLayout);
|
||||||
show();
|
show();
|
||||||
@ -2079,7 +2079,6 @@ void CvWindow::createStatusBar()
|
|||||||
{
|
{
|
||||||
myStatusBar = new QStatusBar(this);
|
myStatusBar = new QStatusBar(this);
|
||||||
myStatusBar->setSizeGripEnabled(false);
|
myStatusBar->setSizeGripEnabled(false);
|
||||||
myStatusBar->setFixedHeight(20);
|
|
||||||
myStatusBar->setMinimumWidth(1);
|
myStatusBar->setMinimumWidth(1);
|
||||||
myStatusBar_msg = new QLabel;
|
myStatusBar_msg = new QLabel;
|
||||||
|
|
||||||
|
@ -409,7 +409,9 @@ bool JpegDecoder::readData( Mat& img )
|
|||||||
{
|
{
|
||||||
jpeg_decompress_struct* cinfo = &((JpegState*)m_state)->cinfo;
|
jpeg_decompress_struct* cinfo = &((JpegState*)m_state)->cinfo;
|
||||||
JpegErrorMgr* jerr = &((JpegState*)m_state)->jerr;
|
JpegErrorMgr* jerr = &((JpegState*)m_state)->jerr;
|
||||||
|
#ifndef JCS_EXTENSIONS
|
||||||
JSAMPARRAY buffer = 0;
|
JSAMPARRAY buffer = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
if( setjmp( jerr->setjmp_buffer ) == 0 )
|
if( setjmp( jerr->setjmp_buffer ) == 0 )
|
||||||
{
|
{
|
||||||
@ -429,6 +431,18 @@ bool JpegDecoder::readData( Mat& img )
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef JCS_EXTENSIONS
|
||||||
|
if( color )
|
||||||
|
{
|
||||||
|
cinfo->out_color_space = JCS_EXT_BGR;
|
||||||
|
cinfo->out_color_components = 3;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cinfo->out_color_space = JCS_GRAYSCALE;
|
||||||
|
cinfo->out_color_components = 1;
|
||||||
|
}
|
||||||
|
#else
|
||||||
if( color )
|
if( color )
|
||||||
{
|
{
|
||||||
if( cinfo->num_components != 4 )
|
if( cinfo->num_components != 4 )
|
||||||
@ -455,6 +469,7 @@ bool JpegDecoder::readData( Mat& img )
|
|||||||
cinfo->out_color_components = 4;
|
cinfo->out_color_components = 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Check for Exif marker APP1
|
// Check for Exif marker APP1
|
||||||
jpeg_saved_marker_ptr exif_marker = NULL;
|
jpeg_saved_marker_ptr exif_marker = NULL;
|
||||||
@ -481,12 +496,17 @@ bool JpegDecoder::readData( Mat& img )
|
|||||||
|
|
||||||
jpeg_start_decompress( cinfo );
|
jpeg_start_decompress( cinfo );
|
||||||
|
|
||||||
|
#ifndef JCS_EXTENSIONS
|
||||||
buffer = (*cinfo->mem->alloc_sarray)((j_common_ptr)cinfo,
|
buffer = (*cinfo->mem->alloc_sarray)((j_common_ptr)cinfo,
|
||||||
JPOOL_IMAGE, m_width*4, 1 );
|
JPOOL_IMAGE, m_width*4, 1 );
|
||||||
|
#endif
|
||||||
|
|
||||||
uchar* data = img.ptr();
|
uchar* data = img.ptr();
|
||||||
for( ; m_height--; data += step )
|
for( ; m_height--; data += step )
|
||||||
{
|
{
|
||||||
|
#ifdef JCS_EXTENSIONS
|
||||||
|
jpeg_read_scanlines( cinfo, &data, 1 );
|
||||||
|
#else
|
||||||
jpeg_read_scanlines( cinfo, buffer, 1 );
|
jpeg_read_scanlines( cinfo, buffer, 1 );
|
||||||
if( color )
|
if( color )
|
||||||
{
|
{
|
||||||
@ -502,6 +522,7 @@ bool JpegDecoder::readData( Mat& img )
|
|||||||
else
|
else
|
||||||
icvCvt_CMYK2Gray_8u_C4C1R( buffer[0], 0, data, 0, Size(m_width,1) );
|
icvCvt_CMYK2Gray_8u_C4C1R( buffer[0], 0, data, 0, Size(m_width,1) );
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
result = true;
|
result = true;
|
||||||
@ -593,8 +614,11 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
|
|||||||
int width = img.cols, height = img.rows;
|
int width = img.cols, height = img.rows;
|
||||||
|
|
||||||
std::vector<uchar> out_buf(1 << 12);
|
std::vector<uchar> out_buf(1 << 12);
|
||||||
|
|
||||||
|
#ifndef JCS_EXTENSIONS
|
||||||
AutoBuffer<uchar> _buffer;
|
AutoBuffer<uchar> _buffer;
|
||||||
uchar* buffer;
|
uchar* buffer;
|
||||||
|
#endif
|
||||||
|
|
||||||
struct jpeg_compress_struct cinfo;
|
struct jpeg_compress_struct cinfo;
|
||||||
JpegErrorMgr jerr;
|
JpegErrorMgr jerr;
|
||||||
@ -629,8 +653,15 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
|
|||||||
|
|
||||||
int _channels = img.channels();
|
int _channels = img.channels();
|
||||||
int channels = _channels > 1 ? 3 : 1;
|
int channels = _channels > 1 ? 3 : 1;
|
||||||
|
|
||||||
|
#ifdef JCS_EXTENSIONS
|
||||||
|
cinfo.input_components = _channels;
|
||||||
|
cinfo.in_color_space = _channels == 3 ? JCS_EXT_BGR
|
||||||
|
: _channels == 4 ? JCS_EXT_BGRX : JCS_GRAYSCALE;
|
||||||
|
#else
|
||||||
cinfo.input_components = channels;
|
cinfo.input_components = channels;
|
||||||
cinfo.in_color_space = channels > 1 ? JCS_RGB : JCS_GRAYSCALE;
|
cinfo.in_color_space = channels > 1 ? JCS_RGB : JCS_GRAYSCALE;
|
||||||
|
#endif
|
||||||
|
|
||||||
int quality = 95;
|
int quality = 95;
|
||||||
int progressive = 0;
|
int progressive = 0;
|
||||||
@ -746,14 +777,17 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
|
|||||||
|
|
||||||
jpeg_start_compress( &cinfo, TRUE );
|
jpeg_start_compress( &cinfo, TRUE );
|
||||||
|
|
||||||
|
#ifndef JCS_EXTENSIONS
|
||||||
if( channels > 1 )
|
if( channels > 1 )
|
||||||
_buffer.allocate(width*channels);
|
_buffer.allocate(width*channels);
|
||||||
buffer = _buffer.data();
|
buffer = _buffer.data();
|
||||||
|
#endif
|
||||||
|
|
||||||
for( int y = 0; y < height; y++ )
|
for( int y = 0; y < height; y++ )
|
||||||
{
|
{
|
||||||
uchar *data = img.data + img.step*y, *ptr = data;
|
uchar *data = img.data + img.step*y, *ptr = data;
|
||||||
|
|
||||||
|
#ifndef JCS_EXTENSIONS
|
||||||
if( _channels == 3 )
|
if( _channels == 3 )
|
||||||
{
|
{
|
||||||
icvCvt_BGR2RGB_8u_C3R( data, 0, buffer, 0, Size(width,1) );
|
icvCvt_BGR2RGB_8u_C3R( data, 0, buffer, 0, Size(width,1) );
|
||||||
@ -764,6 +798,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
|
|||||||
icvCvt_BGRA2BGR_8u_C4C3R( data, 0, buffer, 0, Size(width,1), 2 );
|
icvCvt_BGRA2BGR_8u_C4C3R( data, 0, buffer, 0, Size(width,1), 2 );
|
||||||
ptr = buffer;
|
ptr = buffer;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
jpeg_write_scanlines( &cinfo, &ptr, 1 );
|
jpeg_write_scanlines( &cinfo, &ptr, 1 );
|
||||||
}
|
}
|
||||||
|
@ -210,15 +210,8 @@ struct ImageCodecInitializer
|
|||||||
static
|
static
|
||||||
ImageCodecInitializer& getCodecs()
|
ImageCodecInitializer& getCodecs()
|
||||||
{
|
{
|
||||||
#ifdef CV_CXX11
|
|
||||||
static ImageCodecInitializer g_codecs;
|
static ImageCodecInitializer g_codecs;
|
||||||
return g_codecs;
|
return g_codecs;
|
||||||
#else
|
|
||||||
// C++98 doesn't guarantee correctness of multi-threaded initialization of static global variables
|
|
||||||
// (memory leak here is not critical, use C++11 to avoid that)
|
|
||||||
static ImageCodecInitializer* g_codecs = new ImageCodecInitializer();
|
|
||||||
return *g_codecs;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -166,7 +166,7 @@ TEST_P(Imgcodecs_Avif_Image_EncodeDecodeSuite, imencode_imdecode) {
|
|||||||
cv::Exception);
|
cv::Exception);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
bool result;
|
bool result = true;
|
||||||
EXPECT_NO_THROW(
|
EXPECT_NO_THROW(
|
||||||
result = cv::imencode(".avif", img_original, buf, encoding_params_););
|
result = cv::imencode(".avif", img_original, buf, encoding_params_););
|
||||||
EXPECT_TRUE(result);
|
EXPECT_TRUE(result);
|
||||||
|
@ -4490,7 +4490,7 @@ An example using applyColorMap function
|
|||||||
|
|
||||||
/** @brief Applies a GNU Octave/MATLAB equivalent colormap on a given image.
|
/** @brief Applies a GNU Octave/MATLAB equivalent colormap on a given image.
|
||||||
|
|
||||||
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3.
|
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3. If CV_8UC3, then the CV_8UC1 image is generated internally using cv::COLOR_BGR2GRAY.
|
||||||
@param dst The result is the colormapped source image. Note: Mat::create is called on dst.
|
@param dst The result is the colormapped source image. Note: Mat::create is called on dst.
|
||||||
@param colormap The colormap to apply, see #ColormapTypes
|
@param colormap The colormap to apply, see #ColormapTypes
|
||||||
*/
|
*/
|
||||||
@ -4498,8 +4498,8 @@ CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, int colormap);
|
|||||||
|
|
||||||
/** @brief Applies a user colormap on a given image.
|
/** @brief Applies a user colormap on a given image.
|
||||||
|
|
||||||
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3.
|
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3. If CV_8UC3, then the CV_8UC1 image is generated internally using cv::COLOR_BGR2GRAY.
|
||||||
@param dst The result is the colormapped source image. Note: Mat::create is called on dst.
|
@param dst The result is the colormapped source image of the same number of channels as userColor. Note: Mat::create is called on dst.
|
||||||
@param userColor The colormap to apply of type CV_8UC1 or CV_8UC3 and size 256
|
@param userColor The colormap to apply of type CV_8UC1 or CV_8UC3 and size 256
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, InputArray userColor);
|
CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, InputArray userColor);
|
||||||
|
@ -17,7 +17,7 @@ ocv_add_module(java BINDINGS opencv_core opencv_imgproc PRIVATE_REQUIRED opencv_
|
|||||||
|
|
||||||
include(${CMAKE_CURRENT_SOURCE_DIR}/common.cmake)
|
include(${CMAKE_CURRENT_SOURCE_DIR}/common.cmake)
|
||||||
|
|
||||||
# UTILITY: glob specific sources and append them to list (type is in H, CPP, JAVA, AIDL)
|
# UTILITY: glob specific sources and append them to list (type is in H, CPP, JAVA)
|
||||||
macro(glob_more_specific_sources _type _root _output)
|
macro(glob_more_specific_sources _type _root _output)
|
||||||
unset(_masks)
|
unset(_masks)
|
||||||
if(${_type} STREQUAL "H")
|
if(${_type} STREQUAL "H")
|
||||||
@ -26,8 +26,6 @@ macro(glob_more_specific_sources _type _root _output)
|
|||||||
set(_masks "${_root}/cpp/*.cpp")
|
set(_masks "${_root}/cpp/*.cpp")
|
||||||
elseif(${_type} STREQUAL "JAVA")
|
elseif(${_type} STREQUAL "JAVA")
|
||||||
set(_masks "${_root}/java/*.java" "${_root}/java/*.java.in")
|
set(_masks "${_root}/java/*.java" "${_root}/java/*.java.in")
|
||||||
elseif(${_type} STREQUAL "AIDL")
|
|
||||||
set(_masks "${_root}/java/*.aidl")
|
|
||||||
endif()
|
endif()
|
||||||
if (_masks)
|
if (_masks)
|
||||||
file(GLOB _result ${_masks})
|
file(GLOB _result ${_masks})
|
||||||
|
@ -42,7 +42,6 @@ android {
|
|||||||
main {
|
main {
|
||||||
jniLibs.srcDirs = ['../../jni']
|
jniLibs.srcDirs = ['../../jni']
|
||||||
java.srcDirs = ['src'] // TODO Use original files instead of copied into build directory
|
java.srcDirs = ['src'] // TODO Use original files instead of copied into build directory
|
||||||
aidl.srcDirs = ['src']
|
|
||||||
res.srcDirs = ['@OpenCV_SOURCE_DIR@/modules/java/android_sdk/android_gradle_lib/res']
|
res.srcDirs = ['@OpenCV_SOURCE_DIR@/modules/java/android_sdk/android_gradle_lib/res']
|
||||||
manifest.srcFile 'AndroidManifest.xml'
|
manifest.srcFile 'AndroidManifest.xml'
|
||||||
}
|
}
|
||||||
|
@ -121,8 +121,6 @@ android {
|
|||||||
targetCompatibility JavaVersion.VERSION_@ANDROID_GRADLE_JAVA_VERSION_INIT@
|
targetCompatibility JavaVersion.VERSION_@ANDROID_GRADLE_JAVA_VERSION_INIT@
|
||||||
}
|
}
|
||||||
|
|
||||||
@ANDROID_GRADLE_BUILD_FEATURE_AIDL@
|
|
||||||
|
|
||||||
buildTypes {
|
buildTypes {
|
||||||
debug {
|
debug {
|
||||||
packagingOptions {
|
packagingOptions {
|
||||||
@ -139,7 +137,6 @@ android {
|
|||||||
}
|
}
|
||||||
|
|
||||||
buildFeatures {
|
buildFeatures {
|
||||||
aidl true
|
|
||||||
prefabPublishing true
|
prefabPublishing true
|
||||||
buildConfig true
|
buildConfig true
|
||||||
}
|
}
|
||||||
@ -153,7 +150,6 @@ android {
|
|||||||
main {
|
main {
|
||||||
jniLibs.srcDirs = ['native/libs']
|
jniLibs.srcDirs = ['native/libs']
|
||||||
java.srcDirs = ['java/src']
|
java.srcDirs = ['java/src']
|
||||||
aidl.srcDirs = ['java/src']
|
|
||||||
res.srcDirs = ['java/res']
|
res.srcDirs = ['java/res']
|
||||||
manifest.srcFile 'java/AndroidManifest.xml'
|
manifest.srcFile 'java/AndroidManifest.xml'
|
||||||
}
|
}
|
||||||
|
@ -46,6 +46,7 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
|||||||
protected ImageReader mImageReader;
|
protected ImageReader mImageReader;
|
||||||
protected int mPreviewFormat = ImageFormat.YUV_420_888;
|
protected int mPreviewFormat = ImageFormat.YUV_420_888;
|
||||||
protected int mRequestTemplate = CameraDevice.TEMPLATE_PREVIEW;
|
protected int mRequestTemplate = CameraDevice.TEMPLATE_PREVIEW;
|
||||||
|
private int mFrameRotation;
|
||||||
|
|
||||||
protected CameraDevice mCameraDevice;
|
protected CameraDevice mCameraDevice;
|
||||||
protected CameraCaptureSession mCaptureSession;
|
protected CameraCaptureSession mCaptureSession;
|
||||||
@ -86,8 +87,8 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean initializeCamera() {
|
protected boolean selectCamera() {
|
||||||
Log.i(LOGTAG, "initializeCamera");
|
Log.i(LOGTAG, "selectCamera");
|
||||||
CameraManager manager = (CameraManager) getContext().getSystemService(Context.CAMERA_SERVICE);
|
CameraManager manager = (CameraManager) getContext().getSystemService(Context.CAMERA_SERVICE);
|
||||||
try {
|
try {
|
||||||
String camList[] = manager.getCameraIdList();
|
String camList[] = manager.getCameraIdList();
|
||||||
@ -110,14 +111,10 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (mCameraID != null) {
|
if (mCameraID == null) { // make JavaCamera2View behaves in the same way as JavaCameraView
|
||||||
Log.i(LOGTAG, "Opening camera: " + mCameraID);
|
Log.i(LOGTAG, "Selecting camera by index (" + mCameraIndex + ")");
|
||||||
manager.openCamera(mCameraID, mStateCallback, mBackgroundHandler);
|
|
||||||
} else { // make JavaCamera2View behaves in the same way as JavaCameraView
|
|
||||||
Log.i(LOGTAG, "Trying to open camera with the value (" + mCameraIndex + ")");
|
|
||||||
if (mCameraIndex < camList.length) {
|
if (mCameraIndex < camList.length) {
|
||||||
mCameraID = camList[mCameraIndex];
|
mCameraID = camList[mCameraIndex];
|
||||||
manager.openCamera(mCameraID, mStateCallback, mBackgroundHandler);
|
|
||||||
} else {
|
} else {
|
||||||
// CAMERA_DISCONNECTED is used when the camera id is no longer valid
|
// CAMERA_DISCONNECTED is used when the camera id is no longer valid
|
||||||
throw new CameraAccessException(CameraAccessException.CAMERA_DISCONNECTED);
|
throw new CameraAccessException(CameraAccessException.CAMERA_DISCONNECTED);
|
||||||
@ -125,11 +122,11 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
} catch (CameraAccessException e) {
|
} catch (CameraAccessException e) {
|
||||||
Log.e(LOGTAG, "OpenCamera - Camera Access Exception", e);
|
Log.e(LOGTAG, "selectCamera - Camera Access Exception", e);
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
Log.e(LOGTAG, "OpenCamera - Illegal Argument Exception", e);
|
Log.e(LOGTAG, "selectCamera - Illegal Argument Exception", e);
|
||||||
} catch (SecurityException e) {
|
} catch (SecurityException e) {
|
||||||
Log.e(LOGTAG, "OpenCamera - Security Exception", e);
|
Log.e(LOGTAG, "selectCamera - Security Exception", e);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -204,6 +201,7 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
|||||||
mImageReader.setOnImageAvailableListener(new ImageReader.OnImageAvailableListener() {
|
mImageReader.setOnImageAvailableListener(new ImageReader.OnImageAvailableListener() {
|
||||||
@Override
|
@Override
|
||||||
public void onImageAvailable(ImageReader reader) {
|
public void onImageAvailable(ImageReader reader) {
|
||||||
|
|
||||||
Image image = reader.acquireLatestImage();
|
Image image = reader.acquireLatestImage();
|
||||||
if (image == null)
|
if (image == null)
|
||||||
return;
|
return;
|
||||||
@ -213,8 +211,9 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
|||||||
assert (planes.length == 3);
|
assert (planes.length == 3);
|
||||||
assert (image.getFormat() == mPreviewFormat);
|
assert (image.getFormat() == mPreviewFormat);
|
||||||
|
|
||||||
JavaCamera2Frame tempFrame = new JavaCamera2Frame(image);
|
RotatedCameraFrame tempFrame = new RotatedCameraFrame(new JavaCamera2Frame(image), mFrameRotation);
|
||||||
deliverAndDrawFrame(tempFrame);
|
deliverAndDrawFrame(tempFrame);
|
||||||
|
tempFrame.mFrame.release();
|
||||||
tempFrame.release();
|
tempFrame.release();
|
||||||
image.close();
|
image.close();
|
||||||
}
|
}
|
||||||
@ -303,11 +302,22 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
|||||||
protected boolean connectCamera(int width, int height) {
|
protected boolean connectCamera(int width, int height) {
|
||||||
Log.i(LOGTAG, "setCameraPreviewSize(" + width + "x" + height + ")");
|
Log.i(LOGTAG, "setCameraPreviewSize(" + width + "x" + height + ")");
|
||||||
startBackgroundThread();
|
startBackgroundThread();
|
||||||
initializeCamera();
|
selectCamera();
|
||||||
try {
|
try {
|
||||||
|
CameraManager manager = (CameraManager) getContext().getSystemService(Context.CAMERA_SERVICE);
|
||||||
|
CameraCharacteristics characteristics = manager.getCameraCharacteristics(mCameraID);
|
||||||
|
mFrameRotation = getFrameRotation(
|
||||||
|
characteristics.get(CameraCharacteristics.LENS_FACING) == CameraCharacteristics.LENS_FACING_FRONT,
|
||||||
|
characteristics.get(CameraCharacteristics.SENSOR_ORIENTATION));
|
||||||
|
|
||||||
boolean needReconfig = calcPreviewSize(width, height);
|
boolean needReconfig = calcPreviewSize(width, height);
|
||||||
|
if (mFrameRotation % 180 == 0) {
|
||||||
mFrameWidth = mPreviewSize.getWidth();
|
mFrameWidth = mPreviewSize.getWidth();
|
||||||
mFrameHeight = mPreviewSize.getHeight();
|
mFrameHeight = mPreviewSize.getHeight();
|
||||||
|
} else {
|
||||||
|
mFrameWidth = mPreviewSize.getHeight();
|
||||||
|
mFrameHeight = mPreviewSize.getWidth();
|
||||||
|
}
|
||||||
|
|
||||||
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
|
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
|
||||||
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
|
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
|
||||||
@ -322,12 +332,16 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
|||||||
mCaptureSession.close();
|
mCaptureSession.close();
|
||||||
mCaptureSession = null;
|
mCaptureSession = null;
|
||||||
}
|
}
|
||||||
createCameraPreviewSession();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mFpsMeter != null) {
|
if (mFpsMeter != null) {
|
||||||
mFpsMeter.setResolution(mFrameWidth, mFrameHeight);
|
mFpsMeter.setResolution(mFrameWidth, mFrameHeight);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Log.i(LOGTAG, "Opening camera: " + mCameraID);
|
||||||
|
manager.openCamera(mCameraID, mStateCallback, mBackgroundHandler);
|
||||||
|
} catch (CameraAccessException e) {
|
||||||
|
Log.e(LOGTAG, "OpenCamera - Camera Access Exception", e);
|
||||||
} catch (RuntimeException e) {
|
} catch (RuntimeException e) {
|
||||||
throw new RuntimeException("Interrupted while setCameraPreviewSize.", e);
|
throw new RuntimeException("Interrupted while setCameraPreviewSize.", e);
|
||||||
}
|
}
|
||||||
@ -442,6 +456,7 @@ public class JavaCamera2View extends CameraBridgeViewBase {
|
|||||||
mGray = new Mat();
|
mGray = new Mat();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public void release() {
|
public void release() {
|
||||||
mRgba.release();
|
mRgba.release();
|
||||||
mGray.release();
|
mGray.release();
|
||||||
|
@ -10,6 +10,7 @@ import org.opencv.videoio.VideoCapture;
|
|||||||
import org.opencv.videoio.VideoWriter;
|
import org.opencv.videoio.VideoWriter;
|
||||||
|
|
||||||
import android.content.Context;
|
import android.content.Context;
|
||||||
|
import android.hardware.Camera;
|
||||||
import android.util.AttributeSet;
|
import android.util.AttributeSet;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
import android.view.ViewGroup.LayoutParams;
|
import android.view.ViewGroup.LayoutParams;
|
||||||
@ -25,7 +26,7 @@ public class NativeCameraView extends CameraBridgeViewBase {
|
|||||||
private Thread mThread;
|
private Thread mThread;
|
||||||
|
|
||||||
protected VideoCapture mCamera;
|
protected VideoCapture mCamera;
|
||||||
protected NativeCameraFrame mFrame;
|
protected RotatedCameraFrame mFrame;
|
||||||
|
|
||||||
public NativeCameraView(Context context, int cameraId) {
|
public NativeCameraView(Context context, int cameraId) {
|
||||||
super(context, cameraId);
|
super(context, cameraId);
|
||||||
@ -89,28 +90,65 @@ public class NativeCameraView extends CameraBridgeViewBase {
|
|||||||
|
|
||||||
private boolean initializeCamera(int width, int height) {
|
private boolean initializeCamera(int width, int height) {
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
|
Camera.CameraInfo cameraInfo = new Camera.CameraInfo();
|
||||||
if (mCameraIndex == -1) {
|
int localCameraIndex = mCameraIndex;
|
||||||
|
if (mCameraIndex == CAMERA_ID_ANY) {
|
||||||
Log.d(TAG, "Try to open default camera");
|
Log.d(TAG, "Try to open default camera");
|
||||||
mCamera = new VideoCapture(0, Videoio.CAP_ANDROID);
|
localCameraIndex = 0;
|
||||||
} else {
|
} else if (mCameraIndex == CAMERA_ID_BACK) {
|
||||||
Log.d(TAG, "Try to open camera with index " + mCameraIndex);
|
Log.i(TAG, "Trying to open back camera");
|
||||||
mCamera = new VideoCapture(mCameraIndex, Videoio.CAP_ANDROID);
|
for (int camIdx = 0; camIdx < Camera.getNumberOfCameras(); ++camIdx) {
|
||||||
|
Camera.getCameraInfo( camIdx, cameraInfo );
|
||||||
|
if (cameraInfo.facing == Camera.CameraInfo.CAMERA_FACING_BACK) {
|
||||||
|
localCameraIndex = camIdx;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
} else if (mCameraIndex == CAMERA_ID_FRONT) {
|
||||||
|
Log.i(TAG, "Trying to open front camera");
|
||||||
|
for (int camIdx = 0; camIdx < Camera.getNumberOfCameras(); ++camIdx) {
|
||||||
|
Camera.getCameraInfo( camIdx, cameraInfo );
|
||||||
|
if (cameraInfo.facing == Camera.CameraInfo.CAMERA_FACING_FRONT) {
|
||||||
|
localCameraIndex = camIdx;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (localCameraIndex == CAMERA_ID_BACK) {
|
||||||
|
Log.e(TAG, "Back camera not found!");
|
||||||
|
return false;
|
||||||
|
} else if (localCameraIndex == CAMERA_ID_FRONT) {
|
||||||
|
Log.e(TAG, "Front camera not found!");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Log.d(TAG, "Try to open camera with index " + localCameraIndex);
|
||||||
|
mCamera = new VideoCapture(localCameraIndex, Videoio.CAP_ANDROID);
|
||||||
|
|
||||||
if (mCamera == null)
|
if (mCamera == null)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (mCamera.isOpened() == false)
|
if (mCamera.isOpened() == false)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
mFrame = new NativeCameraFrame(mCamera);
|
if (mCameraIndex != CAMERA_ID_BACK && mCameraIndex != CAMERA_ID_FRONT)
|
||||||
|
Camera.getCameraInfo(localCameraIndex, cameraInfo);
|
||||||
|
int frameRotation = getFrameRotation(
|
||||||
|
cameraInfo.facing == Camera.CameraInfo.CAMERA_FACING_FRONT,
|
||||||
|
cameraInfo.orientation);
|
||||||
|
|
||||||
|
mFrame = new RotatedCameraFrame(new NativeCameraFrame(mCamera), frameRotation);
|
||||||
|
|
||||||
mCamera.set(Videoio.CAP_PROP_FRAME_WIDTH, width);
|
mCamera.set(Videoio.CAP_PROP_FRAME_WIDTH, width);
|
||||||
mCamera.set(Videoio.CAP_PROP_FRAME_HEIGHT, height);
|
mCamera.set(Videoio.CAP_PROP_FRAME_HEIGHT, height);
|
||||||
|
|
||||||
mFrameWidth = (int)mCamera.get(Videoio.CAP_PROP_FRAME_WIDTH);
|
if (frameRotation % 180 == 0) {
|
||||||
mFrameHeight = (int)mCamera.get(Videoio.CAP_PROP_FRAME_HEIGHT);
|
mFrameWidth = (int) mCamera.get(Videoio.CAP_PROP_FRAME_WIDTH);
|
||||||
|
mFrameHeight = (int) mCamera.get(Videoio.CAP_PROP_FRAME_HEIGHT);
|
||||||
|
} else {
|
||||||
|
mFrameWidth = (int) mCamera.get(Videoio.CAP_PROP_FRAME_HEIGHT);
|
||||||
|
mFrameHeight = (int) mCamera.get(Videoio.CAP_PROP_FRAME_WIDTH);
|
||||||
|
}
|
||||||
|
|
||||||
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
|
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
|
||||||
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
|
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
|
||||||
@ -131,7 +169,10 @@ public class NativeCameraView extends CameraBridgeViewBase {
|
|||||||
|
|
||||||
private void releaseCamera() {
|
private void releaseCamera() {
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
if (mFrame != null) mFrame.release();
|
if (mFrame != null) {
|
||||||
|
mFrame.mFrame.release();
|
||||||
|
mFrame.release();
|
||||||
|
}
|
||||||
if (mCamera != null) mCamera.release();
|
if (mCamera != null) mCamera.release();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -162,6 +203,7 @@ public class NativeCameraView extends CameraBridgeViewBase {
|
|||||||
mBgr = new Mat();
|
mBgr = new Mat();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public void release() {
|
public void release() {
|
||||||
if (mGray != null) mGray.release();
|
if (mGray != null) mGray.release();
|
||||||
if (mRgba != null) mRgba.release();
|
if (mRgba != null) mRgba.release();
|
||||||
|
@ -4,6 +4,7 @@ import java.util.List;
|
|||||||
|
|
||||||
import org.opencv.BuildConfig;
|
import org.opencv.BuildConfig;
|
||||||
import org.opencv.R;
|
import org.opencv.R;
|
||||||
|
import org.opencv.core.Core;
|
||||||
import org.opencv.core.Mat;
|
import org.opencv.core.Mat;
|
||||||
import org.opencv.core.Size;
|
import org.opencv.core.Size;
|
||||||
|
|
||||||
@ -17,8 +18,10 @@ import android.graphics.Canvas;
|
|||||||
import android.graphics.Rect;
|
import android.graphics.Rect;
|
||||||
import android.util.AttributeSet;
|
import android.util.AttributeSet;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
|
import android.view.Surface;
|
||||||
import android.view.SurfaceHolder;
|
import android.view.SurfaceHolder;
|
||||||
import android.view.SurfaceView;
|
import android.view.SurfaceView;
|
||||||
|
import android.view.WindowManager;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is a basic class, implementing the interaction with Camera and OpenCV library.
|
* This is a basic class, implementing the interaction with Camera and OpenCV library.
|
||||||
@ -189,8 +192,93 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac
|
|||||||
* This method returns single channel gray scale Mat with frame
|
* This method returns single channel gray scale Mat with frame
|
||||||
*/
|
*/
|
||||||
public Mat gray();
|
public Mat gray();
|
||||||
|
|
||||||
|
public void release();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
public class RotatedCameraFrame implements CvCameraViewFrame {
|
||||||
|
@Override
|
||||||
|
public Mat gray() {
|
||||||
|
if (mRotation != 0) {
|
||||||
|
Core.rotate(mFrame.gray(), mGrayRotated, getCvRotationCode(mRotation));
|
||||||
|
return mGrayRotated;
|
||||||
|
} else {
|
||||||
|
return mFrame.gray();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Mat rgba() {
|
||||||
|
if (mRotation != 0) {
|
||||||
|
Core.rotate(mFrame.rgba(), mRgbaRotated, getCvRotationCode(mRotation));
|
||||||
|
return mRgbaRotated;
|
||||||
|
} else {
|
||||||
|
return mFrame.rgba();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getCvRotationCode(int degrees) {
|
||||||
|
if (degrees == 90) {
|
||||||
|
return Core.ROTATE_90_CLOCKWISE;
|
||||||
|
} else if (degrees == 180) {
|
||||||
|
return Core.ROTATE_180;
|
||||||
|
} else {
|
||||||
|
return Core.ROTATE_90_COUNTERCLOCKWISE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public RotatedCameraFrame(CvCameraViewFrame frame, int rotation) {
|
||||||
|
super();
|
||||||
|
mFrame = frame;
|
||||||
|
mRgbaRotated = new Mat();
|
||||||
|
mGrayRotated = new Mat();
|
||||||
|
mRotation = rotation;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void release() {
|
||||||
|
mRgbaRotated.release();
|
||||||
|
mGrayRotated.release();
|
||||||
|
}
|
||||||
|
|
||||||
|
public CvCameraViewFrame mFrame;
|
||||||
|
private Mat mRgbaRotated;
|
||||||
|
private Mat mGrayRotated;
|
||||||
|
private int mRotation;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates how to rotate camera frame to match current screen orientation
|
||||||
|
*/
|
||||||
|
protected int getFrameRotation(boolean cameraFacingFront, int cameraSensorOrientation) {
|
||||||
|
WindowManager windowManager = (WindowManager) getContext().getSystemService(Context.WINDOW_SERVICE);
|
||||||
|
int screenOrientation = windowManager.getDefaultDisplay().getRotation();
|
||||||
|
int screenRotation = 0;
|
||||||
|
switch (screenOrientation) {
|
||||||
|
case Surface.ROTATION_0:
|
||||||
|
screenRotation = 0;
|
||||||
|
break;
|
||||||
|
case Surface.ROTATION_90:
|
||||||
|
screenRotation = 90;
|
||||||
|
break;
|
||||||
|
case Surface.ROTATION_180:
|
||||||
|
screenRotation = 180;
|
||||||
|
break;
|
||||||
|
case Surface.ROTATION_270:
|
||||||
|
screenRotation = 270;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
int frameRotation;
|
||||||
|
if (cameraFacingFront) {
|
||||||
|
frameRotation = (cameraSensorOrientation + screenRotation) % 360;
|
||||||
|
} else {
|
||||||
|
frameRotation = (cameraSensorOrientation - screenRotation + 360) % 360;
|
||||||
|
}
|
||||||
|
|
||||||
|
return frameRotation;
|
||||||
|
}
|
||||||
|
|
||||||
public void surfaceChanged(SurfaceHolder arg0, int arg1, int arg2, int arg3) {
|
public void surfaceChanged(SurfaceHolder arg0, int arg1, int arg2, int arg3) {
|
||||||
Log.d(TAG, "call surfaceChanged event");
|
Log.d(TAG, "call surfaceChanged event");
|
||||||
synchronized(mSyncObject) {
|
synchronized(mSyncObject) {
|
||||||
|
@ -10,9 +10,12 @@ import android.hardware.Camera.PreviewCallback;
|
|||||||
import android.os.Build;
|
import android.os.Build;
|
||||||
import android.util.AttributeSet;
|
import android.util.AttributeSet;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
|
import android.view.Surface;
|
||||||
import android.view.ViewGroup.LayoutParams;
|
import android.view.ViewGroup.LayoutParams;
|
||||||
|
import android.view.WindowManager;
|
||||||
|
|
||||||
import org.opencv.BuildConfig;
|
import org.opencv.BuildConfig;
|
||||||
|
import org.opencv.core.Core;
|
||||||
import org.opencv.core.CvType;
|
import org.opencv.core.CvType;
|
||||||
import org.opencv.core.Mat;
|
import org.opencv.core.Mat;
|
||||||
import org.opencv.core.Size;
|
import org.opencv.core.Size;
|
||||||
@ -39,7 +42,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
|||||||
private boolean mStopThread;
|
private boolean mStopThread;
|
||||||
|
|
||||||
protected Camera mCamera;
|
protected Camera mCamera;
|
||||||
protected JavaCameraFrame[] mCameraFrame;
|
protected RotatedCameraFrame[] mCameraFrame;
|
||||||
private SurfaceTexture mSurfaceTexture;
|
private SurfaceTexture mSurfaceTexture;
|
||||||
private int mPreviewFormat = ImageFormat.NV21;
|
private int mPreviewFormat = ImageFormat.NV21;
|
||||||
|
|
||||||
@ -71,29 +74,21 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
|||||||
boolean result = true;
|
boolean result = true;
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
mCamera = null;
|
mCamera = null;
|
||||||
|
int cameraId = -1;
|
||||||
|
|
||||||
if (mCameraIndex == CAMERA_ID_ANY) {
|
if (mCameraIndex == CAMERA_ID_ANY) {
|
||||||
Log.d(TAG, "Trying to open camera with old open()");
|
|
||||||
try {
|
|
||||||
mCamera = Camera.open();
|
|
||||||
}
|
|
||||||
catch (Exception e){
|
|
||||||
Log.e(TAG, "Camera is not available (in use or does not exist): " + e.getLocalizedMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
if(mCamera == null && Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
|
|
||||||
boolean connected = false;
|
boolean connected = false;
|
||||||
for (int camIdx = 0; camIdx < Camera.getNumberOfCameras(); ++camIdx) {
|
for (int camIdx = 0; camIdx < Camera.getNumberOfCameras(); ++camIdx) {
|
||||||
Log.d(TAG, "Trying to open camera with new open(" + Integer.valueOf(camIdx) + ")");
|
Log.d(TAG, "Trying to open camera with new open(" + Integer.valueOf(camIdx) + ")");
|
||||||
try {
|
try {
|
||||||
mCamera = Camera.open(camIdx);
|
mCamera = Camera.open(camIdx);
|
||||||
connected = true;
|
connected = true;
|
||||||
|
cameraId = camIdx;
|
||||||
} catch (RuntimeException e) {
|
} catch (RuntimeException e) {
|
||||||
Log.e(TAG, "Camera #" + camIdx + "failed to open: " + e.getLocalizedMessage());
|
Log.e(TAG, "Camera #" + camIdx + "failed to open: " + e.getLocalizedMessage());
|
||||||
}
|
}
|
||||||
if (connected) break;
|
if (connected) break;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
|
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
|
||||||
int localCameraIndex = mCameraIndex;
|
int localCameraIndex = mCameraIndex;
|
||||||
@ -126,6 +121,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
|||||||
Log.d(TAG, "Trying to open camera with new open(" + Integer.valueOf(localCameraIndex) + ")");
|
Log.d(TAG, "Trying to open camera with new open(" + Integer.valueOf(localCameraIndex) + ")");
|
||||||
try {
|
try {
|
||||||
mCamera = Camera.open(localCameraIndex);
|
mCamera = Camera.open(localCameraIndex);
|
||||||
|
cameraId = localCameraIndex;
|
||||||
} catch (RuntimeException e) {
|
} catch (RuntimeException e) {
|
||||||
Log.e(TAG, "Camera #" + localCameraIndex + "failed to open: " + e.getLocalizedMessage());
|
Log.e(TAG, "Camera #" + localCameraIndex + "failed to open: " + e.getLocalizedMessage());
|
||||||
}
|
}
|
||||||
@ -136,6 +132,11 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
|||||||
if (mCamera == null)
|
if (mCamera == null)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
android.hardware.Camera.CameraInfo info = new android.hardware.Camera.CameraInfo();
|
||||||
|
android.hardware.Camera.getCameraInfo(cameraId, info);
|
||||||
|
int frameRotation = getFrameRotation(
|
||||||
|
info.facing == Camera.CameraInfo.CAMERA_FACING_FRONT,
|
||||||
|
info.orientation);
|
||||||
/* Now set camera parameters */
|
/* Now set camera parameters */
|
||||||
try {
|
try {
|
||||||
Camera.Parameters params = mCamera.getParameters();
|
Camera.Parameters params = mCamera.getParameters();
|
||||||
@ -176,8 +177,16 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
|||||||
mCamera.setParameters(params);
|
mCamera.setParameters(params);
|
||||||
params = mCamera.getParameters();
|
params = mCamera.getParameters();
|
||||||
|
|
||||||
|
int rawFrameWidth = params.getPreviewSize().width;
|
||||||
|
int rawFrameHeight = params.getPreviewSize().height;
|
||||||
|
|
||||||
|
if (frameRotation % 180 == 0) {
|
||||||
mFrameWidth = params.getPreviewSize().width;
|
mFrameWidth = params.getPreviewSize().width;
|
||||||
mFrameHeight = params.getPreviewSize().height;
|
mFrameHeight = params.getPreviewSize().height;
|
||||||
|
} else {
|
||||||
|
mFrameWidth = params.getPreviewSize().height;
|
||||||
|
mFrameHeight = params.getPreviewSize().width;
|
||||||
|
}
|
||||||
|
|
||||||
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
|
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
|
||||||
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
|
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
|
||||||
@ -196,14 +205,14 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
|||||||
mCamera.setPreviewCallbackWithBuffer(this);
|
mCamera.setPreviewCallbackWithBuffer(this);
|
||||||
|
|
||||||
mFrameChain = new Mat[2];
|
mFrameChain = new Mat[2];
|
||||||
mFrameChain[0] = new Mat(mFrameHeight + (mFrameHeight/2), mFrameWidth, CvType.CV_8UC1);
|
mFrameChain[0] = new Mat(rawFrameHeight + (rawFrameHeight/2), rawFrameWidth, CvType.CV_8UC1);
|
||||||
mFrameChain[1] = new Mat(mFrameHeight + (mFrameHeight/2), mFrameWidth, CvType.CV_8UC1);
|
mFrameChain[1] = new Mat(rawFrameHeight + (rawFrameHeight/2), rawFrameWidth, CvType.CV_8UC1);
|
||||||
|
|
||||||
AllocateCache();
|
AllocateCache();
|
||||||
|
|
||||||
mCameraFrame = new JavaCameraFrame[2];
|
mCameraFrame = new RotatedCameraFrame[2];
|
||||||
mCameraFrame[0] = new JavaCameraFrame(mFrameChain[0], mFrameWidth, mFrameHeight);
|
mCameraFrame[0] = new RotatedCameraFrame(new JavaCameraFrame(mFrameChain[0], rawFrameWidth, rawFrameHeight), frameRotation);
|
||||||
mCameraFrame[1] = new JavaCameraFrame(mFrameChain[1], mFrameWidth, mFrameHeight);
|
mCameraFrame[1] = new RotatedCameraFrame(new JavaCameraFrame(mFrameChain[1], rawFrameWidth, rawFrameHeight), frameRotation);
|
||||||
|
|
||||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.HONEYCOMB) {
|
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.HONEYCOMB) {
|
||||||
mSurfaceTexture = new SurfaceTexture(MAGIC_TEXTURE_ID);
|
mSurfaceTexture = new SurfaceTexture(MAGIC_TEXTURE_ID);
|
||||||
@ -240,7 +249,9 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
|||||||
mFrameChain[1].release();
|
mFrameChain[1].release();
|
||||||
}
|
}
|
||||||
if (mCameraFrame != null) {
|
if (mCameraFrame != null) {
|
||||||
|
mCameraFrame[0].mFrame.release();
|
||||||
mCameraFrame[0].release();
|
mCameraFrame[0].release();
|
||||||
|
mCameraFrame[1].mFrame.release();
|
||||||
mCameraFrame[1].release();
|
mCameraFrame[1].release();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -336,6 +347,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
|
|||||||
mRgba = new Mat();
|
mRgba = new Mat();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public void release() {
|
public void release() {
|
||||||
mRgba.release();
|
mRgba.release();
|
||||||
}
|
}
|
||||||
|
@ -1254,13 +1254,13 @@ JNIEXPORT void JNICALL Java_org_opencv_%(module)s_%(j_cls)s_delete
|
|||||||
def copy_java_files(java_files_dir, java_base_path, default_package_path='org/opencv/'):
|
def copy_java_files(java_files_dir, java_base_path, default_package_path='org/opencv/'):
|
||||||
global total_files, updated_files
|
global total_files, updated_files
|
||||||
java_files = []
|
java_files = []
|
||||||
re_filter = re.compile(r'^.+\.(java|aidl|kt)(.in)?$')
|
re_filter = re.compile(r'^.+\.(java|kt)(.in)?$')
|
||||||
for root, dirnames, filenames in os.walk(java_files_dir):
|
for root, dirnames, filenames in os.walk(java_files_dir):
|
||||||
java_files += [os.path.join(root, filename) for filename in filenames if re_filter.match(filename)]
|
java_files += [os.path.join(root, filename) for filename in filenames if re_filter.match(filename)]
|
||||||
java_files = [f.replace('\\', '/') for f in java_files]
|
java_files = [f.replace('\\', '/') for f in java_files]
|
||||||
|
|
||||||
re_package = re.compile(r'^package +(.+);')
|
re_package = re.compile(r'^package +(.+);')
|
||||||
re_prefix = re.compile(r'^.+[\+/]([^\+]+).(java|aidl|kt)(.in)?$')
|
re_prefix = re.compile(r'^.+[\+/]([^\+]+).(java|kt)(.in)?$')
|
||||||
for java_file in java_files:
|
for java_file in java_files:
|
||||||
src = checkFileRemap(java_file)
|
src = checkFileRemap(java_file)
|
||||||
with open(src, 'r') as f:
|
with open(src, 'r') as f:
|
||||||
|
@ -27,7 +27,7 @@ public:
|
|||||||
* @param prototxt_path prototxt file path for the super resolution model
|
* @param prototxt_path prototxt file path for the super resolution model
|
||||||
* @param model_path model file path for the super resolution model
|
* @param model_path model file path for the super resolution model
|
||||||
*/
|
*/
|
||||||
CV_WRAP BarcodeDetector(const std::string &prototxt_path, const std::string &model_path);
|
CV_WRAP BarcodeDetector(CV_WRAP_FILE_PATH const std::string &prototxt_path, CV_WRAP_FILE_PATH const std::string &model_path);
|
||||||
~BarcodeDetector();
|
~BarcodeDetector();
|
||||||
|
|
||||||
/** @brief Decodes barcode in image once it's found by the detect() method.
|
/** @brief Decodes barcode in image once it's found by the detect() method.
|
||||||
|
@ -82,8 +82,8 @@ public:
|
|||||||
* @param backend_id the id of backend
|
* @param backend_id the id of backend
|
||||||
* @param target_id the id of target device
|
* @param target_id the id of target device
|
||||||
*/
|
*/
|
||||||
CV_WRAP static Ptr<FaceDetectorYN> create(const String& model,
|
CV_WRAP static Ptr<FaceDetectorYN> create(CV_WRAP_FILE_PATH const String& model,
|
||||||
const String& config,
|
CV_WRAP_FILE_PATH const String& config,
|
||||||
const Size& input_size,
|
const Size& input_size,
|
||||||
float score_threshold = 0.9f,
|
float score_threshold = 0.9f,
|
||||||
float nms_threshold = 0.3f,
|
float nms_threshold = 0.3f,
|
||||||
@ -154,7 +154,7 @@ public:
|
|||||||
* @param backend_id the id of backend
|
* @param backend_id the id of backend
|
||||||
* @param target_id the id of target device
|
* @param target_id the id of target device
|
||||||
*/
|
*/
|
||||||
CV_WRAP static Ptr<FaceRecognizerSF> create(const String& model, const String& config, int backend_id = 0, int target_id = 0);
|
CV_WRAP static Ptr<FaceRecognizerSF> create(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config, int backend_id = 0, int target_id = 0);
|
||||||
};
|
};
|
||||||
|
|
||||||
//! @}
|
//! @}
|
||||||
|
@ -483,39 +483,44 @@ void CharucoBoardImpl::generateImage(Size outSize, OutputArray img, int marginSi
|
|||||||
Mat noMarginsImg =
|
Mat noMarginsImg =
|
||||||
out.colRange(marginSize, out.cols - marginSize).rowRange(marginSize, out.rows - marginSize);
|
out.colRange(marginSize, out.cols - marginSize).rowRange(marginSize, out.rows - marginSize);
|
||||||
|
|
||||||
double totalLengthX, totalLengthY;
|
// the size of the chessboard square depends on the location of the chessboard
|
||||||
totalLengthX = squareLength * size.width;
|
float pixInSquare = 0.f;
|
||||||
totalLengthY = squareLength * size.height;
|
// the size of the chessboard in pixels
|
||||||
|
Size pixInChessboard(noMarginsImg.cols, noMarginsImg.rows);
|
||||||
// proportional transformation
|
|
||||||
double xReduction = totalLengthX / double(noMarginsImg.cols);
|
|
||||||
double yReduction = totalLengthY / double(noMarginsImg.rows);
|
|
||||||
|
|
||||||
// determine the zone where the chessboard is placed
|
// determine the zone where the chessboard is placed
|
||||||
Mat chessboardZoneImg;
|
float pixInSquareX = (float)noMarginsImg.cols / (float)size.width;
|
||||||
if(xReduction > yReduction) {
|
float pixInSquareY = (float)noMarginsImg.rows / (float)size.height;
|
||||||
int nRows = int(totalLengthY / xReduction);
|
Point startChessboard(0, 0);
|
||||||
int rowsMargins = (noMarginsImg.rows - nRows) / 2;
|
if (pixInSquareX <= pixInSquareY) {
|
||||||
chessboardZoneImg = noMarginsImg.rowRange(rowsMargins, noMarginsImg.rows - rowsMargins);
|
// the width of "noMarginsImg" image determines the dimensions of the chessboard
|
||||||
} else {
|
pixInSquare = pixInSquareX;
|
||||||
int nCols = int(totalLengthX / yReduction);
|
pixInChessboard.height = cvRound(pixInSquare*size.height);
|
||||||
int colsMargins = (noMarginsImg.cols - nCols) / 2;
|
int rowsMargin = (noMarginsImg.rows - pixInChessboard.height) / 2;
|
||||||
chessboardZoneImg = noMarginsImg.colRange(colsMargins, noMarginsImg.cols - colsMargins);
|
startChessboard.y = rowsMargin;
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
// the height of "noMarginsImg" image determines the dimensions of the chessboard
|
||||||
|
pixInSquare = pixInSquareY;
|
||||||
|
pixInChessboard.width = cvRound(pixInSquare*size.width);
|
||||||
|
int colsMargin = (noMarginsImg.cols - pixInChessboard.width) / 2;
|
||||||
|
startChessboard.x = colsMargin;
|
||||||
|
}
|
||||||
|
// determine the zone where the chessboard is located
|
||||||
|
Mat chessboardZoneImg = noMarginsImg(Rect(startChessboard, pixInChessboard));
|
||||||
|
|
||||||
// determine the margins to draw only the markers
|
// marker size in pixels
|
||||||
// take the minimum just to be sure
|
const float pixInMarker = markerLength/squareLength*pixInSquare;
|
||||||
double squareSizePixels = min(double(chessboardZoneImg.cols) / double(size.width),
|
// the size of the marker margin in pixels
|
||||||
double(chessboardZoneImg.rows) / double(size.height));
|
const float pixInMarginMarker = 0.5f*(pixInSquare - pixInMarker);
|
||||||
|
|
||||||
double diffSquareMarkerLength = (squareLength - markerLength) / 2;
|
// determine the zone where the aruco markers are located
|
||||||
int diffSquareMarkerLengthPixels =
|
int endArucoX = cvRound(pixInSquare*(size.width-1)+pixInMarginMarker+pixInMarker);
|
||||||
int(diffSquareMarkerLength * squareSizePixels / squareLength);
|
int endArucoY = cvRound(pixInSquare*(size.height-1)+pixInMarginMarker+pixInMarker);
|
||||||
|
Mat arucoZone = chessboardZoneImg(Range(cvRound(pixInMarginMarker), endArucoY), Range(cvRound(pixInMarginMarker), endArucoX));
|
||||||
|
|
||||||
// draw markers
|
// draw markers
|
||||||
Mat markersImg;
|
Board::Impl::generateImage(arucoZone.size(), arucoZone, 0, borderBits);
|
||||||
Board::Impl::generateImage(chessboardZoneImg.size(), markersImg, diffSquareMarkerLengthPixels, borderBits);
|
|
||||||
markersImg.copyTo(chessboardZoneImg);
|
|
||||||
|
|
||||||
// now draw black squares
|
// now draw black squares
|
||||||
for(int y = 0; y < size.height; y++) {
|
for(int y = 0; y < size.height; y++) {
|
||||||
@ -527,12 +532,11 @@ void CharucoBoardImpl::generateImage(Size outSize, OutputArray img, int marginSi
|
|||||||
if(y % 2 != x % 2) continue; // white corner, dont do anything
|
if(y % 2 != x % 2) continue; // white corner, dont do anything
|
||||||
}
|
}
|
||||||
|
|
||||||
double startX, startY;
|
float startX = pixInSquare * float(x);
|
||||||
startX = squareSizePixels * double(x);
|
float startY = pixInSquare * float(y);
|
||||||
startY = squareSizePixels * double(y);
|
|
||||||
|
|
||||||
Mat squareZone = chessboardZoneImg.rowRange(int(startY), int(startY + squareSizePixels))
|
Mat squareZone = chessboardZoneImg(Range(cvRound(startY), cvRound(startY + pixInSquare)),
|
||||||
.colRange(int(startX), int(startX + squareSizePixels));
|
Range(cvRound(startX), cvRound(startX + pixInSquare)));
|
||||||
|
|
||||||
squareZone.setTo(0);
|
squareZone.setTo(0);
|
||||||
}
|
}
|
||||||
|
@ -684,7 +684,7 @@ struct ArucoDetector::ArucoDetectorImpl {
|
|||||||
contours.clear();
|
contours.clear();
|
||||||
|
|
||||||
// sort candidates from big to small
|
// sort candidates from big to small
|
||||||
std::sort(candidateTree.begin(), candidateTree.end());
|
std::stable_sort(candidateTree.begin(), candidateTree.end());
|
||||||
// group index for each candidate
|
// group index for each candidate
|
||||||
vector<int> groupId(candidateTree.size(), -1);
|
vector<int> groupId(candidateTree.size(), -1);
|
||||||
vector<vector<size_t> > groupedCandidates;
|
vector<vector<size_t> > groupedCandidates;
|
||||||
@ -728,11 +728,11 @@ struct ArucoDetector::ArucoDetectorImpl {
|
|||||||
|
|
||||||
for (vector<size_t>& grouped : groupedCandidates) {
|
for (vector<size_t>& grouped : groupedCandidates) {
|
||||||
if (detectorParams.detectInvertedMarker) // if detectInvertedMarker choose smallest contours
|
if (detectorParams.detectInvertedMarker) // if detectInvertedMarker choose smallest contours
|
||||||
std::sort(grouped.begin(), grouped.end(), [](const size_t &a, const size_t &b) {
|
std::stable_sort(grouped.begin(), grouped.end(), [](const size_t &a, const size_t &b) {
|
||||||
return a > b;
|
return a > b;
|
||||||
});
|
});
|
||||||
else // if detectInvertedMarker==false choose largest contours
|
else // if detectInvertedMarker==false choose largest contours
|
||||||
std::sort(grouped.begin(), grouped.end());
|
std::stable_sort(grouped.begin(), grouped.end());
|
||||||
size_t currId = grouped[0];
|
size_t currId = grouped[0];
|
||||||
isSelectedContours[currId] = true;
|
isSelectedContours[currId] = true;
|
||||||
for (size_t i = 1ull; i < grouped.size(); i++) {
|
for (size_t i = 1ull; i < grouped.size(); i++) {
|
||||||
@ -780,7 +780,7 @@ struct ArucoDetector::ArucoDetectorImpl {
|
|||||||
vector<int> idsTmp(ncandidates, -1);
|
vector<int> idsTmp(ncandidates, -1);
|
||||||
vector<int> rotated(ncandidates, 0);
|
vector<int> rotated(ncandidates, 0);
|
||||||
vector<uint8_t> validCandidates(ncandidates, 0);
|
vector<uint8_t> validCandidates(ncandidates, 0);
|
||||||
vector<bool> was(ncandidates, false);
|
vector<uint8_t> was(ncandidates, false);
|
||||||
bool checkCloseContours = true;
|
bool checkCloseContours = true;
|
||||||
|
|
||||||
int maxDepth = 0;
|
int maxDepth = 0;
|
||||||
|
@ -52,5 +52,7 @@
|
|||||||
#include "opencv2/core/private.hpp"
|
#include "opencv2/core/private.hpp"
|
||||||
|
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
|
#include <array>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "quirc.h"
|
#include "quirc.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
@ -771,6 +771,57 @@ TEST_P(CharucoBoard, testWrongSizeDetection)
|
|||||||
ASSERT_TRUE(detectedCharucoIds.empty());
|
ASSERT_TRUE(detectedCharucoIds.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(CharucoBoardGenerate, issue_24806)
|
||||||
|
{
|
||||||
|
aruco::Dictionary dict = aruco::getPredefinedDictionary(aruco::DICT_4X4_1000);
|
||||||
|
const float squareLength = 13.f, markerLength = 10.f;
|
||||||
|
const Size boardSize(7ull, 4ull);
|
||||||
|
const aruco::CharucoBoard board(boardSize, squareLength, markerLength, dict);
|
||||||
|
const int marginSize = 24;
|
||||||
|
Mat boardImg;
|
||||||
|
|
||||||
|
// generate chessboard image
|
||||||
|
board.generateImage(Size(400, 300), boardImg, marginSize);
|
||||||
|
// This condition checks that the width of the image determines the dimensions of the chessboard in this test
|
||||||
|
CV_Assert((float)(boardImg.cols) / (float)boardSize.width <=
|
||||||
|
(float)(boardImg.rows) / (float)boardSize.height);
|
||||||
|
|
||||||
|
// prepare data for chessboard image test
|
||||||
|
Mat noMarginsImg = boardImg(Range(marginSize, boardImg.rows - marginSize),
|
||||||
|
Range(marginSize, boardImg.cols - marginSize));
|
||||||
|
const float pixInSquare = (float)(noMarginsImg.cols) / (float)boardSize.width;
|
||||||
|
|
||||||
|
Size pixInChessboard(cvRound(pixInSquare*boardSize.width), cvRound(pixInSquare*boardSize.height));
|
||||||
|
const Point startChessboard((noMarginsImg.cols - pixInChessboard.width) / 2,
|
||||||
|
(noMarginsImg.rows - pixInChessboard.height) / 2);
|
||||||
|
Mat chessboardZoneImg = noMarginsImg(Rect(startChessboard, pixInChessboard));
|
||||||
|
|
||||||
|
// B - black pixel, W - white pixel
|
||||||
|
// chessboard corner 1:
|
||||||
|
// B W
|
||||||
|
// W B
|
||||||
|
Mat goldCorner1 = (Mat_<uint8_t>(2, 2) <<
|
||||||
|
0, 255,
|
||||||
|
255, 0);
|
||||||
|
// B - black pixel, W - white pixel
|
||||||
|
// chessboard corner 2:
|
||||||
|
// W B
|
||||||
|
// B W
|
||||||
|
Mat goldCorner2 = (Mat_<uint8_t>(2, 2) <<
|
||||||
|
255, 0,
|
||||||
|
0, 255);
|
||||||
|
|
||||||
|
// test chessboard corners in generated image
|
||||||
|
for (const Point3f& p: board.getChessboardCorners()) {
|
||||||
|
Point2f chessCorner(pixInSquare*(p.x/squareLength),
|
||||||
|
pixInSquare*(p.y/squareLength));
|
||||||
|
Mat winCorner = chessboardZoneImg(Rect(Point(cvRound(chessCorner.x) - 1, cvRound(chessCorner.y) - 1), Size(2, 2)));
|
||||||
|
bool eq = (cv::countNonZero(goldCorner1 != winCorner) == 0) | (cv::countNonZero(goldCorner2 != winCorner) == 0);
|
||||||
|
ASSERT_TRUE(eq);
|
||||||
|
}
|
||||||
|
// TODO: fix aruco generateImage and add test aruco corners for generated image
|
||||||
|
}
|
||||||
|
|
||||||
// Temporary disabled in https://github.com/opencv/opencv/pull/24338
|
// Temporary disabled in https://github.com/opencv/opencv/pull/24338
|
||||||
// 5.x version produces conrnes with different shape than 4.x (32F_C2 instead of 2x 32FC1)
|
// 5.x version produces conrnes with different shape than 4.x (32F_C2 instead of 2x 32FC1)
|
||||||
TEST(Charuco, DISABLED_testSeveralBoardsWithCustomIds)
|
TEST(Charuco, DISABLED_testSeveralBoardsWithCustomIds)
|
||||||
|
@ -7,10 +7,6 @@
|
|||||||
#include "opencv2/ts.hpp"
|
#include "opencv2/ts.hpp"
|
||||||
#include "opencv2/objdetect.hpp"
|
#include "opencv2/objdetect.hpp"
|
||||||
|
|
||||||
#if defined CV_CXX11
|
#include <random>
|
||||||
#include <random>
|
|
||||||
#else
|
|
||||||
#include <cstdlib>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -5,16 +5,6 @@
|
|||||||
#include "test_precomp.hpp"
|
#include "test_precomp.hpp"
|
||||||
namespace opencv_test { namespace {
|
namespace opencv_test { namespace {
|
||||||
|
|
||||||
#if !defined CV_CXX11
|
|
||||||
// Wrapper for generating seeded random number via std::rand.
|
|
||||||
template<unsigned Seed>
|
|
||||||
class SeededRandFunctor {
|
|
||||||
public:
|
|
||||||
SeededRandFunctor() { std::srand(Seed); }
|
|
||||||
int operator()(int i) { return std::rand() % (i + 1); }
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
std::string encode_qrcode_images_name[] = {
|
std::string encode_qrcode_images_name[] = {
|
||||||
"version1_mode1.png", "version1_mode2.png", "version1_mode4.png",
|
"version1_mode1.png", "version1_mode2.png", "version1_mode4.png",
|
||||||
"version2_mode1.png", "version2_mode2.png", "version2_mode4.png",
|
"version2_mode1.png", "version2_mode2.png", "version2_mode4.png",
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user