Merge branch 4.x

This commit is contained in:
Alexander Smorkalov 2024-01-23 17:06:52 +03:00
commit decf6538a2
148 changed files with 3263 additions and 1564 deletions

View File

@ -1455,8 +1455,8 @@ if(WITH_WEBP OR HAVE_WEBP)
endif()
if(WITH_AVIF OR HAVE_AVIF)
if(AVIF_VERSION)
status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY} (ver ${AVIF_VERSION})" ELSE "NO")
if(libavif_VERSION)
status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY} (ver ${libavif_VERSION})" ELSE "NO")
else()
status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY}" ELSE "NO")
endif()
@ -1852,6 +1852,7 @@ if(BUILD_opencv_python3)
else()
status(" Libraries:" HAVE_opencv_python3 THEN "${PYTHON3_LIBRARIES}" ELSE NO)
endif()
status(" Limited API:" PYTHON3_LIMITED_API THEN "YES (ver ${PYTHON3_LIMITED_API_VERSION})" ELSE NO)
status(" numpy:" PYTHON3_NUMPY_INCLUDE_DIRS THEN "${PYTHON3_NUMPY_INCLUDE_DIRS} (ver ${PYTHON3_NUMPY_VERSION})" ELSE "NO (Python3 wrappers can not be generated)")
status(" install path:" HAVE_opencv_python3 THEN "${__INSTALL_PATH_PYTHON3}" ELSE "-")
endif()

View File

@ -1,8 +1,5 @@
## OpenCV: Open Source Computer Vision Library
### Keep OpenCV Free
OpenCV is raising funds to keep the library free for everyone, and we need the support of the entire community to do it. [Donate to OpenCV on IndieGoGo](http://igg.me/at/opencv5) before the campaign ends on December 16 to show your support.
### Resources
@ -13,6 +10,7 @@ OpenCV is raising funds to keep the library free for everyone, and we need the s
* previous forum (read only): <http://answers.opencv.org>
* Issue tracking: <https://github.com/opencv/opencv/issues>
* Additional OpenCV functionality: <https://github.com/opencv/opencv_contrib>
* Donate to OpenCV: <https://opencv.org/support/>
### Contributing

View File

@ -484,7 +484,6 @@ macro(ocv_check_compiler_optimization OPT)
endmacro()
macro(ocv_cpu_aarch64_baseline_merge_feature_options FEATURE_NAME_LIST FLAG_STRING COMMON_OPTION)
if(NOT MSVC)
unset(_POSTFIX)
# Check each feature option
foreach(OPT IN LISTS ${FEATURE_NAME_LIST})
@ -499,7 +498,6 @@ macro(ocv_cpu_aarch64_baseline_merge_feature_options FEATURE_NAME_LIST FLAG_STRI
if(NOT "x${_POSTFIX}" STREQUAL "x")
set(${FLAG_STRING} "${${FLAG_STRING}} ${COMMON_OPTION}${_POSTFIX}")
endif()
endif()
endmacro()
foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
@ -596,10 +594,12 @@ foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
endforeach()
if(AARCH64)
if(NOT MSVC)
# Define the list of NEON options to check
set(NEON_OPTIONS_LIST NEON_DOTPROD NEON_FP16 NEON_BF16)
set(BASE_ARCHITECTURE "-march=armv8.2-a")
ocv_cpu_aarch64_baseline_merge_feature_options(NEON_OPTIONS_LIST CPU_BASELINE_FLAGS ${BASE_ARCHITECTURE})
endif()
endif()
foreach(OPT ${CPU_BASELINE_REQUIRE})

View File

@ -1,13 +1,6 @@
if("${CMAKE_CXX_COMPILER};${CMAKE_C_COMPILER};${CMAKE_CXX_COMPILER_LAUNCHER}" MATCHES "ccache")
set(CMAKE_COMPILER_IS_CCACHE 1) # TODO: FIXIT Avoid setting of CMAKE_ variables
set(OPENCV_COMPILER_IS_CCACHE 1)
endif()
function(access_CMAKE_COMPILER_IS_CCACHE)
if(NOT OPENCV_SUPPRESS_DEPRECATIONS)
message(WARNING "DEPRECATED: CMAKE_COMPILER_IS_CCACHE is replaced to OPENCV_COMPILER_IS_CCACHE.")
endif()
endfunction()
variable_watch(CMAKE_COMPILER_IS_CCACHE access_CMAKE_COMPILER_IS_CCACHE)
if(ENABLE_CCACHE AND NOT OPENCV_COMPILER_IS_CCACHE)
# This works fine with Unix Makefiles and Ninja generators
find_host_program(CCACHE_PROGRAM ccache)
@ -391,7 +384,7 @@ endif()
# Apply "-Wl,--no-undefined" linker flags: https://github.com/opencv/opencv/pull/21347
if(NOT OPENCV_SKIP_LINK_NO_UNDEFINED)
if(UNIX AND (NOT APPLE OR NOT CMAKE_VERSION VERSION_LESS "3.2"))
if(UNIX AND ((NOT APPLE OR NOT CMAKE_VERSION VERSION_LESS "3.2") AND NOT CMAKE_SYSTEM_NAME MATCHES "OpenBSD"))
set(_option "-Wl,--no-undefined")
set(_saved_CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${_option}") # requires CMake 3.2+ and CMP0056

View File

@ -136,11 +136,11 @@ macro(ocv_check_windows_crt_linkage)
cmake_policy(GET CMP0091 MSVC_RUNTIME_SET_BY_ABSTRACTION)
if(MSVC_RUNTIME_SET_BY_ABSTRACTION STREQUAL "NEW")
if(NOT BUILD_SHARED_LIBS AND BUILD_WITH_STATIC_CRT)
set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} " /MT")
set(CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG} " /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
else()
set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} " /MD")
set(CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG} " /MDd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MD")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MDd")
endif()
endif()
endif()

View File

@ -270,6 +270,18 @@ find_python("${OPENCV_PYTHON3_VERSION}" "${MIN_VER_PYTHON3}" PYTHON3_LIBRARY PYT
PYTHON3_INCLUDE_DIR PYTHON3_INCLUDE_DIR2 PYTHON3_PACKAGES_PATH
PYTHON3_NUMPY_INCLUDE_DIRS PYTHON3_NUMPY_VERSION)
# Problem in numpy >=1.15 <1.17
OCV_OPTION(PYTHON3_LIMITED_API "Build with Python Limited API (not available with numpy >=1.15 <1.17)" NO
VISIBLE_IF PYTHON3_NUMPY_VERSION VERSION_LESS "1.15" OR NOT PYTHON3_NUMPY_VERSION VERSION_LESS "1.17")
if(PYTHON3_LIMITED_API)
set(_default_ver "0x03060000")
if(PYTHON3_VERSION_STRING VERSION_LESS "3.6")
# fix for older pythons
set(_default_ver "0x030${PYTHON3_VERSION_MINOR}0000")
endif()
set(PYTHON3_LIMITED_API_VERSION ${_default_ver} CACHE STRING "Minimal Python version for Limited API")
endif()
if(PYTHON_DEFAULT_EXECUTABLE)
set(PYTHON_DEFAULT_AVAILABLE "TRUE")
elseif(PYTHON3_EXECUTABLE AND PYTHON3INTERP_FOUND)

View File

@ -57,6 +57,18 @@ if(CANN_INSTALL_DIR)
set(HAVE_CANN OFF)
return()
endif()
# * libacl_dvpp_mpi.so
set(libacl_dvpp_mpi "${CANN_INSTALL_DIR}/lib64")
find_library(found_libacldvppmpi NAMES acl_dvpp_mpi PATHS ${libacl_dvpp_mpi} NO_DEFAULT_PATH)
if(found_libacldvppmpi)
set(libacl_dvpp_mpi ${found_libacldvppmpi})
message(STATUS "CANN: libacl_dvpp_mpi.so is found at ${libacl_dvpp_mpi}")
else()
message(STATUS "CANN: Missing libacl_dvpp_mpi.so. Turning off HAVE_CANN")
set(HAVE_CANN OFF)
return()
endif()
# * libgraph.so
set(lib_graph "${CANN_INSTALL_DIR}/compiler/lib64")
find_library(found_lib_graph NAMES graph PATHS ${lib_graph} NO_DEFAULT_PATH)
@ -105,6 +117,7 @@ if(CANN_INSTALL_DIR)
list(APPEND libs_cann ${lib_opsproto})
list(APPEND libs_cann ${lib_graph})
list(APPEND libs_cann ${lib_ge_compiler})
list(APPEND libs_cann ${libacl_dvpp_mpi})
# * lib_graph_base.so
if(NOT CANN_VERSION_BELOW_6_3_ALPHA002)

View File

@ -89,15 +89,11 @@ else()
ocv_update(OPENCV_ANDROID_NAMESPACE_DECLARATION "")
endif()
# set android gradle java version in build.gradle and set aidl config
if(NOT (ANDROID_GRADLE_PLUGIN_VERSION VERSION_LESS "8.0.0"))
# AGP-8.0 requires a minimum JDK version of JDK17
ocv_update(ANDROID_GRADLE_JAVA_VERSION_INIT "17")
# Enable aidl configuration for OpenCV compile with AGP-8.0
ocv_update(ANDROID_GRADLE_BUILD_FEATURE_AIDL "buildFeatures { aidl true }")
else()
ocv_update(ANDROID_GRADLE_JAVA_VERSION_INIT "1_8")
ocv_update(ANDROID_GRADLE_BUILD_FEATURE_AIDL "")
endif()
set(ANDROID_GRADLE_JAVA_VERSION "${ANDROID_GRADLE_JAVA_VERSION_INIT}" CACHE STRING "Android Gradle Java version")

View File

@ -9,6 +9,9 @@ How to use the OpenCV parallel_for_ to parallelize your code {#tutorial_how_to_u
| -: | :- |
| Compatibility | OpenCV >= 3.0 |
@note See also C++ lambda usage with parallel for in [tutorial](@ref tutorial_how_to_use_OpenCV_parallel_for_new).
Goal
----
@ -20,7 +23,7 @@ If you want more information about multithreading, you will have to refer to a r
to remain simple.
Precondition
----
------------
The first precondition is to have OpenCV built with a parallel framework.
In OpenCV 3.2, the following parallel frameworks are available in that order:
@ -50,7 +53,7 @@ We will use the example of drawing a Mandelbrot set to show how from a regular s
the code to parallelize the computation.
Theory
-----------
------
The Mandelbrot set definition has been named in tribute to the mathematician Benoit Mandelbrot by the mathematician
Adrien Douady. It has been famous outside of the mathematics field as the image representation is an example of a
@ -69,7 +72,7 @@ Here, we will just introduce the formula to draw the Mandelbrot set (from the me
> \f[\limsup_{n\to\infty}|z_{n+1}|\leqslant2\f]
Pseudocode
-----------
----------
A simple algorithm to generate a representation of the Mandelbrot set is called the
["escape time algorithm"](https://en.wikipedia.org/wiki/Mandelbrot_set#Escape_time_algorithm).
@ -110,10 +113,10 @@ On this figure, we recall that the real part of a complex number is on the x-axi
You can see that the whole shape can be repeatedly visible if we zoom at particular locations.
Implementation
-----------
--------------
Escape time algorithm implementation
--------------------------
------------------------------------
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-escape-time-algorithm
@ -121,7 +124,7 @@ Here, we used the [`std::complex`](http://en.cppreference.com/w/cpp/numeric/comp
complex number. This function performs the test to check if the pixel is in set or not and returns the "escaped" iteration.
Sequential Mandelbrot implementation
--------------------------
------------------------------------
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-sequential
@ -149,7 +152,7 @@ The green curve corresponds to a simple linear scale transformation, the blue on
and you can observe how the lowest values will be boosted when looking at the slope at these positions.
Parallel Mandelbrot implementation
--------------------------
----------------------------------
When looking at the sequential implementation, we can notice that each pixel is computed independently. To optimize the
computation, we can perform multiple pixel calculations in parallel, by exploiting the multi-core architecture of modern
@ -181,7 +184,7 @@ C++ 11 standard allows to simplify the parallel implementation by get rid of the
@snippet how_to_use_OpenCV_parallel_for_.cpp mandelbrot-parallel-call-cxx11
Results
-----------
-------
You can find the full tutorial code [here](https://github.com/opencv/opencv/blob/5.x/samples/cpp/tutorial_code/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.cpp).
The performance of the parallel implementation depends of the type of CPU you have. For instance, on 4 cores / 8 threads

View File

@ -18,7 +18,7 @@ This tutorial assumes you have the following installed and configured:
- Android Studio
- JDK
- Android SDK and NDK
- OpenCV for Android SDK from official [release page on Github](https://github.com/opencv/opencv/releases)
- Optional: OpenCV for Android SDK from official [release page on Github](https://github.com/opencv/opencv/releases)
or [SourceForge](https://sourceforge.net/projects/opencvlibrary/). Advanced: as alternative the SDK may be
built from source code by [instruction on wiki](https://github.com/opencv/opencv/wiki/Custom-OpenCV-Android-SDK-and-AAR-package-build).
@ -26,8 +26,9 @@ If you need help with anything of the above, you may refer to our @ref tutorial_
If you encounter any error after thoroughly following these steps, feel free to contact us via OpenCV [forum](https://forum.opencv.org). We'll do our best to help you out.
Hello OpenCV sample
-------------------
Hello OpenCV sample with SDK
----------------------------
In this section we're gonna create a simple app that does nothing but OpenCV loading. In next section we'll extend it to support camera.
@ -75,11 +76,10 @@ In addition to this instruction you can use some video guide, for example [this
@endcode
The fix was found [here](https://stackoverflow.com/questions/73225714/import-opencv-sdk-to-android-studio-chipmunk)
6. OpenCV project uses `aidl` and `buildConfig` features. Please enable them in
6. OpenCV project uses `buildConfig` feature. Please enable it in
`MyApplication/OpenCV/build.gradle` file to `android` block:
@code{.gradle}
buildFeatures{
aidl true
buildConfig true
}
@ -115,6 +115,43 @@ In addition to this instruction you can use some video guide, for example [this
![](images/run_app.png)
Hello OpenCV sample with Maven Central
--------------------------------------
Since OpenCV 4.9.0 OpenCV for Android package is available with Maven Central and may be installed
automatically as Gradle dependency. In this section we're gonna create a simple app that does nothing
but OpenCV loading with Maven Central.
1. Open Android Studio and create empty project by choosing ***Empty Views Activity***
![](images/create_empty_project.png)
2. Setup the project:
- Choose ***Java*** language
- Choose ***Groovy DSL*** build configuration language
- Choose ***Minimum SDK*** with the version number not less than OpenCV supports. For 4.9.0 minimal SDK version is 21.
![](images/setup_project.png)
3. Edit `build.gradle` and add OpenCV library to Dependencies list like this:
@code{.gradle}
dependencies {
implementation 'org.opencv:opencv:4.9.0'
}
@endcode
`4.9.0` may be replaced by any version available as [official release](https://central.sonatype.com/artifact/org.opencv/opencv).
4. Before using any OpenCV function you have to load the library first. If your application includes other
OpenCV-dependent native libraries you should load them ***after*** OpenCV initialization. Add the following
code to load the library at app start:
@snippet samples/android/tutorial-1-camerapreview/src/org/opencv/samples/tutorial1/Tutorial1Activity.java ocv_loader_init
Like this:
![](images/sample_code.png)
5. Choose a device to check the sample on and run the code by pressing `run` button
![](images/run_app.png)
Camera view sample
------------------

View File

@ -378,6 +378,9 @@ our OpenCV library that we use in our projects. Start up a command window and en
setx OpenCV_DIR D:\OpenCV\build\x64\vc16 (suggested for Visual Studio 2019 - 64 bit Windows)
setx OpenCV_DIR D:\OpenCV\build\x86\vc16 (suggested for Visual Studio 2019 - 32 bit Windows)
setx OpenCV_DIR D:\OpenCV\build\x64\vc17 (suggested for Visual Studio 2022 - 64 bit Windows)
setx OpenCV_DIR D:\OpenCV\build\x86\vc17 (suggested for Visual Studio 2022 - 32 bit Windows)
@endcode
Here the directory is where you have your OpenCV binaries (*extracted* or *built*). You can have
different platform (e.g. x64 instead of x86) or compiler type, so substitute appropriate value.

View File

@ -7,10 +7,8 @@
#include <opencv2/core/mat.hpp>
#ifdef CV_CXX11
//#include <future>
#include <chrono>
#endif
namespace cv {
@ -69,7 +67,6 @@ public:
CV_WRAP bool valid() const CV_NOEXCEPT;
#ifdef CV_CXX11
inline AsyncArray(AsyncArray&& o) { p = o.p; o.p = NULL; }
inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
@ -89,7 +86,6 @@ public:
std::future<Mat> getFutureMat() const;
std::future<UMat> getFutureUMat() const;
#endif
#endif
// PImpl

View File

@ -147,7 +147,7 @@
#endif
#if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
# include<riscv-vector.h>
# include<riscv_vector.h>
# define CV_RVV071 1
#endif

View File

@ -476,6 +476,8 @@ Cv64suf;
#define CV_WRAP_MAPPABLE(mappable)
#define CV_WRAP_PHANTOM(phantom_header)
#define CV_WRAP_DEFAULT(val)
/* Indicates that the function parameter has filesystem path semantic */
#define CV_WRAP_FILE_PATH
/****************************************************************************************\
* Matrix type (Mat) *
@ -755,89 +757,44 @@ __CV_ENUM_FLAGS_BITWISE_XOR_EQ (EnumType, EnumType)
#endif
/****************************************************************************************\
* CV_NODISCARD attribute (deprecated, GCC only) *
* DONT USE: use instead the standard CV_NODISCARD_STD macro above *
* this legacy method silently fails to issue warning until some version *
* after gcc 6.3.0. Yet with gcc 7+ you can use the above standard method *
* which makes this method useless. Don't use it. *
* @deprecated use instead CV_NODISCARD_STD *
\****************************************************************************************/
#ifndef CV_NODISCARD
# if defined(__GNUC__)
# define CV_NODISCARD __attribute__((__warn_unused_result__))
# elif defined(__clang__) && defined(__has_attribute)
# if __has_attribute(__warn_unused_result__)
# define CV_NODISCARD __attribute__((__warn_unused_result__))
# endif
# endif
#endif
#ifndef CV_NODISCARD
# define CV_NODISCARD /* nothing by default */
#endif
/****************************************************************************************\
* C++ 11 *
\****************************************************************************************/
#ifndef CV_CXX11
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)
# define CV_CXX11 1
# endif
#else
# if CV_CXX11 == 0
# undef CV_CXX11
# endif
#endif
#ifndef CV_CXX11
#ifdef __cplusplus
// MSVC was stuck at __cplusplus == 199711L for a long time, even where it supports C++11,
// so check _MSC_VER instead. See:
// <https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus>
# if defined(_MSC_VER)
# if _MSC_VER < 1800
# error "OpenCV 4.x+ requires enabled C++11 support"
# endif
# elif __cplusplus < 201103L
# error "OpenCV 4.x+ requires enabled C++11 support"
# endif
#endif
#ifndef CV_CXX11
# define CV_CXX11 1
#endif
#define CV_CXX_MOVE_SEMANTICS 1
#define CV_CXX_MOVE(x) std::move(x)
#define CV_CXX_STD_ARRAY 1
#include <array>
#ifndef CV_OVERRIDE
# define CV_OVERRIDE override
#endif
#ifndef CV_FINAL
# define CV_FINAL final
#endif
#ifndef CV_NOEXCEPT
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
# define CV_NOEXCEPT noexcept
# endif
#endif
#ifndef CV_NOEXCEPT
# define CV_NOEXCEPT
#endif
#ifndef CV_CONSTEXPR
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
# define CV_CONSTEXPR constexpr
# endif
#endif
#ifndef CV_CONSTEXPR
# define CV_CONSTEXPR
#endif
// Integer types portability
#ifdef OPENCV_STDINT_HEADER
#include OPENCV_STDINT_HEADER
#elif defined(__cplusplus)
#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */
namespace cv {
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef signed short int16_t;
typedef unsigned short uint16_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
}
#elif defined(_MSC_VER) || __cplusplus >= 201103L
#ifdef __cplusplus
#include <cstdint>
namespace cv {
using std::int8_t;
@ -849,19 +806,6 @@ using std::uint32_t;
using std::int64_t;
using std::uint64_t;
}
#else
#include <stdint.h>
namespace cv {
typedef ::int8_t int8_t;
typedef ::uint8_t uint8_t;
typedef ::int16_t int16_t;
typedef ::uint16_t uint16_t;
typedef ::int32_t int32_t;
typedef ::uint32_t uint32_t;
typedef ::int64_t int64_t;
typedef ::uint64_t uint64_t;
}
#endif
#else // pure C
#include <stdint.h>
#endif

View File

@ -52,10 +52,8 @@ public:
*/
void setException(const cv::Exception& exception);
#ifdef CV_CXX11
explicit AsyncPromise(AsyncPromise&& o) { p = o.p; o.p = NULL; }
AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
#endif
// PImpl

View File

@ -8,14 +8,8 @@
#ifndef CV__EXCEPTION_PTR
# if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2
# define CV__EXCEPTION_PTR 0 // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938
# elif defined(CV_CXX11)
# else
# define CV__EXCEPTION_PTR 1
# elif defined(_MSC_VER)
# define CV__EXCEPTION_PTR (_MSC_VER >= 1600)
# elif defined(__clang__)
# define CV__EXCEPTION_PTR 0 // C++11 only (see above)
# elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__)
# define CV__EXCEPTION_PTR (__GXX_EXPERIMENTAL_CXX0X__ > 0)
# endif
#endif
#ifndef CV__EXCEPTION_PTR

View File

@ -61,8 +61,7 @@
#endif
#if !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT)
#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 \
&& defined(CV_CXX11) && defined(CV_CXX_STD_ARRAY)
#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
#include <unsupported/Eigen/CXX11/Tensor>
#define OPENCV_EIGEN_TENSOR_SUPPORT 1
#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3

File diff suppressed because it is too large Load Diff

View File

@ -53,6 +53,7 @@
#include "opencv2/core/bufferpool.hpp"
#include <array>
#include <type_traits>
namespace cv

View File

@ -386,10 +386,8 @@ public:
static Vec randn(_Tp a, _Tp b);
static Vec randu(_Tp a, _Tp b);
static Vec zeros();
#ifdef CV_CXX11
static Vec diag(_Tp alpha) = delete;
static Vec eye() = delete;
#endif
//! per-element multiplication
Vec mul(const Vec<_Tp, cn>& v) const;
@ -412,9 +410,7 @@ public:
const _Tp& operator ()(int i) const;
_Tp& operator ()(int i);
#ifdef CV_CXX11
Vec<_Tp, cn>& operator=(const Vec<_Tp, cn>& rhs) = default;
#endif
Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp);
Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp);

View File

@ -28,7 +28,7 @@
#define OPENCV_CORE_QUATERNION_INL_HPP
#ifndef OPENCV_CORE_QUATERNION_HPP
#erorr This is not a standalone header. Include quaternion.hpp instead.
#error This is not a standalone header. Include quaternion.hpp instead.
#endif
//@cond IGNORE

View File

@ -9,8 +9,6 @@
//#define OPENCV_DISABLE_ALLOCATOR_STATS
#ifdef CV_CXX11
#include <atomic>
#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE
@ -26,14 +24,6 @@
#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE long long
#endif
#else // CV_CXX11
#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE
#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE int // CV_XADD supports int only
#endif
#endif // CV_CXX11
namespace cv { namespace utils {
#ifdef CV__ALLOCATOR_STATS_LOG
@ -59,7 +49,7 @@ public:
void onAllocate(size_t /*sz*/) {}
void onFree(size_t /*sz*/) {}
#elif defined(CV_CXX11)
#else
protected:
typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t;
@ -104,49 +94,7 @@ public:
#endif
curr -= (counter_t)sz;
}
#else // non C++11
protected:
typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t;
volatile counter_t curr, total, total_allocs, peak; // overflow is possible, CV_XADD operates with 'int' only
public:
AllocatorStatistics()
: curr(0), total(0), total_allocs(0), peak(0)
{}
~AllocatorStatistics() CV_OVERRIDE {}
uint64_t getCurrentUsage() const CV_OVERRIDE { return (uint64_t)curr; }
uint64_t getTotalUsage() const CV_OVERRIDE { return (uint64_t)total; }
uint64_t getNumberOfAllocations() const CV_OVERRIDE { return (uint64_t)total_allocs; }
uint64_t getPeakUsage() const CV_OVERRIDE { return (uint64_t)peak; }
void resetPeakUsage() CV_OVERRIDE { peak = curr; }
// Controller interface
void onAllocate(size_t sz)
{
#ifdef CV__ALLOCATOR_STATS_LOG
CV__ALLOCATOR_STATS_LOG(cv::format("allocate: %lld (curr=%lld)", (long long int)sz, (long long int)curr));
#endif
counter_t new_curr = (counter_t)CV_XADD(&curr, (counter_t)sz) + (counter_t)sz;
peak = std::max((counter_t)peak, new_curr); // non-thread safe
//CV_XADD(&total, (uint64_t)sz); // overflow with int, non-reliable...
total += sz;
CV_XADD(&total_allocs, (counter_t)1);
}
void onFree(size_t sz)
{
#ifdef CV__ALLOCATOR_STATS_LOG
CV__ALLOCATOR_STATS_LOG(cv::format("free: %lld (curr=%lld)", (long long int)sz, (long long int)curr));
#endif
CV_XADD(&curr, (counter_t)-sz);
}
#endif
#endif // OPENCV_DISABLE_ALLOCATOR_STATS
};
#ifdef CV__ALLOCATOR_STATS_LOG

View File

@ -3,7 +3,6 @@
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
//#undef CV_CXX11 // debug non C++11 mode
#include "opencv2/core/async.hpp"
#include "opencv2/core/detail/async_promise.hpp"
@ -16,11 +15,9 @@
#ifndef OPENCV_DISABLE_THREAD_SUPPORT
#ifdef CV_CXX11
#include <mutex>
#include <condition_variable>
#include <chrono>
#endif
namespace cv {
@ -37,12 +34,8 @@ struct AsyncArray::Impl
void releasePromise() CV_NOEXCEPT { CV_XADD(&refcount_promise, -1); if(1 == CV_XADD(&refcount, -1)) delete this; } \
int refcount_promise;
#ifdef CV_CXX11
mutable std::mutex mtx;
mutable std::condition_variable cond_var;
#else
mutable cv::Mutex mtx;
#endif
mutable bool has_result; // Mat, UMat or exception
@ -88,11 +81,7 @@ struct AsyncArray::Impl
if (!wait_for(timeoutNs))
return false;
}
#ifdef CV_CXX11
std::unique_lock<std::mutex> lock(mtx);
#else
cv::AutoLock lock(mtx);
#endif
if (has_result)
{
if (!result_mat.empty())
@ -145,7 +134,6 @@ struct AsyncArray::Impl
if (timeoutNs == 0)
return has_result;
CV_LOG_INFO(NULL, "Waiting for async result ...");
#ifdef CV_CXX11
std::unique_lock<std::mutex> lock(mtx);
const auto cond_pred = [&]{ return has_result == true; };
if (timeoutNs > 0)
@ -156,9 +144,6 @@ struct AsyncArray::Impl
CV_Assert(has_result);
return true;
}
#else
CV_Error(Error::StsNotImplemented, "OpenCV has been built without async waiting support (C++11 is required)");
#endif
}
AsyncArray getArrayResult()
@ -175,11 +160,7 @@ struct AsyncArray::Impl
{
if (future_is_returned && refcount_future == 0)
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
#ifdef CV_CXX11
std::unique_lock<std::mutex> lock(mtx);
#else
cv::AutoLock lock(mtx);
#endif
CV_Assert(!has_result);
int k = value.kind();
if (k == _InputArray::UMAT)
@ -193,9 +174,7 @@ struct AsyncArray::Impl
value.copyTo(*result_mat.get());
}
has_result = true;
#ifdef CV_CXX11
cond_var.notify_all();
#endif
}
#if CV__EXCEPTION_PTR
@ -203,18 +182,12 @@ struct AsyncArray::Impl
{
if (future_is_returned && refcount_future == 0)
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
#ifdef CV_CXX11
std::unique_lock<std::mutex> lock(mtx);
#else
cv::AutoLock lock(mtx);
#endif
CV_Assert(!has_result);
has_exception = true;
exception = e;
has_result = true;
#ifdef CV_CXX11
cond_var.notify_all();
#endif
}
#endif
@ -222,18 +195,12 @@ struct AsyncArray::Impl
{
if (future_is_returned && refcount_future == 0)
CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
#ifdef CV_CXX11
std::unique_lock<std::mutex> lock(mtx);
#else
cv::AutoLock lock(mtx);
#endif
CV_Assert(!has_result);
has_exception = true;
cv_exception = e;
has_result = true;
#ifdef CV_CXX11
cond_var.notify_all();
#endif
}
};

View File

@ -1952,12 +1952,7 @@ void _OutputArray::move(UMat& u) const
int k = kind();
if (k == UMAT)
{
#ifdef CV_CXX11
*(UMat*)obj = std::move(u);
#else
*(UMat*)obj = u;
u.release();
#endif
}
else if (k == MAT)
{
@ -1992,12 +1987,7 @@ void _OutputArray::move(Mat& m) const
}
else if (k == MAT)
{
#ifdef CV_CXX11
*(Mat*)obj = std::move(m);
#else
*(Mat*)obj = m;
m.release();
#endif
}
else if (k == MATX)
{

View File

@ -912,8 +912,7 @@ int getNumberOfCPUs_()
* the minimum most value as it has high probability of being right and safe.
* Return 1 if we get 0 or not found on all methods.
*/
#if defined CV_CXX11 \
&& !defined(__MINGW32__) /* not implemented (2020-03) */ \
#if !defined(__MINGW32__) /* not implemented (2020-03) */
/*
* Check for this standard C++11 way, we do not return directly because

View File

@ -120,11 +120,15 @@ void* allocSingletonNewBuffer(size_t size) { return malloc(size); }
#include <cstdlib> // std::abort
#endif
#if defined __ANDROID__ || defined __unix__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __HAIKU__ || defined __Fuchsia__
#if defined __ANDROID__ || defined __unix__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __HAIKU__ || defined __Fuchsia__ || defined __QNX__
# include <unistd.h>
# include <fcntl.h>
#if defined __QNX__
# include <sys/elf.h>
# include <sys/auxv.h>
using Elf64_auxv_t = auxv64_t;
# include <elfdefinitions.h>
const uint64_t AT_HWCAP = NT_GNU_HWCAP;
#else
# include <elf.h>
#endif
@ -251,7 +255,7 @@ std::wstring GetTempFileNameWinRT(std::wstring prefix)
#include "omp.h"
#endif
#if defined __unix__ || defined __APPLE__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ || defined __GLIBC__ || defined __HAIKU__
#if defined __unix__ || defined __APPLE__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __GLIBC__ || defined __HAIKU__
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
@ -301,9 +305,7 @@ DECLARE_CV_CPUID_X86
#endif
#endif
#if defined CV_CXX11
#include <chrono>
#endif
#include <chrono>
namespace cv
{
@ -562,7 +564,7 @@ struct HWFeatures
}
#endif // CV_CPUID_X86
#if defined __ANDROID__ || defined __linux__ || defined __FreeBSD__ || defined __QNX__
#if defined __ANDROID__ || defined __linux__ || defined __QNX__
#ifdef __aarch64__
have[CV_CPU_NEON] = true;
have[CV_CPU_FP16] = true;
@ -581,10 +583,12 @@ struct HWFeatures
have[CV_CPU_NEON_DOTPROD] = (auxv.a_un.a_val & (1 << 20)) != 0; // HWCAP_ASIMDDP
have[CV_CPU_NEON_FP16] = (auxv.a_un.a_val & (1 << 10)) != 0; // HWCAP_ASIMDHP
}
#if defined(AT_HWCAP2)
else if (auxv.a_type == AT_HWCAP2)
{
have[CV_CPU_NEON_BF16] = (auxv.a_un.a_val & (1 << 14)) != 0; // HWCAP2_BF16
}
#endif
}
close(cpufile);
@ -611,7 +615,7 @@ struct HWFeatures
CV_LOG_INFO(NULL, "- FP16 instructions is NOT enabled via build flags");
#endif
#endif
#elif defined __arm__ && !defined __FreeBSD__
#elif defined __arm__
int cpufile = open("/proc/self/auxv", O_RDONLY);
if (cpufile >= 0)
@ -903,50 +907,15 @@ bool useOptimized(void)
int64 getTickCount(void)
{
#if defined CV_CXX11
std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
return (int64)now.time_since_epoch().count();
#elif defined _WIN32 || defined WINCE
LARGE_INTEGER counter;
QueryPerformanceCounter( &counter );
return (int64)counter.QuadPart;
#elif defined __MACH__ && defined __APPLE__
return (int64)mach_absolute_time();
#elif defined __unix__
struct timespec tp;
clock_gettime(CLOCK_MONOTONIC, &tp);
return (int64)tp.tv_sec*1000000000 + tp.tv_nsec;
#else
struct timeval tv;
gettimeofday(&tv, NULL);
return (int64)tv.tv_sec*1000000 + tv.tv_usec;
#endif
}
double getTickFrequency(void)
{
#if defined CV_CXX11
using clock_period_t = std::chrono::steady_clock::duration::period;
double clock_freq = clock_period_t::den / clock_period_t::num;
return clock_freq;
#elif defined _WIN32 || defined WINCE
LARGE_INTEGER freq;
QueryPerformanceFrequency(&freq);
return (double)freq.QuadPart;
#elif defined __MACH__ && defined __APPLE__
static double freq = 0;
if( freq == 0 )
{
mach_timebase_info_data_t sTimebaseInfo;
mach_timebase_info(&sTimebaseInfo);
freq = sTimebaseInfo.denom*1e9/sTimebaseInfo.numer;
}
return freq;
#elif defined __unix__
return 1e9;
#else
return 1e6;
#endif
}
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__ || defined __ppc__)

View File

@ -7,7 +7,7 @@
#include <opencv2/core/bindings_utils.hpp>
#if defined(CV_CXX11) && !defined(OPENCV_DISABLE_THREAD_SUPPORT)
#if !defined(OPENCV_DISABLE_THREAD_SUPPORT)
#include <thread>
#include <chrono>
#endif
@ -85,7 +85,7 @@ TEST(Core_Async, LikePythonTest)
}
#if defined(CV_CXX11) && !defined(OPENCV_DISABLE_THREAD_SUPPORT)
#if !defined(OPENCV_DISABLE_THREAD_SUPPORT)
TEST(Core_Async, AsyncThread_Simple)
{

View File

@ -8,10 +8,8 @@
#include <opencv2/core/utils/fp_control_utils.hpp>
#ifdef CV_CXX11
#include <chrono>
#include <thread>
#endif
namespace opencv_test { namespace {
@ -282,9 +280,7 @@ public:
// FP state is not supported
// no checks
}
#ifdef CV_CXX11
std::this_thread::sleep_for(std::chrono::milliseconds(100));
#endif
}
cv::details::FPDenormalsModeState base_state;

View File

@ -4,6 +4,8 @@
#ifndef __OPENCV_TEST_PRECOMP_HPP__
#define __OPENCV_TEST_PRECOMP_HPP__
#include <array>
#include "opencv2/ts.hpp"
#include "opencv2/ts/ocl_test.hpp"
#include "opencv2/core/private.hpp"

View File

@ -4,9 +4,7 @@
// This is .hpp file included from test_utils.cpp
#ifdef CV_CXX11
#include <thread> // std::thread
#endif
#include "opencv2/core/utils/tls.hpp"
@ -34,8 +32,6 @@ public:
int TLSReporter::g_last_id = 0;
int TLSReporter::g_allocated = 0;
#ifdef CV_CXX11
template<typename T>
static void callNThreadsWithTLS(int N, TLSData<T>& tls)
{
@ -129,6 +125,4 @@ static void testTLSAccumulator(bool detachFirst)
TEST(Core_TLS, AccumulatorHoldData_detachData) { testTLSAccumulator(true); }
TEST(Core_TLS, AccumulatorHoldData_gather) { testTLSAccumulator(false); }
#endif
}} // namespace

View File

@ -1183,6 +1183,11 @@ CV__DNN_INLINE_NS_BEGIN
static Ptr<AttentionLayer> create(const LayerParams &params);
};
class CV_EXPORTS GroupNormLayer : public Layer {
public:
static Ptr<GroupNormLayer> create(const LayerParams &params);
};
//! @}
//! @}
CV__DNN_INLINE_NS_END

View File

@ -444,7 +444,7 @@ CV__DNN_INLINE_NS_BEGIN
* Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
* backend.
*/
CV_WRAP static Net readFromModelOptimizer(const String& xml, const String& bin);
CV_WRAP static Net readFromModelOptimizer(CV_WRAP_FILE_PATH const String& xml, CV_WRAP_FILE_PATH const String& bin);
/** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR).
* @param[in] bufferModelConfig buffer with model's configuration.
@ -477,7 +477,7 @@ CV__DNN_INLINE_NS_BEGIN
* @param path path to output file with .dot extension
* @see dump()
*/
CV_WRAP void dumpToFile(const String& path);
CV_WRAP void dumpToFile(CV_WRAP_FILE_PATH const String& path);
/** @brief Adds new layer to the net.
* @param name unique name of the adding layer.
* @param type typename of the adding layer (type must be registered in LayerRegister).
@ -839,7 +839,7 @@ CV__DNN_INLINE_NS_BEGIN
* @param darknetModel path to the .weights file with learned network.
* @returns Network object that ready to do forward, throw an exception in failure cases.
*/
CV_EXPORTS_W Net readNetFromDarknet(const String &cfgFile, const String &darknetModel = String());
CV_EXPORTS_W Net readNetFromDarknet(CV_WRAP_FILE_PATH const String &cfgFile, CV_WRAP_FILE_PATH const String &darknetModel = String());
/** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
* @param bufferCfg A buffer contains a content of .cfg file with text description of the network architecture.
@ -864,7 +864,7 @@ CV__DNN_INLINE_NS_BEGIN
* @param caffeModel path to the .caffemodel file with learned network.
* @returns Net object.
*/
CV_EXPORTS_W Net readNetFromCaffe(const String &prototxt, const String &caffeModel = String());
CV_EXPORTS_W Net readNetFromCaffe(CV_WRAP_FILE_PATH const String &prototxt, CV_WRAP_FILE_PATH const String &caffeModel = String());
/** @brief Reads a network model stored in Caffe model in memory.
* @param bufferProto buffer containing the content of the .prototxt file
@ -893,7 +893,7 @@ CV__DNN_INLINE_NS_BEGIN
* let us make it more flexible.
* @returns Net object.
*/
CV_EXPORTS_W Net readNetFromTensorflow(const String &model, const String &config = String());
CV_EXPORTS_W Net readNetFromTensorflow(CV_WRAP_FILE_PATH const String &model, CV_WRAP_FILE_PATH const String &config = String());
/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
* @param bufferModel buffer containing the content of the pb file
@ -918,7 +918,7 @@ CV__DNN_INLINE_NS_BEGIN
* @param model path to the .tflite file with binary flatbuffers description of the network architecture
* @returns Net object.
*/
CV_EXPORTS_W Net readNetFromTFLite(const String &model);
CV_EXPORTS_W Net readNetFromTFLite(CV_WRAP_FILE_PATH const String &model);
/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/lite">TFLite</a> framework's format.
* @param bufferModel buffer containing the content of the tflite file
@ -957,7 +957,7 @@ CV__DNN_INLINE_NS_BEGIN
* or @ref readNetFromDarknet. An order of @p model and @p config
* arguments does not matter.
*/
CV_EXPORTS_W Net readNet(const String& model, const String& config = "", const String& framework = "");
CV_EXPORTS_W Net readNet(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "", const String& framework = "");
/**
* @brief Read deep learning network represented in one of the supported formats.
@ -979,7 +979,7 @@ CV__DNN_INLINE_NS_BEGIN
* backend.
*/
CV_EXPORTS_W
Net readNetFromModelOptimizer(const String &xml, const String &bin = "");
Net readNetFromModelOptimizer(CV_WRAP_FILE_PATH const String &xml, CV_WRAP_FILE_PATH const String &bin = "");
/** @brief Load a network from Intel's Model Optimizer intermediate representation.
* @param[in] bufferModelConfig Buffer contains XML configuration with network's topology.
@ -1008,7 +1008,7 @@ CV__DNN_INLINE_NS_BEGIN
* @param onnxFile path to the .onnx file with text description of the network architecture.
* @returns Network object that ready to do forward, throw an exception in failure cases.
*/
CV_EXPORTS_W Net readNetFromONNX(const String &onnxFile);
CV_EXPORTS_W Net readNetFromONNX(CV_WRAP_FILE_PATH const String &onnxFile);
/** @brief Reads a network model from <a href="https://onnx.ai/">ONNX</a>
* in-memory buffer.
@ -1031,7 +1031,7 @@ CV__DNN_INLINE_NS_BEGIN
* @param path to the .pb file with input tensor.
* @returns Mat.
*/
CV_EXPORTS_W Mat readTensorFromONNX(const String& path);
CV_EXPORTS_W Mat readTensorFromONNX(CV_WRAP_FILE_PATH const String& path);
/** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center,
* subtract @p mean values, scales values by @p scalefactor, swap Blue and Red channels.
@ -1204,7 +1204,7 @@ CV__DNN_INLINE_NS_BEGIN
* is taken from NVidia's Caffe fork: https://github.com/NVIDIA/caffe.
* So the resulting model may be used there.
*/
CV_EXPORTS_W void shrinkCaffeModel(const String& src, const String& dst,
CV_EXPORTS_W void shrinkCaffeModel(CV_WRAP_FILE_PATH const String& src, CV_WRAP_FILE_PATH const String& dst,
const std::vector<String>& layersTypes = std::vector<String>());
/** @brief Create a text representation for a binary network stored in protocol buffer format.
@ -1213,7 +1213,7 @@ CV__DNN_INLINE_NS_BEGIN
*
* @note To reduce output file size, trained weights are not included.
*/
CV_EXPORTS_W void writeTextGraph(const String& model, const String& output);
CV_EXPORTS_W void writeTextGraph(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& output);
/** @brief Performs non maximum suppression given boxes and corresponding scores.
@ -1318,7 +1318,7 @@ CV__DNN_INLINE_NS_BEGIN
* @param[in] model Binary file contains trained weights.
* @param[in] config Text file contains network configuration.
*/
CV_WRAP Model(const String& model, const String& config = "");
CV_WRAP Model(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
/**
* @brief Create model from deep learning network.
@ -1423,7 +1423,7 @@ CV__DNN_INLINE_NS_BEGIN
* @param[in] model Binary file contains trained weights.
* @param[in] config Text file contains network configuration.
*/
CV_WRAP ClassificationModel(const String& model, const String& config = "");
CV_WRAP ClassificationModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
/**
* @brief Create model from deep learning network.
@ -1473,7 +1473,7 @@ CV__DNN_INLINE_NS_BEGIN
* @param[in] model Binary file contains trained weights.
* @param[in] config Text file contains network configuration.
*/
CV_WRAP KeypointsModel(const String& model, const String& config = "");
CV_WRAP KeypointsModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
/**
* @brief Create model from deep learning network.
@ -1505,7 +1505,7 @@ CV__DNN_INLINE_NS_BEGIN
* @param[in] model Binary file contains trained weights.
* @param[in] config Text file contains network configuration.
*/
CV_WRAP SegmentationModel(const String& model, const String& config = "");
CV_WRAP SegmentationModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
/**
* @brief Create model from deep learning network.
@ -1536,7 +1536,7 @@ CV__DNN_INLINE_NS_BEGIN
* @param[in] model Binary file contains trained weights.
* @param[in] config Text file contains network configuration.
*/
CV_WRAP DetectionModel(const String& model, const String& config = "");
CV_WRAP DetectionModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "");
/**
* @brief Create model from deep learning network.
@ -1602,7 +1602,7 @@ public:
* @param[in] config Text file contains network configuration
*/
CV_WRAP inline
TextRecognitionModel(const std::string& model, const std::string& config = "")
TextRecognitionModel(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "")
: TextRecognitionModel(readNet(model, config)) { /* nothing */ }
/**
@ -1757,7 +1757,7 @@ public:
* @param[in] config Text file contains network configuration.
*/
CV_WRAP inline
TextDetectionModel_EAST(const std::string& model, const std::string& config = "")
TextDetectionModel_EAST(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "")
: TextDetectionModel_EAST(readNet(model, config)) { /* nothing */ }
/**
@ -1818,7 +1818,7 @@ public:
* @param[in] config Text file contains network configuration.
*/
CV_WRAP inline
TextDetectionModel_DB(const std::string& model, const std::string& config = "")
TextDetectionModel_DB(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "")
: TextDetectionModel_DB(readNet(model, config)) { /* nothing */ }
CV_WRAP TextDetectionModel_DB& setBinaryThreshold(float binaryThreshold);

View File

@ -258,22 +258,21 @@ PERF_TEST_P_(Layer_Slice, FastNeuralStyle_eccv16)
test_slice<4>(inputShape, begin, end);
}
struct Layer_Scatter : public TestBaseWithParam<tuple<Backend, Target> >
{
void test_layer(const std::vector<int>& shape, const String reduction = "none", int axis = 0)
{
int backendId = get<0>(GetParam());
int targetId = get<1>(GetParam());
using Layer_Scatter = TestBaseWithParam<tuple<std::vector<int>, std::string, int, tuple<Backend, Target>>>;
PERF_TEST_P_(Layer_Scatter, scatter) {
std::vector<int> shape = get<0>(GetParam());
std::string reduction = get<1>(GetParam());
int axis = get<2>(GetParam());
int backend_id = get<0>(get<3>(GetParam()));
int target_id = get<1>(get<3>(GetParam()));
Mat data(shape, CV_32FC1);
Mat indices(shape, CV_32FC1);
Mat updates(shape, CV_32FC1);
Scalar mean = 0.f;
Scalar std = 1.f;
randn(data, mean, std);
randn(data, 0.f, 1.f);
randu(indices, 0, shape[axis]);
randn(updates, mean, std);
randn(updates, 0.f, 1.f);
indices.convertTo(indices, CV_32SC1, 1, -1);
@ -291,50 +290,46 @@ struct Layer_Scatter : public TestBaseWithParam<tuple<Backend, Target> >
// warmup
{
std::vector<String> inpNames(3);
inpNames[0] = "data";
inpNames[1] = "indices";
inpNames[2] = "updates";
net.setInputsNames(inpNames);
net.setInput(data, inpNames[0]);
net.setInput(indices, inpNames[1]);
net.setInput(updates, inpNames[2]);
std::vector<String> input_names{"data", "indices", "updates"};
net.setInputsNames(input_names);
net.setInput(data, input_names[0]);
net.setInput(indices, input_names[1]);
net.setInput(updates, input_names[2]);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
net.setPreferableBackend(backend_id);
net.setPreferableTarget(target_id);
Mat out = net.forward();
}
// perf
TEST_CYCLE()
{
Mat res = net.forward();
}
SANITY_CHECK_NOTHING();
}
int N = 8;
int C = 256;
int H = 128;
int W = 100;
};
PERF_TEST_P_(Layer_Scatter, DISABLED_Scatter)
{
test_layer({N, C, H, W});
}
PERF_TEST_P_(Layer_Scatter, DISABLED_Scatter_add)
{
test_layer({N, C, H, W}, "add");
}
INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, Combine(
Values(std::vector<int>{2, 128, 64, 50}),
Values(std::string("none"), std::string("add")),
Values(0), // use Values(0, 1, 2, 3) for more details
dnnBackendsAndTargets(/* withInferenceEngine= */ false,
/* withHalide= */ false,
/* withCpuOCV= */ true,
/* withVkCom= */ false,
/* withCUDA= */ false,
/* withNgraph= */ false,
/* withWebnn= */ false,
/* withCann= */ false) // only test on CPU
));
struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
{
void test_layer(const std::vector<int>& shape, const String reduction = "none")
{
int backendId = get<0>(GetParam());
int targetId = get<1>(GetParam());
using Layer_ScatterND = TestBaseWithParam<tuple<std::vector<int>, std::string, tuple<Backend, Target>>>;
PERF_TEST_P_(Layer_ScatterND, scatterND) {
std::vector<int> shape = get<0>(GetParam());
std::string reduction = get<1>(GetParam());
int backend_id = get<0>(get<2>(GetParam()));
int target_id = get<1>(get<2>(GetParam()));
std::vector<int> indices_shape(shape);
indices_shape.push_back(int(shape.size()));
@ -342,12 +337,10 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
Mat indices(indices_shape, CV_32FC1);
Mat updates(shape, CV_32FC1);
Scalar mean = 0.f;
Scalar std = 1.f;
randn(data, mean, std);
randn(updates, mean, std);
randn(data, 0.f, 1.f);
randn(updates, 0.f, 1.f);
// initialize the indices with index tuples like [0...N, 0...C, 0...H, 0...W]
// Create indices such that indices[n_i, c_j, h_k, w_l, :4] = [i, j, k, l]
std::vector<int> current_index_tuple(shape.size());
int total = data.total();
std::vector<int> indices_step;
@ -357,6 +350,7 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
indices_step.push_back(step);
}
int t, j, idx, offset_at_idx, offset;
auto *indices_ptr = indices.ptr<float>();
for (int i = 0; i < total; i++)
{
t = i;
@ -373,7 +367,7 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
offset += current_index_tuple[j] * indices_step[j];
for (j = 0; j < shape.size(); j++)
indices.at<float>(offset + j) = current_index_tuple[j];
indices_ptr[offset + j] = current_index_tuple[j];
}
Net net;
@ -389,17 +383,14 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
// warmup
{
std::vector<String> inpNames(3);
inpNames[0] = "data";
inpNames[1] = "indices";
inpNames[2] = "updates";
net.setInputsNames(inpNames);
net.setInput(data, inpNames[0]);
net.setInput(indices, inpNames[1]);
net.setInput(updates, inpNames[2]);
std::vector<String> input_names{"data", "indices", "updates"};
net.setInputsNames(input_names);
net.setInput(data, input_names[0]);
net.setInput(indices, input_names[1]);
net.setInput(updates, input_names[2]);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
net.setPreferableBackend(backend_id);
net.setPreferableTarget(target_id);
Mat out = net.forward();
}
@ -409,23 +400,20 @@ struct Layer_ScatterND : public TestBaseWithParam<tuple<Backend, Target> >
}
SANITY_CHECK_NOTHING();
}
int N = 8;
int C = 256;
int H = 128;
int W = 100;
};
PERF_TEST_P_(Layer_ScatterND, DISABLED_ScatterND)
{
test_layer({N, C, H ,W});
}
PERF_TEST_P_(Layer_ScatterND, DISABLED_ScatterND_add)
{
test_layer({N, C, H , W}, "add");
}
INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, Combine(
Values(std::vector<int>{2, 128, 64, 50}),
Values(std::string("none"), std::string("add")),
dnnBackendsAndTargets(/* withInferenceEngine= */ false,
/* withHalide= */ false,
/* withCpuOCV= */ true,
/* withVkCom= */ false,
/* withCUDA= */ false,
/* withNgraph= */ false,
/* withWebnn= */ false,
/* withCann= */ false) // only test on CPU
));
struct Layer_LayerNorm : public TestBaseWithParam<tuple<Backend, Target> >
{
@ -795,19 +783,77 @@ PERF_TEST_P_(Layer_Attention, VisionTransformer) {
test_layer({1, 197, 768}, {768, 768, 768}, 12);
}
struct Layer_GroupNorm : public TestBaseWithParam<tuple<Backend, Target> >
{
void test_layer(const std::vector<int>& x_shape, int num_groups)
{
int backendId = get<0>(GetParam());
int targetId = get<1>(GetParam());
Mat x(x_shape, CV_32FC1);
Mat scale(x_shape[1], 1, CV_32FC1);
Mat b(x_shape[1], 1, CV_32FC1);
randu(x, 0.f, 1.f);
randu(scale, 0.f, 1.f);
randu(b, 0.f, 1.f);
Net net;
LayerParams lp;
lp.type = "GroupNormalization";
lp.name = "testLayer";
lp.set("num_groups", num_groups);
int id = net.addLayerToPrev(lp.name, lp.type, lp);
net.connect(0, 0, id, 0);
net.connect(0, 1, id, 1);
net.connect(0, 2, id, 2);
// warmup
{
std::vector<String> inpNames{"x", "scale", "b"};
net.setInputsNames(inpNames);
net.setInput(x, inpNames[0]);
net.setInput(scale, inpNames[1]);
net.setInput(b, inpNames[2]);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
Mat out = net.forward();
}
TEST_CYCLE()
{
Mat res = net.forward();
}
SANITY_CHECK_NOTHING();
}
int N = 2;
int C = 64;
int H = 180;
int W = 240;
int num_groups = 16;
};
PERF_TEST_P_(Layer_GroupNorm, GroupNorm)
{
test_layer({N, C, H, W}, num_groups);
}
INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
#ifdef HAVE_CUDA
INSTANTIATE_TEST_CASE_P(CUDA, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)));
#endif
INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_GatherElements, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_InstanceNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_Attention, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_GroupNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;
PERF_TEST_P_(Layer_FullyConnected, fc)

View File

@ -132,8 +132,23 @@ void eltwise_op(const Stream& stream, TensorSpan<T> output, TensorView<T> x, Ten
}
else
{
CV_Assert(is_shape_compatible(output, x));
CV_Assert(is_shape_compatible(output, y));
auto inShape1 = x.shape_as_vector();
auto inShape2 = y.shape_as_vector();
auto outShape = output.shape_as_vector();
std::size_t x_ndims = inShape1.size(), y_ndims = inShape2.size();
if (x_ndims >= y_ndims) {
for (std::size_t i = 0; i < (x_ndims - y_ndims); i++) {
inShape2.insert(inShape2.begin(), 1);
}
} else {
for (std::size_t i = 0; i < (y_ndims - x_ndims); i++) {
inShape1.insert(inShape1.begin(), 1);
}
}
CV_Assert(is_shape_compatible1(outShape, inShape1));
CV_Assert(is_shape_compatible1(outShape, inShape2));
/* matching singleton axes in both input tensors can be eliminated
*
@ -148,20 +163,21 @@ void eltwise_op(const Stream& stream, TensorSpan<T> output, TensorView<T> x, Ten
* x: [1, 256, 32, 32] -> [256, 32, 32]
* y: [1, 256, 1, 1] -> [256, 1, 1]
*/
for (int r = 0; r < output.rank(); r++)
{
while (x.rank() > r && y.rank() > r && x.get_axis_size(r) == 1 && y.get_axis_size(r) == 1) {
CV_Assert(output.get_axis_size(r) == 1);
x.squeeze(r);
y.squeeze(r);
output.squeeze(r);
int eliminate_times = 0;
for (std::size_t i = 0; i < outShape.size(); i++) {
if (inShape1[i] == 1 && inShape2[i] == 1 && outShape[i] == 1 && i != (outShape.size() - 1)) {
eliminate_times++;
} else {
break;
}
}
if (eliminate_times > 0) {
for (int i = 0; i < eliminate_times; i++) {
inShape1.erase(inShape1.begin());
inShape2.erase(inShape2.begin());
outShape.erase(outShape.begin());
}
}
auto inShape1 = x.shape_as_vector();
auto inShape2 = y.shape_as_vector();
auto outShape = output.shape_as_vector();
/* contiguous axes that do not broadcast can be merged into one axis
*
@ -324,7 +340,19 @@ void eltwise_sub_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
eltwise_op<T, SubFunctor<T>>(stream, output, x, y);
}
template <class T>
void eltwise_mod_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
eltwise_op<T, ModFunctor<T>>(stream, output, x, y);
}
template <class T>
void eltwise_fmod_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
eltwise_op<T, FModFunctor<T>>(stream, output, x, y);
}
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template void eltwise_mod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
template void eltwise_fmod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
template void eltwise_sub_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
template void eltwise_div_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
template void eltwise_prod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
@ -333,6 +361,8 @@ void eltwise_sub_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
template void eltwise_max_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
template void eltwise_min_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
#endif
template void eltwise_mod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
template void eltwise_fmod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
template void eltwise_sub_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
template void eltwise_div_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
template void eltwise_prod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);

View File

@ -799,6 +799,40 @@ struct ReciprocalFunctor {
}
};
template <class T>
struct ModFunctor {
struct Params {
CUDA4DNN_HOST_DEVICE Params() {}
};
CUDA4DNN_DEVICE ModFunctor() { }
CUDA4DNN_DEVICE ModFunctor(const Params& params) { }
CUDA4DNN_DEVICE T operator()(T x, T y) {
int res = (int)x % (int)y;
T zero = T(0);
if ((res > (int)zero && y < zero) || (res < (int)zero && y > zero)) {
res += (int)y;
}
return res;
}
};
template <class T>
struct FModFunctor {
struct Params {
CUDA4DNN_HOST_DEVICE Params() {}
};
CUDA4DNN_DEVICE FModFunctor() { }
CUDA4DNN_DEVICE FModFunctor(const Params& params) { }
CUDA4DNN_DEVICE T operator()(T x, T y) {
using csl::device::fmod;
return fmod(x, y);
}
};
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */

View File

@ -36,6 +36,13 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace de
template <> inline __device__ float min(float x, float y) { return fminf(x, y); }
template <> inline __device__ double min(double x, double y) { return fmin(x, y); }
template <class T> __device__ T fmod(T x, T y) { return x % y; }
template <> inline __device__ float fmod(float x, float y) { return fmodf(x, y); }
template <> inline __device__ double fmod(double x, double y) { return fmod(x, y); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ half fmod(half x, half y) { return fmodf((float)x, (float)y); }
#endif
template <class T> __device__ T log1p(T val);
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half log1p(__half val) { return hlog(__half(1) + val); }

View File

@ -78,6 +78,18 @@ namespace raw {
}
}
template <class T>
__global__ void normalize_mean_variance_groupwise(Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> inv_stddev, size_type inner_size, size_type C, size_type num_groups, size_type group_size) {
for (auto idx : grid_stride_range(output.size())) {
const index_type outer_idx = idx / inner_size;
const index_type c = outer_idx % C;
const index_type group_idx = outer_idx / group_size;
auto s = static_cast<float>(scale[c]) * inv_stddev[group_idx];
auto b = static_cast<float>(bias[c]);
output[idx] = (static_cast<float>(input[idx]) - means[group_idx]) * s + b;
}
}
template <class T>
__global__ void normalize_mean_variance_layernorm(Span<T> output, View<T> input, View<T> scale, View<float> means, View<float> inv_stddev, size_type inner_size) {
for (auto idx : grid_stride_range(output.size())) {
@ -191,6 +203,24 @@ template void normalize_mean_variance_channelwise(const Stream&, Span<__half> /*
#endif
template void normalize_mean_variance_channelwise(const Stream&, Span<float> /*output*/, View<float> /*input*/, View<float> /*scale*/, View<float> /*bias*/, View<float> /*means*/, View<float> /*inv_stddev*/, std::size_t, std::size_t);
template <class T>
void normalize_mean_variance_groupwise(const Stream& stream, Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> inv_stddev, std::size_t inner_size, std::size_t C, std::size_t num_groups, std::size_t group_size)
{
CV_Assert(input.size() == output.size());
CV_Assert(input.size() / inner_size == means.size() * group_size);
CV_Assert(means.size() == inv_stddev.size());
auto kernel = raw::normalize_mean_variance_groupwise<T>;
auto policy = make_policy(kernel, output.size(), 0, stream);
launch_kernel(kernel, policy, output, input, scale, bias, means, inv_stddev, inner_size, C, num_groups, group_size);
}
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template void normalize_mean_variance_groupwise(const Stream&, Span<__half> /*output*/, View<__half> /*input*/, View<__half> /*scale*/, View<__half> /*bias*/, View<float> /*means*/, View<float> /*inv_stddev*/, std::size_t, std::size_t, std::size_t, std::size_t);
#endif
template void normalize_mean_variance_groupwise(const Stream&, Span<float> /*output*/, View<float> /*input*/, View<float> /*scale*/, View<float> /*bias*/, View<float> /*means*/, View<float> /*inv_stddev*/, std::size_t, std::size_t, std::size_t, std::size_t);
template <class T>
void normalize_mean_variance_layernorm(const Stream& stream, Span<T> output, View<T> input, View<T> scale, View<float> means, View<float> inv_stddev, std::size_t inner_size)
{

View File

@ -1262,6 +1262,23 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
return true;
}
template <typename ShapeType>
bool is_shape_compatible1(const ShapeType &x_shape, const ShapeType &y_shape) noexcept {
const auto x_ndims = x_shape.size(), y_ndims = y_shape.size();
if (x_ndims != y_ndims) {
return false;
}
for (int i = 0; i < x_ndims; i++) {
if (x_shape[i] != y_shape[i] && x_shape[i] != 1 && y_shape[i] != 1) {
return false;
}
}
return true;
}
/** returns the rank to which the given tensor can be squeezed to */
template <class TensorType>
std::size_t get_effective_rank(const TensorType& x) noexcept {

View File

@ -33,6 +33,12 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template <class T>
void eltwise_sub_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
template <class T>
void eltwise_mod_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
template <class T>
void eltwise_fmod_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
#endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ELTWISE_OPS_HPP */

View File

@ -35,6 +35,10 @@ void normalize_mean_variance_layernorm(const csl::Stream &stream, csl::Span<T> o
template <class T>
void normalize_mean_variance_layernorm(const csl::Stream &stream, csl::Span<T> output, csl::View<T> input, csl::View<T> scale, csl::View<T> bias, csl::View<float> means, csl::View<float> inv_stddev, std::size_t inner_size);
template <class T>
void normalize_mean_variance_groupwise(const csl::Stream &stream, csl::Span<T> output, csl::View<T> input, csl::View<T> scale, csl::View<T> bias, csl::View<float> means, csl::View<float> inv_stddev, std::size_t inner_size, std::size_t C, std::size_t num_groups, std::size_t group_size);
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
#endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_MVN_HPP */

View File

@ -28,6 +28,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
DIV,
MIN,
SUB,
MOD,
FMOD,
};
class EltwiseOpBase : public CUDABackendNode {
@ -90,6 +92,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
kernels::eltwise_sum_coeff_2<T>(stream, output, coeffs[0], input_x, coeffs[1], input_y);
break;
case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, input_x, input_y); break;
case EltwiseOpType::MOD: kernels::eltwise_mod_2<T>(stream, output, input_x, input_y); break;
case EltwiseOpType::FMOD: kernels::eltwise_fmod_2<T>(stream, output, input_x, input_y); break;
}
}
else
@ -122,6 +126,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
}
break;
case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, output, input); break;
case EltwiseOpType::MOD: kernels::eltwise_mod_2<T>(stream, output, output, input); break;
case EltwiseOpType::FMOD: kernels::eltwise_fmod_2<T>(stream, output, output, input); break;
}
}
}

View File

@ -0,0 +1,87 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_GROUP_NORM_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_GROUP_NORM_HPP
#include "../../op_cuda.hpp"
#include "../csl/stream.hpp"
#include "../csl/span.hpp"
#include "../csl/tensor.hpp"
#include "../csl/workspace.hpp"
#include "../kernels/fill_copy.hpp"
#include "../kernels/mvn.hpp"
#include <opencv2/core.hpp>
#include <cstddef>
#include <vector>
#include <utility>
namespace cv { namespace dnn { namespace cuda4dnn {
template <class T>
class GroupNormOp final : public CUDABackendNode {
public:
using wrapper_type = GetCUDABackendWrapperType<T>;
GroupNormOp(csl::Stream stream_, float epsilon_, size_t loops, size_t num_groups)
: stream(std::move(stream_)), epsilon(epsilon_), num_groups(num_groups) {
csl::WorkspaceBuilder builder;
builder.require<float>(loops * num_groups); // mean and stdev for each group
builder.require<float>(loops * num_groups);
scratch_mem_in_bytes = builder.required_workspace_size();
}
void forward(const std::vector<cv::Ptr<BackendWrapper>>& inputs,
const std::vector<cv::Ptr<BackendWrapper>>& outputs,
csl::Workspace& workspace) override {
auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
auto scale_wrapper = inputs[1].dynamicCast<wrapper_type>();
auto bias_wrapper = inputs[2].dynamicCast<wrapper_type>();
auto input = input_wrapper->getView();
auto scale = scale_wrapper->getView();
auto bias = bias_wrapper->getView();
auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
auto output = output_wrapper->getSpan();
auto C = input.get_axis_size(1);
auto loops = input.size_range(0, 2);
auto norm_size = input.size_range(2, input.rank());
auto num_groups = this->num_groups;
auto group_size = C / num_groups;
if (norm_size == 1) {
kernels::fill<T>(stream, output, 0.f);
return;
} else {
auto ws_allocator = csl::WorkspaceAllocator(workspace);
auto mean = ws_allocator.get_span<float>(loops / group_size);
kernels::fill<float>(stream, mean, 0.f);
auto stdev = ws_allocator.get_span<float>(loops / group_size);
kernels::fill<float>(stream, stdev, 0.f);
kernels::reduce_mean_sqr_sum<T>(stream, mean, stdev, input, norm_size * group_size);
kernels::compute_normalization_scale(stream, stdev, mean, stdev, norm_size * group_size, epsilon);
kernels::normalize_mean_variance_groupwise<T>(stream, output, input, scale, bias, mean, stdev, norm_size, C, num_groups, group_size);
}
}
std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }
private:
csl::Stream stream;
float epsilon;
std::size_t num_groups;
std::size_t scratch_mem_in_bytes;
};
}}} // cv::dnn::cuda4dnn
#endif // OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_GROUP_NORM_HPP

View File

@ -163,6 +163,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(Expand, ExpandLayer);
CV_DNN_REGISTER_LAYER_CLASS(InstanceNormalization, InstanceNormLayer);
CV_DNN_REGISTER_LAYER_CLASS(Attention, AttentionLayer);
CV_DNN_REGISTER_LAYER_CLASS(GroupNormalization, GroupNormLayer);
CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);

View File

@ -969,6 +969,13 @@ public:
stride_h, stride_w, dilation_h, dilation_w, pad_t, pad_l,
biasptr, multptr, inptr_, height, width, outptr_, out_d, outH, outW, inpZp, outZp);
else
#endif
#if CV_RVP052
if(isConv2D)
opt_RVP052::fastDepthwiseConv(wptr, kernel_h, kernel_w,
stride_h, stride_w, dilation_h, dilation_w, pad_t, pad_l,
biasptr, multptr, inptr_, height, width, outptr_, out_d, outH, outW, inpZp, outZp);
else
#endif
{
const int8_t w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2],
@ -1348,6 +1355,12 @@ public:
opt_LASX::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0,
outShape, bsz, vsz, vsz_a, outZp, multptr, cn0 == 0, cn1 == inpCn);
else
#endif
#if CV_RVP052
if(isConv2D)
opt_RVP052::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0,
outShape, bsz, vsz, vsz_a, outZp, multptr, cn0 == 0, cn1 == inpCn);
else
#endif
for( int i = 0; i < outCn; i += 2 )
{

View File

@ -302,6 +302,11 @@ public:
if( useLASX )
opt_LASX::fastGEMM1T( sptr, wptr, wstep, biasptr, multptr, dptr, nw, vecsize, outZp );
else
#endif
#if CV_RVP052
if( 1 )
opt_RVP052::fastGEMM1T( sptr, wptr, wstep, biasptr, multptr, dptr, nw, vecsize, outZp );
else
#endif
{
int i = 0;

View File

@ -13,6 +13,8 @@
#include "int8layers/layers_common.simd_declarations.hpp"
#undef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
#include "./layers_rvp052.hpp"
#ifdef HAVE_OPENCL
#include "../ocl4dnn/include/ocl4dnn.hpp"
#endif

View File

@ -0,0 +1,221 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "../precomp.hpp"
#include "./layers_rvp052.hpp"
#if CV_RVP052
namespace cv {
namespace dnn {
namespace opt_RVP052 {
/* int8 convolution inner kernel for Andes RVP052 (RISC-V DSP).
 Processes two output channels per outer iteration, accumulating dot products
 of the im2col row buffer against the weight rows with the packed
 multiply-accumulate intrinsic __nds__v_smaqa (four int8 products folded into
 each 32-bit lane per step).
 initOutput  : start accumulation from bias instead of the existing output.
 finalOutput : requantize (multiplier, outZp) and saturate to the int8 range.
 NOTE(review): bias[i + 1] and multiplier[i + 1] are read before the
 i + 1 >= outCn guard below; when outCn is odd this reads one element past
 the logical channel count — confirm callers pad these buffers. */
void fastConv(const int8_t *weights, size_t wstep, const int *bias,
const int8_t *rowbuf, int *output, const int *outShape,
int blockSize, int vecsize, int vecsize_aligned, int outZp,
const float *multiplier, bool initOutput, bool finalOutput)
{
int outCn = outShape[1];
size_t outPlaneSize = outShape[2] * outShape[3];
for (int i = 0; i < outCn; i += 2)
{
const int8_t *wptr0 = weights + i * wstep;
const int8_t *wptr1 = wptr0 + wstep;
int *outptr0 = output + i * outPlaneSize;
int *outptr1 = outptr0 + outPlaneSize;
int bias0 = bias[i], bias1 = bias[i + 1];
float mult0 = multiplier[i], mult1 = multiplier[i + 1];
if (i + 1 >= outCn)
{
// Odd channel count: alias the second lane onto the first so the
// paired computation below stays valid.
wptr1 = wptr0;
outptr1 = outptr0;
bias1 = bias0;
mult1 = mult0;
}
int j = 0;
for (; j < blockSize; j++)
{
const int8_t *rptr = rowbuf + j * vecsize_aligned;
int s00 = initOutput ? bias0 : outptr0[j];
int s10 = initOutput ? bias1 : outptr1[j];
int32x2_t vsx0 = {s00, s10};
for (int k = 0; k < vecsize; k += 4)
{
// Same 4 input bytes feed both lanes; each lane gets its own weights.
int8x4_t vrptr[2] = {*(int8x4_t*)(rptr + k), *(int8x4_t*)(rptr + k)};
int8x4_t vwptr[2] = {*(int8x4_t*)(wptr0 + k), *(int8x4_t*)(wptr1 + k)};
vsx0 = __nds__v_smaqa(vsx0, *(int8x8_t*)vwptr, *(int8x8_t*)vrptr);
}
if (finalOutput)
{
// Requantize and saturate; sclip32(x, 7) is assumed to clip to the
// signed 8-bit range [-128, 127] — confirm against the Andes intrinsics doc.
vsx0[0] = outZp + (int)std::round(vsx0[0] * mult0);
vsx0[1] = outZp + (int)std::round(vsx0[1] * mult1);
vsx0 = __nds__v_sclip32(vsx0, 7);
}
outptr0[j] = vsx0[0];
outptr1[j] = vsx0[1];
}
}
}
/* int8 3x3 depthwise convolution for one output channel (out_d) on Andes
 RVP052. Handles top/bottom/left/right borders by zeroing the out-of-range
 taps and folding the input zero point (inpZp) into the bias instead, then
 vectorizes the interior with __nds__v_smaqa, two output columns per step. */
void fastDepthwiseConv(const int8_t *wptr,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int dilation_h, int dilation_w,
int pad_t, int pad_l,
const int *biasptr, const float *multptr,
const int8_t *inptr_,
int height, int width,
int *outptr_,
int out_d, int outH, int outW,
int inpZp, int outZp)
{
// 3x3 kernel taps; row 0 and row 2 keep mutable copies (w00.., w20..)
// because they are zeroed at the top/bottom borders.
const int8_t w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2],
w10 = wptr[3], w11 = wptr[4], w12 = wptr[5],
w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8];
// outW1: last output column whose full receptive field is inside the row.
int outW1 = min(outW, (width - dilation_w * (kernel_w - 1) + pad_l) / stride_w);
int bias = biasptr[out_d], biasCopy;
float mult = multptr[out_d];
for (int out_i = 0; out_i < outH; out_i++)
{
int in_i = out_i * stride_h - pad_t, out_j = 0;
const int8_t *imgptr0 = inptr_ + in_i * width;
const int8_t *imgptr1 = imgptr0 + dilation_h * width;
const int8_t *imgptr2 = imgptr0 + (dilation_h * 2) * width;
int8_t w00 = w00_, w01 = w01_, w02 = w02_;
int8_t w20 = w20_, w21 = w21_, w22 = w22_;
int out;
biasCopy = bias;
if (in_i < 0)
{
// Top border: drop row 0 of the kernel, compensate the zero point.
biasCopy += inpZp * (w00 + w01 + w02);
w00 = w01 = w02 = 0;
imgptr0 = imgptr1;
}
else if (in_i + dilation_h * (kernel_h - 1) >= height)
{
// Bottom border: drop row 2 of the kernel.
biasCopy += inpZp * (w20 + w21 + w22);
w20 = w21 = w22 = 0;
imgptr2 = imgptr1;
}
int *outptr = outptr_ + out_i * outW;
if (pad_l > 0)
{
// Left border column computed scalar, with the first kernel column
// replaced by its zero-point contribution.
out = (int)imgptr0[0] * w01 + (int)imgptr0[dilation_w] * w02 +
(int)imgptr1[0] * w11 + (int)imgptr1[dilation_w] * w12 +
(int)imgptr2[0] * w21 + (int)imgptr2[dilation_w] * w22 +
biasCopy + inpZp * (w00 + w10 + w20);
outptr[0] = __nds__sclip32(outZp + (int)std::round(out * mult), 7);
out_j = 1;
}
// Interior: two output columns per iteration; each 8-byte lane packs the
// three kernel rows of one column (fourth byte zero).
int8x8_t vwx0 = (int8x8_t){w00, w10, w20, 0, w00, w10, w20, 0};
int8x8_t vwx1 = (int8x8_t){w01, w11, w21, 0, w01, w11, w21, 0};
int8x8_t vwx2 = (int8x8_t){w02, w12, w22, 0, w02, w12, w22, 0};
int8x8_t vimgx0, vimgx1, vimgx2;
int32x2_t vout = {0, 0};
for (; out_j < outW1; out_j+=2)
{
int in_j = out_j * stride_w - pad_l;
vimgx0 = (int8x8_t){imgptr0[in_j], imgptr1[in_j], imgptr2[in_j], 0,
imgptr0[in_j + stride_w], imgptr1[in_j + stride_w], imgptr2[in_j + stride_w], 0};
vimgx1 = (int8x8_t){imgptr0[in_j + dilation_w], imgptr1[in_j + dilation_w], imgptr2[in_j + dilation_w], 0,
imgptr0[in_j + dilation_w + stride_w], imgptr1[in_j + dilation_w + stride_w], imgptr2[in_j + dilation_w + stride_w], 0};
vimgx2 = (int8x8_t){imgptr0[in_j + dilation_w * 2], imgptr1[in_j + dilation_w * 2], imgptr2[in_j + dilation_w * 2], 0,
imgptr0[in_j + dilation_w * 2 + stride_w], imgptr1[in_j + dilation_w * 2 + stride_w], imgptr2[in_j + dilation_w * 2 + stride_w], 0};
vout = (int32x2_t){biasCopy, biasCopy};
vout = __nds__v_smaqa(vout, vwx0, vimgx0);
vout = __nds__v_smaqa(vout, vwx1, vimgx1);
vout = __nds__v_smaqa(vout, vwx2, vimgx2);
outptr[out_j] = __nds__sclip32(outZp + (int)std::round(vout[0] * mult), 7);
outptr[out_j + 1] = __nds__sclip32(outZp + (int)std::round(vout[1] * mult), 7);
}
// The paired loop may overshoot outW1 by one; roll back so the scalar
// tail recomputes any column whose receptive field crosses the border.
// NOTE(review): when the overshoot happens, the loop above has already
// read input at in_j + ... + stride_w for that column — confirm those
// reads stay within the row.
while (out_j > outW1) out_j--;
for (; out_j < outW; out_j++)
{
// Right border: per-tap validity flags (s0..s2); invalid taps read
// index 0 (harmless) and contribute via the zero point instead.
int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w * 2;
int s0 = 1, s1 = 1, s2 = 1;
if (in_j0 >= width)
{
in_j0 = 0;
s0 = 0;
biasCopy += inpZp * (w00 + w10 + w20);
}
if (in_j1 >= width)
{
in_j1 = 0;
s1 = 0;
biasCopy += inpZp * (w01 + w11 + w21);
}
if (in_j2 >= width)
{
in_j2 = 0;
s2 = 0;
biasCopy += inpZp * (w02 + w12 + w22);
}
out = (int)imgptr0[in_j0] * w00 * s0 + (int)imgptr0[in_j1] * w01 * s1 + (int)imgptr0[in_j2] * w02 * s2 +
(int)imgptr1[in_j0] * w10 * s0 + (int)imgptr1[in_j1] * w11 * s1 + (int)imgptr1[in_j2] * w12 * s2 +
(int)imgptr2[in_j0] * w20 * s0 + (int)imgptr2[in_j1] * w21 * s1 + (int)imgptr2[in_j2] * w22 * s2 + biasCopy;
outptr[out_j] = __nds__sclip32(outZp + (int)std::round(out * mult), 7);
}
}
}
// dst = vec * weights^t + bias
// dst = vec * weights^t + bias
/* int8 vector-matrix product for Andes RVP052: computes nvecs dot products of
 the input vector against consecutive weight rows (row stride wstep), then
 requantizes with per-row multiplier and outZp and saturates to int8 range.
 Two rows are processed per iteration via the packed MAC intrinsic; a scalar
 tail handles an odd trailing row. vecsize is assumed to be a multiple of 4. */
void fastGEMM1T( const int8_t* vec, const int8_t* weights,
size_t wstep, const int* bias, const float* multiplier,
int* dst, int nvecs, int vecsize, int outZp )
{
int i = 0;
for( ; i <= nvecs - 2; i += 2 )
{
const int8_t* wptr0 = weights + i * wstep;
const int8_t* wptr1 = weights + (i + 1) * wstep;
// Both lanes start from their respective biases.
int32x2_t vs0 = *(int32x2_t*)(bias + i);
for( int k = 0; k < vecsize; k += 4 )
{
// Same 4 input bytes in both lanes; each lane has its own weight row.
int8x4_t vvec[2] = {*(int8x4_t*)(vec + k), *(int8x4_t*)(vec + k)};
int8x4_t vwptr[2] = {*(int8x4_t*)(wptr0 + k), *(int8x4_t*)(wptr1 + k)};
vs0 = __nds__v_smaqa(vs0, *(int8x8_t*)vwptr, *(int8x8_t*)vvec);
}
// Requantize both lanes and saturate (sclip32(x, 7) — int8 range).
int32x2_t vdst = {(int)std::round(vs0[0] * multiplier[i]), (int)std::round(vs0[1] * multiplier[i + 1])};
vdst = __nds__v_sclip32(vdst + outZp, 7);
*(int32x2_t*)(dst + i) = vdst;
}
// Scalar tail for the last row when nvecs is odd.
for( ; i < nvecs; i++ )
{
const int8_t* wptr = weights + i * wstep;
int s0 = bias[i];
for( int k = 0; k < vecsize; k += 4 )
{
// Upper lane zeroed; only the first 4 bytes contribute.
int8x4_t vvec[2] = {*(int8x4_t*)(vec + k), 0};
int8x4_t vwptr[2] = {*(int8x4_t*)(wptr + k), 0};
s0 = __nds__smaqa(s0, *(unsigned long*)vwptr, *(unsigned long*)vvec);
}
dst[i] = __nds__sclip32(outZp + (int)std::round(s0 * multiplier[i]), 7);
}
}
}}} // namespace
#endif

View File

@ -0,0 +1,36 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Declarations of int8 convolution/GEMM kernels optimized for the Andes
// RVP052 core. Enabled only when building for RISC-V with the Andes DSP
// extension; otherwise CV_RVP052 is 0 and callers fall back to generic code.
#if defined(__riscv) && defined(__riscv_dsp) && defined(__ANDES)
# include <nds_intrinsic.h>
# define CV_RVP052 1
namespace cv {
namespace dnn {
namespace opt_RVP052 {
// int8 convolution inner kernel: two output channels per pass over the
// im2col row buffer; requantizes with per-channel multiplier and outZp.
void fastConv( const int8_t* weights, size_t wstep, const int* bias,
const int8_t* rowbuf, int* output, const int* outShape,
int blockSize, int vecsize, int vecsize_aligned, int outZp,
const float* multiplier, bool initOutput, bool finalOutput );
// int8 3x3 depthwise convolution for one output channel with zero-point
// aware border handling.
void fastDepthwiseConv( const int8_t* wptr,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int dilation_h, int dilation_w,
int pad_t, int pad_l,
const int* biasptr, const float* multptr,
const int8_t* inptr_,
int height, int width,
int* outptr_,
int out_d, int outH, int outW,
int inpZp, int outZp );
// int8 vector-matrix product: dst = vec * weights^t + bias, requantized.
void fastGEMM1T( const int8_t* vec, const int8_t* weights,
size_t wstep, const int* bias, const float* multiplier,
int* dst, int nvecs, int vecsize, int outZp );
}}}
#else
# define CV_RVP052 0
#endif

View File

@ -338,7 +338,7 @@ int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _outpu
}
#if CV_TRY_AVX2
if (conv->useAVX2)
opt_AVX::winofunc_AtXA_8x8_F32((float *)out_wbuf + ((k - k0)*CONV_WINO_IBLOCK + (block_id - block_id0))*CONV_WINO_AREA, CONV_WINO_SIZE,
opt_AVX2::winofunc_AtXA_8x8_F32((float *)out_wbuf + ((k - k0)*CONV_WINO_IBLOCK + (block_id - block_id0))*CONV_WINO_AREA, CONV_WINO_SIZE,
bpptr, outstep, outptr, outstep, biasv, minval, maxval, ifMinMaxAct);
else
#endif

View File

@ -385,7 +385,7 @@ void fastGemmBatch(bool trans_a, bool trans_b,
const auto shape_b = shape(B);
const auto shape_c = shape(C);
CV_CheckGE(shape_a.size(), static_cast<size_t>(2), "DNN/fastGemmBatch: A must be n-dimensional (n >= 2)");
CV_CheckEQ(shape_b.size(), static_cast<size_t>(2), "DNN/fastGemmBatch: B must be n-dimensional (n >= 2)");
CV_CheckGE(shape_b.size(), static_cast<size_t>(2), "DNN/fastGemmBatch: B must be n-dimensional (n >= 2)");
const float *a = A.ptr<const float>();
const float *b = B.ptr<const float>();

View File

@ -158,4 +158,51 @@ void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &o
parallel_for_(Range(0, loops), fn, nstripes);
}
// Group-wise Normalization speedup by multi-threading.
// Splits the C channels of input (N, C, ...) into num_groups groups, normalizes
// each (batch, group) slice to zero mean / unit variance (stabilized by
// epsilon), then applies the per-channel scale and bias.
// Preconditions: scale and bias each hold C elements; input.dims >= 3;
// C must be divisible by num_groups.
void fastNormGroup(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon, size_t num_groups) {
    const auto input_shape = shape(input);
    size_t N = input_shape[0], C = input_shape[1];
    CV_CheckEQ(scale.total(), bias.total(), "fastNormGroup: scale and bias should have the same shape");
    CV_CheckEQ(scale.total(), C, "fastNormGroup: scale should be a 1d tensor and match the channel of input");
    CV_CheckGE(input.dims, 3, "fastNormGroup: input dimension >= 3");

    size_t channels_per_group = C / num_groups;
    size_t loops = N * num_groups;                // one normalization slice per (batch, group)
    size_t norm_size = static_cast<size_t>(total(input_shape, 2) * channels_per_group);
    size_t step = norm_size / channels_per_group; // spatial size of one channel
    float inv_norm_size = 1.0 / norm_size;

    auto fn = [&](const Range &r) {
        const auto *input_data = input.ptr<const float>();
        const auto *scale_data = scale.ptr<const float>();
        const auto *bias_data = bias.ptr<const float>();
        auto *output_data = output.ptr<float>();
        for (int i = r.start; i < r.end; i++) {
            const auto *x = input_data + norm_size * i;
            auto *y = output_data + norm_size * i;

            // Pass 1: accumulate sum and sum of squares over the slice.
            // (fix: loop index is size_t, matching the size_t bound norm_size
            // and the second pass below — the original used int here, a
            // signed/unsigned mismatch.)
            float mean = 0.f, mean_square = 0.f;
            for (size_t j = 0; j < norm_size; j++) {
                float v = x[j];
                mean += v;
                mean_square += v * v;
            }

            // mean = E[x]; mean_square becomes sqrt(max(0, Var[x]) + epsilon).
            mean *= inv_norm_size;
            mean_square = std::sqrt(std::max(0.f, mean_square * inv_norm_size - mean * mean) + epsilon);
            float inv_stdev = 1.f / mean_square;

            // Pass 2: normalize and apply the per-channel affine transform.
            size_t group_idx = i % num_groups * channels_per_group; // first channel of this group
            for (size_t j = 0; j < norm_size; j++) {
                size_t c = group_idx + (j / step); // channel owning element j
                float s = scale_data[c] * inv_stdev, b = bias_data[c];
                y[j] = s * (x[j] - mean) + b;
            }
        }
    };
    double nstripes = loops * norm_size * (1 / 1024.0);
    parallel_for_(Range(0, loops), fn, nstripes);
}
}} // cv::dnn

View File

@ -21,6 +21,9 @@ void fastNorm(const Mat &input, const Mat &scale, const Mat &bias, Mat &output,
// Channel-wise Normalization speedup by multi-threading. Scale and bias should have the same shape (C). Input should have dimension >= 3.
void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon);
// Group-wise Normalization speedup by multi-threading. Scale and bias should have the same shape (C). Input should have dimension >= 3.
void fastNormGroup(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon, size_t num_groups);
}} // cv::dnn
#endif // OPENCV_DNN_FAST_NORM_HPP

View File

@ -1299,7 +1299,6 @@ Mat LayerEinsumImpl::batchwiseMatMul(
const Mat& input2,
const MatShape& input2ShapeOverride)
{
// Sanity checks before the actual MatMul
CV_CheckType(input1.type(), input2.type(), "Data types of the inputs must match for MatMul");
CV_CheckEQ(input1ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
@ -1312,59 +1311,21 @@ Mat LayerEinsumImpl::batchwiseMatMul(
int K = input1ShapeOverride[2];
int N = input2ShapeOverride[2];
std::vector<Mat> output;
Mat reshapedInput1 = input1;
Mat reshapedInput2 = input2;
Mat output;
if (batches > 1)
{
Mat reshapedInput1 = input1;
Mat reshapedInput2 = input2;
// create tmpout with type like input1
output = Mat({batches, M, N}, input1.type());
// input1 should of size MxK
// check if input1 needs reshape, if need reshape
if (input1.size[0] != M || input1.size[1] != K)
{
int shape[] = {batches, M, K};
reshapedInput1 = input1.reshape(1, 3, shape);
}
// input2 should be of size KxN
// check if input2 needs reshape, if needs reshape
if (input2.size[0] != K || input2.size[1] != N)
{
int shape[] = {batches, K, N};
reshapedInput2 = input2.reshape(1, 3, shape);
}
for (size_t i=0; i < batches; i++)
{
std::vector<Range> ranges1 = {cv::Range(i, i+1)};
for (int j = 1; j < reshapedInput1.dims; j++)
ranges1.emplace_back(cv::Range::all());
Mat part1 = reshapedInput1(ranges1);
int shape[] = {M, K};
part1 = part1.reshape(1, sizeof(shape)/sizeof(shape[0]), shape);
std::vector<Range> ranges2 = {cv::Range(i, i+1)};
for (int j = 1; j < reshapedInput2.dims; j++)
ranges2.emplace_back(cv::Range::all());
Mat part2 = reshapedInput2(ranges2);
int shape2[] = {K, N};
part2 = part2.reshape(1, sizeof(shape2)/sizeof(shape2[0]), shape2);
Mat tmp_output(M, N, part1.type());
fastGemm(false, false, 1.0, part1, part2, 0.0, tmp_output, opt);
int newShape[] = {1, M, N};
tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
output.emplace_back(tmp_output);
}
reshapedInput2 = reshapedInput2.reshape(1, input2ShapeOverride);
reshapedInput1 = reshapedInput1.reshape(1, input1ShapeOverride);
fastGemmBatch(false, false, 1.0, reshapedInput1, reshapedInput2, 0.0, output, opt);
} else {
Mat reshapedInput1 = input1;
Mat reshapedInput2 = input2;
// input1 should of size MxK
// check if input1 needs reshape, if need reshape
if (input1.dims > 2 || input1.size[0] != M || (input1.dims > 1 && input1.size[1] != K) || input1.dims == 1)
@ -1381,23 +1342,12 @@ Mat LayerEinsumImpl::batchwiseMatMul(
reshapedInput2 = input2.reshape(1, 2, shape2);
}
Mat tmp_output(M, N, reshapedInput1.type());
fastGemm(false, false, 1.0, reshapedInput1, reshapedInput2, 0.0, tmp_output, opt);
int newShape[] = {1, M, N};
tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
output.emplace_back(tmp_output);
output = Mat(M, N, reshapedInput1.type());
fastGemm(false, false, 1.0, reshapedInput1, reshapedInput2, 0.0, output, opt);
output = output.reshape(1, {1, M, N});
}
int outputDim[] = {static_cast<int>(output.size()), M, N};
Mat output_buffer = Mat::zeros(3, outputDim, CV_32F);
for (size_t i = 0; i < output.size(); i++) {
Mat output_slice = output_buffer.row(i);
output[i].copyTo(output_slice);
}
return output_buffer;
return output;
};
Ptr<EinsumLayer> EinsumLayer::create(const LayerParams& params)
{

View File

@ -453,13 +453,6 @@ public:
ret = false;
break;
}
if (!use_half && bias && (outerSize > 1))
{
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
UMat& biases = umat_blobs[1];
cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
}
}
if (ret) return true;

View File

@ -0,0 +1,190 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "../precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include "./cpu_kernels/fast_norm.hpp"
// CUDA backend
#include "../op_cuda.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/group_norm.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
// OpenCL backend
#ifdef HAVE_OPENCL
#include "../ocl4dnn/include/math_functions.hpp"
#include "opencl_kernels_dnn.hpp"
#endif
namespace cv {
namespace dnn {
// https://github.com/onnx/onnx/blob/main/docs/Operators.md#GroupNormalization
// https://github.com/onnx/onnx/blob/main/docs/Operators.md#GroupNormalization
// Splits the C channels into num_groups groups, normalizes each group to zero
// mean / unit variance (stabilized by epsilon), then applies a per-channel
// scale and bias. Inputs: [0] data (N, C, ...), [1] scale (C), [2] bias (C).
class GroupNormLayerImpl CV_FINAL : public GroupNormLayer {
public:
    GroupNormLayerImpl(const LayerParams &params) {
        setParamsFrom(params);
        // epsilon defaults to the ONNX default; num_groups is mandatory.
        epsilon = params.get<float>("epsilon", 1e-5);
        num_groups = params.get<int>("num_groups");
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA;
    }

    // Output shape equals the input shape; also validates that scale and bias
    // each flatten to C elements.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE {
        const auto &input = inputs[0];
        const auto &scale = inputs[1];
        const auto &bias = inputs[2];
        CV_CheckGE(input.size(), static_cast<size_t>(3), "DNN/GroupNorm: input dimension >= 3 is required");

        int C = input[1];
        // fix: the error messages below said "DNN/InstanceNorm" — a copy-paste
        // slip from the InstanceNorm layer; this is the GroupNorm layer.
        int scale_dim = std::accumulate(scale.begin(), scale.end(), 1, std::multiplies<int>());
        CV_CheckEQ(scale_dim, C, "DNN/GroupNorm: scale must be a 1d tensor and match the channel of input");
        int bias_dim = std::accumulate(bias.begin(), bias.end(), 1, std::multiplies<int>());
        CV_CheckEQ(bias_dim, C, "DNN/GroupNorm: bias must be a 1d tensor and match the channel of input");

        outputs.assign(1, inputs[0]);
        return false;
    }

    // CPU forward path; fp16 inputs go through the generic fallback.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        if (inputs_arr.depth() == CV_16S) {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        const auto& input = inputs[0];
        const auto& scale = inputs[1];
        const auto& bias = inputs[2];
        fastNormGroup(input, scale, bias, outputs[0], epsilon, num_groups);
    }

#ifdef HAVE_OPENCL
    // OpenCL forward path: computes per-(batch, group) mean and mean-of-squares
    // with two GEMV reductions, then runs the group-wise MVN kernel.
    // NOTE(review): forward() above does not dispatch to this method (no
    // CV_OCL_RUN); confirm whether the OpenCL path is intentionally unwired.
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);

        const auto &input = inputs[0], &scale = inputs[1], &bias = inputs[2];
        auto &output = outputs[0];

        const auto input_shape = shape(input);
        size_t N = input_shape[0], C = input_shape[1];
        size_t num_groups = this->num_groups;
        size_t channels_per_group = C / num_groups;
        size_t loops = N * num_groups, norm_size = static_cast<size_t>(total(input_shape, 2)) * channels_per_group;
        float inv_norm_size = 1.f / norm_size;

        // no fp16 support
        if (input.depth() == CV_16S) {
            return false;
        }

        String base_opts = format(" -DT=float -DT4=float4 -Dconvert_T=convert_float4");

        // Calculate mean: GEMV against a ones-vector averages each slice.
        UMat one = UMat::ones(norm_size, 1, CV_32F);
        UMat mean = UMat(loops, 1, CV_32F);
        UMat mean_square = UMat(loops, 1, CV_32F);
        UMat tmp = UMat(loops, norm_size, CV_32F);
        bool ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, loops, norm_size, inv_norm_size,
                                               input, 0, one, 0, 0.f, mean, 0);
        if (!ret) {
            return false;
        }
        // Calculate mean_square: square the centered values, then average.
        int num_vector = (norm_size % 8 == 0) ? 8 : ((norm_size % 4 == 0) ? 4 : 1);
        size_t global[] = {loops, static_cast<size_t>(norm_size / num_vector)};
        String build_opt = format(" -DNUM=%d", num_vector) + base_opts;
        String mean_square_kernel_name = format("calc_mean%d", num_vector);
        ocl::Kernel mean_square_kernel(mean_square_kernel_name.c_str(), ocl::dnn::mvn_oclsrc, build_opt + " -DKERNEL_MEAN");
        if (mean_square_kernel.empty()) {
            return false;
        }
        mean_square_kernel.set(0, ocl::KernelArg::PtrReadOnly(input));
        mean_square_kernel.set(1, (int)loops);
        mean_square_kernel.set(2, (int)norm_size);
        mean_square_kernel.set(3, ocl::KernelArg::PtrReadOnly(mean));
        mean_square_kernel.set(4, ocl::KernelArg::PtrWriteOnly(tmp));
        ret = mean_square_kernel.run(2, global, NULL, false);
        if (!ret) {
            return false;
        }
        ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, loops, norm_size, inv_norm_size,
                                          tmp, 0, one, 0, 0.f, mean_square, 0);
        if (!ret) {
            return false;
        }
        // Calculate group norm: output = scale * (x - mean) / sqrt(var + eps) + bias
        String mvn_group_kernel_name = format("mvn_group%d", num_vector);
        build_opt += " -DNORM_VARIANCE -DKERNEL_MVN_GROUP";
        ocl::Kernel mvn_group_kernel(mvn_group_kernel_name.c_str(), ocl::dnn::mvn_oclsrc, build_opt);
        if (mvn_group_kernel.empty()) {
            return false;
        }
        mvn_group_kernel.set(0, ocl::KernelArg::PtrReadOnly(input));
        mvn_group_kernel.set(1, (int)loops);
        mvn_group_kernel.set(2, (int)norm_size);
        mvn_group_kernel.set(3, (float)epsilon);
        mvn_group_kernel.set(4, ocl::KernelArg::PtrReadOnly(mean));
        mvn_group_kernel.set(5, ocl::KernelArg::PtrReadOnly(mean_square));
        mvn_group_kernel.set(6, ocl::KernelArg::PtrReadOnly(scale));
        mvn_group_kernel.set(7, ocl::KernelArg::PtrReadOnly(bias));
        mvn_group_kernel.set(8, (int)C);
        mvn_group_kernel.set(9, (int)num_groups);
        mvn_group_kernel.set(10, (float)0.f);
        mvn_group_kernel.set(11, ocl::KernelArg::PtrWriteOnly(output));
        ret = mvn_group_kernel.run(2, global, NULL, false);
        if (!ret) {
            return false;
        }
        return true;
    }
#endif

#ifdef HAVE_CUDA
    // CUDA backend: builds the GroupNormOp primitive; loops = N * num_groups
    // sizes the per-(batch, group) mean/stdev workspace.
    Ptr<BackendNode> initCUDA(void *context_,
                              const std::vector<Ptr<BackendWrapper>>& inputs,
                              const std::vector<Ptr<BackendWrapper>>& outputs) override {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
        auto input_shape = input_wrapper->getShape();
        size_t N = input_shape[0];
        size_t num_groups = this->num_groups;
        size_t loops = N * num_groups;

        return make_cuda_node<cuda4dnn::GroupNormOp>(preferableTarget, std::move(context->stream), epsilon, loops, num_groups);
    }
#endif // HAVE_CUDA

private:
    float epsilon;     // variance-stabilizing term
    size_t num_groups; // number of channel groups
};
// Factory entry point: builds a GroupNormLayerImpl configured from params
// and hands ownership to the returned smart pointer.
Ptr<GroupNormLayer> GroupNormLayer::create(const LayerParams &params) {
    Ptr<GroupNormLayer> layer(new GroupNormLayerImpl(params));
    return layer;
}
}} // cv::dnn

View File

@ -24,6 +24,16 @@ namespace cv
namespace dnn
{
namespace {
// Modulo whose result takes the sign of the divisor y (floor-mod semantics,
// as in ONNX Mod with fmod=0), unlike C++ operator% which truncates toward
// zero and keeps the dividend's sign.
static int _mod(int x, int y) {
    int remainder = x % y;
    bool sign_mismatch = (remainder != 0) && ((remainder < 0) != (y < 0));
    return sign_mismatch ? remainder + y : remainder;
}
}
class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer
{
public:
@ -42,7 +52,8 @@ public:
MAX,
MEAN,
MIN,
MOD,
MOD, // Integer Mod. Reminder's sign = Divisor's sign.
FMOD, // Floating-point Mod. Reminder's sign = Dividend's sign.
PROD,
SUB,
SUM,
@ -79,6 +90,8 @@ public:
op = OPERATION::MIN;
else if (operation == "mod")
op = OPERATION::MOD;
else if (operation == "fmod")
op = OPERATION::FMOD;
else if (operation == "mul")
op = OPERATION::PROD;
else if (operation == "sub")
@ -106,18 +119,21 @@ public:
#ifdef HAVE_CANN
if (backendId == DNN_BACKEND_CANN)
return op == OPERATION::ADD || op == OPERATION::PROD || op == OPERATION::SUB ||
op == OPERATION::DIV || op == OPERATION::MAX || op == OPERATION::MIN;
op == OPERATION::DIV || op == OPERATION::MAX || op == OPERATION::MIN ||
op == OPERATION::MOD || op == OPERATION::FMOD;
#endif
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
return (op == OPERATION::ADD ||
op == OPERATION::PROD ||
op == OPERATION::GREATER_EQUAL ||
op == OPERATION::LESS_EQUAL
op == OPERATION::LESS_EQUAL ||
op == OPERATION::MOD ||
op == OPERATION::FMOD
);
if (backendId == DNN_BACKEND_CUDA) {
return op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM ||
op == OPERATION::PROD || op == OPERATION::DIV || op == OPERATION::ADD ||
op == OPERATION::SUB;
op == OPERATION::SUB || op == OPERATION::MOD || op == OPERATION::FMOD;
}
return backendId == DNN_BACKEND_OPENCV;
}
@ -707,10 +723,16 @@ public:
}
case OPERATION::MOD:
{
auto mod = [](const uint8_t &a, const uint8_t &b) { return a % b; };
auto mod = [] (const T &a, const T &b) { return static_cast<T>(_mod(int(a), int(b))); };
binary_forward<T>(mod, std::forward<Args>(args)...);
break;
}
case OPERATION::FMOD:
{
auto fmod = [](const T &a, const T &b) { return std::fmod(a, b); };
binary_forward<T>(fmod, std::forward<Args>(args)...);
break;
}
case OPERATION::PROD:
{
auto prod = [](const T &a, const T &b) { return a * b; };
@ -782,9 +804,8 @@ public:
opDispatch<int32_t>(std::forward<Args>(args)...);
break;
case CV_32F:
CV_Assert(op != OPERATION::BITSHIFT && op != OPERATION::MOD &&
op != OPERATION::AND && op != OPERATION::OR &&
op != OPERATION::XOR);
CV_Assert(op != OPERATION::BITSHIFT && op != OPERATION::AND &&
op != OPERATION::OR && op != OPERATION::XOR);
opDispatch<float>(std::forward<Args>(args)...);
break;
default:
@ -801,19 +822,6 @@ public:
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
auto input_0_shape = inputs[0].dynamicCast<CUDABackendWrapper>()->getShape();
for (int i = 1; i < inputs.size(); i++)
{
auto input_i_shape = inputs[i].dynamicCast<CUDABackendWrapper>()->getShape();
if (input_0_shape.size() != input_i_shape.size())
return Ptr<BackendNode>();
// check if the shape can be supported by `eltwise_ops.cu`, or return the default BackendNode
for (int j = 0; j < input_0_shape.size(); j++)
if (input_0_shape[j] != input_i_shape[j] &&
input_0_shape[j] != 1 && input_i_shape[j] != 1)
return Ptr<BackendNode>();
}
cuda4dnn::EltwiseOpType op_ = cuda4dnn::EltwiseOpType::SUM;
switch (op) {
case OPERATION::MAX:
@ -837,6 +845,12 @@ public:
case OPERATION::SUB:
op_ = cuda4dnn::EltwiseOpType::SUB;
break;
case OPERATION::MOD:
op_ = cuda4dnn::EltwiseOpType::MOD;
break;
case OPERATION::FMOD:
op_ = cuda4dnn::EltwiseOpType::FMOD;
break;
default: return Ptr<BackendNode>(); // return empty cuda_node if the EltwiseOpType is unsupported type.
};
@ -881,6 +895,8 @@ public:
BUILD_CANN_ELTWISE_OP(OPERATION::DIV, Xdivy, name);
BUILD_CANN_ELTWISE_OP(OPERATION::MAX, Maximum, name);
BUILD_CANN_ELTWISE_OP(OPERATION::MIN, Minimum, name);
BUILD_CANN_ELTWISE_OP(OPERATION::MOD, Mod, name);
BUILD_CANN_ELTWISE_OP(OPERATION::FMOD, Mod, name);
#undef BUILD_CANN_ELTWISE_OP
default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
}
@ -927,6 +943,16 @@ public:
node = std::make_shared<ngraph::op::v1::GreaterEqual>(inp0, inp1);
else if (op == OPERATION::LESS_EQUAL)
node = std::make_shared<ngraph::op::v1::LessEqual>(inp0, inp1);
// Ideally we should do this but int32 internal blobs are converted to float32 data type in inference.
// TODO: Remove data type convertion when we have type inference.
else if (op == OPERATION::MOD) {
auto inp0_i64 = std::make_shared<ngraph::op::Convert>(inp0, ngraph::element::i64);
auto inp1_i64 = std::make_shared<ngraph::op::Convert>(inp1, ngraph::element::i64);
auto mod = std::make_shared<ngraph::op::v1::FloorMod>(inp0_i64, inp1_i64);
node = std::make_shared<ngraph::op::Convert>(mod, ngraph::element::f32);
}
else if (op == OPERATION::FMOD)
node = std::make_shared<ngraph::op::v1::Mod>(inp0, inp1);
else
CV_Error(Error::StsNotImplemented, "Operation is not implemented for nGraph backend");
return Ptr<BackendNode>(new InfEngineNgraphNode(node));

View File

@ -74,6 +74,11 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S) {
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
@ -89,49 +94,59 @@ public:
// NOTE: This impl does not check whether indices have duplicate entries.
// The last duplicate entry will overwrite the previous.
template<typename T, typename Functor>
void forward_impl(const Functor& rd, const Mat& data, const Mat& indices, const Mat& updates, Mat& out)
{
data.copyTo(out);
void forward_impl(const Functor &reduce_operation, const Mat &input_mat, const Mat &indices_mat, const Mat &updates_mat, Mat& output_mat) {
input_mat.copyTo(output_mat);
const int* shape = data.size.p;
const size_t* step = data.step.p;
const auto &input_mat_shape = shape(input_mat);
std::vector<size_t> input_mat_step(input_mat_shape.size());
for (int i = 0; i < input_mat.dims; i++) {
input_mat_step[i] = static_cast<size_t>(input_mat.step.p[i] / sizeof(T));
}
const int ind_ndims = indices.dims;
const int* ind_shape = indices.size.p;
const T* p_indices = indices.ptr<const T>();
const int indices_mat_ndims = indices_mat.dims;
const auto &indices_mat_shape = shape(indices_mat);
const int upd_ndims = updates.dims;
const int* upd_shape = updates.size.p;
const T* p_updates = updates.ptr<const T>();
const int updates_mat_ndims = updates_mat.dims;
const auto &updates_mat_shape = shape(updates_mat);
T* p_out = out.ptr<T>();
int k = ind_shape[ind_ndims - 1]; // last dim of indices
size_t total = (size_t)(indices.total() / k);
int indices_last_dim = indices_mat_shape[indices_mat_ndims - 1]; // last dim of indices
size_t updates_size = 1;
for (int i = ind_ndims - 1; i < upd_ndims; i++)
updates_size *= upd_shape[i];
for (int i = indices_mat_ndims - 1; i < updates_mat_ndims; i++)
updates_size *= updates_mat_shape[i];
size_t inp_start_offset = 0;
size_t ind_start_offset = 0;
size_t upd_start_offset = 0;
for (size_t i = 0; i < total; i++, ind_start_offset += k, upd_start_offset += updates_size)
{
const T* tmp_p_indices = p_indices + ind_start_offset;
inp_start_offset = 0;
for (int j = 0; j < k; j++)
{
CV_Assert(tmp_p_indices[j] < shape[j] && tmp_p_indices[j] > -shape[j]);
inp_start_offset += (((int)tmp_p_indices[j] + shape[j]) % shape[j]) * step[j];
}
inp_start_offset /= sizeof(T);
auto fn = [&](const Range &r) {
size_t input_offset = 0,
indices_offset = r.start * indices_last_dim,
updates_offset = r.start * updates_size;
for (int i = r.start; i < r.end; i++) {
const T* indices = indices_mat.ptr<const T>();
const T* updates = updates_mat.ptr<const T>();
T* output = output_mat.ptr<T>();
const T* tmp_p_updates = p_updates + upd_start_offset;
T* tmp_p_out = p_out + inp_start_offset;
for (int j = 0; j < updates_size; j++)
tmp_p_out[j] = rd(tmp_p_out[j], tmp_p_updates[j]);
input_offset = 0;
indices += indices_offset;
for (int j = 0; j < indices_last_dim; j++) {
int index = static_cast<int>(*(indices + j));
index = (index + input_mat_shape[j]) % input_mat_shape[j];
CV_Assert(index < input_mat_shape[j] && index >= 0);
input_offset += index * input_mat_step[j];
}
updates += updates_offset;
output += input_offset;
for (int j = 0; j < updates_size; j++) {
output[j] = reduce_operation(output[j], updates[j]);
}
indices_offset += indices_last_dim;
updates_offset += updates_size;
}
};
size_t total = (size_t)(indices_mat.total() / indices_last_dim);
double nstripes = (size_t)total * (indices_last_dim + updates_size) * (1 / 1024.0);
parallel_for_(Range(0, total), fn, nstripes);
}
template<typename... Args>

View File

@ -68,6 +68,11 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S) {
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
@ -81,59 +86,62 @@ public:
}
template<typename T, typename Functor>
void forward_impl(const Functor& rd, const Mat& data, const Mat& indices, const Mat& updates, Mat& out)
{
data.copyTo(out);
void forward_impl(const Functor &reduce_operation, const Mat &input_mat, const Mat &indices_mat, const Mat &updates_mat, Mat &output_mat) {
input_mat.copyTo(output_mat);
const int ndims = data.dims;
const int* shape = data.size.p;
const size_t* step = data.step.p;
const int ndims = input_mat.dims;
const int* ind_shape = indices.size.p;
const size_t* ind_step = indices.step.p;
const auto &input_mat_shape = shape(input_mat);
std::vector<size_t> input_mat_step(ndims);
size_t inp_offset = 0;
size_t ind_offset = 0;
const T* p_index = indices.ptr<const T>();
const T* p_update = updates.ptr<const T>();
T* p_out = out.ptr<T>();
const auto &indices_mat_shape = shape(indices_mat);
std::vector<size_t> indices_mat_step(ndims);
size_t total = indices.total();
for (int i = 0; i < ndims; i++) {
input_mat_step[i] = static_cast<size_t>(input_mat.step.p[i] / sizeof(T));
indices_mat_step[i] = static_cast<size_t>(indices_mat.step.p[i] / sizeof(T));
}
int j, offset_at_idx, index;
size_t t, idx;
for (size_t i = 0; i < total; i++)
{
t = i;
inp_offset = 0;
ind_offset = 0;
int offset_at_axis = 0;
for (j = ndims - 1; j >= 0; j--)
{
idx = t / ind_shape[j];
offset_at_idx = (int)(t - idx * ind_shape[j]);
ind_offset += offset_at_idx * ind_step[j];
inp_offset += offset_at_idx * step[j];
t = idx;
if (j == axis)
{
offset_at_axis = offset_at_idx * step[j];
auto fn = [&](const Range &r) {
size_t input_offset = 0, indices_offset = 0;
int indices_index, index;
size_t axis_offset, tmp_index, j_index;
for (int i = r.start; i < r.end; i++) {
const T* indices = indices_mat.ptr<const T>();
const T* updates = updates_mat.ptr<const T>();
T* output = output_mat.ptr<T>();
input_offset = 0;
indices_offset = 0;
indices_index = i;
axis_offset = 0;
for (int j = ndims - 1; j >= 0; j--) {
tmp_index = indices_index / indices_mat_shape[j];
j_index = (size_t)(indices_index - tmp_index * indices_mat_shape[j]);
input_offset += j_index * input_mat_step[j];
indices_offset += j_index * indices_mat_step[j];
indices_index = tmp_index;
if (j == axis) {
axis_offset = j_index * input_mat_step[j];
}
}
ind_offset /= sizeof(T);
// get index and overwrite current indices
const T* tmp_p_index = p_index + ind_offset;
index = (int)(*tmp_p_index);
CV_Assert(index < shape[axis] && index > -shape[axis]);
index = static_cast<int>(*(indices + indices_offset));
index = (index + input_mat_shape[axis]) % input_mat_shape[axis];
CV_Assert(index < input_mat_shape[axis] && index >= 0);
input_offset = input_offset - axis_offset + index * input_mat_step[axis];
inp_offset = inp_offset - offset_at_axis + ((index + shape[axis]) % shape[axis]) * step[axis];
inp_offset /= sizeof(T);
const T* tmp_p_update = p_update + ind_offset;
T* tmp_p_out = p_out + inp_offset;
*tmp_p_out = rd(*tmp_p_out, *tmp_p_update);
updates += indices_offset;
output += input_offset;
*output = reduce_operation(*output, *updates);
}
};
size_t total = indices_mat.total();
double nstripes = (size_t)total * ndims * (1 / 1024.0);
parallel_for_(Range(0, total), fn, nstripes);
}
template<typename... Args>

View File

@ -901,7 +901,6 @@ AsyncArray Net::Impl::forwardAsync(const String& outputName)
CV_Assert(!empty());
FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
#ifdef CV_CXX11
String layerName = outputName;
if (layerName.empty())
@ -922,9 +921,6 @@ AsyncArray Net::Impl::forwardAsync(const String& outputName)
isAsync = false;
return getBlobAsync(layerName);
#else
CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires build with enabled C++11");
#endif // CV_CXX11
}

View File

@ -265,11 +265,9 @@ struct Net::Impl : public detail::NetImplBase
Mat getBlob(String outputName) const;
#ifdef CV_CXX11
virtual AsyncArray getBlobAsync(const LayerPin& pin);
AsyncArray getBlobAsync(String outputName);
#endif // CV_CXX11
string dump(bool forceAllocation = false) const;

View File

@ -728,6 +728,10 @@ void Net::Impl::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
break;
#ifdef HAVE_CUDA
/* Risk: Not every operation in "NaryEltwise" is supported in the CUDA backend. There is a chance
that Concat's output is filled with data in both host and device, leading to data missing.
See https://github.com/opencv/opencv/issues/24721 for more details.
*/
if (preferableBackend == DNN_BACKEND_CUDA &&
(inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false ||
(inp_i_data->layerInstance->type != "Convolution" &&

View File

@ -97,8 +97,8 @@ bool OCL4DNNInnerProduct<Dtype>::Forward(const UMat& bottom,
max_image_size);
}
if (use_half_ && bias_term_)
{
if (bias_term_) {
if (use_half_) {
UMat biasOneMat = UMat::ones(M_, 1, CV_32F);
UMat newbias, tmpTop;
@ -106,6 +106,10 @@ bool OCL4DNNInnerProduct<Dtype>::Forward(const UMat& bottom,
convertFp16(top, tmpTop);
cv::gemm(biasOneMat, newbias, 1, tmpTop, 1, tmpTop, 0);
convertFp16(tmpTop, top);
} else {
UMat biasOnesMat = UMat::ones(M_, 1, CV_32F);
cv::gemm(biasOnesMat, bias, 1, top, 1, top, 0);
}
}
return ret;

View File

@ -86,6 +86,7 @@ public:
int getTensorShapeSize(int node_id, int node_input_id) {
const auto node = getNode(node_id);
const auto &input_name = node->getInputName(node_input_id);
// try to get from value_info
for (int i = 0; i < net.value_info_size(); i++) {
const auto value_info = net.value_info(i);
if (value_info.name() == input_name) {
@ -97,6 +98,18 @@ public:
}
}
}
// try to get from input
for (int i = 0; i < net.input_size(); i++) {
const auto input = net.input(i);
if (input.name() == input_name) {
if (input.has_type() && input.type().has_tensor_type() &&
input.type().tensor_type().has_shape()) {
return input.type().tensor_type().shape().dim_size();
} else {
return -1;
}
}
}
return -1;
}
@ -660,6 +673,10 @@ private:
[Input] -> LayerNorm -> [Output]
\
[weight], [bias]
Note: axes of ReduceMean must be:
- last element is the axis of last dimension (-1 or (input_ndims - 1))
- a list of adjacent axes, e.g. [1, 2, 3, ..., input_ndims - 1]
*/
class LayerNormSubGraph : public Subgraph
{
@ -683,19 +700,22 @@ public:
setFusedNode("LayerNormalization", input);
}
static float extractAxis(const Ptr<ImportGraphWrapper>& net, int node_id)
static std::vector<int64_t> extractAxis(const Ptr<ImportGraphWrapper>& net, int node_id)
{
// TODO: consider ReduceMean-18 which has axes as one of the inputs instead of attributes
Ptr<ImportNodeWrapper> mean_ptr = net->getNode(node_id);
opencv_onnx::NodeProto* mean_node = mean_ptr.dynamicCast<ONNXNodeWrapper>()->node;
int axis_ = -1;
std::vector<int64_t> axes;
for (int i = 0; i < mean_node->attribute_size(); i++)
{
opencv_onnx::AttributeProto attr = mean_node->attribute(i);
if (attr.name() != "axes")
continue;
axis_ = static_cast<int>(attr.ints(0));
for (int j = 0; j < attr.ints_size(); j++) {
axes.push_back(attr.ints(j));
}
return axis_;
}
return axes;
}
virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
@ -707,11 +727,31 @@ public:
if (pow_exp - 2 > 1e-5) // not pow(2)
return false;
int axis_mean1 = extractAxis(net, matchedNodesIds[mean]);
int axis_mean2 = extractAxis(net, matchedNodesIds[mean1]);
if (axis_mean1 != axis_mean2)
std::vector<int64_t> axes = extractAxis(net, matchedNodesIds[mean]);
// check whether it is -1 or last_axis or [axis, ..., last_axis]
int64_t input_ndims = static_cast<int64_t>(net.dynamicCast<ONNXGraphWrapper>()->getTensorShapeSize(matchedNodesIds[mean], 0));
if (input_ndims == -1) {
return false; // input shape unknown
}
// assume that axes are sorted in ascending order, e.g. [0, 1, 2, 3] or [-3, -2, -1]
if (axes.back() != -1 && axes.back() != (input_ndims - 1)) {
return false;
axis = axis_mean1;
}
for (size_t i = 0; i < axes.size() - 1; i++) {
if (axes[i] - axes[i + 1] != -1) {
return false;
}
}
std::vector<int64_t> axes1 = extractAxis(net, matchedNodesIds[mean1]);
if (axes.size() != axes1.size())
return false;
for (size_t i = 0; i < axes.size(); i++) {
if (((axes[i] + input_ndims) % input_ndims) != ((axes1[i] + input_ndims) % input_ndims)) {
return false;
}
}
axis = axes[0];
epsilon = extractConstant(net, matchedNodesIds[add], 1).at<float>(0);

View File

@ -22,6 +22,7 @@
#ifdef HAVE_PROTOBUF
#include <array>
#include <iostream>
#include <fstream>
#include <string>
@ -2619,6 +2620,7 @@ void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::Node
// Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1
int axis = layerParams.get<int>("axis", 1);
axis = normalize_axis(axis, inputShape.size());
for (size_t i = 0; i < inputs.size(); ++i)
{
inputShape[axis] = inputs[i].dims == (int)inputShape.size() ? inputs[i].size[axis] : 1;
@ -2831,6 +2833,11 @@ void ONNXImporter::parseElementWise(LayerParams& layerParams, const opencv_onnx:
layerParams.type = "NaryEltwise";
layerParams.set("operation", toLowerCase(node_proto.op_type()));
if (node_proto.op_type() == "Mod") {
if (layerParams.get<int>("fmod", 0)) {
layerParams.set("operation", "fmod");
};
}
// element-wise layers that can have >=1 inputs but actually have one input
if (node_proto.input_size() == 1 && (op_type == "max" || op_type == "min" || op_type == "mean" || op_type == "sum"))
@ -4004,10 +4011,11 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
dispatch["ScatterElements"] = dispatch["Scatter"] = dispatch["ScatterND"] = &ONNXImporter::parseScatter;
dispatch["Tile"] = &ONNXImporter::parseTile;
dispatch["LayerNormalization"] = &ONNXImporter::parseLayerNorm;
dispatch["GroupNormalization"] = &ONNXImporter::parseInstanceNormalization;
dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = dispatch["Pow"] = dispatch["Add"] =
dispatch["Sub"] = dispatch["Mul"] = dispatch["Div"] = dispatch["GreaterOrEqual"] =
dispatch["LessOrEqual"] = &ONNXImporter::parseElementWise;
dispatch["LessOrEqual"] = dispatch["Mod"] = &ONNXImporter::parseElementWise;
dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseElementWise;
dispatch["Where"] = &ONNXImporter::parseElementWise;

View File

@ -54,6 +54,7 @@
#define vec_type Dtype8
#define CALC_MEAN calc_mean8
#define MVN mvn8
#define MVN_GROUP mvn_group8
#define MEAN_FUSE mean_fuse8
#define MVN_FUSE mvn_fuse8
#elif NUM == 4
@ -62,6 +63,7 @@
#define vec_type Dtype4
#define CALC_MEAN calc_mean4
#define MVN mvn4
#define MVN_GROUP mvn_group4
#define MEAN_FUSE mean_fuse4
#define MVN_FUSE mvn_fuse4
#elif NUM == 1
@ -70,6 +72,7 @@
#define vec_type Dtype
#define CALC_MEAN calc_mean1
#define MVN mvn1
#define MVN_GROUP mvn_group1
#define MEAN_FUSE mean_fuse1
#define MVN_FUSE mvn_fuse1
#endif
@ -150,6 +153,54 @@ __kernel void MVN(__global const Dtype* src,
store(dst_vec, dst, index);
}
#elif defined KERNEL_MVN_GROUP
/*
 * Group-normalization kernel: each row x of `src` holds one (sample, group)
 * slice that was normalized with the precomputed per-row statistics
 * mean[x] / dev[x]; the result is then scaled/shifted with per-channel
 * weight/bias and optionally passed through a fused leaky-ReLU.
 *
 * NOTE(review): `norm_size` is not a kernel argument — it is presumably
 * injected as a compile-time define (-Dnorm_size=...) by the host code,
 * like NUM / Dtype / FUSE_RELU; confirm against the host-side build options.
 */
__kernel void MVN_GROUP(__global const Dtype* src,
                        const int rows,
                        const int cols,
                        const Dtype eps,
                        __global const Dtype* mean,
                        __global const Dtype* dev,
                        __global const Dtype* weight,
                        __global const Dtype* bias,
                        const int channels,
                        const int num_groups,
                        const float relu_slope,
                        __global Dtype* dst)
{
    int x = get_global_id(0);        // row index: one normalized group per row
    int y = get_global_id(1) * NUM;  // column index: NUM elements per work-item
    int index = x * cols + y;

    if (x >= rows || y >= cols)
        return;

    int group_size = channels / num_groups;
    int step = norm_size / group_size;
    // Map (row, column) back to the original channel to select weight/bias.
    int channel_index = x % num_groups * group_size + y / step;  // fixed: statement was missing ';'

    Dtype mean_val = mean[x];
    Dtype dev_val = dev[x];
    Dtype alpha;
#ifdef NORM_VARIANCE
    alpha = 1 / sqrt(eps + dev_val);
#else
    alpha = 1;
#endif

    Dtype w = weight[channel_index], b = bias[channel_index];
    vec_type src_vec = load(src, index) - (vec_type)mean_val;
    vec_type dst_vec = src_vec * alpha;
    dst_vec = dst_vec * w + (vec_type)b;
#ifdef FUSE_RELU
    // Leaky-ReLU: keep positive lanes, scale negative lanes by relu_slope.
    vec_type new_val = dst_vec * relu_slope;
    dst_vec = select(new_val, dst_vec, dst_vec > (vec_type)0.f);
#endif
    store(dst_vec, dst, index);
}
#elif defined KERNEL_MEAN_FUSE
__kernel void MEAN_FUSE(__global const T * A,

View File

@ -95,6 +95,12 @@ public:
Net net;
};
// NOTE(review): disabled via GTest's DISABLED_ prefix — presumably pending
// availability of the yolov8n.onnx model file; confirm before enabling.
TEST_P(DNNTestNetwork, DISABLED_YOLOv8n) {
    // Run the full YOLOv8n network end-to-end on a 640x640 input and require
    // that the IE and CUDA backends execute it natively (no CPU fallbacks).
    processNet("dnn/onnx/models/yolov8n.onnx", "", Size(640, 640), "output0");
    expectNoFallbacksFromIE(net);
    expectNoFallbacksFromCUDA(net);
}
TEST_P(DNNTestNetwork, AlexNet)
{
applyTestTag(CV_TEST_TAG_MEMORY_1GB);
@ -1454,6 +1460,71 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Backends, Eltwise, testing::Combine(
dnnBackendsAndTargets()
));
////////////////////////////////////////////////////////////////////////////////
// Element-wise layers
////////////////////////////////////////////////////////////////////////////////
using NaryEltwiseConcat = TestWithParam<tuple<std::vector<int>, tuple<Backend, Target>>>;
TEST_P(NaryEltwiseConcat, Accuracy) {
    // Unpack test parameters: the input shape plus the (backend, target) pair.
    auto params = GetParam();
    const std::vector<int> shape = get<0>(params);
    const auto backend = get<0>(get<1>(params));
    const auto target = get<1>(get<1>(params));

    /* Build the following net:

                <1x4x84>
                /
        [Input] -+-> Mul(B<1x84>) -> Concat(axis=1) -> [Output]
                 |                      |
                 +-> Sigmoid ----------+
    */
    Net net;

    // Constant multiplier B: same trailing dimension as the input, 1 elsewhere.
    std::vector<int> b_shape(shape.size() - 1, 1);
    b_shape.back() = shape.back();
    Mat b_blob(b_shape, CV_32FC1);
    randn(b_blob, 0.f, 1.f);
    LayerParams const_lp;
    const_lp.name = "mul_B";
    const_lp.type = "Const";
    const_lp.blobs.push_back(b_blob);
    const int const_id = net.addLayer(const_lp.name, const_lp.type, const_lp);

    // Element-wise multiplication of the input with the constant B.
    LayerParams mul_params;
    mul_params.name = "mul";
    mul_params.type = "NaryEltwise";
    mul_params.set("operation", "mul");
    const int mul_id = net.addLayer(mul_params.name, mul_params.type, mul_params);
    net.connect(0, 0, mul_id, 0);
    net.connect(const_id, 0, mul_id, 1);

    // Parallel Sigmoid branch fed directly from the network input.
    LayerParams sig_params;
    sig_params.name = "sigmoid";
    sig_params.type = "Sigmoid";
    const int sig_id = net.addLayer(sig_params.name, sig_params.type, sig_params);
    net.connect(0, 0, sig_id, 0);

    // Concatenate both branches along axis 1.
    LayerParams cat_params;
    cat_params.name = "concat";
    cat_params.type = "Concat";
    cat_params.set("axis", 1);
    const int cat_id = net.addLayer(cat_params.name, cat_params.type, cat_params);
    net.connect(mul_id, 0, cat_id, 0);
    net.connect(sig_id, 0, cat_id, 1);

    // Run the net against the reference implementation on the chosen backend/target.
    Mat input(shape, CV_32FC1);
    testLayer(input, net, backend, target, false);
}
INSTANTIATE_TEST_CASE_P(Layer_Test_Backends, NaryEltwiseConcat, testing::Combine(
testing::Values(std::vector<int>{1, 4, 84}),
dnnBackendsAndTargets())
);
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_layers_backends, dnnBackendsAndTargets());
}} // namespace

View File

@ -47,6 +47,10 @@ TEST_F(Test_Graph_Simplifier, LayerNormSubGraph) {
test("layer_norm_expanded_with_initializers", "LayerNormalization");
}
TEST_F(Test_Graph_Simplifier, LayerNormNoFusionSubGraph) {
    // The "layer_norm_no_fusion" model must NOT be fused into a single
    // LayerNormalization layer: the importer is expected to keep the
    // original NaryEltwise / Reduce / Sqrt layers.
    test("layer_norm_no_fusion", std::vector<std::string>{"NaryEltwise", "Reduce", "Sqrt"});
}
TEST_F(Test_Graph_Simplifier, ResizeSubgraph) {
/* Test for 6 subgraphs:
- GatherCastSubgraph

View File

@ -2050,7 +2050,7 @@ private:
net.setPreferableTarget(target);
Mat re;
ASSERT_NO_THROW(re = net.forward()); // runtime error
re = net.forward();
auto ptr_re = (float *) re.data;
for (int i = 0; i < re.total(); i++)
if (op == "sum"){

View File

@ -1033,14 +1033,10 @@ TEST_P(Test_two_inputs, basic)
randu(firstInp, 0, 100);
randu(secondInp, 0, 100);
#ifndef CV_CXX11
std::vector<String> input_names;
input_names.push_back("data");
input_names.push_back("second_input");
net.setInputsNames(input_names);
#else
net.setInputsNames({"data", "second_input"});
#endif
net.setInput(firstInp, "data", kScale);
net.setInput(secondInp, "second_input", kScaleInv);
net.setPreferableBackend(backendId);

View File

@ -311,6 +311,8 @@ static const TestCase testConformanceConfig[] = {
{"test_gridsample_nearest", 2, 1},
{"test_gridsample_reflection_padding", 2, 1},
{"test_gridsample_zeros_padding", 2, 1},
{"test_group_normalization_epsilon", 3, 1},
{"test_group_normalization_example", 3, 1},
{"test_gru_batchwise", 3, 2},
{"test_gru_defaults", 3, 1},
{"test_gru_seq_length", 4, 1},

View File

@ -736,6 +736,10 @@ CASE(test_gridsample_reflection_padding)
// no filter
CASE(test_gridsample_zeros_padding)
// no filter
CASE(test_group_normalization_epsilon)
// no filter
CASE(test_group_normalization_example)
// no filter
CASE(test_gru_batchwise)
// no filter
CASE(test_gru_defaults)
@ -1056,10 +1060,25 @@ CASE(test_mod_int64_fmod)
// no filter
CASE(test_mod_mixed_sign_float16)
// no filter
if (target == DNN_TARGET_OPENCL)
{
default_l1 = 0.0011; // Expected: (normL1) <= (l1), actual: 0.00104141 vs 1e-05
default_lInf = 0.0016; // Expected: (normInf) <= (lInf), actual: 0.00156212 vs 0.0001
}
CASE(test_mod_mixed_sign_float32)
// no filter
if (target == DNN_TARGET_OPENCL)
{
default_l1 = 0.0011; // Expected: (normL1) <= (l1), actual: 0.00104141 vs 1e-05
default_lInf = 0.0016; // Expected: (normInf) <= (lInf), actual: 0.00156212 vs 0.0001
}
CASE(test_mod_mixed_sign_float64)
// no filter
if (target == DNN_TARGET_OPENCL)
{
default_l1 = 0.0011; // Expected: (normL1) <= (l1), actual: 0.00104167 vs 1e-05
default_lInf = 0.0016; // Expected: (normInf) <= (lInf), actual: 0.00156251 vs 0.0001
}
CASE(test_mod_mixed_sign_int16)
// no filter
CASE(test_mod_mixed_sign_int32)

View File

@ -41,7 +41,7 @@
"test_cast_STRING_to_FLOAT",
"test_castlike_FLOAT_to_STRING_expanded",
"test_castlike_STRING_to_FLOAT_expanded",
"test_concat_1d_axis_negative_1",
"test_concat_1d_axis_negative_1", // 1d support is required
"test_div_uint8", // output type mismatch
"test_maxpool_2d_dilations",
"test_maxpool_2d_same_lower",

View File

@ -210,9 +210,6 @@
"test_min_uint8",
"test_mod_broadcast",
"test_mod_int64_fmod",
"test_mod_mixed_sign_float16",
"test_mod_mixed_sign_float32",
"test_mod_mixed_sign_float64",
"test_mod_mixed_sign_int16",
"test_mod_mixed_sign_int32",
"test_mod_mixed_sign_int64",

View File

@ -2673,24 +2673,36 @@ void yoloPostProcessing(
cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
}
// each row is [cx, cy, w, h, conf_obj, conf_class1, ..., conf_class80]
if (test_name == "yolonas"){
// outs contains 2 elemets of shape [1, 8400, 80] and [1, 8400, 4]. Concat them to get [1, 8400, 84]
Mat concat_out;
// squeeze the first dimension
outs[0] = outs[0].reshape(1, outs[0].size[1]);
outs[1] = outs[1].reshape(1, outs[1].size[1]);
cv::hconcat(outs[1], outs[0], concat_out);
outs[0] = concat_out;
// remove the second element
outs.pop_back();
// unsqueeze the first dimension
outs[0] = outs[0].reshape(0, std::vector<int>{1, 8400, 84});
}
for (auto preds : outs){
preds = preds.reshape(1, preds.size[1]); // [1, 8400, 85] -> [8400, 85]
for (int i = 0; i < preds.rows; ++i)
{
// filter out non objects
float obj_conf = (test_name != "yolov8") ? preds.at<float>(i, 4) : 1.0f;
// filter out non object
float obj_conf = (test_name == "yolov8" || test_name == "yolonas") ? 1.0f : preds.at<float>(i, 4) ;
if (obj_conf < conf_threshold)
continue;
Mat scores = preds.row(i).colRange((test_name != "yolov8") ? 5 : 4, preds.cols);
Mat scores = preds.row(i).colRange((test_name == "yolov8" || test_name == "yolonas") ? 4 : 5, preds.cols);
double conf;
Point maxLoc;
minMaxLoc(scores, 0, &conf, 0, &maxLoc);
conf = (test_name != "yolov8") ? conf * obj_conf : conf;
conf = (test_name == "yolov8" || test_name == "yolonas") ? conf : conf * obj_conf;
if (conf < conf_threshold)
continue;
@ -2701,9 +2713,14 @@ void yoloPostProcessing(
double w = det[2];
double h = det[3];
// std::cout << "cx: " << cx << " cy: " << cy << " w: " << w << " h: " << h << " conf: " << conf << " idx: " << maxLoc.x << std::endl;
// [x1, y1, x2, y2]
if (test_name == "yolonas"){
boxes.push_back(Rect2d(cx, cy, w, h));
} else {
boxes.push_back(Rect2d(cx - 0.5 * w, cy - 0.5 * h,
cx + 0.5 * w, cy + 0.5 * h));
}
classIds.push_back(maxLoc.x);
confidences.push_back(conf);
}
@ -2758,6 +2775,41 @@ TEST_P(Test_ONNX_nets, YOLOX)
1.0e-4, 1.0e-4);
}
// Accuracy test for the YOLO-NAS-S detector: runs the ONNX model through
// testYOLO() and checks detected class ids / scores / boxes against references.
TEST_P(Test_ONNX_nets, YOLONas)
{
    // model information: https://dl.opencv.org/models/yolo-nas/Readme.md
    std::string weightPath = _tf("models/yolo_nas_s.onnx", false);

    Size targetSize{640, 640};
    float conf_threshold = 0.50;
    float iou_threshold = 0.50;

    // Expected detections for the test image (COCO class ids and confidences).
    std::vector<int> refClassIds{1, 16, 7};
    std::vector<float> refScores{0.9720f, 0.9283f, 0.8990f};
    // Reference boxes given as corner coordinates: [x1, y1, x2, y2]
    std::vector<Rect2d> refBoxes{
        Rect2d(105.516, 173.696, 471.323, 430.433),
        Rect2d(109.241, 263.406, 259.872, 531.858),
        Rect2d(390.153, 142.492, 574.932, 222.709)
    };

    // Preprocessing: scale to [0,1], letterbox to 640x640 with pad value 114,
    // NCHW float32 blob.
    Image2BlobParams imgParams(
        Scalar::all(1/255.0),
        targetSize,
        Scalar::all(0),
        false,
        CV_32F,
        DNN_LAYOUT_NCHW,
        DNN_PMODE_LETTERBOX,
        Scalar::all(114)
    );

    // "yolonas" selects the YOLO-NAS-specific postprocessing path in testYOLO.
    testYOLO(
        weightPath, refClassIds, refScores, refBoxes,
        imgParams, conf_threshold, iou_threshold,
        1.0e-4, 1.0e-4, "yolonas");
}
TEST_P(Test_ONNX_nets, YOLOv8)
{
std::string weightPath = _tf("models/yolov8n.onnx", false);
@ -2804,7 +2856,7 @@ TEST_P(Test_ONNX_nets, YOLOv7)
CV_TEST_TAG_DEBUG_VERYLONG
);
std::string weightPath = _tf("models/yolov7_not_simplified.onnx", false);
std::string weightPath = _tf("models/yolov7.onnx", false);
// Reference, which is collected with input size of 640x640
std::vector<int> refClassIds{1, 16, 7};
std::vector<float> refScores{0.9614331f, 0.9589417f, 0.8679074f};
@ -3031,6 +3083,10 @@ TEST_P(Test_ONNX_nets, VitTrack) {
normAssert(ref_output3, outputs[2], "VitTrack output3");
}
TEST_P(Test_ONNX_layers, LayerNormNoFusion) {
    // Accuracy check for the "layer_norm_no_fusion" model — a layer-norm-like
    // graph that does not match the LayerNormalization fusion pattern and so
    // runs through the individual layers.
    testONNXModels("layer_norm_no_fusion");
}
INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());
}} // namespace

View File

@ -2,7 +2,7 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018-2023 Intel Corporation
// Copyright (C) 2018-2024 Intel Corporation
#include "precomp.hpp"
@ -10,7 +10,7 @@
// (cv::gapi::ie::backend() is still there and is defined always)
#include "backends/ie/giebackend.hpp"
#ifdef HAVE_INF_ENGINE
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
#if INF_ENGINE_RELEASE <= 2019010000
# error G-API IE module supports only OpenVINO IE >= 2019 R1

View File

@ -2,7 +2,7 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018-2020 Intel Corporation
// Copyright (C) 2018-2024 Intel Corporation
#ifndef OPENCV_GAPI_GIEBACKEND_HPP
#define OPENCV_GAPI_GIEBACKEND_HPP
@ -10,7 +10,7 @@
// Include anyway - cv::gapi::ie::backend() still needs to be defined
#include "opencv2/gapi/infer/ie.hpp"
#ifdef HAVE_INF_ENGINE
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
#include <ade/util/algorithm.hpp> // type_list_index
#include <condition_variable>

View File

@ -2,9 +2,9 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2020 Intel Corporation
// Copyright (C) 2020-2024 Intel Corporation
#ifdef HAVE_INF_ENGINE
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
#include <vector>
#include <string>

View File

@ -6,7 +6,7 @@
#include "../test_precomp.hpp"
#ifdef HAVE_INF_ENGINE
#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE < 2024000000
#include <stdexcept>
#include <mutex>

View File

@ -1662,14 +1662,14 @@ CvWindow::CvWindow(QString name, int arg2)
//Now attach everything
if (myToolBar)
myGlobalLayout->addWidget(myToolBar, Qt::AlignCenter);
myGlobalLayout->addWidget(myToolBar, 0, Qt::AlignLeft);
myGlobalLayout->addWidget(myView->getWidget(), Qt::AlignCenter);
myGlobalLayout->addWidget(myView->getWidget(), 0, Qt::AlignCenter);
myGlobalLayout->addLayout(myBarLayout, Qt::AlignCenter);
myGlobalLayout->addLayout(myBarLayout);
if (myStatusBar)
myGlobalLayout->addWidget(myStatusBar, Qt::AlignCenter);
myGlobalLayout->addWidget(myStatusBar, 0, Qt::AlignLeft);
setLayout(myGlobalLayout);
show();
@ -2079,7 +2079,6 @@ void CvWindow::createStatusBar()
{
myStatusBar = new QStatusBar(this);
myStatusBar->setSizeGripEnabled(false);
myStatusBar->setFixedHeight(20);
myStatusBar->setMinimumWidth(1);
myStatusBar_msg = new QLabel;

View File

@ -409,7 +409,9 @@ bool JpegDecoder::readData( Mat& img )
{
jpeg_decompress_struct* cinfo = &((JpegState*)m_state)->cinfo;
JpegErrorMgr* jerr = &((JpegState*)m_state)->jerr;
#ifndef JCS_EXTENSIONS
JSAMPARRAY buffer = 0;
#endif
if( setjmp( jerr->setjmp_buffer ) == 0 )
{
@ -429,6 +431,18 @@ bool JpegDecoder::readData( Mat& img )
}
#endif
#ifdef JCS_EXTENSIONS
if( color )
{
cinfo->out_color_space = JCS_EXT_BGR;
cinfo->out_color_components = 3;
}
else
{
cinfo->out_color_space = JCS_GRAYSCALE;
cinfo->out_color_components = 1;
}
#else
if( color )
{
if( cinfo->num_components != 4 )
@ -455,6 +469,7 @@ bool JpegDecoder::readData( Mat& img )
cinfo->out_color_components = 4;
}
}
#endif
// Check for Exif marker APP1
jpeg_saved_marker_ptr exif_marker = NULL;
@ -481,12 +496,17 @@ bool JpegDecoder::readData( Mat& img )
jpeg_start_decompress( cinfo );
#ifndef JCS_EXTENSIONS
buffer = (*cinfo->mem->alloc_sarray)((j_common_ptr)cinfo,
JPOOL_IMAGE, m_width*4, 1 );
#endif
uchar* data = img.ptr();
for( ; m_height--; data += step )
{
#ifdef JCS_EXTENSIONS
jpeg_read_scanlines( cinfo, &data, 1 );
#else
jpeg_read_scanlines( cinfo, buffer, 1 );
if( color )
{
@ -502,6 +522,7 @@ bool JpegDecoder::readData( Mat& img )
else
icvCvt_CMYK2Gray_8u_C4C1R( buffer[0], 0, data, 0, Size(m_width,1) );
}
#endif
}
result = true;
@ -593,8 +614,11 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
int width = img.cols, height = img.rows;
std::vector<uchar> out_buf(1 << 12);
#ifndef JCS_EXTENSIONS
AutoBuffer<uchar> _buffer;
uchar* buffer;
#endif
struct jpeg_compress_struct cinfo;
JpegErrorMgr jerr;
@ -629,8 +653,15 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
int _channels = img.channels();
int channels = _channels > 1 ? 3 : 1;
#ifdef JCS_EXTENSIONS
cinfo.input_components = _channels;
cinfo.in_color_space = _channels == 3 ? JCS_EXT_BGR
: _channels == 4 ? JCS_EXT_BGRX : JCS_GRAYSCALE;
#else
cinfo.input_components = channels;
cinfo.in_color_space = channels > 1 ? JCS_RGB : JCS_GRAYSCALE;
#endif
int quality = 95;
int progressive = 0;
@ -746,14 +777,17 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
jpeg_start_compress( &cinfo, TRUE );
#ifndef JCS_EXTENSIONS
if( channels > 1 )
_buffer.allocate(width*channels);
buffer = _buffer.data();
#endif
for( int y = 0; y < height; y++ )
{
uchar *data = img.data + img.step*y, *ptr = data;
#ifndef JCS_EXTENSIONS
if( _channels == 3 )
{
icvCvt_BGR2RGB_8u_C3R( data, 0, buffer, 0, Size(width,1) );
@ -764,6 +798,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
icvCvt_BGRA2BGR_8u_C4C3R( data, 0, buffer, 0, Size(width,1), 2 );
ptr = buffer;
}
#endif
jpeg_write_scanlines( &cinfo, &ptr, 1 );
}

View File

@ -210,15 +210,8 @@ struct ImageCodecInitializer
/**
 * Returns the process-wide image codec registry.
 *
 * C++11 guarantees thread-safe initialization of function-local statics
 * ("magic statics"), so a plain static instance is sufficient. The legacy
 * C++98 heap-allocated workaround (guarded by CV_CXX11) is dead code —
 * OpenCV requires C++11, and the same CV_CXX11 guards are being removed
 * elsewhere in this codebase — so it is dropped here.
 */
static
ImageCodecInitializer& getCodecs()
{
    static ImageCodecInitializer g_codecs;
    return g_codecs;
}
/**

View File

@ -166,7 +166,7 @@ TEST_P(Imgcodecs_Avif_Image_EncodeDecodeSuite, imencode_imdecode) {
cv::Exception);
return;
}
bool result;
bool result = true;
EXPECT_NO_THROW(
result = cv::imencode(".avif", img_original, buf, encoding_params_););
EXPECT_TRUE(result);

View File

@ -4490,7 +4490,7 @@ An example using applyColorMap function
/** @brief Applies a GNU Octave/MATLAB equivalent colormap on a given image.
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3.
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3. If CV_8UC3, then the CV_8UC1 image is generated internally using cv::COLOR_BGR2GRAY.
@param dst The result is the colormapped source image. Note: Mat::create is called on dst.
@param colormap The colormap to apply, see #ColormapTypes
*/
@ -4498,8 +4498,8 @@ CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, int colormap);
/** @brief Applies a user colormap on a given image.
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3.
@param dst The result is the colormapped source image. Note: Mat::create is called on dst.
@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3. If CV_8UC3, then the CV_8UC1 image is generated internally using cv::COLOR_BGR2GRAY.
@param dst The result is the colormapped source image of the same number of channels as userColor. Note: Mat::create is called on dst.
@param userColor The colormap to apply of type CV_8UC1 or CV_8UC3 and size 256
*/
CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, InputArray userColor);

View File

@ -17,7 +17,7 @@ ocv_add_module(java BINDINGS opencv_core opencv_imgproc PRIVATE_REQUIRED opencv_
include(${CMAKE_CURRENT_SOURCE_DIR}/common.cmake)
# UTILITY: glob specific sources and append them to list (type is in H, CPP, JAVA, AIDL)
# UTILITY: glob specific sources and append them to list (type is in H, CPP, JAVA)
macro(glob_more_specific_sources _type _root _output)
unset(_masks)
if(${_type} STREQUAL "H")
@ -26,8 +26,6 @@ macro(glob_more_specific_sources _type _root _output)
set(_masks "${_root}/cpp/*.cpp")
elseif(${_type} STREQUAL "JAVA")
set(_masks "${_root}/java/*.java" "${_root}/java/*.java.in")
elseif(${_type} STREQUAL "AIDL")
set(_masks "${_root}/java/*.aidl")
endif()
if (_masks)
file(GLOB _result ${_masks})

View File

@ -42,7 +42,6 @@ android {
main {
jniLibs.srcDirs = ['../../jni']
java.srcDirs = ['src'] // TODO Use original files instead of copied into build directory
aidl.srcDirs = ['src']
res.srcDirs = ['@OpenCV_SOURCE_DIR@/modules/java/android_sdk/android_gradle_lib/res']
manifest.srcFile 'AndroidManifest.xml'
}

View File

@ -121,8 +121,6 @@ android {
targetCompatibility JavaVersion.VERSION_@ANDROID_GRADLE_JAVA_VERSION_INIT@
}
@ANDROID_GRADLE_BUILD_FEATURE_AIDL@
buildTypes {
debug {
packagingOptions {
@ -139,7 +137,6 @@ android {
}
buildFeatures {
aidl true
prefabPublishing true
buildConfig true
}
@ -153,7 +150,6 @@ android {
main {
jniLibs.srcDirs = ['native/libs']
java.srcDirs = ['java/src']
aidl.srcDirs = ['java/src']
res.srcDirs = ['java/res']
manifest.srcFile 'java/AndroidManifest.xml'
}

View File

@ -46,6 +46,7 @@ public class JavaCamera2View extends CameraBridgeViewBase {
protected ImageReader mImageReader;
protected int mPreviewFormat = ImageFormat.YUV_420_888;
protected int mRequestTemplate = CameraDevice.TEMPLATE_PREVIEW;
private int mFrameRotation;
protected CameraDevice mCameraDevice;
protected CameraCaptureSession mCaptureSession;
@ -86,8 +87,8 @@ public class JavaCamera2View extends CameraBridgeViewBase {
}
}
protected boolean initializeCamera() {
Log.i(LOGTAG, "initializeCamera");
protected boolean selectCamera() {
Log.i(LOGTAG, "selectCamera");
CameraManager manager = (CameraManager) getContext().getSystemService(Context.CAMERA_SERVICE);
try {
String camList[] = manager.getCameraIdList();
@ -110,14 +111,10 @@ public class JavaCamera2View extends CameraBridgeViewBase {
}
}
}
if (mCameraID != null) {
Log.i(LOGTAG, "Opening camera: " + mCameraID);
manager.openCamera(mCameraID, mStateCallback, mBackgroundHandler);
} else { // make JavaCamera2View behaves in the same way as JavaCameraView
Log.i(LOGTAG, "Trying to open camera with the value (" + mCameraIndex + ")");
if (mCameraID == null) { // make JavaCamera2View behaves in the same way as JavaCameraView
Log.i(LOGTAG, "Selecting camera by index (" + mCameraIndex + ")");
if (mCameraIndex < camList.length) {
mCameraID = camList[mCameraIndex];
manager.openCamera(mCameraID, mStateCallback, mBackgroundHandler);
} else {
// CAMERA_DISCONNECTED is used when the camera id is no longer valid
throw new CameraAccessException(CameraAccessException.CAMERA_DISCONNECTED);
@ -125,11 +122,11 @@ public class JavaCamera2View extends CameraBridgeViewBase {
}
return true;
} catch (CameraAccessException e) {
Log.e(LOGTAG, "OpenCamera - Camera Access Exception", e);
Log.e(LOGTAG, "selectCamera - Camera Access Exception", e);
} catch (IllegalArgumentException e) {
Log.e(LOGTAG, "OpenCamera - Illegal Argument Exception", e);
Log.e(LOGTAG, "selectCamera - Illegal Argument Exception", e);
} catch (SecurityException e) {
Log.e(LOGTAG, "OpenCamera - Security Exception", e);
Log.e(LOGTAG, "selectCamera - Security Exception", e);
}
return false;
}
@ -204,6 +201,7 @@ public class JavaCamera2View extends CameraBridgeViewBase {
mImageReader.setOnImageAvailableListener(new ImageReader.OnImageAvailableListener() {
@Override
public void onImageAvailable(ImageReader reader) {
Image image = reader.acquireLatestImage();
if (image == null)
return;
@ -213,8 +211,9 @@ public class JavaCamera2View extends CameraBridgeViewBase {
assert (planes.length == 3);
assert (image.getFormat() == mPreviewFormat);
JavaCamera2Frame tempFrame = new JavaCamera2Frame(image);
RotatedCameraFrame tempFrame = new RotatedCameraFrame(new JavaCamera2Frame(image), mFrameRotation);
deliverAndDrawFrame(tempFrame);
tempFrame.mFrame.release();
tempFrame.release();
image.close();
}
@ -303,11 +302,22 @@ public class JavaCamera2View extends CameraBridgeViewBase {
protected boolean connectCamera(int width, int height) {
Log.i(LOGTAG, "setCameraPreviewSize(" + width + "x" + height + ")");
startBackgroundThread();
initializeCamera();
selectCamera();
try {
CameraManager manager = (CameraManager) getContext().getSystemService(Context.CAMERA_SERVICE);
CameraCharacteristics characteristics = manager.getCameraCharacteristics(mCameraID);
mFrameRotation = getFrameRotation(
characteristics.get(CameraCharacteristics.LENS_FACING) == CameraCharacteristics.LENS_FACING_FRONT,
characteristics.get(CameraCharacteristics.SENSOR_ORIENTATION));
boolean needReconfig = calcPreviewSize(width, height);
if (mFrameRotation % 180 == 0) {
mFrameWidth = mPreviewSize.getWidth();
mFrameHeight = mPreviewSize.getHeight();
} else {
mFrameWidth = mPreviewSize.getHeight();
mFrameHeight = mPreviewSize.getWidth();
}
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
@ -322,12 +332,16 @@ public class JavaCamera2View extends CameraBridgeViewBase {
mCaptureSession.close();
mCaptureSession = null;
}
createCameraPreviewSession();
}
if (mFpsMeter != null) {
mFpsMeter.setResolution(mFrameWidth, mFrameHeight);
}
Log.i(LOGTAG, "Opening camera: " + mCameraID);
manager.openCamera(mCameraID, mStateCallback, mBackgroundHandler);
} catch (CameraAccessException e) {
Log.e(LOGTAG, "OpenCamera - Camera Access Exception", e);
} catch (RuntimeException e) {
throw new RuntimeException("Interrupted while setCameraPreviewSize.", e);
}
@ -442,6 +456,7 @@ public class JavaCamera2View extends CameraBridgeViewBase {
mGray = new Mat();
}
@Override
public void release() {
mRgba.release();
mGray.release();

View File

@ -10,6 +10,7 @@ import org.opencv.videoio.VideoCapture;
import org.opencv.videoio.VideoWriter;
import android.content.Context;
import android.hardware.Camera;
import android.util.AttributeSet;
import android.util.Log;
import android.view.ViewGroup.LayoutParams;
@ -25,7 +26,7 @@ public class NativeCameraView extends CameraBridgeViewBase {
private Thread mThread;
protected VideoCapture mCamera;
protected NativeCameraFrame mFrame;
protected RotatedCameraFrame mFrame;
public NativeCameraView(Context context, int cameraId) {
super(context, cameraId);
@ -89,28 +90,65 @@ public class NativeCameraView extends CameraBridgeViewBase {
private boolean initializeCamera(int width, int height) {
synchronized (this) {
if (mCameraIndex == -1) {
Camera.CameraInfo cameraInfo = new Camera.CameraInfo();
int localCameraIndex = mCameraIndex;
if (mCameraIndex == CAMERA_ID_ANY) {
Log.d(TAG, "Try to open default camera");
mCamera = new VideoCapture(0, Videoio.CAP_ANDROID);
} else {
Log.d(TAG, "Try to open camera with index " + mCameraIndex);
mCamera = new VideoCapture(mCameraIndex, Videoio.CAP_ANDROID);
localCameraIndex = 0;
} else if (mCameraIndex == CAMERA_ID_BACK) {
Log.i(TAG, "Trying to open back camera");
for (int camIdx = 0; camIdx < Camera.getNumberOfCameras(); ++camIdx) {
Camera.getCameraInfo( camIdx, cameraInfo );
if (cameraInfo.facing == Camera.CameraInfo.CAMERA_FACING_BACK) {
localCameraIndex = camIdx;
break;
}
}
} else if (mCameraIndex == CAMERA_ID_FRONT) {
Log.i(TAG, "Trying to open front camera");
for (int camIdx = 0; camIdx < Camera.getNumberOfCameras(); ++camIdx) {
Camera.getCameraInfo( camIdx, cameraInfo );
if (cameraInfo.facing == Camera.CameraInfo.CAMERA_FACING_FRONT) {
localCameraIndex = camIdx;
break;
}
}
}
if (localCameraIndex == CAMERA_ID_BACK) {
Log.e(TAG, "Back camera not found!");
return false;
} else if (localCameraIndex == CAMERA_ID_FRONT) {
Log.e(TAG, "Front camera not found!");
return false;
}
Log.d(TAG, "Try to open camera with index " + localCameraIndex);
mCamera = new VideoCapture(localCameraIndex, Videoio.CAP_ANDROID);
if (mCamera == null)
return false;
if (mCamera.isOpened() == false)
return false;
mFrame = new NativeCameraFrame(mCamera);
if (mCameraIndex != CAMERA_ID_BACK && mCameraIndex != CAMERA_ID_FRONT)
Camera.getCameraInfo(localCameraIndex, cameraInfo);
int frameRotation = getFrameRotation(
cameraInfo.facing == Camera.CameraInfo.CAMERA_FACING_FRONT,
cameraInfo.orientation);
mFrame = new RotatedCameraFrame(new NativeCameraFrame(mCamera), frameRotation);
mCamera.set(Videoio.CAP_PROP_FRAME_WIDTH, width);
mCamera.set(Videoio.CAP_PROP_FRAME_HEIGHT, height);
mFrameWidth = (int)mCamera.get(Videoio.CAP_PROP_FRAME_WIDTH);
mFrameHeight = (int)mCamera.get(Videoio.CAP_PROP_FRAME_HEIGHT);
if (frameRotation % 180 == 0) {
mFrameWidth = (int) mCamera.get(Videoio.CAP_PROP_FRAME_WIDTH);
mFrameHeight = (int) mCamera.get(Videoio.CAP_PROP_FRAME_HEIGHT);
} else {
mFrameWidth = (int) mCamera.get(Videoio.CAP_PROP_FRAME_HEIGHT);
mFrameHeight = (int) mCamera.get(Videoio.CAP_PROP_FRAME_WIDTH);
}
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
@ -131,7 +169,10 @@ public class NativeCameraView extends CameraBridgeViewBase {
private void releaseCamera() {
synchronized (this) {
if (mFrame != null) mFrame.release();
if (mFrame != null) {
mFrame.mFrame.release();
mFrame.release();
}
if (mCamera != null) mCamera.release();
}
}
@ -162,6 +203,7 @@ public class NativeCameraView extends CameraBridgeViewBase {
mBgr = new Mat();
}
@Override
public void release() {
if (mGray != null) mGray.release();
if (mRgba != null) mRgba.release();

View File

@ -4,6 +4,7 @@ import java.util.List;
import org.opencv.BuildConfig;
import org.opencv.R;
import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.Size;
@ -17,8 +18,10 @@ import android.graphics.Canvas;
import android.graphics.Rect;
import android.util.AttributeSet;
import android.util.Log;
import android.view.Surface;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.WindowManager;
/**
* This is a basic class, implementing the interaction with Camera and OpenCV library.
@ -189,8 +192,93 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac
* This method returns single channel gray scale Mat with frame
*/
public Mat gray();
public void release();
};
public class RotatedCameraFrame implements CvCameraViewFrame {
@Override
public Mat gray() {
if (mRotation != 0) {
Core.rotate(mFrame.gray(), mGrayRotated, getCvRotationCode(mRotation));
return mGrayRotated;
} else {
return mFrame.gray();
}
}
@Override
public Mat rgba() {
if (mRotation != 0) {
Core.rotate(mFrame.rgba(), mRgbaRotated, getCvRotationCode(mRotation));
return mRgbaRotated;
} else {
return mFrame.rgba();
}
}
private int getCvRotationCode(int degrees) {
if (degrees == 90) {
return Core.ROTATE_90_CLOCKWISE;
} else if (degrees == 180) {
return Core.ROTATE_180;
} else {
return Core.ROTATE_90_COUNTERCLOCKWISE;
}
}
public RotatedCameraFrame(CvCameraViewFrame frame, int rotation) {
super();
mFrame = frame;
mRgbaRotated = new Mat();
mGrayRotated = new Mat();
mRotation = rotation;
}
@Override
public void release() {
mRgbaRotated.release();
mGrayRotated.release();
}
public CvCameraViewFrame mFrame;
private Mat mRgbaRotated;
private Mat mGrayRotated;
private int mRotation;
};
/**
* Calculates how to rotate camera frame to match current screen orientation
*/
protected int getFrameRotation(boolean cameraFacingFront, int cameraSensorOrientation) {
WindowManager windowManager = (WindowManager) getContext().getSystemService(Context.WINDOW_SERVICE);
int screenOrientation = windowManager.getDefaultDisplay().getRotation();
int screenRotation = 0;
switch (screenOrientation) {
case Surface.ROTATION_0:
screenRotation = 0;
break;
case Surface.ROTATION_90:
screenRotation = 90;
break;
case Surface.ROTATION_180:
screenRotation = 180;
break;
case Surface.ROTATION_270:
screenRotation = 270;
break;
}
int frameRotation;
if (cameraFacingFront) {
frameRotation = (cameraSensorOrientation + screenRotation) % 360;
} else {
frameRotation = (cameraSensorOrientation - screenRotation + 360) % 360;
}
return frameRotation;
}
public void surfaceChanged(SurfaceHolder arg0, int arg1, int arg2, int arg3) {
Log.d(TAG, "call surfaceChanged event");
synchronized(mSyncObject) {

View File

@ -10,9 +10,12 @@ import android.hardware.Camera.PreviewCallback;
import android.os.Build;
import android.util.AttributeSet;
import android.util.Log;
import android.view.Surface;
import android.view.ViewGroup.LayoutParams;
import android.view.WindowManager;
import org.opencv.BuildConfig;
import org.opencv.core.Core;
import org.opencv.core.CvType;
import org.opencv.core.Mat;
import org.opencv.core.Size;
@ -39,7 +42,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
private boolean mStopThread;
protected Camera mCamera;
protected JavaCameraFrame[] mCameraFrame;
protected RotatedCameraFrame[] mCameraFrame;
private SurfaceTexture mSurfaceTexture;
private int mPreviewFormat = ImageFormat.NV21;
@ -71,29 +74,21 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
boolean result = true;
synchronized (this) {
mCamera = null;
int cameraId = -1;
if (mCameraIndex == CAMERA_ID_ANY) {
Log.d(TAG, "Trying to open camera with old open()");
try {
mCamera = Camera.open();
}
catch (Exception e){
Log.e(TAG, "Camera is not available (in use or does not exist): " + e.getLocalizedMessage());
}
if(mCamera == null && Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
boolean connected = false;
for (int camIdx = 0; camIdx < Camera.getNumberOfCameras(); ++camIdx) {
Log.d(TAG, "Trying to open camera with new open(" + Integer.valueOf(camIdx) + ")");
try {
mCamera = Camera.open(camIdx);
connected = true;
cameraId = camIdx;
} catch (RuntimeException e) {
Log.e(TAG, "Camera #" + camIdx + "failed to open: " + e.getLocalizedMessage());
}
if (connected) break;
}
}
} else {
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
int localCameraIndex = mCameraIndex;
@ -126,6 +121,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
Log.d(TAG, "Trying to open camera with new open(" + Integer.valueOf(localCameraIndex) + ")");
try {
mCamera = Camera.open(localCameraIndex);
cameraId = localCameraIndex;
} catch (RuntimeException e) {
Log.e(TAG, "Camera #" + localCameraIndex + "failed to open: " + e.getLocalizedMessage());
}
@ -136,6 +132,11 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
if (mCamera == null)
return false;
android.hardware.Camera.CameraInfo info = new android.hardware.Camera.CameraInfo();
android.hardware.Camera.getCameraInfo(cameraId, info);
int frameRotation = getFrameRotation(
info.facing == Camera.CameraInfo.CAMERA_FACING_FRONT,
info.orientation);
/* Now set camera parameters */
try {
Camera.Parameters params = mCamera.getParameters();
@ -176,8 +177,16 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
mCamera.setParameters(params);
params = mCamera.getParameters();
int rawFrameWidth = params.getPreviewSize().width;
int rawFrameHeight = params.getPreviewSize().height;
if (frameRotation % 180 == 0) {
mFrameWidth = params.getPreviewSize().width;
mFrameHeight = params.getPreviewSize().height;
} else {
mFrameWidth = params.getPreviewSize().height;
mFrameHeight = params.getPreviewSize().width;
}
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
@ -196,14 +205,14 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
mCamera.setPreviewCallbackWithBuffer(this);
mFrameChain = new Mat[2];
mFrameChain[0] = new Mat(mFrameHeight + (mFrameHeight/2), mFrameWidth, CvType.CV_8UC1);
mFrameChain[1] = new Mat(mFrameHeight + (mFrameHeight/2), mFrameWidth, CvType.CV_8UC1);
mFrameChain[0] = new Mat(rawFrameHeight + (rawFrameHeight/2), rawFrameWidth, CvType.CV_8UC1);
mFrameChain[1] = new Mat(rawFrameHeight + (rawFrameHeight/2), rawFrameWidth, CvType.CV_8UC1);
AllocateCache();
mCameraFrame = new JavaCameraFrame[2];
mCameraFrame[0] = new JavaCameraFrame(mFrameChain[0], mFrameWidth, mFrameHeight);
mCameraFrame[1] = new JavaCameraFrame(mFrameChain[1], mFrameWidth, mFrameHeight);
mCameraFrame = new RotatedCameraFrame[2];
mCameraFrame[0] = new RotatedCameraFrame(new JavaCameraFrame(mFrameChain[0], rawFrameWidth, rawFrameHeight), frameRotation);
mCameraFrame[1] = new RotatedCameraFrame(new JavaCameraFrame(mFrameChain[1], rawFrameWidth, rawFrameHeight), frameRotation);
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.HONEYCOMB) {
mSurfaceTexture = new SurfaceTexture(MAGIC_TEXTURE_ID);
@ -240,7 +249,9 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
mFrameChain[1].release();
}
if (mCameraFrame != null) {
mCameraFrame[0].mFrame.release();
mCameraFrame[0].release();
mCameraFrame[1].mFrame.release();
mCameraFrame[1].release();
}
}
@ -336,6 +347,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
mRgba = new Mat();
}
@Override
public void release() {
mRgba.release();
}

View File

@ -1254,13 +1254,13 @@ JNIEXPORT void JNICALL Java_org_opencv_%(module)s_%(j_cls)s_delete
def copy_java_files(java_files_dir, java_base_path, default_package_path='org/opencv/'):
global total_files, updated_files
java_files = []
re_filter = re.compile(r'^.+\.(java|aidl|kt)(.in)?$')
re_filter = re.compile(r'^.+\.(java|kt)(.in)?$')
for root, dirnames, filenames in os.walk(java_files_dir):
java_files += [os.path.join(root, filename) for filename in filenames if re_filter.match(filename)]
java_files = [f.replace('\\', '/') for f in java_files]
re_package = re.compile(r'^package +(.+);')
re_prefix = re.compile(r'^.+[\+/]([^\+]+).(java|aidl|kt)(.in)?$')
re_prefix = re.compile(r'^.+[\+/]([^\+]+).(java|kt)(.in)?$')
for java_file in java_files:
src = checkFileRemap(java_file)
with open(src, 'r') as f:

View File

@ -27,7 +27,7 @@ public:
* @param prototxt_path prototxt file path for the super resolution model
* @param model_path model file path for the super resolution model
*/
CV_WRAP BarcodeDetector(const std::string &prototxt_path, const std::string &model_path);
CV_WRAP BarcodeDetector(CV_WRAP_FILE_PATH const std::string &prototxt_path, CV_WRAP_FILE_PATH const std::string &model_path);
~BarcodeDetector();
/** @brief Decodes barcode in image once it's found by the detect() method.

View File

@ -82,8 +82,8 @@ public:
* @param backend_id the id of backend
* @param target_id the id of target device
*/
CV_WRAP static Ptr<FaceDetectorYN> create(const String& model,
const String& config,
CV_WRAP static Ptr<FaceDetectorYN> create(CV_WRAP_FILE_PATH const String& model,
CV_WRAP_FILE_PATH const String& config,
const Size& input_size,
float score_threshold = 0.9f,
float nms_threshold = 0.3f,
@ -154,7 +154,7 @@ public:
* @param backend_id the id of backend
* @param target_id the id of target device
*/
CV_WRAP static Ptr<FaceRecognizerSF> create(const String& model, const String& config, int backend_id = 0, int target_id = 0);
CV_WRAP static Ptr<FaceRecognizerSF> create(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config, int backend_id = 0, int target_id = 0);
};
//! @}

View File

@ -483,39 +483,44 @@ void CharucoBoardImpl::generateImage(Size outSize, OutputArray img, int marginSi
Mat noMarginsImg =
out.colRange(marginSize, out.cols - marginSize).rowRange(marginSize, out.rows - marginSize);
double totalLengthX, totalLengthY;
totalLengthX = squareLength * size.width;
totalLengthY = squareLength * size.height;
// proportional transformation
double xReduction = totalLengthX / double(noMarginsImg.cols);
double yReduction = totalLengthY / double(noMarginsImg.rows);
// the size of the chessboard square depends on the location of the chessboard
float pixInSquare = 0.f;
// the size of the chessboard in pixels
Size pixInChessboard(noMarginsImg.cols, noMarginsImg.rows);
// determine the zone where the chessboard is placed
Mat chessboardZoneImg;
if(xReduction > yReduction) {
int nRows = int(totalLengthY / xReduction);
int rowsMargins = (noMarginsImg.rows - nRows) / 2;
chessboardZoneImg = noMarginsImg.rowRange(rowsMargins, noMarginsImg.rows - rowsMargins);
} else {
int nCols = int(totalLengthX / yReduction);
int colsMargins = (noMarginsImg.cols - nCols) / 2;
chessboardZoneImg = noMarginsImg.colRange(colsMargins, noMarginsImg.cols - colsMargins);
float pixInSquareX = (float)noMarginsImg.cols / (float)size.width;
float pixInSquareY = (float)noMarginsImg.rows / (float)size.height;
Point startChessboard(0, 0);
if (pixInSquareX <= pixInSquareY) {
// the width of "noMarginsImg" image determines the dimensions of the chessboard
pixInSquare = pixInSquareX;
pixInChessboard.height = cvRound(pixInSquare*size.height);
int rowsMargin = (noMarginsImg.rows - pixInChessboard.height) / 2;
startChessboard.y = rowsMargin;
}
else {
// the height of "noMarginsImg" image determines the dimensions of the chessboard
pixInSquare = pixInSquareY;
pixInChessboard.width = cvRound(pixInSquare*size.width);
int colsMargin = (noMarginsImg.cols - pixInChessboard.width) / 2;
startChessboard.x = colsMargin;
}
// determine the zone where the chessboard is located
Mat chessboardZoneImg = noMarginsImg(Rect(startChessboard, pixInChessboard));
// determine the margins to draw only the markers
// take the minimum just to be sure
double squareSizePixels = min(double(chessboardZoneImg.cols) / double(size.width),
double(chessboardZoneImg.rows) / double(size.height));
// marker size in pixels
const float pixInMarker = markerLength/squareLength*pixInSquare;
// the size of the marker margin in pixels
const float pixInMarginMarker = 0.5f*(pixInSquare - pixInMarker);
double diffSquareMarkerLength = (squareLength - markerLength) / 2;
int diffSquareMarkerLengthPixels =
int(diffSquareMarkerLength * squareSizePixels / squareLength);
// determine the zone where the aruco markers are located
int endArucoX = cvRound(pixInSquare*(size.width-1)+pixInMarginMarker+pixInMarker);
int endArucoY = cvRound(pixInSquare*(size.height-1)+pixInMarginMarker+pixInMarker);
Mat arucoZone = chessboardZoneImg(Range(cvRound(pixInMarginMarker), endArucoY), Range(cvRound(pixInMarginMarker), endArucoX));
// draw markers
Mat markersImg;
Board::Impl::generateImage(chessboardZoneImg.size(), markersImg, diffSquareMarkerLengthPixels, borderBits);
markersImg.copyTo(chessboardZoneImg);
Board::Impl::generateImage(arucoZone.size(), arucoZone, 0, borderBits);
// now draw black squares
for(int y = 0; y < size.height; y++) {
@ -527,12 +532,11 @@ void CharucoBoardImpl::generateImage(Size outSize, OutputArray img, int marginSi
if(y % 2 != x % 2) continue; // white corner, dont do anything
}
double startX, startY;
startX = squareSizePixels * double(x);
startY = squareSizePixels * double(y);
float startX = pixInSquare * float(x);
float startY = pixInSquare * float(y);
Mat squareZone = chessboardZoneImg.rowRange(int(startY), int(startY + squareSizePixels))
.colRange(int(startX), int(startX + squareSizePixels));
Mat squareZone = chessboardZoneImg(Range(cvRound(startY), cvRound(startY + pixInSquare)),
Range(cvRound(startX), cvRound(startX + pixInSquare)));
squareZone.setTo(0);
}

View File

@ -684,7 +684,7 @@ struct ArucoDetector::ArucoDetectorImpl {
contours.clear();
// sort candidates from big to small
std::sort(candidateTree.begin(), candidateTree.end());
std::stable_sort(candidateTree.begin(), candidateTree.end());
// group index for each candidate
vector<int> groupId(candidateTree.size(), -1);
vector<vector<size_t> > groupedCandidates;
@ -728,11 +728,11 @@ struct ArucoDetector::ArucoDetectorImpl {
for (vector<size_t>& grouped : groupedCandidates) {
if (detectorParams.detectInvertedMarker) // if detectInvertedMarker choose smallest contours
std::sort(grouped.begin(), grouped.end(), [](const size_t &a, const size_t &b) {
std::stable_sort(grouped.begin(), grouped.end(), [](const size_t &a, const size_t &b) {
return a > b;
});
else // if detectInvertedMarker==false choose largest contours
std::sort(grouped.begin(), grouped.end());
std::stable_sort(grouped.begin(), grouped.end());
size_t currId = grouped[0];
isSelectedContours[currId] = true;
for (size_t i = 1ull; i < grouped.size(); i++) {
@ -780,7 +780,7 @@ struct ArucoDetector::ArucoDetectorImpl {
vector<int> idsTmp(ncandidates, -1);
vector<int> rotated(ncandidates, 0);
vector<uint8_t> validCandidates(ncandidates, 0);
vector<bool> was(ncandidates, false);
vector<uint8_t> was(ncandidates, false);
bool checkCloseContours = true;
int maxDepth = 0;

View File

@ -52,5 +52,7 @@
#include "opencv2/core/private.hpp"
#include <numeric>
#include <array>
#include <vector>
#endif

View File

@ -15,6 +15,7 @@
#include "quirc.h"
#endif
#include <array>
#include <limits>
#include <cmath>
#include <queue>

View File

@ -771,6 +771,57 @@ TEST_P(CharucoBoard, testWrongSizeDetection)
ASSERT_TRUE(detectedCharucoIds.empty());
}
TEST(CharucoBoardGenerate, issue_24806)
{
aruco::Dictionary dict = aruco::getPredefinedDictionary(aruco::DICT_4X4_1000);
const float squareLength = 13.f, markerLength = 10.f;
const Size boardSize(7ull, 4ull);
const aruco::CharucoBoard board(boardSize, squareLength, markerLength, dict);
const int marginSize = 24;
Mat boardImg;
// generate chessboard image
board.generateImage(Size(400, 300), boardImg, marginSize);
// This condition checks that the width of the image determines the dimensions of the chessboard in this test
CV_Assert((float)(boardImg.cols) / (float)boardSize.width <=
(float)(boardImg.rows) / (float)boardSize.height);
// prepare data for chessboard image test
Mat noMarginsImg = boardImg(Range(marginSize, boardImg.rows - marginSize),
Range(marginSize, boardImg.cols - marginSize));
const float pixInSquare = (float)(noMarginsImg.cols) / (float)boardSize.width;
Size pixInChessboard(cvRound(pixInSquare*boardSize.width), cvRound(pixInSquare*boardSize.height));
const Point startChessboard((noMarginsImg.cols - pixInChessboard.width) / 2,
(noMarginsImg.rows - pixInChessboard.height) / 2);
Mat chessboardZoneImg = noMarginsImg(Rect(startChessboard, pixInChessboard));
// B - black pixel, W - white pixel
// chessboard corner 1:
// B W
// W B
Mat goldCorner1 = (Mat_<uint8_t>(2, 2) <<
0, 255,
255, 0);
// B - black pixel, W - white pixel
// chessboard corner 2:
// W B
// B W
Mat goldCorner2 = (Mat_<uint8_t>(2, 2) <<
255, 0,
0, 255);
// test chessboard corners in generated image
for (const Point3f& p: board.getChessboardCorners()) {
Point2f chessCorner(pixInSquare*(p.x/squareLength),
pixInSquare*(p.y/squareLength));
Mat winCorner = chessboardZoneImg(Rect(Point(cvRound(chessCorner.x) - 1, cvRound(chessCorner.y) - 1), Size(2, 2)));
bool eq = (cv::countNonZero(goldCorner1 != winCorner) == 0) | (cv::countNonZero(goldCorner2 != winCorner) == 0);
ASSERT_TRUE(eq);
}
// TODO: fix aruco generateImage and add test aruco corners for generated image
}
// Temporary disabled in https://github.com/opencv/opencv/pull/24338
// 5.x version produces conrnes with different shape than 4.x (32F_C2 instead of 2x 32FC1)
TEST(Charuco, DISABLED_testSeveralBoardsWithCustomIds)

View File

@ -7,10 +7,6 @@
#include "opencv2/ts.hpp"
#include "opencv2/objdetect.hpp"
#if defined CV_CXX11
#include <random>
#else
#include <cstdlib>
#endif
#include <random>
#endif

View File

@ -5,16 +5,6 @@
#include "test_precomp.hpp"
namespace opencv_test { namespace {
#if !defined CV_CXX11
// Wrapper for generating seeded random number via std::rand.
template<unsigned Seed>
class SeededRandFunctor {
public:
SeededRandFunctor() { std::srand(Seed); }
int operator()(int i) { return std::rand() % (i + 1); }
};
#endif
std::string encode_qrcode_images_name[] = {
"version1_mode1.png", "version1_mode2.png", "version1_mode4.png",
"version2_mode1.png", "version2_mode2.png", "version2_mode4.png",

Some files were not shown because too many files have changed in this diff Show More