Merge remote-tracking branch 'upstream/3.4' into merge-3.4

This commit is contained in:
Alexander Alekhin 2022-08-14 15:50:42 +00:00
commit 2ebdc04787
37 changed files with 246 additions and 90 deletions

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.8.11 FATAL_ERROR)
cmake_minimum_required(VERSION ${MIN_VER_CMAKE} FATAL_ERROR)
project(Carotene)

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR)
cmake_minimum_required(VERSION ${MIN_VER_CMAKE} FATAL_ERROR)
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)

View File

@ -46,7 +46,7 @@
set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F")
list(APPEND CPU_ALL_OPTIMIZATIONS "AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL")
list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16)
list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16 NEON_DOTPROD)
list(APPEND CPU_ALL_OPTIMIZATIONS MSA)
list(APPEND CPU_ALL_OPTIMIZATIONS VSX VSX3)
list(APPEND CPU_ALL_OPTIMIZATIONS RVV)
@ -329,6 +329,7 @@ if(X86 OR X86_64)
elseif(ARM OR AARCH64)
ocv_update(CPU_NEON_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_neon.cpp")
ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp")
ocv_update(CPU_NEON_DOTPROD_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_dotprod.cpp")
if(NOT AARCH64)
ocv_update(CPU_KNOWN_OPTIMIZATIONS "VFPV3;NEON;FP16")
if(NOT MSVC)
@ -340,9 +341,11 @@ elseif(ARM OR AARCH64)
endif()
ocv_update(CPU_FP16_IMPLIES "NEON")
else()
ocv_update(CPU_KNOWN_OPTIMIZATIONS "NEON;FP16")
ocv_update(CPU_KNOWN_OPTIMIZATIONS "NEON;FP16;NEON_DOTPROD")
ocv_update(CPU_NEON_FLAGS_ON "")
ocv_update(CPU_FP16_IMPLIES "NEON")
ocv_update(CPU_NEON_DOTPROD_FLAGS_ON "-march=armv8.2-a+dotprod")
ocv_update(CPU_NEON_DOTPROD_IMPLIES "NEON")
set(CPU_BASELINE "NEON;FP16" CACHE STRING "${HELP_CPU_BASELINE}")
endif()
elseif(MIPS)

View File

@ -136,7 +136,7 @@ if(CV_GCC OR CV_CLANG)
endif()
add_extra_compiler_option(-Wsign-promo)
add_extra_compiler_option(-Wuninitialized)
if(CV_GCC AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.0) AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0))
if(CV_GCC AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.0) AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0 OR ARM))
add_extra_compiler_option(-Wno-psabi)
endif()
if(HAVE_CXX11)

View File

@ -253,12 +253,13 @@ if(CUDA_FOUND)
endif()
if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
# TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0)
# TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7)
ocv_filter_available_architecture(__cuda_arch_bin
5.3
6.2
7.2
7.0
8.7
)
else()
set(__cuda_arch_bin "${_nvcc_out}")

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.1)
cmake_minimum_required(VERSION ${MIN_VER_CMAKE})
if(" ${HALIDE_ROOT_DIR}" STREQUAL " ")
unset(HALIDE_ROOT_DIR CACHE)

View File

@ -0,0 +1,24 @@
#include <stdio.h>
#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
#include "arm_neon.h"
/*
 * Probe for the Arm NEON dot-product intrinsic vdotq_u32.
 * Runs one dot-product accumulate over all-zero inputs and returns lane 0
 * of the result, which is therefore 0.
 */
int test()
{
    /* Fixed-width element types so the pointers match the intrinsics' signatures. */
    const uint32_t src[4] = { 0, 0, 0, 0 };
    uint32_t dst[4];

    /* Go through the NEON load intrinsics rather than dereferencing casted
       pointers: this keeps the const qualifier on src and avoids type punning. */
    uint32x4_t v_src = vld1q_u32(src);
    uint8x16_t v_m0 = vld1q_u8((const uint8_t*)src);
    uint8x16_t v_m1 = vld1q_u8((const uint8_t*)src);

    /* The intrinsic under test: v_src is the accumulator, v_m0/v_m1 the 8-bit operands. */
    uint32x4_t v_dst = vdotq_u32(v_src, v_m0, v_m1);

    vst1q_u32(dst, v_dst);
    return (int)dst[0];
}
#else
#error "DOTPROD is not supported"
#endif
/* Entry point of the probe program: runs test() and prints its return value. */
int main()
{
    const int probe_result = test();
    printf("%d\n", probe_result);
    return 0;
}

View File

@ -2692,7 +2692,7 @@ CV_EXPORTS_W int recoverPose( InputArray points1, InputArray points2,
InputOutputArray mask = noArray());
/** @brief Recovers the relative camera rotation and the translation from an estimated essential
matrix and the corresponding points in two images, using cheirality check. Returns the number of
matrix and the corresponding points in two images, using chirality check. Returns the number of
inliers that pass the check.
@param E The input essential matrix.
@ -2710,11 +2710,11 @@ described below.
therefore is only known up to scale, i.e. t is the direction of the translation vector and has unit
length.
@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks
inliers in points1 and points2 for then given essential matrix E. Only these inliers will be used to
recover pose. In the output mask only inliers which pass the cheirality check.
inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to
recover pose. In the output mask only inliers which pass the chirality check.
This function decomposes an essential matrix using @ref decomposeEssentialMat and then verifies
possible pose hypotheses by doing cheirality check. The cheirality check means that the
possible pose hypotheses by doing chirality check. The chirality check means that the
triangulated 3D points should have positive depth. Some details can be found in @cite Nister03.
This function can be used to process the output E and mask from @ref findEssentialMat. In this
@ -2761,8 +2761,8 @@ length.
are feature points from cameras with same focal length and principal point.
@param pp principal point of the camera.
@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks
inliers in points1 and points2 for then given essential matrix E. Only these inliers will be used to
recover pose. In the output mask only inliers which pass the cheirality check.
inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to
recover pose. In the output mask only inliers which pass the chirality check.
This function differs from the one above in that it computes the camera intrinsic matrix from the focal length and
principal point:
@ -2797,12 +2797,12 @@ length.
@param distanceThresh threshold distance which is used to filter out far away points (i.e. infinite
points).
@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks
inliers in points1 and points2 for then given essential matrix E. Only these inliers will be used to
recover pose. In the output mask only inliers which pass the cheirality check.
inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to
recover pose. In the output mask only inliers which pass the chirality check.
@param triangulatedPoints 3D points which were reconstructed by triangulation.
This function differs from the one above in that it outputs the triangulated 3D points that are used for
the cheirality check.
the chirality check.
*/
CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray points2,
InputArray cameraMatrix, OutputArray R, OutputArray t, double distanceThresh, InputOutputArray mask = noArray(),

View File

@ -1232,7 +1232,7 @@ int ChessBoardDetector::cleanFoundConnectedQuads(std::vector<ChessBoardQuad*>& q
centers[i] = ci;
center += ci;
}
center.x *= (1.0f / quad_count);
center *= (1.0f / quad_count);
// If we still have more quadrangles than we should,
// we try to eliminate bad ones based on minimizing the bounding box.
@ -1256,7 +1256,7 @@ int ChessBoardDetector::cleanFoundConnectedQuads(std::vector<ChessBoardQuad*>& q
Mat points(1, quad_count, CV_32FC2, &centers[0]);
cv::convexHull(points, hull, true);
centers[skip] = temp;
double hull_area = contourArea(hull, true);
double hull_area = contourArea(hull, false);
// remember smallest box area
if (hull_area < min_box_area)
@ -1298,6 +1298,7 @@ int ChessBoardDetector::cleanFoundConnectedQuads(std::vector<ChessBoardQuad*>& q
quad_group[min_box_area_index] = quad_group[quad_count];
centers[min_box_area_index] = centers[quad_count];
}
quad_group.resize(quad_count);
return quad_count;
}

View File

@ -601,7 +601,7 @@ int cv::recoverPose( InputArray E, InputArray _points1, InputArray _points2,
P3(Range::all(), Range(0, 3)) = R1 * 1.0; P3.col(3) = -t * 1.0;
P4(Range::all(), Range(0, 3)) = R2 * 1.0; P4.col(3) = -t * 1.0;
// Do the cheirality check.
// Do the chirality check.
// Notice here a threshold dist is used to filter
// out far away points (i.e. infinite points) since
// their depth may vary between positive and negative.

View File

@ -6,7 +6,7 @@ ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2 VSX3)
ocv_add_dispatched_file(convert SSE2 AVX2 VSX3)
ocv_add_dispatched_file(convert_scale SSE2 AVX2)
ocv_add_dispatched_file(count_non_zero SSE2 AVX2)
ocv_add_dispatched_file(matmul SSE2 SSE4_1 AVX2 AVX512_SKX)
ocv_add_dispatched_file(matmul SSE2 SSE4_1 AVX2 AVX512_SKX NEON_DOTPROD)
ocv_add_dispatched_file(mean SSE2 AVX2)
ocv_add_dispatched_file(merge SSE2 AVX2)
ocv_add_dispatched_file(split SSE2 AVX2)

View File

@ -79,6 +79,10 @@
# endif
# define CV_FP16 1
#endif
#ifdef CV_CPU_COMPILE_NEON_DOTPROD
# include <arm_neon.h>
# define CV_NEON_DOT 1
#endif
#ifdef CV_CPU_COMPILE_AVX2
# include <immintrin.h>
# define CV_AVX2 1

View File

@ -420,6 +420,27 @@
#endif
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
// NEON_DOTPROD dispatch helpers. Three configurations are distinguished (all require
// optimizations not disabled and CV_ENABLE_INTRINSICS defined, otherwise the last one applies):
//  - CV_CPU_COMPILE_NEON_DOTPROD defined: support is a compile-time constant (1) and the
//    CALL macros return unconditionally (cpu_baseline::fn / opt_NEON_DOTPROD::fn).
//  - CV_CPU_DISPATCH_COMPILE_NEON_DOTPROD defined: support is queried at run time through
//    cv::checkHardwareSupport(CV_CPU_NEON_DOTPROD) and the CALL macros return
//    opt_NEON_DOTPROD::fn only when that check succeeds.
//  - otherwise: the feature is reported as unavailable and the CALL macros expand to nothing.
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON_DOTPROD
# define CV_TRY_NEON_DOTPROD 1
# define CV_CPU_FORCE_NEON_DOTPROD 1
# define CV_CPU_HAS_SUPPORT_NEON_DOTPROD 1
# define CV_CPU_CALL_NEON_DOTPROD(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_NEON_DOTPROD_(fn, args) return (opt_NEON_DOTPROD::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON_DOTPROD
# define CV_TRY_NEON_DOTPROD 1
# define CV_CPU_FORCE_NEON_DOTPROD 0
# define CV_CPU_HAS_SUPPORT_NEON_DOTPROD (cv::checkHardwareSupport(CV_CPU_NEON_DOTPROD))
# define CV_CPU_CALL_NEON_DOTPROD(fn, args) if (CV_CPU_HAS_SUPPORT_NEON_DOTPROD) return (opt_NEON_DOTPROD::fn args)
# define CV_CPU_CALL_NEON_DOTPROD_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON_DOTPROD) return (opt_NEON_DOTPROD::fn args)
#else
# define CV_TRY_NEON_DOTPROD 0
# define CV_CPU_FORCE_NEON_DOTPROD 0
# define CV_CPU_HAS_SUPPORT_NEON_DOTPROD 0
# define CV_CPU_CALL_NEON_DOTPROD(fn, args)
# define CV_CPU_CALL_NEON_DOTPROD_(fn, args)
#endif
// Dispatch-chain link: emits the NEON_DOTPROD call attempt, then expands the chain macro
// selected by `mode` with the remaining arguments.
#define __CV_CPU_DISPATCH_CHAIN_NEON_DOTPROD(fn, args, mode, ...) CV_CPU_CALL_NEON_DOTPROD(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_MSA
# define CV_TRY_MSA 1
# define CV_CPU_FORCE_MSA 1

View File

@ -268,6 +268,7 @@ namespace cv {
#define CV_CPU_AVX_5124FMAPS 27
#define CV_CPU_NEON 100
#define CV_CPU_NEON_DOTPROD 101
#define CV_CPU_MSA 150
@ -324,6 +325,7 @@ enum CpuFeatures {
CPU_AVX_5124FMAPS = 27,
CPU_NEON = 100,
CPU_NEON_DOTPROD = 101,
CPU_MSA = 150,

View File

@ -78,8 +78,6 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#define CV_NEON_AARCH64 0
#endif
// TODO
#define CV_NEON_DOT 0
//////////// Utils ////////////
@ -665,11 +663,22 @@ inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64
}
// 8 >> 32
#ifdef CV_NEON_DOT
// Generates both v_dotprod_expand overloads for one 8-bit vector type using the
// vdotq_<suffix> intrinsic:
//  - (a, b):    accumulates into a zero vector created with vdupq_n_<suffix>(0);
//  - (a, b, c): accumulates into c.
// _Tpvec1 is the 32-bit result/accumulator wrapper, _Tpvec2 the 8-bit input wrapper.
#define OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_OP(_Tpvec1, _Tpvec2, suffix) \
inline _Tpvec1 v_dotprod_expand(const _Tpvec2& a, const _Tpvec2& b) \
{ \
return _Tpvec1(vdotq_##suffix(vdupq_n_##suffix(0), a.val, b.val));\
} \
inline _Tpvec1 v_dotprod_expand(const _Tpvec2& a, const _Tpvec2& b, const _Tpvec1& c) \
{ \
return _Tpvec1(vdotq_##suffix(c.val, a.val, b.val)); \
}
// Instantiations: unsigned (u8 -> u32) and signed (s8 -> s32).
OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_OP(v_uint32x4, v_uint8x16, u32)
OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_OP(v_int32x4, v_int8x16, s32)
#else
inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
{
#if CV_NEON_DOT
return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val));
#else
const uint8x16_t zero = vreinterpretq_u8_u32(vdupq_n_u32(0));
const uint8x16_t mask = vreinterpretq_u8_u32(vdupq_n_u32(0x00FF00FF));
const uint16x8_t zero32 = vreinterpretq_u16_u32(vdupq_n_u32(0));
@ -685,23 +694,15 @@ inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
uint32x4_t s1 = vaddq_u32(vshrq_n_u32(vreinterpretq_u32_u16(even), 16),
vshrq_n_u32(vreinterpretq_u32_u16(odd), 16));
return v_uint32x4(vaddq_u32(s0, s1));
#endif
}
inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b,
const v_uint32x4& c)
{
#if CV_NEON_DOT
return v_uint32x4(vdotq_u32(c.val, a.val, b.val));
#else
return v_dotprod_expand(a, b) + c;
#endif
}
inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
{
#if CV_NEON_DOT
return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val));
#else
int16x8_t p0 = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val));
int16x8_t p1 = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val));
int16x8_t uzp1, uzp2;
@ -710,18 +711,13 @@ inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
int16x4_t uzpl1, uzpl2;
_v128_unzip(vget_low_s16(sum), vget_high_s16(sum), uzpl1, uzpl2);
return v_int32x4(vaddl_s16(uzpl1, uzpl2));
#endif
}
inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b,
const v_int32x4& c)
{
#if CV_NEON_DOT
return v_int32x4(vdotq_s32(c.val, a.val, b.val));
#else
return v_dotprod_expand(a, b) + c;
#endif
}
#endif
// 16 >> 64
inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
{
@ -830,45 +826,44 @@ inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_
}
// 8 >> 32
#ifdef CV_NEON_DOT
// Generates both v_dotprod_expand_fast overloads for one 8-bit vector type by forwarding
// to the v_dotprod_expand overload with the same arguments.
// NOTE: the `suffix` parameter is not referenced in the macro body.
#define OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_FAST_OP(_Tpvec1, _Tpvec2, suffix) \
inline _Tpvec1 v_dotprod_expand_fast(const _Tpvec2& a, const _Tpvec2& b) \
{ \
return v_dotprod_expand(a, b); \
} \
inline _Tpvec1 v_dotprod_expand_fast(const _Tpvec2& a, const _Tpvec2& b, const _Tpvec1& c) \
{ \
return v_dotprod_expand(a, b, c); \
}
// Instantiations: unsigned (u8 -> u32) and signed (s8 -> s32).
OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_FAST_OP(v_uint32x4, v_uint8x16, u32)
OPENCV_HAL_IMPL_NEON_DOT_PRODUCT_FAST_OP(v_int32x4, v_int8x16, s32)
#else
inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
{
#if CV_NEON_DOT
return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val));
#else
uint16x8_t p0 = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val));
uint16x8_t p1 = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val));
uint32x4_t s0 = vaddl_u16(vget_low_u16(p0), vget_low_u16(p1));
uint32x4_t s1 = vaddl_u16(vget_high_u16(p0), vget_high_u16(p1));
return v_uint32x4(vaddq_u32(s0, s1));
#endif
}
inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
{
#if CV_NEON_DOT
return v_uint32x4(vdotq_u32(c.val, a.val, b.val));
#else
return v_dotprod_expand_fast(a, b) + c;
#endif
}
inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b)
{
#if CV_NEON_DOT
return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val));
#else
int16x8_t prod = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val));
prod = vmlal_s8(prod, vget_high_s8(a.val), vget_high_s8(b.val));
return v_int32x4(vaddl_s16(vget_low_s16(prod), vget_high_s16(prod)));
#endif
}
inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
{
#if CV_NEON_DOT
return v_int32x4(vdotq_s32(c.val, a.val, b.val));
#else
return v_dotprod_expand_fast(a, b) + c;
#endif
}
#endif
// 16 >> 64
inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b)

View File

@ -29,8 +29,7 @@ public:
}
void cleanup() const
{
CV_Assert(ptr && *ptr);
*ptr = 0;
CV_DbgAssert(ptr);
if (raw_mem)
fastFree(raw_mem);
}

View File

@ -305,6 +305,9 @@ DECLARE_CV_CPUID_X86
#endif
#endif
#if defined CV_CXX11
#include <chrono>
#endif
namespace cv
{
@ -414,6 +417,7 @@ struct HWFeatures
g_hwFeatureNames[CPU_AVX_5124FMAPS] = "AVX5124FMAPS";
g_hwFeatureNames[CPU_NEON] = "NEON";
g_hwFeatureNames[CPU_NEON_DOTPROD] = "NEON_DOTPROD";
g_hwFeatureNames[CPU_VSX] = "VSX";
g_hwFeatureNames[CPU_VSX3] = "VSX3";
@ -561,6 +565,24 @@ struct HWFeatures
#ifdef __aarch64__
have[CV_CPU_NEON] = true;
have[CV_CPU_FP16] = true;
int cpufile = open("/proc/self/auxv", O_RDONLY);
if (cpufile >= 0)
{
Elf64_auxv_t auxv;
const size_t size_auxv_t = sizeof(auxv);
while ((size_t)read(cpufile, &auxv, size_auxv_t) == size_auxv_t)
{
if (auxv.a_type == AT_HWCAP)
{
have[CV_CPU_NEON_DOTPROD] = (auxv.a_un.a_val & (1 << 20)) != 0;
break;
}
}
close(cpufile);
}
#elif defined __arm__ && defined __ANDROID__
#if defined HAVE_CPUFEATURES
CV_LOG_INFO(NULL, "calling android_getCpuFeatures() ...");
@ -853,7 +875,10 @@ bool useOptimized(void)
int64 getTickCount(void)
{
#if defined _WIN32 || defined WINCE
#if defined CV_CXX11
std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
return (int64)now.time_since_epoch().count();
#elif defined _WIN32 || defined WINCE
LARGE_INTEGER counter;
QueryPerformanceCounter( &counter );
return (int64)counter.QuadPart;
@ -872,7 +897,11 @@ int64 getTickCount(void)
double getTickFrequency(void)
{
#if defined _WIN32 || defined WINCE
#if defined CV_CXX11
using clock_period_t = std::chrono::steady_clock::duration::period;
double clock_freq = clock_period_t::den / clock_period_t::num;
return clock_freq;
#elif defined _WIN32 || defined WINCE
LARGE_INTEGER freq;
QueryPerformanceFrequency(&freq);
return (double)freq.QuadPart;

View File

@ -408,9 +408,6 @@ TEST_P(BufferArea, basic)
EXPECT_EQ((double)0, dbl_ptr[i]);
}
}
EXPECT_TRUE(int_ptr == NULL);
EXPECT_TRUE(uchar_ptr == NULL);
EXPECT_TRUE(dbl_ptr == NULL);
}
TEST_P(BufferArea, align)
@ -447,10 +444,6 @@ TEST_P(BufferArea, align)
}
}
}
for (size_t i = 0; i < CNT; ++i)
{
EXPECT_TRUE(buffers[i] == NULL);
}
}
TEST_P(BufferArea, default_align)

View File

@ -132,6 +132,16 @@ public:
if (hasWeights && hasBias)
CV_CheckEQ(weights.total(), bias.total(), "Incompatible weights/bias blobs");
if (weights.total() == 1)
{
// The total() of bias should be same as weights.
if (hasBias)
inpBlob.convertTo(outBlob, CV_32F, weights.at<float>(0), bias.at<float>(0));
else
inpBlob.convertTo(outBlob, CV_32F, weights.at<float>(0));
return;
}
int endAxis;
for (endAxis = axis + 1; endAxis <= inpBlob.dims; ++endAxis)
{

View File

@ -2026,6 +2026,8 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node
void findBroadAxis(const MatShape& broadShape, const MatShape& outShape, size_t& axis, int& broadAxis)
{
// Currently, this function can only complete 1-dimensional expansion of broadShape.
// If there are two dimensions in broadShape that need to be expanded, it will fail.
const size_t diff = outShape.size() - broadShape.size();
// find the first non-one element of the broadcasting shape

View File

@ -1060,6 +1060,8 @@ TEST_P(Test_ONNX_layers, Div)
normAssert(ref, out, "", default_l1, default_lInf);
expectNoFallbacksFromIE(net);
expectNoFallbacksFromCUDA(net);
testONNXModels("div_test_1x1",npy, 0, 0, false, true, 2);
}
TEST_P(Test_ONNX_layers, DynamicReshape)

View File

@ -981,11 +981,20 @@ else // CV_8U
__pack01 = v_pack_u(v_round(__dst0 * __nrm2), v_round(__dst1 * __nrm2));
v_pack_store(dst + k, __pack01);
}
#endif
#if defined(__GNUC__) && __GNUC__ >= 9
// avoid warning "iteration 7 invokes undefined behavior" on Linux ARM64
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Waggressive-loop-optimizations"
#endif
for( ; k < len; k++ )
{
dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
}
#if defined(__GNUC__) && __GNUC__ >= 9
#pragma GCC diagnostic pop
#endif
}
#else
float* dst = dstMat.ptr<float>(row);

View File

@ -78,11 +78,15 @@ Input depth (src.depth()) | Output depth (ddepth)
--------------------------|----------------------
CV_8U | -1/CV_16S/CV_32F/CV_64F
CV_16U/CV_16S | -1/CV_32F/CV_64F
CV_32F | -1/CV_32F/CV_64F
CV_32F | -1/CV_32F
CV_64F | -1/CV_64F
@note when ddepth=-1, the output image will have the same depth as the source.
@note if you need double floating-point accuracy and are using single floating-point input data
(CV_32F input and CV_64F output depth combination), you can use @ref Mat.convertTo to convert
the input data to the desired precision.
@defgroup imgproc_transform Geometric Image Transformations
The functions in this section perform various geometrical transformations of 2D images. They do not
@ -1792,7 +1796,7 @@ with the following \f$3 \times 3\f$ aperture:
@param src Source image.
@param dst Destination image of the same size and the same number of channels as src .
@param ddepth Desired depth of the destination image.
@param ddepth Desired depth of the destination image, see @ref filter_depths "combinations".
@param ksize Aperture size used to compute the second-derivative filters. See #getDerivKernels for
details. The size must be positive and odd.
@param scale Optional scale factor for the computed Laplacian values. By default, no scaling is
@ -2279,7 +2283,7 @@ case of multi-channel images, each channel is processed independently.
@param src input image; the number of channels can be arbitrary, but the depth should be one of
CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
@param dst output image of the same size and type as src.
@param kernel structuring element used for dilation; if elemenat=Mat(), a 3 x 3 rectangular
@param kernel structuring element used for dilation; if element=Mat(), a 3 x 3 rectangular
structuring element is used. Kernel can be created using #getStructuringElement
@param anchor position of the anchor within the element; default value (-1, -1) means that the
anchor is at the element center.
@ -2809,7 +2813,7 @@ It makes possible to do a fast blurring or fast block correlation with a variabl
example. In case of multi-channel images, sums for each channel are accumulated independently.
As a practical example, the next figure shows the calculation of the integral of a straight
rectangle Rect(3,3,3,2) and of a tilted rectangle Rect(5,1,2,3) . The selected pixels in the
rectangle Rect(4,4,3,2) and of a tilted rectangle Rect(5,1,2,3) . The selected pixels in the
original image are shown, as well as the relative pixels in the integral images sum and tilted .
![integral calculation example](pics/integral.png)
@ -3174,7 +3178,14 @@ CV_EXPORTS void calcHist( const Mat* images, int nimages,
const int* histSize, const float** ranges,
bool uniform = true, bool accumulate = false );
/** @overload */
/** @overload
this variant supports only uniform histograms.
ranges argument is either empty vector or a flattened vector of histSize.size()*2 elements
(histSize.size() element pairs). The first and second elements of each pair specify the lower and
upper boundaries.
*/
CV_EXPORTS_W void calcHist( InputArrayOfArrays images,
const std::vector<int>& channels,
InputArray mask, OutputArray hist,

View File

@ -1058,7 +1058,7 @@ EllipseEx( Mat& img, Point2l center, Size2l axes,
* Polygons filling *
\****************************************************************************************/
static inline void ICV_HLINE_X(uchar* ptr, int xl, int xr, const uchar* color, int pix_size)
static inline void ICV_HLINE_X(uchar* ptr, int64_t xl, int64_t xr, const uchar* color, int pix_size)
{
uchar* hline_min_ptr = (uchar*)(ptr) + (xl)*(pix_size);
uchar* hline_end_ptr = (uchar*)(ptr) + (xr+1)*(pix_size);
@ -1083,7 +1083,7 @@ static inline void ICV_HLINE_X(uchar* ptr, int xl, int xr, const uchar* color, i
}
//end ICV_HLINE_X()
static inline void ICV_HLINE(uchar* ptr, int xl, int xr, const void* color, int pix_size)
static inline void ICV_HLINE(uchar* ptr, int64_t xl, int64_t xr, const void* color, int pix_size)
{
ICV_HLINE_X(ptr, xl, xr, reinterpret_cast<const uchar*>(color), pix_size);
}
@ -1177,7 +1177,7 @@ FillConvexPoly( Mat& img, const Point2l* v, int npts, const void* color, int lin
edge[0].x = edge[1].x = -XY_ONE;
edge[0].dx = edge[1].dx = 0;
ptr += img.step*y;
ptr += (int64_t)img.step*y;
do
{
@ -1206,7 +1206,7 @@ FillConvexPoly( Mat& img, const Point2l* v, int npts, const void* color, int lin
}
edge[i].ye = ty;
edge[i].dx = ((xe - xs)*2 + (ty - y)) / (2 * (ty - y));
edge[i].dx = ((xe - xs)*2 + ((int64_t)ty - y)) / (2 * ((int64_t)ty - y));
edge[i].x = xs;
edge[i].idx = idx;
break;
@ -1480,7 +1480,7 @@ Circle( Mat& img, Point center, int radius, const void* color, int fill )
size_t step = img.step;
int pix_size = (int)img.elemSize();
uchar* ptr = img.ptr();
int err = 0, dx = radius, dy = 0, plus = 1, minus = (radius << 1) - 1;
int64_t err = 0, dx = radius, dy = 0, plus = 1, minus = (radius << 1) - 1;
int inside = center.x >= radius && center.x < size.width - radius &&
center.y >= radius && center.y < size.height - radius;
@ -1490,8 +1490,8 @@ Circle( Mat& img, Point center, int radius, const void* color, int fill )
while( dx >= dy )
{
int mask;
int y11 = center.y - dy, y12 = center.y + dy, y21 = center.y - dx, y22 = center.y + dx;
int x11 = center.x - dx, x12 = center.x + dx, x21 = center.x - dy, x22 = center.x + dy;
int64_t y11 = center.y - dy, y12 = center.y + dy, y21 = center.y - dx, y22 = center.y + dx;
int64_t x11 = center.x - dx, x12 = center.x + dx, x21 = center.x - dy, x22 = center.x + dy;
if( inside )
{
@ -1531,7 +1531,7 @@ Circle( Mat& img, Point center, int radius, const void* color, int fill )
{
if( fill )
{
x11 = std::max( x11, 0 );
x11 = std::max( x11, (int64_t)0 );
x12 = MIN( x12, size.width - 1 );
}
@ -1569,7 +1569,7 @@ Circle( Mat& img, Point center, int radius, const void* color, int fill )
{
if( fill )
{
x21 = std::max( x21, 0 );
x21 = std::max( x21, (int64_t)0 );
x22 = MIN( x22, size.width - 1 );
}
@ -1866,6 +1866,12 @@ void rectangle( InputOutputArray img, Rect rec,
{
CV_INSTRUMENT_REGION();
CV_Assert( 0 <= shift && shift <= XY_SHIFT );
// Crop the rectangle to right around the mat.
rec &= Rect(-(1 << shift), -(1 << shift), ((img.cols() + 2) << shift),
((img.rows() + 2) << shift));
if( !rec.empty() )
rectangle( img, rec.tl(), rec.br() - Point(1<<shift,1<<shift),
color, thickness, lineType, shift );

View File

@ -963,7 +963,7 @@ pyrUp_( const Mat& _src, Mat& _dst, int)
if (dsize.width > ssize.width*2)
{
row[(_dst.cols-1) + x] = row[dx + cn];
row[(_dst.cols-1) * cn + x] = row[dx + cn];
}
}

View File

@ -0,0 +1,19 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
// Regression test (numbered 22184 in the test name): upsamples a 3-channel 16-bit image
// into a destination that is one pixel larger than exactly twice the source in each
// dimension, then checks that the smallest destination value is positive.
// The source is filled with 255 and the destination pre-filled with 0, so a zero
// minimum presumably means some destination element was never written.
TEST(Imgproc_PyrUp, pyrUp_regression_22184)
{
    const int srcSide = 100;
    const int dstSide = srcSide * 2 + 1;

    Mat input(srcSide, srcSide, CV_16UC3, Scalar::all(255));
    Mat upsampled(dstSide, dstSide, CV_16UC3, Scalar::all(0));
    pyrUp(input, upsampled, Size(upsampled.cols, upsampled.rows));

    double smallest = 0;
    minMaxLoc(upsampled, &smallest);
    ASSERT_GT(cvRound(smallest), 0);
}
}} // namespace

View File

@ -453,6 +453,7 @@ namespace binding_utils
EMSCRIPTEN_BINDINGS(binding_utils)
{
register_vector<int>("IntVector");
register_vector<char>("CharVector");
register_vector<float>("FloatVector");
register_vector<double>("DoubleVector");
register_vector<cv::Point>("PointVector");

View File

@ -7,4 +7,10 @@
#include "opencv2/ts.hpp"
#include "opencv2/objdetect.hpp"
#if defined CV_CXX11
#include <random>
#else
#include <cstdlib>
#endif
#endif

View File

@ -5,6 +5,16 @@
#include "test_precomp.hpp"
namespace opencv_test { namespace {
#if !defined CV_CXX11
// Wrapper for generating seeded random numbers via std::rand, intended as the
// random-number functor argument of std::random_shuffle.
// Constructing an instance reseeds std::rand with the compile-time Seed.
template<unsigned Seed>
class SeededRandFunctor {
public:
    SeededRandFunctor() { std::srand(Seed); }
    // std::random_shuffle requires r(n) to return a value in [0, n).
    // Taking the remainder by i (not i + 1) keeps the result strictly below i,
    // so the shuffle never indexes one element past the range being shuffled.
    // A non-positive i yields 0 instead of dividing by zero.
    int operator()(int i) { return i > 0 ? std::rand() % i : 0; }
};
#endif
std::string encode_qrcode_images_name[] = {
"version1_mode1.png", "version1_mode2.png", "version1_mode4.png",
"version2_mode1.png", "version2_mode2.png", "version2_mode4.png",
@ -380,8 +390,15 @@ TEST(Objdetect_QRCode_Encode_Decode_Structured_Append, DISABLED_regression)
std::string symbol_set = config["symbols_set"];
std::string input_info = symbol_set;
std::random_shuffle(input_info.begin(), input_info.end());
#if defined CV_CXX11
// std::random_shuffle is deprecated since C++11 and removed in C++17.
// Use manually constructed RNG with a fixed seed and std::shuffle instead.
std::mt19937 rand_gen {1};
std::shuffle(input_info.begin(), input_info.end(), rand_gen);
#else
SeededRandFunctor<1> rand_gen;
std::random_shuffle(input_info.begin(), input_info.end(), rand_gen);
#endif
for (int j = min_stuctures_num; j < max_stuctures_num; j++)
{
QRCodeEncoder::Params params;

View File

@ -35,7 +35,7 @@ add_subdirectory(python3)
else() # standalone build
cmake_minimum_required(VERSION 2.8.12)
cmake_minimum_required(VERSION 2.8.12.2)
project(OpenCVPython CXX C)
include("./standalone.cmake")

View File

@ -89,7 +89,7 @@ def bootstrap():
BINARIES_PATHS = []
g_vars = globals()
l_vars = locals()
l_vars = locals().copy()
if sys.version_info[:2] < (3, 0):
from . load_config_py2 import exec_file_wrapper

View File

@ -60,6 +60,7 @@ extern "C" {
#include <errno.h>
#endif
#include <libavcodec/version.h>
#include <libavformat/avformat.h>
#ifdef __cplusplus

View File

@ -189,7 +189,7 @@
#
# ------------------------------------------------------------------------------
cmake_minimum_required( VERSION 2.6.3 )
cmake_minimum_required( VERSION 2.8.12.2 )
if( DEFINED CMAKE_CROSSCOMPILING )
# subsequent toolchain loading is not really needed

View File

@ -17,7 +17,7 @@ int main(int /*argc*/, const char** /* argv */ )
{
const Point center( img.rows / 2 , img.cols /2 );
for( int radius = 5; radius < img.rows ; radius += 3.5 )
for( int radius = 5; radius < img.rows ; radius += 3 )
{
cv::circle( img, center, radius, Scalar(255,0,255) );
}

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR)
cmake_minimum_required(VERSION 2.8.12.2 FATAL_ERROR)
set(PROJECT_NAME "c_hal")
set(HAL_LIB_NAME "c_hal")

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR)
cmake_minimum_required(VERSION 2.8.12.2 FATAL_ERROR)
set(PROJECT_NAME "slow_hal")
set(HAL_LIB_NAME "slow_hal")

View File

@ -1,6 +1,6 @@
ocv_install_example_src(cpp *.cpp *.hpp CMakeLists.txt)
cmake_minimum_required(VERSION 2.8.9)
cmake_minimum_required(VERSION 2.8.12.2)
set(OPENCV_OPENVX_SAMPLE_REQUIRED_DEPS
opencv_core