mirror of
https://github.com/opencv/opencv.git
synced 2025-06-06 00:43:52 +08:00
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
4b2d1aaeea
@ -316,7 +316,7 @@ OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add
|
||||
OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CV_GCC )
|
||||
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CV_GCC )
|
||||
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
|
||||
OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF (CV_GCC AND PPC64LE) )
|
||||
OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF ((CV_GCC OR CV_CLANG) AND PPC64LE) )
|
||||
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CV_GCC AND (X86 OR X86_64)) )
|
||||
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )
|
||||
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )
|
||||
|
@ -46,13 +46,13 @@
|
||||
/* Cocoa API */
|
||||
#cmakedefine HAVE_COCOA
|
||||
|
||||
/* NVidia Cuda Basic Linear Algebra Subprograms (BLAS) API*/
|
||||
/* NVIDIA CUDA Basic Linear Algebra Subprograms (BLAS) API*/
|
||||
#cmakedefine HAVE_CUBLAS
|
||||
|
||||
/* NVidia Cuda Runtime API*/
|
||||
/* NVIDIA CUDA Runtime API*/
|
||||
#cmakedefine HAVE_CUDA
|
||||
|
||||
/* NVidia Cuda Fast Fourier Transform (FFT) API*/
|
||||
/* NVIDIA CUDA Fast Fourier Transform (FFT) API*/
|
||||
#cmakedefine HAVE_CUFFT
|
||||
|
||||
/* IEEE1394 capturing support */
|
||||
@ -124,10 +124,10 @@
|
||||
/* Microsoft Media Foundation Capture library */
|
||||
#cmakedefine HAVE_MSMF
|
||||
|
||||
/* NVidia Video Decoding API*/
|
||||
/* NVIDIA Video Decoding API*/
|
||||
#cmakedefine HAVE_NVCUVID
|
||||
|
||||
/* NVidia Video Encoding API*/
|
||||
/* NVIDIA Video Encoding API*/
|
||||
#cmakedefine HAVE_NVCUVENC
|
||||
|
||||
/* OpenCL Support */
|
||||
|
@ -8,7 +8,7 @@ Goal
|
||||
In the @ref tutorial_video_input_psnr_ssim tutorial I already presented the PSNR and SSIM methods for checking
|
||||
the similarity between the two images. And as you could see, the execution process takes quite some
|
||||
time, especially in the case of the SSIM. However, if the performance numbers of an OpenCV
|
||||
implementation for the CPU do not satisfy you and you happen to have an NVidia CUDA GPU device in
|
||||
implementation for the CPU do not satisfy you and you happen to have an NVIDIA CUDA GPU device in
|
||||
your system, all is not lost. You may try to port or write your own algorithm for the video card.
|
||||
|
||||
This tutorial will give a good grasp on how to approach coding by using the GPU module of OpenCV. As
|
||||
@ -187,7 +187,7 @@ introduce asynchronous OpenCV GPU calls too with the help of the @ref cv::cuda::
|
||||
Result and conclusion
|
||||
---------------------
|
||||
|
||||
On an Intel P8700 laptop CPU paired with a low end NVidia GT220M, here are the performance numbers:
|
||||
On an Intel P8700 laptop CPU paired with a low end NVIDIA GT220M, here are the performance numbers:
|
||||
@code
|
||||
Time of PSNR CPU (averaged for 10 runs): 41.4122 milliseconds. With result of: 19.2506
|
||||
Time of PSNR GPU (averaged for 10 runs): 158.977 milliseconds. With result of: 19.2506
|
||||
|
@ -754,5 +754,6 @@ CV_EXPORTS_W void setUseIPP_NE(bool flag);
|
||||
|
||||
#include "opencv2/core/neon_utils.hpp"
|
||||
#include "opencv2/core/vsx_utils.hpp"
|
||||
#include "opencv2/core/check.hpp"
|
||||
|
||||
#endif //OPENCV_CORE_BASE_HPP
|
||||
|
135
modules/core/include/opencv2/core/check.hpp
Normal file
135
modules/core/include/opencv2/core/check.hpp
Normal file
@ -0,0 +1,135 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_CORE_CHECK_HPP
|
||||
#define OPENCV_CORE_CHECK_HPP
|
||||
|
||||
#include <opencv2/core/base.hpp>
|
||||
|
||||
namespace cv {
|
||||
|
||||
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "<invalid depth>" */
|
||||
CV_EXPORTS const char* depthToString(int depth);
|
||||
|
||||
/** Returns string of cv::Mat type value: CV_8UC3 -> "CV_8UC3" or "<invalid type>" */
|
||||
CV_EXPORTS const String typeToString(int type);
|
||||
|
||||
|
||||
//! @cond IGNORED
|
||||
namespace detail {
|
||||
|
||||
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */
|
||||
CV_EXPORTS const char* depthToString_(int depth);
|
||||
|
||||
/** Returns string of cv::Mat type value: CV_8UC3 -> "CV_8UC3" or cv::String() */
|
||||
CV_EXPORTS const cv::String typeToString_(int type);
|
||||
|
||||
/** Identifies which comparison a check performed; stored in CheckContext::testOp.
    The TEST_<op> enumerators are selected by token pasting in CV__CHECK and mirror the CV__TEST_<op> macros. */
enum TestOp {
|
||||
TEST_CUSTOM = 0, //!< arbitrary test expression (used by CV__CHECK_CUSTOM_TEST)
|
||||
TEST_EQ = 1, //!< v1 == v2
|
||||
TEST_NE = 2, //!< v1 != v2
|
||||
TEST_LE = 3, //!< v1 <= v2
|
||||
TEST_LT = 4, //!< v1 < v2
|
||||
TEST_GE = 5, //!< v1 >= v2
|
||||
TEST_GT = 6, //!< v1 > v2
|
||||
CV__LAST_TEST_OP //!< presumably an end-of-list marker (its value follows TEST_GT) -- confirm against the implementation
|
||||
};
|
||||
|
||||
/** Description of a single check site. Instances are created by CV__DEFINE_CHECK_CONTEXT
    (as a static const object) and passed to the check_failed_* functions. */
struct CheckContext {
|
||||
const char* func; //!< CV__CHECK_FUNCTION: __FUNCSIG__ / __PRETTY_FUNCTION__ where available, otherwise "<unknown>"
|
||||
const char* file; //!< CV__CHECK_FILENAME (defaults to __FILE__)
|
||||
int line; //!< __LINE__ of the check
|
||||
enum TestOp testOp; //!< comparison performed by the check
|
||||
const char* message; //!< message text supplied to the CV_Check* macro
|
||||
const char* p1_str; //!< stringized first operand; for custom tests, the stringized checked value
|
||||
const char* p2_str; //!< stringized second operand; for custom tests, the stringized test expression
|
||||
};
|
||||
|
||||
#ifndef CV__CHECK_FILENAME
|
||||
# define CV__CHECK_FILENAME __FILE__
|
||||
#endif
|
||||
|
||||
#ifndef CV__CHECK_FUNCTION
|
||||
# if defined _MSC_VER
|
||||
# define CV__CHECK_FUNCTION __FUNCSIG__
|
||||
# elif defined __GNUC__
|
||||
# define CV__CHECK_FUNCTION __PRETTY_FUNCTION__
|
||||
# else
|
||||
# define CV__CHECK_FUNCTION "<unknown>"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__)
|
||||
#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \
|
||||
static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \
|
||||
{ CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, message, p1_str, p2_str }
|
||||
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx);
|
||||
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx);
|
||||
|
||||
|
||||
#define CV__TEST_EQ(v1, v2) ((v1) == (v2))
|
||||
#define CV__TEST_NE(v1, v2) ((v1) != (v2))
|
||||
#define CV__TEST_LE(v1, v2) ((v1) <= (v2))
|
||||
#define CV__TEST_LT(v1, v2) ((v1) < (v2))
|
||||
#define CV__TEST_GE(v1, v2) ((v1) >= (v2))
|
||||
#define CV__TEST_GT(v1, v2) ((v1) > (v2))
|
||||
|
||||
/* Binary check helper: evaluates CV__TEST_<op>(v1, v2). When the test does not hold, it defines a
   static CheckContext for this call site and calls cv::detail::check_failed_<type>(v1, v2, ctx),
   which is declared CV_NORETURN.
   Note: v1 and v2 are expanded a second time in the failure call, so operands with side effects
   are evaluated twice on the failure path. */
#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \
|
||||
if(CV__TEST_##op((v1), (v2))) ; else { \
|
||||
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \
|
||||
cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Custom-expression check helper: evaluates test_expr as a boolean. When it is false, it defines a
   static CheckContext with TEST_CUSTOM (p1_str = v_str, p2_str = test_expr_str) and calls the
   single-value overload cv::detail::check_failed_<type>(v, ctx), which is declared CV_NORETURN. */
#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \
|
||||
if(!!(test_expr)) ; else { \
|
||||
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \
|
||||
cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
} // namespace
|
||||
//! @endcond
|
||||
|
||||
|
||||
/// Supported values of these types: int, float, double
|
||||
#define CV_CheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
|
||||
|
||||
/// Check with additional "decoding" of type values in error message
|
||||
#define CV_CheckTypeEQ(t1, t2, msg) CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg)
|
||||
/// Check with additional "decoding" of depth values in error message
|
||||
#define CV_CheckDepthEQ(d1, d2, msg) CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg)
|
||||
|
||||
#define CV_CheckChannelsEQ(c1, c2, msg) CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg)
|
||||
|
||||
|
||||
/// Example: type == CV_8UC1 || type == CV_8UC3
|
||||
#define CV_CheckType(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg)
|
||||
|
||||
/// Example: depth == CV_32F || depth == CV_64F
|
||||
#define CV_CheckDepth(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg)
|
||||
|
||||
/// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1")
|
||||
// TODO define pretty-printers: #define CV_Check(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // OPENCV_CORE_CHECK_HPP
|
@ -795,7 +795,7 @@ inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>
|
||||
/** @brief Multiply and add
|
||||
|
||||
Returns \f$ a*b + c \f$
|
||||
For floating point types only. */
|
||||
For floating point types and signed 32bit int only. */
|
||||
template<typename _Tp, int n>
|
||||
inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
|
||||
const v_reg<_Tp, n>& c)
|
||||
@ -828,6 +828,29 @@ template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n
|
||||
return c;
|
||||
}
|
||||
|
||||
/** @brief Dot product of elements
|
||||
|
||||
Same as cv::v_dotprod, but add a third element to the sum of adjacent pairs.
|
||||
Scheme:
|
||||
@code
|
||||
{A1 A2 ...} // 16-bit
|
||||
x {B1 B2 ...} // 16-bit
|
||||
-------------
|
||||
{A1B1+A2B2+C1 ...} // 32-bit
|
||||
|
||||
@endcode
|
||||
Implemented only for 16-bit signed source type (v_int16x8).
|
||||
*/
|
||||
template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
|
||||
v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
|
||||
{
|
||||
typedef typename V_TypeTraits<_Tp>::w_type w_type; // wider element type used for the products and the result
|
||||
v_reg<w_type, n/2> s; // result has half as many lanes as the inputs
|
||||
for( int i = 0; i < (n/2); i++ )
|
||||
s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i]; // pair (2i, 2i+1): left factor is cast to w_type before multiplying, then c[i] is added
|
||||
return s;
|
||||
}
|
||||
|
||||
/** @brief Multiply and expand
|
||||
|
||||
Multiply values of two registers and store the results in two registers with a wider pack type.
|
||||
|
@ -506,6 +506,12 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
|
||||
return v_int32x4(vaddq_s32(cd.val[0], cd.val[1]));
|
||||
}
|
||||
|
||||
// Dot product with accumulator: computes v_dotprod(a, b) and adds c lane by lane (vaddq_s32).
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
|
||||
{
|
||||
v_int32x4 s = v_dotprod(a, b); // reuse the two-operand overload for the pairwise products
|
||||
return v_int32x4(vaddq_s32(s.val , c.val));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \
|
||||
@ -730,6 +736,11 @@ inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_
|
||||
return v_float32x4(vmlaq_f32(c.val, a.val, b.val));
|
||||
}
|
||||
|
||||
// Multiply-add for signed 32-bit lanes: returns a*b + c.
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
|
||||
{
|
||||
return v_int32x4(vmlaq_s32(c.val, a.val, b.val)); // vmlaq_s32 takes the accumulator as its first argument
|
||||
}
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b)
|
||||
{
|
||||
@ -1095,6 +1106,18 @@ OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
|
||||
OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64)
|
||||
#endif
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
// Variant compiled when CV_SIMD128_64F is set: converts four floats to int32 with a single
// FCVTNS instruction (convert to signed integer, rounding to nearest with ties to even).
// The inline asm uses AArch64 vector-register syntax (%N.4s).
inline v_int32x4 v_round(const v_float32x4& a)
|
||||
{
|
||||
float32x4_t a_ = a.val;
|
||||
int32x4_t result;
|
||||
__asm__ ("fcvtns %0.4s, %1.4s"
|
||||
: "=w"(result) // output operand %0
|
||||
: "w"(a_) // input operand %1
|
||||
: /* No clobbers */);
|
||||
return v_int32x4(result);
|
||||
}
|
||||
#else
|
||||
inline v_int32x4 v_round(const v_float32x4& a)
|
||||
{
|
||||
static const int32x4_t v_sign = vdupq_n_s32(1 << 31),
|
||||
@ -1103,7 +1126,7 @@ inline v_int32x4 v_round(const v_float32x4& a)
|
||||
int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val)));
|
||||
return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition))));
|
||||
}
|
||||
|
||||
#endif
|
||||
inline v_int32x4 v_floor(const v_float32x4& a)
|
||||
{
|
||||
int32x4_t a1 = vcvtq_s32_f32(a.val);
|
||||
|
@ -710,6 +710,11 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
|
||||
return v_int32x4(_mm_madd_epi16(a.val, b.val));
|
||||
}
|
||||
|
||||
// Dot product with accumulator: _mm_madd_epi16 multiplies the signed 16-bit lanes and sums
// adjacent products into 32-bit lanes; c is then added lane by lane with _mm_add_epi32.
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
|
||||
{
|
||||
return v_int32x4(_mm_add_epi32(_mm_madd_epi16(a.val, b.val), c.val));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const) \
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(|, _Tpvec, _mm_or_##suffix) \
|
||||
@ -954,6 +959,10 @@ inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
|
||||
__m128i m = _mm_cmpgt_epi32(b.val, a.val);
|
||||
return v_uint32x4(_mm_sub_epi32(_mm_xor_si128(d, m), m));
|
||||
}
|
||||
// Multiply-add for signed 32-bit lanes, expressed through the v_int32x4 operator* and operator+
// overloads (those overloads are not visible in this excerpt).
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
|
||||
{
|
||||
return a * b + c;
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \
|
||||
inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
|
||||
@ -1632,7 +1641,7 @@ inline void v_load_deinterleave(const double *ptr, v_float64x2& a, v_float64x2&
|
||||
c = v_reinterpret_as_f64(t2);
|
||||
}
|
||||
|
||||
// 2-channel, float only
|
||||
// 2-channel
|
||||
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
|
||||
{
|
||||
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
|
||||
@ -1644,7 +1653,29 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
|
||||
b.val = _mm_shuffle_ps(u0, u1, mask_hi); // b0 b1 ab b3
|
||||
}
|
||||
|
||||
inline void v_store_interleave( short* ptr, const v_int16x8& a, const v_int16x8& b )
|
||||
// 2-channel deinterleave for 16-bit lanes: reads 16 interleaved values (a0 b0 a1 b1 ...) from ptr
// with two unaligned loads and separates them into a (a0..a7) and b (b0..b7) using three rounds
// of 16-bit unpack operations. The trailing comments show the lane order after each step.
inline void v_load_deinterleave(const short* ptr, v_int16x8& a, v_int16x8& b)
|
||||
{
|
||||
__m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); // a0 b0 a1 b1 a2 b2 a3 b3
|
||||
__m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7
|
||||
|
||||
__m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5
|
||||
__m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7
|
||||
__m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6
|
||||
__m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7
|
||||
|
||||
a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7
|
||||
b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 b2 b3 b4 b5 b6 b7
|
||||
}
|
||||
|
||||
// Unsigned 16-bit variant: forwards to the signed (const short*) overload and reinterprets the
// two results as v_uint16x8.
inline void v_load_deinterleave(const ushort*ptr, v_uint16x8& a, v_uint16x8& b)
|
||||
{
|
||||
v_int16x8 sa, sb;
|
||||
v_load_deinterleave((const short*)ptr, sa, sb);
|
||||
a = v_reinterpret_as_u16(sa);
|
||||
b = v_reinterpret_as_u16(sb);
|
||||
}
|
||||
|
||||
inline void v_store_interleave(short* ptr, const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
__m128i t0, t1;
|
||||
t0 = _mm_unpacklo_epi16(a.val, b.val);
|
||||
|
@ -1,46 +1,6 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#ifndef OPENCV_HAL_VSX_HPP
|
||||
#define OPENCV_HAL_VSX_HPP
|
||||
@ -276,34 +236,38 @@ OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(_Tpvec, _Tp, ld_func, st_func) \
|
||||
#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a) \
|
||||
inline _Tpvec v_load(const _Tp* ptr) \
|
||||
{ return _Tpvec(ld_func(0, ptr)); } \
|
||||
inline _Tpvec v_load_aligned(const _Tp* ptr) \
|
||||
{ return _Tpvec(ld_func(0, ptr)); } \
|
||||
{ return _Tpvec(ld(0, ptr)); } \
|
||||
inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr)) \
|
||||
{ return _Tpvec(ld_a(0, ptr)); } \
|
||||
inline _Tpvec v_load_low(const _Tp* ptr) \
|
||||
{ return _Tpvec(vec_ld_l8(ptr)); } \
|
||||
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
|
||||
{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \
|
||||
inline void v_store(_Tp* ptr, const _Tpvec& a) \
|
||||
{ st_func(a.val, 0, ptr); } \
|
||||
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
||||
{ st_func(a.val, 0, ptr); } \
|
||||
{ st(a.val, 0, ptr); } \
|
||||
inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
|
||||
{ st_a(a.val, 0, ptr); } \
|
||||
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vec_st_l8(a.val, ptr); } \
|
||||
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vec_st_h8(a.val, ptr); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint8x16, uchar, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int8x16, schar, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint16x8, ushort, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int16x8, short, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint32x4, uint, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int32x4, int, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float32x4, float, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float64x2, double, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint64x2, uint64, vsx_ld2, vsx_st2)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int64x2, int64, vsx_ld2, vsx_st2)
|
||||
#define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st)
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16, uchar)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16, schar)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8, ushort)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8, short)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4, uint)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4, int)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float)
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld, vsx_ld, vsx_st, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2, uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2, int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
|
||||
|
||||
//////////////// Value reordering ///////////////
|
||||
|
||||
@ -343,7 +307,7 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
|
||||
b1.val = fl(a.val); \
|
||||
} \
|
||||
inline _Tpwvec v_load_expand(const _Tp* ptr) \
|
||||
{ return _Tpwvec(fh(vsx_ld(0, ptr))); }
|
||||
{ return _Tpwvec(fh(vec_ld_l8(ptr))); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu)
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh)
|
||||
@ -353,10 +317,10 @@ OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpac
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)
|
||||
|
||||
inline v_uint32x4 v_load_expand_q(const uchar* ptr)
|
||||
{ return v_uint32x4(vec_ld_buw(ptr)); }
|
||||
{ return v_uint32x4(vec_uint4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
|
||||
|
||||
inline v_int32x4 v_load_expand_q(const schar* ptr)
|
||||
{ return v_int32x4(vec_ld_bsw(ptr)); }
|
||||
{ return v_int32x4(vec_int4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
|
||||
|
||||
/* pack */
|
||||
#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \
|
||||
@ -429,36 +393,6 @@ inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
|
||||
d.val = vec_mergesql(a.val, b.val);
|
||||
}
|
||||
|
||||
/* Extract */
|
||||
template<int s, typename _Tpvec>
|
||||
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
|
||||
{
|
||||
const int w = sizeof(typename _Tpvec::lane_type);
|
||||
const int n = _Tpvec::nlanes;
|
||||
const unsigned int sf = ((w * n) - (s * w));
|
||||
if (s == 0)
|
||||
return _Tpvec(a.val);
|
||||
else if (sf > 15)
|
||||
return _Tpvec();
|
||||
// bitwise it just to make xlc happy
|
||||
return _Tpvec(vec_sld(b.val, a.val, sf & 15));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_EXTRACT_2(_Tpvec) \
|
||||
template<int s> \
|
||||
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
switch(s) { \
|
||||
case 0: return _Tpvec(a.val); \
|
||||
case 2: return _Tpvec(b.val); \
|
||||
case 1: return _Tpvec(vec_sldw(b.val, a.val, 2)); \
|
||||
default: return _Tpvec(); \
|
||||
} \
|
||||
}
|
||||
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_uint64x2)
|
||||
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_int64x2)
|
||||
|
||||
|
||||
////////// Arithmetic, bitwise and comparison operations /////////
|
||||
|
||||
/* Element-wise binary and unary operations */
|
||||
@ -669,6 +603,11 @@ OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, right, a, b)
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, left, b, a)
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, left, b, a)
|
||||
|
||||
/* Extract */
|
||||
// Generic v_extract for all vector types: forwards to the two-operand v_rotate_right<s>(a, b).
template<int s, typename _Tpvec>
|
||||
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
|
||||
{ return v_rotate_right<s>(a, b); }
|
||||
|
||||
////////// Reduce and mask /////////
|
||||
|
||||
/** Reduce **/
|
||||
@ -821,6 +760,9 @@ inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
|
||||
OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4)
|
||||
OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2)
|
||||
|
||||
// Multiply-add for signed 32-bit lanes, expressed through the v_int32x4 operator* and operator+
// overloads (those overloads are not visible in this excerpt).
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
|
||||
{ return a * b + c; }
|
||||
|
||||
// TODO: exp, log, sin, cos
|
||||
|
||||
/** Absolute values **/
|
||||
@ -904,6 +846,9 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
|
||||
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
|
||||
{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); }
|
||||
|
||||
// Dot product with accumulator: same vec_msum call as the two-operand overload, but with c
// passed as the addend instead of the zero vector.
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
|
||||
{ return v_int32x4(vec_msum(a.val, b.val, c.val)); }
|
||||
|
||||
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
|
||||
const v_float32x4& m1, const v_float32x4& m2,
|
||||
const v_float32x4& m3)
|
||||
|
@ -1,46 +1,6 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#ifndef OPENCV_HAL_VSX_UTILS_HPP
|
||||
#define OPENCV_HAL_VSX_UTILS_HPP
|
||||
@ -64,106 +24,77 @@ typedef __vector unsigned char vec_uchar16;
|
||||
#define vec_uchar16_set(...) (vec_uchar16){__VA_ARGS__}
|
||||
#define vec_uchar16_sp(c) (__VSX_S16__(vec_uchar16, c))
|
||||
#define vec_uchar16_c(v) ((vec_uchar16)(v))
|
||||
#define vec_uchar16_mx vec_uchar16_sp(0xFF)
|
||||
#define vec_uchar16_mn vec_uchar16_sp(0)
|
||||
#define vec_uchar16_z vec_uchar16_mn
|
||||
#define vec_uchar16_z vec_uchar16_sp(0)
|
||||
|
||||
typedef __vector signed char vec_char16;
|
||||
#define vec_char16_set(...) (vec_char16){__VA_ARGS__}
|
||||
#define vec_char16_sp(c) (__VSX_S16__(vec_char16, c))
|
||||
#define vec_char16_c(v) ((vec_char16)(v))
|
||||
#define vec_char16_mx vec_char16_sp(0x7F)
|
||||
#define vec_char16_mn vec_char16_sp(-0x7F-1)
|
||||
#define vec_char16_z vec_char16_sp(0)
|
||||
|
||||
typedef __vector unsigned short vec_ushort8;
|
||||
#define vec_ushort8_set(...) (vec_ushort8){__VA_ARGS__}
|
||||
#define vec_ushort8_sp(c) (__VSX_S8__(vec_ushort8, c))
|
||||
#define vec_ushort8_c(v) ((vec_ushort8)(v))
|
||||
#define vec_ushort8_mx vec_ushort8_sp(0xFFFF)
|
||||
#define vec_ushort8_mn vec_ushort8_sp(0)
|
||||
#define vec_ushort8_z vec_ushort8_mn
|
||||
#define vec_ushort8_z vec_ushort8_sp(0)
|
||||
|
||||
typedef __vector signed short vec_short8;
|
||||
#define vec_short8_set(...) (vec_short8){__VA_ARGS__}
|
||||
#define vec_short8_sp(c) (__VSX_S8__(vec_short8, c))
|
||||
#define vec_short8_c(v) ((vec_short8)(v))
|
||||
#define vec_short8_mx vec_short8_sp(0x7FFF)
|
||||
#define vec_short8_mn vec_short8_sp(-0x7FFF-1)
|
||||
#define vec_short8_z vec_short8_sp(0)
|
||||
|
||||
typedef __vector unsigned int vec_uint4;
|
||||
#define vec_uint4_set(...) (vec_uint4){__VA_ARGS__}
|
||||
#define vec_uint4_sp(c) (__VSX_S4__(vec_uint4, c))
|
||||
#define vec_uint4_c(v) ((vec_uint4)(v))
|
||||
#define vec_uint4_mx vec_uint4_sp(0xFFFFFFFFU)
|
||||
#define vec_uint4_mn vec_uint4_sp(0)
|
||||
#define vec_uint4_z vec_uint4_mn
|
||||
#define vec_uint4_z vec_uint4_sp(0)
|
||||
|
||||
typedef __vector signed int vec_int4;
|
||||
#define vec_int4_set(...) (vec_int4){__VA_ARGS__}
|
||||
#define vec_int4_sp(c) (__VSX_S4__(vec_int4, c))
|
||||
#define vec_int4_c(v) ((vec_int4)(v))
|
||||
#define vec_int4_mx vec_int4_sp(0x7FFFFFFF)
|
||||
#define vec_int4_mn vec_int4_sp(-0x7FFFFFFF-1)
|
||||
#define vec_int4_z vec_int4_sp(0)
|
||||
|
||||
typedef __vector float vec_float4;
|
||||
#define vec_float4_set(...) (vec_float4){__VA_ARGS__}
|
||||
#define vec_float4_sp(c) (__VSX_S4__(vec_float4, c))
|
||||
#define vec_float4_c(v) ((vec_float4)(v))
|
||||
#define vec_float4_mx vec_float4_sp(3.40282347E+38F)
|
||||
#define vec_float4_mn vec_float4_sp(1.17549435E-38F)
|
||||
#define vec_float4_z vec_float4_sp(0)
|
||||
|
||||
typedef __vector unsigned long long vec_udword2;
|
||||
#define vec_udword2_set(...) (vec_udword2){__VA_ARGS__}
|
||||
#define vec_udword2_sp(c) (__VSX_S2__(vec_udword2, c))
|
||||
#define vec_udword2_c(v) ((vec_udword2)(v))
|
||||
#define vec_udword2_mx vec_udword2_sp(18446744073709551615ULL)
|
||||
#define vec_udword2_mn vec_udword2_sp(0)
|
||||
#define vec_udword2_z vec_udword2_mn
|
||||
#define vec_udword2_z vec_udword2_sp(0)
|
||||
|
||||
typedef __vector signed long long vec_dword2;
|
||||
#define vec_dword2_set(...) (vec_dword2){__VA_ARGS__}
|
||||
#define vec_dword2_sp(c) (__VSX_S2__(vec_dword2, c))
|
||||
#define vec_dword2_c(v) ((vec_dword2)(v))
|
||||
#define vec_dword2_mx vec_dword2_sp(9223372036854775807LL)
|
||||
#define vec_dword2_mn vec_dword2_sp(-9223372036854775807LL-1)
|
||||
#define vec_dword2_z vec_dword2_sp(0)
|
||||
|
||||
typedef __vector double vec_double2;
|
||||
#define vec_double2_set(...) (vec_double2){__VA_ARGS__}
|
||||
#define vec_double2_c(v) ((vec_double2)(v))
|
||||
#define vec_double2_sp(c) (__VSX_S2__(vec_double2, c))
|
||||
#define vec_double2_mx vec_double2_sp(1.7976931348623157E+308)
|
||||
#define vec_double2_mn vec_double2_sp(2.2250738585072014E-308)
|
||||
#define vec_double2_z vec_double2_sp(0)
|
||||
|
||||
#define vec_bchar16 __vector __bool char
|
||||
#define vec_bchar16_set(...) (vec_bchar16){__VA_ARGS__}
|
||||
#define vec_bchar16_c(v) ((vec_bchar16)(v))
|
||||
#define vec_bchar16_f (__VSX_S16__(vec_bchar16, 0))
|
||||
#define vec_bchar16_t (__VSX_S16__(vec_bchar16, 1))
|
||||
|
||||
#define vec_bshort8 __vector __bool short
|
||||
#define vec_bshort8_set(...) (vec_bshort8){__VA_ARGS__}
|
||||
#define vec_bshort8_c(v) ((vec_bshort8)(v))
|
||||
#define vec_bshort8_f (__VSX_S8__(vec_bshort8, 0))
|
||||
#define vec_bshort8_t (__VSX_S8__(vec_bshort8, 1))
|
||||
|
||||
#define vec_bint4 __vector __bool int
|
||||
#define vec_bint4_set(...) (vec_bint4){__VA_ARGS__}
|
||||
#define vec_bint4_c(v) ((vec_bint4)(v))
|
||||
#define vec_bint4_f (__VSX_S4__(vec_bint4, 0))
|
||||
#define vec_bint4_t (__VSX_S4__(vec_bint4, 1))
|
||||
|
||||
#define vec_bdword2 __vector __bool long long
|
||||
#define vec_bdword2_set(...) (vec_bdword2){__VA_ARGS__}
|
||||
#define vec_bdword2_c(v) ((vec_bdword2)(v))
|
||||
#define vec_bdword2_f (__VSX_S2__(vec_bdword2, 0))
|
||||
#define vec_bdword2_t (__VSX_S2__(vec_bdword2, 1))
|
||||
|
||||
|
||||
#define VSX_FINLINE(tp) extern inline tp __attribute__((always_inline))
|
||||
|
||||
@ -688,34 +619,17 @@ VSX_IMPL_CONV_ODD_2_4(vec_uint4, vec_double2, vec_ctuo, vec_ctu)
|
||||
{ vsx_stf(vec, VSX_OFFSET(o, p), (long long*)p); }
|
||||
#endif
|
||||
|
||||
// load 4 unsigned bytes into uint4 vector
|
||||
#define vec_ld_buw(p) vec_uint4_set((p)[0], (p)[1], (p)[2], (p)[3])
|
||||
|
||||
// load 4 signed bytes into int4 vector
|
||||
#define vec_ld_bsw(p) vec_int4_set((p)[0], (p)[1], (p)[2], (p)[3])
|
||||
|
||||
// load 4 unsigned bytes into float vector
|
||||
#define vec_ld_bps(p) vec_ctf(vec_ld_buw(p), 0)
|
||||
|
||||
// Store lower 8 byte
|
||||
#define vec_st_l8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 0)
|
||||
|
||||
// Store higher 8 byte
|
||||
#define vec_st_h8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 1)
|
||||
|
||||
/*
|
||||
* vec_ld_l8(ptr) -> Load 64-bits of integer data to lower part
|
||||
* vec_ldz_l8(ptr) -> Load 64-bits of integer data to lower part and zero upper part
|
||||
**/
|
||||
#define VSX_IMPL_LOAD_L8(Tvec, Tp) \
|
||||
VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p) \
|
||||
{ return ((Tvec)vec_promote(*((uint64*)p), 0)); } \
|
||||
VSX_FINLINE(Tvec) vec_ldz_l8(const Tp *p) \
|
||||
{ \
|
||||
/* TODO: try (Tvec)(vec_udword2{*((uint64*)p), 0}) */ \
|
||||
static const vec_bdword2 mask = {0xFFFFFFFFFFFFFFFF, 0x0000000000000000}; \
|
||||
return vec_and(vec_ld_l8(p), (Tvec)mask); \
|
||||
}
|
||||
// Load 64-bits of integer data to lower part
|
||||
#define VSX_IMPL_LOAD_L8(Tvec, Tp) \
|
||||
VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p) \
|
||||
{ return ((Tvec)vec_promote(*((uint64*)p), 0)); }
|
||||
|
||||
VSX_IMPL_LOAD_L8(vec_uchar16, uchar)
|
||||
VSX_IMPL_LOAD_L8(vec_char16, schar)
|
||||
VSX_IMPL_LOAD_L8(vec_ushort8, ushort)
|
||||
@ -745,11 +659,11 @@ VSX_IMPL_LOAD_L8(vec_double2, double)
|
||||
* Implement vec_unpacklu and vec_unpackhu
|
||||
* since vec_unpackl, vec_unpackh only support signed integers
|
||||
**/
|
||||
#define VSX_IMPL_UNPACKU(rt, rg, zero) \
|
||||
VSX_FINLINE(rt) vec_unpacklu(const rg& a) \
|
||||
{ return reinterpret_cast<rt>(vec_mergel(a, zero)); } \
|
||||
VSX_FINLINE(rt) vec_unpackhu(const rg& a) \
|
||||
{ return reinterpret_cast<rt>(vec_mergeh(a, zero)); }
|
||||
#define VSX_IMPL_UNPACKU(rt, rg, zero) \
|
||||
VSX_FINLINE(rt) vec_unpacklu(const rg& a) \
|
||||
{ return (rt)(vec_mergel(a, zero)); } \
|
||||
VSX_FINLINE(rt) vec_unpackhu(const rg& a) \
|
||||
{ return (rt)(vec_mergeh(a, zero)); }
|
||||
|
||||
VSX_IMPL_UNPACKU(vec_ushort8, vec_uchar16, vec_uchar16_z)
|
||||
VSX_IMPL_UNPACKU(vec_uint4, vec_ushort8, vec_ushort8_z)
|
||||
|
160
modules/core/src/check.cpp
Normal file
160
modules/core/src/check.cpp
Normal file
@ -0,0 +1,160 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#include "opencv2/core/check.hpp"
|
||||
|
||||
namespace cv {
|
||||
|
||||
const char* depthToString(int depth)
|
||||
{
|
||||
const char* s = detail::depthToString_(depth);
|
||||
return s ? s : "<invalid depth>";
|
||||
}
|
||||
|
||||
// Public lookup of a type name: returns the string built by
// detail::typeToString_(), or "<invalid type>" when that string is empty.
const cv::String typeToString(int type)
{
    cv::String name = detail::typeToString_(type);
    if (!name.empty())
        return name;

    // Fallback text, constructed the first time an unnamed type is seen.
    static cv::String invalidType("<invalid type>");
    return invalidType;
}
|
||||
|
||||
|
||||
namespace detail {
|
||||
|
||||
static const char* getTestOpPhraseStr(unsigned testOp)
|
||||
{
|
||||
static const char* _names[] = { "{custom check}", "equal to", "not equal to", "less than or equal to", "less than", "greater than or equal to", "greater than" };
|
||||
CV_DbgAssert(testOp < CV__LAST_TEST_OP);
|
||||
return testOp < CV__LAST_TEST_OP ? _names[testOp] : "???";
|
||||
}
|
||||
static const char* getTestOpMath(unsigned testOp)
|
||||
{
|
||||
static const char* _names[] = { "???", "==", "!=", "<=", "<", ">=", ">" };
|
||||
CV_DbgAssert(testOp < CV__LAST_TEST_OP);
|
||||
return testOp < CV__LAST_TEST_OP ? _names[testOp] : "???";
|
||||
}
|
||||
|
||||
// Map a matrix depth code to its symbolic name.
//
// The table is indexed directly by the depth value, so the argument is checked
// against both ends of the table. The previous test only checked the upper
// bound, which let a negative depth read memory before the start of the array.
// The bound is taken from the table itself so the lookup can never run past
// it; the last entry is "CV_USRTYPE1", which matches the former
// `depth <= CV_USRTYPE1` limit as long as that constant equals the last index.
//
// Returns the name for a valid depth, or NULL for any value outside the table.
const char* depthToString_(int depth)
{
    static const char* depthNames[] = { "CV_8U", "CV_8S", "CV_16U", "CV_16S", "CV_32S", "CV_32F", "CV_64F", "CV_USRTYPE1" };
    const int depthCount = static_cast<int>(sizeof(depthNames) / sizeof(depthNames[0]));
    return (depth >= 0 && depth < depthCount) ? depthNames[depth] : NULL;
}
|
||||
|
||||
// Build "<depth name>C<channels>" for a matrix type code. The depth and channel
// count are extracted with CV_MAT_DEPTH / CV_MAT_CN; an empty string is
// returned when the depth lies outside [0, CV_USRTYPE1].
const cv::String typeToString_(int type)
{
    const int depth = CV_MAT_DEPTH(type);
    const int cn = CV_MAT_CN(type);
    if (depth < 0 || depth > CV_USRTYPE1)
        return cv::String();
    return cv::format("%sC%d", depthToString_(depth), cn);
}
|
||||
|
||||
// Generic two-value failure report. Streams the message, the expected
// relation and both operand values into one multi-line string and hands it to
// cv::errorNoReturn with code cv::Error::StsError. The "must be ..." line is
// emitted only for non-custom operation codes below CV__LAST_TEST_OP.
template<typename T> static CV_NORETURN
void check_failed_auto_(const T& v1, const T& v2, const CheckContext& ctx)
{
    const bool namedOp = ctx.testOp != TEST_CUSTOM && ctx.testOp < CV__LAST_TEST_OP;

    std::stringstream out;
    out << ctx.message << " (expected: '" << ctx.p1_str << " " << getTestOpMath(ctx.testOp) << " " << ctx.p2_str << "'), where" << std::endl;
    out << " '" << ctx.p1_str << "' is " << v1 << std::endl;
    if (namedOp)
        out << "must be " << getTestOpPhraseStr(ctx.testOp) << std::endl;
    out << " '" << ctx.p2_str << "' is " << v2;
    cv::errorNoReturn(cv::Error::StsError, out.str(), ctx.func, ctx.file, ctx.line);
}
|
||||
void check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << ctx.message << " (expected: '" << ctx.p1_str << " " << getTestOpMath(ctx.testOp) << " " << ctx.p2_str << "'), where" << std::endl
|
||||
<< " '" << ctx.p1_str << "' is " << v1 << " (" << depthToString(v1) << ")" << std::endl;
|
||||
if (ctx.testOp != TEST_CUSTOM && ctx.testOp < CV__LAST_TEST_OP)
|
||||
{
|
||||
ss << "must be " << getTestOpPhraseStr(ctx.testOp) << std::endl;
|
||||
}
|
||||
ss << " '" << ctx.p2_str << "' is " << v2 << " (" << depthToString(v2) << ")";
|
||||
cv::errorNoReturn(cv::Error::StsError, ss.str(), ctx.func, ctx.file, ctx.line);
|
||||
}
|
||||
void check_failed_MatType(const int v1, const int v2, const CheckContext& ctx)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << ctx.message << " (expected: '" << ctx.p1_str << " " << getTestOpMath(ctx.testOp) << " " << ctx.p2_str << "'), where" << std::endl
|
||||
<< " '" << ctx.p1_str << "' is " << v1 << " (" << typeToString(v1) << ")" << std::endl;
|
||||
if (ctx.testOp != TEST_CUSTOM && ctx.testOp < CV__LAST_TEST_OP)
|
||||
{
|
||||
ss << "must be " << getTestOpPhraseStr(ctx.testOp) << std::endl;
|
||||
}
|
||||
ss << " '" << ctx.p2_str << "' is " << v2 << " (" << typeToString(v2) << ")";
|
||||
cv::errorNoReturn(cv::Error::StsError, ss.str(), ctx.func, ctx.file, ctx.line);
|
||||
}
|
||||
void check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx)
|
||||
{
|
||||
check_failed_auto_<int>(v1, v2, ctx);
|
||||
}
|
||||
void check_failed_auto(const int v1, const int v2, const CheckContext& ctx)
|
||||
{
|
||||
check_failed_auto_<int>(v1, v2, ctx);
|
||||
}
|
||||
void check_failed_auto(const float v1, const float v2, const CheckContext& ctx)
|
||||
{
|
||||
check_failed_auto_<float>(v1, v2, ctx);
|
||||
}
|
||||
void check_failed_auto(const double v1, const double v2, const CheckContext& ctx)
|
||||
{
|
||||
check_failed_auto_<double>(v1, v2, ctx);
|
||||
}
|
||||
|
||||
|
||||
// Generic single-value failure report. Streams the message, the text held in
// ctx.p2_str on its own line, and the value of the operand named by
// ctx.p1_str, then hands the string to cv::errorNoReturn (cv::Error::StsError).
template<typename T> static CV_NORETURN
void check_failed_auto_(const T& v, const CheckContext& ctx)
{
    std::stringstream out;
    out << ctx.message << ":" << std::endl;
    out << " '" << ctx.p2_str << "'" << std::endl;
    out << "where" << std::endl;
    out << " '" << ctx.p1_str << "' is " << v;
    cv::errorNoReturn(cv::Error::StsError, out.str(), ctx.func, ctx.file, ctx.line);
}
|
||||
void check_failed_MatDepth(const int v, const CheckContext& ctx)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << ctx.message << ":" << std::endl
|
||||
<< " '" << ctx.p2_str << "'" << std::endl
|
||||
<< "where" << std::endl
|
||||
<< " '" << ctx.p1_str << "' is " << v << " (" << depthToString(v) << ")";
|
||||
cv::errorNoReturn(cv::Error::StsError, ss.str(), ctx.func, ctx.file, ctx.line);
|
||||
}
|
||||
void check_failed_MatType(const int v, const CheckContext& ctx)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << ctx.message << ":" << std::endl
|
||||
<< " '" << ctx.p2_str << "'" << std::endl
|
||||
<< "where" << std::endl
|
||||
<< " '" << ctx.p1_str << "' is " << v << " (" << typeToString(v) << ")";
|
||||
cv::errorNoReturn(cv::Error::StsError, ss.str(), ctx.func, ctx.file, ctx.line);
|
||||
}
|
||||
void check_failed_MatChannels(const int v, const CheckContext& ctx)
|
||||
{
|
||||
check_failed_auto_<int>(v, ctx);
|
||||
}
|
||||
void check_failed_auto(const int v, const CheckContext& ctx)
|
||||
{
|
||||
check_failed_auto_<int>(v, ctx);
|
||||
}
|
||||
void check_failed_auto(const float v, const CheckContext& ctx)
|
||||
{
|
||||
check_failed_auto_<float>(v, ctx);
|
||||
}
|
||||
void check_failed_auto(const double v, const CheckContext& ctx)
|
||||
{
|
||||
check_failed_auto_<double>(v, ctx);
|
||||
}
|
||||
|
||||
|
||||
}} // namespace
|
@ -4275,7 +4275,8 @@ public:
|
||||
entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
|
||||
Context& ctx = Context::getDefault();
|
||||
cl_int retval = CL_SUCCESS;
|
||||
CV_OCL_CHECK_(entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval), retval);
|
||||
entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
|
||||
CV_OCL_CHECK_RESULT(retval, cv::format("clCreateBuffer(capacity=%lld) => %p", (long long int)entry.capacity_, (void*)entry.clBuffer_).c_str());
|
||||
CV_Assert(entry.clBuffer_ != NULL);
|
||||
if(retval == CL_SUCCESS)
|
||||
{
|
||||
@ -4669,15 +4670,19 @@ public:
|
||||
{
|
||||
handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
|
||||
u->size, u->origdata, &retval);
|
||||
CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|createFlags, sz=%lld, origdata=%p) => %p",
|
||||
(long long int)u->size, u->origdata, (void*)handle).c_str());
|
||||
}
|
||||
if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
|
||||
{
|
||||
handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
|
||||
u->size, u->origdata, &retval);
|
||||
CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
|
||||
(long long int)u->size, u->origdata, (void*)handle).c_str());
|
||||
tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
|
||||
}
|
||||
}
|
||||
CV_OCL_DBG_CHECK_RESULT(retval, "clCreateBuffer()");
|
||||
CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer() => %p", (void*)handle).c_str());
|
||||
if(!handle || retval != CL_SUCCESS)
|
||||
return false;
|
||||
u->handle = handle;
|
||||
@ -4805,13 +4810,14 @@ public:
|
||||
void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
||||
(CL_MAP_READ | CL_MAP_WRITE),
|
||||
0, u->size, 0, 0, 0, &retval);
|
||||
CV_OCL_CHECK_RESULT(retval, "clEnqueueMapBuffer()");
|
||||
CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, data).c_str());
|
||||
CV_Assert(u->origdata == data);
|
||||
if (u->originalUMatData)
|
||||
{
|
||||
CV_Assert(u->originalUMatData->data == data);
|
||||
}
|
||||
CV_OCL_CHECK(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0));
|
||||
retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
|
||||
CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, data, (long long int)u->size).c_str());
|
||||
CV_OCL_DBG_CHECK(clFinish(q));
|
||||
}
|
||||
}
|
||||
@ -4838,7 +4844,8 @@ public:
|
||||
else
|
||||
#endif
|
||||
{
|
||||
CV_OCL_DBG_CHECK(clReleaseMemObject((cl_mem)u->handle));
|
||||
cl_int retval = clReleaseMemObject((cl_mem)u->handle);
|
||||
CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clReleaseMemObject(ptr=%p)", (void*)u->handle).c_str());
|
||||
}
|
||||
u->handle = 0;
|
||||
u->markDeviceCopyObsolete(true);
|
||||
@ -4955,7 +4962,7 @@ public:
|
||||
u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
||||
(CL_MAP_READ | CL_MAP_WRITE),
|
||||
0, u->size, 0, 0, 0, &retval);
|
||||
CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(sz=%lld)", (int64)u->size).c_str());
|
||||
CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, u->data).c_str());
|
||||
}
|
||||
if (u->data && retval == CL_SUCCESS)
|
||||
{
|
||||
@ -4982,8 +4989,10 @@ public:
|
||||
#ifdef HAVE_OPENCL_SVM
|
||||
CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
|
||||
#endif
|
||||
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
||||
0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
|
||||
cl_int retval = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
||||
0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
|
||||
CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
|
||||
(void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
|
||||
u->markHostCopyObsolete(false);
|
||||
}
|
||||
}
|
||||
@ -5032,7 +5041,8 @@ public:
|
||||
if (u->refcount == 0)
|
||||
{
|
||||
CV_Assert(u->mapcount-- == 1);
|
||||
CV_OCL_CHECK(retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0));
|
||||
retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0);
|
||||
CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, u->data, (long long int)u->size).c_str());
|
||||
if (Device::getDefault().isAMD())
|
||||
{
|
||||
// required for multithreaded applications (see stitching test)
|
||||
@ -5050,8 +5060,10 @@ public:
|
||||
#ifdef HAVE_OPENCL_SVM
|
||||
CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
|
||||
#endif
|
||||
CV_OCL_CHECK(retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
||||
0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
|
||||
retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
||||
0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
|
||||
CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
|
||||
(void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
|
||||
u->markDeviceCopyObsolete(false);
|
||||
u->markHostCopyObsolete(true);
|
||||
}
|
||||
@ -5354,8 +5366,10 @@ public:
|
||||
if( iscontinuous )
|
||||
{
|
||||
AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
|
||||
CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
||||
dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
|
||||
cl_int retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
||||
dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0);
|
||||
CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, offset=%lld, sz=%lld, data=%p, 0, 0, 0)",
|
||||
(void*)u->handle, (long long int)dstrawofs, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
|
||||
}
|
||||
else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
|
||||
{
|
||||
@ -5527,8 +5541,10 @@ public:
|
||||
{
|
||||
if( iscontinuous )
|
||||
{
|
||||
CV_OCL_CHECK(retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
|
||||
srcrawofs, dstrawofs, total, 0, 0, 0));
|
||||
retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
|
||||
srcrawofs, dstrawofs, total, 0, 0, 0);
|
||||
CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueCopyBuffer(q, src=%p, dst=%p, src_offset=%lld, dst_offset=%lld, sz=%lld, 0, 0, 0)",
|
||||
(void*)src->handle, (void*)dst->handle, (long long int)srcrawofs, (long long int)dstrawofs, (long long int)total).c_str());
|
||||
}
|
||||
else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
|
||||
{
|
||||
@ -6373,7 +6389,9 @@ struct Image2D::Impl
|
||||
if (!alias && !src.isContinuous())
|
||||
{
|
||||
devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
|
||||
CV_OCL_CHECK_RESULT(err, "clCreateBuffer()");
|
||||
CV_OCL_CHECK_RESULT(err, cv::format("clCreateBuffer(CL_MEM_READ_ONLY, sz=%lld) => %p",
|
||||
(long long int)(src.cols * src.rows * src.elemSize()), (void*)devData
|
||||
).c_str());
|
||||
|
||||
const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
|
||||
CV_OCL_CHECK(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
|
||||
|
@ -249,10 +249,34 @@ const char* Exception::what() const throw() { return msg.c_str(); }
|
||||
|
||||
void Exception::formatMessage()
|
||||
{
|
||||
if( func.size() > 0 )
|
||||
msg = format("OpenCV(%s) %s:%d: error: (%d) %s: %s in function %s\n", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), err.c_str(), func.c_str());
|
||||
size_t pos = err.find('\n');
|
||||
bool multiline = pos != cv::String::npos;
|
||||
if (multiline)
|
||||
{
|
||||
std::stringstream ss;
|
||||
size_t prev_pos = 0;
|
||||
while (pos != cv::String::npos)
|
||||
{
|
||||
ss << "> " << err.substr(prev_pos, pos - prev_pos) << std::endl;
|
||||
prev_pos = pos + 1;
|
||||
pos = err.find('\n', prev_pos);
|
||||
}
|
||||
ss << "> " << err.substr(prev_pos);
|
||||
if (err[err.size() - 1] != '\n')
|
||||
ss << std::endl;
|
||||
err = ss.str();
|
||||
}
|
||||
if (func.size() > 0)
|
||||
{
|
||||
if (multiline)
|
||||
msg = format("OpenCV(%s) %s:%d: error: (%d:%s) in function '%s'\n%s", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), func.c_str(), err.c_str());
|
||||
else
|
||||
msg = format("OpenCV(%s) %s:%d: error: (%d:%s) %s in function '%s'\n", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), err.c_str(), func.c_str());
|
||||
}
|
||||
else
|
||||
msg = format("OpenCV(%s) %s:%d: error: (%d) %s: %s\n", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), err.c_str());
|
||||
{
|
||||
msg = format("OpenCV(%s) %s:%d: error: (%d:%s) %s%s", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), err.c_str(), multiline ? "" : "\n");
|
||||
}
|
||||
}
|
||||
|
||||
static const char* g_hwFeatureNames[CV_HARDWARE_MAX_FEATURE] = { NULL };
|
||||
|
@ -521,15 +521,25 @@ template<typename R> struct TheTest
|
||||
TheTest & test_dot_prod()
|
||||
{
|
||||
typedef typename V_RegTrait128<LaneType>::w_reg Rx2;
|
||||
typedef typename Rx2::lane_type w_type;
|
||||
|
||||
Data<R> dataA, dataB(2);
|
||||
R a = dataA, b = dataB;
|
||||
|
||||
Data<Rx2> res = v_dotprod(a, b);
|
||||
Data<Rx2> dataC;
|
||||
dataC += std::numeric_limits<w_type>::is_signed ?
|
||||
std::numeric_limits<w_type>::min() :
|
||||
std::numeric_limits<w_type>::max() - R::nlanes * (dataB[0] + 1);
|
||||
Rx2 c = dataC;
|
||||
|
||||
Data<Rx2> resD = v_dotprod(a, b),
|
||||
resE = v_dotprod(a, b, c);
|
||||
|
||||
const int n = R::nlanes / 2;
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1], res[i]);
|
||||
EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1], resD[i]);
|
||||
EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1] + dataC[i], resE[i]);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
@ -245,4 +245,503 @@ TEST(Core_Version, consistency)
|
||||
EXPECT_EQ(String(CV_VERSION), cv::getVersionString());
|
||||
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Test core/check.hpp macros
|
||||
//
|
||||
|
||||
void test_check_eq_1(int value_1, int value_2)
|
||||
{
|
||||
CV_CheckEQ(value_1, value_2, "Validation check failed");
|
||||
}
|
||||
// CV_CheckEQ on two different ints: the call is expected to raise a
// cv::Exception whose `err` holds exactly the report below. Reaching the line
// after the call, or catching any other exception, fails the test.
TEST(Core_Check, testEQ_int_fail)
{
    try
    {
        test_check_eq_1(123, 5678);
        FAIL() << "Unreachable code called";
    }
    catch (const cv::Exception& e)
    {
        EXPECT_STREQ(e.err.c_str(),
            "> Validation check failed (expected: 'value_1 == value_2'), where\n"
            "> 'value_1' is 123\n"
            "> must be equal to\n"
            "> 'value_2' is 5678\n"
        );
    }
    catch (const std::exception& ex)
    {
        FAIL() << "Unexpected C++ exception: " << ex.what();
    }
    catch (...)
    {
        FAIL() << "Unexpected unknown exception";
    }
}
|
||||
// CV_CheckEQ on two equal ints must not throw.
TEST(Core_Check, testEQ_int_pass)
{
    EXPECT_NO_THROW({ test_check_eq_1(1234, 1234); });
}
|
||||
|
||||
|
||||
void test_check_eq_2(float value_1, float value_2)
|
||||
{
|
||||
CV_CheckEQ(value_1, value_2, "Validation check failed (float)");
|
||||
}
|
||||
// CV_CheckEQ on two different floats: expects a cv::Exception whose `err`
// holds exactly the report below; any other outcome fails the test.
TEST(Core_Check, testEQ_float_fail)
{
    try
    {
        test_check_eq_2(1234.5f, 1234.55f);
        FAIL() << "Unreachable code called";
    }
    catch (const cv::Exception& e)
    {
        EXPECT_STREQ(e.err.c_str(),
            "> Validation check failed (float) (expected: 'value_1 == value_2'), where\n"
            "> 'value_1' is 1234.5\n"  // TODO Locale handling (use LC_ALL=C on Linux)
            "> must be equal to\n"
            "> 'value_2' is 1234.55\n"
        );
    }
    catch (const std::exception& ex)
    {
        FAIL() << "Unexpected C++ exception: " << ex.what();
    }
    catch (...)
    {
        FAIL() << "Unexpected unknown exception";
    }
}
|
||||
// CV_CheckEQ on two equal floats must not throw.
TEST(Core_Check, testEQ_float_pass)
{
    EXPECT_NO_THROW({ test_check_eq_2(1234.6f, 1234.6f); });
}
|
||||
|
||||
|
||||
void test_check_eq_3(double value_1, double value_2)
|
||||
{
|
||||
CV_CheckEQ(value_1, value_2, "Validation check failed (double)");
|
||||
}
|
||||
// CV_CheckEQ on two different doubles: expects a cv::Exception whose `err`
// holds exactly the report below; any other outcome fails the test.
TEST(Core_Check, testEQ_double_fail)
{
    try
    {
        test_check_eq_3(1234.5, 1234.56);
        FAIL() << "Unreachable code called";
    }
    catch (const cv::Exception& e)
    {
        EXPECT_STREQ(e.err.c_str(),
            "> Validation check failed (double) (expected: 'value_1 == value_2'), where\n"
            "> 'value_1' is 1234.5\n"  // TODO Locale handling (use LC_ALL=C on Linux)
            "> must be equal to\n"
            "> 'value_2' is 1234.56\n"
        );
    }
    catch (const std::exception& ex)
    {
        FAIL() << "Unexpected C++ exception: " << ex.what();
    }
    catch (...)
    {
        FAIL() << "Unexpected unknown exception";
    }
}
|
||||
// CV_CheckEQ on two equal values passed to the double helper must not throw.
TEST(Core_Check, testEQ_double_pass)
{
    EXPECT_NO_THROW({ test_check_eq_3(1234.0f, 1234.0f); });
}
|
||||
|
||||
|
||||
void test_check_ne_1(int value_1, int value_2)
|
||||
{
|
||||
CV_CheckNE(value_1, value_2, "Validation NE check failed");
|
||||
}
|
||||
// CV_CheckNE on two equal ints: expects a cv::Exception whose `err` holds
// exactly the report below; any other outcome fails the test.
TEST(Core_Check, testNE_int_fail)
{
    try
    {
        test_check_ne_1(123, 123);
        FAIL() << "Unreachable code called";
    }
    catch (const cv::Exception& e)
    {
        EXPECT_STREQ(e.err.c_str(),
            "> Validation NE check failed (expected: 'value_1 != value_2'), where\n"
            "> 'value_1' is 123\n"
            "> must be not equal to\n"
            "> 'value_2' is 123\n"
        );
    }
    catch (const std::exception& ex)
    {
        FAIL() << "Unexpected C++ exception: " << ex.what();
    }
    catch (...)
    {
        FAIL() << "Unexpected unknown exception";
    }
}
|
||||
// CV_CheckNE on two different ints must not throw.
TEST(Core_Check, testNE_int_pass)
{
    EXPECT_NO_THROW({ test_check_ne_1(123, 1234); });
}
|
||||
|
||||
|
||||
void test_check_le_1(int value_1, int value_2)
|
||||
{
|
||||
CV_CheckLE(value_1, value_2, "Validation LE check failed");
|
||||
}
|
||||
// CV_CheckLE with the first int larger than the second: expects a
// cv::Exception whose `err` holds exactly the report below; any other outcome
// fails the test.
TEST(Core_Check, testLE_int_fail)
{
    try
    {
        test_check_le_1(1234, 123);
        FAIL() << "Unreachable code called";
    }
    catch (const cv::Exception& e)
    {
        EXPECT_STREQ(e.err.c_str(),
            "> Validation LE check failed (expected: 'value_1 <= value_2'), where\n"
            "> 'value_1' is 1234\n"
            "> must be less than or equal to\n"
            "> 'value_2' is 123\n"
        );
    }
    catch (const std::exception& ex)
    {
        FAIL() << "Unexpected C++ exception: " << ex.what();
    }
    catch (...)
    {
        FAIL() << "Unexpected unknown exception";
    }
}
|
||||
// CV_CheckLE must not throw for equal operands nor for a smaller first operand.
TEST(Core_Check, testLE_int_pass)
{
    EXPECT_NO_THROW({ test_check_le_1(1234, 1234); });
    EXPECT_NO_THROW({ test_check_le_1(123, 1234); });
}
|
||||
|
||||
void test_check_lt_1(int value_1, int value_2)
|
||||
{
|
||||
CV_CheckLT(value_1, value_2, "Validation LT check failed");
|
||||
}
|
||||
// CV_CheckLT with the first int larger than the second: expects a
// cv::Exception whose `err` holds exactly the report below; any other outcome
// fails the test.
TEST(Core_Check, testLT_int_fail)
{
    try
    {
        test_check_lt_1(1234, 123);
        FAIL() << "Unreachable code called";
    }
    catch (const cv::Exception& e)
    {
        EXPECT_STREQ(e.err.c_str(),
            "> Validation LT check failed (expected: 'value_1 < value_2'), where\n"
            "> 'value_1' is 1234\n"
            "> must be less than\n"
            "> 'value_2' is 123\n"
        );
    }
    catch (const std::exception& ex)
    {
        FAIL() << "Unexpected C++ exception: " << ex.what();
    }
    catch (...)
    {
        FAIL() << "Unexpected unknown exception";
    }
}
|
||||
// CV_CheckLT with equal ints (boundary case): expects a cv::Exception whose
// `err` holds exactly the report below; any other outcome fails the test.
TEST(Core_Check, testLT_int_fail_eq)
{
    try
    {
        test_check_lt_1(123, 123);
        FAIL() << "Unreachable code called";
    }
    catch (const cv::Exception& e)
    {
        EXPECT_STREQ(e.err.c_str(),
            "> Validation LT check failed (expected: 'value_1 < value_2'), where\n"
            "> 'value_1' is 123\n"
            "> must be less than\n"
            "> 'value_2' is 123\n"
        );
    }
    catch (const std::exception& ex)
    {
        FAIL() << "Unexpected C++ exception: " << ex.what();
    }
    catch (...)
    {
        FAIL() << "Unexpected unknown exception";
    }
}
|
||||
// CV_CheckLT with a smaller first operand must not throw.
TEST(Core_Check, testLT_int_pass)
{
    EXPECT_NO_THROW({ test_check_lt_1(123, 1234); });
}
|
||||
|
||||
|
||||
void test_check_ge_1(int value_1, int value_2)
|
||||
{
|
||||
CV_CheckGE(value_1, value_2, "Validation GE check failed");
|
||||
}
|
||||
// CV_CheckGE with the first int smaller than the second: expects a
// cv::Exception whose `err` holds exactly the report below; any other outcome
// fails the test.
TEST(Core_Check, testGE_int_fail)
{
    try
    {
        test_check_ge_1(123, 1234);
        FAIL() << "Unreachable code called";
    }
    catch (const cv::Exception& e)
    {
        EXPECT_STREQ(e.err.c_str(),
            "> Validation GE check failed (expected: 'value_1 >= value_2'), where\n"
            "> 'value_1' is 123\n"
            "> must be greater than or equal to\n"
            "> 'value_2' is 1234\n"
        );
    }
    catch (const std::exception& ex)
    {
        FAIL() << "Unexpected C++ exception: " << ex.what();
    }
    catch (...)
    {
        FAIL() << "Unexpected unknown exception";
    }
}
|
||||
// CV_CheckGE must not throw for equal operands nor for a larger first operand.
TEST(Core_Check, testGE_int_pass)
{
    EXPECT_NO_THROW({ test_check_ge_1(1234, 1234); });
    EXPECT_NO_THROW({ test_check_ge_1(1234, 123); });
}
|
||||
|
||||
void test_check_gt_1(int value_1, int value_2)
|
||||
{
|
||||
CV_CheckGT(value_1, value_2, "Validation GT check failed");
|
||||
}
|
||||
// CV_CheckGT with the first int smaller than the second: expects a
// cv::Exception whose `err` holds exactly the report below; any other outcome
// fails the test.
TEST(Core_Check, testGT_int_fail)
{
    try
    {
        test_check_gt_1(123, 1234);
        FAIL() << "Unreachable code called";
    }
    catch (const cv::Exception& e)
    {
        EXPECT_STREQ(e.err.c_str(),
            "> Validation GT check failed (expected: 'value_1 > value_2'), where\n"
            "> 'value_1' is 123\n"
            "> must be greater than\n"
            "> 'value_2' is 1234\n"
        );
    }
    catch (const std::exception& ex)
    {
        FAIL() << "Unexpected C++ exception: " << ex.what();
    }
    catch (...)
    {
        FAIL() << "Unexpected unknown exception";
    }
}
|
||||
// CV_CheckGT with equal ints (boundary case): expects a cv::Exception whose
// `err` holds exactly the report below; any other outcome fails the test.
TEST(Core_Check, testGT_int_fail_eq)
{
    try
    {
        test_check_gt_1(123, 123);
        FAIL() << "Unreachable code called";
    }
    catch (const cv::Exception& e)
    {
        EXPECT_STREQ(e.err.c_str(),
            "> Validation GT check failed (expected: 'value_1 > value_2'), where\n"
            "> 'value_1' is 123\n"
            "> must be greater than\n"
            "> 'value_2' is 123\n"
        );
    }
    catch (const std::exception& ex)
    {
        FAIL() << "Unexpected C++ exception: " << ex.what();
    }
    catch (...)
    {
        FAIL() << "Unexpected unknown exception";
    }
}
|
||||
// CV_CheckGT with a larger first operand must not throw.
TEST(Core_Check, testGT_int_pass)
{
    EXPECT_NO_THROW({ test_check_gt_1(1234, 123); });
}
|
||||
|
||||
|
||||
// Helper: applies CV_CheckTypeEQ to require src_type == CV_32FC1, with the
// message "Unsupported source type". The parameter name `src_type` is
// expected to show up verbatim in the error text checked by the tests.
void test_check_MatType_1(int src_type)
|
||||
{
|
||||
CV_CheckTypeEQ(src_type, CV_32FC1, "Unsupported source type");
|
||||
}
|
||||
// Passing case for the type-equality helper: a type built with
// CV_MAKE_TYPE(CV_32F, 1) is expected to compare equal to CV_32FC1, so no
// exception should be thrown.
TEST(Core_Check, testMatType_pass)
|
||||
{
|
||||
EXPECT_NO_THROW(
|
||||
{
|
||||
test_check_MatType_1(CV_MAKE_TYPE(CV_32F, 1));
|
||||
});
|
||||
}
|
||||
// Failing case for the type-equality helper: CV_8UC1 is not CV_32FC1, so a
// cv::Exception is expected. The expected text shows that type values are
// reported both numerically and by symbolic name, e.g. "0 (CV_8UC1)".
TEST(Core_Check, testMatType_fail_1)
|
||||
{
|
||||
try
|
||||
{
|
||||
test_check_MatType_1(CV_8UC1);
|
||||
// Reached only when the check did not throw.
FAIL() << "Unreachable code called";
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
EXPECT_STREQ(e.err.c_str(),
|
||||
"> Unsupported source type (expected: 'src_type == CV_32FC1'), where\n"
|
||||
"> 'src_type' is 0 (CV_8UC1)\n"
|
||||
"> must be equal to\n"
|
||||
"> 'CV_32FC1' is 5 (CV_32FC1)\n"
|
||||
);
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
FAIL() << "Unexpected C++ exception: " << e.what();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
FAIL() << "Unexpected unknown exception";
|
||||
}
|
||||
}
|
||||
|
||||
// Helper: applies CV_CheckType with a free-form predicate that accepts
// CV_32FC1 or CV_32FC3, using the message "Unsupported src". Unlike the
// EQ variant, the whole predicate expression is passed to the macro.
void test_check_MatType_2(int src_type)
|
||||
{
|
||||
CV_CheckType(src_type, src_type == CV_32FC1 || src_type == CV_32FC3, "Unsupported src");
|
||||
}
|
||||
// Failing case for the predicate-based type helper: CV_8UC1 satisfies
// neither alternative, so a cv::Exception is expected. The expected text
// uses a different layout than the EQ check: message, the stringified
// predicate, then the offending value.
TEST(Core_Check, testMatType_fail_2)
|
||||
{
|
||||
try
|
||||
{
|
||||
test_check_MatType_2(CV_8UC1);
|
||||
// Reached only when the check did not throw.
FAIL() << "Unreachable code called";
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
EXPECT_STREQ(e.err.c_str(),
|
||||
"> Unsupported src:\n"
|
||||
"> 'src_type == CV_32FC1 || src_type == CV_32FC3'\n"
|
||||
"> where\n> 'src_type' is 0 (CV_8UC1)\n"
|
||||
);
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
FAIL() << "Unexpected C++ exception: " << e.what();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
FAIL() << "Unexpected unknown exception";
|
||||
}
|
||||
}
|
||||
|
||||
// Helper: applies CV_CheckDepthEQ to require src_depth == CV_32F, with the
// message "Unsupported source depth". The parameter name `src_depth` is
// expected to appear verbatim in the error text checked by the tests.
void test_check_MatDepth_1(int src_depth)
|
||||
{
|
||||
CV_CheckDepthEQ(src_depth, CV_32F, "Unsupported source depth");
|
||||
}
|
||||
// Passing case for the depth-equality helper: no exception is expected.
// NOTE(review): the argument is built with CV_MAKE_TYPE(CV_32F, 1), i.e. a
// type expression rather than a bare depth constant. The test relies on it
// comparing equal to CV_32F (the sibling tests print both CV_32FC1 and
// CV_32F as 5) - confirm this is the intended input.
TEST(Core_Check, testMatDepth_pass)
|
||||
{
|
||||
EXPECT_NO_THROW(
|
||||
{
|
||||
test_check_MatDepth_1(CV_MAKE_TYPE(CV_32F, 1));
|
||||
});
|
||||
}
|
||||
// Failing case for the depth-equality helper: CV_8U is not CV_32F, so a
// cv::Exception is expected. The expected text reports each depth both
// numerically and by symbolic name, e.g. "0 (CV_8U)".
TEST(Core_Check, testMatDepth_fail_1)
|
||||
{
|
||||
try
|
||||
{
|
||||
test_check_MatDepth_1(CV_8U);
|
||||
// Reached only when the check did not throw.
FAIL() << "Unreachable code called";
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
EXPECT_STREQ(e.err.c_str(),
|
||||
"> Unsupported source depth (expected: 'src_depth == CV_32F'), where\n"
|
||||
"> 'src_depth' is 0 (CV_8U)\n"
|
||||
"> must be equal to\n"
|
||||
"> 'CV_32F' is 5 (CV_32F)\n"
|
||||
);
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
FAIL() << "Unexpected C++ exception: " << e.what();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
FAIL() << "Unexpected unknown exception";
|
||||
}
|
||||
}
|
||||
|
||||
// Helper: applies CV_CheckDepth with a free-form predicate that accepts
// CV_32F or CV_64F, using the message "Unsupported src".
void test_check_MatDepth_2(int src_depth)
|
||||
{
|
||||
CV_CheckDepth(src_depth, src_depth == CV_32F || src_depth == CV_64F, "Unsupported src");
|
||||
}
|
||||
// Failing case for the predicate-based depth helper: CV_8U satisfies
// neither alternative, so a cv::Exception is expected. The expected text
// lists the message, the stringified predicate, then the offending value.
TEST(Core_Check, testMatDepth_fail_2)
|
||||
{
|
||||
try
|
||||
{
|
||||
test_check_MatDepth_2(CV_8U);
|
||||
// Reached only when the check did not throw.
FAIL() << "Unreachable code called";
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
EXPECT_STREQ(e.err.c_str(),
|
||||
"> Unsupported src:\n"
|
||||
"> 'src_depth == CV_32F || src_depth == CV_64F'\n"
|
||||
"> where\n> 'src_depth' is 0 (CV_8U)\n"
|
||||
);
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
FAIL() << "Unexpected C++ exception: " << e.what();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
FAIL() << "Unexpected unknown exception";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}} // namespace
|
||||
|
@ -559,7 +559,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
|
||||
{
|
||||
public:
|
||||
float pnorm, epsilon;
|
||||
bool acrossSpatial;
|
||||
CV_DEPRECATED bool acrossSpatial;
|
||||
|
||||
static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
|
||||
};
|
||||
|
@ -80,7 +80,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
|
||||
enum Target
|
||||
{
|
||||
DNN_TARGET_CPU,
|
||||
DNN_TARGET_OPENCL
|
||||
DNN_TARGET_OPENCL,
|
||||
DNN_TARGET_OPENCL_FP16
|
||||
};
|
||||
|
||||
/** @brief This class provides all data needed to initialize layer.
|
||||
|
@ -318,6 +318,7 @@ for node in graph_def.node:
|
||||
node.input.pop()
|
||||
node.input.pop()
|
||||
node.input.append(layer_256_1_relu1.name)
|
||||
node.input.append('conv4_3_norm/l2_normalize/Sum/reduction_indices')
|
||||
break
|
||||
|
||||
softmaxShape = NodeDef()
|
||||
|
@ -13,7 +13,7 @@
|
||||
namespace opencv_test {
|
||||
|
||||
CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE)
|
||||
CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL)
|
||||
CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16)
|
||||
|
||||
class DNNTestNetwork : public ::perf::TestBaseWithParam< tuple<DNNBackend, DNNTarget> >
|
||||
{
|
||||
@ -41,8 +41,6 @@ public:
|
||||
throw cvtest::SkipTestException("OpenCL is not available/disabled in OpenCV");
|
||||
}
|
||||
}
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
|
||||
throw SkipTestException("Skip OpenCL target of Inference Engine backend");
|
||||
|
||||
randu(input, 0.0f, 1.0f);
|
||||
|
||||
@ -89,24 +87,32 @@ public:
|
||||
|
||||
// Perf test: runs the AlexNet Caffe model through processNet (defined
// outside this view) with a 227x227 CV_32FC3 input.
// Skipped for the Inference Engine backend on every target except CPU.
PERF_TEST_P_(DNNTestNetwork, AlexNet)
|
||||
{
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt",
|
||||
"alexnet.yml", Mat(cv::Size(227, 227), CV_32FC3));
|
||||
}
|
||||
|
||||
// Perf test: runs the GoogLeNet Caffe model through processNet (defined
// outside this view) with a 224x224 CV_32FC3 input.
// Skipped only for the Inference Engine backend with the OpenCL FP16 target.
PERF_TEST_P_(DNNTestNetwork, GoogLeNet)
|
||||
{
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
|
||||
"", Mat(cv::Size(224, 224), CV_32FC3));
|
||||
}
|
||||
|
||||
// Perf test: runs the ResNet-50 Caffe model through processNet (defined
// outside this view) with a 224x224 CV_32FC3 input.
// Skipped only for the Inference Engine backend with the OpenCL FP16 target.
PERF_TEST_P_(DNNTestNetwork, ResNet_50)
|
||||
{
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt",
|
||||
"resnet_50.yml", Mat(cv::Size(224, 224), CV_32FC3));
|
||||
}
|
||||
|
||||
// Perf test: runs the SqueezeNet v1.1 Caffe model through processNet
// (defined outside this view) with a 227x227 CV_32FC3 input.
// Skipped only for the Inference Engine backend with the OpenCL FP16 target.
PERF_TEST_P_(DNNTestNetwork, SqueezeNet_v1_1)
|
||||
{
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt",
|
||||
"squeezenet_v1_1.yml", Mat(cv::Size(227, 227), CV_32FC3));
|
||||
}
|
||||
@ -135,14 +141,18 @@ PERF_TEST_P_(DNNTestNetwork, SSD)
|
||||
|
||||
PERF_TEST_P_(DNNTestNetwork, OpenFace)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
if (backend == DNN_BACKEND_HALIDE ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/openface_nn4.small2.v1.t7", "", "",
|
||||
Mat(cv::Size(96, 96), CV_32FC3));
|
||||
}
|
||||
|
||||
PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
if (backend == DNN_BACKEND_HALIDE ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", "",
|
||||
Mat(cv::Size(300, 300), CV_32FC3));
|
||||
}
|
||||
@ -150,7 +160,8 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe)
|
||||
PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_TensorFlow)
|
||||
{
|
||||
if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL ||
|
||||
backend == DNN_BACKEND_HALIDE)
|
||||
backend == DNN_BACKEND_HALIDE ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/ssd_mobilenet_v1_coco.pb", "ssd_mobilenet_v1_coco.pbtxt", "",
|
||||
Mat(cv::Size(300, 300), CV_32FC3));
|
||||
@ -158,7 +169,9 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_TensorFlow)
|
||||
|
||||
PERF_TEST_P_(DNNTestNetwork, DenseNet_121)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
if (backend == DNN_BACKEND_HALIDE ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", "",
|
||||
Mat(cv::Size(224, 224), CV_32FC3));
|
||||
}
|
||||
@ -189,7 +202,7 @@ PERF_TEST_P_(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages)
|
||||
PERF_TEST_P_(DNNTestNetwork, opencv_face_detector)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE ||
|
||||
backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL)
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt", "",
|
||||
Mat(cv::Size(300, 300), CV_32FC3));
|
||||
@ -197,7 +210,9 @@ PERF_TEST_P_(DNNTestNetwork, opencv_face_detector)
|
||||
|
||||
PERF_TEST_P_(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
if (backend == DNN_BACKEND_HALIDE ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "ssd_inception_v2_coco_2017_11_17.pbtxt", "",
|
||||
Mat(cv::Size(300, 300), CV_32FC3));
|
||||
}
|
||||
@ -209,6 +224,8 @@ const tuple<DNNBackend, DNNTarget> testCases[] = {
|
||||
#endif
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
#endif
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_DEFAULT, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL)
|
||||
|
@ -50,7 +50,7 @@ syntax = "proto2";
|
||||
|
||||
package opencv_caffe;
|
||||
|
||||
// NVidia's Caffe feature is used to store fp16 weights, https://github.com/NVIDIA/caffe:
|
||||
// NVIDIA's Caffe feature is used to store fp16 weights, https://github.com/NVIDIA/caffe:
|
||||
// Math and storage types
|
||||
enum Type {
|
||||
DOUBLE = 0;
|
||||
@ -72,10 +72,10 @@ message BlobProto {
|
||||
repeated double double_data = 8 [packed = true];
|
||||
repeated double double_diff = 9 [packed = true];
|
||||
|
||||
// NVidia's Caffe fields begin.
|
||||
// NVIDIA's Caffe fields begin.
|
||||
optional Type raw_data_type = 10;
|
||||
optional bytes raw_data = 12 [packed = false];
|
||||
// NVidia's Caffe fields end.
|
||||
// NVIDIA's Caffe fields end.
|
||||
|
||||
// 4D dimensions -- deprecated. Use "shape" instead.
|
||||
optional int32 num = 1 [default = 0];
|
||||
|
@ -1154,7 +1154,7 @@ struct Net::Impl
|
||||
ld.skip = true;
|
||||
}
|
||||
layers[lastLayerId].skip = false;
|
||||
ieNode->net->init();
|
||||
ieNode->net->init(preferableTarget);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1167,17 +1167,17 @@ struct Net::Impl
|
||||
for (it = layers.begin(); it != layers.end(); ++it)
|
||||
{
|
||||
LayerData &ld = it->second;
|
||||
ld.skip = true; // Initially skip all Inference Engine supported layers.
|
||||
Ptr<Layer> layer = ld.layerInstance;
|
||||
bool fused = ld.skip && ld.id != 0;
|
||||
|
||||
Ptr<Layer> layer = ld.layerInstance;
|
||||
if (!layer->supportBackend(preferableBackend))
|
||||
{
|
||||
addInfEngineNetOutputs(ld);
|
||||
ld.skip = false;
|
||||
net = Ptr<InfEngineBackendNet>();
|
||||
netBlobsWrappers.clear();
|
||||
continue;
|
||||
}
|
||||
ld.skip = true; // Initially skip all Inference Engine supported layers.
|
||||
|
||||
// Create a new network if one of inputs from different Inference Engine graph.
|
||||
for (int i = 0; i < ld.inputBlobsId.size(); ++i)
|
||||
@ -1217,19 +1217,16 @@ struct Net::Impl
|
||||
}
|
||||
netBlobsWrappers[ld.id] = ld.outputBlobsWrappers[0];
|
||||
|
||||
bool fused = false;
|
||||
Ptr<BackendNode> node;
|
||||
if (!net.empty())
|
||||
{
|
||||
// Try to fuse.
|
||||
bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
|
||||
ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
|
||||
if (inPlace)
|
||||
if (fused)
|
||||
{
|
||||
node = layer->tryAttach(layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]);
|
||||
fused = !node.empty();
|
||||
if (fused)
|
||||
ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
|
||||
bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
|
||||
ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
|
||||
CV_Assert(inPlace);
|
||||
node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
|
||||
ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -1247,6 +1244,19 @@ struct Net::Impl
|
||||
CV_Assert(!ieNode.empty());
|
||||
ieNode->net = net;
|
||||
|
||||
if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !fused)
|
||||
{
|
||||
ieNode->layer->precision = InferenceEngine::Precision::FP16;
|
||||
auto weightableLayer = std::dynamic_pointer_cast<InferenceEngine::WeightableLayer>(ieNode->layer);
|
||||
if (weightableLayer)
|
||||
{
|
||||
if (weightableLayer->_weights)
|
||||
weightableLayer->_weights = convertFp16(weightableLayer->_weights);
|
||||
if (weightableLayer->_biases)
|
||||
weightableLayer->_biases = convertFp16(weightableLayer->_biases);
|
||||
}
|
||||
}
|
||||
|
||||
ieNode->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers);
|
||||
net->addBlobs(ld.inputBlobsWrappers);
|
||||
net->addBlobs(ld.outputBlobsWrappers);
|
||||
@ -1276,7 +1286,7 @@ struct Net::Impl
|
||||
|
||||
if (!ieNode->net->isInitialized())
|
||||
{
|
||||
ieNode->net->init();
|
||||
ieNode->net->init(preferableTarget);
|
||||
ld.skip = false;
|
||||
}
|
||||
}
|
||||
@ -1380,7 +1390,8 @@ struct Net::Impl
|
||||
|
||||
void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
|
||||
{
|
||||
if( !fusion || preferableBackend != DNN_BACKEND_DEFAULT)
|
||||
if( !fusion || preferableBackend != DNN_BACKEND_DEFAULT &&
|
||||
preferableBackend != DNN_BACKEND_INFERENCE_ENGINE)
|
||||
return;
|
||||
|
||||
CV_TRACE_FUNCTION();
|
||||
@ -1407,7 +1418,7 @@ struct Net::Impl
|
||||
// some other layers.
|
||||
|
||||
// TODO: OpenCL target support more fusion styles.
|
||||
if ( preferableTarget == DNN_TARGET_OPENCL &&
|
||||
if ( preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL &&
|
||||
(!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
|
||||
ld.layerInstance->type != "MVN")) )
|
||||
continue;
|
||||
@ -1442,6 +1453,9 @@ struct Net::Impl
|
||||
break;
|
||||
}
|
||||
|
||||
if (preferableBackend != DNN_BACKEND_DEFAULT)
|
||||
continue; // Go to the next layer.
|
||||
|
||||
// For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
|
||||
if ( preferableTarget != DNN_TARGET_OPENCL ||
|
||||
(preferableTarget == DNN_TARGET_OPENCL &&
|
||||
@ -1583,6 +1597,9 @@ struct Net::Impl
|
||||
}
|
||||
}
|
||||
|
||||
if (preferableBackend != DNN_BACKEND_DEFAULT)
|
||||
continue; // Go to the next layer.
|
||||
|
||||
// the optimization #2. if there is no layer that takes max pooling layer's computed
|
||||
// max indices (and only some semantical segmentation networks might need this;
|
||||
// many others only take the maximum values), then we switch the max pooling
|
||||
@ -1944,7 +1961,8 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
|
||||
ld.layerInstance = Ptr<Layer>(new InfEngineBackendLayer(it.second));
|
||||
ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;
|
||||
|
||||
cvNet.connect(0, 0, lid, 0);
|
||||
for (int i = 0; i < inputsNames.size(); ++i)
|
||||
cvNet.connect(0, i, lid, i);
|
||||
}
|
||||
cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
|
||||
|
||||
|
@ -234,19 +234,6 @@ public:
|
||||
#endif // HAVE_HALIDE
|
||||
break;
|
||||
}
|
||||
case DNN_BACKEND_INFERENCE_ENGINE:
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
auto base = node.dynamicCast<InfEngineBackendNode>();
|
||||
auto conv = std::dynamic_pointer_cast<InferenceEngine::ConvolutionLayer>(base->layer);
|
||||
if (conv)
|
||||
{
|
||||
fuseConvWeights(conv, weights_, bias_);
|
||||
return base;
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
break;
|
||||
}
|
||||
}
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
@ -287,8 +274,9 @@ public:
|
||||
lp.precision = InferenceEngine::Precision::FP32;
|
||||
std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));
|
||||
|
||||
ieLayer->_weights = wrapToInfEngineBlob(weights_);
|
||||
ieLayer->_biases = wrapToInfEngineBlob(bias_);
|
||||
const int numChannels = weights_.total();
|
||||
ieLayer->_weights = wrapToInfEngineBlob(weights_, {numChannels}, InferenceEngine::Layout::C);
|
||||
ieLayer->_biases = wrapToInfEngineBlob(bias_, {numChannels}, InferenceEngine::Layout::C);
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
@ -40,6 +40,7 @@
|
||||
//
|
||||
//M*/
|
||||
#include "../precomp.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -53,6 +54,12 @@ public:
|
||||
setParamsFrom(params);
|
||||
}
|
||||
|
||||
// Reports whether this layer can run on the given backend:
//  - DNN_BACKEND_DEFAULT: always;
//  - DNN_BACKEND_INFERENCE_ENGINE: only when haveInfEngine() returns true.
// `&&` binds tighter than `||`, so haveInfEngine() guards the Inference
// Engine alternative only.
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_DEFAULT ||
|
||||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine();
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
@ -104,6 +111,19 @@ public:
|
||||
if (outputs[i].data != inputs[i]->data)
|
||||
inputs[i]->copyTo(outputs[i]);
|
||||
}
|
||||
|
||||
// Builds the Inference Engine node for this layer. The input wrappers
// argument is unnamed and not used.
// With HAVE_INF_ENGINE defined: creates an InferenceEngine::SplitLayer
// (type "Split", FP32 precision, named after this layer) and returns it
// wrapped in an InfEngineBackendNode.
// Without HAVE_INF_ENGINE: returns an empty Ptr<BackendNode>.
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
InferenceEngine::LayerParams lp;
|
||||
lp.name = name;
|
||||
lp.type = "Split";
|
||||
lp.precision = InferenceEngine::Precision::FP32;
|
||||
std::shared_ptr<InferenceEngine::SplitLayer> ieLayer(new InferenceEngine::SplitLayer(lp));
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#endif // HAVE_INF_ENGINE
|
||||
// Fallback when built without Inference Engine support.
return Ptr<BackendNode>();
|
||||
}
|
||||
};
|
||||
|
||||
Ptr<Layer> BlankLayer::create(const LayerParams& params)
|
||||
|
@ -173,21 +173,21 @@ public:
|
||||
std::vector<float> biasvec;
|
||||
std::vector<float> reluslope;
|
||||
Ptr<ActivationLayer> activ;
|
||||
bool newWeightAndBias;
|
||||
bool fusedBias;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
Ptr<OCL4DNNConvSpatial<float> > convolutionOp;
|
||||
std::vector<UMat> umat_blobs;
|
||||
bool fusedBias;
|
||||
bool newWeightAndBias;
|
||||
bool newActiv;
|
||||
ocl4dnnFusedActiv_t activType;
|
||||
float power;
|
||||
#endif
|
||||
ConvolutionLayerImpl(const LayerParams ¶ms) : BaseConvolutionLayerImpl(params)
|
||||
{
|
||||
#ifdef HAVE_OPENCL
|
||||
fusedBias = false;
|
||||
newWeightAndBias = false;
|
||||
fusedBias = false;
|
||||
#ifdef HAVE_OPENCL
|
||||
newActiv = false;
|
||||
activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
|
||||
power = 0.f;
|
||||
@ -350,10 +350,8 @@ public:
|
||||
biasvec[i] += b.at<float>(i);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
newWeightAndBias = !w.empty() || !b.empty();
|
||||
fusedBias = hasBias() || !b.empty();
|
||||
#endif
|
||||
biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
|
||||
}
|
||||
|
||||
@ -433,9 +431,31 @@ public:
|
||||
ieLayer->_dilation_y = dilation.height;
|
||||
ieLayer->_group = group;
|
||||
|
||||
ieLayer->_weights = wrapToInfEngineBlob(blobs[0]);
|
||||
if (hasBias())
|
||||
ieLayer->_biases = wrapToInfEngineBlob(blobs[1]);
|
||||
ieLayer->_weights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
|
||||
if (newWeightAndBias)
|
||||
{
|
||||
if (weightsMat.isContinuous())
|
||||
{
|
||||
Mat fusedWeights = weightsMat.reshape(1, blobs[0].dims, blobs[0].size);
|
||||
ieLayer->_weights = wrapToInfEngineBlob(fusedWeights, InferenceEngine::Layout::OIHW);
|
||||
}
|
||||
else
|
||||
{
|
||||
ieLayer->_weights = InferenceEngine::make_shared_blob<float>(
|
||||
InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW,
|
||||
ieLayer->_weights->dims());
|
||||
ieLayer->_weights->allocate();
|
||||
|
||||
Mat newWeights = infEngineBlobToMat(ieLayer->_weights).reshape(1, outCn);
|
||||
Mat fusedWeights = weightsMat.colRange(0, newWeights.cols);
|
||||
fusedWeights.copyTo(newWeights);
|
||||
}
|
||||
}
|
||||
if (hasBias() || fusedBias)
|
||||
{
|
||||
Mat biasesMat({outCn}, CV_32F, &biasvec[0]);
|
||||
ieLayer->_biases = wrapToInfEngineBlob(biasesMat, {outCn}, InferenceEngine::Layout::C);
|
||||
}
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#endif // HAVE_INF_ENGINE
|
||||
return Ptr<BackendNode>();
|
||||
|
@ -412,9 +412,9 @@ public:
|
||||
std::shared_ptr<InferenceEngine::FullyConnectedLayer> ieLayer(new InferenceEngine::FullyConnectedLayer(lp));
|
||||
|
||||
ieLayer->_out_num = blobs[0].size[0];
|
||||
ieLayer->_weights = wrapToInfEngineBlob(blobs[0]);
|
||||
ieLayer->_weights = wrapToInfEngineBlob(blobs[0], {blobs[0].size[0], blobs[0].size[1], 1, 1}, InferenceEngine::Layout::OIHW);
|
||||
if (blobs.size() > 1)
|
||||
ieLayer->_biases = wrapToInfEngineBlob(blobs[1]);
|
||||
ieLayer->_biases = wrapToInfEngineBlob(blobs[1], {ieLayer->_out_num}, InferenceEngine::Layout::C);
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#endif // HAVE_INF_ENGINE
|
||||
return Ptr<BackendNode>();
|
||||
|
@ -77,7 +77,20 @@ bool getParameter(const LayerParams ¶ms, const std::string& nameBase, const
|
||||
{
|
||||
if (params.has(nameAll_))
|
||||
{
|
||||
parameterH = parameterW = params.get<int>(nameAll_);
|
||||
DictValue param = params.get(nameAll_);
|
||||
parameterH = param.get<int>(0);
|
||||
if (param.size() == 1)
|
||||
{
|
||||
parameterW = parameterH;
|
||||
}
|
||||
else if (param.size() == 2)
|
||||
{
|
||||
parameterW = param.get<int>(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else
|
||||
|
@ -42,6 +42,7 @@
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
|
||||
@ -54,9 +55,19 @@ public:
|
||||
pnorm = params.get<float>("p", 2);
|
||||
epsilon = params.get<float>("eps", 1e-10f);
|
||||
acrossSpatial = params.get<bool>("across_spatial", true);
|
||||
startAxis = params.get<int>("start_axis", 1);
|
||||
CV_Assert(!params.has("across_spatial") || !params.has("end_axis"));
|
||||
endAxis = params.get<int>("end_axis", acrossSpatial ? -1 : startAxis);
|
||||
CV_Assert(pnorm > 0);
|
||||
}
|
||||
|
||||
// Reports whether this layer can run on the given backend:
//  - DNN_BACKEND_DEFAULT: always;
//  - DNN_BACKEND_INFERENCE_ENGINE: only when haveInfEngine() is true, the
//    norm order `pnorm` equals 2, and the layer has at least one blob.
// `&&` binds tighter than `||`, so all three extra conditions apply to the
// Inference Engine alternative only.
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_DEFAULT ||
|
||||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() &&
|
||||
pnorm == 2 && !blobs.empty();
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
@ -85,20 +96,26 @@ public:
|
||||
|
||||
const UMat& inp0 = inputs[0];
|
||||
UMat& buffer = internals[0];
|
||||
size_t num = inp0.size[0];
|
||||
size_t channels = inp0.size[1];
|
||||
size_t channelSize = inp0.total() / (num * channels);
|
||||
startAxis = clamp(startAxis, inp0.dims);
|
||||
endAxis = clamp(endAxis, inp0.dims);
|
||||
|
||||
size_t num = total(shape(inp0.size), 0, startAxis);
|
||||
size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
|
||||
size_t planeSize = inp0.total() / (num * numPlanes);
|
||||
MatShape s = shape(1, inputs[0].total());
|
||||
UMat inp = inputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
|
||||
UMat out = outputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
|
||||
for (size_t i = 0; i < num; ++i)
|
||||
{
|
||||
MatShape s = shape(channels, channelSize);
|
||||
UMat src = inputs[i].reshape(1, s.size(), &s[0]);
|
||||
UMat dst = outputs[i].reshape(1, s.size(), &s[0]);
|
||||
s = shape(numPlanes, planeSize);
|
||||
UMat src = inp.row(i).reshape(1, s.size(), &s[0]);
|
||||
UMat dst = out.row(i).reshape(1, s.size(), &s[0]);
|
||||
|
||||
UMat abs_mat;
|
||||
absdiff(src, cv::Scalar::all(0), abs_mat);
|
||||
pow(abs_mat, pnorm, buffer);
|
||||
|
||||
if (acrossSpatial)
|
||||
if (planeSize == 1)
|
||||
{
|
||||
// add eps to avoid overflow
|
||||
float absSum = sum(buffer)[0] + epsilon;
|
||||
@ -114,7 +131,7 @@ public:
|
||||
// compute inverted norm to call multiply instead divide
|
||||
cv::pow(norm, -1.0f / pnorm, norm);
|
||||
|
||||
repeat(norm, channels, 1, buffer);
|
||||
repeat(norm, numPlanes, 1, buffer);
|
||||
multiply(src, buffer, dst);
|
||||
}
|
||||
|
||||
@ -130,7 +147,7 @@ public:
|
||||
else
|
||||
{
|
||||
// _scale: _channels x 1
|
||||
CV_Assert(scale.total() == channels);
|
||||
CV_Assert(scale.total() == numPlanes);
|
||||
repeat(scale, 1, dst.cols, buffer);
|
||||
multiply(dst, buffer, dst);
|
||||
}
|
||||
@ -162,17 +179,22 @@ public:
|
||||
|
||||
const Mat& inp0 = *inputs[0];
|
||||
Mat& buffer = internals[0];
|
||||
size_t num = inp0.size[0];
|
||||
size_t channels = inp0.size[1];
|
||||
size_t channelSize = inp0.total() / (num * channels);
|
||||
startAxis = clamp(startAxis, inp0.dims);
|
||||
endAxis = clamp(endAxis, inp0.dims);
|
||||
|
||||
const float* inpData = inp0.ptr<float>();
|
||||
float* outData = outputs[0].ptr<float>();
|
||||
|
||||
size_t num = total(shape(inp0.size), 0, startAxis);
|
||||
size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
|
||||
size_t planeSize = inp0.total() / (num * numPlanes);
|
||||
for (size_t n = 0; n < num; ++n)
|
||||
{
|
||||
Mat src = Mat(channels, channelSize, CV_32F, (void*)inp0.ptr<float>(n));
|
||||
Mat dst = Mat(channels, channelSize, CV_32F, (void*)outputs[0].ptr<float>(n));
|
||||
|
||||
Mat src = Mat(numPlanes, planeSize, CV_32F, (void*)inpData);
|
||||
Mat dst = Mat(numPlanes, planeSize, CV_32F, (void*)outData);
|
||||
cv::pow(abs(src), pnorm, buffer);
|
||||
|
||||
if (acrossSpatial)
|
||||
if (planeSize == 1)
|
||||
{
|
||||
// add eps to avoid overflow
|
||||
float absSum = sum(buffer)[0] + epsilon;
|
||||
@ -188,7 +210,7 @@ public:
|
||||
// compute inverted norm to call multiply instead divide
|
||||
cv::pow(norm, -1.0f / pnorm, norm);
|
||||
|
||||
repeat(norm, channels, 1, buffer);
|
||||
repeat(norm, numPlanes, 1, buffer);
|
||||
multiply(src, buffer, dst);
|
||||
}
|
||||
|
||||
@ -204,13 +226,40 @@ public:
|
||||
else
|
||||
{
|
||||
// _scale: _channels x 1
|
||||
CV_Assert(scale.total() == channels);
|
||||
CV_Assert(scale.total() == numPlanes);
|
||||
repeat(scale, 1, dst.cols, buffer);
|
||||
multiply(dst, buffer, dst);
|
||||
}
|
||||
}
|
||||
inpData += numPlanes * planeSize;
|
||||
outData += numPlanes * planeSize;
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the Inference Engine node for this normalization layer. The input
// wrappers argument is unnamed and not used.
// With HAVE_INF_ENGINE defined: creates a generic InferenceEngine::CNNLayer
// of type "Normalize" (FP32), fills its string parameters from the layer
// members, attaches blobs[0] as the "weights" blob and returns the layer
// wrapped in an InfEngineBackendNode.
// Without HAVE_INF_ENGINE: returns an empty Ptr<BackendNode>.
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
InferenceEngine::LayerParams lp;
|
||||
lp.name = name;
|
||||
lp.type = "Normalize";
|
||||
lp.precision = InferenceEngine::Precision::FP32;
|
||||
std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
|
||||
|
||||
// blobs[0] is read below, so it must exist.
CV_Assert(!blobs.empty());
|
||||
|
||||
// NOTE(review): if format() is printf-style, "%f" prints six decimals, so
// a very small epsilon (the constructor default is 1e-10f) would be
// rendered as 0.000000 - confirm this is intended.
ieLayer->params["eps"] = format("%f", epsilon);
|
||||
ieLayer->params["across_spatial"] = acrossSpatial ? "1" : "0";
|
||||
// A single-element weights blob is reported as shared across channels.
ieLayer->params["channel_shared"] = blobs[0].total() == 1 ? "1" : "0";
|
||||
|
||||
// The weights are wrapped as a 1-D blob of blobs[0].total() elements.
const int numChannels = blobs[0].total();
|
||||
ieLayer->blobs["weights"] = wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C);
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#endif // HAVE_INF_ENGINE
|
||||
// Fallback when built without Inference Engine support.
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
private:
|
||||
int startAxis, endAxis;
|
||||
};
|
||||
|
||||
|
||||
|
@ -132,20 +132,6 @@ public:
|
||||
#endif // HAVE_HALIDE
|
||||
break;
|
||||
}
|
||||
case DNN_BACKEND_INFERENCE_ENGINE:
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
auto base = node.dynamicCast<InfEngineBackendNode>();
|
||||
auto conv = std::dynamic_pointer_cast<InferenceEngine::ConvolutionLayer>(base->layer);
|
||||
if (conv)
|
||||
{
|
||||
Mat bias = hasBias ? blobs[1] : Mat();
|
||||
fuseConvWeights(conv, blobs[0], bias);
|
||||
return base;
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
break;
|
||||
}
|
||||
}
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
@ -192,9 +178,10 @@ public:
|
||||
lp.precision = InferenceEngine::Precision::FP32;
|
||||
std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));
|
||||
|
||||
ieLayer->_weights = wrapToInfEngineBlob(blobs[0]);
|
||||
const int numChannels = blobs[0].total();
|
||||
ieLayer->_weights = wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C);
|
||||
if (hasBias)
|
||||
ieLayer->_biases = wrapToInfEngineBlob(blobs[1]);
|
||||
ieLayer->_biases = wrapToInfEngineBlob(blobs[1], {numChannels}, InferenceEngine::Layout::C);
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
@ -90,27 +90,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
|
||||
{
|
||||
switch (node->backendId)
|
||||
{
|
||||
case DNN_BACKEND_INFERENCE_ENGINE:
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
auto base = node.dynamicCast<InfEngineBackendNode>();
|
||||
auto conv = std::dynamic_pointer_cast<InferenceEngine::ConvolutionLayer>(base->layer);
|
||||
if (conv)
|
||||
{
|
||||
fuseConvWeights(conv, Mat(), blobs[0]);
|
||||
return base;
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
break;
|
||||
}
|
||||
}
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
|
@ -18,6 +18,11 @@ namespace cv { namespace dnn {
|
||||
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
|
||||
// Returns the Inference Engine build number as an int, parsed with
// std::atoi from the `buildNumber` string of the runtime version info.
// std::atoi returns 0 when no conversion can be performed, so a
// non-numeric build string would make version comparisons see 0.
static int infEngineVersion()
|
||||
{
|
||||
return std::atoi(InferenceEngine::GetInferenceEngineVersion()->buildNumber);
|
||||
}
|
||||
|
||||
InfEngineBackendNode::InfEngineBackendNode(const InferenceEngine::CNNLayerPtr& _layer)
|
||||
: BackendNode(DNN_BACKEND_INFERENCE_ENGINE), layer(_layer) {}
|
||||
|
||||
@ -58,23 +63,37 @@ static InferenceEngine::DataPtr wrapToInfEngineDataNode(const Mat& m, const std:
|
||||
{
|
||||
std::vector<size_t> reversedShape(&m.size[0], &m.size[0] + m.dims);
|
||||
std::reverse(reversedShape.begin(), reversedShape.end());
|
||||
return InferenceEngine::DataPtr(
|
||||
new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::Layout::ANY)
|
||||
);
|
||||
if (infEngineVersion() > 5855)
|
||||
{
|
||||
InferenceEngine::Layout l = InferenceEngine::Layout::ANY;
|
||||
if (m.dims == 4)
|
||||
l = InferenceEngine::Layout::NCHW;
|
||||
else if (m.dims == 2)
|
||||
l = InferenceEngine::Layout::NC;
|
||||
return InferenceEngine::DataPtr(
|
||||
new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32, l)
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
return InferenceEngine::DataPtr(
|
||||
new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape)
|
||||
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape,
|
||||
InferenceEngine::Layout layout)
|
||||
{
|
||||
return InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
|
||||
shape, (float*)m.data);
|
||||
layout, shape, (float*)m.data);
|
||||
}
|
||||
|
||||
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m)
|
||||
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout)
|
||||
{
|
||||
std::vector<size_t> reversedShape(&m.size[0], &m.size[0] + m.dims);
|
||||
std::reverse(reversedShape.begin(), reversedShape.end());
|
||||
return wrapToInfEngineBlob(m, reversedShape);
|
||||
return wrapToInfEngineBlob(m, reversedShape, layout);
|
||||
}
|
||||
|
||||
InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr)
|
||||
@ -109,10 +128,14 @@ void InfEngineBackendWrapper::setHostDirty()
|
||||
|
||||
// Default constructor: the network starts out targeting the CPU device
// with FP32 precision.
InfEngineBackendNet::InfEngineBackendNet()
    : targetDevice(InferenceEngine::TargetDevice::eCPU),
      precision(InferenceEngine::Precision::FP32)
{
}
|
||||
|
||||
InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net)
|
||||
{
|
||||
targetDevice = InferenceEngine::TargetDevice::eCPU;
|
||||
precision = InferenceEngine::Precision::FP32;
|
||||
inputs = net.getInputsInfo();
|
||||
outputs = net.getOutputsInfo();
|
||||
layers.resize(net.layerCount()); // A hack to execute InfEngineBackendNet::layerCount correctly.
|
||||
@ -126,9 +149,14 @@ void InfEngineBackendNet::Release() noexcept
|
||||
outputs.clear();
|
||||
}
|
||||
|
||||
// Stores the requested precision in this network's `precision` member.
void InfEngineBackendNet::setPrecision(InferenceEngine::Precision p) noexcept
{
    this->precision = p;
}
|
||||
|
||||
InferenceEngine::Precision InfEngineBackendNet::getPrecision() noexcept
|
||||
{
|
||||
return InferenceEngine::Precision::FP32;
|
||||
return precision;
|
||||
}
|
||||
|
||||
// Assume that the outputs of the network are unconnected blobs.
|
||||
@ -161,9 +189,8 @@ InferenceEngine::InputInfo::Ptr InfEngineBackendNet::getInput(const std::string
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void InfEngineBackendNet::getName(char *pName, size_t len) noexcept
|
||||
void InfEngineBackendNet::getName(char*, size_t) noexcept
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, "");
|
||||
}
|
||||
|
||||
size_t InfEngineBackendNet::layerCount() noexcept
|
||||
@ -213,13 +240,15 @@ InfEngineBackendNet::getLayerByName(const char *layerName, InferenceEngine::CNNL
|
||||
|
||||
void InfEngineBackendNet::setTargetDevice(InferenceEngine::TargetDevice device) noexcept
|
||||
{
|
||||
if (device != InferenceEngine::TargetDevice::eCPU)
|
||||
if (device != InferenceEngine::TargetDevice::eCPU &&
|
||||
device != InferenceEngine::TargetDevice::eGPU)
|
||||
CV_Error(Error::StsNotImplemented, "");
|
||||
targetDevice = device;
|
||||
}
|
||||
|
||||
InferenceEngine::TargetDevice InfEngineBackendNet::getTargetDevice() noexcept
|
||||
{
|
||||
return InferenceEngine::TargetDevice::eCPU;
|
||||
return targetDevice;
|
||||
}
|
||||
|
||||
InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(const size_t size) noexcept
|
||||
@ -234,7 +263,7 @@ size_t InfEngineBackendNet::getBatchSize() const noexcept
|
||||
return 0;
|
||||
}
|
||||
|
||||
void InfEngineBackendNet::init()
|
||||
void InfEngineBackendNet::init(int targetId)
|
||||
{
|
||||
if (inputs.empty())
|
||||
{
|
||||
@ -307,6 +336,15 @@ void InfEngineBackendNet::init()
|
||||
outBlobs[it.first] = allBlobs[it.first];
|
||||
}
|
||||
|
||||
switch (targetId)
|
||||
{
|
||||
case DNN_TARGET_CPU: setTargetDevice(InferenceEngine::TargetDevice::eCPU); break;
|
||||
case DNN_TARGET_OPENCL_FP16: setPrecision(InferenceEngine::Precision::FP16); // Fallback to the next.
|
||||
case DNN_TARGET_OPENCL: setTargetDevice(InferenceEngine::TargetDevice::eGPU); break;
|
||||
default:
|
||||
CV_Error(Error::StsError, format("Unknown target identifier: %d", targetId));
|
||||
}
|
||||
|
||||
if (!isInitialized())
|
||||
initPlugin(*this);
|
||||
}
|
||||
@ -317,10 +355,9 @@ void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net)
|
||||
|
||||
InferenceEngine::StatusCode status;
|
||||
InferenceEngine::ResponseDesc resp;
|
||||
const InferenceEngine::Version* v = InferenceEngine::GetInferenceEngineVersion();
|
||||
|
||||
plugin = InferenceEngine::PluginDispatcher({""}).getSuitablePlugin(InferenceEngine::TargetDevice::eCPU);
|
||||
if (std::atoi(v->buildNumber) > 5855)
|
||||
plugin = InferenceEngine::PluginDispatcher({""}).getSuitablePlugin(targetDevice);
|
||||
if (infEngineVersion() > 5855 && targetDevice == InferenceEngine::TargetDevice::eCPU)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
InferenceEngine::IExtensionPtr extension =
|
||||
@ -360,7 +397,7 @@ void InfEngineBackendNet::forward()
|
||||
CV_Error(Error::StsAssert, resp.msg);
|
||||
}
|
||||
|
||||
static inline Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
|
||||
Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
|
||||
{
|
||||
// NOTE: Inference Engine sizes are reversed.
|
||||
std::vector<size_t> dims = blob->dims();
|
||||
@ -369,56 +406,6 @@ static inline Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
|
||||
return Mat(size, CV_32F, (void*)blob->buffer());
|
||||
}
|
||||
|
||||
void fuseConvWeights(const std::shared_ptr<InferenceEngine::ConvolutionLayer>& conv,
|
||||
const Mat& w, const Mat& b)
|
||||
{
|
||||
CV_Assert(!w.empty() || !b.empty());
|
||||
if (!w.empty())
|
||||
{
|
||||
// Get convolution's weights. Clone the data because Inference Engine can host it
|
||||
// and conv->_weights->allocate() below will deallocate it.
|
||||
Mat originWeights = infEngineBlobToMat(conv->_weights).clone();
|
||||
|
||||
// Create new weights blob.
|
||||
conv->_weights = InferenceEngine::make_shared_blob<float>(
|
||||
InferenceEngine::Precision::FP32, conv->_weights->dims());
|
||||
conv->_weights->allocate();
|
||||
|
||||
// Convolution weights have OIHW data layout.
|
||||
// (conv(I) + b1 ) * w + b2
|
||||
// w*conv(I) + b1 * w + b2
|
||||
Mat fusedWeights = infEngineBlobToMat(conv->_weights);
|
||||
|
||||
const int numChannels = fusedWeights.size[0];
|
||||
// Mat weights = blobs[0].reshape(1, 1);
|
||||
// Mat bias = hasBias ? blobs[1].reshape(1, 1) : Mat();
|
||||
CV_Assert(numChannels == w.total());
|
||||
CV_Assert(b.empty() || numChannels == b.total());
|
||||
for (int i = 0; i < numChannels; ++i)
|
||||
{
|
||||
cv::multiply(slice(originWeights, i), w.at<float>(i), slice(fusedWeights, i));
|
||||
}
|
||||
}
|
||||
if (conv->_biases)
|
||||
{
|
||||
// The same for biases.
|
||||
Mat originBiases = infEngineBlobToMat(conv->_biases).clone();
|
||||
|
||||
conv->_biases = InferenceEngine::make_shared_blob<float>(
|
||||
InferenceEngine::Precision::FP32, conv->_biases->dims());
|
||||
conv->_biases->allocate();
|
||||
Mat fusedBiases = infEngineBlobToMat(conv->_biases);
|
||||
originBiases.copyTo(fusedBiases);
|
||||
|
||||
if (!w.empty())
|
||||
cv::multiply(w.reshape(1, fusedBiases.dims, &fusedBiases.size[0]), fusedBiases, fusedBiases);
|
||||
if (!b.empty())
|
||||
cv::add(fusedBiases, b.reshape(1, fusedBiases.dims, &fusedBiases.size[0]), fusedBiases);
|
||||
}
|
||||
else
|
||||
conv->_biases = wrapToInfEngineBlob(b);
|
||||
}
|
||||
|
||||
InfEngineBackendLayer::InfEngineBackendLayer(const InferenceEngine::DataPtr& output_)
|
||||
{
|
||||
output = output_;
|
||||
@ -454,6 +441,16 @@ void InfEngineBackendLayer::forward(InputArrayOfArrays inputs, OutputArrayOfArra
|
||||
CV_Error(Error::StsInternal, "Choose Inference Engine as a preferable backend.");
|
||||
}
|
||||
|
||||
// Creates an FP16 copy of the given blob, whose buffer is read as 32-bit floats.
// A new blob with the same layout and dims is allocated; half-precision values
// are stored as 16-bit integers.
InferenceEngine::TBlob<int16_t>::Ptr convertFp16(const InferenceEngine::Blob::Ptr& blob)
{
    auto fp16Blob = InferenceEngine::make_shared_blob<int16_t>(
        InferenceEngine::Precision::FP16, blob->layout(), blob->dims());
    fp16Blob->allocate();

    // Wrap both buffers as single-row matrices (no copy) so that the Mat-based
    // convertFp16 overload can convert all elements in one call.
    const size_t numElements = blob->size();
    Mat srcFloats(1, numElements, CV_32F, blob->buffer());
    Mat dstHalfs(1, numElements, CV_16SC1, fp16Blob->buffer());
    convertFp16(srcFloats, dstHalfs);

    return fp16Blob;
}
|
||||
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
bool haveInfEngine()
|
||||
|
@ -32,6 +32,8 @@ public:
|
||||
|
||||
virtual void Release() noexcept CV_OVERRIDE;
|
||||
|
||||
void setPrecision(InferenceEngine::Precision p) noexcept;
|
||||
|
||||
virtual InferenceEngine::Precision getPrecision() noexcept CV_OVERRIDE;
|
||||
|
||||
virtual void getOutputsInfo(InferenceEngine::OutputsDataMap &out) noexcept /*CV_OVERRIDE*/;
|
||||
@ -68,7 +70,7 @@ public:
|
||||
|
||||
virtual size_t getBatchSize() const noexcept CV_OVERRIDE;
|
||||
|
||||
void init();
|
||||
void init(int targetId);
|
||||
|
||||
void addBlobs(const std::vector<Ptr<BackendWrapper> >& wrappers);
|
||||
|
||||
@ -83,6 +85,8 @@ private:
|
||||
InferenceEngine::BlobMap inpBlobs;
|
||||
InferenceEngine::BlobMap outBlobs;
|
||||
InferenceEngine::BlobMap allBlobs;
|
||||
InferenceEngine::TargetDevice targetDevice;
|
||||
InferenceEngine::Precision precision;
|
||||
InferenceEngine::InferenceEnginePluginPtr plugin;
|
||||
|
||||
void initPlugin(InferenceEngine::ICNNNetwork& net);
|
||||
@ -116,15 +120,17 @@ public:
|
||||
InferenceEngine::TBlob<float>::Ptr blob;
|
||||
};
|
||||
|
||||
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m);
|
||||
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout = InferenceEngine::Layout::ANY);
|
||||
|
||||
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape);
|
||||
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape, InferenceEngine::Layout layout);
|
||||
|
||||
InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr);
|
||||
|
||||
// Fuses convolution weights and biases with channel-wise scales and shifts.
|
||||
void fuseConvWeights(const std::shared_ptr<InferenceEngine::ConvolutionLayer>& conv,
|
||||
const Mat& w, const Mat& b = Mat());
|
||||
Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob);
|
||||
|
||||
// Convert Inference Engine blob with FP32 precision to FP16 precision.
|
||||
// Allocates memory for a new blob.
|
||||
InferenceEngine::TBlob<int16_t>::Ptr convertFp16(const InferenceEngine::Blob::Ptr& blob);
|
||||
|
||||
// This is a fake class to run networks from Model Optimizer. Objects of that
|
||||
// class simulate responses of layers are imported by OpenCV and supported by
|
||||
@ -151,7 +157,6 @@ private:
|
||||
InferenceEngine::DataPtr output;
|
||||
};
|
||||
|
||||
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
bool haveInfEngine();
|
||||
|
@ -80,14 +80,16 @@ public:
|
||||
{
|
||||
CV_Assert(inpId < node.input_size());
|
||||
std::string name = node.input(inpId);
|
||||
// If operation produces several tensors, they are specified by index
|
||||
// after ':' character. For example, "input:0".
|
||||
name = name.substr(0, name.rfind(':'));
|
||||
const int numNodes = net.node_size();
|
||||
for (int i = 0; i < numNodes; ++i)
|
||||
{
|
||||
if (net.node(i).name() == name)
|
||||
return net.node(i);
|
||||
}
|
||||
CV_Error(Error::StsParseError, "Input node with name " + name + " not found");
|
||||
return net.node(0); // just return something
|
||||
CV_ErrorNoReturn(Error::StsParseError, "Input node with name " + name + " not found");
|
||||
}
|
||||
|
||||
// Match TensorFlow subgraph starting from <nodeId> with a set of nodes to be fused.
|
||||
@ -400,6 +402,23 @@ private:
|
||||
int numOutDims;
|
||||
};
|
||||
|
||||
class L2NormalizeSubgraph : public Subgraph
|
||||
{
|
||||
public:
|
||||
L2NormalizeSubgraph()
|
||||
{
|
||||
int input = addNodeToMatch("");
|
||||
int square = addNodeToMatch("Square", input);
|
||||
int reductionIndices = addNodeToMatch("Const");
|
||||
int sum = addNodeToMatch("Sum", square, reductionIndices);
|
||||
int y = addNodeToMatch("Const");
|
||||
int maximum = addNodeToMatch("Maximum", sum, y);
|
||||
int rsqrt = addNodeToMatch("Rsqrt", maximum);
|
||||
addNodeToMatch("Mul", input, rsqrt);
|
||||
setFusedNode("L2Normalize", input, reductionIndices);
|
||||
}
|
||||
};
|
||||
|
||||
void simplifySubgraphs(tensorflow::GraphDef& net)
|
||||
{
|
||||
std::vector<Ptr<Subgraph> > subgraphs;
|
||||
@ -410,6 +429,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
|
||||
subgraphs.push_back(Ptr<Subgraph>(new SoftMaxKerasSubgraph()));
|
||||
subgraphs.push_back(Ptr<Subgraph>(new ReLU6KerasSubgraph()));
|
||||
subgraphs.push_back(Ptr<Subgraph>(new ReshapeKerasSubgraph(3)));
|
||||
subgraphs.push_back(Ptr<Subgraph>(new L2NormalizeSubgraph()));
|
||||
|
||||
int numNodes = net.node_size();
|
||||
std::vector<int> matchedNodesIds;
|
||||
|
@ -37,7 +37,13 @@ using ::google::protobuf::Reflection;
|
||||
namespace
|
||||
{
|
||||
|
||||
static int toNCHW[] = {0, 2, 3, 1};
|
||||
// Maps an axis index of a 4-dimensional NHWC tensor to the matching axis of
// the NCHW layout. Negative indices count from the last axis, so -4 denotes
// the batch axis exactly like 0 does. The result is always in [0, 3].
static int toNCHW(int idx)
{
    CV_Assert(-4 <= idx && idx < 4);
    // Normalize negative indices first; otherwise -4 (batch axis) would be
    // mapped to 1 (channels) instead of 0.
    if (idx < 0)
        idx += 4;
    // N keeps its place; H, W, C (1, 2, 3) become 2, 3, 1.
    return idx == 0 ? 0 : idx % 3 + 1;
}
|
||||
|
||||
// These values are used to indicate a layer output's data layout where possible.
|
||||
enum DataLayout
|
||||
@ -556,11 +562,23 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
|
||||
// this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
|
||||
static int predictOutputDataLayout(const tensorflow::NodeDef& layer, const std::map<String, int>& data_layouts)
|
||||
{
|
||||
if (hasLayerAttr(layer, "data_format"))
|
||||
{
|
||||
std::string format = getLayerAttr(layer, "data_format").s();
|
||||
if (format == "NHWC" || format == "channels_last")
|
||||
return DATA_LAYOUT_NHWC;
|
||||
else if (format == "NCHW" || format == "channels_first")
|
||||
return DATA_LAYOUT_NCHW;
|
||||
else
|
||||
CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
|
||||
}
|
||||
|
||||
// Determine layout by layer's inputs
|
||||
int layout = DATA_LAYOUT_UNKNOWN;
|
||||
std::map<String, int>::const_iterator it;
|
||||
for (int i = 0, n = layer.input_size(); i < n; ++i)
|
||||
{
|
||||
it = data_layouts.find(layer.input(i));
|
||||
it = data_layouts.find(layer.input(i).substr(0, layer.input(i).rfind(':')));
|
||||
if (it != data_layouts.end())
|
||||
{
|
||||
if (it->second == DATA_LAYOUT_UNKNOWN)
|
||||
@ -708,17 +726,7 @@ void TFImporter::populateNet(Net dstNet)
|
||||
// one input only
|
||||
connect(layer_id, dstNet, parsePin(input), id, 0);
|
||||
|
||||
if (hasLayerAttr(layer, "data_format"))
|
||||
{
|
||||
std::string format = getLayerAttr(layer, "data_format").s();
|
||||
if (format == "NHWC" || format == "channels_last")
|
||||
data_layouts[name] = DATA_LAYOUT_NHWC;
|
||||
else if (format == "NCHW" || format == "channels_first")
|
||||
data_layouts[name] = DATA_LAYOUT_NCHW;
|
||||
else
|
||||
CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
|
||||
}
|
||||
else
|
||||
if (data_layouts[name] == DATA_LAYOUT_UNKNOWN)
|
||||
data_layouts[name] = DATA_LAYOUT_NHWC;
|
||||
}
|
||||
else if (type == "BiasAdd" || type == "Add")
|
||||
@ -956,7 +964,7 @@ void TFImporter::populateNet(Net dstNet)
|
||||
{
|
||||
int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
|
||||
int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
|
||||
layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW[axis] : axis);
|
||||
layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW(axis) : axis);
|
||||
|
||||
int id = dstNet.addLayer(name, "Concat", layerParams);
|
||||
layer_id[name] = id;
|
||||
@ -1017,7 +1025,7 @@ void TFImporter::populateNet(Net dstNet)
|
||||
// num_split
|
||||
// 1st blob is dims tensor
|
||||
int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
|
||||
layerParams.set("axis", toNCHW[axis]);
|
||||
layerParams.set("axis", toNCHW(axis));
|
||||
|
||||
int id = dstNet.addLayer(name, "Slice", layerParams);
|
||||
layer_id[name] = id;
|
||||
@ -1410,9 +1418,26 @@ void TFImporter::populateNet(Net dstNet)
|
||||
{
|
||||
// op: "L2Normalize"
|
||||
// input: "input"
|
||||
CV_Assert(layer.input_size() == 1);
|
||||
layerParams.set("across_spatial", false);
|
||||
layerParams.set("channel_shared", false);
|
||||
// input: "reduction_indices" (axis)
|
||||
CV_Assert(layer.input_size() == 2);
|
||||
Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
|
||||
CV_Assert(reductionIndices.type() == CV_32SC1);
|
||||
|
||||
const int numAxes = reductionIndices.total();
|
||||
if (data_layouts[name] == DATA_LAYOUT_NHWC)
|
||||
for (int i = 0; i < numAxes; ++i)
|
||||
reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
|
||||
|
||||
cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
|
||||
for (int i = 1; i < numAxes; ++i)
|
||||
{
|
||||
CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
|
||||
// Axes have the same sign.
|
||||
CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
|
||||
}
|
||||
layerParams.set("start_axis", reductionIndices.at<int>(0));
|
||||
layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
|
||||
|
||||
int id = dstNet.addLayer(name, "Normalize", layerParams);
|
||||
layer_id[name] = id;
|
||||
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
|
||||
|
@ -100,6 +100,8 @@ public:
|
||||
|
||||
TEST_P(DNNTestNetwork, AlexNet)
|
||||
{
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt",
|
||||
Size(227, 227), "prob",
|
||||
target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_alexnet.yml" :
|
||||
@ -108,6 +110,8 @@ TEST_P(DNNTestNetwork, AlexNet)
|
||||
|
||||
TEST_P(DNNTestNetwork, ResNet_50)
|
||||
{
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt",
|
||||
Size(224, 224), "prob",
|
||||
target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_resnet_50.yml" :
|
||||
@ -116,6 +120,8 @@ TEST_P(DNNTestNetwork, ResNet_50)
|
||||
|
||||
TEST_P(DNNTestNetwork, SqueezeNet_v1_1)
|
||||
{
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt",
|
||||
Size(227, 227), "prob",
|
||||
target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_squeezenet_v1_1.yml" :
|
||||
@ -124,6 +130,8 @@ TEST_P(DNNTestNetwork, SqueezeNet_v1_1)
|
||||
|
||||
// Runs the BVLC GoogLeNet Caffe model and checks the "prob" output.
// Skipped for the Inference Engine backend with the FP16 OpenCL target.
TEST_P(DNNTestNetwork, GoogLeNet)
{
    const bool isInfEngineFp16 = backend == DNN_BACKEND_INFERENCE_ENGINE &&
                                 target == DNN_TARGET_OPENCL_FP16;
    if (isInfEngineFp16)
        throw SkipTestException("");

    const Size inputSize(224, 224);
    processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
               inputSize, "prob");
}
|
||||
@ -147,7 +155,9 @@ TEST_P(DNNTestNetwork, ENet)
|
||||
|
||||
TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
if (backend == DNN_BACKEND_HALIDE ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
Mat sample = imread(findDataFile("dnn/street.png", false));
|
||||
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
|
||||
|
||||
@ -157,7 +167,9 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe)
|
||||
|
||||
TEST_P(DNNTestNetwork, MobileNet_SSD_TensorFlow)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
if (backend == DNN_BACKEND_HALIDE ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
Mat sample = imread(findDataFile("dnn/street.png", false));
|
||||
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
|
||||
processNet("dnn/ssd_mobilenet_v1_coco.pb", "dnn/ssd_mobilenet_v1_coco.pbtxt",
|
||||
@ -177,35 +189,45 @@ TEST_P(DNNTestNetwork, SSD_VGG16)
|
||||
TEST_P(DNNTestNetwork, OpenPose_pose_coco)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
double l1 = target == DNN_TARGET_OPENCL_FP16 ? 3e-5 : 1e-5;
|
||||
double lInf = target == DNN_TARGET_OPENCL_FP16 ? 3e-3 : 1e-4;
|
||||
processNet("dnn/openpose_pose_coco.caffemodel", "dnn/openpose_pose_coco.prototxt",
|
||||
Size(368, 368), "");
|
||||
Size(368, 368), "", "", l1, lInf);
|
||||
}
|
||||
|
||||
TEST_P(DNNTestNetwork, OpenPose_pose_mpi)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
double l1 = target == DNN_TARGET_OPENCL_FP16 ? 4e-5 : 1e-5;
|
||||
double lInf = target == DNN_TARGET_OPENCL_FP16 ? 7e-3 : 1e-4;
|
||||
processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi.prototxt",
|
||||
Size(368, 368), "");
|
||||
Size(368, 368), "", "", l1, lInf);
|
||||
}
|
||||
|
||||
TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
double l1 = target == DNN_TARGET_OPENCL_FP16 ? 5e-5 : 1e-5;
|
||||
double lInf = target == DNN_TARGET_OPENCL_FP16 ? 5e-3 : 1e-4;
|
||||
// The same .caffemodel but modified .prototxt
|
||||
// See https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/pose/poseParameters.cpp
|
||||
processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi_faster_4_stages.prototxt",
|
||||
Size(368, 368), "");
|
||||
Size(368, 368), "", "", l1, lInf);
|
||||
}
|
||||
|
||||
TEST_P(DNNTestNetwork, OpenFace)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
if (backend == DNN_BACKEND_HALIDE ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
processNet("dnn/openface_nn4.small2.v1.t7", "", Size(96, 96), "");
|
||||
}
|
||||
|
||||
TEST_P(DNNTestNetwork, opencv_face_detector)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
if (backend == DNN_BACKEND_HALIDE ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
|
||||
Mat inp = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
|
||||
processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt",
|
||||
@ -214,13 +236,23 @@ TEST_P(DNNTestNetwork, opencv_face_detector)
|
||||
|
||||
TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
|
||||
{
|
||||
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
|
||||
if (backend == DNN_BACKEND_HALIDE ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
|
||||
throw SkipTestException("");
|
||||
Mat sample = imread(findDataFile("dnn/street.png", false));
|
||||
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
|
||||
processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt",
|
||||
inp, "detection_out");
|
||||
}
|
||||
|
||||
// Runs the DenseNet-121 Caffe model. Skipped for the Halide backend and for
// the Inference Engine backend with the FP16 OpenCL target.
TEST_P(DNNTestNetwork, DenseNet_121)
{
    if (backend == DNN_BACKEND_HALIDE)
        throw SkipTestException("");
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
        throw SkipTestException("");

    processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "caffe");
}
|
||||
|
||||
const tuple<DNNBackend, DNNTarget> testCases[] = {
|
||||
#ifdef HAVE_HALIDE
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
|
||||
@ -228,6 +260,8 @@ const tuple<DNNBackend, DNNTarget> testCases[] = {
|
||||
#endif
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
#endif
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL)
|
||||
};
|
||||
|
@ -866,6 +866,44 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
|
||||
|
||||
normAssert(outDefault, out);
|
||||
}
|
||||
|
||||
// 1. Create a .prototxt file with the following network:
|
||||
// layer {
|
||||
// type: "Input" name: "data" top: "data"
|
||||
// input_param { shape { dim: 1 dim: 2 dim: 3 } }
|
||||
// }
|
||||
// layer {
|
||||
// type: "Input" name: "second_input" top: "second_input"
|
||||
// input_param { shape { dim: 1 dim: 2 dim: 3 } }
|
||||
// }
|
||||
// layer {
|
||||
// type: "Eltwise" name: "output" top: "output"
|
||||
// bottom: "data" bottom: "second_input"
|
||||
// eltwise_param { operation: SUM }
|
||||
// }
|
||||
//
|
||||
// 2. Create a .caffemodel file using Caffe:
|
||||
//
|
||||
// import caffe
|
||||
// net = caffe.Net('/path/to/prototxt', caffe.TEST)
|
||||
// net.save('/path/to/caffemodel')
|
||||
//
|
||||
// 3. Convert using ModelOptimizer.
|
||||
// Loads a network with two inputs ("data" and "second_input") from a
// Model Optimizer .xml/.bin pair and checks that its output equals the
// element-wise sum of the two inputs.
TEST(Test_DLDT, two_inputs)
{
    Net net = readNet(_tf("net_two_inputs.xml"), _tf("net_two_inputs.bin"));

    // Both inputs are 1x2x3 float tensors filled with uniform values from [-1, 1).
    int shape[] = {1, 2, 3};
    Mat inpA(3, &shape[0], CV_32F);
    Mat inpB(3, &shape[0], CV_32F);
    randu(inpA, -1, 1);
    randu(inpB, -1, 1);

    net.setInput(inpA, "data");
    net.setInput(inpB, "second_input");
    Mat result = net.forward();

    normAssert(result, inpA + inpB);
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
}} // namespace
|
||||
|
@ -53,7 +53,7 @@ namespace opencv_test {
|
||||
using namespace cv::dnn;
|
||||
|
||||
CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE)
|
||||
CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL)
|
||||
CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16)
|
||||
|
||||
static testing::internal::ParamGenerator<DNNTarget> availableDnnTargets()
|
||||
{
|
||||
|
@ -193,6 +193,13 @@ TEST_P(Test_TensorFlow_layers, reshape)
|
||||
runTensorFlowNet("unfused_flatten_unknown_batch", targetId);
|
||||
}
|
||||
|
||||
// Runs both L2-normalization reference models on the parameterized target.
TEST_P(Test_TensorFlow_layers, l2_normalize)
{
    const int targetId = GetParam();
    const char* const models[] = {"l2_normalize", "l2_normalize_3d"};
    for (size_t i = 0; i < sizeof(models) / sizeof(models[0]); ++i)
        runTensorFlowNet(models[i], targetId);
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, availableDnnTargets());
|
||||
|
||||
typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;
|
||||
|
@ -229,7 +229,7 @@ OCL_PERF_TEST_P(RemapFixture, Remap,
|
||||
|
||||
OCL_TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
|
||||
|
||||
SANITY_CHECK(dst, eps);
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
|
||||
} } // namespace opencv_test::ocl
|
||||
|
@ -202,8 +202,8 @@ PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear,
|
||||
|
||||
PERF_TEST_P( TestRemap, remap,
|
||||
Combine(
|
||||
Values( TYPICAL_MAT_TYPES ),
|
||||
Values( szVGA, sz720p, sz1080p ),
|
||||
Values( CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1 ),
|
||||
Values( szVGA, sz1080p ),
|
||||
InterType::all(),
|
||||
BorderMode::all(),
|
||||
RemapMode::all()
|
||||
@ -231,7 +231,7 @@ PERF_TEST_P( TestRemap, remap,
|
||||
remap(source, destination, map_x, map_y, interpolationType, borderMode);
|
||||
}
|
||||
|
||||
SANITY_CHECK(destination, 1);
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
|
||||
void update_map(const Mat& src, Mat& map_x, Mat& map_y, const int remapMode )
|
||||
|
@ -547,7 +547,7 @@ static bool ocl_Laplacian5(InputArray _src, OutputArray _dst,
|
||||
size_t src_step = _src.step(), src_offset = _src.offset();
|
||||
const size_t tileSizeYmax = wgs / tileSizeX;
|
||||
|
||||
// workaround for Nvidia: 3 channel vector type takes 4*elem_size in local memory
|
||||
// workaround for NVIDIA: 3 channel vector type takes 4*elem_size in local memory
|
||||
int loc_mem_cn = dev.vendorID() == ocl::Device::VENDOR_NVIDIA && cn == 3 ? 4 : cn;
|
||||
|
||||
if (((src_offset % src_step) % esz == 0) &&
|
||||
|
@ -36,19 +36,25 @@ public:
|
||||
typedef fixedpoint64 WT;
|
||||
CV_ALWAYS_INLINE fixedpoint64() { val = 0; }
|
||||
CV_ALWAYS_INLINE fixedpoint64(const int8_t& _val) { val = ((int64_t)_val) << fixedShift; }
|
||||
CV_ALWAYS_INLINE fixedpoint64(const uint8_t& _val) { val = ((int64_t)_val) << fixedShift; }
|
||||
CV_ALWAYS_INLINE fixedpoint64(const int16_t& _val) { val = ((int64_t)_val) << fixedShift; }
|
||||
CV_ALWAYS_INLINE fixedpoint64(const uint16_t& _val) { val = ((int64_t)_val) << fixedShift; }
|
||||
CV_ALWAYS_INLINE fixedpoint64(const int32_t& _val) { val = ((int64_t)_val) << fixedShift; }
|
||||
CV_ALWAYS_INLINE fixedpoint64(const cv::softdouble& _val) { val = cvRound64(_val * cv::softdouble((int64_t)(1LL << fixedShift))); }
|
||||
CV_ALWAYS_INLINE fixedpoint64& operator = (const int8_t& _val) { val = ((int64_t)_val) << fixedShift; return *this; }
|
||||
CV_ALWAYS_INLINE fixedpoint64& operator = (const uint8_t& _val) { val = ((int64_t)_val) << fixedShift; return *this; }
|
||||
CV_ALWAYS_INLINE fixedpoint64& operator = (const int16_t& _val) { val = ((int64_t)_val) << fixedShift; return *this; }
|
||||
CV_ALWAYS_INLINE fixedpoint64& operator = (const uint16_t& _val) { val = ((int64_t)_val) << fixedShift; return *this; }
|
||||
CV_ALWAYS_INLINE fixedpoint64& operator = (const int32_t& _val) { val = ((int64_t)_val) << fixedShift; return *this; }
|
||||
CV_ALWAYS_INLINE fixedpoint64& operator = (const cv::softdouble& _val) { val = cvRound64(_val * cv::softdouble((int64_t)(1LL << fixedShift))); return *this; }
|
||||
CV_ALWAYS_INLINE fixedpoint64& operator = (const fixedpoint64& _val) { val = _val.val; return *this; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator * (const ET& val2) const { return val * val2; } // Wrong rounding is possible for floating point types
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator * (const int8_t& val2) const { return operator *(fixedpoint64(val2)); }
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator * (const uint8_t& val2) const { return operator *(fixedpoint64(val2)); }
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator * (const int16_t& val2) const { return operator *(fixedpoint64(val2)); }
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator * (const uint16_t& val2) const { return operator *(fixedpoint64(val2)); }
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator * (const int32_t& val2) const { return operator *(fixedpoint64(val2)); }
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator * (const fixedpoint64& val2) const
|
||||
{
|
||||
//Assume -0x00000000C0000000 <= val2 <=0x0000000100000000 INT64_MIN <= val <= INT64_MAX, so shifted multiplication result is inside [INT64_MIN, INT64_MAX] range
|
||||
uint64_t uval = (uint64_t)((val ^ (val >> 63)) - (val >> 63));
|
||||
uint64_t umul = (uint64_t)((val2.val ^ (val2.val >> 63)) - (val2.val >> 63));
|
||||
int64_t ressign = (val >> 63) ^ (val2.val >> 63);
|
||||
@ -61,6 +67,9 @@ public:
|
||||
uint64_t val0_h = (sh2 & 0xFFFFFFFF) + (sh1_0 >> 32) + (sh1_1 >> 32) + (val0_l >> 32);
|
||||
val0_l &= 0xFFFFFFFF;
|
||||
|
||||
if ( (sh2 >> 32) || (val0_h >> ressign ? 32 : 31) )
|
||||
return (ressign ? ~(int64_t)0x7FFFFFFFFFFFFFFF : (int64_t)0x7FFFFFFFFFFFFFFF);
|
||||
|
||||
if (ressign)
|
||||
{
|
||||
val0_l = (~val0_l + 1) & 0xFFFFFFFF;
|
||||
@ -68,16 +77,19 @@ public:
|
||||
}
|
||||
return (int64_t)(val0_h << 32 | val0_l);
|
||||
}
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator + (const fixedpoint64& val2) const { return fixedpoint64(val + val2.val); }
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator - (const fixedpoint64& val2) const { return fixedpoint64(val - val2.val); }
|
||||
// CV_ALWAYS_INLINE fixedpoint64 operator + (const fixedpoint64& val2) const
|
||||
// {
|
||||
// int64_t nfrac = (int64_t)frac + val2.frac;
|
||||
// int64_t nval = (int64_t)val + val2.val + nfrac >> 32;
|
||||
// return nval > MAXINT32 ? beConv(MAXINT32, MAXINT32) : beConv((int32_t)(nval), 0);
|
||||
// }
|
||||
// Saturating addition: returns val + val2.val, clamped to INT64_MAX on
// positive overflow and to INT64_MIN on negative overflow.
// (Previously negative overflow produced -1 instead of INT64_MIN.)
CV_ALWAYS_INLINE fixedpoint64 operator + (const fixedpoint64& val2) const
{
    // Add as unsigned values: wraparound is well defined there, whereas
    // signed overflow is undefined behavior.
    int64_t res = (int64_t)((uint64_t)val + (uint64_t)val2.val);
    // Overflow happened iff both operands differ in sign from the wrapped result.
    if ((((val ^ res) & (val2.val ^ res)) >> 63) == 0)
        return res;
    // A negative wrapped result means the true sum exceeded INT64_MAX.
    return res < 0 ? (int64_t)0x7FFFFFFFFFFFFFFF : ~(int64_t)0x7FFFFFFFFFFFFFFF;
}
|
||||
// Saturating subtraction: returns val - val2.val, clamped to INT64_MAX on
// positive overflow and to INT64_MIN on negative overflow.
// (Previously negative overflow produced -1 instead of INT64_MIN.)
CV_ALWAYS_INLINE fixedpoint64 operator - (const fixedpoint64& val2) const
{
    // Subtract as unsigned values: wraparound is well defined there, whereas
    // signed overflow is undefined behavior.
    int64_t res = (int64_t)((uint64_t)val - (uint64_t)val2.val);
    // Overflow happened iff the operands differ in sign and the wrapped
    // result's sign differs from the minuend's.
    if ((((val ^ val2.val) & (val ^ res)) >> 63) == 0)
        return res;
    // A negative wrapped result means the true difference exceeded INT64_MAX.
    return res < 0 ? (int64_t)0x7FFFFFFFFFFFFFFF : ~(int64_t)0x7FFFFFFFFFFFFFFF;
}
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator >> (int n) const { return fixedpoint64(val >> n); }
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator << (int n) const { return fixedpoint64(val << n); }
|
||||
CV_ALWAYS_INLINE bool operator == (const fixedpoint64& val2) const { return val == val2.val; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>((int64_t)fixedround((uint64_t)val) >> fixedShift); }
|
||||
CV_ALWAYS_INLINE operator double() const { return (double)val / (1LL << fixedShift); }
|
||||
@ -108,31 +120,36 @@ public:
|
||||
CV_ALWAYS_INLINE ufixedpoint64& operator = (const uint32_t& _val) { val = ((uint64_t)_val) << fixedShift; return *this; }
|
||||
CV_ALWAYS_INLINE ufixedpoint64& operator = (const cv::softdouble& _val) { val = _val.getSign() ? 0 : (uint64_t)cvRound64(_val * cv::softdouble((int64_t)(1LL << fixedShift))); return *this; }
|
||||
CV_ALWAYS_INLINE ufixedpoint64& operator = (const ufixedpoint64& _val) { val = _val.val; return *this; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE ufixedpoint64 operator * (const ET& val2) const { return val * val2; } // Wrong rounding is possible for floating point types
|
||||
CV_ALWAYS_INLINE ufixedpoint64 operator * (const uint8_t& val2) const { return operator *(ufixedpoint64(val2)); }
|
||||
CV_ALWAYS_INLINE ufixedpoint64 operator * (const uint16_t& val2) const { return operator *(ufixedpoint64(val2)); }
|
||||
CV_ALWAYS_INLINE ufixedpoint64 operator * (const uint32_t& val2) const { return operator *(ufixedpoint64(val2)); }
|
||||
// Saturating multiplication of two unsigned 64-bit fixed-point values.
// The 64x64-bit product is assembled from four 32x32-bit partial products,
// the low 32 bits are dropped (after fixedround() has been applied to the
// lowest partial product) and the result is clamped to UINT64_MAX when it
// does not fit into 64 bits.
CV_ALWAYS_INLINE ufixedpoint64 operator * (const ufixedpoint64& val2) const
{
    //Assume val2 <=0x0000000100000000, so shifted multiplication result is less than val and therefore than UINT64_MAX
    // Partial products: low*low (rounded), high*low, low*high, high*high.
    // NOTE(review): fixedround() is defined outside this block; presumably it adds the rounding half - confirm.
    uint64_t sh0   = fixedround((val & 0xFFFFFFFF) * (val2.val & 0xFFFFFFFF));
    uint64_t sh1_0 = (val >> 32)        * (val2.val & 0xFFFFFFFF);
    uint64_t sh1_1 = (val & 0xFFFFFFFF) * (val2.val >> 32);
    uint64_t sh2   = (val >> 32)        * (val2.val >> 32);

    // Low result word: low halves of the cross products plus the carry out of sh0.
    uint64_t val0_l = (sh1_0 & 0xFFFFFFFF) + (sh1_1 & 0xFFFFFFFF) + (sh0 >> 32);
    // High result word: low half of sh2, high halves of the cross products and the carry from val0_l.
    uint64_t val0_h = (sh2 & 0xFFFFFFFF) + (sh1_0 >> 32) + (sh1_1 >> 32) + (val0_l >> 32);
    val0_l &= 0xFFFFFFFF;

    // Any bit above the 64-bit result window means the product overflowed: saturate.
    if ((sh2 >> 32) || (val0_h >> 32))
        return ((uint64_t)0xFFFFFFFFFFFFFFFF);

    return val0_h << 32 | val0_l;
}
|
||||
CV_ALWAYS_INLINE ufixedpoint64 operator + (const ufixedpoint64& val2) const { return ufixedpoint64(val + val2.val); }
|
||||
CV_ALWAYS_INLINE ufixedpoint64 operator - (const ufixedpoint64& val2) const { return ufixedpoint64(val - val2.val); }
|
||||
// CV_ALWAYS_INLINE fixedpoint64 operator + (const fixedpoint64& val2) const
|
||||
// {
|
||||
// int64_t nfrac = (int64_t)frac + val2.frac;
|
||||
// int64_t nval = (int64_t)val + val2.val + nfrac >> 32;
|
||||
// return nval > MAXINT32 ? beConv(MAXINT32, MAXINT32) : beConv((int32_t)(nval), 0);
|
||||
// }
|
||||
// Saturating addition of two unsigned 64-bit fixed-point values:
// yields the sum of the raw values, or UINT64_MAX when the sum wraps around.
CV_ALWAYS_INLINE ufixedpoint64 operator + (const ufixedpoint64& val2) const
{
    const uint64_t sum = val + val2.val;
    // A wrapped 64-bit sum ends up smaller than the left operand.
    if (sum < val)
        return (uint64_t)0xFFFFFFFFFFFFFFFF;
    return sum;
}
|
||||
// Saturating subtraction of two unsigned 64-bit fixed-point values:
// yields the difference of the raw values, or zero when val2 is not smaller than *this.
CV_ALWAYS_INLINE ufixedpoint64 operator - (const ufixedpoint64& val2) const
{
    if (val2.val >= val)
        return (uint64_t)0;
    return val - val2.val;
}
|
||||
CV_ALWAYS_INLINE ufixedpoint64 operator >> (int n) const { return ufixedpoint64(val >> n); }
|
||||
CV_ALWAYS_INLINE ufixedpoint64 operator << (int n) const { return ufixedpoint64(val << n); }
|
||||
CV_ALWAYS_INLINE bool operator == (const ufixedpoint64& val2) const { return val == val2.val; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
|
||||
CV_ALWAYS_INLINE operator double() const { return (double)val / (1LL << fixedShift); }
|
||||
@ -163,21 +180,26 @@ public:
|
||||
CV_ALWAYS_INLINE fixedpoint32& operator = (const int16_t& _val) { val = ((int32_t)_val) << fixedShift; return *this; }
|
||||
CV_ALWAYS_INLINE fixedpoint32& operator = (const cv::softdouble& _val) { val = (int32_t)cvRound(_val * cv::softdouble((1 << fixedShift))); return *this; }
|
||||
CV_ALWAYS_INLINE fixedpoint32& operator = (const fixedpoint32& _val) { val = _val.val; return *this; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE fixedpoint32 operator * (const ET& val2) const { return val * val2; } // Wrong rounding is possible for floating point types
|
||||
CV_ALWAYS_INLINE fixedpoint32 operator * (const int8_t& val2) const { return cv::saturate_cast<int32_t>((int64_t)val * val2); }
|
||||
CV_ALWAYS_INLINE fixedpoint32 operator * (const uint8_t& val2) const { return cv::saturate_cast<int32_t>((int64_t)val * val2); }
|
||||
CV_ALWAYS_INLINE fixedpoint32 operator * (const int16_t& val2) const { return cv::saturate_cast<int32_t>((int64_t)val * val2); }
|
||||
CV_ALWAYS_INLINE fixedpoint64 operator * (const fixedpoint32& val2) const { return (int64_t)val * (int64_t)(val2.val); }
|
||||
CV_ALWAYS_INLINE fixedpoint32 operator + (const fixedpoint32& val2) const { return fixedpoint32(val + val2.val); }
|
||||
CV_ALWAYS_INLINE fixedpoint32 operator - (const fixedpoint32& val2) const { return fixedpoint32(val - val2.val); }
|
||||
// CV_ALWAYS_INLINE fixedpoint32 operator + (const fixedpoint32& val2) const
|
||||
// {
|
||||
// int32_t nfrac = (int32_t)frac + val2.frac;
|
||||
// int32_t nval = (int32_t)val + val2.val + nfrac >> 32;
|
||||
// return nval > MAXINT32 ? beConv(MAXINT32, MAXINT32) : beConv((int32_t)(nval), 0);
|
||||
// }
|
||||
// Saturating addition of two signed 32-bit fixed-point values.
// Returns the sum of the raw representations, clamped to INT32_MAX on positive
// overflow and to INT32_MIN on negative overflow.
CV_ALWAYS_INLINE fixedpoint32 operator + (const fixedpoint32& val2) const
{
    // Widen to 64 bits so the addition itself cannot overflow, then clamp with
    // cv::saturate_cast, the same helper the multiplication operators of this class use.
    return cv::saturate_cast<int32_t>((int64_t)val + (int64_t)val2.val);
}
|
||||
// Saturating subtraction of two signed 32-bit fixed-point values.
// Returns the difference of the raw representations, clamped to INT32_MAX on
// positive overflow and to INT32_MIN on negative overflow.
CV_ALWAYS_INLINE fixedpoint32 operator - (const fixedpoint32& val2) const
{
    // Widen to 64 bits so the subtraction itself cannot overflow, then clamp with
    // cv::saturate_cast, the same helper the multiplication operators of this class use.
    return cv::saturate_cast<int32_t>((int64_t)val - (int64_t)val2.val);
}
|
||||
CV_ALWAYS_INLINE fixedpoint32 operator >> (int n) const { return fixedpoint32(val >> n); }
|
||||
CV_ALWAYS_INLINE fixedpoint32 operator << (int n) const { return fixedpoint32(val << n); }
|
||||
CV_ALWAYS_INLINE bool operator == (const fixedpoint32& val2) const { return val == val2.val; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>((int32_t)fixedround((uint32_t)val) >> fixedShift); }
|
||||
CV_ALWAYS_INLINE operator fixedpoint64() const { return (int64_t)val << (fixedpoint64::fixedShift - fixedShift); }
|
||||
CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
|
||||
CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
|
||||
// Returns true when the raw fixed-point representation equals zero.
// Declared const so that it can also be called on const objects.
CV_ALWAYS_INLINE bool isZero() const { return val == 0; }
|
||||
@ -204,21 +226,24 @@ public:
|
||||
CV_ALWAYS_INLINE ufixedpoint32& operator = (const uint16_t& _val) { val = ((uint32_t)_val) << fixedShift; return *this; }
|
||||
CV_ALWAYS_INLINE ufixedpoint32& operator = (const cv::softdouble& _val) { val = _val.getSign() ? 0 : (uint32_t)cvRound(_val * cv::softdouble((1 << fixedShift))); return *this; }
|
||||
CV_ALWAYS_INLINE ufixedpoint32& operator = (const ufixedpoint32& _val) { val = _val.val; return *this; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE ufixedpoint32 operator * (const ET& val2) const { return val * val2; } // Wrong rounding is possible for floating point types
|
||||
CV_ALWAYS_INLINE ufixedpoint32 operator * (const uint8_t& val2) const { return cv::saturate_cast<uint32_t>((uint64_t)val * val2); }
|
||||
CV_ALWAYS_INLINE ufixedpoint32 operator * (const uint16_t& val2) const { return cv::saturate_cast<uint32_t>((uint64_t)val * val2); }
|
||||
CV_ALWAYS_INLINE ufixedpoint64 operator * (const ufixedpoint32& val2) const { return (uint64_t)val * (uint64_t)(val2.val); }
|
||||
CV_ALWAYS_INLINE ufixedpoint32 operator + (const ufixedpoint32& val2) const { return ufixedpoint32(val + val2.val); }
|
||||
CV_ALWAYS_INLINE ufixedpoint32 operator - (const ufixedpoint32& val2) const { return ufixedpoint32(val - val2.val); }
|
||||
// CV_ALWAYS_INLINE fixedpoint32 operator + (const fixedpoint32& val2) const
|
||||
// {
|
||||
// int32_t nfrac = (int32_t)frac + val2.frac;
|
||||
// int32_t nval = (int32_t)val + val2.val + nfrac >> 32;
|
||||
// return nval > MAXINT32 ? beConv(MAXINT32, MAXINT32) : beConv((int32_t)(nval), 0);
|
||||
// }
|
||||
// Saturating addition of two unsigned 32-bit fixed-point values:
// yields the sum of the raw values, or 0xFFFFFFFF when the sum wraps around.
CV_ALWAYS_INLINE ufixedpoint32 operator + (const ufixedpoint32& val2) const
{
    const uint32_t sum = val + val2.val;
    // A wrapped 32-bit sum ends up smaller than the left operand.
    if (sum < val)
        return (uint32_t)0xFFFFFFFF;
    return sum;
}
|
||||
// Saturating subtraction of two unsigned 32-bit fixed-point values:
// yields the difference of the raw values, or zero when val2 is not smaller than *this.
CV_ALWAYS_INLINE ufixedpoint32 operator - (const ufixedpoint32& val2) const
{
    if (val2.val >= val)
        return (uint32_t)0;
    return val - val2.val;
}
|
||||
CV_ALWAYS_INLINE ufixedpoint32 operator >> (int n) const { return ufixedpoint32(val >> n); }
|
||||
CV_ALWAYS_INLINE ufixedpoint32 operator << (int n) const { return ufixedpoint32(val << n); }
|
||||
CV_ALWAYS_INLINE bool operator == (const ufixedpoint32& val2) const { return val == val2.val; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
|
||||
CV_ALWAYS_INLINE operator ufixedpoint64() const { return (uint64_t)val << (ufixedpoint64::fixedShift - fixedShift); }
|
||||
CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
|
||||
CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
|
||||
// Returns true when the raw fixed-point representation equals zero.
// Declared const so that it can also be called on const objects.
CV_ALWAYS_INLINE bool isZero() const { return val == 0; }
|
||||
@ -239,20 +264,28 @@ public:
|
||||
typedef fixedpoint32 WT;
|
||||
CV_ALWAYS_INLINE fixedpoint16() { val = 0; }
|
||||
CV_ALWAYS_INLINE fixedpoint16(const int8_t& _val) { val = ((int16_t)_val) << fixedShift; }
|
||||
CV_ALWAYS_INLINE fixedpoint16(const uint8_t& _val) { val = ((int16_t)_val) << fixedShift; }
|
||||
CV_ALWAYS_INLINE fixedpoint16(const cv::softdouble& _val) { val = (int16_t)cvRound(_val * cv::softdouble((1 << fixedShift))); }
|
||||
CV_ALWAYS_INLINE fixedpoint16& operator = (const int8_t& _val) { val = ((int16_t)_val) << fixedShift; return *this; }
|
||||
CV_ALWAYS_INLINE fixedpoint16& operator = (const cv::softdouble& _val) { val = (int16_t)cvRound(_val * cv::softdouble((1 << fixedShift))); return *this; }
|
||||
CV_ALWAYS_INLINE fixedpoint16& operator = (const fixedpoint16& _val) { val = _val.val; return *this; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE fixedpoint16 operator * (const ET& val2) const { return (int16_t)(val * val2); } // Wrong rounding is possible for floating point types
|
||||
CV_ALWAYS_INLINE fixedpoint16 operator * (const int8_t& val2) const { return cv::saturate_cast<int16_t>((int32_t)val * val2); }
|
||||
CV_ALWAYS_INLINE fixedpoint32 operator * (const fixedpoint16& val2) const { return (int32_t)val * (int32_t)(val2.val); }
|
||||
CV_ALWAYS_INLINE fixedpoint16 operator + (const fixedpoint16& val2) const { return fixedpoint16((int16_t)(val + val2.val)); }
|
||||
CV_ALWAYS_INLINE fixedpoint16 operator - (const fixedpoint16& val2) const { return fixedpoint16((int16_t)(val - val2.val)); }
|
||||
// Saturating addition of two signed 16-bit fixed-point values.
// Returns the sum of the raw representations, clamped to INT16_MAX on positive
// overflow and to INT16_MIN on negative overflow.
CV_ALWAYS_INLINE fixedpoint16 operator + (const fixedpoint16& val2) const
{
    // The 32-bit sum of two 16-bit values cannot overflow; cv::saturate_cast
    // (already used by the multiplication operators of this class) clamps it back.
    return cv::saturate_cast<int16_t>((int32_t)val + (int32_t)val2.val);
}
|
||||
// Saturating subtraction of two signed 16-bit fixed-point values.
// Returns the difference of the raw representations, clamped to INT16_MAX on
// positive overflow and to INT16_MIN on negative overflow.
CV_ALWAYS_INLINE fixedpoint16 operator - (const fixedpoint16& val2) const
{
    // The 32-bit difference of two 16-bit values cannot overflow; cv::saturate_cast
    // (already used by the multiplication operators of this class) clamps it back.
    return cv::saturate_cast<int16_t>((int32_t)val - (int32_t)val2.val);
}
|
||||
CV_ALWAYS_INLINE fixedpoint16 operator >> (int n) const { return fixedpoint16((int16_t)(val >> n)); }
|
||||
CV_ALWAYS_INLINE fixedpoint16 operator << (int n) const { return fixedpoint16((int16_t)(val << n)); }
|
||||
CV_ALWAYS_INLINE bool operator == (const fixedpoint16& val2) const { return val == val2.val; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>((int16_t)fixedround((uint16_t)val) >> fixedShift); }
|
||||
CV_ALWAYS_INLINE operator fixedpoint32() const { return (int32_t)val << (fixedpoint32::fixedShift - fixedShift); }
|
||||
CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
|
||||
CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
|
||||
// Returns true when the raw fixed-point representation equals zero.
// Declared const so that it can also be called on const objects.
CV_ALWAYS_INLINE bool isZero() const { return val == 0; }
|
||||
@ -276,15 +309,23 @@ public:
|
||||
CV_ALWAYS_INLINE ufixedpoint16& operator = (const uint8_t& _val) { val = ((uint16_t)_val) << fixedShift; return *this; }
|
||||
CV_ALWAYS_INLINE ufixedpoint16& operator = (const cv::softdouble& _val) { val = _val.getSign() ? 0 : (uint16_t)cvRound(_val * cv::softdouble((int32_t)(1 << fixedShift))); return *this; }
|
||||
CV_ALWAYS_INLINE ufixedpoint16& operator = (const ufixedpoint16& _val) { val = _val.val; return *this; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE ufixedpoint16 operator * (const ET& val2) const { return (uint16_t)(val * val2); } // Wrong rounding is possible for floating point types
|
||||
CV_ALWAYS_INLINE ufixedpoint16 operator * (const uint8_t& val2) const { return cv::saturate_cast<uint16_t>((uint32_t)val * val2); }
|
||||
CV_ALWAYS_INLINE ufixedpoint32 operator * (const ufixedpoint16& val2) const { return ((uint32_t)val * (uint32_t)(val2.val)); }
|
||||
CV_ALWAYS_INLINE ufixedpoint16 operator + (const ufixedpoint16& val2) const { return ufixedpoint16((uint16_t)(val + val2.val)); }
|
||||
CV_ALWAYS_INLINE ufixedpoint16 operator - (const ufixedpoint16& val2) const { return ufixedpoint16((uint16_t)(val - val2.val)); }
|
||||
// Saturating addition of two unsigned 16-bit fixed-point values:
// yields the sum of the raw values, or 0xFFFF when the 16-bit sum wraps around.
CV_ALWAYS_INLINE ufixedpoint16 operator + (const ufixedpoint16& val2) const
{
    // Truncate the sum to 16 bits first, exactly as storing it in a uint16_t does.
    const uint16_t sum = (uint16_t)(val + val2.val);
    // A wrapped sum ends up smaller than the left operand.
    if (sum < val)
        return (uint16_t)0xFFFF;
    return sum;
}
|
||||
// Saturating subtraction of two unsigned 16-bit fixed-point values:
// yields the difference of the raw values, or zero when val2 is not smaller than *this.
CV_ALWAYS_INLINE ufixedpoint16 operator - (const ufixedpoint16& val2) const
{
    if (val2.val >= val)
        return (uint16_t)0;
    return (uint16_t)(val - val2.val);
}
|
||||
CV_ALWAYS_INLINE ufixedpoint16 operator >> (int n) const { return ufixedpoint16((uint16_t)(val >> n)); }
|
||||
CV_ALWAYS_INLINE ufixedpoint16 operator << (int n) const { return ufixedpoint16((uint16_t)(val << n)); }
|
||||
CV_ALWAYS_INLINE bool operator == (const ufixedpoint16& val2) const { return val == val2.val; }
|
||||
template <typename ET>
|
||||
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
|
||||
CV_ALWAYS_INLINE operator ufixedpoint32() const { return (uint32_t)val << (ufixedpoint32::fixedShift - fixedShift); }
|
||||
CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
|
||||
CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
|
||||
// Returns true when the raw fixed-point representation equals zero.
// Declared const so that it can also be called on const objects.
CV_ALWAYS_INLINE bool isZero() const { return val == 0; }
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -77,6 +77,7 @@ protected:
|
||||
|
||||
virtual void run_func() = 0;
|
||||
virtual void run_reference_func() = 0;
|
||||
virtual float get_success_error_level(int _interpolation, int _depth) const;
|
||||
virtual void validate_results() const;
|
||||
virtual void prepare_test_data_for_reference_func();
|
||||
|
||||
@ -229,6 +230,20 @@ void CV_ImageWarpBaseTest::run(int)
|
||||
ts->set_gtest_status();
|
||||
}
|
||||
|
||||
float CV_ImageWarpBaseTest::get_success_error_level(int _interpolation, int) const
|
||||
{
|
||||
if (_interpolation == INTER_CUBIC)
|
||||
return 1.0f;
|
||||
else if (_interpolation == INTER_LANCZOS4)
|
||||
return 1.0f;
|
||||
else if (_interpolation == INTER_NEAREST)
|
||||
return 1.0f;
|
||||
else if (_interpolation == INTER_AREA)
|
||||
return 2.0f;
|
||||
else
|
||||
return 1.0f;
|
||||
}
|
||||
|
||||
void CV_ImageWarpBaseTest::validate_results() const
|
||||
{
|
||||
Mat _dst;
|
||||
@ -237,15 +252,7 @@ void CV_ImageWarpBaseTest::validate_results() const
|
||||
Size dsize = dst.size(), ssize = src.size();
|
||||
int cn = _dst.channels();
|
||||
dsize.width *= cn;
|
||||
float t = 1.0f;
|
||||
if (interpolation == INTER_CUBIC)
|
||||
t = 1.0f;
|
||||
else if (interpolation == INTER_LANCZOS4)
|
||||
t = 1.0f;
|
||||
else if (interpolation == INTER_NEAREST)
|
||||
t = 1.0f;
|
||||
else if (interpolation == INTER_AREA)
|
||||
t = 2.0f;
|
||||
float t = get_success_error_level(interpolation & INTER_MAX, dst.depth());
|
||||
|
||||
for (int dy = 0; dy < dsize.height; ++dy)
|
||||
{
|
||||
@ -1034,7 +1041,7 @@ public:
|
||||
|
||||
protected:
|
||||
virtual void generate_test_data();
|
||||
virtual void prepare_test_data_for_reference_func();
|
||||
virtual float get_success_error_level(int _interpolation, int _depth) const;
|
||||
|
||||
virtual void run_func();
|
||||
virtual void run_reference_func();
|
||||
@ -1083,16 +1090,16 @@ void CV_WarpAffine_Test::run_func()
|
||||
cv::warpAffine(src, dst, M, dst.size(), interpolation, borderType, borderValue);
|
||||
}
|
||||
|
||||
void CV_WarpAffine_Test::prepare_test_data_for_reference_func()
|
||||
// Tolerance used when comparing cv::warpAffine output with the reference result:
// 0 (exact match required) for CV_8U depth, otherwise the base-class tolerance
// for the given interpolation mode.
float CV_WarpAffine_Test::get_success_error_level(int _interpolation, int _depth) const
|
||||
{
|
||||
// NOTE(review): the call below looks like a left-over line of the removed
// prepare_test_data_for_reference_func() override shown in this diff view; it
// invokes a member declared without const from a const method - confirm it is
// not part of the current function body.
CV_ImageWarpBaseTest::prepare_test_data_for_reference_func();
|
||||
return _depth == CV_8U ? 0 : CV_ImageWarpBaseTest::get_success_error_level(_interpolation, _depth);
|
||||
}
|
||||
|
||||
// Produces the reference result for the affine warp test: prepares the input
// data, runs the test's own warpAffine() into a zero-initialised temporary that
// has the size and type of dst, and converts that temporary into reference_dst.
void CV_WarpAffine_Test::run_reference_func()
|
||||
{
|
||||
prepare_test_data_for_reference_func();
|
||||
|
||||
// NOTE(review): this direct call looks like the pre-change line of the diff;
// reference_dst is written again by convertTo() below - confirm which of the
// two variants is live.
warpAffine(src, reference_dst);
|
||||
// Temporary buffer with the same size and type as the tested output.
Mat tmp = Mat::zeros(dst.size(), dst.type());
|
||||
warpAffine(src, tmp);
|
||||
// Store the result in reference_dst, converted to reference_dst's depth.
tmp.convertTo(reference_dst, reference_dst.depth());
|
||||
}
|
||||
|
||||
void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
|
||||
@ -1123,7 +1130,7 @@ void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
|
||||
const int AB_SCALE = 1 << AB_BITS;
|
||||
int round_delta = (inter == INTER_NEAREST) ? AB_SCALE / 2 : (AB_SCALE / INTER_TAB_SIZE / 2);
|
||||
|
||||
const double* data_tM = tM.ptr<double>(0);
|
||||
const softdouble* data_tM = tM.ptr<softdouble>(0);
|
||||
for (int dy = 0; dy < dsize.height; ++dy)
|
||||
{
|
||||
short* yM = mapx.ptr<short>(dy);
|
||||
@ -1162,6 +1169,7 @@ public:
|
||||
|
||||
protected:
|
||||
virtual void generate_test_data();
|
||||
virtual float get_success_error_level(int _interpolation, int _depth) const;
|
||||
|
||||
virtual void run_func();
|
||||
virtual void run_reference_func();
|
||||
@ -1204,11 +1212,16 @@ void CV_WarpPerspective_Test::run_func()
|
||||
cv::warpPerspective(src, dst, M, dst.size(), interpolation, borderType, borderValue);
|
||||
}
|
||||
|
||||
float CV_WarpPerspective_Test::get_success_error_level(int _interpolation, int _depth) const
|
||||
{
|
||||
return CV_ImageWarpBaseTest::get_success_error_level(_interpolation, _depth);
|
||||
}
|
||||
|
||||
// Produces the reference result for the perspective warp test: prepares the
// input data, runs the test's own warpPerspective() into a zero-initialised
// temporary that has the size and type of dst, and converts that temporary
// into reference_dst.
void CV_WarpPerspective_Test::run_reference_func()
|
||||
{
|
||||
prepare_test_data_for_reference_func();
|
||||
|
||||
// NOTE(review): this direct call looks like the pre-change line of the diff;
// reference_dst is written again by convertTo() below - confirm which of the
// two variants is live.
warpPerspective(src, reference_dst);
|
||||
// Temporary buffer with the same size and type as the tested output.
Mat tmp = Mat::zeros(dst.size(), dst.type());
|
||||
warpPerspective(src, tmp);
|
||||
// Store the result in reference_dst, converted to reference_dst's depth.
tmp.convertTo(reference_dst, reference_dst.depth());
|
||||
}
|
||||
|
||||
void CV_WarpPerspective_Test::warpPerspective(const Mat& _src, Mat& _dst)
|
||||
|
@ -482,6 +482,7 @@ node {
|
||||
name: "conv4_3_norm/l2_normalize"
|
||||
op: "L2Normalize"
|
||||
input: "Relu_4:0"
|
||||
input: "conv4_3_norm/l2_normalize/Sum/reduction_indices"
|
||||
}
|
||||
node {
|
||||
name: "conv4_3_norm/mul_1"
|
||||
|
Loading…
Reference in New Issue
Block a user