mirror of
https://github.com/opencv/opencv.git
synced 2025-06-08 10:03:15 +08:00
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
d0032b0717
@ -302,7 +302,7 @@ MACRO(ADD_PRECOMPILED_HEADER _targetName _input)
|
|||||||
if [ -n \"$VERBOSE\" ]; then
|
if [ -n \"$VERBOSE\" ]; then
|
||||||
tail -n1 \$0
|
tail -n1 \$0
|
||||||
fi
|
fi
|
||||||
${_command} -D$<JOIN:$<TARGET_PROPERTY:${_targetName},COMPILE_DEFINITIONS>, -D>
|
${_command} '-D$<JOIN:$<TARGET_PROPERTY:${_targetName},COMPILE_DEFINITIONS>,' '-D>'
|
||||||
")
|
")
|
||||||
GET_FILENAME_COMPONENT(_outdir ${_output} PATH)
|
GET_FILENAME_COMPONENT(_outdir ${_output} PATH)
|
||||||
ADD_CUSTOM_COMMAND(
|
ADD_CUSTOM_COMMAND(
|
||||||
|
@ -47,7 +47,7 @@ area and try to implement them.
|
|||||||
Additional Resources
|
Additional Resources
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
-# [Histograms of Oriented Gradients Video](www.youtube.com/watch?v=0Zib1YEE4LU)
|
-# [Histograms of Oriented Gradients Video](https://www.youtube.com/watch?v=0Zib1YEE4LU)
|
||||||
|
|
||||||
Exercises
|
Exercises
|
||||||
---------
|
---------
|
||||||
|
@ -431,19 +431,6 @@ inline v_float64x4 v_reinterpret_as_f64(const v_float64x4& a)
|
|||||||
inline v_float64x4 v_reinterpret_as_f64(const v_float32x8& a)
|
inline v_float64x4 v_reinterpret_as_f64(const v_float32x8& a)
|
||||||
{ return v_float64x4(_mm256_castps_pd(a.val)); }
|
{ return v_float64x4(_mm256_castps_pd(a.val)); }
|
||||||
|
|
||||||
#if CV_FP16
|
|
||||||
inline v_float32x8 v256_load_fp16_f32(const short* ptr)
|
|
||||||
{
|
|
||||||
return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void v_store_fp16(short* ptr, const v_float32x8& a)
|
|
||||||
{
|
|
||||||
__m128i fp16_value = _mm256_cvtps_ph(a.val, 0);
|
|
||||||
_mm_store_si128((__m128i*)ptr, fp16_value);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Recombine */
|
/* Recombine */
|
||||||
/*#define OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, perm) \
|
/*#define OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, perm) \
|
||||||
inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
|
inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
|
||||||
@ -1400,7 +1387,7 @@ inline v_float32x8 v_cvt_f32(const v_float64x4& a)
|
|||||||
inline v_float32x8 v_cvt_f32(const v_float64x4& a, const v_float64x4& b)
|
inline v_float32x8 v_cvt_f32(const v_float64x4& a, const v_float64x4& b)
|
||||||
{
|
{
|
||||||
__m128 af = _mm256_cvtpd_ps(a.val), bf = _mm256_cvtpd_ps(b.val);
|
__m128 af = _mm256_cvtpd_ps(a.val), bf = _mm256_cvtpd_ps(b.val);
|
||||||
return v_float32x8(_mm256_insertf128_ps(_mm256_castps128_ps256(af), bf, 1));
|
return v_float32x8(_v256_combine(af, bf));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline v_float64x4 v_cvt_f64(const v_int32x8& a)
|
inline v_float64x4 v_cvt_f64(const v_int32x8& a)
|
||||||
@ -1474,7 +1461,7 @@ inline v_int32x8 v256_lut_pairs(const int* tab, const int* idx)
|
|||||||
}
|
}
|
||||||
inline v_int32x8 v256_lut_quads(const int* tab, const int* idx)
|
inline v_int32x8 v256_lut_quads(const int* tab, const int* idx)
|
||||||
{
|
{
|
||||||
return v_int32x8(_mm256_insertf128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i*)(tab + idx[0]))), _mm_loadu_si128((const __m128i*)(tab + idx[1])), 0x1));
|
return v_int32x8(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1]))));
|
||||||
}
|
}
|
||||||
inline v_uint32x8 v256_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut((const int *)tab, idx)); }
|
inline v_uint32x8 v256_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut((const int *)tab, idx)); }
|
||||||
inline v_uint32x8 v256_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_pairs((const int *)tab, idx)); }
|
inline v_uint32x8 v256_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_pairs((const int *)tab, idx)); }
|
||||||
@ -1490,7 +1477,7 @@ inline v_int64x4 v256_lut(const int64* tab, const int* idx)
|
|||||||
}
|
}
|
||||||
inline v_int64x4 v256_lut_pairs(const int64* tab, const int* idx)
|
inline v_int64x4 v256_lut_pairs(const int64* tab, const int* idx)
|
||||||
{
|
{
|
||||||
return v_int64x4(_mm256_insertf128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i*)(tab + idx[0]))), _mm_loadu_si128((const __m128i*)(tab + idx[1])), 0x1));
|
return v_int64x4(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1]))));
|
||||||
}
|
}
|
||||||
inline v_uint64x4 v256_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut((const int64 *)tab, idx)); }
|
inline v_uint64x4 v256_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut((const int64 *)tab, idx)); }
|
||||||
inline v_uint64x4 v256_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut_pairs((const int64 *)tab, idx)); }
|
inline v_uint64x4 v256_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut_pairs((const int64 *)tab, idx)); }
|
||||||
@ -1506,7 +1493,7 @@ inline v_float64x4 v256_lut(const double* tab, const int* idx)
|
|||||||
{
|
{
|
||||||
return v_float64x4(_mm256_i32gather_pd(tab, _mm_loadu_si128((const __m128i*)idx), 8));
|
return v_float64x4(_mm256_i32gather_pd(tab, _mm_loadu_si128((const __m128i*)idx), 8));
|
||||||
}
|
}
|
||||||
inline v_float64x4 v256_lut_pairs(const double* tab, const int* idx) { return v_float64x4(_mm256_insertf128_pd(_mm256_castpd128_pd256(_mm_loadu_pd(tab + idx[0])), _mm_loadu_pd(tab + idx[1]), 0x1)); }
|
inline v_float64x4 v256_lut_pairs(const double* tab, const int* idx) { return v_float64x4(_v256_combine(_mm_loadu_pd(tab + idx[0]), _mm_loadu_pd(tab + idx[1]))); }
|
||||||
|
|
||||||
inline v_int32x8 v_lut(const int* tab, const v_int32x8& idxvec)
|
inline v_int32x8 v_lut(const int* tab, const v_int32x8& idxvec)
|
||||||
{
|
{
|
||||||
|
@ -278,48 +278,6 @@ struct v_float64x2
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if CV_FP16
|
|
||||||
// Workaround for old compilers
|
|
||||||
static inline int16x4_t vreinterpret_s16_f16(float16x4_t a) { return (int16x4_t)a; }
|
|
||||||
static inline float16x4_t vreinterpret_f16_s16(int16x4_t a) { return (float16x4_t)a; }
|
|
||||||
|
|
||||||
static inline float16x4_t cv_vld1_f16(const void* ptr)
|
|
||||||
{
|
|
||||||
#ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro
|
|
||||||
return vreinterpret_f16_s16(vld1_s16((const short*)ptr));
|
|
||||||
#else
|
|
||||||
return vld1_f16((const __fp16*)ptr);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
static inline void cv_vst1_f16(void* ptr, float16x4_t a)
|
|
||||||
{
|
|
||||||
#ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro
|
|
||||||
vst1_s16((short*)ptr, vreinterpret_s16_f16(a));
|
|
||||||
#else
|
|
||||||
vst1_f16((__fp16*)ptr, a);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef vdup_n_f16
|
|
||||||
#define vdup_n_f16(v) (float16x4_t){v, v, v, v}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // CV_FP16
|
|
||||||
|
|
||||||
#if CV_FP16
|
|
||||||
inline v_float32x4 v128_load_fp16_f32(const short* ptr)
|
|
||||||
{
|
|
||||||
float16x4_t a = cv_vld1_f16((const __fp16*)ptr);
|
|
||||||
return v_float32x4(vcvt_f32_f16(a));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void v_store_fp16(short* ptr, const v_float32x4& a)
|
|
||||||
{
|
|
||||||
float16x4_t fp16 = vcvt_f16_f32(a.val);
|
|
||||||
cv_vst1_f16((short*)ptr, fp16);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
|
#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
|
||||||
inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
|
inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
|
||||||
inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
|
inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
|
||||||
|
@ -2684,19 +2684,6 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
|
|||||||
return v_float64x2(_mm_cvtps_pd(_mm_movehl_ps(a.val, a.val)));
|
return v_float64x2(_mm_cvtps_pd(_mm_movehl_ps(a.val, a.val)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CV_FP16
|
|
||||||
inline v_float32x4 v128_load_fp16_f32(const short* ptr)
|
|
||||||
{
|
|
||||||
return v_float32x4(_mm_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void v_store_fp16(short* ptr, const v_float32x4& a)
|
|
||||||
{
|
|
||||||
__m128i fp16_value = _mm_cvtps_ph(a.val, 0);
|
|
||||||
_mm_storel_epi64((__m128i*)ptr, fp16_value);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
////////////// Lookup table access ////////////////////
|
////////////// Lookup table access ////////////////////
|
||||||
|
|
||||||
inline v_int8x16 v_lut(const schar* tab, const int* idx)
|
inline v_int8x16 v_lut(const schar* tab, const int* idx)
|
||||||
@ -2956,6 +2943,9 @@ inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
|
|||||||
|
|
||||||
inline v_float32x4 v_load_expand(const float16_t* ptr)
|
inline v_float32x4 v_load_expand(const float16_t* ptr)
|
||||||
{
|
{
|
||||||
|
#if CV_FP16
|
||||||
|
return v_float32x4(_mm_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
|
||||||
|
#else
|
||||||
const __m128i z = _mm_setzero_si128(), delta = _mm_set1_epi32(0x38000000);
|
const __m128i z = _mm_setzero_si128(), delta = _mm_set1_epi32(0x38000000);
|
||||||
const __m128i signmask = _mm_set1_epi32(0x80000000), maxexp = _mm_set1_epi32(0x7c000000);
|
const __m128i signmask = _mm_set1_epi32(0x80000000), maxexp = _mm_set1_epi32(0x7c000000);
|
||||||
const __m128 deltaf = _mm_castsi128_ps(_mm_set1_epi32(0x38800000));
|
const __m128 deltaf = _mm_castsi128_ps(_mm_set1_epi32(0x38800000));
|
||||||
@ -2968,10 +2958,15 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
|
|||||||
__m128i zmask = _mm_cmpeq_epi32(e, z);
|
__m128i zmask = _mm_cmpeq_epi32(e, z);
|
||||||
__m128i ft = v_select_si128(zmask, zt, t);
|
__m128i ft = v_select_si128(zmask, zt, t);
|
||||||
return v_float32x4(_mm_castsi128_ps(_mm_or_si128(ft, sign)));
|
return v_float32x4(_mm_castsi128_ps(_mm_or_si128(ft, sign)));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
|
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
|
||||||
{
|
{
|
||||||
|
#if CV_FP16
|
||||||
|
__m128i fp16_value = _mm_cvtps_ph(v.val, 0);
|
||||||
|
_mm_storel_epi64((__m128i*)ptr, fp16_value);
|
||||||
|
#else
|
||||||
const __m128i signmask = _mm_set1_epi32(0x80000000);
|
const __m128i signmask = _mm_set1_epi32(0x80000000);
|
||||||
const __m128i rval = _mm_set1_epi32(0x3f000000);
|
const __m128i rval = _mm_set1_epi32(0x3f000000);
|
||||||
|
|
||||||
@ -2993,6 +2988,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
|
|||||||
t = _mm_or_si128(t, sign);
|
t = _mm_or_si128(t, sign);
|
||||||
t = _mm_packs_epi32(t, t);
|
t = _mm_packs_epi32(t, t);
|
||||||
_mm_storel_epi64((__m128i*)ptr, t);
|
_mm_storel_epi64((__m128i*)ptr, t);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_cleanup() {}
|
inline void v_cleanup() {}
|
||||||
|
@ -256,6 +256,12 @@ Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device)
|
|||||||
CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available platforms");
|
CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available platforms");
|
||||||
|
|
||||||
std::vector<cl_platform_id> platforms(numPlatforms);
|
std::vector<cl_platform_id> platforms(numPlatforms);
|
||||||
|
status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
|
||||||
|
if (status != CL_SUCCESS)
|
||||||
|
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platforms");
|
||||||
|
|
||||||
|
// TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
|
||||||
|
|
||||||
size_t exts_len;
|
size_t exts_len;
|
||||||
cv::AutoBuffer<char> extensions;
|
cv::AutoBuffer<char> extensions;
|
||||||
bool is_support_cl_khr_d3d11_sharing = false;
|
bool is_support_cl_khr_d3d11_sharing = false;
|
||||||
@ -264,9 +270,6 @@ Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device)
|
|||||||
#endif
|
#endif
|
||||||
for (int i = 0; i < (int)numPlatforms; i++)
|
for (int i = 0; i < (int)numPlatforms; i++)
|
||||||
{
|
{
|
||||||
status = clGetPlatformIDs(numPlatforms, &platforms[i], NULL);
|
|
||||||
if (status != CL_SUCCESS)
|
|
||||||
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
|
|
||||||
status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0, NULL, &exts_len);
|
status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0, NULL, &exts_len);
|
||||||
if (status != CL_SUCCESS)
|
if (status != CL_SUCCESS)
|
||||||
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get length of CL_PLATFORM_EXTENSIONS");
|
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get length of CL_PLATFORM_EXTENSIONS");
|
||||||
@ -479,7 +482,7 @@ Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device)
|
|||||||
std::vector<cl_platform_id> platforms(numPlatforms);
|
std::vector<cl_platform_id> platforms(numPlatforms);
|
||||||
status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
|
status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
|
||||||
if (status != CL_SUCCESS)
|
if (status != CL_SUCCESS)
|
||||||
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
|
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platforms");
|
||||||
|
|
||||||
// TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
|
// TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
|
||||||
|
|
||||||
@ -587,7 +590,7 @@ Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDev
|
|||||||
std::vector<cl_platform_id> platforms(numPlatforms);
|
std::vector<cl_platform_id> platforms(numPlatforms);
|
||||||
status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
|
status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
|
||||||
if (status != CL_SUCCESS)
|
if (status != CL_SUCCESS)
|
||||||
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
|
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platforms");
|
||||||
|
|
||||||
// TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
|
// TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
|
||||||
|
|
||||||
@ -697,7 +700,7 @@ Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9
|
|||||||
std::vector<cl_platform_id> platforms(numPlatforms);
|
std::vector<cl_platform_id> platforms(numPlatforms);
|
||||||
status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
|
status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
|
||||||
if (status != CL_SUCCESS)
|
if (status != CL_SUCCESS)
|
||||||
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
|
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platforms");
|
||||||
|
|
||||||
// TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
|
// TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
|
||||||
|
|
||||||
|
@ -44,6 +44,9 @@
|
|||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <opencv2/core.hpp>
|
#include <opencv2/core.hpp>
|
||||||
|
#ifdef CV_CXX11
|
||||||
|
#include <future>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "../dnn/version.hpp"
|
#include "../dnn/version.hpp"
|
||||||
|
|
||||||
@ -57,6 +60,18 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
|
|
||||||
typedef std::vector<int> MatShape;
|
typedef std::vector<int> MatShape;
|
||||||
|
|
||||||
|
#if defined(CV_CXX11) || defined(CV_DOXYGEN)
|
||||||
|
typedef std::future<Mat> AsyncMat;
|
||||||
|
#else
|
||||||
|
// Just a workaround for bindings.
|
||||||
|
struct AsyncMat
|
||||||
|
{
|
||||||
|
Mat get() { return Mat(); }
|
||||||
|
void wait() const {}
|
||||||
|
size_t wait_for(size_t milliseconds) const { CV_UNUSED(milliseconds); return -1; }
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Enum of computation backends supported by layers.
|
* @brief Enum of computation backends supported by layers.
|
||||||
* @see Net::setPreferableBackend
|
* @see Net::setPreferableBackend
|
||||||
@ -68,7 +83,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
//! DNN_BACKEND_OPENCV otherwise.
|
//! DNN_BACKEND_OPENCV otherwise.
|
||||||
DNN_BACKEND_DEFAULT,
|
DNN_BACKEND_DEFAULT,
|
||||||
DNN_BACKEND_HALIDE,
|
DNN_BACKEND_HALIDE,
|
||||||
DNN_BACKEND_INFERENCE_ENGINE,
|
DNN_BACKEND_INFERENCE_ENGINE, //!< Intel's Inference Engine computational backend.
|
||||||
DNN_BACKEND_OPENCV,
|
DNN_BACKEND_OPENCV,
|
||||||
DNN_BACKEND_VKCOM
|
DNN_BACKEND_VKCOM
|
||||||
};
|
};
|
||||||
@ -84,8 +99,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
DNN_TARGET_OPENCL_FP16,
|
DNN_TARGET_OPENCL_FP16,
|
||||||
DNN_TARGET_MYRIAD,
|
DNN_TARGET_MYRIAD,
|
||||||
DNN_TARGET_VULKAN,
|
DNN_TARGET_VULKAN,
|
||||||
//! FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin.
|
DNN_TARGET_FPGA //!< FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin.
|
||||||
DNN_TARGET_FPGA
|
|
||||||
};
|
};
|
||||||
|
|
||||||
CV_EXPORTS std::vector< std::pair<Backend, Target> > getAvailableBackends();
|
CV_EXPORTS std::vector< std::pair<Backend, Target> > getAvailableBackends();
|
||||||
@ -458,6 +472,15 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
*/
|
*/
|
||||||
CV_WRAP Mat forward(const String& outputName = String());
|
CV_WRAP Mat forward(const String& outputName = String());
|
||||||
|
|
||||||
|
/** @brief Runs forward pass to compute output of layer with name @p outputName.
|
||||||
|
* @param outputName name for layer which output is needed to get
|
||||||
|
* @details By default runs forward pass for the whole network.
|
||||||
|
*
|
||||||
|
* This is an asynchronous version of forward(const String&).
|
||||||
|
* dnn::DNN_BACKEND_INFERENCE_ENGINE backend is required.
|
||||||
|
*/
|
||||||
|
CV_WRAP AsyncMat forwardAsync(const String& outputName = String());
|
||||||
|
|
||||||
/** @brief Runs forward pass to compute output of layer with name @p outputName.
|
/** @brief Runs forward pass to compute output of layer with name @p outputName.
|
||||||
* @param outputBlobs contains all output blobs for specified layer.
|
* @param outputBlobs contains all output blobs for specified layer.
|
||||||
* @param outputName name for layer which output is needed to get
|
* @param outputName name for layer which output is needed to get
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
#define OPENCV_DNN_VERSION_HPP
|
#define OPENCV_DNN_VERSION_HPP
|
||||||
|
|
||||||
/// Use with major OpenCV version only.
|
/// Use with major OpenCV version only.
|
||||||
#define OPENCV_DNN_API_VERSION 20190412
|
#define OPENCV_DNN_API_VERSION 20190422
|
||||||
|
|
||||||
#if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS
|
#if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS
|
||||||
#define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)
|
#define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)
|
||||||
|
@ -2,7 +2,13 @@
|
|||||||
typedef dnn::DictValue LayerId;
|
typedef dnn::DictValue LayerId;
|
||||||
typedef std::vector<dnn::MatShape> vector_MatShape;
|
typedef std::vector<dnn::MatShape> vector_MatShape;
|
||||||
typedef std::vector<std::vector<dnn::MatShape> > vector_vector_MatShape;
|
typedef std::vector<std::vector<dnn::MatShape> > vector_vector_MatShape;
|
||||||
|
#ifdef CV_CXX11
|
||||||
|
typedef std::chrono::milliseconds chrono_milliseconds;
|
||||||
|
typedef std::future_status AsyncMatStatus;
|
||||||
|
#else
|
||||||
|
typedef size_t chrono_milliseconds;
|
||||||
|
typedef size_t AsyncMatStatus;
|
||||||
|
#endif
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name)
|
bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name)
|
||||||
@ -40,6 +46,46 @@ bool pyopencv_to(PyObject *o, std::vector<Mat> &blobs, const char *name) //requi
|
|||||||
return pyopencvVecConverter<Mat>::to(o, blobs, ArgInfo(name, false));
|
return pyopencvVecConverter<Mat>::to(o, blobs, ArgInfo(name, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CV_CXX11
|
||||||
|
|
||||||
|
template<>
|
||||||
|
PyObject* pyopencv_from(const std::future<Mat>& f_)
|
||||||
|
{
|
||||||
|
std::future<Mat>& f = const_cast<std::future<Mat>&>(f_);
|
||||||
|
Ptr<cv::dnn::AsyncMat> p(new std::future<Mat>(std::move(f)));
|
||||||
|
return pyopencv_from(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
PyObject* pyopencv_from(const std::future_status& status)
|
||||||
|
{
|
||||||
|
return pyopencv_from((int)status);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool pyopencv_to(PyObject* src, std::chrono::milliseconds& dst, const char* name)
|
||||||
|
{
|
||||||
|
size_t millis = 0;
|
||||||
|
if (pyopencv_to(src, millis, name))
|
||||||
|
{
|
||||||
|
dst = std::chrono::milliseconds(millis);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
template<>
|
||||||
|
PyObject* pyopencv_from(const cv::dnn::AsyncMat&)
|
||||||
|
{
|
||||||
|
CV_Error(Error::StsNotImplemented, "C++11 is required.");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_CXX11
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
PyObject* pyopencv_from(const dnn::DictValue &dv)
|
PyObject* pyopencv_from(const dnn::DictValue &dv)
|
||||||
{
|
{
|
||||||
|
22
modules/dnn/misc/python/shadow_async_mat.hpp
Normal file
22
modules/dnn/misc/python/shadow_async_mat.hpp
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
#error This is a shadow header file, which is not intended for processing by any compiler. \
|
||||||
|
Only bindings parser should handle this file.
|
||||||
|
|
||||||
|
namespace cv { namespace dnn {
|
||||||
|
|
||||||
|
class CV_EXPORTS_W AsyncMat
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
//! Wait for Mat object readiness and return it.
|
||||||
|
CV_WRAP Mat get();
|
||||||
|
|
||||||
|
//! Wait for Mat object readiness.
|
||||||
|
CV_WRAP void wait() const;
|
||||||
|
|
||||||
|
/** @brief Wait for Mat object readiness specific amount of time.
|
||||||
|
* @param timeout Timeout in milliseconds
|
||||||
|
* @returns [std::future_status](https://en.cppreference.com/w/cpp/thread/future_status)
|
||||||
|
*/
|
||||||
|
CV_WRAP AsyncMatStatus wait_for(std::chrono::milliseconds timeout) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
}}
|
@ -5,8 +5,8 @@ import numpy as np
|
|||||||
|
|
||||||
from tests_common import NewOpenCVTests, unittest
|
from tests_common import NewOpenCVTests, unittest
|
||||||
|
|
||||||
def normAssert(test, a, b, lInf=1e-5):
|
def normAssert(test, a, b, msg=None, lInf=1e-5):
|
||||||
test.assertLess(np.max(np.abs(a - b)), lInf)
|
test.assertLess(np.max(np.abs(a - b)), lInf, msg)
|
||||||
|
|
||||||
def inter_area(box1, box2):
|
def inter_area(box1, box2):
|
||||||
x_min, x_max = max(box1[0], box2[0]), min(box1[2], box2[2])
|
x_min, x_max = max(box1[0], box2[0]), min(box1[2], box2[2])
|
||||||
@ -53,53 +53,6 @@ def normAssertDetections(test, ref, out, confThreshold=0.0, scores_diff=1e-5, bo
|
|||||||
if errMsg:
|
if errMsg:
|
||||||
test.fail(errMsg)
|
test.fail(errMsg)
|
||||||
|
|
||||||
|
|
||||||
# Returns a simple one-layer network created from Caffe's format
|
|
||||||
def getSimpleNet():
|
|
||||||
prototxt = """
|
|
||||||
name: "simpleNet"
|
|
||||||
input: "data"
|
|
||||||
layer {
|
|
||||||
type: "Identity"
|
|
||||||
name: "testLayer"
|
|
||||||
top: "testLayer"
|
|
||||||
bottom: "data"
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
return cv.dnn.readNetFromCaffe(bytearray(prototxt, 'utf8'))
|
|
||||||
|
|
||||||
|
|
||||||
def testBackendAndTarget(backend, target):
|
|
||||||
net = getSimpleNet()
|
|
||||||
net.setPreferableBackend(backend)
|
|
||||||
net.setPreferableTarget(target)
|
|
||||||
inp = np.random.standard_normal([1, 2, 3, 4]).astype(np.float32)
|
|
||||||
try:
|
|
||||||
net.setInput(inp)
|
|
||||||
net.forward()
|
|
||||||
except BaseException as e:
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
haveInfEngine = testBackendAndTarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU)
|
|
||||||
dnnBackendsAndTargets = [
|
|
||||||
[cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
|
|
||||||
]
|
|
||||||
|
|
||||||
if haveInfEngine:
|
|
||||||
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU])
|
|
||||||
if testBackendAndTarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD):
|
|
||||||
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD])
|
|
||||||
|
|
||||||
if cv.ocl.haveOpenCL() and cv.ocl.useOpenCL():
|
|
||||||
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL])
|
|
||||||
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL_FP16])
|
|
||||||
if haveInfEngine and cv.ocl_Device.getDefault().isIntel():
|
|
||||||
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL])
|
|
||||||
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16])
|
|
||||||
|
|
||||||
|
|
||||||
def printParams(backend, target):
|
def printParams(backend, target):
|
||||||
backendNames = {
|
backendNames = {
|
||||||
cv.dnn.DNN_BACKEND_OPENCV: 'OCV',
|
cv.dnn.DNN_BACKEND_OPENCV: 'OCV',
|
||||||
@ -116,8 +69,44 @@ def printParams(backend, target):
|
|||||||
|
|
||||||
class dnn_test(NewOpenCVTests):
|
class dnn_test(NewOpenCVTests):
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super(dnn_test, self).__init__(*args, **kwargs)
|
||||||
|
self.dnnBackendsAndTargets = [
|
||||||
|
[cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
|
||||||
|
]
|
||||||
|
|
||||||
|
if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU):
|
||||||
|
self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU])
|
||||||
|
if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD):
|
||||||
|
self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD])
|
||||||
|
|
||||||
|
if cv.ocl.haveOpenCL() and cv.ocl.useOpenCL():
|
||||||
|
self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL])
|
||||||
|
self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL_FP16])
|
||||||
|
if cv.ocl_Device.getDefault().isIntel():
|
||||||
|
if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL):
|
||||||
|
self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL])
|
||||||
|
if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16):
|
||||||
|
self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16])
|
||||||
|
|
||||||
def find_dnn_file(self, filename, required=True):
|
def find_dnn_file(self, filename, required=True):
|
||||||
return self.find_file(filename, [os.environ.get('OPENCV_DNN_TEST_DATA_PATH', os.getcwd())], required=required)
|
return self.find_file(filename, [os.environ.get('OPENCV_DNN_TEST_DATA_PATH', os.getcwd()),
|
||||||
|
os.environ['OPENCV_TEST_DATA_PATH']],
|
||||||
|
required=required)
|
||||||
|
|
||||||
|
def checkIETarget(self, backend, target):
|
||||||
|
proto = self.find_dnn_file('dnn/layers/layer_convolution.prototxt', required=True)
|
||||||
|
model = self.find_dnn_file('dnn/layers/layer_convolution.caffemodel', required=True)
|
||||||
|
net = cv.dnn.readNet(proto, model)
|
||||||
|
net.setPreferableBackend(backend)
|
||||||
|
net.setPreferableTarget(target)
|
||||||
|
inp = np.random.standard_normal([1, 2, 10, 11]).astype(np.float32)
|
||||||
|
try:
|
||||||
|
net.setInput(inp)
|
||||||
|
net.forward()
|
||||||
|
except BaseException as e:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
def test_blobFromImage(self):
|
def test_blobFromImage(self):
|
||||||
np.random.seed(324)
|
np.random.seed(324)
|
||||||
@ -148,7 +137,7 @@ class dnn_test(NewOpenCVTests):
|
|||||||
|
|
||||||
def test_face_detection(self):
|
def test_face_detection(self):
|
||||||
testdata_required = bool(os.environ.get('OPENCV_DNN_TEST_REQUIRE_TESTDATA', False))
|
testdata_required = bool(os.environ.get('OPENCV_DNN_TEST_REQUIRE_TESTDATA', False))
|
||||||
proto = self.find_dnn_file('dnn/opencv_face_detector.prototxt2', required=testdata_required)
|
proto = self.find_dnn_file('dnn/opencv_face_detector.prototxt', required=testdata_required)
|
||||||
model = self.find_dnn_file('dnn/opencv_face_detector.caffemodel', required=testdata_required)
|
model = self.find_dnn_file('dnn/opencv_face_detector.caffemodel', required=testdata_required)
|
||||||
if proto is None or model is None:
|
if proto is None or model is None:
|
||||||
raise unittest.SkipTest("Missing DNN test files (dnn/opencv_face_detector.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")
|
raise unittest.SkipTest("Missing DNN test files (dnn/opencv_face_detector.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")
|
||||||
@ -164,7 +153,7 @@ class dnn_test(NewOpenCVTests):
|
|||||||
[0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801]]
|
[0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801]]
|
||||||
|
|
||||||
print('\n')
|
print('\n')
|
||||||
for backend, target in dnnBackendsAndTargets:
|
for backend, target in self.dnnBackendsAndTargets:
|
||||||
printParams(backend, target)
|
printParams(backend, target)
|
||||||
|
|
||||||
net = cv.dnn.readNet(proto, model)
|
net = cv.dnn.readNet(proto, model)
|
||||||
@ -178,5 +167,52 @@ class dnn_test(NewOpenCVTests):
|
|||||||
|
|
||||||
normAssertDetections(self, ref, out, 0.5, scoresDiff, iouDiff)
|
normAssertDetections(self, ref, out, 0.5, scoresDiff, iouDiff)
|
||||||
|
|
||||||
|
def test_async(self):
|
||||||
|
timeout = 5000 # in milliseconds
|
||||||
|
testdata_required = bool(os.environ.get('OPENCV_DNN_TEST_REQUIRE_TESTDATA', False))
|
||||||
|
proto = self.find_dnn_file('dnn/layers/layer_convolution.prototxt', required=testdata_required)
|
||||||
|
model = self.find_dnn_file('dnn/layers/layer_convolution.caffemodel', required=testdata_required)
|
||||||
|
if proto is None or model is None:
|
||||||
|
raise unittest.SkipTest("Missing DNN test files (dnn/layers/layer_convolution.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")
|
||||||
|
|
||||||
|
print('\n')
|
||||||
|
for backend, target in self.dnnBackendsAndTargets:
|
||||||
|
if backend != cv.dnn.DNN_BACKEND_INFERENCE_ENGINE:
|
||||||
|
continue
|
||||||
|
|
||||||
|
printParams(backend, target)
|
||||||
|
|
||||||
|
netSync = cv.dnn.readNet(proto, model)
|
||||||
|
netSync.setPreferableBackend(backend)
|
||||||
|
netSync.setPreferableTarget(target)
|
||||||
|
|
||||||
|
netAsync = cv.dnn.readNet(proto, model)
|
||||||
|
netAsync.setPreferableBackend(backend)
|
||||||
|
netAsync.setPreferableTarget(target)
|
||||||
|
|
||||||
|
# Generate inputs
|
||||||
|
numInputs = 10
|
||||||
|
inputs = []
|
||||||
|
for _ in range(numInputs):
|
||||||
|
inputs.append(np.random.standard_normal([2, 6, 75, 113]).astype(np.float32))
|
||||||
|
|
||||||
|
# Run synchronously
|
||||||
|
refs = []
|
||||||
|
for i in range(numInputs):
|
||||||
|
netSync.setInput(inputs[i])
|
||||||
|
refs.append(netSync.forward())
|
||||||
|
|
||||||
|
# Run asynchronously. To make test more robust, process inputs in the reversed order.
|
||||||
|
outs = []
|
||||||
|
for i in reversed(range(numInputs)):
|
||||||
|
netAsync.setInput(inputs[i])
|
||||||
|
outs.insert(0, netAsync.forwardAsync())
|
||||||
|
|
||||||
|
for i in reversed(range(numInputs)):
|
||||||
|
if outs[i].wait_for(timeout) == 1:
|
||||||
|
self.fail("Timeout")
|
||||||
|
normAssert(self, refs[i], outs[i].get(), 'Index: %d' % i, 1e-10)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
NewOpenCVTests.bootstrap()
|
NewOpenCVTests.bootstrap()
|
||||||
|
@ -1043,6 +1043,7 @@ struct Net::Impl
|
|||||||
lastLayerId = 0;
|
lastLayerId = 0;
|
||||||
netWasAllocated = false;
|
netWasAllocated = false;
|
||||||
fusion = true;
|
fusion = true;
|
||||||
|
isAsync = false;
|
||||||
preferableBackend = DNN_BACKEND_DEFAULT;
|
preferableBackend = DNN_BACKEND_DEFAULT;
|
||||||
preferableTarget = DNN_TARGET_CPU;
|
preferableTarget = DNN_TARGET_CPU;
|
||||||
skipInfEngineInit = false;
|
skipInfEngineInit = false;
|
||||||
@ -1064,6 +1065,7 @@ struct Net::Impl
|
|||||||
|
|
||||||
bool netWasAllocated;
|
bool netWasAllocated;
|
||||||
bool fusion;
|
bool fusion;
|
||||||
|
bool isAsync;
|
||||||
std::vector<int64> layersTimings;
|
std::vector<int64> layersTimings;
|
||||||
Mat output_blob;
|
Mat output_blob;
|
||||||
|
|
||||||
@ -2321,6 +2323,9 @@ struct Net::Impl
|
|||||||
std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
|
std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
|
||||||
if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
|
if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
|
||||||
{
|
{
|
||||||
|
if (isAsync)
|
||||||
|
CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode");
|
||||||
|
|
||||||
if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
|
if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
|
||||||
{
|
{
|
||||||
std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
|
std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
|
||||||
@ -2476,7 +2481,7 @@ struct Net::Impl
|
|||||||
}
|
}
|
||||||
else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
|
else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
|
||||||
{
|
{
|
||||||
forwardInfEngine(node);
|
forwardInfEngine(ld.outputBlobsWrappers, node, isAsync);
|
||||||
}
|
}
|
||||||
else if (preferableBackend == DNN_BACKEND_VKCOM)
|
else if (preferableBackend == DNN_BACKEND_VKCOM)
|
||||||
{
|
{
|
||||||
@ -2535,15 +2540,6 @@ struct Net::Impl
|
|||||||
forwardLayer(ld);
|
forwardLayer(ld);
|
||||||
}
|
}
|
||||||
|
|
||||||
void forwardAll()
|
|
||||||
{
|
|
||||||
CV_TRACE_FUNCTION();
|
|
||||||
|
|
||||||
MapIdToLayerData::reverse_iterator last_layer = layers.rbegin();
|
|
||||||
CV_Assert(last_layer != layers.rend());
|
|
||||||
forwardToLayer(last_layer->second, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
|
void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
|
||||||
{
|
{
|
||||||
std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
|
std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
|
||||||
@ -2634,6 +2630,42 @@ struct Net::Impl
|
|||||||
{
|
{
|
||||||
return getBlob(getPinByAlias(outputName));
|
return getBlob(getPinByAlias(outputName));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CV_CXX11
|
||||||
|
std::future<Mat> getBlobAsync(const LayerPin& pin)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
#ifdef HAVE_INF_ENGINE
|
||||||
|
if (!pin.valid())
|
||||||
|
CV_Error(Error::StsObjectNotFound, "Requested blob not found");
|
||||||
|
|
||||||
|
LayerData &ld = layers[pin.lid];
|
||||||
|
if ((size_t)pin.oid >= ld.outputBlobs.size())
|
||||||
|
{
|
||||||
|
CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, "
|
||||||
|
"the #%d was requested", ld.name.c_str(),
|
||||||
|
(int)ld.outputBlobs.size(), (int)pin.oid));
|
||||||
|
}
|
||||||
|
if (preferableTarget != DNN_TARGET_CPU)
|
||||||
|
{
|
||||||
|
CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
|
||||||
|
// Transfer data to CPU if it's require.
|
||||||
|
ld.outputBlobsWrappers[pin.oid]->copyToHost();
|
||||||
|
}
|
||||||
|
CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE);
|
||||||
|
|
||||||
|
Ptr<InfEngineBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<InfEngineBackendWrapper>();
|
||||||
|
return std::move(wrapper->futureMat);
|
||||||
|
#else
|
||||||
|
CV_Error(Error::StsNotImplemented, "DNN_BACKEND_INFERENCE_ENGINE backend is required");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
std::future<Mat> getBlobAsync(String outputName)
|
||||||
|
{
|
||||||
|
return getBlobAsync(getPinByAlias(outputName));
|
||||||
|
}
|
||||||
|
#endif // CV_CXX11
|
||||||
};
|
};
|
||||||
|
|
||||||
Net::Net() : impl(new Net::Impl)
|
Net::Net() : impl(new Net::Impl)
|
||||||
@ -2757,6 +2789,31 @@ Mat Net::forward(const String& outputName)
|
|||||||
return impl->getBlob(layerName);
|
return impl->getBlob(layerName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AsyncMat Net::forwardAsync(const String& outputName)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
#ifdef CV_CXX11
|
||||||
|
if (impl->preferableBackend != DNN_BACKEND_INFERENCE_ENGINE)
|
||||||
|
CV_Error(Error::StsNotImplemented, "Asynchronous forward for backend which is different from DNN_BACKEND_INFERENCE_ENGINE");
|
||||||
|
|
||||||
|
String layerName = outputName;
|
||||||
|
|
||||||
|
if (layerName.empty())
|
||||||
|
layerName = getLayerNames().back();
|
||||||
|
|
||||||
|
std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
|
||||||
|
impl->setUpNet(pins);
|
||||||
|
|
||||||
|
impl->isAsync = true;
|
||||||
|
impl->forwardToLayer(impl->getLayerData(layerName));
|
||||||
|
impl->isAsync = false;
|
||||||
|
|
||||||
|
return impl->getBlobAsync(layerName);
|
||||||
|
#else
|
||||||
|
CV_Error(Error::StsNotImplemented, "Asynchronous forward without C++11");
|
||||||
|
#endif // CV_CXX11
|
||||||
|
}
|
||||||
|
|
||||||
void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
|
void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -140,9 +140,10 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
|
|||||||
return blob;
|
return blob;
|
||||||
}
|
}
|
||||||
|
|
||||||
void runLayer(Ptr<Layer> layer, const std::vector<Mat>& inputs,
|
void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
|
||||||
std::vector<Mat>& outputs)
|
std::vector<Mat>& outputs)
|
||||||
{
|
{
|
||||||
|
Ptr<Layer> layer = LayerFactory::createLayerInstance(params.type, params);
|
||||||
std::vector<MatShape> inpShapes(inputs.size());
|
std::vector<MatShape> inpShapes(inputs.size());
|
||||||
int ddepth = CV_32F;
|
int ddepth = CV_32F;
|
||||||
for (size_t i = 0; i < inputs.size(); ++i)
|
for (size_t i = 0; i < inputs.size(); ++i)
|
||||||
@ -669,14 +670,15 @@ void ONNXImporter::populateNet(Net dstNet)
|
|||||||
Mat blob = getBlob(node_proto, constBlobs, 1);
|
Mat blob = getBlob(node_proto, constBlobs, 1);
|
||||||
CV_Assert(blob.type() == CV_32SC1);
|
CV_Assert(blob.type() == CV_32SC1);
|
||||||
|
|
||||||
if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
|
|
||||||
Mat input = getBlob(node_proto, constBlobs, 0);
|
|
||||||
Mat out = input.reshape(0, static_cast<std::vector<int> >(blob));
|
|
||||||
constBlobs.insert(std::make_pair(layerParams.name, out));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
layerParams.set("dim", DictValue::arrayInt<int*>(
|
layerParams.set("dim", DictValue::arrayInt<int*>(
|
||||||
blob.ptr<int>(), blob.total() ));
|
blob.ptr<int>(), blob.total() ));
|
||||||
|
|
||||||
|
if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
|
||||||
|
std::vector<Mat> inputs(1, getBlob(node_proto, constBlobs, 0)), outputs;
|
||||||
|
runLayer(layerParams, inputs, outputs);
|
||||||
|
constBlobs.insert(std::make_pair(layerParams.name, outputs[0]));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
DictValue shape = layerParams.get("shape");
|
DictValue shape = layerParams.get("shape");
|
||||||
@ -749,8 +751,7 @@ void ONNXImporter::populateNet(Net dstNet)
|
|||||||
{
|
{
|
||||||
inputs[i] = getBlob(node_proto, constBlobs, i);
|
inputs[i] = getBlob(node_proto, constBlobs, i);
|
||||||
}
|
}
|
||||||
Ptr<Layer> concat = ConcatLayer::create(layerParams);
|
runLayer(layerParams, inputs, concatenated);
|
||||||
runLayer(concat, inputs, concatenated);
|
|
||||||
|
|
||||||
CV_Assert(concatenated.size() == 1);
|
CV_Assert(concatenated.size() == 1);
|
||||||
constBlobs.insert(std::make_pair(layerParams.name, concatenated[0]));
|
constBlobs.insert(std::make_pair(layerParams.name, concatenated[0]));
|
||||||
|
@ -168,7 +168,6 @@ void InfEngineBackendNet::init(int targetId)
|
|||||||
const std::string& name = it.first;
|
const std::string& name = it.first;
|
||||||
auto blobIt = allBlobs.find(name);
|
auto blobIt = allBlobs.find(name);
|
||||||
CV_Assert(blobIt != allBlobs.end());
|
CV_Assert(blobIt != allBlobs.end());
|
||||||
inpBlobs[name] = blobIt->second;
|
|
||||||
it.second->setPrecision(blobIt->second->precision());
|
it.second->setPrecision(blobIt->second->precision());
|
||||||
}
|
}
|
||||||
for (const auto& it : cnn.getOutputsInfo())
|
for (const auto& it : cnn.getOutputsInfo())
|
||||||
@ -176,7 +175,6 @@ void InfEngineBackendNet::init(int targetId)
|
|||||||
const std::string& name = it.first;
|
const std::string& name = it.first;
|
||||||
auto blobIt = allBlobs.find(name);
|
auto blobIt = allBlobs.find(name);
|
||||||
CV_Assert(blobIt != allBlobs.end());
|
CV_Assert(blobIt != allBlobs.end());
|
||||||
outBlobs[name] = blobIt->second;
|
|
||||||
it.second->setPrecision(blobIt->second->precision()); // Should be always FP32
|
it.second->setPrecision(blobIt->second->precision()); // Should be always FP32
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -288,6 +286,24 @@ InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::La
|
|||||||
return wrapToInfEngineBlob(m, reversedShape, layout);
|
return wrapToInfEngineBlob(m, reversedShape, layout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
InferenceEngine::Blob::Ptr cloneBlob(const InferenceEngine::Blob::Ptr& blob)
|
||||||
|
{
|
||||||
|
InferenceEngine::Precision precision = blob->precision();
|
||||||
|
InferenceEngine::Blob::Ptr copy;
|
||||||
|
if (precision == InferenceEngine::Precision::FP32)
|
||||||
|
{
|
||||||
|
copy = InferenceEngine::make_shared_blob<float>(precision, blob->layout(), blob->dims());
|
||||||
|
}
|
||||||
|
else if (precision == InferenceEngine::Precision::U8)
|
||||||
|
{
|
||||||
|
copy = InferenceEngine::make_shared_blob<uint8_t>(precision, blob->layout(), blob->dims());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
CV_Error(Error::StsNotImplemented, "Unsupported blob precision");
|
||||||
|
copy->allocate();
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr)
|
InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr)
|
||||||
{
|
{
|
||||||
CV_Assert(!ptr.empty());
|
CV_Assert(!ptr.empty());
|
||||||
@ -800,9 +816,6 @@ void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net)
|
|||||||
plugin = InferenceEngine::InferencePlugin(enginePtr);
|
plugin = InferenceEngine::InferencePlugin(enginePtr);
|
||||||
|
|
||||||
netExec = plugin.LoadNetwork(net, {});
|
netExec = plugin.LoadNetwork(net, {});
|
||||||
infRequest = netExec.CreateInferRequest();
|
|
||||||
infRequest.SetInput(inpBlobs);
|
|
||||||
infRequest.SetOutput(outBlobs);
|
|
||||||
}
|
}
|
||||||
catch (const std::exception& ex)
|
catch (const std::exception& ex)
|
||||||
{
|
{
|
||||||
@ -828,9 +841,116 @@ void InfEngineBackendNet::addBlobs(const std::vector<Ptr<BackendWrapper> >& ptrs
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void InfEngineBackendNet::forward()
|
void InfEngineBackendNet::InfEngineReqWrapper::makePromises(const std::vector<Ptr<BackendWrapper> >& outsWrappers)
|
||||||
{
|
{
|
||||||
infRequest.Infer();
|
auto outs = infEngineWrappers(outsWrappers);
|
||||||
|
outProms.clear();
|
||||||
|
outProms.resize(outs.size());
|
||||||
|
outsNames.resize(outs.size());
|
||||||
|
for (int i = 0; i < outs.size(); ++i)
|
||||||
|
{
|
||||||
|
outs[i]->futureMat = outProms[i].get_future();
|
||||||
|
outsNames[i] = outs[i]->dataPtr->name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void InfEngineBackendNet::forward(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
|
||||||
|
bool isAsync)
|
||||||
|
{
|
||||||
|
// Look for finished requests.
|
||||||
|
Ptr<InfEngineReqWrapper> reqWrapper;
|
||||||
|
for (auto& wrapper : infRequests)
|
||||||
|
{
|
||||||
|
if (wrapper->isReady)
|
||||||
|
{
|
||||||
|
reqWrapper = wrapper;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (reqWrapper.empty())
|
||||||
|
{
|
||||||
|
reqWrapper = Ptr<InfEngineReqWrapper>(new InfEngineReqWrapper());
|
||||||
|
try
|
||||||
|
{
|
||||||
|
reqWrapper->req = netExec.CreateInferRequest();
|
||||||
|
}
|
||||||
|
catch (const std::exception& ex)
|
||||||
|
{
|
||||||
|
CV_Error(Error::StsAssert, format("Failed to initialize Inference Engine backend: %s", ex.what()));
|
||||||
|
}
|
||||||
|
infRequests.push_back(reqWrapper);
|
||||||
|
|
||||||
|
InferenceEngine::BlobMap inpBlobs, outBlobs;
|
||||||
|
for (const auto& it : cnn.getInputsInfo())
|
||||||
|
{
|
||||||
|
const std::string& name = it.first;
|
||||||
|
auto blobIt = allBlobs.find(name);
|
||||||
|
CV_Assert(blobIt != allBlobs.end());
|
||||||
|
inpBlobs[name] = isAsync ? cloneBlob(blobIt->second) : blobIt->second;
|
||||||
|
}
|
||||||
|
for (const auto& it : cnn.getOutputsInfo())
|
||||||
|
{
|
||||||
|
const std::string& name = it.first;
|
||||||
|
auto blobIt = allBlobs.find(name);
|
||||||
|
CV_Assert(blobIt != allBlobs.end());
|
||||||
|
outBlobs[name] = isAsync ? cloneBlob(blobIt->second) : blobIt->second;
|
||||||
|
}
|
||||||
|
reqWrapper->req.SetInput(inpBlobs);
|
||||||
|
reqWrapper->req.SetOutput(outBlobs);
|
||||||
|
|
||||||
|
InferenceEngine::IInferRequest::Ptr infRequestPtr = reqWrapper->req;
|
||||||
|
infRequestPtr->SetUserData(reqWrapper.get(), 0);
|
||||||
|
|
||||||
|
infRequestPtr->SetCompletionCallback({
|
||||||
|
[](InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode status)
|
||||||
|
{
|
||||||
|
InfEngineReqWrapper* wrapper;
|
||||||
|
request->GetUserData((void**)&wrapper, 0);
|
||||||
|
CV_Assert(wrapper);
|
||||||
|
|
||||||
|
for (int i = 0; i < wrapper->outProms.size(); ++i)
|
||||||
|
{
|
||||||
|
const std::string& name = wrapper->outsNames[i];
|
||||||
|
Mat m = infEngineBlobToMat(wrapper->req.GetBlob(name));
|
||||||
|
|
||||||
|
if (status == InferenceEngine::StatusCode::OK)
|
||||||
|
wrapper->outProms[i].set_value(m.clone());
|
||||||
|
else
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
std::runtime_error e("Async request failed");
|
||||||
|
wrapper->outProms[i].set_exception(std::make_exception_ptr(e));
|
||||||
|
} catch(...) {
|
||||||
|
CV_LOG_ERROR(NULL, "DNN: Exception occured during async inference exception propagation");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wrapper->isReady = true;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (isAsync)
|
||||||
|
{
|
||||||
|
// Copy actual data to infer request's input blobs.
|
||||||
|
for (const auto& it : cnn.getInputsInfo())
|
||||||
|
{
|
||||||
|
const std::string& name = it.first;
|
||||||
|
auto blobIt = allBlobs.find(name);
|
||||||
|
Mat srcMat = infEngineBlobToMat(blobIt->second);
|
||||||
|
Mat dstMat = infEngineBlobToMat(reqWrapper->req.GetBlob(name));
|
||||||
|
srcMat.copyTo(dstMat);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set promises to output blobs wrappers.
|
||||||
|
reqWrapper->makePromises(outBlobsWrappers);
|
||||||
|
|
||||||
|
reqWrapper->isReady = false;
|
||||||
|
reqWrapper->req.StartAsync();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
reqWrapper->req.Infer();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
|
Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
|
||||||
@ -920,14 +1040,15 @@ bool haveInfEngine()
|
|||||||
#endif // HAVE_INF_ENGINE
|
#endif // HAVE_INF_ENGINE
|
||||||
}
|
}
|
||||||
|
|
||||||
void forwardInfEngine(Ptr<BackendNode>& node)
|
void forwardInfEngine(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
|
||||||
|
Ptr<BackendNode>& node, bool isAsync)
|
||||||
{
|
{
|
||||||
CV_Assert(haveInfEngine());
|
CV_Assert(haveInfEngine());
|
||||||
#ifdef HAVE_INF_ENGINE
|
#ifdef HAVE_INF_ENGINE
|
||||||
CV_Assert(!node.empty());
|
CV_Assert(!node.empty());
|
||||||
Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
|
Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
|
||||||
CV_Assert(!ieNode.empty());
|
CV_Assert(!ieNode.empty());
|
||||||
ieNode->net->forward();
|
ieNode->net->forward(outBlobsWrappers, isAsync);
|
||||||
#endif // HAVE_INF_ENGINE
|
#endif // HAVE_INF_ENGINE
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -185,7 +185,8 @@ public:
|
|||||||
|
|
||||||
void init(int targetId);
|
void init(int targetId);
|
||||||
|
|
||||||
void forward();
|
void forward(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
|
||||||
|
bool isAsync);
|
||||||
|
|
||||||
void initPlugin(InferenceEngine::ICNNNetwork& net);
|
void initPlugin(InferenceEngine::ICNNNetwork& net);
|
||||||
|
|
||||||
@ -197,12 +198,23 @@ private:
|
|||||||
InferenceEngine::InferenceEnginePluginPtr enginePtr;
|
InferenceEngine::InferenceEnginePluginPtr enginePtr;
|
||||||
InferenceEngine::InferencePlugin plugin;
|
InferenceEngine::InferencePlugin plugin;
|
||||||
InferenceEngine::ExecutableNetwork netExec;
|
InferenceEngine::ExecutableNetwork netExec;
|
||||||
InferenceEngine::InferRequest infRequest;
|
|
||||||
InferenceEngine::BlobMap allBlobs;
|
InferenceEngine::BlobMap allBlobs;
|
||||||
InferenceEngine::BlobMap inpBlobs;
|
|
||||||
InferenceEngine::BlobMap outBlobs;
|
|
||||||
InferenceEngine::TargetDevice targetDevice;
|
InferenceEngine::TargetDevice targetDevice;
|
||||||
|
|
||||||
|
struct InfEngineReqWrapper
|
||||||
|
{
|
||||||
|
InfEngineReqWrapper() : isReady(true) {}
|
||||||
|
|
||||||
|
void makePromises(const std::vector<Ptr<BackendWrapper> >& outs);
|
||||||
|
|
||||||
|
InferenceEngine::InferRequest req;
|
||||||
|
std::vector<std::promise<Mat> > outProms;
|
||||||
|
std::vector<std::string> outsNames;
|
||||||
|
bool isReady;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<Ptr<InfEngineReqWrapper> > infRequests;
|
||||||
|
|
||||||
InferenceEngine::CNNNetwork cnn;
|
InferenceEngine::CNNNetwork cnn;
|
||||||
bool hasNetOwner;
|
bool hasNetOwner;
|
||||||
|
|
||||||
@ -252,6 +264,7 @@ public:
|
|||||||
|
|
||||||
InferenceEngine::DataPtr dataPtr;
|
InferenceEngine::DataPtr dataPtr;
|
||||||
InferenceEngine::Blob::Ptr blob;
|
InferenceEngine::Blob::Ptr blob;
|
||||||
|
std::future<Mat> futureMat;
|
||||||
};
|
};
|
||||||
|
|
||||||
InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout = InferenceEngine::Layout::ANY);
|
InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout = InferenceEngine::Layout::ANY);
|
||||||
@ -302,7 +315,8 @@ CV__DNN_INLINE_NS_END
|
|||||||
|
|
||||||
bool haveInfEngine();
|
bool haveInfEngine();
|
||||||
|
|
||||||
void forwardInfEngine(Ptr<BackendNode>& node);
|
void forwardInfEngine(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
|
||||||
|
Ptr<BackendNode>& node, bool isAsync);
|
||||||
|
|
||||||
}} // namespace dnn, namespace cv
|
}} // namespace dnn, namespace cv
|
||||||
|
|
||||||
|
@ -342,4 +342,106 @@ TEST(Net, forwardAndRetrieve)
|
|||||||
normAssert(outBlobs[0][1], inp.rowRange(2, 4), "second part");
|
normAssert(outBlobs[0][1], inp.rowRange(2, 4), "second part");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_INF_ENGINE
|
||||||
|
// This test runs network in synchronous mode for different inputs and then
|
||||||
|
// runs the same model asynchronously for the same inputs.
|
||||||
|
typedef testing::TestWithParam<Target> Async;
|
||||||
|
TEST_P(Async, set_and_forward_single)
|
||||||
|
{
|
||||||
|
static const int kTimeout = 5000; // in milliseconds.
|
||||||
|
const int target = GetParam();
|
||||||
|
|
||||||
|
const std::string suffix = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "_fp16" : "";
|
||||||
|
const std::string& model = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin");
|
||||||
|
const std::string& proto = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml");
|
||||||
|
|
||||||
|
Net netSync = readNet(model, proto);
|
||||||
|
netSync.setPreferableTarget(target);
|
||||||
|
|
||||||
|
Net netAsync = readNet(model, proto);
|
||||||
|
netAsync.setPreferableTarget(target);
|
||||||
|
|
||||||
|
// Generate inputs.
|
||||||
|
const int numInputs = 10;
|
||||||
|
std::vector<Mat> inputs(numInputs);
|
||||||
|
int blobSize[] = {2, 6, 75, 113};
|
||||||
|
for (int i = 0; i < numInputs; ++i)
|
||||||
|
{
|
||||||
|
inputs[i].create(4, &blobSize[0], CV_32FC1);
|
||||||
|
randu(inputs[i], 0.0f, 1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run synchronously.
|
||||||
|
std::vector<Mat> refs(numInputs);
|
||||||
|
for (int i = 0; i < numInputs; ++i)
|
||||||
|
{
|
||||||
|
netSync.setInput(inputs[i]);
|
||||||
|
refs[i] = netSync.forward().clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run asynchronously. To make test more robust, process inputs in the reversed order.
|
||||||
|
for (int i = numInputs - 1; i >= 0; --i)
|
||||||
|
{
|
||||||
|
netAsync.setInput(inputs[i]);
|
||||||
|
|
||||||
|
std::future<Mat> out = netAsync.forwardAsync();
|
||||||
|
if (out.wait_for(std::chrono::milliseconds(kTimeout)) == std::future_status::timeout)
|
||||||
|
CV_Error(Error::StsAssert, "Timeout");
|
||||||
|
normAssert(refs[i], out.get(), format("Index: %d", i).c_str(), 0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(Async, set_and_forward_all)
|
||||||
|
{
|
||||||
|
static const int kTimeout = 5000; // in milliseconds.
|
||||||
|
const int target = GetParam();
|
||||||
|
|
||||||
|
const std::string suffix = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "_fp16" : "";
|
||||||
|
const std::string& model = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin");
|
||||||
|
const std::string& proto = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml");
|
||||||
|
|
||||||
|
|
||||||
|
Net netSync = readNet(model, proto);
|
||||||
|
netSync.setPreferableTarget(target);
|
||||||
|
|
||||||
|
Net netAsync = readNet(model, proto);
|
||||||
|
netAsync.setPreferableTarget(target);
|
||||||
|
|
||||||
|
// Generate inputs.
|
||||||
|
const int numInputs = 10;
|
||||||
|
std::vector<Mat> inputs(numInputs);
|
||||||
|
int blobSize[] = {2, 6, 75, 113};
|
||||||
|
for (int i = 0; i < numInputs; ++i)
|
||||||
|
{
|
||||||
|
inputs[i].create(4, &blobSize[0], CV_32FC1);
|
||||||
|
randu(inputs[i], 0.0f, 1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run synchronously.
|
||||||
|
std::vector<Mat> refs(numInputs);
|
||||||
|
for (int i = 0; i < numInputs; ++i)
|
||||||
|
{
|
||||||
|
netSync.setInput(inputs[i]);
|
||||||
|
refs[i] = netSync.forward().clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run asynchronously. To make test more robust, process inputs in the reversed order.
|
||||||
|
std::vector<std::future<Mat> > outs(numInputs);
|
||||||
|
for (int i = numInputs - 1; i >= 0; --i)
|
||||||
|
{
|
||||||
|
netAsync.setInput(inputs[i]);
|
||||||
|
outs[i] = netAsync.forwardAsync();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = numInputs - 1; i >= 0; --i)
|
||||||
|
{
|
||||||
|
if (outs[i].wait_for(std::chrono::milliseconds(kTimeout)) == std::future_status::timeout)
|
||||||
|
CV_Error(Error::StsAssert, "Timeout");
|
||||||
|
normAssert(refs[i], outs[i].get(), format("Index: %d", i).c_str(), 0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(/**/, Async, testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE)));
|
||||||
|
#endif // HAVE_INF_ENGINE
|
||||||
|
|
||||||
}} // namespace
|
}} // namespace
|
||||||
|
@ -892,52 +892,52 @@ class CV_EXPORTS_W GeneralizedHoughGuil : public GeneralizedHough
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
//! Angle difference in degrees between two points in feature.
|
//! Angle difference in degrees between two points in feature.
|
||||||
virtual void setXi(double xi) = 0;
|
CV_WRAP virtual void setXi(double xi) = 0;
|
||||||
virtual double getXi() const = 0;
|
CV_WRAP virtual double getXi() const = 0;
|
||||||
|
|
||||||
//! Feature table levels.
|
//! Feature table levels.
|
||||||
virtual void setLevels(int levels) = 0;
|
CV_WRAP virtual void setLevels(int levels) = 0;
|
||||||
virtual int getLevels() const = 0;
|
CV_WRAP virtual int getLevels() const = 0;
|
||||||
|
|
||||||
//! Maximal difference between angles that treated as equal.
|
//! Maximal difference between angles that treated as equal.
|
||||||
virtual void setAngleEpsilon(double angleEpsilon) = 0;
|
CV_WRAP virtual void setAngleEpsilon(double angleEpsilon) = 0;
|
||||||
virtual double getAngleEpsilon() const = 0;
|
CV_WRAP virtual double getAngleEpsilon() const = 0;
|
||||||
|
|
||||||
//! Minimal rotation angle to detect in degrees.
|
//! Minimal rotation angle to detect in degrees.
|
||||||
virtual void setMinAngle(double minAngle) = 0;
|
CV_WRAP virtual void setMinAngle(double minAngle) = 0;
|
||||||
virtual double getMinAngle() const = 0;
|
CV_WRAP virtual double getMinAngle() const = 0;
|
||||||
|
|
||||||
//! Maximal rotation angle to detect in degrees.
|
//! Maximal rotation angle to detect in degrees.
|
||||||
virtual void setMaxAngle(double maxAngle) = 0;
|
CV_WRAP virtual void setMaxAngle(double maxAngle) = 0;
|
||||||
virtual double getMaxAngle() const = 0;
|
CV_WRAP virtual double getMaxAngle() const = 0;
|
||||||
|
|
||||||
//! Angle step in degrees.
|
//! Angle step in degrees.
|
||||||
virtual void setAngleStep(double angleStep) = 0;
|
CV_WRAP virtual void setAngleStep(double angleStep) = 0;
|
||||||
virtual double getAngleStep() const = 0;
|
CV_WRAP virtual double getAngleStep() const = 0;
|
||||||
|
|
||||||
//! Angle votes threshold.
|
//! Angle votes threshold.
|
||||||
virtual void setAngleThresh(int angleThresh) = 0;
|
CV_WRAP virtual void setAngleThresh(int angleThresh) = 0;
|
||||||
virtual int getAngleThresh() const = 0;
|
CV_WRAP virtual int getAngleThresh() const = 0;
|
||||||
|
|
||||||
//! Minimal scale to detect.
|
//! Minimal scale to detect.
|
||||||
virtual void setMinScale(double minScale) = 0;
|
CV_WRAP virtual void setMinScale(double minScale) = 0;
|
||||||
virtual double getMinScale() const = 0;
|
CV_WRAP virtual double getMinScale() const = 0;
|
||||||
|
|
||||||
//! Maximal scale to detect.
|
//! Maximal scale to detect.
|
||||||
virtual void setMaxScale(double maxScale) = 0;
|
CV_WRAP virtual void setMaxScale(double maxScale) = 0;
|
||||||
virtual double getMaxScale() const = 0;
|
CV_WRAP virtual double getMaxScale() const = 0;
|
||||||
|
|
||||||
//! Scale step.
|
//! Scale step.
|
||||||
virtual void setScaleStep(double scaleStep) = 0;
|
CV_WRAP virtual void setScaleStep(double scaleStep) = 0;
|
||||||
virtual double getScaleStep() const = 0;
|
CV_WRAP virtual double getScaleStep() const = 0;
|
||||||
|
|
||||||
//! Scale votes threshold.
|
//! Scale votes threshold.
|
||||||
virtual void setScaleThresh(int scaleThresh) = 0;
|
CV_WRAP virtual void setScaleThresh(int scaleThresh) = 0;
|
||||||
virtual int getScaleThresh() const = 0;
|
CV_WRAP virtual int getScaleThresh() const = 0;
|
||||||
|
|
||||||
//! Position votes threshold.
|
//! Position votes threshold.
|
||||||
virtual void setPosThresh(int posThresh) = 0;
|
CV_WRAP virtual void setPosThresh(int posThresh) = 0;
|
||||||
virtual int getPosThresh() const = 0;
|
CV_WRAP virtual int getPosThresh() const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
//! @} imgproc_shape
|
//! @} imgproc_shape
|
||||||
@ -4175,11 +4175,11 @@ CV_EXPORTS_W int rotatedRectangleIntersection( const RotatedRect& rect1, const R
|
|||||||
|
|
||||||
/** @brief Creates a smart pointer to a cv::GeneralizedHoughBallard class and initializes it.
|
/** @brief Creates a smart pointer to a cv::GeneralizedHoughBallard class and initializes it.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS Ptr<GeneralizedHoughBallard> createGeneralizedHoughBallard();
|
CV_EXPORTS_W Ptr<GeneralizedHoughBallard> createGeneralizedHoughBallard();
|
||||||
|
|
||||||
/** @brief Creates a smart pointer to a cv::GeneralizedHoughGuil class and initializes it.
|
/** @brief Creates a smart pointer to a cv::GeneralizedHoughGuil class and initializes it.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS Ptr<GeneralizedHoughGuil> createGeneralizedHoughGuil();
|
CV_EXPORTS_W Ptr<GeneralizedHoughGuil> createGeneralizedHoughGuil();
|
||||||
|
|
||||||
//! @} imgproc_shape
|
//! @} imgproc_shape
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user