Merge remote-tracking branch 'upstream/3.4' into merge-3.4

2025-07-26 23:27:01 +08:00 · 2019-04-22 19:08:11 +03:00 · 2019-04-22 19:08:11 +03:00 · d0032b0717
commit d0032b0717
parent 8120595c3e 8eb685de9d
17 changed files with 562 additions and 196 deletions
--- a/cmake/OpenCVPCHSupport.cmake
+++ b/cmake/OpenCVPCHSupport.cmake
@ -302,7 +302,7 @@ MACRO(ADD_PRECOMPILED_HEADER _targetName _input)
 if [ -n \"$VERBOSE\" ]; then
  tail -n1 \$0
 fi
-${_command} -D$<JOIN:$<TARGET_PROPERTY:${_targetName},COMPILE_DEFINITIONS>, -D>
+${_command} '-D$<JOIN:$<TARGET_PROPERTY:${_targetName},COMPILE_DEFINITIONS>,' '-D>'
 ")
    GET_FILENAME_COMPONENT(_outdir ${_output} PATH)
    ADD_CUSTOM_COMMAND(
--- a/doc/py_tutorials/py_ml/py_svm/py_svm_opencv/py_svm_opencv.markdown
+++ b/doc/py_tutorials/py_ml/py_svm/py_svm_opencv/py_svm_opencv.markdown
@ -47,7 +47,7 @@ area and try to implement them.
 Additional Resources
 --------------------
-#  [Histograms of Oriented Gradients Video](www.youtube.com/watch?v=0Zib1YEE4LU‎)
+-#  [Histograms of Oriented Gradients Video](https://www.youtube.com/watch?v=0Zib1YEE4LU)
 Exercises
 ---------
--- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp
@ -431,19 +431,6 @@ inline v_float64x4 v_reinterpret_as_f64(const v_float64x4& a)
 inline v_float64x4 v_reinterpret_as_f64(const v_float32x8& a)
 { return v_float64x4(_mm256_castps_pd(a.val)); }
 #if CV_FP16
 // Reads eight half-precision values from ptr with an unaligned 128-bit load
 // and widens them to a vector of eight floats.
 inline v_float32x8 v256_load_fp16_f32(const short* ptr)
 {
    __m128i packed_halves = _mm_loadu_si128((const __m128i*)ptr);
    __m256 widened = _mm256_cvtph_ps(packed_halves);
    return v_float32x8(widened);
 }
 // Converts the eight floats held in `a` to half precision and writes the
 // resulting 128 bits to ptr.
 // The store is unaligned: ptr is a plain short*, so nothing guarantees the
 // 16-byte alignment that _mm_store_si128 would require.
 inline void v_store_fp16(short* ptr, const v_float32x8& a)
 {
    __m128i fp16_value = _mm256_cvtps_ph(a.val, 0);
    _mm_storeu_si128((__m128i*)ptr, fp16_value);
 }
 #endif
 /* Recombine */
 /*#define OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, perm)                    \
    inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b)    \
@ -1400,7 +1387,7 @@ inline v_float32x8 v_cvt_f32(const v_float64x4& a)
 inline v_float32x8 v_cvt_f32(const v_float64x4& a, const v_float64x4& b)
 {
    __m128 af = _mm256_cvtpd_ps(a.val), bf = _mm256_cvtpd_ps(b.val);
-    return v_float32x8(_mm256_insertf128_ps(_mm256_castps128_ps256(af), bf, 1));
+    return v_float32x8(_v256_combine(af, bf));
 }
 inline v_float64x4 v_cvt_f64(const v_int32x8& a)
@ -1474,7 +1461,7 @@ inline v_int32x8 v256_lut_pairs(const int* tab, const int* idx)
 }
 inline v_int32x8 v256_lut_quads(const int* tab, const int* idx)
 {
-    return v_int32x8(_mm256_insertf128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i*)(tab + idx[0]))), _mm_loadu_si128((const __m128i*)(tab + idx[1])), 0x1));
+    return v_int32x8(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1]))));
 }
 inline v_uint32x8 v256_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut((const int *)tab, idx)); }
 inline v_uint32x8 v256_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_pairs((const int *)tab, idx)); }
@ -1490,7 +1477,7 @@ inline v_int64x4 v256_lut(const int64* tab, const int* idx)
 }
 inline v_int64x4 v256_lut_pairs(const int64* tab, const int* idx)
 {
-    return v_int64x4(_mm256_insertf128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i*)(tab + idx[0]))), _mm_loadu_si128((const __m128i*)(tab + idx[1])), 0x1));
+    return v_int64x4(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1]))));
 }
 inline v_uint64x4 v256_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut((const int64 *)tab, idx)); }
 inline v_uint64x4 v256_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut_pairs((const int64 *)tab, idx)); }
@ -1506,7 +1493,7 @@ inline v_float64x4 v256_lut(const double* tab, const int* idx)
 {
    return v_float64x4(_mm256_i32gather_pd(tab, _mm_loadu_si128((const __m128i*)idx), 8));
 }
-inline v_float64x4 v256_lut_pairs(const double* tab, const int* idx) { return v_float64x4(_mm256_insertf128_pd(_mm256_castpd128_pd256(_mm_loadu_pd(tab + idx[0])), _mm_loadu_pd(tab + idx[1]), 0x1)); }
+inline v_float64x4 v256_lut_pairs(const double* tab, const int* idx) { return v_float64x4(_v256_combine(_mm_loadu_pd(tab + idx[0]), _mm_loadu_pd(tab + idx[1]))); }
 inline v_int32x8 v_lut(const int* tab, const v_int32x8& idxvec)
 {
--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@ -278,48 +278,6 @@ struct v_float64x2
 };
 #endif
 #if CV_FP16
 // Workaround for old compilers
 static inline int16x4_t vreinterpret_s16_f16(float16x4_t a) { return (int16x4_t)a; }
 static inline float16x4_t vreinterpret_f16_s16(int16x4_t a) { return (float16x4_t)a; }
 // Loads a float16x4_t from ptr.
 // If vld1_f16 is not defined as a macro, the data is loaded as int16 lanes
 // and reinterpreted; otherwise vld1_f16 is used directly.
 static inline float16x4_t cv_vld1_f16(const void* ptr)
 {
 #ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro
    return vreinterpret_f16_s16(vld1_s16((const short*)ptr));
 #else
    return vld1_f16((const __fp16*)ptr);
 #endif
 }
 // Stores the float16x4_t `a` to ptr.
 // If vst1_f16 is not defined as a macro, the vector is reinterpreted as int16
 // lanes and stored with vst1_s16; otherwise vst1_f16 is used directly.
 static inline void cv_vst1_f16(void* ptr, float16x4_t a)
 {
 #ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro
    vst1_s16((short*)ptr, vreinterpret_s16_f16(a));
 #else
    vst1_f16((__fp16*)ptr, a);
 #endif
 }
 #ifndef vdup_n_f16
    #define vdup_n_f16(v) (float16x4_t){v, v, v, v}
 #endif
 #endif // CV_FP16
 #if CV_FP16
 // Loads four half-precision values through cv_vld1_f16 and widens them to float32.
 inline v_float32x4 v128_load_fp16_f32(const short* ptr)
 {
    return v_float32x4(vcvt_f32_f16(cv_vld1_f16((const __fp16*)ptr)));
 }
 inline void v_store_fp16(short* ptr, const v_float32x4& a)
 {
    float16x4_t fp16 = vcvt_f16_f32(a.val);
    cv_vst1_f16((short*)ptr, fp16);
 }
 #endif
 #define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
 inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
 inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
--- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp
@ -2684,19 +2684,6 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
    return v_float64x2(_mm_cvtps_pd(_mm_movehl_ps(a.val, a.val)));
 }
 #if CV_FP16
 // Loads four half-precision values from ptr and widens them to float32.
 // Only 64 bits are read: _mm_cvtph_ps converts just the low four 16-bit lanes,
 // so a full 128-bit load would touch 8 bytes beyond the four input elements.
 inline v_float32x4 v128_load_fp16_f32(const short* ptr)
 {
    return v_float32x4(_mm_cvtph_ps(_mm_loadl_epi64((const __m128i*)ptr)));
 }
 // Converts the four floats in `a` to half precision and writes the low
 // 64 bits of the result to ptr.
 inline void v_store_fp16(short* ptr, const v_float32x4& a)
 {
    _mm_storel_epi64((__m128i*)ptr, _mm_cvtps_ph(a.val, 0));
 }
 #endif
 ////////////// Lookup table access ////////////////////
 inline v_int8x16 v_lut(const schar* tab, const int* idx)
@ -2956,6 +2943,9 @@ inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
 inline v_float32x4 v_load_expand(const float16_t* ptr)
 {
 #if CV_FP16
    return v_float32x4(_mm_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
 #else
    const __m128i z = _mm_setzero_si128(), delta = _mm_set1_epi32(0x38000000);
    const __m128i signmask = _mm_set1_epi32(0x80000000), maxexp = _mm_set1_epi32(0x7c000000);
    const __m128 deltaf = _mm_castsi128_ps(_mm_set1_epi32(0x38800000));
@ -2968,10 +2958,15 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
    __m128i zmask = _mm_cmpeq_epi32(e, z);
    __m128i ft = v_select_si128(zmask, zt, t);
    return v_float32x4(_mm_castsi128_ps(_mm_or_si128(ft, sign)));
 #endif
 }
 inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
 {
 #if CV_FP16
    __m128i fp16_value = _mm_cvtps_ph(v.val, 0);
    _mm_storel_epi64((__m128i*)ptr, fp16_value);
 #else
    const __m128i signmask = _mm_set1_epi32(0x80000000);
    const __m128i rval = _mm_set1_epi32(0x3f000000);
@ -2993,6 +2988,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
    t = _mm_or_si128(t, sign);
    t = _mm_packs_epi32(t, t);
    _mm_storel_epi64((__m128i*)ptr, t);
 #endif
 }
 inline void v_cleanup() {}
--- a/modules/core/src/directx.cpp
+++ b/modules/core/src/directx.cpp
@ -256,6 +256,12 @@ Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available platforms");
    std::vector<cl_platform_id> platforms(numPlatforms);
    status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platforms");
    // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
    size_t exts_len;
    cv::AutoBuffer<char> extensions;
    bool is_support_cl_khr_d3d11_sharing = false;
@ -264,9 +270,6 @@ Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device)
 #endif
    for (int i = 0; i < (int)numPlatforms; i++)
    {
        status = clGetPlatformIDs(numPlatforms, &platforms[i], NULL);
        if (status != CL_SUCCESS)
            CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
        status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0, NULL, &exts_len);
        if (status != CL_SUCCESS)
            CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get length of CL_PLATFORM_EXTENSIONS");
@ -479,7 +482,7 @@ Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device)
    std::vector<cl_platform_id> platforms(numPlatforms);
    status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
+        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platforms");
    // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
@ -587,7 +590,7 @@ Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDev
    std::vector<cl_platform_id> platforms(numPlatforms);
    status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
+        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platforms");
    // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
@ -697,7 +700,7 @@ Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9
    std::vector<cl_platform_id> platforms(numPlatforms);
    status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
    if (status != CL_SUCCESS)
-        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
+        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platforms");
    // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@ -44,6 +44,9 @@
 #include <vector>
 #include <opencv2/core.hpp>
 #ifdef CV_CXX11
 #include <future>
 #endif
 #include "../dnn/version.hpp"
@ -57,6 +60,18 @@ CV__DNN_INLINE_NS_BEGIN
    typedef std::vector<int> MatShape;
 #if defined(CV_CXX11) || defined(CV_DOXYGEN)
    typedef std::future<Mat> AsyncMat;
 #else
    // Just a workaround for bindings.
    // Stand-in type compiled on the #else branch of the CV_CXX11 / CV_DOXYGEN check.
    // It mirrors the get / wait / wait_for names but performs no work.
    struct AsyncMat
    {
        // Always returns a default-constructed Mat.
        Mat get() { return Mat(); }
        // No-op.
        void wait() const {}
        // Ignores the argument; the returned -1 converts to the largest size_t value.
        size_t wait_for(size_t milliseconds) const { CV_UNUSED(milliseconds); return -1; }
    };
 #endif
    /**
     * @brief Enum of computation backends supported by layers.
     * @see Net::setPreferableBackend
@ -68,7 +83,7 @@ CV__DNN_INLINE_NS_BEGIN
        //! DNN_BACKEND_OPENCV otherwise.
        DNN_BACKEND_DEFAULT,
        DNN_BACKEND_HALIDE,
-        DNN_BACKEND_INFERENCE_ENGINE,
+        DNN_BACKEND_INFERENCE_ENGINE,  //!< Intel's Inference Engine computational backend.
        DNN_BACKEND_OPENCV,
        DNN_BACKEND_VKCOM
    };
@ -84,8 +99,7 @@ CV__DNN_INLINE_NS_BEGIN
        DNN_TARGET_OPENCL_FP16,
        DNN_TARGET_MYRIAD,
        DNN_TARGET_VULKAN,
-        //! FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin.
+        DNN_TARGET_FPGA  //!< FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin.
        DNN_TARGET_FPGA
    };
    CV_EXPORTS std::vector< std::pair<Backend, Target> > getAvailableBackends();
@ -458,6 +472,15 @@ CV__DNN_INLINE_NS_BEGIN
         */
        CV_WRAP Mat forward(const String& outputName = String());
        /** @brief Runs forward pass to compute output of layer with name @p outputName.
         *  @param outputName name for layer which output is needed to get
         *  @details By default runs forward pass for the whole network.
         *
         *  This is an asynchronous version of forward(const String&).
         *  dnn::DNN_BACKEND_INFERENCE_ENGINE backend is required.
         */
        CV_WRAP AsyncMat forwardAsync(const String& outputName = String());
        /** @brief Runs forward pass to compute output of layer with name @p outputName.
         *  @param outputBlobs contains all output blobs for specified layer.
         *  @param outputName name for layer which output is needed to get
--- a/modules/dnn/include/opencv2/dnn/version.hpp
+++ b/modules/dnn/include/opencv2/dnn/version.hpp
@ -6,7 +6,7 @@
 #define OPENCV_DNN_VERSION_HPP
 /// Use with major OpenCV version only.
-#define OPENCV_DNN_API_VERSION 20190412
+#define OPENCV_DNN_API_VERSION 20190422
 #if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS
 #define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)
--- a/modules/dnn/misc/python/pyopencv_dnn.hpp
+++ b/modules/dnn/misc/python/pyopencv_dnn.hpp
@ -2,7 +2,13 @@
 typedef dnn::DictValue LayerId;
 typedef std::vector<dnn::MatShape> vector_MatShape;
 typedef std::vector<std::vector<dnn::MatShape> > vector_vector_MatShape;
-
+#ifdef CV_CXX11
 typedef std::chrono::milliseconds chrono_milliseconds;
 typedef std::future_status AsyncMatStatus;
 #else
 typedef size_t chrono_milliseconds;
 typedef size_t AsyncMatStatus;
 #endif
 template<>
 bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name)
@ -40,6 +46,46 @@ bool pyopencv_to(PyObject *o, std::vector<Mat> &blobs, const char *name) //requi
  return pyopencvVecConverter<Mat>::to(o, blobs, ArgInfo(name, false));
 }
 #ifdef CV_CXX11
 // Converts a std::future<Mat> to a Python object.
 // The future is moved out of the argument (constness is cast away to permit
 // the move) into a heap-allocated future owned by a Ptr<cv::dnn::AsyncMat>,
 // and that Ptr is passed to the Ptr overload of pyopencv_from.
 // NOTE(review): after this call the caller's future has been moved from.
 template<>
 PyObject* pyopencv_from(const std::future<Mat>& f_)
 {
    std::future<Mat>& f = const_cast<std::future<Mat>&>(f_);
    Ptr<cv::dnn::AsyncMat> p(new std::future<Mat>(std::move(f)));
    return pyopencv_from(p);
 }
 template<>
 PyObject* pyopencv_from(const std::future_status& status)
 {
    return pyopencv_from((int)status);
 }
 // Converts a Python object to std::chrono::milliseconds by first converting it
 // to size_t. Returns false and leaves dst untouched when that conversion fails.
 template<>
 bool pyopencv_to(PyObject* src, std::chrono::milliseconds& dst, const char* name)
 {
    size_t timeoutMs = 0;
    if (!pyopencv_to(src, timeoutMs, name))
        return false;
    dst = std::chrono::milliseconds(timeoutMs);
    return true;
 }
 #else
 template<>
 PyObject* pyopencv_from(const cv::dnn::AsyncMat&)
 {
    CV_Error(Error::StsNotImplemented, "C++11 is required.");
    return 0;
 }
 #endif  // CV_CXX11
 template<typename T>
 PyObject* pyopencv_from(const dnn::DictValue &dv)
 {
--- a/modules/dnn/misc/python/shadow_async_mat.hpp
+++ b/modules/dnn/misc/python/shadow_async_mat.hpp
@ -0,0 +1,22 @@
 #error This is a shadow header file, which is not intended for processing by any compiler. \
       Only bindings parser should handle this file.
 namespace cv { namespace dnn {
 class CV_EXPORTS_W AsyncMat
 {
 public:
    //! Wait for Mat object readiness and return it.
    CV_WRAP Mat get();
    //! Wait for Mat object readiness.
    CV_WRAP void wait() const;
    /** @brief Wait for Mat object readiness specific amount of time.
     *  @param timeout Timeout in milliseconds
     *  @returns [std::future_status](https://en.cppreference.com/w/cpp/thread/future_status)
     */
    CV_WRAP AsyncMatStatus wait_for(std::chrono::milliseconds timeout) const;
 };
 }}
--- a/modules/dnn/misc/python/test/test_dnn.py
+++ b/modules/dnn/misc/python/test/test_dnn.py
@ -5,8 +5,8 @@ import numpy as np
 from tests_common import NewOpenCVTests, unittest
-def normAssert(test, a, b, lInf=1e-5):
+def normAssert(test, a, b, msg=None, lInf=1e-5):
-    test.assertLess(np.max(np.abs(a - b)), lInf)
+    test.assertLess(np.max(np.abs(a - b)), lInf, msg)
 def inter_area(box1, box2):
    x_min, x_max = max(box1[0], box2[0]), min(box1[2], box2[2])
@ -53,53 +53,6 @@ def normAssertDetections(test, ref, out, confThreshold=0.0, scores_diff=1e-5, bo
    if errMsg:
        test.fail(errMsg)
 # Returns a simple one-layer network created from Caffe's format
 def getSimpleNet():
    prototxt = """
        name: "simpleNet"
        input: "data"
        layer {
          type: "Identity"
          name: "testLayer"
          top: "testLayer"
          bottom: "data"
        }
    """
    return cv.dnn.readNetFromCaffe(bytearray(prototxt, 'utf8'))
 def testBackendAndTarget(backend, target):
    """Return True if the simple net runs one forward pass on backend/target.

    Builds the network via getSimpleNet(), selects the requested backend and
    target, feeds random float32 input of shape [1, 2, 3, 4] and reports
    whether setInput/forward completed without raising.
    """
    net = getSimpleNet()
    net.setPreferableBackend(backend)
    net.setPreferableTarget(target)
    inp = np.random.standard_normal([1, 2, 3, 4]).astype(np.float32)
    try:
        net.setInput(inp)
        net.forward()
    except Exception:
        # Ordinary failures mean the pair is unusable. KeyboardInterrupt and
        # SystemExit are not Exception subclasses and propagate to the caller.
        return False
    return True
 haveInfEngine = testBackendAndTarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU)
 dnnBackendsAndTargets = [
    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
 ]
 if haveInfEngine:
    dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU])
    if testBackendAndTarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD):
        dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD])
 if cv.ocl.haveOpenCL() and cv.ocl.useOpenCL():
    dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL])
    dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL_FP16])
    if haveInfEngine and cv.ocl_Device.getDefault().isIntel():
        dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL])
        dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16])
 def printParams(backend, target):
    backendNames = {
        cv.dnn.DNN_BACKEND_OPENCV: 'OCV',
@ -116,8 +69,44 @@ def printParams(backend, target):
 class dnn_test(NewOpenCVTests):
    def __init__(self, *args, **kwargs):
        super(dnn_test, self).__init__(*args, **kwargs)
        self.dnnBackendsAndTargets = [
            [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
        ]
        if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU):
            self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU])
        if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD):
            self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD])
        if cv.ocl.haveOpenCL() and cv.ocl.useOpenCL():
            self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL])
            self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL_FP16])
            if cv.ocl_Device.getDefault().isIntel():
                if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL):
                    self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL])
                if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16):
                    self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16])
    def find_dnn_file(self, filename, required=True):
-        return self.find_file(filename, [os.environ.get('OPENCV_DNN_TEST_DATA_PATH', os.getcwd())], required=required)
+        return self.find_file(filename, [os.environ.get('OPENCV_DNN_TEST_DATA_PATH', os.getcwd()),
                                         os.environ['OPENCV_TEST_DATA_PATH']],
                              required=required)
    def checkIETarget(self, backend, target):
        """Return True if a convolution test net runs on the given backend/target.

        Locates dnn/layers/layer_convolution.{prototxt,caffemodel} through
        find_dnn_file (required=True), runs one forward pass on random float32
        input of shape [1, 2, 10, 11] and reports whether it completed
        without raising.
        """
        proto = self.find_dnn_file('dnn/layers/layer_convolution.prototxt', required=True)
        model = self.find_dnn_file('dnn/layers/layer_convolution.caffemodel', required=True)
        net = cv.dnn.readNet(proto, model)
        net.setPreferableBackend(backend)
        net.setPreferableTarget(target)
        inp = np.random.standard_normal([1, 2, 10, 11]).astype(np.float32)
        try:
            net.setInput(inp)
            net.forward()
        except Exception:
            # Ordinary failures mean the pair is unusable. KeyboardInterrupt and
            # SystemExit are not Exception subclasses and propagate to the caller.
            return False
        return True
    def test_blobFromImage(self):
        np.random.seed(324)
@ -148,7 +137,7 @@ class dnn_test(NewOpenCVTests):
    def test_face_detection(self):
        testdata_required = bool(os.environ.get('OPENCV_DNN_TEST_REQUIRE_TESTDATA', False))
-        proto = self.find_dnn_file('dnn/opencv_face_detector.prototxt2', required=testdata_required)
+        proto = self.find_dnn_file('dnn/opencv_face_detector.prototxt', required=testdata_required)
        model = self.find_dnn_file('dnn/opencv_face_detector.caffemodel', required=testdata_required)
        if proto is None or model is None:
            raise unittest.SkipTest("Missing DNN test files (dnn/opencv_face_detector.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")
@ -164,7 +153,7 @@ class dnn_test(NewOpenCVTests):
               [0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427,  0.5347801]]
        print('\n')
-        for backend, target in dnnBackendsAndTargets:
+        for backend, target in self.dnnBackendsAndTargets:
            printParams(backend, target)
            net = cv.dnn.readNet(proto, model)
@ -178,5 +167,52 @@ class dnn_test(NewOpenCVTests):
            normAssertDetections(self, ref, out, 0.5, scoresDiff, iouDiff)
    def test_async(self):
        """Compare forwardAsync() outputs against forward() for Inference Engine targets.

        Skipped when the convolution test model files cannot be found. Entries
        of self.dnnBackendsAndTargets whose backend is not
        DNN_BACKEND_INFERENCE_ENGINE are ignored.
        """
        timeout = 5000  # in milliseconds
        testdata_required = bool(os.environ.get('OPENCV_DNN_TEST_REQUIRE_TESTDATA', False))
        proto = self.find_dnn_file('dnn/layers/layer_convolution.prototxt', required=testdata_required)
        model = self.find_dnn_file('dnn/layers/layer_convolution.caffemodel', required=testdata_required)
        if proto is None or model is None:
            raise unittest.SkipTest("Missing DNN test files (dnn/layers/layer_convolution.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")
        print('\n')
        for backend, target in self.dnnBackendsAndTargets:
            if backend != cv.dnn.DNN_BACKEND_INFERENCE_ENGINE:
                continue
            printParams(backend, target)
            # Two independent nets loaded from the same files: one for the
            # synchronous reference, one for the asynchronous run.
            netSync = cv.dnn.readNet(proto, model)
            netSync.setPreferableBackend(backend)
            netSync.setPreferableTarget(target)
            netAsync = cv.dnn.readNet(proto, model)
            netAsync.setPreferableBackend(backend)
            netAsync.setPreferableTarget(target)
            # Generate inputs
            numInputs = 10
            inputs = []
            for _ in range(numInputs):
                inputs.append(np.random.standard_normal([2, 6, 75, 113]).astype(np.float32))
            # Run synchronously
            refs = []
            for i in range(numInputs):
                netSync.setInput(inputs[i])
                refs.append(netSync.forward())
            # Run asynchronously. To make test more robust, process inputs in the reversed order.
            outs = []
            for i in reversed(range(numInputs)):
                netAsync.setInput(inputs[i])
                # Inserting at the front keeps outs[i] aligned with inputs[i].
                outs.insert(0, netAsync.forwardAsync())
            for i in reversed(range(numInputs)):
                # A wait_for() result of 1 is treated as a timeout.
                if outs[i].wait_for(timeout) == 1:
                    self.fail("Timeout")
                normAssert(self, refs[i], outs[i].get(), 'Index: %d' % i, 1e-10)
 if __name__ == '__main__':
    NewOpenCVTests.bootstrap()
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -1043,6 +1043,7 @@ struct Net::Impl
        lastLayerId = 0;
        netWasAllocated = false;
        fusion = true;
        isAsync = false;
        preferableBackend = DNN_BACKEND_DEFAULT;
        preferableTarget = DNN_TARGET_CPU;
        skipInfEngineInit = false;
@ -1064,6 +1065,7 @@ struct Net::Impl
    bool netWasAllocated;
    bool fusion;
    bool isAsync;
    std::vector<int64> layersTimings;
    Mat output_blob;
@ -2321,6 +2323,9 @@ struct Net::Impl
            std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
            if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
            {
                if (isAsync)
                    CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode");
                if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
                {
                    std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
@ -2476,7 +2481,7 @@ struct Net::Impl
                }
                else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
                {
-                    forwardInfEngine(node);
+                    forwardInfEngine(ld.outputBlobsWrappers, node, isAsync);
                }
                else if (preferableBackend == DNN_BACKEND_VKCOM)
                {
@ -2535,15 +2540,6 @@ struct Net::Impl
        forwardLayer(ld);
    }
    void forwardAll()
    {
        CV_TRACE_FUNCTION();
        MapIdToLayerData::reverse_iterator last_layer = layers.rbegin();
        CV_Assert(last_layer != layers.rend());
        forwardToLayer(last_layer->second, true);
    }
    void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
    {
        std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
@ -2634,6 +2630,42 @@ struct Net::Impl
    {
        return getBlob(getPinByAlias(outputName));
    }
 #ifdef CV_CXX11
    // Returns the future stored in the Inference Engine output wrapper for the given pin.
    // Raises StsObjectNotFound for an invalid pin and StsOutOfRange when pin.oid
    // is not a valid output index of the layer. Without HAVE_INF_ENGINE it
    // always raises StsNotImplemented.
    // The future is moved out of the wrapper, so the wrapper's futureMat is
    // left in a moved-from state after this call.
    std::future<Mat> getBlobAsync(const LayerPin& pin)
    {
        CV_TRACE_FUNCTION();
 #ifdef HAVE_INF_ENGINE
        if (!pin.valid())
            CV_Error(Error::StsObjectNotFound, "Requested blob not found");
        LayerData &ld = layers[pin.lid];
        if ((size_t)pin.oid >= ld.outputBlobs.size())
        {
            CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, "
                                           "the #%d was requested", ld.name.c_str(),
                                           (int)ld.outputBlobs.size(), (int)pin.oid));
        }
        if (preferableTarget != DNN_TARGET_CPU)
        {
            CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
            // Transfer data to the CPU if required.
            ld.outputBlobsWrappers[pin.oid]->copyToHost();
        }
        CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE);
        // NOTE(review): the dynamicCast result is dereferenced without an empty() check;
        // presumably the backend assertion above guarantees the wrapper type - confirm.
        Ptr<InfEngineBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<InfEngineBackendWrapper>();
        return std::move(wrapper->futureMat);
 #else
        CV_Error(Error::StsNotImplemented, "DNN_BACKEND_INFERENCE_ENGINE backend is required");
 #endif
    }
    std::future<Mat> getBlobAsync(String outputName)
    {
        return getBlobAsync(getPinByAlias(outputName));
    }
 #endif  // CV_CXX11
 };
 Net::Net() : impl(new Net::Impl)
@ -2757,6 +2789,31 @@ Mat Net::forward(const String& outputName)
    return impl->getBlob(layerName);
 }
 // Runs a forward pass up to the layer named outputName (the last layer name
 // when outputName is empty) in asynchronous mode and returns the AsyncMat
 // obtained from impl->getBlobAsync().
 // Raises StsNotImplemented when the preferable backend is not
 // DNN_BACKEND_INFERENCE_ENGINE, or when built without CV_CXX11.
 AsyncMat Net::forwardAsync(const String& outputName)
 {
    CV_TRACE_FUNCTION();
 #ifdef CV_CXX11
    if (impl->preferableBackend != DNN_BACKEND_INFERENCE_ENGINE)
        CV_Error(Error::StsNotImplemented, "Asynchronous forward for backend which is different from DNN_BACKEND_INFERENCE_ENGINE");
    String layerName = outputName;
    if (layerName.empty())
        layerName = getLayerNames().back();
    std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
    impl->setUpNet(pins);
    impl->isAsync = true;
    try
    {
        impl->forwardToLayer(impl->getLayerData(layerName));
    }
    catch (...)
    {
        // Do not leave the net stuck in asynchronous mode when the pass fails;
        // otherwise later synchronous forward() calls would see isAsync == true.
        impl->isAsync = false;
        throw;
    }
    impl->isAsync = false;
    return impl->getBlobAsync(layerName);
 #else
    CV_Error(Error::StsNotImplemented, "Asynchronous forward without C++11");
 #endif  // CV_CXX11
 }
 void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
 {
    CV_TRACE_FUNCTION();
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@ -140,9 +140,10 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
    return blob;
 }
-void runLayer(Ptr<Layer> layer, const std::vector<Mat>& inputs,
+void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
              std::vector<Mat>& outputs)
 {
    Ptr<Layer> layer = LayerFactory::createLayerInstance(params.type, params);
    std::vector<MatShape> inpShapes(inputs.size());
    int ddepth = CV_32F;
    for (size_t i = 0; i < inputs.size(); ++i)
@ -669,14 +670,15 @@ void ONNXImporter::populateNet(Net dstNet)
                Mat blob = getBlob(node_proto, constBlobs, 1);
                CV_Assert(blob.type() == CV_32SC1);
                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    Mat input = getBlob(node_proto, constBlobs, 0);
                    Mat out = input.reshape(0, static_cast<std::vector<int> >(blob));
                    constBlobs.insert(std::make_pair(layerParams.name, out));
                    continue;
                }
                layerParams.set("dim", DictValue::arrayInt<int*>(
                            blob.ptr<int>(), blob.total() ));
                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    std::vector<Mat> inputs(1, getBlob(node_proto, constBlobs, 0)), outputs;
                    runLayer(layerParams, inputs, outputs);
                    constBlobs.insert(std::make_pair(layerParams.name, outputs[0]));
                    continue;
                }
            }
            else {
                DictValue shape = layerParams.get("shape");
@ -749,8 +751,7 @@ void ONNXImporter::populateNet(Net dstNet)
                {
                    inputs[i] = getBlob(node_proto, constBlobs, i);
                }
-                Ptr<Layer> concat = ConcatLayer::create(layerParams);
+                runLayer(layerParams, inputs, concatenated);
                runLayer(concat, inputs, concatenated);
                CV_Assert(concatenated.size() == 1);
                constBlobs.insert(std::make_pair(layerParams.name, concatenated[0]));
--- a/modules/dnn/src/op_inf_engine.cpp
+++ b/modules/dnn/src/op_inf_engine.cpp
@ -168,7 +168,6 @@ void InfEngineBackendNet::init(int targetId)
        const std::string& name = it.first;
        auto blobIt = allBlobs.find(name);
        CV_Assert(blobIt != allBlobs.end());
        inpBlobs[name] = blobIt->second;
        it.second->setPrecision(blobIt->second->precision());
    }
    for (const auto& it : cnn.getOutputsInfo())
@ -176,7 +175,6 @@ void InfEngineBackendNet::init(int targetId)
        const std::string& name = it.first;
        auto blobIt = allBlobs.find(name);
        CV_Assert(blobIt != allBlobs.end());
        outBlobs[name] = blobIt->second;
        it.second->setPrecision(blobIt->second->precision());  // Should be always FP32
    }
@ -288,6 +286,24 @@ InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::La
    return wrapToInfEngineBlob(m, reversedShape, layout);
 }
 // Creates a new blob with the same precision, layout and dims as `blob` and
 // allocates its storage. The contents of `blob` are not copied here.
 // Only FP32 and U8 precisions are handled; any other precision raises
 // StsNotImplemented.
 InferenceEngine::Blob::Ptr cloneBlob(const InferenceEngine::Blob::Ptr& blob)
 {
    InferenceEngine::Precision srcPrecision = blob->precision();
    InferenceEngine::Blob::Ptr result;
    if (srcPrecision == InferenceEngine::Precision::FP32)
        result = InferenceEngine::make_shared_blob<float>(srcPrecision, blob->layout(), blob->dims());
    else if (srcPrecision == InferenceEngine::Precision::U8)
        result = InferenceEngine::make_shared_blob<uint8_t>(srcPrecision, blob->layout(), blob->dims());
    else
        CV_Error(Error::StsNotImplemented, "Unsupported blob precision");
    result->allocate();
    return result;
 }
 InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr)
 {
    CV_Assert(!ptr.empty());
@ -800,9 +816,6 @@ void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net)
        plugin = InferenceEngine::InferencePlugin(enginePtr);
        netExec = plugin.LoadNetwork(net, {});
        infRequest = netExec.CreateInferRequest();
        infRequest.SetInput(inpBlobs);
        infRequest.SetOutput(outBlobs);
    }
    catch (const std::exception& ex)
    {
@ -828,9 +841,116 @@ void InfEngineBackendNet::addBlobs(const std::vector<Ptr<BackendWrapper> >& ptrs
    }
 }
-void InfEngineBackendNet::forward()
+void InfEngineBackendNet::InfEngineReqWrapper::makePromises(const std::vector<Ptr<BackendWrapper> >& outsWrappers)
 {
-    infRequest.Infer();
+    auto outs = infEngineWrappers(outsWrappers);
    // Purpose: prepare one promise per requested output and hand the matching
    // future to each output wrapper, so that the result of the next run of
    // this request can be delivered through outs[i]->futureMat.
    //
    // clear() followed by resize() destroys any promises left over from a
    // previous run and replaces them with new ones; a promise can hand out
    // its future only once.
    outProms.clear();
    outProms.resize(outs.size());
    outsNames.resize(outs.size());
    for (int i = 0; i < outs.size(); ++i)
    {
        // The future goes to the caller-visible wrapper; the promise stays here.
        outs[i]->futureMat = outProms[i].get_future();
        // Remember the output's name at the same index as its promise.
        outsNames[i] = outs[i]->dataPtr->name;
    }
 }
 // Runs one inference on this network, either synchronously or asynchronously.
 //
 // An idle request wrapper (isReady == true) from infRequests is reused when
 // one exists. Otherwise a new infer request is created, bound to input and
 // output blobs, given a completion callback and appended to infRequests.
 //
 // outBlobsWrappers - output wrappers whose futureMat members receive the
 //                    results; only used when isAsync is true.
 // isAsync          - true: copy the current input data into the request's
 //                    blobs, start the request and return without waiting;
 //                    false: block in Infer() until the request has finished.
 //
 // Errors: failure to create the infer request, or a network input/output
 // name missing from allBlobs, is reported through CV_Error / CV_Assert.
 // Failures inside the completion callback are delivered through the
 // corresponding futures instead of being thrown from the callback.
 //
 // NOTE(review): a wrapper is bound to shared blobs or to cloned blobs
 // according to the isAsync value of the call that created it, but the reuse
 // loop below does not look at the mode - confirm that callers never mix
 // synchronous and asynchronous calls on the same net.
 // NOTE(review): isReady is a plain bool that is written by the completion
 // callback and read here; if the callback runs on another thread this access
 // is unsynchronized - confirm.
 void InfEngineBackendNet::forward(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
                                   bool isAsync)
 {
    // Look for finished requests.
    Ptr<InfEngineReqWrapper> reqWrapper;
    for (auto& wrapper : infRequests)
    {
        if (wrapper->isReady)
        {
            reqWrapper = wrapper;
            break;
        }
    }
    if (reqWrapper.empty())
    {
        reqWrapper = Ptr<InfEngineReqWrapper>(new InfEngineReqWrapper());
        try
        {
            reqWrapper->req = netExec.CreateInferRequest();
        }
        catch (const std::exception& ex)
        {
            CV_Error(Error::StsAssert, format("Failed to initialize Inference Engine backend: %s", ex.what()));
        }

        // In asynchronous mode every request gets private copies of the blobs
        // so that several requests can be in flight at the same time.
        InferenceEngine::BlobMap inpBlobs, outBlobs;
        for (const auto& it : cnn.getInputsInfo())
        {
            const std::string& name = it.first;
            auto blobIt = allBlobs.find(name);
            CV_Assert(blobIt != allBlobs.end());
            inpBlobs[name] = isAsync ? cloneBlob(blobIt->second) : blobIt->second;
        }
        for (const auto& it : cnn.getOutputsInfo())
        {
            const std::string& name = it.first;
            auto blobIt = allBlobs.find(name);
            CV_Assert(blobIt != allBlobs.end());
            outBlobs[name] = isAsync ? cloneBlob(blobIt->second) : blobIt->second;
        }
        reqWrapper->req.SetInput(inpBlobs);
        reqWrapper->req.SetOutput(outBlobs);

        InferenceEngine::IInferRequest::Ptr infRequestPtr = reqWrapper->req;
        // The raw wrapper pointer is handed back to the callback as user data.
        infRequestPtr->SetUserData(reqWrapper.get(), 0);
        infRequestPtr->SetCompletionCallback({
            [](InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode status)
            {
                // Initialized so that the assertion below is meaningful even
                // if GetUserData leaves the output argument untouched.
                InfEngineReqWrapper* wrapper = NULL;
                request->GetUserData((void**)&wrapper, 0);
                CV_Assert(wrapper);
                for (size_t i = 0; i < wrapper->outProms.size(); ++i)
                {
                    try
                    {
                        if (status == InferenceEngine::StatusCode::OK)
                        {
                            // Output blobs are only read for a successful request.
                            const std::string& name = wrapper->outsNames[i];
                            Mat m = infEngineBlobToMat(wrapper->req.GetBlob(name));
                            // clone(): the blob is reused by the next run.
                            wrapper->outProms[i].set_value(m.clone());
                        }
                        else
                        {
                            std::runtime_error e("Async request failed");
                            wrapper->outProms[i].set_exception(std::make_exception_ptr(e));
                        }
                    }
                    catch (...)
                    {
                        // Nothing may escape the callback: forward the failure
                        // to the waiting future, or log it if that fails too.
                        try {
                            wrapper->outProms[i].set_exception(std::current_exception());
                        } catch(...) {
                            CV_LOG_ERROR(NULL, "DNN: Exception occurred during async inference exception propagation");
                        }
                    }
                }
                // Always reached, so the request becomes reusable again.
                wrapper->isReady = true;
            }
        });

        // Publish the wrapper only once it is fully configured; if any step
        // above throws, no half-initialized request is left for later reuse.
        infRequests.push_back(reqWrapper);
    }
    if (isAsync)
    {
        // Copy actual data to infer request's input blobs.
        for (const auto& it : cnn.getInputsInfo())
        {
            const std::string& name = it.first;
            auto blobIt = allBlobs.find(name);
            CV_Assert(blobIt != allBlobs.end());
            Mat srcMat = infEngineBlobToMat(blobIt->second);
            Mat dstMat = infEngineBlobToMat(reqWrapper->req.GetBlob(name));
            srcMat.copyTo(dstMat);
        }
        // Set promises to output blobs wrappers.
        reqWrapper->makePromises(outBlobsWrappers);
        // Mark the request busy before it is started.
        reqWrapper->isReady = false;
        reqWrapper->req.StartAsync();
    }
    else
    {
        reqWrapper->req.Infer();
    }
 }
 Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
@ -920,14 +1040,15 @@ bool haveInfEngine()
 #endif  // HAVE_INF_ENGINE
 }
-void forwardInfEngine(Ptr<BackendNode>& node)
+void forwardInfEngine(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
                       Ptr<BackendNode>& node, bool isAsync)
 {
    // Entry point that forwards a backend node to its Inference Engine net.
    // Asserts that Inference Engine support is available; the rest of the
    // body is compiled only when HAVE_INF_ENGINE is defined.
    CV_Assert(haveInfEngine());
 #ifdef HAVE_INF_ENGINE
    CV_Assert(!node.empty());
    // The node must actually be an InfEngineBackendNode; the cast result is
    // checked by the assertion that follows.
    Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
    CV_Assert(!ieNode.empty());
-    ieNode->net->forward();
+    ieNode->net->forward(outBlobsWrappers, isAsync);
 #endif  // HAVE_INF_ENGINE
 }
--- a/modules/dnn/src/op_inf_engine.hpp
+++ b/modules/dnn/src/op_inf_engine.hpp
@ -185,7 +185,8 @@ public:
    void init(int targetId);
-    void forward();
+    void forward(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
                 bool isAsync);
    void initPlugin(InferenceEngine::ICNNNetwork& net);
@ -197,12 +198,23 @@ private:
    InferenceEngine::InferenceEnginePluginPtr enginePtr;
    InferenceEngine::InferencePlugin plugin;
    InferenceEngine::ExecutableNetwork netExec;
    InferenceEngine::InferRequest infRequest;
    InferenceEngine::BlobMap allBlobs;
    InferenceEngine::BlobMap inpBlobs;
    InferenceEngine::BlobMap outBlobs;
    InferenceEngine::TargetDevice targetDevice;
    // Bundles one Inference Engine infer request with the state used to
    // deliver its outputs through futures.
    struct InfEngineReqWrapper
    {
        // A newly constructed wrapper is marked as ready (idle).
        InfEngineReqWrapper() : isReady(true) {}
        // Creates one promise per output wrapper in `outs`, stores the
        // matching future in each wrapper and records the outputs' names.
        void makePromises(const std::vector<Ptr<BackendWrapper> >& outs);
        // The underlying infer request.
        InferenceEngine::InferRequest req;
        // Promises for the outputs of the current run; outProms[i] belongs to
        // the output named outsNames[i].
        std::vector<std::promise<Mat> > outProms;
        std::vector<std::string> outsNames;
        // True while the request is idle: forward() only reuses wrappers with
        // isReady == true, clears the flag before StartAsync() and the
        // completion callback sets it again.
        // NOTE(review): plain bool; if the completion callback runs on another
        // thread this flag is accessed without synchronization - confirm.
        bool isReady;
    };
    std::vector<Ptr<InfEngineReqWrapper> > infRequests;
    InferenceEngine::CNNNetwork cnn;
    bool hasNetOwner;
@ -252,6 +264,7 @@ public:
    InferenceEngine::DataPtr dataPtr;
    InferenceEngine::Blob::Ptr blob;
    std::future<Mat> futureMat;
 };
 InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout = InferenceEngine::Layout::ANY);
@ -302,7 +315,8 @@ CV__DNN_INLINE_NS_END
 bool haveInfEngine();
-void forwardInfEngine(Ptr<BackendNode>& node);
+void forwardInfEngine(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
                      Ptr<BackendNode>& node, bool isAsync);
 }}  // namespace dnn, namespace cv
--- a/modules/dnn/test/test_misc.cpp
+++ b/modules/dnn/test/test_misc.cpp
@ -342,4 +342,106 @@ TEST(Net, forwardAndRetrieve)
    normAssert(outBlobs[0][1], inp.rowRange(2, 4), "second part");
 }
 #ifdef HAVE_INF_ENGINE
 // This test runs a network in synchronous mode for different inputs and then
 // runs the same model asynchronously for the same inputs.
 typedef testing::TestWithParam<Target> Async;
 // Verifies that asynchronous inference, awaited one request at a time,
 // produces exactly the same outputs as synchronous inference on the same
 // randomly generated inputs.
 TEST_P(Async, set_and_forward_single)
 {
    static const int kTimeout = 5000;  // in milliseconds.
    const int target = GetParam();

    // Half-precision targets use a separate "_fp16" variant of the model files.
    std::string suffix;
    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
        suffix = "_fp16";
    const std::string weightsPath = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin");
    const std::string topologyPath = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml");

    Net netSync = readNet(weightsPath, topologyPath);
    netSync.setPreferableTarget(target);

    Net netAsync = readNet(weightsPath, topologyPath);
    netAsync.setPreferableTarget(target);

    // Generate inputs.
    const int numInputs = 10;
    const int blobSize[] = {2, 6, 75, 113};
    std::vector<Mat> inputs(numInputs);
    for (Mat& input : inputs)
    {
        input.create(4, blobSize, CV_32FC1);
        randu(input, 0.0f, 1.0f);
    }

    // Run synchronously to obtain the reference outputs.
    std::vector<Mat> refs;
    refs.reserve(numInputs);
    for (const Mat& input : inputs)
    {
        netSync.setInput(input);
        refs.push_back(netSync.forward().clone());
    }

    // Run asynchronously. To make the test more robust, process the inputs in
    // reverse order and wait for each result before submitting the next one.
    for (int i = numInputs; i-- > 0; )
    {
        netAsync.setInput(inputs[i]);
        std::future<Mat> pending = netAsync.forwardAsync();
        const std::future_status state = pending.wait_for(std::chrono::milliseconds(kTimeout));
        if (state == std::future_status::timeout)
            CV_Error(Error::StsAssert, "Timeout");
        normAssert(refs[i], pending.get(), format("Index: %d", i).c_str(), 0, 0);
    }
 }
 // Verifies that asynchronous inference with all requests submitted before
 // any result is awaited produces exactly the same outputs as synchronous
 // inference on the same randomly generated inputs.
 TEST_P(Async, set_and_forward_all)
 {
    static const int kTimeout = 5000;  // in milliseconds.
    const int target = GetParam();

    // Half-precision targets use a separate "_fp16" variant of the model files.
    std::string suffix;
    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
        suffix = "_fp16";
    const std::string weightsPath = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin");
    const std::string topologyPath = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml");

    Net netSync = readNet(weightsPath, topologyPath);
    netSync.setPreferableTarget(target);

    Net netAsync = readNet(weightsPath, topologyPath);
    netAsync.setPreferableTarget(target);

    // Generate inputs.
    const int numInputs = 10;
    const int blobSize[] = {2, 6, 75, 113};
    std::vector<Mat> inputs(numInputs);
    for (Mat& input : inputs)
    {
        input.create(4, blobSize, CV_32FC1);
        randu(input, 0.0f, 1.0f);
    }

    // Run synchronously to obtain the reference outputs.
    std::vector<Mat> refs;
    refs.reserve(numInputs);
    for (const Mat& input : inputs)
    {
        netSync.setInput(input);
        refs.push_back(netSync.forward().clone());
    }

    // Run asynchronously. To make the test more robust, submit the inputs in
    // reverse order; every request is started before any result is awaited.
    std::vector<std::future<Mat> > outs(numInputs);
    for (int i = numInputs; i-- > 0; )
    {
        netAsync.setInput(inputs[i]);
        outs[i] = netAsync.forwardAsync();
    }

    // Collect the results in the same (reverse) order and compare.
    for (int i = numInputs; i-- > 0; )
    {
        const std::future_status state = outs[i].wait_for(std::chrono::milliseconds(kTimeout));
        if (state == std::future_status::timeout)
            CV_Error(Error::StsAssert, "Timeout");
        normAssert(refs[i], outs[i].get(), format("Index: %d", i).c_str(), 0, 0);
    }
 }
 INSTANTIATE_TEST_CASE_P(/**/, Async, testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE)));
 #endif  // HAVE_INF_ENGINE
 }} // namespace
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@ -892,52 +892,52 @@ class CV_EXPORTS_W GeneralizedHoughGuil : public GeneralizedHough
 {
 public:
    //! Angle difference in degrees between two points in feature.
-    virtual void setXi(double xi) = 0;
+    CV_WRAP virtual void setXi(double xi) = 0;
-    virtual double getXi() const = 0;
+    CV_WRAP virtual double getXi() const = 0;
    //! Feature table levels.
-    virtual void setLevels(int levels) = 0;
+    CV_WRAP virtual void setLevels(int levels) = 0;
-    virtual int getLevels() const = 0;
+    CV_WRAP virtual int getLevels() const = 0;
    //! Maximal difference between angles that are treated as equal.
-    virtual void setAngleEpsilon(double angleEpsilon) = 0;
+    CV_WRAP virtual void setAngleEpsilon(double angleEpsilon) = 0;
-    virtual double getAngleEpsilon() const = 0;
+    CV_WRAP virtual double getAngleEpsilon() const = 0;
    //! Minimal rotation angle to detect in degrees.
-    virtual void setMinAngle(double minAngle) = 0;
+    CV_WRAP virtual void setMinAngle(double minAngle) = 0;
-    virtual double getMinAngle() const = 0;
+    CV_WRAP virtual double getMinAngle() const = 0;
    //! Maximal rotation angle to detect in degrees.
-    virtual void setMaxAngle(double maxAngle) = 0;
+    CV_WRAP virtual void setMaxAngle(double maxAngle) = 0;
-    virtual double getMaxAngle() const = 0;
+    CV_WRAP virtual double getMaxAngle() const = 0;
    //! Angle step in degrees.
-    virtual void setAngleStep(double angleStep) = 0;
+    CV_WRAP virtual void setAngleStep(double angleStep) = 0;
-    virtual double getAngleStep() const = 0;
+    CV_WRAP virtual double getAngleStep() const = 0;
    //! Angle votes threshold.
-    virtual void setAngleThresh(int angleThresh) = 0;
+    CV_WRAP virtual void setAngleThresh(int angleThresh) = 0;
-    virtual int getAngleThresh() const = 0;
+    CV_WRAP virtual int getAngleThresh() const = 0;
    //! Minimal scale to detect.
-    virtual void setMinScale(double minScale) = 0;
+    CV_WRAP virtual void setMinScale(double minScale) = 0;
-    virtual double getMinScale() const = 0;
+    CV_WRAP virtual double getMinScale() const = 0;
    //! Maximal scale to detect.
-    virtual void setMaxScale(double maxScale) = 0;
+    CV_WRAP virtual void setMaxScale(double maxScale) = 0;
-    virtual double getMaxScale() const = 0;
+    CV_WRAP virtual double getMaxScale() const = 0;
    //! Scale step.
-    virtual void setScaleStep(double scaleStep) = 0;
+    CV_WRAP virtual void setScaleStep(double scaleStep) = 0;
-    virtual double getScaleStep() const = 0;
+    CV_WRAP virtual double getScaleStep() const = 0;
    //! Scale votes threshold.
-    virtual void setScaleThresh(int scaleThresh) = 0;
+    CV_WRAP virtual void setScaleThresh(int scaleThresh) = 0;
-    virtual int getScaleThresh() const = 0;
+    CV_WRAP virtual int getScaleThresh() const = 0;
    //! Position votes threshold.
-    virtual void setPosThresh(int posThresh) = 0;
+    CV_WRAP virtual void setPosThresh(int posThresh) = 0;
-    virtual int getPosThresh() const = 0;
+    CV_WRAP virtual int getPosThresh() const = 0;
 };
 //! @} imgproc_shape
@ -4175,11 +4175,11 @@ CV_EXPORTS_W int rotatedRectangleIntersection( const RotatedRect& rect1, const R
 /** @brief Creates a smart pointer to a cv::GeneralizedHoughBallard class and initializes it.
 */
-CV_EXPORTS Ptr<GeneralizedHoughBallard> createGeneralizedHoughBallard();
+CV_EXPORTS_W Ptr<GeneralizedHoughBallard> createGeneralizedHoughBallard();
 /** @brief Creates a smart pointer to a cv::GeneralizedHoughGuil class and initializes it.
 */
-CV_EXPORTS Ptr<GeneralizedHoughGuil> createGeneralizedHoughGuil();
+CV_EXPORTS_W Ptr<GeneralizedHoughGuil> createGeneralizedHoughGuil();
 //! @} imgproc_shape