Merge remote-tracking branch 'upstream/3.4' into merge-3.4

2025-07-23 04:52:54 +08:00 · 2018-06-25 22:03:17 +03:00 · 2018-06-25 22:03:17 +03:00 · b39cd06249
commit b39cd06249
parent 46def2fdc1 ab8022f74e
26 changed files with 1663 additions and 1079 deletions
--- a/3rdparty/carotene/hal/tegra_hal.hpp
+++ b/3rdparty/carotene/hal/tegra_hal.hpp
@ -1531,7 +1531,7 @@ class TegraCvtColor_##name##_Invoker : public cv::ParallelLoopBody \
 public: \
    TegraCvtColor_##name##_Invoker(const uchar * src_data_, size_t src_step_, uchar * dst_data_, size_t dst_step_, int width_, int height_) : \
        cv::ParallelLoopBody(), src_data(src_data_), src_step(src_step_), dst_data(dst_data_), dst_step(dst_step_), width(width_), height(height_) {} \
-    virtual void operator()(const cv::Range& range) const \
+    virtual void operator()(const cv::Range& range) const CV_OVERRIDE \
    { \
        CAROTENE_NS::func(CAROTENE_NS::Size2D(width, range.end-range.start), __VA_ARGS__); \
    } \
--- a/apps/CMakeLists.txt
+++ b/apps/CMakeLists.txt
@ -1,6 +1,39 @@
 add_definitions(-D__OPENCV_BUILD=1)
 add_definitions(-D__OPENCV_APPS=1)
 # Unified function for creating OpenCV applications:
 #   ocv_add_application(tgt [MODULES <m1> [<m2> ...]] SRCS <src1> [<src2> ...])
 #
 # Arguments:
 #   tgt     - executable target name; also used as the project() name and OUTPUT_NAME
 #   MODULES - OpenCV modules that are dependency-checked, used for include paths and linked
 #   SRCS    - source files of the executable
 #
 # The function returns without creating a target when ocv_check_dependencies()
 # leaves OCV_DEPENDENCIES_FOUND false for the requested modules.
 function(ocv_add_application the_target)
   cmake_parse_arguments(APP "" "" "MODULES;SRCS" ${ARGN})

   ocv_check_dependencies(${APP_MODULES})
   if(NOT OCV_DEPENDENCIES_FOUND)
     return()
   endif()

   project(${the_target})
   ocv_target_include_modules_recurse(${the_target} ${APP_MODULES})
   ocv_target_include_directories(${the_target} PRIVATE "${OpenCV_SOURCE_DIR}/include/opencv")
   ocv_add_executable(${the_target} ${APP_SRCS})
   ocv_target_link_libraries(${the_target} ${APP_MODULES})

   # Property values are quoted: set_target_properties() takes strict key/value
   # pairs, so an empty or ';'-containing path must stay a single argument.
   set_target_properties(${the_target} PROPERTIES
     DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
     ARCHIVE_OUTPUT_DIRECTORY "${LIBRARY_OUTPUT_PATH}"
     RUNTIME_OUTPUT_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}"
     OUTPUT_NAME "${the_target}"
   )

   if(ENABLE_SOLUTION_FOLDERS)
     set_target_properties(${the_target} PROPERTIES FOLDER "applications")
   endif()

   # Distribution packages install the application only for shared builds and
   # only for the Release configuration; regular builds always install it.
   if(INSTALL_CREATE_DISTRIB)
     if(BUILD_SHARED_LIBS)
       install(TARGETS ${the_target}
               RUNTIME DESTINATION "${OPENCV_BIN_INSTALL_PATH}"
               CONFIGURATIONS Release
               COMPONENT dev)
     endif()
   else()
     install(TARGETS ${the_target}
             RUNTIME DESTINATION "${OPENCV_BIN_INSTALL_PATH}"
             COMPONENT dev)
   endif()
 endfunction()
 link_libraries(${OPENCV_LINKER_LIBS})
 macro(ocv_add_app directory)
--- a/apps/annotation/CMakeLists.txt
+++ b/apps/annotation/CMakeLists.txt
@ -1,36 +1,3 @@
-SET(OPENCV_ANNOTATION_DEPS opencv_core opencv_highgui opencv_imgproc opencv_imgcodecs opencv_videoio)
+ocv_add_application(opencv_annotation
-ocv_check_dependencies(${OPENCV_ANNOTATION_DEPS})
+    MODULES opencv_core opencv_highgui opencv_imgproc opencv_imgcodecs opencv_videoio
-
+    SRCS opencv_annotation.cpp)
 if(NOT OCV_DEPENDENCIES_FOUND)
   return()
 endif()
 project(annotation)
 set(the_target opencv_annotation)
 ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv")
 ocv_target_include_modules_recurse(${the_target} ${OPENCV_ANNOTATION_DEPS})
 file(GLOB SRCS *.cpp)
 set(annotation_files ${SRCS})
 ocv_add_executable(${the_target} ${annotation_files})
 ocv_target_link_libraries(${the_target} ${OPENCV_ANNOTATION_DEPS})
 set_target_properties(${the_target} PROPERTIES
                      DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
                      ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
                      RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
                      OUTPUT_NAME "opencv_annotation")
 if(ENABLE_SOLUTION_FOLDERS)
   set_target_properties(${the_target} PROPERTIES FOLDER "applications")
 endif()
 if(INSTALL_CREATE_DISTRIB)
   if(BUILD_SHARED_LIBS)
      install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT dev)
   endif()
 else()
   install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT dev)
 endif()
--- a/apps/createsamples/CMakeLists.txt
+++ b/apps/createsamples/CMakeLists.txt
@ -1,38 +1,4 @@
 set(OPENCV_CREATESAMPLES_DEPS opencv_core opencv_imgproc opencv_objdetect opencv_imgcodecs opencv_highgui opencv_calib3d opencv_features2d opencv_videoio)
 ocv_check_dependencies(${OPENCV_CREATESAMPLES_DEPS})
 if(NOT OCV_DEPENDENCIES_FOUND)
  return()
 endif()
 project(createsamples)
 set(the_target opencv_createsamples)
 ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv")
 ocv_target_include_modules_recurse(${the_target} ${OPENCV_CREATESAMPLES_DEPS})
 file(GLOB SRCS *.cpp)
-file(GLOB HDRS *.h*)
+ocv_add_application(opencv_createsamples
-
+    MODULES opencv_core opencv_imgproc opencv_objdetect opencv_imgcodecs opencv_highgui opencv_calib3d opencv_features2d opencv_videoio
-set(createsamples_files ${SRCS} ${HDRS})
+    SRCS ${SRCS})
 ocv_add_executable(${the_target} ${createsamples_files})
 ocv_target_link_libraries(${the_target} ${OPENCV_CREATESAMPLES_DEPS})
 set_target_properties(${the_target} PROPERTIES
                      DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
                      ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
                      RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
                      OUTPUT_NAME "opencv_createsamples")
 if(ENABLE_SOLUTION_FOLDERS)
  set_target_properties(${the_target} PROPERTIES FOLDER "applications")
 endif()
 if(INSTALL_CREATE_DISTRIB)
  if(BUILD_SHARED_LIBS)
    install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT dev)
  endif()
 else()
  install(TARGETS ${the_target} OPTIONAL RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT dev)
 endif()
--- a/apps/interactive-calibration/CMakeLists.txt
+++ b/apps/interactive-calibration/CMakeLists.txt
@ -1,41 +1,6 @@
-set(OPENCV_INTERACTIVECALIBRATION_DEPS opencv_core opencv_imgproc opencv_features2d opencv_highgui opencv_calib3d opencv_videoio)
+set(DEPS opencv_core opencv_imgproc opencv_features2d opencv_highgui opencv_calib3d opencv_videoio)
 if(${BUILD_opencv_aruco})
-    list(APPEND OPENCV_INTERACTIVECALIBRATION_DEPS opencv_aruco)
+    list(APPEND DEPS opencv_aruco)
 endif()
 ocv_check_dependencies(${OPENCV_INTERACTIVECALIBRATION_DEPS})
 if(NOT OCV_DEPENDENCIES_FOUND)
  return()
 endif()
 project(interactive-calibration)
 set(the_target opencv_interactive-calibration)
 ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv")
 ocv_target_include_modules_recurse(${the_target} ${OPENCV_INTERACTIVECALIBRATION_DEPS})
 file(GLOB SRCS *.cpp)
-file(GLOB HDRS *.h*)
+ocv_add_application(opencv_interactive-calibration MODULES ${DEPS} SRCS ${SRCS})
 set(interactive-calibration_files ${SRCS} ${HDRS})
 ocv_add_executable(${the_target} ${interactive-calibration_files})
 ocv_target_link_libraries(${the_target} ${OPENCV_INTERACTIVECALIBRATION_DEPS})
 set_target_properties(${the_target} PROPERTIES
                      DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
                      ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
                      RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
                    OUTPUT_NAME "opencv_interactive-calibration")
 if(ENABLE_SOLUTION_FOLDERS)
  set_target_properties(${the_target} PROPERTIES FOLDER "applications")
 endif()
 if(INSTALL_CREATE_DISTRIB)
  if(BUILD_SHARED_LIBS)
    install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT dev)
  endif()
 else()
  install(TARGETS ${the_target} OPTIONAL RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT dev)
 endif()
--- a/apps/traincascade/CMakeLists.txt
+++ b/apps/traincascade/CMakeLists.txt
@ -1,42 +1,5 @@
-set(OPENCV_TRAINCASCADE_DEPS opencv_core opencv_imgproc opencv_objdetect opencv_imgcodecs opencv_highgui opencv_calib3d opencv_features2d)
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Woverloaded-virtual -Winconsistent-missing-override -Wsuggest-override)
 ocv_check_dependencies(${OPENCV_TRAINCASCADE_DEPS})
 if(NOT OCV_DEPENDENCIES_FOUND)
  return()
 endif()
 project(traincascade)
 set(the_target opencv_traincascade)
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Woverloaded-virtual
                                     -Winconsistent-missing-override -Wsuggest-override
 )
 ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv")
 ocv_target_include_modules_recurse(${the_target} ${OPENCV_TRAINCASCADE_DEPS})
 file(GLOB SRCS *.cpp)
-file(GLOB HDRS *.h*)
+ocv_add_application(opencv_traincascade
-
+    MODULES opencv_core opencv_imgproc opencv_objdetect opencv_imgcodecs opencv_highgui opencv_calib3d opencv_features2d
-set(traincascade_files ${SRCS} ${HDRS})
+    SRCS ${SRCS})
 ocv_add_executable(${the_target} ${traincascade_files})
 ocv_target_link_libraries(${the_target} ${OPENCV_TRAINCASCADE_DEPS})
 set_target_properties(${the_target} PROPERTIES
                      DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
                      ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
                      RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
                      OUTPUT_NAME "opencv_traincascade")
 if(ENABLE_SOLUTION_FOLDERS)
  set_target_properties(${the_target} PROPERTIES FOLDER "applications")
 endif()
 if(INSTALL_CREATE_DISTRIB)
  if(BUILD_SHARED_LIBS)
    install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT dev)
  endif()
 else()
  install(TARGETS ${the_target} OPTIONAL RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT dev)
 endif()
--- a/apps/version/CMakeLists.txt
+++ b/apps/version/CMakeLists.txt
@ -1,49 +1,5 @@
-set(OPENCV_APPLICATION_DEPS opencv_core)
+ocv_add_application(opencv_version MODULES opencv_core SRCS opencv_version.cpp)
 ocv_check_dependencies(${OPENCV_APPLICATION_DEPS})
 if(NOT OCV_DEPENDENCIES_FOUND)
  return()
 endif()
 project(opencv_version)
 set(the_target opencv_version)
 ocv_target_include_modules_recurse(${the_target} ${OPENCV_APPLICATION_DEPS})
 ocv_add_executable(${the_target} opencv_version.cpp)
 ocv_target_link_libraries(${the_target} ${OPENCV_APPLICATION_DEPS})
 set_target_properties(${the_target} PROPERTIES
                      DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
                      RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
                      OUTPUT_NAME "opencv_version")
 set_target_properties(${the_target} PROPERTIES FOLDER "applications")
 if(INSTALL_CREATE_DISTRIB)
  if(BUILD_SHARED_LIBS)
    install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT libs)
  endif()
 else()
  install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT libs)
 endif()
 if(WIN32)
-  project(opencv_version_win32)
+  ocv_add_application(opencv_version_win32 MODULES opencv_core SRCS opencv_version.cpp)
-  set(the_target opencv_version_win32)
+  target_compile_definitions(opencv_version_win32 PRIVATE "OPENCV_WIN32_API=1")
  ocv_target_include_modules_recurse(${the_target} ${OPENCV_APPLICATION_DEPS})
  ocv_add_executable(${the_target} opencv_version.cpp)
  ocv_target_link_libraries(${the_target} ${OPENCV_APPLICATION_DEPS})
  target_compile_definitions(${the_target} PRIVATE "OPENCV_WIN32_API=1")
  set_target_properties(${the_target} PROPERTIES
                        DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
                        RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
                        OUTPUT_NAME "opencv_version_win32")
  set_target_properties(${the_target} PROPERTIES FOLDER "applications")
  if(INSTALL_CREATE_DISTRIB)
    if(BUILD_SHARED_LIBS)
      install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT libs)
    endif()
  else()
    install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT libs)
  endif()
 endif()
--- a/apps/visualisation/CMakeLists.txt
+++ b/apps/visualisation/CMakeLists.txt
@ -1,36 +1,3 @@
-SET(OPENCV_VISUALISATION_DEPS opencv_core opencv_highgui opencv_imgproc opencv_videoio opencv_imgcodecs)
+ocv_add_application(opencv_visualisation
-ocv_check_dependencies(${OPENCV_VISUALISATION_DEPS})
+    MODULES opencv_core opencv_highgui opencv_imgproc opencv_videoio opencv_imgcodecs
-
+    SRCS opencv_visualisation.cpp)
 if(NOT OCV_DEPENDENCIES_FOUND)
   return()
 endif()
 project(visualisation)
 set(the_target opencv_visualisation)
 ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv")
 ocv_target_include_modules_recurse(${the_target} ${OPENCV_VISUALISATION_DEPS})
 file(GLOB SRCS *.cpp)
 set(visualisation_files ${SRCS})
 ocv_add_executable(${the_target} ${visualisation_files})
 ocv_target_link_libraries(${the_target} ${OPENCV_VISUALISATION_DEPS})
 set_target_properties(${the_target} PROPERTIES
                      DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
                      ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
                      RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
                      OUTPUT_NAME "opencv_visualisation")
 if(ENABLE_SOLUTION_FOLDERS)
   set_target_properties(${the_target} PROPERTIES FOLDER "applications")
 endif()
 if(INSTALL_CREATE_DISTRIB)
   if(BUILD_SHARED_LIBS)
      install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT dev)
   endif()
 else()
   install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT dev)
 endif()
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@ -361,6 +361,23 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
        static Ptr<PermuteLayer> create(const LayerParams& params);
    };
    /**
     * @brief Permute channels of 4-dimensional input blob.
     * @param group Number of groups to split input channels and pick in turns
     *              into output blob. The number of input channels has to be
     *              divisible by this value.
     *
     * \f[ groupSize = \frac{number\ of\ channels}{group} \f]
     * \f[ output(n, c, h, w) = input(n, groupSize \times (c \% group) + \lfloor \frac{c}{group} \rfloor, h, w) \f]
     * Read more at https://arxiv.org/pdf/1707.01083.pdf
     */
    class CV_EXPORTS ShuffleChannelLayer : public Layer
    {
    public:
        /** @brief Creates the layer; the "group" entry of @p params (default 1) initializes #group. */
        static Ptr<Layer> create(const LayerParams& params);
        /** Number of channel groups; with a value of 1 the channel order is left unchanged. */
        int group;
    };
    /**
     * @brief Adds extra values for specific axes.
     * @param paddings Vector of paddings in format
@ -575,6 +592,17 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
        static Ptr<ResizeLayer> create(const LayerParams& params);
    };
    /**
     * @brief Bilinear resize layer from https://github.com/cdmh/deeplab-public
     *
     * It differs from @ref ResizeLayer in output shape and resize scales computations:
     * when no explicit output size is given, each spatial output size is
     * \f$ 1 + zoomFactor \times (inputSize - 1) \f$, and the resize scale is
     * \f$ \frac{inputSize - 1}{outputSize - 1} \f$ (0 when the output size is 1).
     */
    class CV_EXPORTS InterpLayer : public Layer
    {
    public:
        /** @brief Creates the layer from a copy of @p params with "interpolation" set to "bilinear". */
        static Ptr<Layer> create(const LayerParams& params);
    };
    class CV_EXPORTS ProposalLayer : public Layer
    {
    public:
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -988,52 +988,26 @@ struct Net::Impl
        ld.inputBlobsId[inNum] = from;
    }
    // Splits a pin alias at its first '.' character.
    //   layerName receives everything before the dot (the whole alias if there is no dot);
    //   outName receives everything after the dot (an empty String if there is no dot).
    static void splitPin(const String &pinAlias, String &layerName, String &outName)
    {
        const size_t dot = pinAlias.find('.');
        layerName = pinAlias.substr(0, dot);
        if (dot == String::npos)
            outName = String();
        else
            outName = pinAlias.substr(dot + 1);
    }
    // Translates the output part of a pin alias into an output index of layer `ld`.
    //   - an empty name selects output 0;
    //   - a name consisting only of decimal digits is taken as the index itself
    //     (it must fit into an int);
    //   - any other name is resolved by the layer's outputNameToIndex().
    int resolvePinOutputName(LayerData &ld, const String &outName)
    {
        if (outName.empty())
            return 0;

        // std::isdigit() requires a value representable as unsigned char (or EOF);
        // passing a plain, possibly negative char is undefined behaviour.
        if (std::isdigit(static_cast<unsigned char>(outName[0])))
        {
            char *lastChar = NULL;
            long inum = std::strtol(outName.c_str(), &lastChar, 10);

            // Accept the number only if the whole string was consumed.
            if (*lastChar == 0)
            {
                CV_Assert(inum == (int)inum);
                return (int)inum;
            }
        }

        return ld.getLayerInstance()->outputNameToIndex(outName);
    }
-    LayerPin getPinByAlias(const String &pinAlias)
+    LayerPin getPinByAlias(const String &layerName)
    {
        LayerPin pin;
        String layerName, outName;
        splitPin(pinAlias, layerName, outName);
        pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);
        if (pin.lid >= 0)
-            pin.oid = resolvePinOutputName(getLayerData(pin.lid), outName);
+            pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);
        return pin;
    }
-    std::vector<LayerPin> getLayerOutPins(const String &pinAlias)
+    std::vector<LayerPin> getLayerOutPins(const String &layerName)
    {
        String layerName, outName;
        splitPin(pinAlias, layerName, outName);
        int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
        std::vector<LayerPin> pins;
@ -2044,12 +2018,6 @@ int Net::addLayer(const String &name, const String &type, LayerParams &params)
 {
    CV_TRACE_FUNCTION();
    if (name.find('.') != String::npos)
    {
        CV_Error(Error::StsBadArg, "Added layer name \"" + name + "\" must not contain dot symbol");
        return -1;
    }
    if (impl->getLayerId(name) >= 0)
    {
        CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net");
@ -2689,7 +2657,7 @@ int Layer::inputNameToIndex(String)
 int Layer::outputNameToIndex(const String&)
 {
-    return -1;
+    return 0;
 }
 bool Layer::supportBackend(int backendId)
--- a/modules/dnn/src/init.cpp
+++ b/modules/dnn/src/init.cpp
@ -84,6 +84,7 @@ void initializeLayerFactory()
    CV_DNN_REGISTER_LAYER_CLASS(Reshape,        ReshapeLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Flatten,        FlattenLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Resize,         ResizeLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Interp,         InterpLayer);
    CV_DNN_REGISTER_LAYER_CLASS(CropAndResize,  CropAndResizeLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Convolution,    ConvolutionLayer);
@ -115,6 +116,7 @@ void initializeLayerFactory()
    CV_DNN_REGISTER_LAYER_CLASS(Crop,           CropLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Eltwise,        EltwiseLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Permute,        PermuteLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ShuffleChannel, ShuffleChannelLayer);
    CV_DNN_REGISTER_LAYER_CLASS(PriorBox,       PriorBoxLayer);
    CV_DNN_REGISTER_LAYER_CLASS(PriorBoxClustered, PriorBoxLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Reorg,          ReorgLayer);
--- a/modules/dnn/src/layers/crop_and_resize_layer.cpp
+++ b/modules/dnn/src/layers/crop_and_resize_layer.cpp
@ -1,3 +1,9 @@
 // This file is part of OpenCV project.
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 // Copyright (C) 2018, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 #include "../precomp.hpp"
 #include "layers_common.hpp"
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@ -310,7 +310,6 @@ public:
            innerProductOp = Ptr<OCL4DNNInnerProduct<float> >(new OCL4DNNInnerProduct<float>(config));
        }
        UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
        for (size_t i = 0; i < inputs.size(); i++)
        {
            MatShape inshape, outshape;
@ -320,7 +319,6 @@ public:
            UMat srcMat, dstMat;
            srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
            dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
            dstMat.setTo(0.0f);
            if (!innerProductOp->Forward(srcMat, (use_half) ? half_blobs[0] : umat_blobs[0],
                                         (bias) ? (use_half ? half_blobs[1] : umat_blobs[1]) : UMat(),
@ -332,6 +330,7 @@ public:
            if (!use_half && bias && (outerSize > 1))
            {
                UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
                UMat& biases = umat_blobs[1];
                cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
            }
@ -354,6 +353,7 @@ public:
            if (bias)
            {
                UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
                UMat& biases = umat_blobs[1];
                cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
            }
--- a/modules/dnn/src/layers/resize_layer.cpp
+++ b/modules/dnn/src/layers/resize_layer.cpp
@ -11,7 +11,7 @@
 namespace cv { namespace dnn {
-class ResizeLayerImpl CV_FINAL : public ResizeLayer
+class ResizeLayerImpl : public ResizeLayer
 {
 public:
    ResizeLayerImpl(const LayerParams& params)
@ -33,7 +33,7 @@ public:
        interpolation = params.get<String>("interpolation");
        CV_Assert(interpolation == "nearest" || interpolation == "bilinear");
-        alignCorners = params.get<bool>("align_corners", false);
+        bool alignCorners = params.get<bool>("align_corners", false);
        if (alignCorners)
            CV_Error(Error::StsNotImplemented, "Resize with align_corners=true is not implemented");
    }
@ -66,6 +66,8 @@ public:
            outHeight = outputs[0].size[2];
            outWidth = outputs[0].size[3];
        }
        scaleHeight = static_cast<float>(inputs[0]->size[2]) / outHeight;
        scaleWidth = static_cast<float>(inputs[0]->size[3]) / outWidth;
    }
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
@ -103,8 +105,6 @@ public:
            const int inpWidth = inp.size[3];
            const int inpSpatialSize = inpHeight * inpWidth;
            const int outSpatialSize = outHeight * outWidth;
            const float heightScale = static_cast<float>(inpHeight) / (outHeight);
            const float widthScale = static_cast<float>(inpWidth) / (outWidth);
            const int numPlanes = inp.size[0] * inp.size[1];
            CV_Assert(inp.isContinuous(), out.isContinuous());
@ -112,13 +112,13 @@ public:
            Mat outPlanes = out.reshape(1, numPlanes * outHeight);
            for (int y = 0; y < outHeight; ++y)
            {
-                float input_y = y * heightScale;
+                float input_y = y * scaleHeight;
                int y0 = static_cast<int>(input_y);
                const float* inpData_row0 = inpPlanes.ptr<float>(y0);
                const float* inpData_row1 = inpPlanes.ptr<float>(std::min(y0 + 1, inpHeight - 1));
                for (int x = 0; x < outWidth; ++x)
                {
-                    float input_x = x * widthScale;
+                    float input_x = x * scaleWidth;
                    int x0 = static_cast<int>(input_x);
                    int x1 = std::min(x0 + 1, inpWidth - 1);
@ -162,10 +162,10 @@ public:
        return Ptr<BackendNode>();
    }
-private:
+protected:
    int outWidth, outHeight, zoomFactorWidth, zoomFactorHeight;
    String interpolation;
-    bool alignCorners;
+    float scaleWidth, scaleHeight;
 };
@ -174,5 +174,44 @@ Ptr<ResizeLayer> ResizeLayer::create(const LayerParams& params)
    return Ptr<ResizeLayer>(new ResizeLayerImpl(params));
 }
 // Resize layer variant registered as "Interp". It reuses the ResizeLayerImpl
 // members and only overrides how the output shape and the resize scales
 // are computed.
 class InterpLayerImpl CV_FINAL : public ResizeLayerImpl
 {
 public:
    InterpLayerImpl(const LayerParams& params) : ResizeLayerImpl(params) {}

    // Output shape equals the single 4-dimensional input shape except for the two
    // last dimensions: an explicit outHeight/outWidth is used when positive,
    // otherwise the size is 1 + zoomFactor * (inputSize - 1).
    // Returns true when input and output spatial sizes match (in-place is possible).
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        // Single-expression form, same as in the other layers of this module;
        // the second check is evaluated only when exactly one input is present.
        CV_Assert(inputs.size() == 1 && inputs[0].size() == 4);
        outputs.resize(1, inputs[0]);
        outputs[0][2] = outHeight > 0 ? outHeight : (1 + zoomFactorHeight * (outputs[0][2] - 1));
        outputs[0][3] = outWidth > 0 ? outWidth : (1 + zoomFactorWidth * (outputs[0][3] - 1));
        // We can work in-place (do nothing) if input shape == output shape.
        return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
    }

    // Takes the output size from the allocated output blob when none was
    // configured, then computes the scales as (input - 1) / (output - 1).
    virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
    {
        if (!outWidth && !outHeight)
        {
            outHeight = outputs[0].size[2];
            outWidth = outputs[0].size[3];
        }
        int inpHeight = inputs[0]->size[2];
        int inpWidth = inputs[0]->size[3];
        // A one-pixel output has no step between samples; the scale is set to 0
        // to avoid dividing by zero.
        scaleHeight = (outHeight > 1) ? (static_cast<float>(inpHeight - 1) / (outHeight - 1)) : 0.f;
        scaleWidth = (outWidth > 1) ? (static_cast<float>(inpWidth - 1) / (outWidth - 1)) : 0.f;
    }
 };
 // Factory for the "Interp" layer: works on a copy of the given parameters in
 // which the interpolation mode is set to "bilinear".
 Ptr<Layer> InterpLayer::create(const LayerParams& params)
 {
    LayerParams bilinearParams(params);
    bilinearParams.set("interpolation", "bilinear");
    Ptr<Layer> layer(new InterpLayerImpl(bilinearParams));
    return layer;
 }
 }  // namespace dnn
 }  // namespace cv
--- a/modules/dnn/src/layers/shuffle_channel_layer.cpp
+++ b/modules/dnn/src/layers/shuffle_channel_layer.cpp
@ -0,0 +1,104 @@
 // This file is part of OpenCV project.
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 // Copyright (C) 2018, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 #include "../precomp.hpp"
 namespace cv { namespace dnn {
 // Implementation of ShuffleChannelLayer (see https://arxiv.org/pdf/1707.01083.pdf).
 // The shuffle is expressed through a PermuteLayer: the blob is viewed as
 // [N, group, C / group, H * W] and axes 1 and 2 are swapped.
 class ShuffleChannelLayerImpl CV_FINAL : public ShuffleChannelLayer
 {
 public:
    ShuffleChannelLayerImpl(const LayerParams& params)
    {
        group = params.get<int>("group", 1);
        // Fill the generic Layer fields (name, type, blobs); `name` is used by
        // the trace calls in forward().
        setParamsFrom(params);
    }

    // Output shapes are those produced by Layer::getMemoryShapes() for the single
    // 4-dimensional input. Returns true (in-place allowed) only when group == 1.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 1 && inputs[0].size() == 4);
        // `group` is used as a divisor below and in finalize(); reject
        // non-positive values before they reach the modulo operation.
        CV_Assert(group > 0);
        CV_Assert(inputs[0][1] % group == 0);
        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        return group == 1;
    }

    // Creates and finalizes the internal permute layer when group != 1.
    virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
    {
        if (group != 1)
        {
            LayerParams lp;
            // Integer axis order for DictValue::arrayInt: swap axes 1 and 2.
            int order[] = {0, 2, 1, 3};
            lp.set("order", DictValue::arrayInt(&order[0], 4));
            permute = PermuteLayer::create(lp);

            Mat inp = *inputs[0];
            Mat out = outputs[0];

            // Input view: [N, group, C / group, H * W].
            permuteInpShape.resize(4);
            permuteInpShape[0] = inp.size[0];
            permuteInpShape[1] = group;
            permuteInpShape[2] = inp.size[1] / group;
            permuteInpShape[3] = inp.size[2]*inp.size[3];

            // Output view: the same with axes 1 and 2 exchanged.
            permuteOutShape.resize(4);
            permuteOutShape[0] = permuteInpShape[0];
            permuteOutShape[1] = permuteInpShape[2];
            permuteOutShape[2] = permuteInpShape[1];
            permuteOutShape[3] = permuteInpShape[3];

            inp = inp.reshape(1, permuteInpShape);
            out = out.reshape(1, permuteOutShape);

            std::vector<Mat*> permuteInputs(1, &inp);
            std::vector<Mat> permuteOutputs(1, out);
            permute->finalize(permuteInputs, permuteOutputs);
        }
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
        Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
    }

    // Runs the permutation on reshaped views of the blobs, or copies the input to
    // the output when no permute layer was created. Nothing is done when input
    // and output share the same data pointer.
    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        Mat inp = *inputs[0];
        Mat out = outputs[0];
        if (inp.data != out.data)
        {
            if (!permute.empty())
            {
                inp = inp.reshape(1, permuteInpShape);
                out = out.reshape(1, permuteOutShape);
                std::vector<Mat*> permuteInputs(1, &inp);
                std::vector<Mat> permuteOutputs(1, out);
                permute->forward(permuteInputs, permuteOutputs, internals);
            }
            else
                inp.copyTo(out);
        }
    }

 private:
    Ptr<PermuteLayer> permute;                          // created in finalize() when group != 1
    std::vector<int> permuteInpShape, permuteOutShape;  // 4-d views used for the permutation
 };
 // Factory: wraps a new ShuffleChannelLayerImpl built from `params` into a Ptr<Layer>.
 Ptr<Layer> ShuffleChannelLayer::create(const LayerParams& params)
 {
    Ptr<Layer> layer(new ShuffleChannelLayerImpl(params));
    return layer;
 }
 }  // namespace dnn
 }  // namespace cv
--- a/modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp
+++ b/modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp
--- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
+++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@ -306,6 +306,7 @@ class OCL4DNNConvSpatial
        std::string kernel_name_;
        std::string cache_path_;
        bool use_cache_path_; // true if cache_path_ directory exists
        bool run_auto_tuning_;
        bool force_auto_tuning_;
        int32_t kernel_index_;
        std::vector< cv::Ptr<kernelConfig> > kernelQueue;
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@ -55,6 +55,7 @@
 #include "../include/math_functions.hpp"
 #include "../include/default_kernel_config.hpp"
 #include "opencv2/dnn/shape_utils.hpp"
 #include "opencv2/core/utils/logger.hpp"
 #if defined WIN32 || defined _WIN32
 #include <windows.h>
@ -67,6 +68,69 @@ typedef std::map<std::string, std::string> kernel_hash_t;
 static kernel_hash_t kernelConfigMap;
 static bool defaultConfigLoaded = false;
 // Returns a copy of `s` in which every character other than an ASCII digit,
 // an ASCII letter or '_' is replaced by '_'.
 static std::string sanitize(const std::string& s)
 {
    std::string result(s);
    for (std::string::iterator it = result.begin(); it != result.end(); ++it)
    {
        const char ch = *it;
        const bool isDigit = (ch >= '0' && ch <= '9');
        const bool isLower = (ch >= 'a' && ch <= 'z');
        const bool isUpper = (ch >= 'A' && ch <= 'Z');
        if (isDigit || isLower || isUpper || ch == '_')
            continue;  // allowed character, keep it
        *it = '_';
    }
    // TODO add hash?
    // s_ = s_ + cv::format("_%08llx", crc64((uchar*)s.c_str(), s.size()));
    return result;
 }
 // Fills kernelConfigMap with the built-in fp32 and fp16 kernel configurations.
 // Both tables are read as consecutive (name, value) pairs; every key is the
 // name prefixed with "Intel(R) Corporation_". When `cache_path` is not empty,
 // an entry is skipped if the file `cache_path + sanitize(key)` can be opened.
 // Must be called only once, while the map is still empty.
 static void initializeGlobalBuiltinConfigurations(const std::string& cache_path)
 {
    CV_Assert(defaultConfigLoaded == false);
    CV_Assert(kernelConfigMap.empty());

    const std::string vendorPrefix("Intel(R) Corporation_");
    const bool probeCache = !cache_path.empty();

    /* fp32 config */
    const size_t fp32Pairs = sizeof(default_kernel_config_intel_fp32) /
                             sizeof(default_kernel_config_intel_fp32[0]) / 2;
    for (size_t idx = 0; idx < fp32Pairs; ++idx)
    {
        const std::string key = vendorPrefix + default_kernel_config_intel_fp32[2 * idx];
        bool externalFound = false;
        if (probeCache)
        {
            const std::string cacheFile = cache_path + sanitize(key);
            std::ifstream cachedKernel(cacheFile.c_str());
            externalFound = !cachedKernel.fail();
        }
        if (!externalFound)  // no external configuration, use the builtin one
        {
            kernelConfigMap.insert(std::pair<std::string, std::string>(
                    key,
                    default_kernel_config_intel_fp32[2 * idx + 1]));
        }
    }

    /* fp16 config */
    const size_t fp16Pairs = sizeof(default_kernel_config_intel_fp16) /
                             sizeof(default_kernel_config_intel_fp16[0]) / 2;
    for (size_t idx = 0; idx < fp16Pairs; ++idx)
    {
        const std::string key = vendorPrefix + default_kernel_config_intel_fp16[2 * idx];
        bool externalFound = false;
        if (probeCache)
        {
            const std::string cacheFile = cache_path + sanitize(key);
            std::ifstream cachedKernel(cacheFile.c_str());
            externalFound = !cachedKernel.fail();
        }
        if (!externalFound)  // no external configuration, use the builtin one
        {
            kernelConfigMap.insert(std::pair<std::string, std::string>(
                    key,
                    default_kernel_config_intel_fp16[2 * idx + 1]));
        }
    }

    defaultConfigLoaded = true;
 }
 template<typename Dtype>
 OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
 {
@ -139,9 +203,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
        }
    }
-    force_auto_tuning_ =
+    run_auto_tuning_ = use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false);
-            (use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false))
+    force_auto_tuning_ = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false);
            || utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false);
 }
 template<typename Dtype>
@ -272,40 +335,38 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
        // options
        options_ << " -cl-fast-relaxed-math -D KERNEL_IDLF -D convolve_simd=" << kernel_name_;
        options_ << " -cl-mad-enable";
        if (clOptionSupport("-cl-no-subgroup-ifp"))
            options_ << " -cl-no-subgroup-ifp ";
        // defs
        int32_t output_width = output_w_;
        int32_t output_height = output_h_;
        int32_t output_block_width = blockM;
        int32_t output_block_height = blockK;
-        const int32_t last_block_width = (output_width % output_block_width == 0) ?
+        int tile_x = (output_block_width - 1) * stride_w_ + kernel_w_ * dilation_w_;
-                                        output_block_width : output_width % output_block_width;
+        int tile_y = (output_block_height - 1) * stride_h_ + kernel_h_ * dilation_h_;
-        const int32_t last_block_height = (output_height % output_block_height == 0) ?
+        int invec_size = tile_y;
                                         output_block_height : output_height % output_block_height;
        int tile_x = alignSize((output_block_width - 1) * stride_w_ + kernel_w_ * dilation_w_, 4);
        int tile_y = (output_block_height -1) * stride_h_ + kernel_h_ * dilation_h_;
        int tile_y_stride = (4 * simd_size) / tile_x;
        int invec_size = divUp(tile_y, tile_y_stride);
        addDef("SIMD_SIZE", simd_size);
        addDef("filter_qualifier", "__global");
        addDef("OUT_BLOCK_WIDTH", output_block_width);
        addDef("OUT_BLOCK_HEIGHT", output_block_height);
        addDef("LAST_BLOCK_WIDTH", last_block_width);
        addDef("LAST_BLOCK_HEIGHT", last_block_height);
        addDef("INPUT_DEPTH", channels_ / group_);
        addDef("TOTAL_INPUT_DEPTH_SIZE", channels_);
        addDef("TOTAL_OUTPUT_DEPTH", num_output_);
        addDef("NUM_FILTERS", M_);
        addDef("TILE_X", tile_x);
        addDef("TILE_Y", tile_y);
        addDef("TILE_Y_STRIDE", tile_y_stride);
        addDef("INVEC_SIZE", invec_size);
        addDef("ALIGNED_NUM_FILTERS", (int)alignSize(M_, simd_size));
        addDef("OUT_BLOCK_SIZE", (output_block_width*output_block_height));
        addDef("APPLY_BIAS", bias_term_);
        addDef("WEIGHT_PREF", ((kernel_w_ * kernel_h_) == 1) ? 1 : 8);
        addDef("INPUT_PITCH", (width_ * height_));
        addDef("OUTPUT_PITCH", (output_w_ * output_h_));
        addDef("LEFT_FILTERS", ((int)alignSize(M_, simd_size) - M_));
        addDef("INPUT_WIDTH", width_);
        addDef("INPUT_HEIGHT", height_);
        addDef("FILTERS_IN_GROUP", ((int)alignSize(M_, simd_size) / simd_size));
        setFusionDefine(fused_activ_, fused_eltwise_);
        src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
@ -528,13 +589,6 @@ void OCL4DNNConvSpatial<Dtype>::calculateBenchmark(const UMat &bottom, UMat &ver
    return;
 }
 #define dbg
 #ifdef dbg
 #define dbgPrint(x) (x)
 #else
 #define dbgPrint(x)
 #endif
 // For a large enough input size, we do not need to tune kernels for different
 // sizes. The reason is that with a large input size there will be enough work
 // items to feed all the EUs.
@ -545,6 +599,7 @@ void OCL4DNNConvSpatial<Dtype>::calculateBenchmark(const UMat &bottom, UMat &ver
 template<typename Dtype>
 void OCL4DNNConvSpatial<Dtype>::generateKey()
 {
    std::string precision = (use_half_) ? "FP16" : "FP32";
    std::stringstream keyBuilder;
    // FIXME: to support fuse?
    keyBuilder << "k" << kernel_w_ << "x" << kernel_h_ << "_"
@ -558,21 +613,12 @@ void OCL4DNNConvSpatial<Dtype>::generateKey()
               << "num" << num_ << "_"
               << "M" << M_ << "_"
               << "activ" << fused_activ_ << "_"
-               << "eltwise" << fused_eltwise_;
+               << "eltwise" << fused_eltwise_ << "_"
               << precision;
    key_ = ocl::Device::getDefault().vendorName() + "_EU" + cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str();
-    key_sanitized_ = key_;
+    key_sanitized_ = sanitize(key_);
    for (size_t i = 0; i < key_sanitized_.size(); i++)
    {
        char c = key_sanitized_[i];
        if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'))
        {
            key_sanitized_[i] = '_';
        }
    }
    // TODO add hash?
    // key_sanitized_ = key_sanitized_ + cv::format("_%08llx", crc64((uchar*)key_.c_str(), key_.size()));
    short_key_ = keyBuilder.str();
 }
@ -587,11 +633,6 @@ std::string OCL4DNNConvSpatial<Dtype>::generateSpecificKey(int32_t type, int32_t
               << "_" << blockHeight
               << "_" << blockDepth;
    if (!use_half_)
        keyBuilder << "_float";
    else
        keyBuilder << "_half";
    return keyBuilder.str();
 }
@ -1135,7 +1176,7 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
    cv::ocl::Timer timer(queue);
    timer.start();
    bool res = true;;
-    dbgPrint(std::cout << "Benchmarking kernel: " << config->kernelName << std::endl);
+    CV_LOG_INFO(NULL, "Benchmarking kernel: " << config->kernelName);
    tuned_ = true;
    int loop_cnt = 4;
    for (int i = 0; i < loop_cnt; i++) {
@ -1152,7 +1193,6 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
    }
    float elapsedTime = timer.durationNS() * 1e-6 / loop_cnt;
    #ifdef dbg
    double out_w = output_w_;
    double out_h = output_h_;
    double out_z = M_;
@ -1160,16 +1200,8 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
    double k_h = kernel_h_;
    double k_z = channels_;
    double totalFlops = ((k_w*k_h*k_z -1)*2)*(out_w*out_h*out_z)*num_;
-    std::cout << "\tEstimated Gflops:" << (totalFlops * 1e-9)
+    CV_LOG_INFO(NULL, "\tEstimated Gflops:" << (totalFlops * 1e-9));
-              << std::endl;
+    CV_LOG_INFO(NULL, "\tEstimated GFLOPS/S: " << ((totalFlops * 1e-9)*(1000.0/elapsedTime)));
    std::cout << "\tEstimated GFLOPS/S: " << ((totalFlops * 1e-9)*(1000.0/elapsedTime))
              << std::endl;
    #if 0
    std::cout << "Estimated utilization: " <<
        ((((totalFlops/1000)/1000)/1000)*(1000.0/elapsedTime))/880.0
        << std::endl;
    #endif
    #endif
    return elapsedTime;
 }
@ -1225,18 +1257,18 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
                        if (use_half_ && error_factor > 0.1 * fabs(verify_data[offset]) &&
                            error_factor > 0.04 && !(fabs(verify_data[offset]) < 1.e-3 && error_factor < 1.e-4))
                        {
-                            dbgPrint(printf("test verification failed @ image %d group %d"
+                            CV_LOG_ERROR(NULL, "test verification failed @ image " << n << " group " << g
-                                            "out_ch %d h %d w %d got %G expected %G\n",
+                                         << " out_ch " << out_ch << " h " << h << " w " << w
-                                            n, g, out_ch, h, w, data[offset], verify_data[offset]));
+                                         << " got " << data[offset] << " expected " << verify_data[offset]);
                            verificationFail = 1;
                            goto out;
                        }
                        else if (!use_half_ && error_factor > 0.1 * fabs(verify_data[offset]) &&
                                 !(fabs(verify_data[offset]) < 1.e-3 && error_factor < 1.e-4))
                        {
-                            dbgPrint(printf("test verification failed @ image %d group %d"
+                            CV_LOG_ERROR(NULL, "test verification failed @ image " << n << " group " << g
-                                            "out_ch %d h %d w %d got %G expected %G\n",
+                                         << " out_ch " << out_ch << " h " << h << " w " << w
-                                            n, g, out_ch, h, w, data[offset], verify_data[offset]));
+                                         << " got " << data[offset] << " expected " << verify_data[offset]);
                            verificationFail = 1;
                            goto out;
                        }
@ -1517,17 +1549,11 @@ void OCL4DNNConvSpatial<float>::generate_idlf_tuneritems(std::vector< cv::Ptr<tu
        return;
    int actual_tile_x = kernel_w_ * dilation_w_ + (blockM - 1) * stride_w_ ;
-    int tile_x = alignSize(actual_tile_x, 4);
+    int tile_x = alignSize(actual_tile_x, simd_size);
-    int tile_y = kernel_h_ * dilation_h_ + (blockK - 1) * stride_h_;
+    if (tile_x > simd_size)
    if (tile_x > (4 * simd_size))
        return;
-    if ((blockM * blockK + divUp(tile_x * tile_y, simd_size)) > block_size_max)
+    if (blockM * blockK > block_size_max)
        return;
    int tile_y_stride = (4 * simd_size) / tile_x;
    int invec_size = divUp(tile_y, tile_y_stride);
    if (invec_size > 4)
        return;
    tunerItems.push_back(makePtr<tunerParam>(KERNEL_TYPE_INTEL_IDLF, blockM, blockK, simd_size));
@ -1570,11 +1596,7 @@ void OCL4DNNConvSpatial<float>::generateTunerItems(std::vector< cv::Ptr<tunerPar
                for (uint32_t height = height_max; height > 0; height--)
                {
                    generate_idlf_tuneritems(tunerItems, width, height, simd_size);
                    if (tunerItems.size() >= 8 && height == 2)
                        break;
                }
                if (tunerItems.size() >= 12 && width == 2)
                    break;
            }
        }
    }
@ -1661,10 +1683,8 @@ void OCL4DNNConvSpatial<float>::setupConvolution(const UMat &bottom,
        if (kernelQueue[x]->tested == false) {
            bool verified = verifyResult(bottom, top, weight, bias, numImages, kernelQueue[x], verifyTop);
            if (verified == false) {
-                dbgPrint(std::cout << "Kernel "
+                CV_LOG_ERROR(NULL, "Kernel " << kernelQueue[x]->kernelName << " failed verification");
-                         << kernelQueue[x]->kernelName
+                CV_LOG_ERROR(NULL, "kernelQueue[x]->workItem_output[0]: "
                         << " failed verification" << std::endl);
                dbgPrint(std::cout << "kernelQueue[x]->workItem_output[0]: "
                             << kernelQueue[x]->workItem_output[0] << " "
                             << "kernelQueue[x]->workItem_output[1]: "
                             << kernelQueue[x]->workItem_output[1] << " "
@ -1685,11 +1705,9 @@ void OCL4DNNConvSpatial<float>::setupConvolution(const UMat &bottom,
                             << "kernelQueue[x]->local_work_size[2]: "
                             << kernelQueue[x]->local_work_size[2] << " "
                             << kernelQueue[x]->swizzle_weights << " "
-                         << kernelQueue[x]->use_null_local << std::endl);
+                             << kernelQueue[x]->use_null_local);
            } else {
-                dbgPrint(std::cout << "Kernel "
+                CV_LOG_INFO(NULL, "Kernel " << kernelQueue[x]->kernelName << " pass verification");
                         << kernelQueue[x]->kernelName
                         << " pass verification" << std::endl);
            }
        }
        #endif
@ -1718,19 +1736,28 @@ void OCL4DNNConvSpatial<float>::setupConvolution(const UMat &bottom,
                break;
            } else {
                kernelQueue[fastestKernel]->tested = true;
-                dbgPrint(std::cout << "Kernel " <<
+                CV_LOG_ERROR(NULL, "Kernel " << kernelQueue[fastestKernel]->kernelName <<
-                         kernelQueue[fastestKernel]->kernelName <<
+                             " failed verification");
                         " failed verification" << std::endl);
                failures++;
            }
        }
    }
    if (verification) {
-        dbgPrint(std::cout << "Kernel <" << kernelQueue[kernel_index_]->kernelName <<
+        CV_LOG_INFO(NULL, "Kernel <" << kernelQueue[kernel_index_]->kernelName <<
-                 "> passed verification" << std::endl);
+                    "> passed verification");
-        dbgPrint(std::cout << "Convolution Time:" << kernelQueue[kernel_index_]->executionTime << std::endl);
+        CV_LOG_INFO(NULL, "Convolution Time:" << kernelQueue[kernel_index_]->executionTime);
        double out_w = output_w_;
        double out_h = output_h_;
        double out_z = M_;
        double k_w = kernel_w_;
        double k_h = kernel_h_;
        double k_z = channels_;
        float elapsedTime = kernelQueue[kernel_index_]->executionTime;
        double totalFlops = ((k_w*k_h*k_z -1)*2)*(out_w*out_h*out_z)*num_;
        CV_LOG_INFO(NULL, "\tEstimated Gflops:" << (totalFlops * 1e-9));
        CV_LOG_INFO(NULL, "\tEstimated GFLOPS/S: " << ((totalFlops * 1e-9)*(1000.0/elapsedTime)));
    } else {
-        dbgPrint(std::cout << "fallback to basic kernel" << std::endl);
+        CV_LOG_INFO(NULL, "fallback to basic kernel");
        options_.str(""); options_.clear(); // clear contents and state flags
        createBasicKernel(1, 1, 1);
        kernel_index_ = kernelQueue.size() - 1;
@ -1805,7 +1832,7 @@ void OCL4DNNConvSpatial<Dtype>::prepareKernel(const UMat &bottom, UMat &top,
    calculateBenchmark(bottom, benchData, (use_half_) ? weights_half : weight, bias, numImages);
-    if (force_auto_tuning_)
+    if (run_auto_tuning_ || force_auto_tuning_)
    {
        setupConvolution(bottom, top, weight, bias, numImages, benchData);
    }
@ -1820,18 +1847,8 @@ template<typename Dtype>
 bool OCL4DNNConvSpatial<Dtype>::loadCachedConfig()
 {
    cv::AutoLock lock(kernelConfigMutex);
-    if (!defaultConfigLoaded)
+    if (!defaultConfigLoaded && !force_auto_tuning_)
-    {
+        initializeGlobalBuiltinConfigurations((use_cache_path_ && !cache_path_.empty()) ? (cache_path_ + '/') : std::string());
        const size_t numConfigs = sizeof(default_kernel_config_intel)/sizeof(default_kernel_config_intel[0])/2;
        for (size_t i = 0; i < numConfigs; i++)
        {
            std::pair<std::string, std::string> entry(
                    std::string("Intel(R) Corporation_") + default_kernel_config_intel[2 * i],
                    default_kernel_config_intel[2 * i + 1]);
            kernelConfigMap.insert(entry);
        }
        defaultConfigLoaded = true;
    }
    kernel_hash_t::iterator it = kernelConfigMap.find(key_);
    if (it != kernelConfigMap.end())
@ -1904,9 +1921,12 @@ bool OCL4DNNConvSpatial<Dtype>::setupKernelByConfig(int x, int y, int z, int typ
 template<typename Dtype>
 bool OCL4DNNConvSpatial<Dtype>::loadTunedConfig()
 {
    if (force_auto_tuning_)
        return false;  // don't load results from external storage
    if (!use_cache_path_)
    {
-        if (cache_path_.empty() && !force_auto_tuning_)
+        if (cache_path_.empty())
        {
            static int warn_ = 0;
            if (!warn_)
--- a/modules/dnn/src/opencl/conv_layer_spatial.cl
+++ b/modules/dnn/src/opencl/conv_layer_spatial.cl
@ -206,8 +206,6 @@ __kernel void ConvolveBasic(
 #elif defined KERNEL_IDLF
 #define VLOAD4(_v, _p) do { _v = vload4(0, _p); } while(0)
 // Each work-item computes an OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT region of one output map.
 // Each work-group (which will be mapped to 1 SIMD16/SIMD8 EU thread) will compute 16/8 different feature maps, but each feature map is for the same region of the input image.
 // NDRange:  (output_width+pad)/ OUT_BLOCK_WIDTH, (output_height+pad)/OUT_BLOCK_HEIGHT, NUM_FILTERS/OUT_BLOCK_DEPTH
@ -219,124 +217,76 @@ __kernel void
 convolve_simd(
    ELTWISE_DATA_ARG
    FUSED_ARG
-    __global Dtype* inputs_base,
+    __global Dtype* inputs,
-    filter_qualifier Dtype* weights_base,
+    __global Dtype* weights,
    BIAS_KERNEL_ARG
-    __global Dtype* outputs_base,
+    __global Dtype* outputs,
    const ushort input_width,
    const ushort input_height,
    const ushort output_width,
    const ushort output_height)
 {
  __global Dtype* outputs = outputs_base;
  __global Dtype* inputs = inputs_base;
  filter_qualifier Dtype* weights = weights_base;
  unsigned int oc = get_global_id(0) * OUT_BLOCK_WIDTH;  // oc = Output Column
-  unsigned int or = get_global_id(1) * OUT_BLOCK_HEIGHT;// or = Output Row
+  unsigned int or = get_global_id(1) * OUT_BLOCK_HEIGHT; // or = Output Row
-  unsigned int fm = get_global_id(2);// fm = Feature Map = od = Output Depth
+  unsigned int fm = get_global_id(2);                    // fm = Feature Map = od = Output Depth
  unsigned int fmg = get_group_id(2);
  unsigned int lid = get_local_id(2);
-  Dtype out[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT];
+  Dtype out[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT] = { 0.0f };
  int in_addr;
  // find weights address of given neuron (lid is index)
-  unsigned int weight_addr = (fmg % (ALIGNED_NUM_FILTERS/SIMD_SIZE)) * INPUT_DEPTH * KERNEL_WIDTH * KERNEL_HEIGHT * SIMD_SIZE + lid;
+  unsigned int weight_addr = (fmg % FILTERS_IN_GROUP) *
                             INPUT_DEPTH * KERNEL_WIDTH * KERNEL_HEIGHT * SIMD_SIZE + lid;
-  for(int i=0;i<OUT_BLOCK_SIZE;i++) {
+  unsigned int num_in_batch = fm / ALIGNED_NUM_FILTERS;
    out[i]=0.0f;
  }
-  unsigned int num_in_batch = ( fm ) / ALIGNED_NUM_FILTERS;
+  unsigned int input_batch_offset = num_in_batch * INPUT_PITCH * TOTAL_INPUT_DEPTH_SIZE;
-  unsigned int input_batch_offset = num_in_batch * input_height * input_width * TOTAL_INPUT_DEPTH_SIZE;
+  int curr_y = or * STRIDE_Y;
-
+  int curr_x = oc * STRIDE_X + lid;
  int curr_local_y = ( lid / ( TILE_X / 4 ) );
  int curr_local_x = ( lid % ( TILE_X / 4 ) ) * 4;
  int curr_y = or * STRIDE_Y + curr_local_y;
  int curr_x = oc * STRIDE_X + curr_local_x;
 #if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
  int saved_y = curr_y;
 #endif
-  in_addr = input_batch_offset
+  int in_addr = input_batch_offset
-            +  (curr_y - INPUT_PAD_H) * input_width             // y tile offset
+                +  (curr_y - INPUT_PAD_H) * INPUT_WIDTH          // y tile offset
                +   curr_x - INPUT_PAD_W;                        // x tile offset
-  union {
+
-    Dtype4 in_vec[INVEC_SIZE];
+  Dtype in_buf[INVEC_SIZE];
    Dtype in_array[INVEC_SIZE * 4];
  } in_buf;
  for(int kd = 0; kd < INPUT_DEPTH; kd++)
  {
    int in_offset = in_addr;
-    int reg = 0;
+    __attribute__((opencl_unroll_hint(INVEC_SIZE)))
-    LOOP(INVEC_SIZE, reg,
+    for (int reg = 0; reg < INVEC_SIZE; reg++)
    {
-        if (curr_local_y + reg * TILE_Y_STRIDE < TILE_Y || INVEC_SIZE * TILE_Y_STRIDE <= (TILE_Y + 2) || reg < INVEC_SIZE - 1) {
+        in_buf[reg] = inputs[in_offset];
 #if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
-        if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + 3 >= INPUT_PAD_W && curr_x < input_width + INPUT_PAD_W) {
+        if (!(curr_y >= INPUT_PAD_H && curr_y < INPUT_HEIGHT + INPUT_PAD_H &&
-          if (curr_x < INPUT_PAD_W) {
+              curr_x >= INPUT_PAD_W && curr_x < INPUT_WIDTH + INPUT_PAD_W))
-            in_buf.in_vec[reg].s0 = 0;
+        {
-            if (curr_x + 1 >= INPUT_PAD_W && curr_x + 1 < input_width + INPUT_PAD_W)
+          in_buf[reg] = 0;
              in_buf.in_vec[reg].s1 = *(inputs + in_offset + 1);
            else
              in_buf.in_vec[reg].s1 = 0;
            if (curr_x + 2 >= INPUT_PAD_W && curr_x + 2 < input_width + INPUT_PAD_W)
              in_buf.in_vec[reg].s2 = *(inputs + in_offset + 2);
            else
              in_buf.in_vec[reg].s2 = 0;
            if (curr_x + 3 < input_width + INPUT_PAD_W)
              in_buf.in_vec[reg].s3 = *(inputs + in_offset + 3);
            else
              in_buf.in_vec[reg].s3 = 0;
          } else {
            VLOAD4(in_buf.in_vec[reg], inputs + in_offset);
            if (curr_x + 1 >= input_width + INPUT_PAD_W)
              in_buf.in_vec[reg].s1 = 0;
            if (curr_x + 2 >= input_width + INPUT_PAD_W)
              in_buf.in_vec[reg].s2 = 0;
            if (curr_x + 3 >= input_width + INPUT_PAD_W)
              in_buf.in_vec[reg].s3 = 0;
        }
        } else {
          in_buf.in_vec[reg] = 0;
        }
        curr_y += TILE_Y_STRIDE;
 #else
        VLOAD4(in_buf.in_vec[reg], inputs + in_offset);
 #endif
        curr_y += 1;
        in_offset += INPUT_WIDTH;
    }
-        in_offset += input_width * TILE_Y_STRIDE;
+
-      });
+    in_addr += INPUT_PITCH;
-    in_addr += input_height * input_width;
+
 #if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
    curr_y = saved_y;
 #endif
-#if KERNEL_WIDTH * KERNEL_HEIGHT != 1
+    Dtype weight_buf[WEIGHT_PREF];
 #define WEIGHT_PREF 8
 #else
 #define WEIGHT_PREF 1
 #endif
    union {
      Dtype w[WEIGHT_PREF];
 #if KERNEL_WIDTH * KERNEL_HEIGHT != 1
      INT_TYPE8 ui8;
 #endif
    } weight_buf;
    int w_idx=0;
-    unsigned int orig_weight_addr = weight_addr;
+    for (int i = 0; i < WEIGHT_PREF; i++)
-#if KERNEL_WIDTH * KERNEL_HEIGHT != 1
+    {
-    weight_buf.ui8 = SUB_GROUP_BLOCK_READ8((__global INT_TYPE *)&weights[weight_addr]);
+        weight_buf[i] = weights[weight_addr];
-    weight_addr += SIMD_SIZE * WEIGHT_PREF;
+        weight_addr += SIMD_SIZE;
-#else
+    }
    weight_buf.w[0] = as_Dtype(SUB_GROUP_BLOCK_READ((__global INT_TYPE *)&weights[weight_addr]));
    weight_addr += SIMD_SIZE * 1;
 #endif
-#define BLOCK_IN(n) sub_group_broadcast( in_buf.in_array[((n)%4) + ((n) / (TILE_Y_STRIDE * TILE_X)) * 4], (((n) % (TILE_Y_STRIDE * TILE_X))/4))
+#define BLOCK_IN(n, c) intel_sub_group_shuffle(in_buf[n], (c))
    int kr = 0;  // kr = Kernel Row
    LOOP(KERNEL_HEIGHT, kr,// LOOP is a macro that unrolls the loop.
@ -344,51 +294,29 @@ convolve_simd(
        int kc = 0;  // kc = Kernel Column
        LOOP(KERNEL_WIDTH, kc,
        {
-                for(int br=0; br < OUT_BLOCK_HEIGHT; br++) {
+            for (int br=0; br < OUT_BLOCK_HEIGHT; br++)
-                  for(int bc=0; bc < OUT_BLOCK_WIDTH; bc++) {
+            {
-                    Dtype input = BLOCK_IN((br * STRIDE_Y + kr * DILATION_Y) * TILE_X + bc * STRIDE_X + kc * DILATION_X);
+                for(int bc=0; bc < OUT_BLOCK_WIDTH; bc++)
-                    out[br * OUT_BLOCK_WIDTH + bc] = mad(weight_buf.w[w_idx % WEIGHT_PREF], input, out[br * OUT_BLOCK_WIDTH + bc]);
+                {
                    Dtype input = BLOCK_IN((br * STRIDE_Y + kr * DILATION_Y), bc * STRIDE_X + kc * DILATION_X);
                    out[br * OUT_BLOCK_WIDTH + bc] = mad(weight_buf[w_idx % WEIGHT_PREF], input, out[br * OUT_BLOCK_WIDTH + bc]);
                }
            }
-#if KERNEL_WIDTH * KERNEL_HEIGHT > WEIGHT_PREF
+            weight_buf[w_idx % WEIGHT_PREF] = weights[weight_addr];
-                // We assume KERNEL_W is equal to KERNEL_H here.
+            weight_addr += SIMD_SIZE;
                if ((w_idx + 1) % WEIGHT_PREF == 0
                #if KERNEL_WIDTH * KERNEL_HEIGHT % 8 != 0
                && ((w_idx + 1) <= (KERNEL_WIDTH * KERNEL_HEIGHT - WEIGHT_PREF))
                #endif
                    ) {
                  weight_buf.ui8 = SUB_GROUP_BLOCK_READ8((__global INT_TYPE *)&weights[weight_addr]);
                  weight_addr += SIMD_SIZE * WEIGHT_PREF;  // weights must be stored in just the right SIMD swizzled format for this to work, see host code for details.
                }
              #if KERNEL_WIDTH*KERNEL_HEIGHT % 8 == 0
                // need to do nothing
              #else
                else if ((w_idx + 1) %  WEIGHT_PREF == 0 && ((w_idx + 1) > (KERNEL_WIDTH * KERNEL_HEIGHT - WEIGHT_PREF)))
                #if KERNEL_WIDTH * KERNEL_HEIGHT % 8 == 1
                  weight_buf.w[0] = weights[weight_addr];
                #elif KERNEL_WIDTH * KERNEL_HEIGHT % 8 == 2
                  weight_buf.ui8.s01 = SUB_GROUP_BLOCK_READ2((__global INT_TYPE *)&weights[weight_addr]);
                #elif KERNEL_WIDTH * KERNEL_HEIGHT % 8 <= 4
                  weight_buf.ui8.s0123 = SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)&weights[weight_addr]);
                #else
                  weight_buf.ui8 = SUB_GROUP_BLOCK_READ8((__global INT_TYPE *)&weights[weight_addr]);
                #endif
              #endif
 #endif
            ++w_idx;
        });
    });
-    weight_addr = orig_weight_addr + KERNEL_WIDTH * KERNEL_HEIGHT * SIMD_SIZE;
+    weight_addr -= WEIGHT_PREF * SIMD_SIZE;
  }
  }
  // dead code to work around possible compiler bug.
  if (ALIGNED_NUM_FILTERS != NUM_FILTERS && fm > 0xfffffffeul) {
    outputs[0] = BLOCK_IN(fm % SIMD_SIZE);
  }
  fm = fm % ALIGNED_NUM_FILTERS;
-  if ((ALIGNED_NUM_FILTERS == NUM_FILTERS || fm < NUM_FILTERS)) {
+#if LEFT_FILTERS > 0
-  unsigned int out_addr = ( num_in_batch * TOTAL_OUTPUT_DEPTH + fm ) * output_width * output_height;
+  if (fm < NUM_FILTERS)
 #endif
  {
    unsigned int out_addr = (num_in_batch * TOTAL_OUTPUT_DEPTH + fm) * OUTPUT_PITCH;
    out_addr += or * output_width + oc;
    // we need this address calculation for biases because we support views and batching
 #if APPLY_BIAS
@ -396,13 +324,16 @@ convolve_simd(
 #else
    Dtype bias = 0;
 #endif
    for(unsigned int r = 0; r < OUT_BLOCK_HEIGHT; r++) {
      if (r + or >= output_height) break;
      for(unsigned int c = 0; c < OUT_BLOCK_WIDTH; c++) {
        if (c + oc >= output_width) break;
        // this does a scattered write to SIMD_SIZE different feature maps, so that data within one map is contiguous, thus ready for input to next layer.
        ACTIVATION_FUNCTION(outputs, out_addr + r * output_width + c, bias + out[r * OUT_BLOCK_WIDTH + c], fm);
    for(unsigned int r = 0; r < OUT_BLOCK_HEIGHT; r++)
    {
      if (r + or >= output_height) break;
      for(unsigned int c = 0; c < OUT_BLOCK_WIDTH; c++)
      {
        if (c + oc >= output_width) break;
        // this does a scattered write to SIMD_SIZE different feature maps,
        // so that data within one map is contiguous, thus ready for input to next layer.
        ACTIVATION_FUNCTION(outputs, out_addr + r * output_width + c, bias + out[r * OUT_BLOCK_WIDTH + c], fm);
      }
    }
  }
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@ -1137,11 +1137,95 @@ private:
    int outWidth, outHeight, zoomFactor;
 };
-TEST(Layer_Test_Interp, Accuracy)
+TEST(Layer_Test_Interp_custom, Accuracy)
 {
    CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer);
    testLayerUsingCaffeModels("layer_interp", DNN_TARGET_CPU, false, false);
    LayerFactory::unregisterLayer("Interp");
 }
 // Runs the "layer_interp" Caffe test model on the CPU target. Unlike a test that calls
 // CV_DNN_REGISTER_LAYER_CLASS first, nothing is registered with the layer factory here.
 TEST(Layer_Test_Interp, Accuracy)
 {
    testLayerUsingCaffeModels("layer_interp", DNN_TARGET_CPU, false, false);
 }
 // Feeds a random 10x10 8-bit image through a single 2x2 / stride-2 max pooling layer and
 // compares both requested outputs (values and flat indices) with a reference computed here.
 TEST(Layer_Test_PoolingIndices, Accuracy)
 {
    Net net;
    LayerParams poolParams;
    poolParams.set("pool", "max");
    poolParams.set("kernel_w", 2);
    poolParams.set("kernel_h", 2);
    poolParams.set("stride_w", 2);
    poolParams.set("stride_h", 2);
    poolParams.set("pad_w", 0);
    poolParams.set("pad_h", 0);
    // The dotted name doubles as a check that OpenCV accepts layer names containing dots.
    poolParams.name = "testLayer.name";
    poolParams.type = "Pooling";
    net.addLayerToPrev(poolParams.name, poolParams.type, poolParams);
    Mat image(10, 10, CV_8U);
    randu(image, 0, 255);
    // Reference result: every 2x2 cell keeps its largest pixel and that pixel's flat
    // (row * 10 + col) position in the input. Both start at -1.
    Mat expectedMax(5, 5, CV_32F, Scalar(-1));
    Mat expectedIdx(5, 5, CV_32F, Scalar(-1));
    for (int row = 0; row < 10; ++row)
    {
        const int cellRow = row / 2;
        for (int col = 0; col < 10; ++col)
        {
            const int cellCol = col / 2;
            const uint8_t pixel = image.at<uint8_t>(row, col);
            float& best = expectedMax.at<float>(cellRow, cellCol);
            if ((float)pixel > best)
            {
                best = pixel;
                expectedIdx.at<float>(cellRow, cellCol) = row * 10 + col;
            }
        }
    }
    net.setInput(blobFromImage(image));
    std::vector<Mat> results;
    net.forward(results, poolParams.name);
    normAssert(expectedMax, results[0].reshape(1, 5));
    normAssert(expectedIdx, results[1].reshape(1, 5));
 }
 // Parameterized by (4-element input shape, group count).
 typedef testing::TestWithParam<tuple<Vec4i, int> > Layer_Test_ShuffleChannel;
 // Runs a single "ShuffleChannel" layer on random data and checks, plane by plane, that
 // output channel c equals input channel groupSize * (c % group) + c / group.
 TEST_P(Layer_Test_ShuffleChannel, Accuracy)
 {
    Vec4i inpShapeVec = get<0>(GetParam());
    int group = get<1>(GetParam());
    // The channel count (second shape element) must split evenly into groups.
    ASSERT_EQ(inpShapeVec[1] % group, 0);
    const int channelsPerGroup = inpShapeVec[1] / group;
    Net net;
    LayerParams layerParams;
    layerParams.set("group", group);
    layerParams.type = "ShuffleChannel";
    layerParams.name = "testLayer";
    net.addLayerToPrev(layerParams.name, layerParams.type, layerParams);
    const int dims[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
    Mat input(4, dims, CV_32F);
    randu(input, 0, 255);
    net.setInput(input);
    Mat output = net.forward();
    for (int sample = 0; sample < inpShapeVec[0]; ++sample)
    {
        for (int dstChannel = 0; dstChannel < inpShapeVec[1]; ++dstChannel)
        {
            const int srcChannel = channelsPerGroup * (dstChannel % group) + dstChannel / group;
            Mat actualPlane = getPlane(output, sample, dstChannel);
            Mat expectedPlane = getPlane(input, sample, srcChannel);
            normAssert(actualPlane, expectedPlane);
        }
    }
 }
 // Every listed shape is combined with every listed group count.
 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_ShuffleChannel, Combine(
 /*input shape*/  Values(Vec4i(1, 6, 5, 7), Vec4i(3, 12, 1, 4)),
 /*group*/        Values(1, 2, 3, 6)
 ));
 }} // namespace
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@ -87,7 +87,7 @@ static void runTorchNet(String prefix, int targetId = DNN_TARGET_CPU, String out
    if (outLayerName.empty())
        outLayerName = net.getLayerNames().back();
-    net.setInput(inp, "0");
+    net.setInput(inp);
    std::vector<Mat> outBlobs;
    net.forward(outBlobs, outLayerName);
    normAssert(outRef, outBlobs[0]);
--- a/modules/videoio/src/cap_openni2.cpp
+++ b/modules/videoio/src/cap_openni2.cpp
@ -70,6 +70,35 @@
 #include "PS1080.h"
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 static cv::Mutex initOpenNI2Mutex;
 // Ties openni::OpenNI::initialize() / shutdown() to the lifetime of one function-local
 // static object, so callers only ever need to call OpenNI2Initializer::init().
 struct OpenNI2Initializer
 {
    // Takes initOpenNI2Mutex and then touches the function-local static instance; the
    // constructor below is what actually calls into OpenNI.
    static void init()
    {
        cv::AutoLock guard(initOpenNI2Mutex);
        static OpenNI2Initializer instance;
    }
 private:
    // Initializes the OpenNI context; raises CV_StsError with OpenNI's extended error
    // text when initialization does not return STATUS_OK.
    OpenNI2Initializer()
    {
        if (openni::OpenNI::initialize() == openni::STATUS_OK)
            return;
        const std::string reason = std::string("Failed to initialize:") + openni::OpenNI::getExtendedError();
        CV_Error(CV_StsError, reason);
    }
    // Shuts OpenNI down when the static instance is destroyed.
    ~OpenNI2Initializer()
    {
        openni::OpenNI::shutdown();
    }
 };
 class CvCapture_OpenNI2 : public CvCapture
 {
 public:
@ -107,6 +136,8 @@ protected:
    static openni::VideoMode defaultStreamOutputMode(int stream);
    CvCapture_OpenNI2(int index, const char * filename);
    IplImage* retrieveDepthMap();
    IplImage* retrievePointCloudMap();
    IplImage* retrieveDisparityMap();
@ -116,8 +147,8 @@ protected:
    IplImage* retrieveGrayImage();
    IplImage* retrieveIrImage();
-    openni::Status toggleStream(int stream, bool toggle);
+    void toggleStream(int stream, bool toggle);
-    bool readCamerasParams();
+    void readCamerasParams();
    double getDepthGeneratorProperty(int propIdx) const;
    bool setDepthGeneratorProperty(int propIdx, double propVal);
@ -131,12 +162,11 @@ protected:
    // OpenNI context
    openni::Device device;
    bool isContextOpened;
    openni::Recorder recorder;
    // Data generators with its metadata
-    openni::VideoStream streams[CV_MAX_NUM_STREAMS];
+    std::vector<openni::VideoStream> streams;
-    openni::VideoFrameRef streamFrames[CV_MAX_NUM_STREAMS];
+    std::vector<openni::VideoFrameRef> streamFrames;
-    cv::Mat streamImages[CV_MAX_NUM_STREAMS];
+    std::vector<cv::Mat> streamImages;
    int maxBufferSize, maxTimeDuration; // for approx sync
    bool isCircleBuffer;
@ -191,80 +221,103 @@ openni::VideoMode CvCapture_OpenNI2::defaultStreamOutputMode(int stream)
    return mode;
 }
-CvCapture_OpenNI2::CvCapture_OpenNI2( int index )
+
 // Builds a capture from a device index by delegating to the
 // (index, filename) constructor with a null file name.
 CvCapture_OpenNI2::CvCapture_OpenNI2(int index)
    : CvCapture_OpenNI2(index, nullptr)
 {
 }
 // Builds a capture from a file name by delegating to the (index, filename)
 // constructor; the index is passed as -1, which makes that constructor skip
 // its `index >= 0` device-selection branch and use `filename` as the device URI.
 CvCapture_OpenNI2::CvCapture_OpenNI2(const char * filename)
    : CvCapture_OpenNI2(-1, filename)
 {
 }
 CvCapture_OpenNI2::CvCapture_OpenNI2(int index, const char * filename) :
    device(),
    isContextOpened(false),
    streams(CV_MAX_NUM_STREAMS),
    streamFrames(CV_MAX_NUM_STREAMS),
    streamImages(CV_MAX_NUM_STREAMS),
    maxBufferSize(DEFAULT_MAX_BUFFER_SIZE),
    maxTimeDuration(DEFAULT_MAX_TIME_DURATION),
    isCircleBuffer(DEFAULT_IS_CIRCLE_BUFFER),
    baseline(0),
    depthFocalLength_VGA(0),
    shadowValue(0),
    noSampleValue(0),
    outputMaps(outputMapsTypesCount)
 {
    // Initialize and configure the context.
    OpenNI2Initializer::init();
    const char* deviceURI = openni::ANY_DEVICE;
-    openni::Status status;
+    bool needColor = true;
    bool needIR = true;
    if (index >= 0)
    {
        int deviceType = DEVICE_DEFAULT;
-
+        if (index >= 10)
    noSampleValue = shadowValue = 0;
    isContextOpened = false;
    maxBufferSize = DEFAULT_MAX_BUFFER_SIZE;
    isCircleBuffer = DEFAULT_IS_CIRCLE_BUFFER;
    maxTimeDuration = DEFAULT_MAX_TIME_DURATION;
    if( index >= 10 )
        {
            deviceType = index / 10;
            index %= 10;
        }
-
+        // Asus XTION and Occipital Structure Sensor do not have an image generator
-    // Initialize and configure the context.
+        needColor = (deviceType != DEVICE_ASUS_XTION);
    status = openni::OpenNI::initialize();
    if (status != openni::STATUS_OK)
    {
        CV_Error(CV_StsError, cv::format("Failed to initialize:", openni::OpenNI::getExtendedError()));
        return;
    }
        // find appropriate device URI
        openni::Array<openni::DeviceInfo> ldevs;
        if (index > 0)
        {
            openni::OpenNI::enumerateDevices(&ldevs);
            if (index < ldevs.getSize())
                deviceURI = ldevs[index].getUri();
-    }
+            else
    status = device.open(deviceURI);
    if( status != openni::STATUS_OK )
            {
-        CV_Error(CV_StsError, cv::format("OpenCVKinect: Device open failed see: %s\n", openni::OpenNI::getExtendedError()));
+                CV_Error(CV_StsError, "OpenCVKinect2: Device index exceeds the number of available OpenNI devices");
-        openni::OpenNI::shutdown();
+            }
-        return;
+        }
    }
    else
    {
        deviceURI = filename;
    }
-    status = toggleStream(CV_DEPTH_STREAM, true);
+    openni::Status status;
-    // Asus XTION and Occipital Structure Sensor do not have an image generator
+    status = device.open(deviceURI);
    if (deviceType != DEVICE_ASUS_XTION)
        status = openni::Status(status | toggleStream(CV_COLOR_STREAM, true));
    if (status != openni::STATUS_OK)
    {
-        openni::OpenNI::shutdown();
+        CV_Error(CV_StsError, std::string("OpenCVKinect2: Failed to open device: ") + openni::OpenNI::getExtendedError());
        return;
    }
-    if (!readCamerasParams())
+    toggleStream(CV_DEPTH_STREAM, true);
-    {
+    if (needColor)
-        CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Could not read cameras parameters\n"));
+        toggleStream(CV_COLOR_STREAM, true);
-        return;
+    if (needIR)
-    }
+        toggleStream(CV_IR_STREAM, true);
    outputMaps.resize( outputMapsTypesCount );
    isContextOpened = true;
    setProperty(CV_CAP_PROP_OPENNI_REGISTRATION, 1.0);
    // default for Kinect2 camera
    setProperty(CV_CAP_PROP_OPENNI2_MIRROR, 0.0);
    isContextOpened = true;
 }
-openni::Status CvCapture_OpenNI2::toggleStream(int stream, bool toggle)
+CvCapture_OpenNI2::~CvCapture_OpenNI2()
 {
    // Tear down every stream slot. streamFrames is indexed with the size of
    // streams; the constructor creates both vectors with CV_MAX_NUM_STREAMS elements.
    for (size_t i = 0; i < streams.size(); ++i)
    {
        // Drop the held frame reference first, then stop and destroy the stream itself.
        streamFrames[i].release();
        streams[i].stop();
        streams[i].destroy();
    }
    // Close the device after all of its streams have been destroyed.
    // openni::OpenNI::shutdown() is not called from this destructor.
    device.close();
 }
 void CvCapture_OpenNI2::toggleStream(int stream, bool toggle)
 {
    openni::Status status;
    // for logging
-    static const char* stream_names[CV_MAX_NUM_STREAMS] = {
+    static const std::string stream_names[CV_MAX_NUM_STREAMS] = {
        "depth",
        "color",
        "IR"
@ -280,140 +333,92 @@ openni::Status CvCapture_OpenNI2::toggleStream(int stream, bool toggle)
    {
        // already opened
        if (streams[stream].isValid())
-            return openni::STATUS_OK;
+            return;
        // open stream
        status = streams[stream].create(device, stream_sensor_types[stream]);
        if (status == openni::STATUS_OK)
        {
-            // set video mode
+            // try to set up default stream mode (if available)
-            status = streams[stream].setVideoMode(defaultStreamOutputMode(stream)); // xn::DepthGenerator supports VGA only! (Jan 2011)
+            const openni::Array<openni::VideoMode>& vm = streams[stream].getSensorInfo().getSupportedVideoModes();
            openni::VideoMode dm = defaultStreamOutputMode(stream);
            for (int i = 0; i < vm.getSize(); i++)
            {
                if (vm[i].getPixelFormat() == dm.getPixelFormat() &&
                    vm[i].getResolutionX() == dm.getResolutionX() &&
                    vm[i].getResolutionY() == dm.getResolutionY() &&
                    vm[i].getFps() == dm.getFps())
                {
                    status = streams[stream].setVideoMode(defaultStreamOutputMode(stream));
                    if (status != openni::STATUS_OK)
                    {
                CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Couldn't set %s stream output mode: %s\n",
                                                 stream_names[stream],
                                                 openni::OpenNI::getExtendedError()));
                        streams[stream].destroy();
-                return status;
+                        CV_Error(CV_StsError, std::string("OpenCVKinect2 : Couldn't set ") +
                                 stream_names[stream] + std::string(" stream output mode: ") +
                                 std::string(openni::OpenNI::getExtendedError()));
                    }
                }
            }
            // start stream
            status = streams[stream].start();
            if (status != openni::STATUS_OK)
            {
                CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Couldn't start %s stream: %s\n",
                                                 stream_names[stream],
                                                 openni::OpenNI::getExtendedError()));
                streams[stream].destroy();
-                return status;
+                CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::CvCapture_OpenNI2 : Couldn't start ") +
                         stream_names[stream] + std::string(" stream: ") +
                         std::string(openni::OpenNI::getExtendedError()));
            }
        }
        else
        {
-            CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Couldn't find %s stream:: %s\n",
+            CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::CvCapture_OpenNI2 : Couldn't find ") +
-                                             stream_names[stream],
+                     stream_names[stream] + " stream: " +
-                                             openni::OpenNI::getExtendedError()));
+                     std::string(openni::OpenNI::getExtendedError()));
            return status;
        }
    }
    else if (streams[stream].isValid()) // want to close stream
    {
-        streams[stream].stop();
+        //FIX for libfreenect2
-        streams[stream].destroy();
+        //which stops the whole device when stopping only one stream
    }
-    return openni::STATUS_OK;
+        //streams[stream].stop();
        //streams[stream].destroy();
    }
 }
 CvCapture_OpenNI2::CvCapture_OpenNI2(const char * filename)
 {
    openni::Status status;
-    isContextOpened = false;
+void CvCapture_OpenNI2::readCamerasParams()
    maxBufferSize = DEFAULT_MAX_BUFFER_SIZE;
    isCircleBuffer = DEFAULT_IS_CIRCLE_BUFFER;
    maxTimeDuration = DEFAULT_MAX_TIME_DURATION;
    // Initialize and configure the context.
    status = openni::OpenNI::initialize();
    if (status != openni::STATUS_OK)
    {
        CV_Error(CV_StsError, cv::format("Failed to initialize:", openni::OpenNI::getExtendedError()));
        return;
    }
    // Open file
    status = device.open(filename);
    if( status != openni::STATUS_OK )
    {
        CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Failed to open input file (%s): %s\n", filename, openni::OpenNI::getExtendedError()));
        return;
    }
    status = openni::Status(toggleStream(CV_DEPTH_STREAM, true) | toggleStream(CV_COLOR_STREAM, true));
    if (status != openni::STATUS_OK)
    {
        openni::OpenNI::shutdown();
        return;
    }
    if( !readCamerasParams() )
    {
        CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Could not read cameras parameters\n"));
        return;
    }
    outputMaps.resize( outputMapsTypesCount );
    isContextOpened = true;
 }
 CvCapture_OpenNI2::~CvCapture_OpenNI2()
 {
    for (int i = 0; i < CV_MAX_NUM_STREAMS; ++i)
    {
        streamFrames[i].release();
        streams[i].stop();
        streams[i].destroy();
    }
    device.close();
    openni::OpenNI::shutdown();
 }
 bool CvCapture_OpenNI2::readCamerasParams()
 {
    double pixelSize = 0;
    if (streams[CV_DEPTH_STREAM].getProperty<double>(XN_STREAM_PROPERTY_ZERO_PLANE_PIXEL_SIZE, &pixelSize) != openni::STATUS_OK)
    {
-        CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::readCamerasParams : Could not read pixel size!\n"));
+        CV_Error(CV_StsError, "CvCapture_OpenNI2::readCamerasParams : Could not read pixel size!" +
-        return false;
+                              std::string(openni::OpenNI::getExtendedError()));
    }
    // pixel size @ VGA = pixel size @ SXGA x 2
    pixelSize *= 2.0; // in mm
    // focal length of IR camera in pixels for VGA resolution
-    int zeroPlanDistance; // in mm
+    unsigned long long zeroPlaneDistance; // in mm
-    if (streams[CV_DEPTH_STREAM].getProperty(XN_STREAM_PROPERTY_ZERO_PLANE_DISTANCE, &zeroPlanDistance) != openni::STATUS_OK)
+    if (streams[CV_DEPTH_STREAM].getProperty(XN_STREAM_PROPERTY_ZERO_PLANE_DISTANCE, &zeroPlaneDistance) != openni::STATUS_OK)
    {
-        CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::readCamerasParams : Could not read virtual plane distance!\n"));
+        CV_Error(CV_StsError, "CvCapture_OpenNI2::readCamerasParams : Could not read virtual plane distance!" +
-        return false;
+                              std::string(openni::OpenNI::getExtendedError()));
    }
    if (streams[CV_DEPTH_STREAM].getProperty<double>(XN_STREAM_PROPERTY_EMITTER_DCMOS_DISTANCE, &baseline) != openni::STATUS_OK)
    {
-        CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::readCamerasParams : Could not read base line!\n"));
+        CV_Error(CV_StsError, "CvCapture_OpenNI2::readCamerasParams : Could not read base line!" +
-        return false;
+                              std::string(openni::OpenNI::getExtendedError()));
    }
    // baseline from cm -> mm
    baseline *= 10;
    // focal length from mm -> pixels (valid for 640x480)
-    depthFocalLength_VGA = (int)((double)zeroPlanDistance / (double)pixelSize);
+    depthFocalLength_VGA = (int)((double)zeroPlaneDistance / (double)pixelSize);
    return true;
 }
 double CvCapture_OpenNI2::getProperty( int propIdx ) const
@ -500,7 +505,7 @@ double CvCapture_OpenNI2::getCommonProperty( int propIdx ) const
        break;
    }
    default :
-        CV_Error( CV_StsBadArg, cv::format("Such parameter (propIdx=%d) isn't supported for getting.\n", propIdx) );
+        CV_Error( CV_StsBadArg, cv::format("Such parameter (propIdx=%d) isn't supported for getting.", propIdx) );
    }
    return propValue;
@ -525,14 +530,20 @@ bool CvCapture_OpenNI2::setCommonProperty( int propIdx, double propValue )
    // There is a set of properties that correspond to the depth generator by default
    // (if they are passed without a particular generator flag).
    case CV_CAP_PROP_OPENNI_REGISTRATION:
-        isSet = setDepthGeneratorProperty( propIdx, propValue );
+        isSet = setDepthGeneratorProperty(propIdx, propValue);
        break;
    case CV_CAP_PROP_OPENNI2_SYNC:
        isSet = device.setDepthColorSyncEnabled(propValue > 0.0) == openni::STATUS_OK;
        break;
    case CV_CAP_PROP_FRAME_WIDTH:
    case CV_CAP_PROP_FRAME_HEIGHT:
    case CV_CAP_PROP_AUTOFOCUS:
        isSet = false;
        break;
    default:
-        CV_Error( CV_StsBadArg, cv::format("Such parameter (propIdx=%d) isn't supported for setting.\n", propIdx) );
+        CV_Error(CV_StsBadArg, cv::format("Such parameter (propIdx=%d) isn't supported for setting.", propIdx));
    }
    return isSet;
@ -565,9 +576,13 @@ double CvCapture_OpenNI2::getDepthGeneratorProperty( int propIdx ) const
        propValue = streams[CV_DEPTH_STREAM].getMaxPixelValue();
        break;
    case CV_CAP_PROP_OPENNI_BASELINE :
        if(baseline <= 0)
            const_cast<CvCapture_OpenNI2*>(this)->readCamerasParams();
        propValue = baseline;
        break;
    case CV_CAP_PROP_OPENNI_FOCAL_LENGTH :
        if(depthFocalLength_VGA <= 0)
            const_cast<CvCapture_OpenNI2*>(this)->readCamerasParams();
        propValue = (double)depthFocalLength_VGA;
        break;
    case CV_CAP_PROP_OPENNI_REGISTRATION :
@ -580,7 +595,7 @@ double CvCapture_OpenNI2::getDepthGeneratorProperty( int propIdx ) const
        propValue = streamFrames[CV_DEPTH_STREAM].getFrameIndex();
        break;
    default :
-        CV_Error( CV_StsBadArg, cv::format("Depth generator does not support such parameter (propIdx=%d) for getting.\n", propIdx) );
+        CV_Error( CV_StsBadArg, cv::format("Depth generator does not support such parameter (propIdx=%d) for getting.", propIdx) );
    }
    return propValue;
@ -594,7 +609,10 @@ bool CvCapture_OpenNI2::setDepthGeneratorProperty( int propIdx, double propValue
    {
    case CV_CAP_PROP_OPENNI_GENERATOR_PRESENT:
        if (isContextOpened)
-            isSet = toggleStream(CV_DEPTH_STREAM, propValue > 0.0) == openni::STATUS_OK;
+        {
            toggleStream(CV_DEPTH_STREAM, propValue > 0.0);
            isSet = true;
        }
        break;
    case CV_CAP_PROP_OPENNI_REGISTRATION:
        {
@ -612,12 +630,13 @@ bool CvCapture_OpenNI2::setDepthGeneratorProperty( int propIdx, double propValue
                        {
                            openni::Status status = device.setImageRegistrationMode(mode);
                            if( status != openni::STATUS_OK )
-                                CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::setDepthGeneratorProperty : %s\n", openni::OpenNI::getExtendedError()));
+                                CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::setDepthGeneratorProperty: ") +
                                         std::string(openni::OpenNI::getExtendedError()));
                            else
                                isSet = true;
                        }
                        else
-                            CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::setDepthGeneratorProperty : Unsupported viewpoint.\n"));
+                            CV_Error(CV_StsError, "CvCapture_OpenNI2::setDepthGeneratorProperty: Unsupported viewpoint.");
                    }
                    else
                        isSet = true;
@ -627,14 +646,15 @@ bool CvCapture_OpenNI2::setDepthGeneratorProperty( int propIdx, double propValue
            {
                openni::Status status = device.setImageRegistrationMode(openni::IMAGE_REGISTRATION_OFF);
                if( status != openni::STATUS_OK )
-                    CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::setDepthGeneratorProperty : %s\n", openni::OpenNI::getExtendedError()));
+                    CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::setDepthGeneratorProperty: ") +
                             std::string(openni::OpenNI::getExtendedError()));
                else
                    isSet = true;
            }
        }
        break;
    default:
-        CV_Error( CV_StsBadArg, cv::format("Depth generator does not support such parameter (propIdx=%d) for setting.\n", propIdx) );
+        CV_Error( CV_StsBadArg, cv::format("Depth generator does not support such parameter (propIdx=%d) for setting.", propIdx) );
    }
    return isSet;
@ -668,7 +688,7 @@ double CvCapture_OpenNI2::getImageGeneratorProperty( int propIdx ) const
        propValue = (double)streamFrames[CV_COLOR_STREAM].getFrameIndex();
        break;
    default :
-        CV_Error( CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for getting.\n", propIdx) );
+        CV_Error( CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for getting.", propIdx) );
    }
    return propValue;
@ -682,7 +702,10 @@ bool CvCapture_OpenNI2::setImageGeneratorProperty(int propIdx, double propValue)
        {
        case CV_CAP_PROP_OPENNI_GENERATOR_PRESENT:
            if (isContextOpened)
-                isSet = toggleStream(CV_COLOR_STREAM, propValue > 0.0) == openni::STATUS_OK;
+            {
                toggleStream(CV_COLOR_STREAM, propValue > 0.0);
                isSet = true;
            }
            break;
        case CV_CAP_PROP_OPENNI_OUTPUT_MODE :
        {
@ -713,18 +736,19 @@ bool CvCapture_OpenNI2::setImageGeneratorProperty(int propIdx, double propValue)
                mode.setFps(60);
                 break;
            default :
-                CV_Error( CV_StsBadArg, "Unsupported image generator output mode.\n");
+                CV_Error( CV_StsBadArg, "Unsupported image generator output mode.");
            }
            openni::Status status = streams[CV_COLOR_STREAM].setVideoMode( mode );
            if( status != openni::STATUS_OK )
-                CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::setImageGeneratorProperty : %s\n", openni::OpenNI::getExtendedError()));
+                CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::setImageGeneratorProperty: ") +
                         std::string(openni::OpenNI::getExtendedError()));
            else
                isSet = true;
            break;
        }
        default:
-            CV_Error( CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for setting.\n", propIdx) );
+            CV_Error( CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for setting.", propIdx) );
        }
    return isSet;
@ -758,7 +782,7 @@ double CvCapture_OpenNI2::getIrGeneratorProperty(int propIdx) const
        propValue = (double)streamFrames[CV_IR_STREAM].getFrameIndex();
        break;
    default:
-        CV_Error(CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for getting.\n", propIdx));
+        CV_Error(CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for getting.", propIdx));
    }
    return propValue;
@ -772,7 +796,10 @@ bool CvCapture_OpenNI2::setIrGeneratorProperty(int propIdx, double propValue)
    {
    case CV_CAP_PROP_OPENNI_GENERATOR_PRESENT:
        if (isContextOpened)
-            isSet = toggleStream(CV_IR_STREAM, propValue > 0.0) == openni::STATUS_OK;
+        {
            toggleStream(CV_IR_STREAM, propValue > 0.0);
            isSet = true;
        }
        break;
    case CV_CAP_PROP_OPENNI_OUTPUT_MODE:
    {
@ -803,18 +830,19 @@ bool CvCapture_OpenNI2::setIrGeneratorProperty(int propIdx, double propValue)
            mode.setFps(60);
            break;
        default:
-            CV_Error(CV_StsBadArg, "Unsupported image generator output mode.\n");
+            CV_Error(CV_StsBadArg, "Unsupported image generator output mode.");
        }
        openni::Status status = streams[CV_IR_STREAM].setVideoMode(mode);
        if (status != openni::STATUS_OK)
-            CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::setImageGeneratorProperty : %s\n", openni::OpenNI::getExtendedError()));
+            CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::setImageGeneratorProperty: ") +
                     std::string(openni::OpenNI::getExtendedError()));
        else
            isSet = true;
        break;
    }
    default:
-        CV_Error(CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for setting.\n", propIdx));
+        CV_Error(CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for setting.", propIdx));
    }
    return isSet;
@ -931,10 +959,12 @@ IplImage* CvCapture_OpenNI2::retrieveDisparityMap()
    if (!streamFrames[CV_DEPTH_STREAM].isValid())
        return 0;
    readCamerasParams();
    cv::Mat disp32;
    computeDisparity_32F(streamFrames[CV_DEPTH_STREAM], disp32, baseline, depthFocalLength_VGA, noSampleValue, shadowValue);
-    disp32.convertTo( outputMaps[CV_CAP_OPENNI_DISPARITY_MAP].mat, CV_8UC1 );
+    disp32.convertTo(outputMaps[CV_CAP_OPENNI_DISPARITY_MAP].mat, CV_8UC1);
    return outputMaps[CV_CAP_OPENNI_DISPARITY_MAP].getIplImagePtr();
 }
@ -944,6 +974,8 @@ IplImage* CvCapture_OpenNI2::retrieveDisparityMap_32F()
    if (!streamFrames[CV_DEPTH_STREAM].isValid())
        return 0;
    readCamerasParams();
    computeDisparity_32F(streamFrames[CV_DEPTH_STREAM], outputMaps[CV_CAP_OPENNI_DISPARITY_MAP_32F].mat, baseline, depthFocalLength_VGA, noSampleValue, shadowValue);
    return outputMaps[CV_CAP_OPENNI_DISPARITY_MAP_32F].getIplImagePtr();
@ -966,7 +998,7 @@ inline void getBGRImageFromMetaData( const openni::VideoFrameRef& imageMetaData,
 {
   cv::Mat bufferImage;
   if( imageMetaData.getVideoMode().getPixelFormat() != openni::PIXEL_FORMAT_RGB888 )
-        CV_Error( CV_StsUnsupportedFormat, "Unsupported format of grabbed image\n" );
+        CV_Error( CV_StsUnsupportedFormat, "Unsupported format of grabbed image." );
   bgrImage.create(imageMetaData.getHeight(), imageMetaData.getWidth(), CV_8UC3);
   bufferImage.create(imageMetaData.getHeight(), imageMetaData.getWidth(), CV_8UC3);
@ -989,7 +1021,7 @@ inline void getGrayImageFromMetaData(const openni::VideoFrameRef& imageMetaData,
    }
    else
    {
-        CV_Error(CV_StsUnsupportedFormat, "Unsupported format of grabbed image\n");
+        CV_Error(CV_StsUnsupportedFormat, "Unsupported format of grabbed image.");
    }
 }
--- a/samples/cpp/videocapture_basic.cpp
+++ b/samples/cpp/videocapture_basic.cpp
@ -20,7 +20,7 @@ int main(int, char**)
    //--- INITIALIZE VIDEOCAPTURE
    VideoCapture cap;
    // open the default camera using default API
-    cap.open(0);
+    // cap.open(0);
    // OR advanced usage: select any API backend
    int deviceID = 0;             // 0 = open default camera
    int apiID = cv::CAP_ANY;      // 0 = autodetect default API
--- a/samples/data/dnn/object_detection_classes_coco.txt
+++ b/samples/data/dnn/object_detection_classes_coco.txt
@ -9,7 +9,7 @@ truck
 boat
 traffic light
 fire hydrant
-
+street sign
 stop sign
 parking meter
 bench
@ -23,11 +23,11 @@ elephant
 bear
 zebra
 giraffe
-
+hat
 backpack
 umbrella
-
+shoe
-
+eye glasses
 handbag
 tie
 suitcase
@ -42,7 +42,7 @@ skateboard
 surfboard
 tennis racket
 bottle
-
+plate
 wine glass
 cup
 fork
@ -63,12 +63,12 @@ chair
 couch
 potted plant
 bed
-
+mirror
 dining table
-
+window
-
+desk
 toilet
-
+door
 tv
 laptop
 mouse
@ -80,7 +80,7 @@ oven
 toaster
 sink
 refrigerator
-
+blender
 book
 clock
 vase
--- a/samples/data/dnn/object_detection_classes_yolov3.txt
+++ b/samples/data/dnn/object_detection_classes_yolov3.txt
@ -0,0 +1,80 @@
 person
 bicycle
 car
 motorcycle
 airplane
 bus
 train
 truck
 boat
 traffic light
 fire hydrant
 stop sign
 parking meter
 bench
 bird
 cat
 dog
 horse
 sheep
 cow
 elephant
 bear
 zebra
 giraffe
 backpack
 umbrella
 handbag
 tie
 suitcase
 frisbee
 skis
 snowboard
 sports ball
 kite
 baseball bat
 baseball glove
 skateboard
 surfboard
 tennis racket
 bottle
 wine glass
 cup
 fork
 knife
 spoon
 bowl
 banana
 apple
 sandwich
 orange
 broccoli
 carrot
 hot dog
 pizza
 donut
 cake
 chair
 couch
 potted plant
 bed
 dining table
 toilet
 tv
 laptop
 mouse
 remote
 keyboard
 cell phone
 microwave
 oven
 toaster
 sink
 refrigerator
 book
 clock
 vase
 scissors
 teddy bear
 hair drier
 toothbrush
--- a/samples/opencl/opencl-opencv-interop.cpp
+++ b/samples/opencl/opencl-opencv-interop.cpp
@ -14,6 +14,8 @@
 #include <iomanip>
 #include <stdexcept>
 #define CL_USE_DEPRECATED_OPENCL_1_1_APIS
 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
 #define CL_USE_DEPRECATED_OPENCL_2_0_APIS // eliminate build warning
 #ifdef __APPLE__