From 6e299b582acf78c67fb979cf6795750165bf572a Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 23 Dec 2021 00:39:09 +0000 Subject: [PATCH 01/15] dnn(test): decompose 'DynamicAxes' test --- modules/dnn/test/test_onnx_importer.cpp | 153 +++++++++++++++++++++++- 1 file changed, 149 insertions(+), 4 deletions(-) diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index d5a5a86091..ac1b89d99c 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -908,7 +908,58 @@ TEST_P(Test_ONNX_layers, GatherMultiOutput) testONNXModels("gather_multi_output"); } -TEST_P(Test_ONNX_layers, DynamicAxes) +TEST_P(Test_ONNX_layers, DynamicAxes_squeeze_and_conv) +{ +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } +#if INF_ENGINE_VER_MAJOR_LT(2021000000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } +#endif +#endif + testONNXModels("squeeze_and_conv_dynamic_axes"); +} + +TEST_P(Test_ONNX_layers, DynamicAxes_unsqueeze_and_conv) +{ +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } +#if INF_ENGINE_VER_MAJOR_LT(2021000000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } +#endif +#endif + testONNXModels("unsqueeze_and_conv_dynamic_axes"); +} + +TEST_P(Test_ONNX_layers, DynamicAxes_gather) +{ +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } +#if INF_ENGINE_VER_MAJOR_LT(2021000000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } +#endif +#endif + testONNXModels("gather_dynamic_axes"); +} + +TEST_P(Test_ONNX_layers, DynamicAxes_gather_scalar) { #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // accuracy @@ -929,18 +980,112 @@ TEST_P(Test_ONNX_layers, DynamicAxes) } #endif #endif - testONNXModels("squeeze_and_conv_dynamic_axes"); - testONNXModels("unsqueeze_and_conv_dynamic_axes"); - testONNXModels("gather_dynamic_axes"); testONNXModels("gather_scalar_dynamic_axes"); +} + +TEST_P(Test_ONNX_layers, DynamicAxes_slice) +{ +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } +#if INF_ENGINE_VER_MAJOR_LT(2021000000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } +#endif +#endif testONNXModels("slice_dynamic_axes"); +} + +TEST_P(Test_ONNX_layers, DynamicAxes_slice_opset_11) +{ +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) 
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } +#if INF_ENGINE_VER_MAJOR_LT(2021000000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } +#endif +#endif testONNXModels("slice_opset_11_dynamic_axes"); +} + +TEST_P(Test_ONNX_layers, DynamicAxes_resize_opset11_torch16) +{ +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } +#if INF_ENGINE_VER_MAJOR_LT(2021000000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } +#endif +#endif testONNXModels("resize_opset11_torch1.6_dynamic_axes"); +} + +TEST_P(Test_ONNX_layers, DynamicAxes_average_pooling) +{ +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } +#if INF_ENGINE_VER_MAJOR_LT(2021000000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } +#endif +#endif testONNXModels("average_pooling_dynamic_axes"); +} + +TEST_P(Test_ONNX_layers, DynamicAxes_maxpooling_sigmoid) +{ +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } +#if INF_ENGINE_VER_MAJOR_LT(2021000000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } +#endif +#endif testONNXModels("maxpooling_sigmoid_dynamic_axes"); +} + +TEST_P(Test_ONNX_layers, DynamicAxes_dynamic_batch) +{ +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } +#if INF_ENGINE_VER_MAJOR_LT(2021000000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } +#endif +#endif testONNXModels("dynamic_batch"); } + TEST_P(Test_ONNX_layers, MaxPool1d) { #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) From c408157a4def11d1e00d8d43565b04572c11c048 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 23 Dec 2021 02:37:45 +0000 Subject: [PATCH 02/15] dnn: do not try to rebuilt network during setInput() - this doesn't make sense in case of multiple inputs --- modules/dnn/src/dnn.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index b35dda9ddf..65b8205f1d 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1339,6 +1339,11 @@ struct Net::Impl : public detail::NetImplBase #endif clear(); + if (hasDynamicShapes) + { + updateLayersShapes(); + } + this->blobsToKeep = blobsToKeep_; allocateLayers(blobsToKeep_); @@ -3886,13 +3891,8 @@ void Net::setInput(InputArray blob, const String& name, double 
scalefactor, cons bool oldShape = prevShape == blobShape; blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]); - if (!oldShape) { + if (!oldShape) ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid]; - if (impl->hasDynamicShapes) - { - impl->updateLayersShapes(); - } - } if (!ld.outputBlobsWrappers[pin.oid].empty()) { From ada16fd188ecdb45424f2398fc89479eaaa61f22 Mon Sep 17 00:00:00 2001 From: APrigarina Date: Thu, 23 Dec 2021 10:00:25 +0300 Subject: [PATCH 03/15] prevent index error in find_obj sample --- samples/python/find_obj.py | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/python/find_obj.py b/samples/python/find_obj.py index 14dfa74902..c5783bf456 100755 --- a/samples/python/find_obj.py +++ b/samples/python/find_obj.py @@ -86,6 +86,7 @@ def explore_match(win, img1, img2, kp_pairs, status = None, H = None): if status is None: status = np.ones(len(kp_pairs), np.bool_) + status = status.reshape((len(kp_pairs), 1)) p1, p2 = [], [] # python 2 / python 3 change of zip unpacking for kpp in kp_pairs: p1.append(np.int32(kpp[0].pt)) From 381d9bafdf10158a417a2dab9154b6890e6a6a1d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 23 Dec 2021 09:58:56 +0000 Subject: [PATCH 04/15] imgcodecs: disable OpenEXR in runtime for 3rdparty source code - builtin OpenEXR source code is outdated - external OpenEXR distributions are allowed --- cmake/OpenCVFindLibsGrfmt.cmake | 1 + modules/imgcodecs/CMakeLists.txt | 12 ++++++++++++ modules/imgcodecs/src/grfmt_exr.cpp | 26 ++++++++++++++++++++++++++ modules/imgcodecs/test/test_grfmt.cpp | 2 +- 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake index 2d28dff875..d3244cf7a5 100644 --- a/cmake/OpenCVFindLibsGrfmt.cmake +++ b/cmake/OpenCVFindLibsGrfmt.cmake @@ -240,6 +240,7 @@ if(WITH_OPENEXR) set(OPENEXR_LIBRARIES IlmImf) add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/openexr") if(OPENEXR_VERSION) # check via TARGET doesn't work + set(BUILD_OPENEXR ON) set(HAVE_OPENEXR YES) endif() endif() diff --git a/modules/imgcodecs/CMakeLists.txt b/modules/imgcodecs/CMakeLists.txt index 10fb3278fb..9b59a3821b 100644 --- a/modules/imgcodecs/CMakeLists.txt +++ b/modules/imgcodecs/CMakeLists.txt @@ -45,12 +45,21 @@ if(HAVE_JASPER) list(APPEND GRFMT_LIBS ${JASPER_LIBRARIES}) if(OPENCV_IO_FORCE_JASPER) add_definitions(-DOPENCV_IMGCODECS_FORCE_JASPER=1) + else() + message(STATUS "imgcodecs: Jasper codec is disabled in runtime. Details: https://github.com/opencv/opencv/issues/14058") endif() endif() if(HAVE_OPENEXR) include_directories(SYSTEM ${OPENEXR_INCLUDE_PATHS}) list(APPEND GRFMT_LIBS ${OPENEXR_LIBRARIES}) + if(OPENCV_IO_FORCE_OPENEXR + OR NOT BUILD_OPENEXR # external OpenEXR versions are not disabled + ) + add_definitions(-DOPENCV_IMGCODECS_USE_OPENEXR=1) + else() + message(STATUS "imgcodecs: OpenEXR codec is disabled in runtime. 
Details: https://github.com/opencv/opencv/issues/21326") + endif() endif() if(HAVE_PNG OR HAVE_TIFF OR HAVE_OPENEXR) @@ -149,6 +158,9 @@ ocv_add_accuracy_tests() if(TARGET opencv_test_imgcodecs AND HAVE_JASPER AND "$ENV{OPENCV_IO_ENABLE_JASPER}") ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_ENABLE_JASPER_TESTS=1) endif() +if(TARGET opencv_test_imgcodecs AND HAVE_OPENEXR AND "$ENV{OPENCV_IO_ENABLE_OPENEXR}") + ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_ENABLE_OPENEXR_TESTS=1) +endif() if(TARGET opencv_test_imgcodecs AND HAVE_PNG AND NOT (PNG_VERSION VERSION_LESS "1.6.31")) # details: https://github.com/glennrp/libpng/commit/68cb0aaee3de6371b81a4613476d9b33e43e95b1 ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_PNG_WITH_EXIF=1) diff --git a/modules/imgcodecs/src/grfmt_exr.cpp b/modules/imgcodecs/src/grfmt_exr.cpp index 8cf4db99c5..92b132bdba 100644 --- a/modules/imgcodecs/src/grfmt_exr.cpp +++ b/modules/imgcodecs/src/grfmt_exr.cpp @@ -44,6 +44,9 @@ #ifdef HAVE_OPENEXR +#include +#include + #if defined _MSC_VER && _MSC_VER >= 1200 # pragma warning( disable: 4100 4244 4267 ) #endif @@ -78,6 +81,27 @@ namespace cv { +static bool isOpenEXREnabled() +{ + static const bool PARAM_ENABLE_OPENEXR = utils::getConfigurationParameterBool("OPENCV_IO_ENABLE_OPENEXR", +#ifdef OPENCV_IMGCODECS_USE_OPENEXR + true +#else + false +#endif + ); + return PARAM_ENABLE_OPENEXR; +} +static void initOpenEXR() +{ + if (!isOpenEXREnabled()) + { + const char* message = "imgcodecs: OpenEXR codec is disabled. You can enable it via 'OPENCV_IO_ENABLE_OPENEXR' option. Refer for details and cautions here: https://github.com/opencv/opencv/issues/21326"; + CV_LOG_WARNING(NULL, message); + CV_Error(Error::StsNotImplemented, message); + } +} + /////////////////////// ExrDecoder /////////////////// ExrDecoder::ExrDecoder() @@ -575,6 +599,7 @@ void ExrDecoder::RGBToGray( float *in, float *out ) ImageDecoder ExrDecoder::newDecoder() const { + initOpenEXR(); return makePtr(); } @@ -698,6 +723,7 @@ bool ExrEncoder::write( const Mat& img, const std::vector& params ) ImageEncoder ExrEncoder::newEncoder() const { + initOpenEXR(); return makePtr(); } diff --git a/modules/imgcodecs/test/test_grfmt.cpp b/modules/imgcodecs/test/test_grfmt.cpp index 6866c8d092..cbf6289d23 100644 --- a/modules/imgcodecs/test/test_grfmt.cpp +++ b/modules/imgcodecs/test/test_grfmt.cpp @@ -363,6 +363,6 @@ TEST(Imgcodecs, write_parameter_type) }} // namespace -#ifdef HAVE_OPENEXR +#if defined(HAVE_OPENEXR) && defined(OPENCV_IMGCODECS_ENABLE_OPENEXR_TESTS) #include "test_exr.impl.hpp" #endif From 85d4e56bb160e0f0a947dc3eb384c2e5885ed0db Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 23 Dec 2021 12:15:14 +0000 Subject: [PATCH 05/15] highgui(gtk): fix onmouse event flags --- modules/highgui/src/window_gtk.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/modules/highgui/src/window_gtk.cpp b/modules/highgui/src/window_gtk.cpp index 5ce88fb7d9..db0858369a 100644 --- a/modules/highgui/src/window_gtk.cpp +++ b/modules/highgui/src/window_gtk.cpp @@ -1881,14 +1881,18 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da (unsigned)pt.y < (unsigned)(image_widget->original_image->height) )) { - state &= gtk_accelerator_get_default_mod_mask(); - flags |= BIT_MAP(state, GDK_SHIFT_MASK, CV_EVENT_FLAG_SHIFTKEY) | - BIT_MAP(state, GDK_CONTROL_MASK, CV_EVENT_FLAG_CTRLKEY) | - BIT_MAP(state, GDK_MOD1_MASK, 
CV_EVENT_FLAG_ALTKEY) | - BIT_MAP(state, GDK_MOD2_MASK, CV_EVENT_FLAG_ALTKEY) | + // handle non-keyboard (mouse) modifiers first + flags |= BIT_MAP(state, GDK_BUTTON1_MASK, CV_EVENT_FLAG_LBUTTON) | BIT_MAP(state, GDK_BUTTON2_MASK, CV_EVENT_FLAG_MBUTTON) | BIT_MAP(state, GDK_BUTTON3_MASK, CV_EVENT_FLAG_RBUTTON); + // keyboard modifiers + state &= gtk_accelerator_get_default_mod_mask(); + flags |= + BIT_MAP(state, GDK_SHIFT_MASK, CV_EVENT_FLAG_SHIFTKEY) | + BIT_MAP(state, GDK_CONTROL_MASK, CV_EVENT_FLAG_CTRLKEY) | + BIT_MAP(state, GDK_MOD1_MASK, CV_EVENT_FLAG_ALTKEY) | + BIT_MAP(state, GDK_MOD2_MASK, CV_EVENT_FLAG_ALTKEY); window->on_mouse( cv_event, pt.x, pt.y, flags, window->on_mouse_param ); } } From 36bd2a65e35c0aa5070150e1db773b091a3000d6 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 23 Dec 2021 13:06:44 +0000 Subject: [PATCH 06/15] highgui(gtk): repair scroll events --- modules/highgui/src/window_gtk.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/highgui/src/window_gtk.cpp b/modules/highgui/src/window_gtk.cpp index 5ce88fb7d9..a2c9505eb1 100644 --- a/modules/highgui/src/window_gtk.cpp +++ b/modules/highgui/src/window_gtk.cpp @@ -1824,6 +1824,10 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da } else if( event->type == GDK_SCROLL ) { + GdkEventScroll* event_scroll = (GdkEventScroll*)event; + pt32f.x = cvFloor(event_scroll->x); + pt32f.y = cvFloor(event_scroll->y); + #if defined(GTK_VERSION3_4) // NOTE: in current implementation doesn't possible to put into callback function delta_x and delta_y separately double delta = (event->scroll.delta_x + event->scroll.delta_y); From cc02fcd8899d3eecaac1199f17ce40e6fbd4f4e8 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 22 Dec 2021 23:33:57 +0000 Subject: [PATCH 07/15] dnn: improve debug messages, add ONNX opset version --- modules/dnn/src/dnn.cpp | 114 ++++++++++++------ modules/dnn/src/dnn_common.hpp | 37 ++++++ .../dnn/src/onnx/onnx_graph_simplifier.hpp | 2 - modules/dnn/src/onnx/onnx_importer.cpp | 54 ++++++++- modules/dnn/src/precomp.hpp | 1 + .../src/tensorflow/tf_graph_simplifier.hpp | 2 - 6 files changed, 169 insertions(+), 41 deletions(-) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index b35dda9ddf..e71026260e 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -247,8 +247,6 @@ std::vector getAvailableTargets(Backend be) namespace { - typedef std::vector ShapesVec; - struct LayerShapes { ShapesVec in, out, internal; @@ -2981,20 +2979,24 @@ struct Net::Impl : public detail::NetImplBase void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes) { - std::vector& inputLayerIds = layers[id].inputBlobsId; + CV_CheckGE(id, 0, ""); + CV_CheckLT(id, (int)layers.size(), ""); + LayerData& layerData = layers[id]; + std::vector& inputLayerIds = layerData.inputBlobsId; + LayerShapes& layerShapes = inOutShapes[id]; - if (id == 0 && inOutShapes[id].in[0].empty()) + if (id == 0 && layerShapes.in[0].empty()) { - if (!layers[0].outputBlobs.empty()) + if (!layerData.outputBlobs.empty()) { ShapesVec shapes; - for (int i = 0; i < layers[0].outputBlobs.size(); i++) + for (int i = 0; i < layerData.outputBlobs.size(); i++) { - Mat& inp = layers[0].outputBlobs[i]; - CV_Assert(inp.total()); + Mat& inp = layerData.outputBlobs[i]; + CV_Assert(!inp.empty()); shapes.push_back(shape(inp)); } - inOutShapes[0].in = shapes; + layerShapes.in = shapes; } else { @@ -3010,17 +3012,17 @@ struct Net::Impl : public detail::NetImplBase } if (none) { - 
inOutShapes[0].out.clear(); + layerShapes.out.clear(); return; } else { - inOutShapes[0].in = inputShapes; + layerShapes.in = inputShapes; } } } - if (inOutShapes[id].in.empty()) + if (layerShapes.in.empty()) { for(int i = 0; i < inputLayerIds.size(); i++) { @@ -3033,14 +3035,14 @@ struct Net::Impl : public detail::NetImplBase getLayerShapesRecursively(layerId, inOutShapes); } const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid]; - inOutShapes[id].in.push_back(shape); + layerShapes.in.push_back(shape); } } - const ShapesVec& is = inOutShapes[id].in; - ShapesVec& os = inOutShapes[id].out; - ShapesVec& ints = inOutShapes[id].internal; - int requiredOutputs = layers[id].requiredOutputs.size(); - Ptr l = layers[id].getLayerInstance(); + const ShapesVec& is = layerShapes.in; + ShapesVec& os = layerShapes.out; + ShapesVec& ints = layerShapes.internal; + int requiredOutputs = layerData.requiredOutputs.size(); + Ptr l = layerData.getLayerInstance(); CV_Assert(l); bool layerSupportInPlace = false; try @@ -3068,13 +3070,38 @@ struct Net::Impl : public detail::NetImplBase CV_LOG_ERROR(NULL, "Exception message: " << e.what()); throw; } - inOutShapes[id].supportInPlace = layerSupportInPlace; + layerShapes.supportInPlace = layerSupportInPlace; - for (int i = 0; i < ints.size(); i++) - CV_Assert(total(ints[i]) > 0); + try + { + for (int i = 0; i < ints.size(); i++) + CV_CheckGT(total(ints[i]), 0, ""); - for (int i = 0; i < os.size(); i++) - CV_Assert(total(os[i]) > 0); + for (int i = 0; i < os.size(); i++) + CV_CheckGT(total(os[i]), 0, ""); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() post validation failed." << + " inputs=" << is.size() << + " outputs=" << os.size() << "/" << requiredOutputs << + " blobs=" << l->blobs.size() << + " inplace=" << layerSupportInPlace); + for (size_t i = 0; i < is.size(); ++i) + { + CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i])); + } + for (size_t i = 0; i < os.size(); ++i) + { + CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i])); + } + for (size_t i = 0; i < l->blobs.size(); ++i) + { + CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i]))); + } + CV_LOG_ERROR(NULL, "Exception message: " << e.what()); + throw; + } } void getLayersShapes(const ShapesVec& netInputShapes, @@ -3102,42 +3129,57 @@ struct Net::Impl : public detail::NetImplBase void updateLayersShapes() { - CV_Assert(!layers[0].outputBlobs.empty()); + CV_LOG_DEBUG(NULL, "updateLayersShapes() with layers.size=" << layers.size()); + CV_Assert(netInputLayer); + DataLayer& inputLayer = *netInputLayer; + LayerData& inputLayerData = layers[0]; + CV_Assert(inputLayerData.layerInstance.get() == &inputLayer); + CV_Assert(!inputLayerData.outputBlobs.empty()); ShapesVec inputShapes; - for(int i = 0; i < layers[0].outputBlobs.size(); i++) + for(int i = 0; i < inputLayerData.outputBlobs.size(); i++) { - Mat& inp = layers[0].outputBlobs[i]; - CV_Assert(inp.total()); - if (preferableBackend == DNN_BACKEND_OPENCV && + Mat& inp = inputLayerData.outputBlobs[i]; + CV_Assert(!inp.empty()); + if (preferableBackend == DNN_BACKEND_OPENCV && // FIXIT: wrong place for output allocation preferableTarget == DNN_TARGET_OPENCL_FP16) { - layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S); + inp.create(inp.dims, inp.size, CV_16S); } inputShapes.push_back(shape(inp)); } + CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes")); 
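+        // Shapes are filled on demand below: each layer's input shapes are taken from
+        // its producers' output shapes (recursing via getLayerShapesRecursively when
+        // they are not known yet), then the layer is notified via updateMemoryShapes().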
LayersShapesMap layersShapes; layersShapes[0].in = inputShapes; for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) { int layerId = it->first; - std::vector& inputLayerIds = it->second.inputBlobsId; - if (layersShapes[layerId].in.empty()) + LayerData& layerData = it->second; + std::vector& inputLayerIds = layerData.inputBlobsId; + LayerShapes& layerShapes = layersShapes[layerId]; + CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size()); + if (layerShapes.in.empty()) { for(int i = 0; i < inputLayerIds.size(); i++) { - int inputLayerId = inputLayerIds[i].lid; + const LayerPin& inputPin = inputLayerIds[i]; + int inputLayerId = inputPin.lid; + CV_LOG_DEBUG(NULL, " input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")"); LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId); - if(inputIt == layersShapes.end() || inputIt->second.out.empty()) + if (inputIt == layersShapes.end() || inputIt->second.out.empty()) { getLayerShapesRecursively(inputLayerId, layersShapes); } - const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid]; - layersShapes[layerId].in.push_back(shape); + const MatShape& shape = layersShapes[inputLayerId].out[inputPin.oid]; + layerShapes.in.push_back(shape); } - it->second.layerInstance->updateMemoryShapes(layersShapes[layerId].in); + layerData.layerInstance->updateMemoryShapes(layerShapes.in); } + CV_LOG_DEBUG(NULL, "Layer " << layerId << ": " << toString(layerShapes.in, "input shapes")); + CV_LOG_IF_DEBUG(NULL, !layerShapes.out.empty(), "Layer " << layerId << ": " << toString(layerShapes.out, "output shapes")); + CV_LOG_IF_DEBUG(NULL, !layerShapes.internal.empty(), "Layer " << layerId << ": " << toString(layerShapes.internal, "internal shapes")); } + CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE"); } LayerPin getLatestLayerPin(const std::vector& pins) diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp index 0f3feda91b..7360031801 100644 --- a/modules/dnn/src/dnn_common.hpp +++ b/modules/dnn/src/dnn_common.hpp @@ -29,6 +29,43 @@ struct NetImplBase } // namespace detail + +typedef std::vector ShapesVec; + +static inline std::string toString(const ShapesVec& shapes, const std::string& name = std::string()) +{ + std::ostringstream ss; + if (!name.empty()) + ss << name << ' '; + ss << '['; + for(size_t i = 0, n = shapes.size(); i < n; ++i) + ss << ' ' << toString(shapes[i]); + ss << " ]"; + return ss.str(); +} + +static inline std::string toString(const Mat& blob, const std::string& name = std::string()) +{ + std::ostringstream ss; + if (!name.empty()) + ss << name << ' '; + if (blob.empty()) + { + ss << ""; + } + else if (blob.dims == 1) + { + Mat blob_ = blob; + blob_.dims = 2; // hack + ss << blob_.t(); + } + else + { + ss << blob.reshape(1, 1); + } + return ss.str(); +} + CV__DNN_EXPERIMENTAL_NS_END }} // namespace diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.hpp b/modules/dnn/src/onnx/onnx_graph_simplifier.hpp index b4497adb75..dd4948d729 100644 --- a/modules/dnn/src/onnx/onnx_graph_simplifier.hpp +++ b/modules/dnn/src/onnx/onnx_graph_simplifier.hpp @@ -8,8 +8,6 @@ #ifndef __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__ #define __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__ -#include "../precomp.hpp" - #if defined(__GNUC__) && __GNUC__ >= 5 #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsuggest-override" diff --git 
a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 7bedf9543f..0d3313c039 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -65,6 +65,7 @@ public: ONNXImporter(Net& net, const char *onnxFile) : dstNet(net), dispatch(buildDispatchMap()) + , onnx_opset(0) { hasDynamicShapes = false; CV_Assert(onnxFile); @@ -86,6 +87,7 @@ public: ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer) : dstNet(net), dispatch(buildDispatchMap()) + , onnx_opset(0) { hasDynamicShapes = false; CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)"); @@ -178,6 +180,9 @@ private: const DispatchMap dispatch; static const DispatchMap buildDispatchMap(); + + int onnx_opset; // OperatorSetIdProto for 'onnx' domain + void parseOperatorSet(); }; inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey) @@ -489,10 +494,45 @@ void ONNXImporter::addNegation(const LayerParams& layerParams, opencv_onnx::Node void ONNXImporter::addConstant(const std::string& name, const Mat& blob) { + CV_LOG_DEBUG(NULL, "DNN/ONNX: add constant '" << name << "' shape=" << toString(shape(blob)) << ": " << toString(blob)); constBlobs.insert(std::make_pair(name, blob)); outShapes.insert(std::make_pair(name, shape(blob))); } +void ONNXImporter::parseOperatorSet() +{ + int ir_version = model_proto.has_ir_version() ? static_cast(model_proto.ir_version()) : -1; + if (ir_version < 3) + return; + + int opset_size = model_proto.opset_import_size(); + if (opset_size <= 0) + { + CV_LOG_INFO(NULL, "DNN/ONNX: missing opset information") + return; + } + + for (int i = 0; i < opset_size; ++i) + { + const ::opencv_onnx::OperatorSetIdProto& opset_entry = model_proto.opset_import(i); + const std::string& domain = opset_entry.has_domain() ? opset_entry.domain() : std::string(); + int version = opset_entry.has_version() ? opset_entry.version() : -1; + if (domain.empty() || domain == "ai.onnx") + { + // ONNX opset covered by specification: https://github.com/onnx/onnx/blob/master/docs/Operators.md + onnx_opset = std::max(onnx_opset, version); + } + else + { + // OpenCV don't know other opsets + // will fail later on unsupported node processing + CV_LOG_WARNING(NULL, "DNN/ONNX: unsupported opset[" << i << "]: domain='" << domain << "' version=" << version); + } + } + + CV_LOG_INFO(NULL, "DNN/ONNX: ONNX opset version = " << onnx_opset); +} + void ONNXImporter::populateNet() { CV_Assert(model_proto.has_graph()); @@ -513,6 +553,8 @@ void ONNXImporter::populateNet() << ", outputs = " << graph_proto.output_size() ); + parseOperatorSet(); + simplifySubgraphs(graph_proto); const int layersSize = graph_proto.node_size(); @@ -539,7 +581,8 @@ void ONNXImporter::populateNet() if (!tensorShape.dim(j).dim_param().empty() && !(j == 0 && inpShape.size() >= 3)) hasDynamicShapes = true; } - if (!inpShape.empty() && !hasDynamicShapes) + CV_LOG_DEBUG(NULL, "DNN/ONNX: input[" << i << "] shape=" << toString(inpShape)); + if (!inpShape.empty() && !hasDynamicShapes) // FIXIT result is not reliable for models with multiple inputs { inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size } @@ -573,6 +616,15 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto) CV_Assert(node_proto.output_size() >= 1); std::string name = node_proto.output(0); const std::string& layer_type = node_proto.op_type(); + const std::string& layer_type_domain = node_proto.has_domain() ? 
node_proto.domain() : std::string(); + if (!layer_type_domain.empty() && layer_type_domain != "ai.onnx") + { + CV_LOG_WARNING(NULL, "DNN/ONNX: can't handle node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " + << cv::format("[%s@%s]:(%s)", layer_type.c_str(), layer_type_domain.c_str(), name.c_str()) + ); + CV_Error(Error::StsNotImplemented, cv::format("ONNX: unsupported domain: %s", layer_type_domain.c_str())); + } + CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) ); diff --git a/modules/dnn/src/precomp.hpp b/modules/dnn/src/precomp.hpp index 62f8714af1..76ebc14f7d 100644 --- a/modules/dnn/src/precomp.hpp +++ b/modules/dnn/src/precomp.hpp @@ -60,5 +60,6 @@ #include #include #include +#include #include "dnn_common.hpp" diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp index 4f3dfa870d..39e1fefaf3 100644 --- a/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp +++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp @@ -8,8 +8,6 @@ #ifndef __OPENCV_DNN_TF_SIMPLIFIER_HPP__ #define __OPENCV_DNN_TF_SIMPLIFIER_HPP__ -#include "../precomp.hpp" - #ifdef HAVE_PROTOBUF #include "tf_io.hpp" From 51e65db715826931b67d3128cf5aaab5b3d655f8 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 23 Dec 2021 01:59:34 +0000 Subject: [PATCH 08/15] dnn(onnx): fix Resize inputs handling --- modules/dnn/src/onnx/onnx_importer.cpp | 31 +++++++++++++++++--------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 0d3313c039..defec7978b 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -2348,6 +2348,7 @@ void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::Node addLayer(layerParams, node_proto); } +// https://github.com/onnx/onnx/blob/master/docs/Operators.md#Resize void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { for (int i = 1; i < node_proto.input_size(); i++) @@ -2368,30 +2369,38 @@ void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::Node if (layerParams.get("mode") == "linear" && framework_name == "pytorch") layerParams.set("mode", "opencv_linear"); - // input = [X, scales], [X, roi, scales] or [x, roi, scales, sizes] - int foundScaleId = hasDynamicShapes ? node_proto.input_size() - 1 - : node_proto.input_size() > 2 ? 2 : 1; + // opset-10: input = [X, scales] + // opset-11: input = [X, roi, scales] or [x, roi, scales, sizes] + int scalesInputId = node_proto.input_size() == 2 ? 
1 : 2; - Mat scales = getBlob(node_proto, foundScaleId); - if (scales.total() == 4) + Mat scales = getBlob(node_proto, scalesInputId); + if (!scales.empty()) { + CV_CheckEQ(scales.total(), (size_t)4, "HCHW layout is expected"); layerParams.set("zoom_factor_y", scales.at(2)); layerParams.set("zoom_factor_x", scales.at(3)); } - else + else if (node_proto.input_size() >= 4) // opset-11 { - const std::string& inputLast = node_proto.input(node_proto.input_size() - 1); - if (constBlobs.find(inputLast) != constBlobs.end()) + const std::string& inputSizes = node_proto.input(3); + if (constBlobs.find(inputSizes) != constBlobs.end()) { - Mat shapes = getBlob(inputLast); - CV_CheckEQ(shapes.size[0], 4, ""); - CV_CheckEQ(shapes.size[1], 1, ""); + Mat shapes = getBlob(inputSizes); + CV_CheckEQ(shapes.total(), (size_t)4, "HCHW layout is expected"); CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, ""); if (shapes.depth() == CV_32F) shapes.convertTo(shapes, CV_32S); layerParams.set("width", shapes.at(3)); layerParams.set("height", shapes.at(2)); } + else + { + CV_Error(Error::StsNotImplemented, cv::format("ONNX/Resize: doesn't support dynamic non-constant 'sizes' input: %s", inputSizes.c_str())); + } + } + else + { + CV_Error(Error::StsNotImplemented, "ONNX/Resize: can't find neither 'scale' nor destination sizes parameters"); } replaceLayerParam(layerParams, "mode", "interpolation"); addLayer(layerParams, node_proto); From 88a18c8b6afb1f094b952d6348d688615cb15fe8 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 23 Dec 2021 02:01:03 +0000 Subject: [PATCH 09/15] dnn(onnx): emit error in Shape for dynamic input --- modules/dnn/src/onnx/onnx_importer.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index defec7978b..21bd6cc065 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -2146,11 +2146,22 @@ void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeP CV_Assert(shapeIt != outShapes.end()); const MatShape& inpShape = shapeIt->second; - Mat shapeMat(inpShape.size(), 1, CV_32S); - for (int j = 0; j < inpShape.size(); ++j) - shapeMat.at(j) = inpShape[j]; - shapeMat.dims = 1; + int dims = static_cast(inpShape.size()); + Mat shapeMat(dims, 1, CV_32S); + bool isDynamicShape = false; + for (int j = 0; j < dims; ++j) + { + int sz = inpShape[j]; + isDynamicShape |= (sz == 0); + shapeMat.at(j) = sz; + } + shapeMat.dims = 1; // FIXIT Mat 1D + if (isDynamicShape) + { + CV_LOG_ERROR(NULL, "DNN/ONNX(Shape): dynamic 'zero' shapes are not supported, input " << toString(inpShape, node_proto.input(0))); + CV_Assert(!isDynamicShape); // not supported + } addConstant(layerParams.name, shapeMat); } From 032a61b19759628a7a05e264bce01a343615bdfb Mon Sep 17 00:00:00 2001 From: catree Date: Wed, 22 Dec 2021 21:29:50 +0100 Subject: [PATCH 10/15] Summarize PnP pose computation on a single separate page. 
--- doc/opencv.bib | 8 +- modules/calib3d/doc/calib3d.bib | 13 +- modules/calib3d/doc/solvePnP.markdown | 176 +++++++++++++ modules/calib3d/include/opencv2/calib3d.hpp | 273 ++------------------ 4 files changed, 212 insertions(+), 258 deletions(-) create mode 100644 modules/calib3d/doc/solvePnP.markdown diff --git a/doc/opencv.bib b/doc/opencv.bib index bace4c68c9..800f57cafa 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -240,7 +240,7 @@ hal_id = {inria-00350283}, hal_version = {v1}, } -@article{Collins14 +@article{Collins14, year = {2014}, issn = {0920-5691}, journal = {International Journal of Computer Vision}, @@ -1271,6 +1271,12 @@ number={2}, pages={117-135}, } +@inproceedings{Zuliani2014RANSACFD, + title={RANSAC for Dummies With examples using the RANSAC toolbox for Matlab \& Octave and more...}, + author={Marco Zuliani}, + year={2014}, + url = {https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.475.1243&rep=rep1&type=pdf} +} @inproceedings{forstner1987fast, title={A fast operator for detection and precise location of distincs points, corners and center of circular features}, author={FORSTNER, W}, diff --git a/modules/calib3d/doc/calib3d.bib b/modules/calib3d/doc/calib3d.bib index a7e5a23982..074aab0a1b 100644 --- a/modules/calib3d/doc/calib3d.bib +++ b/modules/calib3d/doc/calib3d.bib @@ -40,10 +40,11 @@ publisher={IEEE} } -@inproceedings{Terzakis20, - author = {Terzakis, George and Lourakis, Manolis}, - year = {2020}, - month = {09}, - pages = {}, - title = {A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem} +@inproceedings{Terzakis2020SQPnP, + title={A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem}, + author={George Terzakis and Manolis Lourakis}, + booktitle={European Conference on Computer Vision}, + pages={478--494}, + year={2020}, + publisher={Springer International Publishing} } diff --git a/modules/calib3d/doc/solvePnP.markdown b/modules/calib3d/doc/solvePnP.markdown new file mode 100644 index 0000000000..dd4fbaa15d --- /dev/null +++ b/modules/calib3d/doc/solvePnP.markdown @@ -0,0 +1,176 @@ +# Perspective-n-Point (PnP) pose computation {#calib3d_solvePnP} + +## Pose computation overview + +The pose computation problem @cite Marchand16 consists in solving for the rotation and translation that minimizes the reprojection error from 3D-2D point correspondences. + +The `solvePnP` and related functions estimate the object pose given a set of object points, their corresponding image projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below (more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward and the Z-axis forward). 
+ +![](pnp.jpg) + +Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$ +using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$ (also denoted \f$ \bf{K} \f$ in the literature): + +\f[ + \begin{align*} + \begin{bmatrix} + u \\ + v \\ + 1 + \end{bmatrix} &= + \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} \\ + \begin{bmatrix} + u \\ + v \\ + 1 + \end{bmatrix} &= + \begin{bmatrix} + f_x & 0 & c_x \\ + 0 & f_y & c_y \\ + 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + 1 & 0 & 0 & 0 \\ + 0 & 1 & 0 & 0 \\ + 0 & 0 & 1 & 0 + \end{bmatrix} + \begin{bmatrix} + r_{11} & r_{12} & r_{13} & t_x \\ + r_{21} & r_{22} & r_{23} & t_y \\ + r_{31} & r_{32} & r_{33} & t_z \\ + 0 & 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} + \end{align*} +\f] + +The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming +a 3D point expressed in the world frame into the camera frame: + +\f[ + \begin{align*} + \begin{bmatrix} + X_c \\ + Y_c \\ + Z_c \\ + 1 + \end{bmatrix} &= + \hspace{0.2em} ^{c}\bf{T}_w + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} \\ + \begin{bmatrix} + X_c \\ + Y_c \\ + Z_c \\ + 1 + \end{bmatrix} &= + \begin{bmatrix} + r_{11} & r_{12} & r_{13} & t_x \\ + r_{21} & r_{22} & r_{23} & t_y \\ + r_{31} & r_{32} & r_{33} & t_z \\ + 0 & 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} + \end{align*} +\f] + +## Pose computation methods +@anchor calib3d_solvePnP_flags + +Refer to the cv::SolvePnPMethod enum documentation for the list of possible values. Some details about each method are described below: + +- cv::SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In +this case the function finds such a pose that minimizes reprojection error, that is the sum +of squared distances between the observed projections "imagePoints" and the projected (using +cv::projectPoints ) "objectPoints". Initial solution for non-planar "objectPoints" needs at least 6 points and uses the DLT algorithm. +Initial solution for planar "objectPoints" needs at least 4 points and uses pose from homography decomposition. +- cv::SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang +"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). +In this case the function requires exactly four object and image points. +- cv::SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis +"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). +In this case the function requires exactly four object and image points. +- cv::SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the +paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). +- cv::SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n +Method is based on the paper of J. Hesch and S. Roumeliotis. +"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). +- cv::SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n +Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto, +F. Moreno-Noguer. 
"Exhaustive Linearization for Robust Camera Pose and Focal Length +Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ +assuming that both have the same value. Then the cameraMatrix is updated with the estimated +focal length. +- cv::SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. +- cv::SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. +It requires 4 coplanar object points defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] +- cv::SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the +Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis2020SQPnP). It requires 3 or more points. + +## P3P + +The cv::solveP3P() computes an object pose from exactly 3 3D-2D point correspondences. A P3P problem has up to 4 solutions. + +@note The solutions are sorted by reprojection errors (lowest to highest). + +## PnP + +The cv::solvePnP() returns the rotation and the translation vectors that transform a 3D point expressed in the object +coordinate frame to the camera coordinate frame, using different methods: +- P3P methods (cv::SOLVEPNP_P3P, cv::SOLVEPNP_AP3P): need 4 input points to return a unique solution. +- cv::SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar. +- cv::SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation. +Number of input points must be 4. Object points must be defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] +- for all the other flags, number of input points must be >= 4 and object points can be in any configuration. + +## Generic PnP + +The cv::solvePnPGeneric() allows retrieving all the possible solutions. + +Currently, only cv::SOLVEPNP_P3P, cv::SOLVEPNP_AP3P, cv::SOLVEPNP_IPPE, cv::SOLVEPNP_IPPE_SQUARE, cv::SOLVEPNP_SQPNP can return multiple solutions. + +## RANSAC PnP + +The cv::solvePnPRansac() computes the object pose wrt. the camera frame using a RANSAC scheme to deal with outliers. + +More information can be found in @cite Zuliani2014RANSACFD + +## Pose refinement + +Pose refinement consists in estimating the rotation and translation that minimizes the reprojection error using a non-linear minimization method and starting from an initial estimate of the solution. OpenCV proposes cv::solvePnPRefineLM() and cv::solvePnPRefineVVS() for this problem. + +cv::solvePnPRefineLM() uses a non-linear Levenberg-Marquardt minimization scheme @cite Madsen04 @cite Eade13 and the current implementation computes the rotation update as a perturbation and not on SO(3). + +cv::solvePnPRefineVVS() uses a Gauss-Newton non-linear minimization scheme @cite Marchand16 and with an update of the rotation part computed using the exponential map. + +@note at least three 3D-2D point correspondences are necessary. 
diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index 1d2e2a9609..d6b23d9e3e 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -447,7 +447,9 @@ enum { LMEDS = 4, //!< least-median of squares algorithm }; enum SolvePnPMethod { - SOLVEPNP_ITERATIVE = 0, + SOLVEPNP_ITERATIVE = 0, //!< Pose refinement using non-linear Levenberg-Marquardt minimization scheme @cite Madsen04 @cite Eade13 \n + //!< Initial solution for non-planar "objectPoints" needs at least 6 points and uses the DLT algorithm. \n + //!< Initial solution for planar "objectPoints" needs at least 4 points and uses pose from homography decomposition. SOLVEPNP_EPNP = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp SOLVEPNP_P3P = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete SOLVEPNP_DLS = 3, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n @@ -464,7 +466,7 @@ enum SolvePnPMethod { //!< - point 1: [ squareLength / 2, squareLength / 2, 0] //!< - point 2: [ squareLength / 2, -squareLength / 2, 0] //!< - point 3: [-squareLength / 2, -squareLength / 2, 0] - SOLVEPNP_SQPNP = 8, //!< SQPnP: A Consistently Fast and Globally OptimalSolution to the Perspective-n-Point Problem @cite Terzakis20 + SOLVEPNP_SQPNP = 8, //!< SQPnP: A Consistently Fast and Globally OptimalSolution to the Perspective-n-Point Problem @cite Terzakis2020SQPnP #ifndef CV_DOXYGEN SOLVEPNP_MAX_COUNT //!< Used for count #endif @@ -779,6 +781,9 @@ Check @ref tutorial_homography "the corresponding tutorial" for more details */ /** @brief Finds an object pose from 3D-2D point correspondences. + +@see @ref calib3d_solvePnP + This function returns the rotation and the translation vectors that transform a 3D point expressed in the object coordinate frame to the camera coordinate frame, using different methods: - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution. @@ -805,133 +810,9 @@ the model coordinate system to the camera coordinate system. @param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses the provided rvec and tvec values as initial approximations of the rotation and translation vectors, respectively, and further optimizes them. -@param flags Method for solving a PnP problem: -- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In -this case the function finds such a pose that minimizes reprojection error, that is the sum -of squared distances between the observed projections imagePoints and the projected (using -@ref projectPoints ) objectPoints . -- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang -"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). -In this case the function requires exactly four object and image points. -- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis -"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). -In this case the function requires exactly four object and image points. -- @ref SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the -paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). -- @ref SOLVEPNP_DLS **Broken implementation. 
Using this flag will fallback to EPnP.** \n -Method is based on the paper of J. Hesch and S. Roumeliotis. -"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). -- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n -Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto, -F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length -Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ -assuming that both have the same value. Then the cameraMatrix is updated with the estimated -focal length. -- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli. -"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. -- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli. -"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. -It requires 4 coplanar object points defined in the following order: - - point 0: [-squareLength / 2, squareLength / 2, 0] - - point 1: [ squareLength / 2, squareLength / 2, 0] - - point 2: [ squareLength / 2, -squareLength / 2, 0] - - point 3: [-squareLength / 2, -squareLength / 2, 0] -- @ref SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the -Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis20). It requires 3 or more points. +@param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags - -The function estimates the object pose given a set of object points, their corresponding image -projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below -(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward -and the Z-axis forward). 
- -![](pnp.jpg) - -Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$ -using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$: - -\f[ - \begin{align*} - \begin{bmatrix} - u \\ - v \\ - 1 - \end{bmatrix} &= - \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} \\ - \begin{bmatrix} - u \\ - v \\ - 1 - \end{bmatrix} &= - \begin{bmatrix} - f_x & 0 & c_x \\ - 0 & f_y & c_y \\ - 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - 1 & 0 & 0 & 0 \\ - 0 & 1 & 0 & 0 \\ - 0 & 0 & 1 & 0 - \end{bmatrix} - \begin{bmatrix} - r_{11} & r_{12} & r_{13} & t_x \\ - r_{21} & r_{22} & r_{23} & t_y \\ - r_{31} & r_{32} & r_{33} & t_z \\ - 0 & 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} - \end{align*} -\f] - -The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming -a 3D point expressed in the world frame into the camera frame: - -\f[ - \begin{align*} - \begin{bmatrix} - X_c \\ - Y_c \\ - Z_c \\ - 1 - \end{bmatrix} &= - \hspace{0.2em} ^{c}\bf{T}_w - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} \\ - \begin{bmatrix} - X_c \\ - Y_c \\ - Z_c \\ - 1 - \end{bmatrix} &= - \begin{bmatrix} - r_{11} & r_{12} & r_{13} & t_x \\ - r_{21} & r_{22} & r_{23} & t_y \\ - r_{31} & r_{32} & r_{33} & t_z \\ - 0 & 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} - \end{align*} -\f] +More information about Perspective-n-Points is described in @ref calib3d_solvePnP @note - An example of how to use solvePnP for planar augmented reality can be found at @@ -971,6 +852,8 @@ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints, /** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme. +@see @ref calib3d_solvePnP + @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, @@ -1019,6 +902,8 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint /** @brief Finds an object pose from 3 3D-2D point correspondences. +@see @ref calib3d_solvePnP + @param objectPoints Array of object points in the object coordinate space, 3x3 1-channel or 1x3/3x1 3-channel. vector\ can be also passed here. @param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel. @@ -1050,6 +935,8 @@ CV_EXPORTS_W int solveP3P( InputArray objectPoints, InputArray imagePoints, /** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution. +@see @ref calib3d_solvePnP + @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, where N is the number of points. vector\ can also be passed here. 
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, @@ -1077,6 +964,8 @@ CV_EXPORTS_W void solvePnPRefineLM( InputArray objectPoints, InputArray imagePoi /** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution. +@see @ref calib3d_solvePnP + @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, where N is the number of points. vector\ can also be passed here. @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, @@ -1105,6 +994,9 @@ CV_EXPORTS_W void solvePnPRefineVVS( InputArray objectPoints, InputArray imagePo double VVSlambda = 1); /** @brief Finds an object pose from 3D-2D point correspondences. + +@see @ref calib3d_solvePnP + This function returns a list of all the possible solutions (a solution is a couple), depending on the number of input points and the chosen method: - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): 3 or 4 input points. Number of returned solutions can be between 0 and 4 with 3 input points. @@ -1132,37 +1024,7 @@ the model coordinate system to the camera coordinate system. @param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses the provided rvec and tvec values as initial approximations of the rotation and translation vectors, respectively, and further optimizes them. -@param flags Method for solving a PnP problem: -- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In -this case the function finds such a pose that minimizes reprojection error, that is the sum -of squared distances between the observed projections imagePoints and the projected (using -projectPoints ) objectPoints . -- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang -"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). -In this case the function requires exactly four object and image points. -- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis -"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). -In this case the function requires exactly four object and image points. -- @ref SOLVEPNP_EPNP Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the -paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). -- @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n -Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis. -"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). -- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n -Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto, -F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length -Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ -assuming that both have the same value. Then the cameraMatrix is updated with the estimated -focal length. -- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli. -"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. 
-- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli. -"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. -It requires 4 coplanar object points defined in the following order: - - point 0: [-squareLength / 2, squareLength / 2, 0] - - point 1: [ squareLength / 2, squareLength / 2, 0] - - point 2: [ squareLength / 2, -squareLength / 2, 0] - - point 3: [-squareLength / 2, -squareLength / 2, 0] +@param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags @param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE and useExtrinsicGuess is set to true. @param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE @@ -1171,98 +1033,7 @@ and useExtrinsicGuess is set to true. (\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points and the 3D object points projected with the estimated pose. -The function estimates the object pose given a set of object points, their corresponding image -projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below -(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward -and the Z-axis forward). - -![](pnp.jpg) - -Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$ -using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$: - -\f[ - \begin{align*} - \begin{bmatrix} - u \\ - v \\ - 1 - \end{bmatrix} &= - \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} \\ - \begin{bmatrix} - u \\ - v \\ - 1 - \end{bmatrix} &= - \begin{bmatrix} - f_x & 0 & c_x \\ - 0 & f_y & c_y \\ - 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - 1 & 0 & 0 & 0 \\ - 0 & 1 & 0 & 0 \\ - 0 & 0 & 1 & 0 - \end{bmatrix} - \begin{bmatrix} - r_{11} & r_{12} & r_{13} & t_x \\ - r_{21} & r_{22} & r_{23} & t_y \\ - r_{31} & r_{32} & r_{33} & t_z \\ - 0 & 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} - \end{align*} -\f] - -The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming -a 3D point expressed in the world frame into the camera frame: - -\f[ - \begin{align*} - \begin{bmatrix} - X_c \\ - Y_c \\ - Z_c \\ - 1 - \end{bmatrix} &= - \hspace{0.2em} ^{c}\bf{T}_w - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} \\ - \begin{bmatrix} - X_c \\ - Y_c \\ - Z_c \\ - 1 - \end{bmatrix} &= - \begin{bmatrix} - r_{11} & r_{12} & r_{13} & t_x \\ - r_{21} & r_{22} & r_{23} & t_y \\ - r_{31} & r_{32} & r_{33} & t_z \\ - 0 & 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} - \end{align*} -\f] +More information is described in @ref calib3d_solvePnP @note - An example of how to use solvePnP for planar augmented reality can be found at From ed4becf00757e6ee2aec1312d7ee688fd414e9bc Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 23 Dec 2021 16:00:52 +0000 Subject: [PATCH 11/15] dnn(onnx): debug dump of inputs/outputs/initializers in importer --- .../dnn/src/onnx/onnx_graph_simplifier.cpp | 2 +- .../dnn/src/onnx/onnx_graph_simplifier.hpp | 2 +- 
 modules/dnn/src/onnx/onnx_importer.cpp     | 127 +++++++++++++----
 3 files changed, 99 insertions(+), 32 deletions(-)

diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
index e4cf73fd07..78e593788d 100644
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -636,7 +636,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
     simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs);
 }
 
-Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
+Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
 {
     if (tensor_proto.raw_data().empty() && tensor_proto.float_data().empty() &&
         tensor_proto.double_data().empty() && tensor_proto.int64_data().empty())
diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.hpp b/modules/dnn/src/onnx/onnx_graph_simplifier.hpp
index dd4948d729..8e04e97803 100644
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.hpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.hpp
@@ -31,7 +31,7 @@ void convertInt64ToInt32(const T1& src, T2& dst, int size)
     }
 }
 
-Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto);
+Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto);
 
 CV__DNN_EXPERIMENTAL_NS_END
 }}  // namespace dnn, namespace cv
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 21bd6cc065..47ec830313 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -10,7 +10,7 @@
 #include <opencv2/core/utils/logger.defines.hpp>
 #undef CV_LOG_STRIP_LEVEL
-#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
+#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
 #include <opencv2/core/utils/logger.hpp>
 
 #ifdef HAVE_PROTOBUF
@@ -193,6 +193,53 @@ inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, co
     }
 }
 
+static
+void dumpValueInfoProto(int i, const opencv_onnx::ValueInfoProto& valueInfoProto, const std::string& prefix)
+{
+    CV_Assert(valueInfoProto.has_name());
+    CV_Assert(valueInfoProto.has_type());
+    const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
+    CV_Assert(typeProto.has_tensor_type());
+    const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
+    CV_Assert(tensor.has_shape());
+    const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
+
+    int dim_size = tensorShape.dim_size();
+    CV_CheckGE(dim_size, 0, "");
+    MatShape shape(dim_size);
+    for (int j = 0; j < dim_size; ++j)
+    {
+        const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
+        if (dimension.has_dim_param())
+        {
+            CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
+        }
+        // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
+        if (dimension.has_denotation())
+        {
+            CV_LOG_INFO(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
+        }
+        shape[j] = dimension.dim_value();
+    }
+    CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << " as '" << valueInfoProto.name() << "'] shape=" << toString(shape));
+}
+
+static
+void dumpTensorProto(int i, const opencv_onnx::TensorProto& tensorProto, const std::string& prefix)
+{
+    if (utils::logging::getLogLevel() < utils::logging::LOG_LEVEL_VERBOSE)
+        return;
+    int dim_size = tensorProto.dims_size();
+    CV_CheckGE(dim_size, 0, "");
+    MatShape shape(dim_size);
+    for (int j = 0; j < dim_size; ++j)
+    {
+        int sz = static_cast<int>(tensorProto.dims(j));
+        shape[j] = sz;
+    }
+    CV_LOG_VERBOSE(NULL, 0, "DNN/ONNX: " << prefix << "[" << i << " as '" << tensorProto.name() << "'] shape=" << toString(shape) << " data_type=" << (int)tensorProto.data_type());
+}
+
 void releaseONNXTensor(opencv_onnx::TensorProto& tensor_proto)
 {
     if (!tensor_proto.raw_data().empty()) {
@@ -233,17 +280,17 @@ void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
 std::map<std::string, Mat> ONNXImporter::getGraphTensors(
                                         const opencv_onnx::GraphProto& graph_proto)
 {
-    opencv_onnx::TensorProto tensor_proto;
-    std::map<std::string, Mat> layers_weights;
+    std::map<std::string, Mat> layers_weights;
 
-    for (int i = 0; i < graph_proto.initializer_size(); i++)
-    {
-        tensor_proto = graph_proto.initializer(i);
-        Mat mat = getMatFromTensor(tensor_proto);
-        releaseONNXTensor(tensor_proto);
-        layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
-    }
-    return layers_weights;
+    for (int i = 0; i < graph_proto.initializer_size(); i++)
+    {
+        const opencv_onnx::TensorProto& tensor_proto = graph_proto.initializer(i);
+        dumpTensorProto(i, tensor_proto, "initializer");
+        Mat mat = getMatFromTensor(tensor_proto);
+        releaseONNXTensor(const_cast<opencv_onnx::TensorProto&>(tensor_proto));  // drop already loaded data
+        layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
+    }
+    return layers_weights;
 }
 
 static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
@@ -549,6 +596,7 @@ void ONNXImporter::populateNet()
         << " model produced by '" << framework_name << "'"
         << (framework_version.empty() ? cv::String() : cv::format(":%s", framework_version.c_str()))
         << ". Number of nodes = " << graph_proto.node_size()
+        << ", initializers = " << graph_proto.initializer_size()
         << ", inputs = " << graph_proto.input_size()
         << ", outputs = " << graph_proto.output_size()
     );
@@ -560,48 +608,67 @@ void ONNXImporter::populateNet()
     const int layersSize = graph_proto.node_size();
     CV_LOG_DEBUG(NULL, "DNN/ONNX: graph simplified to " << layersSize << " nodes");
 
-    constBlobs = getGraphTensors(graph_proto);
+    constBlobs = getGraphTensors(graph_proto);  // scan GraphProto.initializer
+    std::vector<std::string> netInputs;  // map with network inputs (without const blobs)
     // Add all the inputs shapes. It includes as constant blobs as network's inputs shapes.
     for (int i = 0; i < graph_proto.input_size(); ++i)
     {
         const opencv_onnx::ValueInfoProto& valueInfoProto = graph_proto.input(i);
         CV_Assert(valueInfoProto.has_name());
+        const std::string& name = valueInfoProto.name();
         CV_Assert(valueInfoProto.has_type());
-        opencv_onnx::TypeProto typeProto = valueInfoProto.type();
+        const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
         CV_Assert(typeProto.has_tensor_type());
-        opencv_onnx::TypeProto::Tensor tensor = typeProto.tensor_type();
+        const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
         CV_Assert(tensor.has_shape());
-        opencv_onnx::TensorShapeProto tensorShape = tensor.shape();
+        const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
 
-        MatShape inpShape(tensorShape.dim_size());
-        for (int j = 0; j < inpShape.size(); ++j)
+        int dim_size = tensorShape.dim_size();
+        CV_CheckGE(dim_size, 0, "");  // some inputs are scalars (dims=0), e.g. in Test_ONNX_nets.Resnet34_kinetics test
+        MatShape inpShape(dim_size);
+        for (int j = 0; j < dim_size; ++j)
         {
-            inpShape[j] = tensorShape.dim(j).dim_value();
+            const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
+            if (dimension.has_dim_param())
+            {
+                CV_LOG_DEBUG(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
+            }
+            // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
+            if (dimension.has_denotation())
+            {
+                CV_LOG_INFO(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
+            }
+            inpShape[j] = dimension.dim_value();
             // NHW, NCHW(NHWC), NCDHW(NDHWC); do not set this flag if only N is dynamic
-            if (!tensorShape.dim(j).dim_param().empty() && !(j == 0 && inpShape.size() >= 3))
+            if (dimension.has_dim_param() && !(j == 0 && inpShape.size() >= 3))
+            {
                 hasDynamicShapes = true;
+            }
         }
-        CV_LOG_DEBUG(NULL, "DNN/ONNX: input[" << i << "] shape=" << toString(inpShape));
-        if (!inpShape.empty() && !hasDynamicShapes)  // FIXIT result is not reliable for models with multiple inputs
+        bool isInitialized = ((constBlobs.find(name) != constBlobs.end()));
+        CV_LOG_IF_DEBUG(NULL, !isInitialized, "DNN/ONNX: input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
+        CV_LOG_IF_VERBOSE(NULL, 0, isInitialized, "DNN/ONNX: pre-initialized input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
+        if (dim_size > 0 && !hasDynamicShapes)  // FIXIT result is not reliable for models with multiple inputs
         {
             inpShape[0] = std::max(inpShape[0], 1);  // It's OK to have undetermined batch size
         }
         outShapes[valueInfoProto.name()] = inpShape;
-    }
-
-    // create map with network inputs (without const blobs)
-    // fill map: push layer name, layer id and output id
-    std::vector<std::string> netInputs;
-    for (int j = 0; j < graph_proto.input_size(); j++)
-    {
-        const std::string& name = graph_proto.input(j).name();
-        if (constBlobs.find(name) == constBlobs.end()) {
+        // fill map: push layer name, layer id and output id
+        if (!isInitialized)
+        {
             netInputs.push_back(name);
             layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
         }
     }
+    dstNet.setInputsNames(netInputs);
+
+    // dump outputs
+    for (int i = 0; i < graph_proto.output_size(); ++i)
+    {
+        dumpValueInfoProto(i, graph_proto.output(i), "output");
+    }
+
     for(int li = 0; li < layersSize; li++)
     {
         const opencv_onnx::NodeProto& node_proto = graph_proto.node(li);
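The dumps added in this patch are compiled in by the raised strip level (CV_LOG_STRIP_LEVEL = CV_LOG_LEVEL_VERBOSE + 1) but remain gated by the run-time log level. A minimal sketch of surfacing them from application code, assuming only the public cv::utils::logging API and a hypothetical model.onnx path:

    #include <opencv2/core/utils/logger.hpp>
    #include <opencv2/dnn.hpp>

    int main()
    {
        // Raise the run-time log level so the importer's VERBOSE/DEBUG dumps
        // of initializers, inputs and outputs are printed during import.
        cv::utils::logging::setLogLevel(cv::utils::logging::LOG_LEVEL_VERBOSE);
        cv::dnn::Net net = cv::dnn::readNetFromONNX("model.onnx");  // hypothetical model path
        return 0;
    }

Setting the OPENCV_LOG_LEVEL=VERBOSE environment variable reaches the same switch without recompiling the application.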
From 6385511e88f188436c462d334cb4bcc4aa9c629b Mon Sep 17 00:00:00 2001
From: Alexander Alekhin
Date: Fri, 24 Dec 2021 00:14:17 +0000
Subject: [PATCH 12/15] dnn: add checks in pooling layer implementation - to avoid out of buffer access

---
 modules/dnn/src/layers/pooling_layer.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index e79aa367d3..7067270ed6 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -1113,9 +1113,16 @@ virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inp
         }
         else if (padMode.empty())
         {
-            int addedDims = isPool1D? inpShape.size() : local_kernel.size();
-            for (int i = 0; i < addedDims; i++) {
+            size_t addedDims = isPool1D? inpShape.size() : local_kernel.size();
+            CV_CheckLE(addedDims, inpShape.size(), "");
+            CV_CheckLE(addedDims, pads_begin.size(), "");
+            CV_CheckLE(addedDims, pads_end.size(), "");
+            CV_CheckLE(addedDims, local_kernel.size(), "");
+            CV_CheckLE(addedDims, strides.size(), "");
+            for (int i = 0; i < addedDims; i++)
+            {
                 float dst = (float) (inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
+                CV_CheckGE(dst, 0.0f, "");
                 outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
             }
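The guarded expression is the usual pooling output-size formula, outSize = 1 + floor((input + pad_begin + pad_end - kernel) / stride) (ceil in ceilMode); the new CV_CheckGE rejects the negative intermediate value that a kernel larger than the padded input used to produce before reading out of bounds. A standalone sketch of the arithmetic (illustration only, not the OpenCV implementation):

    #include <cmath>
    #include <cstdio>

    // Pooling output-size formula with the guard the patch introduces.
    static int poolOutSize(int input, int padBegin, int padEnd, int kernel, int stride, bool ceilMode)
    {
        float dst = (float)(input + padBegin + padEnd - kernel) / stride;
        if (dst < 0.0f)      // kernel larger than padded input: invalid configuration
            return -1;
        return 1 + (int)(ceilMode ? std::ceil(dst) : std::floor(dst));
    }

    int main()
    {
        std::printf("%d\n", poolOutSize(5, 0, 0, 3, 2, false));  // prints 2
        std::printf("%d\n", poolOutSize(2, 0, 0, 5, 1, false));  // prints -1 (rejected)
        return 0;
    }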
From f43fec7ee674d9fc65be21119066c3e67c856357 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin
Date: Thu, 23 Dec 2021 10:39:43 +0000
Subject: [PATCH 13/15] cmake: use find_package(OpenEXR) to support OpenEXR 3+

---
 CMakeLists.txt                      |  4 ++--
 cmake/OpenCVFindLibsGrfmt.cmake     |  1 +
 cmake/OpenCVFindOpenEXR.cmake       | 21 +++++++++++++++++++++
 modules/imgcodecs/src/grfmt_exr.hpp |  1 +
 4 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 169c385fad..ba2d477501 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -224,7 +224,7 @@ OCV_OPTION(BUILD_TIFF "Build libtiff from source" (WIN32
 OCV_OPTION(BUILD_JASPER "Build libjasper from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
 OCV_OPTION(BUILD_JPEG "Build libjpeg from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
 OCV_OPTION(BUILD_PNG "Build libpng from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
-OCV_OPTION(BUILD_OPENEXR "Build openexr from source" (((WIN32 OR ANDROID OR APPLE) AND NOT WINRT) OR OPENCV_FORCE_3RDPARTY_BUILD) )
+OCV_OPTION(BUILD_OPENEXR "Build openexr from source" (OPENCV_FORCE_3RDPARTY_BUILD) )
 OCV_OPTION(BUILD_WEBP "Build WebP from source" (((WIN32 OR ANDROID OR APPLE) AND NOT WINRT) OR OPENCV_FORCE_3RDPARTY_BUILD) )
 OCV_OPTION(BUILD_TBB "Download and build TBB from source" (ANDROID OR OPENCV_FORCE_3RDPARTY_BUILD) )
 OCV_OPTION(BUILD_IPP_IW "Build IPP IW from source" (NOT MINGW OR OPENCV_FORCE_3RDPARTY_BUILD) IF (X86_64 OR X86) AND NOT WINRT )
@@ -306,7 +306,7 @@ OCV_OPTION(WITH_JPEG "Include JPEG support" ON
 OCV_OPTION(WITH_WEBP "Include WebP support" ON
   VISIBLE_IF NOT WINRT
   VERIFY HAVE_WEBP)
-OCV_OPTION(WITH_OPENEXR "Include ILM support via OpenEXR" BUILD_OPENEXR OR NOT CMAKE_CROSSCOMPILING
+OCV_OPTION(WITH_OPENEXR "Include ILM support via OpenEXR" ((WIN32 OR ANDROID OR APPLE) OR BUILD_OPENEXR) OR NOT CMAKE_CROSSCOMPILING
   VISIBLE_IF NOT APPLE_FRAMEWORK AND NOT WINRT
   VERIFY HAVE_OPENEXR)
 OCV_OPTION(WITH_OPENGL "Include OpenGL support" OFF
diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake
index 2d28dff875..a31de898c5 100644
--- a/cmake/OpenCVFindLibsGrfmt.cmake
+++ b/cmake/OpenCVFindLibsGrfmt.cmake
@@ -241,6 +241,7 @@ if(WITH_OPENEXR)
     add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/openexr")
     if(OPENEXR_VERSION)  # check via TARGET doesn't work
       set(HAVE_OPENEXR YES)
+      set(BUILD_OPENEXR ON)
     endif()
   endif()
 endif()
diff --git a/cmake/OpenCVFindOpenEXR.cmake b/cmake/OpenCVFindOpenEXR.cmake
index ef633e853a..94b3f9f098 100644
--- a/cmake/OpenCVFindOpenEXR.cmake
+++ b/cmake/OpenCVFindOpenEXR.cmake
@@ -9,6 +9,27 @@
 # OPENEXR_LIBRARIES = libraries that are needed to use OpenEXR.
 #
+if(NOT HAVE_CXX11)
+  message(STATUS "OpenEXR: enable C++11 to use external OpenEXR")
+  return()
+endif()
+
+if(NOT OPENCV_SKIP_OPENEXR_FIND_PACKAGE)
+  find_package(OpenEXR 3 QUIET)
+  #ocv_cmake_dump_vars(EXR)
+  if(OpenEXR_FOUND)
+    if(TARGET OpenEXR::OpenEXR)  # OpenEXR 3+
+      set(OPENEXR_LIBRARIES OpenEXR::OpenEXR)
+      set(OPENEXR_INCLUDE_PATHS "")
+      set(OPENEXR_VERSION "${OpenEXR_VERSION}")
+      set(OPENEXR_FOUND 1)
+      return()
+    else()
+      message(STATUS "Unsupported find_package(OpenEXR) - missing OpenEXR::OpenEXR target (version ${OpenEXR_VERSION})")
+    endif()
+  endif()
+endif()
+
 SET(OPENEXR_LIBRARIES "")
 SET(OPENEXR_LIBSEARCH_SUFFIXES "")
 file(TO_CMAKE_PATH "$ENV{ProgramFiles}" ProgramFiles_ENV_PATH)
diff --git a/modules/imgcodecs/src/grfmt_exr.hpp b/modules/imgcodecs/src/grfmt_exr.hpp
index 99acd775c2..a86874d228 100644
--- a/modules/imgcodecs/src/grfmt_exr.hpp
+++ b/modules/imgcodecs/src/grfmt_exr.hpp
@@ -53,6 +53,7 @@
 #include
 #include
 #include
+#include
 #include "grfmt_base.hpp"
 
 namespace cv
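The new detection order is: try an external OpenEXR 3+ package first, then fall back to the legacy manual search (and, when BUILD_OPENEXR is set, to the bundled sources). A minimal sketch of the same probe in a consumer project, assuming only standard CMake and the imported target that OpenEXR 3+ config files install:

    # exr_probe/CMakeLists.txt - illustration only, not part of the patch
    cmake_minimum_required(VERSION 3.5)
    project(exr_probe CXX)

    find_package(OpenEXR 3 QUIET)
    if(TARGET OpenEXR::OpenEXR)  # imported target shipped by OpenEXR 3+ package configs
      message(STATUS "Using OpenEXR ${OpenEXR_VERSION} via OpenEXR::OpenEXR")
    else()
      message(STATUS "OpenEXR 3+ not found; a legacy header/library search would run here")
    endif()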
Start up a command window and enter: @code setx -m OPENCV_DIR D:\OpenCV\Build\x86\vc11 (suggested for Visual Studio 2012 - 32 bit Windows) From cdfa8a668bc13d8f0905d0e16ca2029f8e98e5b2 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 24 Dec 2021 14:51:01 +0000 Subject: [PATCH 15/15] python: use '((x,y), (w,h), angle)' in std::vector --- .../core/include/opencv2/core/bindings_utils.hpp | 15 +++++++++++++++ modules/python/src2/cv2.cpp | 4 ++++ modules/python/test/test_misc.py | 12 ++++++++++++ 3 files changed, 31 insertions(+) diff --git a/modules/core/include/opencv2/core/bindings_utils.hpp b/modules/core/include/opencv2/core/bindings_utils.hpp index c53511f88f..67efdcdac2 100644 --- a/modules/core/include/opencv2/core/bindings_utils.hpp +++ b/modules/core/include/opencv2/core/bindings_utils.hpp @@ -103,6 +103,21 @@ String dumpRotatedRect(const RotatedRect& argument) argument.size.height, argument.angle); } +CV_WRAP static inline +RotatedRect testRotatedRect(float x, float y, float w, float h, float angle) +{ + return RotatedRect(Point2f(x, y), Size2f(w, h), angle); +} + +CV_WRAP static inline +std::vector testRotatedRectVector(float x, float y, float w, float h, float angle) +{ + std::vector result; + for (int i = 0; i < 10; i++) + result.push_back(RotatedRect(Point2f(x + i, y + 2 * i), Size2f(w, h), angle + 10 * i)); + return result; +} + CV_WRAP static inline String dumpRange(const Range& argument) { diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index 67c4166799..3241b4f5e8 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -518,6 +518,10 @@ template struct IsRepresentableAsMatDataType::channel_type>::type> : TrueType { }; + +// https://github.com/opencv/opencv/issues/20930 +template <> struct IsRepresentableAsMatDataType : FalseType {}; + } // namespace traits typedef std::vector vector_uchar; diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py index c992c9450d..de7af1d350 100644 --- a/modules/python/test/test_misc.py +++ b/modules/python/test/test_misc.py @@ -583,6 +583,18 @@ class Arguments(NewOpenCVTests): self.assertEqual(ints.dtype, np.int32, "Vector of integers has wrong elements type") self.assertEqual(ints.shape, expected_shape, "Vector of integers has wrong shape.") + def test_result_rotated_rect_issue_20930(self): + rr = cv.utils.testRotatedRect(10, 20, 100, 200, 45) + self.assertTrue(isinstance(rr, tuple), msg=type(rr)) + self.assertEqual(len(rr), 3) + + rrv = cv.utils.testRotatedRectVector(10, 20, 100, 200, 45) + self.assertTrue(isinstance(rrv, tuple), msg=type(rrv)) + self.assertEqual(len(rrv), 10) + + rr = rrv[0] + self.assertTrue(isinstance(rr, tuple), msg=type(rrv)) + self.assertEqual(len(rr), 3) class SamplesFindFile(NewOpenCVTests):