diff --git a/doc/opencv.bib b/doc/opencv.bib
index 0135418d4a..901fbe86ee 100644
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
@@ -620,7 +620,7 @@
   volume = {1},
   publisher = {IEEE}
 }
-@article{Lowe:2004:DIF:993451.996342,
+@article{Lowe04,
   author = {Lowe, David G.},
   title = {Distinctive Image Features from Scale-Invariant Keypoints},
   journal = {Int. J. Comput. Vision},
diff --git a/doc/py_tutorials/py_feature2d/py_feature_homography/py_feature_homography.markdown b/doc/py_tutorials/py_feature2d/py_feature_homography/py_feature_homography.markdown
index ed76d638f5..8602cc9398 100644
--- a/doc/py_tutorials/py_feature2d/py_feature_homography/py_feature_homography.markdown
+++ b/doc/py_tutorials/py_feature2d/py_feature_homography/py_feature_homography.markdown
@@ -44,7 +44,7 @@ img1 = cv.imread('box.png',0) # queryImage
 img2 = cv.imread('box_in_scene.png',0) # trainImage
 
 # Initiate SIFT detector
-sift = cv.xfeatures2d.SIFT_create()
+sift = cv.SIFT_create()
 
 # find the keypoints and descriptors with SIFT
 kp1, des1 = sift.detectAndCompute(img1,None)
diff --git a/doc/py_tutorials/py_feature2d/py_matcher/py_matcher.markdown b/doc/py_tutorials/py_feature2d/py_matcher/py_matcher.markdown
index d8ba8f856d..aeab98bfd6 100644
--- a/doc/py_tutorials/py_feature2d/py_matcher/py_matcher.markdown
+++ b/doc/py_tutorials/py_feature2d/py_matcher/py_matcher.markdown
@@ -110,7 +110,7 @@ img1 = cv.imread('box.png',cv.IMREAD_GRAYSCALE) # queryImage
 img2 = cv.imread('box_in_scene.png',cv.IMREAD_GRAYSCALE) # trainImage
 
 # Initiate SIFT detector
-sift = cv.xfeatures2d.SIFT_create()
+sift = cv.SIFT_create()
 
 # find the keypoints and descriptors with SIFT
 kp1, des1 = sift.detectAndCompute(img1,None)
@@ -174,7 +174,7 @@ img1 = cv.imread('box.png',cv.IMREAD_GRAYSCALE) # queryImage
 img2 = cv.imread('box_in_scene.png',cv.IMREAD_GRAYSCALE) # trainImage
 
 # Initiate SIFT detector
-sift = cv.xfeatures2d.SIFT_create()
+sift = cv.SIFT_create()
 
 # find the keypoints and descriptors with SIFT
 kp1, des1 = sift.detectAndCompute(img1,None)
diff --git a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
index f9d70938eb..656f5423c5 100644
--- a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
+++ b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
@@ -119,7 +119,7 @@ import cv2 as cv
 img = cv.imread('home.jpg')
 gray= cv.cvtColor(img,cv.COLOR_BGR2GRAY)
 
-sift = cv.xfeatures2d.SIFT_create()
+sift = cv.SIFT_create()
 kp = sift.detect(gray,None)
 
 img=cv.drawKeypoints(gray,kp,img)
@@ -151,7 +151,7 @@ Now to calculate the descriptor, OpenCV provides two methods.
 
 We will see the second method:
 @code{.py}
-sift = cv.xfeatures2d.SIFT_create()
+sift = cv.SIFT_create()
 kp, des = sift.detectAndCompute(gray,None)
 @endcode
 Here kp will be a list of keypoints and des is a numpy array of shape
diff --git a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown
index d8961f5f96..cb05f3dd48 100644
--- a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown
+++ b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown
@@ -27,7 +27,7 @@ Binary descriptors (ORB, BRISK, ...) are matched using the
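
The tutorial updates above all make the same substitution: SIFT is now constructed via cv.SIFT_create() from the main module instead of cv.xfeatures2d.SIFT_create(). A minimal sketch of the updated usage; the try/except fallback is only an assumption for running the tutorials on older builds where SIFT still lives in the contrib module:

```python
import cv2 as cv

try:
    sift = cv.SIFT_create()              # OpenCV >= 4.4: SIFT is in the main module
except AttributeError:
    sift = cv.xfeatures2d.SIFT_create()  # older builds with opencv-contrib

img = cv.imread('home.jpg', cv.IMREAD_GRAYSCALE)  # sample image from the tutorial
kp, des = sift.detectAndCompute(img, None)
print(len(kp), des.shape)  # N keypoints, descriptors of shape (N, 128)
```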
        inputsNames;
        std::vector<MatShape> inp_shapes;
        for (auto& it : ieNet.getInputsInfo())
@@ -3468,6 +3470,8 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
         cvNet.setInputShape(inputsNames[inp_id], inp_shapes[inp_id]);
     }
 
+    CV_TRACE_REGION_NEXT("backendNode");
+
     Ptr<BackendNode> backendNode;
 #ifdef HAVE_DNN_NGRAPH
     if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
@@ -3489,8 +3493,25 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
 #endif
     }
 
+    CV_TRACE_REGION_NEXT("register_outputs");
+
+#ifdef HAVE_DNN_NGRAPH
+    auto ngraphFunction = ieNet.getFunction();
+#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
+    std::list< std::shared_ptr<ngraph::Node> > ngraphOperations;
+#else
+    std::vector< std::shared_ptr<ngraph::Node> > ngraphOperations;
+#endif
+    if (ngraphFunction)
+    {
+        ngraphOperations = ngraphFunction->get_ops();
+    }
+#endif
+
     for (auto& it : ieNet.getOutputsInfo())
     {
+        CV_TRACE_REGION("output");
+
         LayerParams lp;
         int lid = cvNet.addLayer(it.first, "", lp);
@@ -3499,15 +3520,38 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
 #ifdef HAVE_DNN_NGRAPH
         if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
         {
+            const auto& outputName = it.first;
             Ptr<Layer> cvLayer(new NgraphBackendLayer(ieNet));
+            cvLayer->name = outputName;
+            cvLayer->type = "_unknown_";
 
-            InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
-            CV_Assert(ieLayer);
+            if (ngraphFunction)
+            {
+                CV_TRACE_REGION("ngraph_function");
+                bool found = false;
+                for (const auto& op : ngraphOperations)
+                {
+                    CV_Assert(op);
+                    if (op->get_friendly_name() == outputName)
+                    {
+                        const std::string typeName = op->get_type_info().name;
+                        cvLayer->type = typeName;
+                        found = true;
+                        break;
+                    }
+                }
+                if (!found)
+                    CV_LOG_WARNING(NULL, "DNN/IE: Can't determine output layer type: '" << outputName << "'");
+            }
+            else
+            {
+                CV_TRACE_REGION("legacy_cnn_layer");
+                InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
+                CV_Assert(ieLayer);
 
-            cvLayer->name = it.first;
-            cvLayer->type = ieLayer->type;
+                cvLayer->type = ieLayer->type;
+            }
             ld.layerInstance = cvLayer;
-
             ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode;
         }
         else
@@ -3532,6 +3576,9 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
         for (int i = 0; i < inputsNames.size(); ++i)
             cvNet.connect(0, i, lid, i);
     }
+
+    CV_TRACE_REGION_NEXT("finalize");
+
     cvNet.setPreferableBackend(getInferenceEngineBackendTypeParam());
 
     cvNet.impl->skipInfEngineInit = true;
diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
index d5f2443f22..460eaca3b7 100644
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -382,6 +382,36 @@ public:
     }
 };
 
+class BatchNormalizationSubgraph : public Subgraph
+{
+public:
+    BatchNormalizationSubgraph()
+    {
+        int input = addNodeToMatch("");
+        int data1 = addNodeToMatch("Constant");
+        int data2 = addNodeToMatch("Constant");
+        int data3 = addNodeToMatch("Constant");
+        int data4 = addNodeToMatch("Constant");
+        int shape1 = addNodeToMatch("Constant");
+        int reshape1 = addNodeToMatch("Reshape", data1, shape1);
+        int shape2 = addNodeToMatch("Constant");
+        int reshape2 = addNodeToMatch("Reshape", data2, shape2);
+        int shape3 = addNodeToMatch("Constant");
+        int reshape3 = addNodeToMatch("Reshape", data3, shape3);
+        int shape4 = addNodeToMatch("Constant");
+        int reshape4 = addNodeToMatch("Reshape", data4, shape4);
+        int sqrtNode = addNodeToMatch("Sqrt", reshape3);
+        int A = addNodeToMatch("Constant");
+        int divNode = addNodeToMatch("Div", A, sqrtNode);
+        int mul1 = addNodeToMatch("Mul", reshape1, divNode);
+        int mul2 = addNodeToMatch("Mul", reshape4, mul1);
+        int sub = addNodeToMatch("Sub", reshape2, mul2);
+        int mul3 = addNodeToMatch("Mul", input, mul1);
+        addNodeToMatch("Add", mul3, sub);
+        setFusedNode("BatchNormalization", input, data1, data2, data4, data3);
+    }
+};
+
 void simplifySubgraphs(opencv_onnx::GraphProto& net)
 {
     std::vector<Ptr<Subgraph> > subgraphs;
@@ -394,6 +424,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
     subgraphs.push_back(makePtr());
     subgraphs.push_back(makePtr());
     subgraphs.push_back(makePtr());
+    subgraphs.push_back(makePtr<BatchNormalizationSubgraph>());
 
     simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs);
 }
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 56e9c0a217..08c6509e3b 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -309,11 +309,30 @@ static void addConstant(const std::string& name,
     outShapes.insert(std::make_pair(name, shape(blob)));
 }
 
+void addConstantNodesForInitializers(opencv_onnx::GraphProto& graph_proto)
+{
+    int num_initializers = graph_proto.initializer_size();
+    for (int id = 0; id < num_initializers; id++)
+    {
+        opencv_onnx::TensorProto initializer = graph_proto.initializer(id);
+        opencv_onnx::NodeProto* constant_node = graph_proto.add_node();
+        constant_node->set_op_type("Constant");
+        constant_node->set_name(initializer.name());
+        constant_node->add_output(initializer.name());
+        opencv_onnx::AttributeProto* value = constant_node->add_attribute();
+        opencv_onnx::TensorProto* tensor = initializer.New();
+        tensor->CopyFrom(initializer);
+        releaseONNXTensor(initializer);
+        value->set_allocated_t(tensor);
+    }
+}
+
 void ONNXImporter::populateNet(Net dstNet)
 {
     CV_Assert(model_proto.has_graph());
     opencv_onnx::GraphProto graph_proto = model_proto.graph();
+    addConstantNodesForInitializers(graph_proto);
     simplifySubgraphs(graph_proto);
 
     std::map<std::string, Mat> constBlobs = getGraphTensors(graph_proto);
diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp
index 04cb55e309..7ac6d39360 100644
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@@ -570,6 +570,10 @@ TEST_P(Test_Darknet_layers, reorg)
 
 TEST_P(Test_Darknet_layers, maxpool)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
     testDarknetLayer("maxpool");
 }
 
diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp
index ce3ab23a77..7e6d7f87d2 100644
--- a/modules/dnn/test/test_halide_layers.cpp
+++ b/modules/dnn/test/test_halide_layers.cpp
@@ -357,6 +357,11 @@ TEST_P(MaxPooling, Accuracy)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
 
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000)
+    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
+
     LayerParams lp;
     lp.set("pool", "max");
     lp.set("kernel_w", kernel.width);
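
The BatchNormalizationSubgraph above matches the expanded, unfused form of batch normalization and replaces it with a single BatchNormalization node. A numpy sketch of the algebra being matched, assuming data1/data2 are the scale and bias, data4 the running mean, and data3 the running variance with epsilon already folded in (assumptions about the exported graphs this pattern targets):

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((1, 3, 4, 4)).astype(np.float32)     # NCHW input
w = rng.standard_normal((1, 3, 1, 1)).astype(np.float32)     # data1: scale
b = rng.standard_normal((1, 3, 1, 1)).astype(np.float32)     # data2: bias
mean = rng.standard_normal((1, 3, 1, 1)).astype(np.float32)  # data4: running mean
var = rng.random((1, 3, 1, 1)).astype(np.float32) + 1e-5     # data3: variance (+eps)

# The matched Div/Mul/Sub/Add chain:
# out = x*(w/sqrt(var)) + (b - mean*(w/sqrt(var)))
scale = w * (1.0 / np.sqrt(var))          # Div(A, Sqrt(var)) then Mul
unfused = x * scale + (b - mean * scale)

# The fused BatchNormalization it is rewritten to:
fused = (x - mean) / np.sqrt(var) * w + b
assert np.allclose(unfused, fused, atol=1e-5)
```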
diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp
index 869a3575d7..565db4cd47 100644
--- a/modules/dnn/test/test_ie_models.cpp
+++ b/modules/dnn/test/test_ie_models.cpp
@@ -134,6 +134,8 @@ static inline void genData(const InferenceEngine::TensorDesc& desc, Mat& m, Blob
 void runIE(Target target, const std::string& xmlPath, const std::string& binPath,
            std::map<std::string, cv::Mat>& inputsMap, std::map<std::string, cv::Mat>& outputsMap)
 {
+    SCOPED_TRACE("runIE");
+
     CNNNetReader reader;
     reader.ReadNetwork(xmlPath);
     reader.ReadWeights(binPath);
@@ -247,6 +249,8 @@ void runCV(Backend backendId, Target targetId, const std::string& xmlPath, const
            const std::map<std::string, cv::Mat>& inputsMap, std::map<std::string, cv::Mat>& outputsMap)
 {
+    SCOPED_TRACE("runOCV");
+
     Net net = readNet(xmlPath, binPath);
     for (auto& it : inputsMap)
         net.setInput(it.second, it.first);
@@ -273,9 +277,18 @@ TEST_P(DNNTestOpenVINO, models)
 
     const Backend backendId = get<0>(get<0>(GetParam()));
     const Target targetId = get<1>(get<0>(GetParam()));
+    std::string modelName = get<1>(GetParam());
 
-    if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        throw SkipTestException("No support for async forward");
+    ASSERT_FALSE(backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) <<
+        "Inference Engine backend is required";
+
+#if INF_ENGINE_VER_MAJOR_GE(2020020000)
+    if (targetId == DNN_TARGET_MYRIAD && backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (modelName == "person-detection-retail-0013")  // IRv10
+            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+    }
+#endif
 
     if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
         setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
@@ -284,7 +297,6 @@ TEST_P(DNNTestOpenVINO, models)
     else
         FAIL() << "Unknown backendId";
 
-    std::string modelName = get<1>(GetParam());
     bool isFP16 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD);
 
     const std::map<std::string, OpenVINOModelTestCaseInfo>& models = getOpenVINOTestModels();
@@ -301,8 +313,8 @@ TEST_P(DNNTestOpenVINO, models)
 
     // Single Myriad device cannot be shared across multiple processes.
     if (targetId == DNN_TARGET_MYRIAD)
         resetMyriadDevice();
-    runIE(targetId, xmlPath, binPath, inputsMap, ieOutputsMap);
-    runCV(backendId, targetId, xmlPath, binPath, inputsMap, cvOutputsMap);
+    EXPECT_NO_THROW(runIE(targetId, xmlPath, binPath, inputsMap, ieOutputsMap)) << "runIE";
+    EXPECT_NO_THROW(runCV(backendId, targetId, xmlPath, binPath, inputsMap, cvOutputsMap)) << "runCV";
 
     double eps = 0;
 #if INF_ENGINE_VER_MAJOR_GE(2020010000)
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 593b8c074f..fadc19763a 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -98,7 +98,14 @@ TEST_P(Test_ONNX_layers, InstanceNorm)
 
 TEST_P(Test_ONNX_layers, MaxPooling)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
     testONNXModels("maxpooling", npy, 0, 0, false, false);
+}
+TEST_P(Test_ONNX_layers, MaxPooling_2)
+{
     testONNXModels("two_maxpooling", npy, 0, 0, false, false);
 }
 
@@ -314,6 +321,15 @@ TEST_P(Test_ONNX_layers, BatchNormalization3D)
     testONNXModels("batch_norm_3d");
 }
 
+TEST_P(Test_ONNX_layers, BatchNormalizationUnfused)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    testONNXModels("frozenBatchNorm2d");
+}
+
 TEST_P(Test_ONNX_layers, Transpose)
 {
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
@@ -396,6 +412,16 @@ TEST_P(Test_ONNX_layers, ResizeUnfused)
     testONNXModels("resize_bilinear_unfused_opset11_torch1.4");
 }
 
+TEST_P(Test_ONNX_layers, ResizeUnfusedTwoInputs)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    testONNXModels("upsample_unfused_two_inputs_opset9_torch1.4", npy, 0, 0, false, true, 2);
+    testONNXModels("upsample_unfused_two_inputs_opset11_torch1.4", npy, 0, 0, false, true, 2);
+}
+
 TEST_P(Test_ONNX_layers, MultyInputs)
 {
     testONNXModels("multy_inputs", npy, 0, 0, false, true, 2);
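
The new frozenBatchNorm2d test exercises exactly the fusion added above. One plausible way such a model is produced (an assumption about the test data, not part of this patch) is exporting torchvision's FrozenBatchNorm2d, whose statistics are plain buffers and therefore export as Constant initializers and an unfused arithmetic chain rather than a BatchNormalization op:

```python
import torch
from torchvision.ops.misc import FrozenBatchNorm2d

model = FrozenBatchNorm2d(3)   # weight/bias/running stats are buffers, not BN params
x = torch.randn(1, 3, 8, 8)
# Exports a Reshape/Sqrt/Div/Mul/Sub/Add chain that the new
# BatchNormalizationSubgraph collapses back into one BatchNormalization node.
torch.onnx.export(model, x, 'frozenBatchNorm2d.onnx', opset_version=9)
```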
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 285da55ba0..3b2e1b1657 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -128,13 +128,32 @@ TEST_P(Test_TensorFlow_layers, reduce_mean)
     runTensorFlowNet("global_pool_by_axis");
 }
 
-TEST_P(Test_TensorFlow_layers, conv)
+TEST_P(Test_TensorFlow_layers, conv_single_conv)
 {
     runTensorFlowNet("single_conv");
+}
+TEST_P(Test_TensorFlow_layers, conv_atrous_conv2d_valid)
+{
     runTensorFlowNet("atrous_conv2d_valid");
+}
+TEST_P(Test_TensorFlow_layers, conv_atrous_conv2d_same)
+{
     runTensorFlowNet("atrous_conv2d_same");
+}
+TEST_P(Test_TensorFlow_layers, conv_depthwise_conv2d)
+{
     runTensorFlowNet("depthwise_conv2d");
+}
+TEST_P(Test_TensorFlow_layers, conv_keras_atrous_conv2d_same)
+{
     runTensorFlowNet("keras_atrous_conv2d_same");
+}
+TEST_P(Test_TensorFlow_layers, conv_pool_nchw)
+{
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
     runTensorFlowNet("conv_pool_nchw");
 }
 
@@ -291,11 +310,32 @@ TEST_P(Test_TensorFlow_layers, slim_batch_norm)
     runTensorFlowNet("slim_batch_norm", false, l1, lInf);
 }
 
-TEST_P(Test_TensorFlow_layers, pooling)
+TEST_P(Test_TensorFlow_layers, pooling_max_pool_even)
 {
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
     runTensorFlowNet("max_pool_even");
+}
+TEST_P(Test_TensorFlow_layers, pooling_max_pool_odd_valid)
+{
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
     runTensorFlowNet("max_pool_odd_valid");
+}
+TEST_P(Test_TensorFlow_layers, pooling_max_pool_odd_same)
+{
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
     runTensorFlowNet("max_pool_odd_same");
+}
+TEST_P(Test_TensorFlow_layers, pooling_reduce_mean)
+{
     runTensorFlowNet("reduce_mean");  // an average pooling over all spatial dimensions.
 }
 
@@ -815,24 +855,67 @@ TEST_P(Test_TensorFlow_nets, EAST_text_detection)
 
 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, dnnBackendsAndTargets());
 
-TEST_P(Test_TensorFlow_layers, fp16_weights)
+
+TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_single_conv)
 {
-    float l1 = 0.00078;
-    float lInf = 0.012;
+    float l1 = 0.00078, lInf = 0.012;
     runTensorFlowNet("fp16_single_conv", false, l1, lInf);
+}
+TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_max_pool_odd_same)
+{
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
+    float l1 = 0.00078, lInf = 0.012;
     runTensorFlowNet("fp16_max_pool_odd_same", false, l1, lInf);
+}
+TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_eltwise_add_mul)
+{
+    float l1 = 0.00078, lInf = 0.012;
     runTensorFlowNet("fp16_eltwise_add_mul", false, l1, lInf);
+}
+TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_pad_and_concat)
+{
+    float l1 = 0.00078, lInf = 0.012;
     runTensorFlowNet("fp16_pad_and_concat", false, l1, lInf);
+}
+TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_padding_valid)
+{
+    float l1 = 0.00078, lInf = 0.012;
     runTensorFlowNet("fp16_padding_valid", false, l1, lInf);
+}
+TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_max_pool_even)
+{
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
+    float l1 = 0.00078, lInf = 0.012;
     // Reference output values are in range [0.0889, 1.651]
     runTensorFlowNet("fp16_max_pool_even", false, (target == DNN_TARGET_MYRIAD) ? 0.003 : l1, lInf);
-    if (target == DNN_TARGET_MYRIAD)
-    {
+}
+TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_deconvolution)
+{
+    float l1 = 0.00078, lInf = 0.012;
+    if (target == DNN_TARGET_MYRIAD) {
         l1 = 0.0041;
         lInf = 0.024;
     }
     // Reference output values are in range [0, 10.75]
     runTensorFlowNet("fp16_deconvolution", false, l1, lInf);
+}
+TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_max_pool_odd_valid)
+{
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
+    float l1 = 0.00078, lInf = 0.012;
    if (target == DNN_TARGET_MYRIAD) {
+        l1 = 0.0041;
+        lInf = 0.024;
+    }
     // Reference output values are in range [0.418, 2.297]
     runTensorFlowNet("fp16_max_pool_odd_valid", false, l1, lInf);
 }
diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp
index 24f0af5163..19429fb04e 100644
--- a/modules/features2d/include/opencv2/features2d.hpp
+++ b/modules/features2d/include/opencv2/features2d.hpp
@@ -244,6 +244,39 @@ typedef Feature2D DescriptorExtractor;
 //! @addtogroup features2d_main
 //! @{
 
+
+/** @brief Class for extracting keypoints and computing descriptors using the Scale Invariant Feature Transform
+(SIFT) algorithm by D. Lowe @cite Lowe04 .
+*/
+class CV_EXPORTS_W SIFT : public Feature2D
+{
+public:
+    /**
+    @param nfeatures The number of best features to retain. The features are ranked by their scores
+    (measured in the SIFT algorithm as the local contrast)
+
+    @param nOctaveLayers The number of layers in each octave. 3 is the value used in D. Lowe's paper. The
+    number of octaves is computed automatically from the image resolution.
+
+    @param contrastThreshold The contrast threshold used to filter out weak features in semi-uniform
+    (low-contrast) regions. The larger the threshold, the fewer features are produced by the detector.
+
+    @param edgeThreshold The threshold used to filter out edge-like features. Note that its meaning
+    is different from the contrastThreshold, i.e. the larger the edgeThreshold, the fewer features are
+    filtered out (more features are retained).
+
+    @param sigma The sigma of the Gaussian applied to the input image at octave \#0. If your image
+    is captured with a weak camera with soft lenses, you might want to reduce this number.
+    */
+    CV_WRAP static Ptr<SIFT> create(int nfeatures = 0, int nOctaveLayers = 3,
+        double contrastThreshold = 0.04, double edgeThreshold = 10,
+        double sigma = 1.6);
+};
+
+typedef SIFT SiftFeatureDetector;
+typedef SIFT SiftDescriptorExtractor;
+
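
Throughout these tests, l1 and lInf are accuracy tolerances: assuming the usual normAssert semantics of the dnn test suite, l1 bounds the mean absolute difference against the reference output and lInf bounds the largest single-element difference. A sketch of the assumed check:

```python
import numpy as np

def norm_assert(ref, out, l1=0.00078, lInf=0.012):
    # Assumed semantics of the test helper: mean and max absolute error bounds.
    diff = np.abs(ref.astype(np.float64) - out.astype(np.float64))
    assert diff.mean() <= l1, "l1 (mean abs error) exceeded"
    assert diff.max() <= lInf, "lInf (max abs error) exceeded"

ref = np.linspace(0, 1, 100, dtype=np.float32)
norm_assert(ref, ref + 1e-4)   # a small uniform error passes both bounds
```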
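
The documented parameters map directly to the Python binding. A short sketch of tuning them; values other than the defaults are illustrative only:

```python
import cv2 as cv

sift = cv.SIFT_create(nfeatures=500,           # keep only the 500 strongest features
                      nOctaveLayers=3,         # as in Lowe's paper
                      contrastThreshold=0.06,  # raise to drop more low-contrast points
                      edgeThreshold=10,        # lower to drop more edge-like points
                      sigma=1.6)
```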
 /** @brief Class implementing the BRISK keypoint detector and descriptor extractor, described in @cite LCS11 .
 */
 class CV_EXPORTS_W BRISK : public Feature2D
diff --git a/modules/features2d/misc/java/test/SIFTDescriptorExtractorTest.java b/modules/features2d/misc/java/test/SIFTDescriptorExtractorTest.java
index 2c1ac7ca01..c548ff7792 100644
--- a/modules/features2d/misc/java/test/SIFTDescriptorExtractorTest.java
+++ b/modules/features2d/misc/java/test/SIFTDescriptorExtractorTest.java
@@ -6,6 +6,7 @@ import org.opencv.core.MatOfKeyPoint;
 import org.opencv.core.Point;
 import org.opencv.core.Scalar;
 import org.opencv.core.KeyPoint;
+import org.opencv.features2d.SIFT;
 import org.opencv.test.OpenCVTestCase;
 import org.opencv.test.OpenCVTestRunner;
 import org.opencv.imgproc.Imgproc;
@@ -29,7 +30,7 @@ public class SIFTDescriptorExtractorTest extends OpenCVTestCase {
     @Override
     protected void setUp() throws Exception {
         super.setUp();
-        extractor = createClassInstance(XFEATURES2D+"SIFT", DEFAULT_FACTORY, null, null);
+        extractor = SIFT.create();
         keypoint = new KeyPoint(55.775577545166016f, 44.224422454833984f, 16, 9.754629f, 8617.863f, 1, -1);
         matSize = 100;
         truth = new Mat(1, 128, CvType.CV_32FC1) {
diff --git a/modules/features2d/perf/perf_sift.cpp b/modules/features2d/perf/perf_sift.cpp
new file mode 100644
index 0000000000..fd9579bed5
--- /dev/null
+++ b/modules/features2d/perf/perf_sift.cpp
@@ -0,0 +1,85 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "perf_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+typedef perf::TestBaseWithParam<std::string> SIFT_detect;
+typedef perf::TestBaseWithParam<std::string> SIFT_extract;
+typedef perf::TestBaseWithParam<std::string> SIFT_full;
+
+#define SIFT_IMAGES \
+    "cv/detectors_descriptors_evaluation/images_datasets/leuven/img1.png",\
+    "stitching/a3.png"
+
+PERF_TEST_P_(SIFT_detect, SIFT)
+{
+    string filename = getDataPath(GetParam());
+    Mat frame = imread(filename, IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame.empty()) << "Unable to load source image " << filename;
+
+    Mat mask;
+    declare.in(frame).time(90);
+    Ptr<SIFT> detector = SIFT::create();
+    vector<KeyPoint> points;
+
+    PERF_SAMPLE_BEGIN();
+        detector->detect(frame, points, mask);
+    PERF_SAMPLE_END();
+
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P_(SIFT_extract, SIFT)
+{
+    string filename = getDataPath(GetParam());
+    Mat frame = imread(filename, IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame.empty()) << "Unable to load source image " << filename;
+
+    Mat mask;
+    declare.in(frame).time(90);
+
+    Ptr<SIFT> detector = SIFT::create();
+    vector<KeyPoint> points;
+    Mat descriptors;
+    detector->detect(frame, points, mask);
+
+    PERF_SAMPLE_BEGIN();
+        detector->compute(frame, points, descriptors);
+    PERF_SAMPLE_END();
+
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P_(SIFT_full, SIFT)
+{
+    string filename = getDataPath(GetParam());
+    Mat frame = imread(filename, IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame.empty()) << "Unable to load source image " << filename;
+
+    Mat mask;
+    declare.in(frame).time(90);
+    Ptr<SIFT> detector = SIFT::create();
+    vector<KeyPoint> points;
+    Mat descriptors;
+
+    PERF_SAMPLE_BEGIN();
+        detector->detectAndCompute(frame, mask, points, descriptors, false);
+    PERF_SAMPLE_END();
+
+    SANITY_CHECK_NOTHING();
+}
+
+
+INSTANTIATE_TEST_CASE_P(/*nothing*/, SIFT_detect,
+    testing::Values(SIFT_IMAGES)
+);
+INSTANTIATE_TEST_CASE_P(/*nothing*/, SIFT_extract,
+    testing::Values(SIFT_IMAGES)
+);
+INSTANTIATE_TEST_CASE_P(/*nothing*/, SIFT_full,
+    testing::Values(SIFT_IMAGES)
+);
+
+}} // namespace
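
The three perf scenarios mirror the three public call patterns; for reference, the same split in Python:

```python
import cv2 as cv
import numpy as np

img = np.random.randint(0, 256, (480, 640), np.uint8)  # stand-in for the perf images
sift = cv.SIFT_create()

kp = sift.detect(img, None)                   # SIFT_detect: keypoints only
kp, des = sift.compute(img, kp)               # SIFT_extract: descriptors for given keypoints
kp2, des2 = sift.detectAndCompute(img, None)  # SIFT_full: both in one pass
```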
diff --git a/modules/features2d/src/sift.cpp b/modules/features2d/src/sift.cpp
new file mode 100644
index 0000000000..81254ecbd8
--- /dev/null
+++ b/modules/features2d/src/sift.cpp
@@ -0,0 +1,1190 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (c) 2006-2010, Rob Hess <hess@eecs.oregonstate.edu>
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2020, Intel Corporation, all rights reserved.
+
+/**********************************************************************************************\
+ Implementation of SIFT is based on the code from http://blogs.oregonstate.edu/hess/code/sift/
+ Below is the original copyright.
+ Patent US6711293 expired in March 2020.
+
+//    Copyright (c) 2006-2010, Rob Hess <hess@eecs.oregonstate.edu>
+//    All rights reserved.
+
+//    The following patent has been issued for methods embodied in this
+//    software: "Method and apparatus for identifying scale invariant features
+//    in an image and use of same for locating an object in an image," David
+//    G. Lowe, US Patent 6,711,293 (March 23, 2004). Provisional application
+//    filed March 8, 1999. Asignee: The University of British Columbia. For
+//    further details, contact David Lowe (lowe@cs.ubc.ca) or the
+//    University-Industry Liaison Office of the University of British
+//    Columbia.
+
+//    Note that restrictions imposed by this patent (and possibly others)
+//    exist independently of and may be in conflict with the freedoms granted
+//    in this license, which refers to copyright of the program, not patents
+//    for any methods that it implements.  Both copyright and patent law must
+//    be obeyed to legally use and redistribute this program and it is not the
+//    purpose of this license to induce you to infringe any patents or other
+//    property right claims or to contest validity of any such claims.  If you
+//    redistribute or use the program, then this license merely protects you
+//    from committing copyright infringement.  It does not protect you from
+//    committing patent infringement.  So, before you do anything with this
+//    program, make sure that you have permission to do so not merely in terms
+//    of copyright, but also in terms of patent law.
+
+//    Please note that this license is not to be understood as a guarantee
+//    either.  If you use the program according to this license, but in
+//    conflict with patent law, it does not mean that the licensor will refund
+//    you for any losses that you incur if you are sued for your patent
+//    infringement.
+
+//    Redistribution and use in source and binary forms, with or without
+//    modification, are permitted provided that the following conditions are
+//    met:
+//        * Redistributions of source code must retain the above copyright and
+//          patent notices, this list of conditions and the following
+//          disclaimer.
+//        * Redistributions in binary form must reproduce the above copyright
+//          notice, this list of conditions and the following disclaimer in
+//          the documentation and/or other materials provided with the
+//          distribution.
+//        * Neither the name of Oregon State University nor the names of its
+//          contributors may be used to endorse or promote products derived
+//          from this software without specific prior written permission.
+
+//    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+//    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+//    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+//    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+//    HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+//    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+//    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+//    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+//    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+//    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+//    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+\**********************************************************************************************/
+
+#include "precomp.hpp"
+#include <iostream>
+#include <stdarg.h>
+#include <opencv2/core/hal/hal.hpp>
+
+#include <opencv2/core/utils/tls.hpp>
+
+namespace cv
+{
+
+/*!
+ SIFT implementation.
+
+ The class implements SIFT algorithm by D. Lowe.
+ */
+class SIFT_Impl : public SIFT
+{
+public:
+    explicit SIFT_Impl( int nfeatures = 0, int nOctaveLayers = 3,
+                        double contrastThreshold = 0.04, double edgeThreshold = 10,
+                        double sigma = 1.6);
+
+    //! returns the descriptor size in floats (128)
+    int descriptorSize() const CV_OVERRIDE;
+
+    //! returns the descriptor type
+    int descriptorType() const CV_OVERRIDE;
+
+    //! returns the default norm type
+    int defaultNorm() const CV_OVERRIDE;
+
+    //! finds the keypoints and computes descriptors for them using SIFT algorithm.
+    //! Optionally it can compute descriptors for the user-provided keypoints
+    void detectAndCompute(InputArray img, InputArray mask,
+                          std::vector<KeyPoint>& keypoints,
+                          OutputArray descriptors,
+                          bool useProvidedKeypoints = false) CV_OVERRIDE;
+
+    void buildGaussianPyramid( const Mat& base, std::vector<Mat>& pyr, int nOctaves ) const;
+    void buildDoGPyramid( const std::vector<Mat>& pyr, std::vector<Mat>& dogpyr ) const;
+    void findScaleSpaceExtrema( const std::vector<Mat>& gauss_pyr, const std::vector<Mat>& dog_pyr,
+                                std::vector<KeyPoint>& keypoints ) const;
+
+protected:
+    CV_PROP_RW int nfeatures;
+    CV_PROP_RW int nOctaveLayers;
+    CV_PROP_RW double contrastThreshold;
+    CV_PROP_RW double edgeThreshold;
+    CV_PROP_RW double sigma;
+};
+
+Ptr<SIFT> SIFT::create( int _nfeatures, int _nOctaveLayers,
+                        double _contrastThreshold, double _edgeThreshold, double _sigma )
+{
+    CV_TRACE_FUNCTION();
+    return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma);
+}
+
+/******************************* Defs and macros *****************************/
+
+// default width of descriptor histogram array
+static const int SIFT_DESCR_WIDTH = 4;
+
+// default number of bins per histogram in descriptor array
+static const int SIFT_DESCR_HIST_BINS = 8;
+
+// assumed gaussian blur for input image
+static const float SIFT_INIT_SIGMA = 0.5f;
+
+// width of border in which to ignore keypoints
+static const int SIFT_IMG_BORDER = 5;
+
+// maximum steps of keypoint interpolation before failure
+static const int SIFT_MAX_INTERP_STEPS = 5;
+
+// default number of bins in histogram for orientation assignment
+static const int SIFT_ORI_HIST_BINS = 36;
+
+// determines gaussian sigma for orientation assignment
+static const float SIFT_ORI_SIG_FCTR = 1.5f;
+
+// determines the radius of the region used in orientation assignment
+static const float SIFT_ORI_RADIUS = 3 * SIFT_ORI_SIG_FCTR;
+
+// orientation magnitude relative to max that results in new feature
+static const float SIFT_ORI_PEAK_RATIO = 0.8f;
+
+// determines the size of a single descriptor orientation histogram
+static const float SIFT_DESCR_SCL_FCTR = 3.f;
+
+// threshold on magnitude of elements of descriptor vector
+static const float SIFT_DESCR_MAG_THR = 0.2f;
+
+// factor used to convert floating-point descriptor to unsigned char
+static const float SIFT_INT_DESCR_FCTR = 512.f;
+
+#define DoG_TYPE_SHORT 0
+#if DoG_TYPE_SHORT
+// intermediate type used for DoG pyramids
+typedef short sift_wt;
+static const int SIFT_FIXPT_SCALE = 48;
+#else
+// intermediate type used for DoG pyramids
+typedef float sift_wt;
+static const int SIFT_FIXPT_SCALE = 1;
+#endif
+
+static inline void
+unpackOctave(const KeyPoint& kpt, int& octave, int& layer, float& scale)
+{
+    octave = kpt.octave & 255;
+    layer = (kpt.octave >> 8) & 255;
+    octave = octave < 128 ? octave : (-128 | octave);
+    scale = octave >= 0 ? 1.f/(1 << octave) : (float)(1 << -octave);
+}
+
+static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma )
+{
+    CV_TRACE_FUNCTION();
+
+    Mat gray, gray_fpt;
+    if( img.channels() == 3 || img.channels() == 4 )
+    {
+        cvtColor(img, gray, COLOR_BGR2GRAY);
+        gray.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
+    }
+    else
+        img.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
+
+    float sig_diff;
+
+    if( doubleImageSize )
+    {
+        sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01f) );
+        Mat dbl;
+#if DoG_TYPE_SHORT
+        resize(gray_fpt, dbl, Size(gray_fpt.cols*2, gray_fpt.rows*2), 0, 0, INTER_LINEAR_EXACT);
+#else
+        resize(gray_fpt, dbl, Size(gray_fpt.cols*2, gray_fpt.rows*2), 0, 0, INTER_LINEAR);
+#endif
+        Mat result;
+        GaussianBlur(dbl, result, Size(), sig_diff, sig_diff);
+        return result;
+    }
+    else
+    {
+        sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA, 0.01f) );
+        Mat result;
+        GaussianBlur(gray_fpt, result, Size(), sig_diff, sig_diff);
+        return result;
+    }
+}
+
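
For readers decoding kpt.octave elsewhere: the field packs the octave in the low byte (as a signed value), the layer in the next byte, and the interpolated sub-layer offset above that. A Python sketch mirroring unpackOctave above:

```python
def unpack_octave(octave_field):
    octave = octave_field & 255
    layer = (octave_field >> 8) & 255
    if octave >= 128:
        octave = -128 | octave              # sign-extend the low byte
    scale = 1.0 / (1 << octave) if octave >= 0 else float(1 << -octave)
    return octave, layer, scale

print(unpack_octave((1 << 8) + 255))        # layer 1 of octave -1 -> (-1, 1, 2.0)
```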
+void SIFT_Impl::buildGaussianPyramid( const Mat& base, std::vector<Mat>& pyr, int nOctaves ) const
+{
+    CV_TRACE_FUNCTION();
+
+    std::vector<double> sig(nOctaveLayers + 3);
+    pyr.resize(nOctaves*(nOctaveLayers + 3));
+
+    // precompute Gaussian sigmas using the following formula:
+    //  \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2
+    sig[0] = sigma;
+    double k = std::pow( 2., 1. / nOctaveLayers );
+    for( int i = 1; i < nOctaveLayers + 3; i++ )
+    {
+        double sig_prev = std::pow(k, (double)(i-1))*sigma;
+        double sig_total = sig_prev*k;
+        sig[i] = std::sqrt(sig_total*sig_total - sig_prev*sig_prev);
+    }
+
+    for( int o = 0; o < nOctaves; o++ )
+    {
+        for( int i = 0; i < nOctaveLayers + 3; i++ )
+        {
+            Mat& dst = pyr[o*(nOctaveLayers + 3) + i];
+            if( o == 0 && i == 0 )
+                dst = base;
+            // base of new octave is halved image from end of previous octave
+            else if( i == 0 )
+            {
+                const Mat& src = pyr[(o-1)*(nOctaveLayers + 3) + nOctaveLayers];
+                resize(src, dst, Size(src.cols/2, src.rows/2),
+                       0, 0, INTER_NEAREST);
+            }
+            else
+            {
+                const Mat& src = pyr[o*(nOctaveLayers + 3) + i-1];
+                GaussianBlur(src, dst, Size(), sig[i], sig[i]);
+            }
+        }
+    }
+}
+
+
+class buildDoGPyramidComputer : public ParallelLoopBody
+{
+public:
+    buildDoGPyramidComputer(
+        int _nOctaveLayers,
+        const std::vector<Mat>& _gpyr,
+        std::vector<Mat>& _dogpyr)
+        : nOctaveLayers(_nOctaveLayers),
+          gpyr(_gpyr),
+          dogpyr(_dogpyr) { }
+
+    void operator()( const cv::Range& range ) const CV_OVERRIDE
+    {
+        CV_TRACE_FUNCTION();
+
+        const int begin = range.start;
+        const int end = range.end;
+
+        for( int a = begin; a < end; a++ )
+        {
+            const int o = a / (nOctaveLayers + 2);
+            const int i = a % (nOctaveLayers + 2);
+
+            const Mat& src1 = gpyr[o*(nOctaveLayers + 3) + i];
+            const Mat& src2 = gpyr[o*(nOctaveLayers + 3) + i + 1];
+            Mat& dst = dogpyr[o*(nOctaveLayers + 2) + i];
+            subtract(src2, src1, dst, noArray(), DataType<sift_wt>::type);
+        }
+    }
+
+private:
+    int nOctaveLayers;
+    const std::vector<Mat>& gpyr;
+    std::vector<Mat>& dogpyr;
+};
+
+void SIFT_Impl::buildDoGPyramid( const std::vector<Mat>& gpyr, std::vector<Mat>& dogpyr ) const
+{
+    CV_TRACE_FUNCTION();
+
+    int nOctaves = (int)gpyr.size()/(nOctaveLayers + 3);
+    dogpyr.resize( nOctaves*(nOctaveLayers + 2) );
+
+    parallel_for_(Range(0, nOctaves * (nOctaveLayers + 2)), buildDoGPyramidComputer(nOctaveLayers, gpyr, dogpyr));
+}
+
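
The sigma schedule above exploits the fact that Gaussian variances add under composition, so each level only applies the incremental blur needed on top of the previous one. A numpy sketch of the schedule for the default parameters:

```python
import numpy as np

sigma, nOctaveLayers = 1.6, 3
k = 2.0 ** (1.0 / nOctaveLayers)

sig = [sigma]
for i in range(1, nOctaveLayers + 3):
    sig_prev = (k ** (i - 1)) * sigma   # total blur already applied
    sig_total = sig_prev * k            # total blur wanted at this level
    # incremental blur: sig_total^2 = sig_i^2 + sig_prev^2
    sig.append(float(np.sqrt(sig_total**2 - sig_prev**2)))

print(np.round(sig, 3))  # [1.6, 1.226, 1.545, 1.947, 2.452, 3.09]
```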
+// Computes a gradient orientation histogram at a specified pixel
+static float calcOrientationHist( const Mat& img, Point pt, int radius,
+                                  float sigma, float* hist, int n )
+{
+    CV_TRACE_FUNCTION();
+
+    int i, j, k, len = (radius*2+1)*(radius*2+1);
+
+    float expf_scale = -1.f/(2.f * sigma * sigma);
+    AutoBuffer<float> buf(len*4 + n+4);
+    float *X = buf.data(), *Y = X + len, *Mag = X, *Ori = Y + len, *W = Ori + len;
+    float* temphist = W + len + 2;
+
+    for( i = 0; i < n; i++ )
+        temphist[i] = 0.f;
+
+    for( i = -radius, k = 0; i <= radius; i++ )
+    {
+        int y = pt.y + i;
+        if( y <= 0 || y >= img.rows - 1 )
+            continue;
+        for( j = -radius; j <= radius; j++ )
+        {
+            int x = pt.x + j;
+            if( x <= 0 || x >= img.cols - 1 )
+                continue;
+
+            float dx = (float)(img.at<sift_wt>(y, x+1) - img.at<sift_wt>(y, x-1));
+            float dy = (float)(img.at<sift_wt>(y-1, x) - img.at<sift_wt>(y+1, x));
+
+            X[k] = dx; Y[k] = dy; W[k] = (i*i + j*j)*expf_scale;
+            k++;
+        }
+    }
+
+    len = k;
+
+    // compute gradient values, orientations and the weights over the pixel neighborhood
+    cv::hal::exp32f(W, W, len);
+    cv::hal::fastAtan2(Y, X, Ori, len, true);
+    cv::hal::magnitude32f(X, Y, Mag, len);
+
+    k = 0;
+#if CV_AVX2
+    {
+        __m256 __nd360 = _mm256_set1_ps(n/360.f);
+        __m256i __n = _mm256_set1_epi32(n);
+        int CV_DECL_ALIGNED(32) bin_buf[8];
+        float CV_DECL_ALIGNED(32) w_mul_mag_buf[8];
+        for ( ; k <= len - 8; k+=8 )
+        {
+            __m256i __bin = _mm256_cvtps_epi32(_mm256_mul_ps(__nd360, _mm256_loadu_ps(&Ori[k])));
+
+            __bin = _mm256_sub_epi32(__bin, _mm256_andnot_si256(_mm256_cmpgt_epi32(__n, __bin), __n));
+            __bin = _mm256_add_epi32(__bin, _mm256_and_si256(__n, _mm256_cmpgt_epi32(_mm256_setzero_si256(), __bin)));
+
+            __m256 __w_mul_mag = _mm256_mul_ps(_mm256_loadu_ps(&W[k]), _mm256_loadu_ps(&Mag[k]));
+
+            _mm256_store_si256((__m256i *) bin_buf, __bin);
+            _mm256_store_ps(w_mul_mag_buf, __w_mul_mag);
+
+            temphist[bin_buf[0]] += w_mul_mag_buf[0];
+            temphist[bin_buf[1]] += w_mul_mag_buf[1];
+            temphist[bin_buf[2]] += w_mul_mag_buf[2];
+            temphist[bin_buf[3]] += w_mul_mag_buf[3];
+            temphist[bin_buf[4]] += w_mul_mag_buf[4];
+            temphist[bin_buf[5]] += w_mul_mag_buf[5];
+            temphist[bin_buf[6]] += w_mul_mag_buf[6];
+            temphist[bin_buf[7]] += w_mul_mag_buf[7];
+        }
+    }
+#endif
+    for( ; k < len; k++ )
+    {
+        int bin = cvRound((n/360.f)*Ori[k]);
+        if( bin >= n )
+            bin -= n;
+        if( bin < 0 )
+            bin += n;
+        temphist[bin] += W[k]*Mag[k];
+    }
+
+    // smooth the histogram
+    temphist[-1] = temphist[n-1];
+    temphist[-2] = temphist[n-2];
+    temphist[n] = temphist[0];
+    temphist[n+1] = temphist[1];
+
+    i = 0;
+#if CV_AVX2
+    {
+        __m256 __d_1_16 = _mm256_set1_ps(1.f/16.f);
+        __m256 __d_4_16 = _mm256_set1_ps(4.f/16.f);
+        __m256 __d_6_16 = _mm256_set1_ps(6.f/16.f);
+        for( ; i <= n - 8; i+=8 )
+        {
+#if CV_FMA3
+            __m256 __hist = _mm256_fmadd_ps(
+                _mm256_add_ps(_mm256_loadu_ps(&temphist[i-2]), _mm256_loadu_ps(&temphist[i+2])),
+                __d_1_16,
+                _mm256_fmadd_ps(
+                    _mm256_add_ps(_mm256_loadu_ps(&temphist[i-1]), _mm256_loadu_ps(&temphist[i+1])),
+                    __d_4_16,
+                    _mm256_mul_ps(_mm256_loadu_ps(&temphist[i]), __d_6_16)));
+#else
+            __m256 __hist = _mm256_add_ps(
+                _mm256_mul_ps(
+                    _mm256_add_ps(_mm256_loadu_ps(&temphist[i-2]), _mm256_loadu_ps(&temphist[i+2])),
+                    __d_1_16),
+                _mm256_add_ps(
+                    _mm256_mul_ps(
+                        _mm256_add_ps(_mm256_loadu_ps(&temphist[i-1]), _mm256_loadu_ps(&temphist[i+1])),
+                        __d_4_16),
+                    _mm256_mul_ps(_mm256_loadu_ps(&temphist[i]), __d_6_16)));
+#endif
+            _mm256_storeu_ps(&hist[i], __hist);
+        }
+    }
+#endif
+    for( ; i < n; i++ )
+    {
+        hist[i] = (temphist[i-2] + temphist[i+2])*(1.f/16.f) +
+                  (temphist[i-1] + temphist[i+1])*(4.f/16.f) +
+                  temphist[i]*(6.f/16.f);
+    }
+
+    float maxval = hist[0];
+    for( i = 1; i < n; i++ )
+        maxval = std::max(maxval, hist[i]);
+
+    return maxval;
+}
+
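
The smoothing pass at the end of calcOrientationHist is a circular [1, 4, 6, 4, 1]/16 filter over the 36 orientation bins; the temphist[-2..n+1] padding implements the wrap-around. The same operation in numpy:

```python
import numpy as np

n = 36
temphist = np.random.rand(n).astype(np.float32)

pad = np.concatenate([temphist[-2:], temphist, temphist[:2]])  # circular padding
kernel = np.array([1, 4, 6, 4, 1], np.float32) / 16
hist = np.convolve(pad, kernel, mode='valid')                  # length n again
assert hist.shape == (n,)
```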
+//
+// Interpolates a scale-space extremum's location and scale to subpixel
+// accuracy to form an image feature. Rejects features with low contrast.
+// Based on Section 4 of Lowe's paper.
+static bool adjustLocalExtrema( const std::vector<Mat>& dog_pyr, KeyPoint& kpt, int octv,
+                                int& layer, int& r, int& c, int nOctaveLayers,
+                                float contrastThreshold, float edgeThreshold, float sigma )
+{
+    CV_TRACE_FUNCTION();
+
+    const float img_scale = 1.f/(255*SIFT_FIXPT_SCALE);
+    const float deriv_scale = img_scale*0.5f;
+    const float second_deriv_scale = img_scale;
+    const float cross_deriv_scale = img_scale*0.25f;
+
+    float xi=0, xr=0, xc=0, contr=0;
+    int i = 0;
+
+    for( ; i < SIFT_MAX_INTERP_STEPS; i++ )
+    {
+        int idx = octv*(nOctaveLayers+2) + layer;
+        const Mat& img = dog_pyr[idx];
+        const Mat& prev = dog_pyr[idx-1];
+        const Mat& next = dog_pyr[idx+1];
+
+        Vec3f dD((img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1))*deriv_scale,
+                 (img.at<sift_wt>(r+1, c) - img.at<sift_wt>(r-1, c))*deriv_scale,
+                 (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c))*deriv_scale);
+
+        float v2 = (float)img.at<sift_wt>(r, c)*2;
+        float dxx = (img.at<sift_wt>(r, c+1) + img.at<sift_wt>(r, c-1) - v2)*second_deriv_scale;
+        float dyy = (img.at<sift_wt>(r+1, c) + img.at<sift_wt>(r-1, c) - v2)*second_deriv_scale;
+        float dss = (next.at<sift_wt>(r, c) + prev.at<sift_wt>(r, c) - v2)*second_deriv_scale;
+        float dxy = (img.at<sift_wt>(r+1, c+1) - img.at<sift_wt>(r+1, c-1) -
+                     img.at<sift_wt>(r-1, c+1) + img.at<sift_wt>(r-1, c-1))*cross_deriv_scale;
+        float dxs = (next.at<sift_wt>(r, c+1) - next.at<sift_wt>(r, c-1) -
+                     prev.at<sift_wt>(r, c+1) + prev.at<sift_wt>(r, c-1))*cross_deriv_scale;
+        float dys = (next.at<sift_wt>(r+1, c) - next.at<sift_wt>(r-1, c) -
+                     prev.at<sift_wt>(r+1, c) + prev.at<sift_wt>(r-1, c))*cross_deriv_scale;
+
+        Matx33f H(dxx, dxy, dxs,
+                  dxy, dyy, dys,
+                  dxs, dys, dss);
+
+        Vec3f X = H.solve(dD, DECOMP_LU);
+
+        xi = -X[2];
+        xr = -X[1];
+        xc = -X[0];
+
+        if( std::abs(xi) < 0.5f && std::abs(xr) < 0.5f && std::abs(xc) < 0.5f )
+            break;
+
+        if( std::abs(xi) > (float)(INT_MAX/3) ||
+            std::abs(xr) > (float)(INT_MAX/3) ||
+            std::abs(xc) > (float)(INT_MAX/3) )
+            return false;
+
+        c += cvRound(xc);
+        r += cvRound(xr);
+        layer += cvRound(xi);
+
+        if( layer < 1 || layer > nOctaveLayers ||
+            c < SIFT_IMG_BORDER || c >= img.cols - SIFT_IMG_BORDER ||
+            r < SIFT_IMG_BORDER || r >= img.rows - SIFT_IMG_BORDER )
+            return false;
+    }
+
+    // ensure convergence of interpolation
+    if( i >= SIFT_MAX_INTERP_STEPS )
+        return false;
+
+    {
+        int idx = octv*(nOctaveLayers+2) + layer;
+        const Mat& img = dog_pyr[idx];
+        const Mat& prev = dog_pyr[idx-1];
+        const Mat& next = dog_pyr[idx+1];
+        Matx31f dD((img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1))*deriv_scale,
+                   (img.at<sift_wt>(r+1, c) - img.at<sift_wt>(r-1, c))*deriv_scale,
+                   (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c))*deriv_scale);
+        float t = dD.dot(Matx31f(xc, xr, xi));
+
+        contr = img.at<sift_wt>(r, c)*img_scale + t * 0.5f;
+        if( std::abs( contr ) * nOctaveLayers < contrastThreshold )
+            return false;
+
+        // principal curvatures are computed using the trace and det of Hessian
+        float v2 = img.at<sift_wt>(r, c)*2.f;
+        float dxx = (img.at<sift_wt>(r, c+1) + img.at<sift_wt>(r, c-1) - v2)*second_deriv_scale;
+        float dyy = (img.at<sift_wt>(r+1, c) + img.at<sift_wt>(r-1, c) - v2)*second_deriv_scale;
+        float dxy = (img.at<sift_wt>(r+1, c+1) - img.at<sift_wt>(r+1, c-1) -
+                     img.at<sift_wt>(r-1, c+1) + img.at<sift_wt>(r-1, c-1)) * cross_deriv_scale;
+        float tr = dxx + dyy;
+        float det = dxx * dyy - dxy * dxy;
+
+        if( det <= 0 || tr*tr*edgeThreshold >= (edgeThreshold + 1)*(edgeThreshold + 1)*det )
+            return false;
+    }
+
+    kpt.pt.x = (c + xc) * (1 << octv);
+    kpt.pt.y = (r + xr) * (1 << octv);
+    kpt.octave = octv + (layer << 8) + (cvRound((xi + 0.5)*255) << 16);
+    kpt.size = sigma*powf(2.f, (layer + xi) / nOctaveLayers)*(1 << octv)*2;
+    kpt.response = std::abs(contr);
+
+    return true;
+}
+
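
Each iteration of adjustLocalExtrema is one Newton step: it solves H*X = dD for the offset of the true extremum from the sampled pixel and moves there, accepting the keypoint once every component of the offset is below 0.5. A sketch with illustrative numbers:

```python
import numpy as np

dD = np.array([0.02, -0.01, 0.005])   # first derivatives along (x, y, sigma)
H = np.array([[0.80, 0.10, 0.00],     # Hessian of the DoG around the sample
              [0.10, 0.70, 0.05],
              [0.00, 0.05, 0.60]])

xc, xr, xi = -np.linalg.solve(H, dD)  # subpixel offset (column, row, layer)
converged = max(abs(xc), abs(xr), abs(xi)) < 0.5
print(xc, xr, xi, converged)
```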
+class findScaleSpaceExtremaComputer : public ParallelLoopBody
+{
+public:
+    findScaleSpaceExtremaComputer(
+        int _o,
+        int _i,
+        int _threshold,
+        int _idx,
+        int _step,
+        int _cols,
+        int _nOctaveLayers,
+        double _contrastThreshold,
+        double _edgeThreshold,
+        double _sigma,
+        const std::vector<Mat>& _gauss_pyr,
+        const std::vector<Mat>& _dog_pyr,
+        TLSData<std::vector<KeyPoint> > &_tls_kpts_struct)
+
+        : o(_o),
+          i(_i),
+          threshold(_threshold),
+          idx(_idx),
+          step(_step),
+          cols(_cols),
+          nOctaveLayers(_nOctaveLayers),
+          contrastThreshold(_contrastThreshold),
+          edgeThreshold(_edgeThreshold),
+          sigma(_sigma),
+          gauss_pyr(_gauss_pyr),
+          dog_pyr(_dog_pyr),
+          tls_kpts_struct(_tls_kpts_struct) { }
+    void operator()( const cv::Range& range ) const CV_OVERRIDE
+    {
+        CV_TRACE_FUNCTION();
+
+        const int begin = range.start;
+        const int end = range.end;
+
+        static const int n = SIFT_ORI_HIST_BINS;
+        float hist[n];
+
+        const Mat& img = dog_pyr[idx];
+        const Mat& prev = dog_pyr[idx-1];
+        const Mat& next = dog_pyr[idx+1];
+
+        std::vector<KeyPoint> *tls_kpts = tls_kpts_struct.get();
+
+        KeyPoint kpt;
+        for( int r = begin; r < end; r++)
+        {
+            const sift_wt* currptr = img.ptr<sift_wt>(r);
+            const sift_wt* prevptr = prev.ptr<sift_wt>(r);
+            const sift_wt* nextptr = next.ptr<sift_wt>(r);
+
+            for( int c = SIFT_IMG_BORDER; c < cols-SIFT_IMG_BORDER; c++)
+            {
+                sift_wt val = currptr[c];
+
+                // find local extrema with pixel accuracy
+                if( std::abs(val) > threshold &&
+                    ((val > 0 && val >= currptr[c-1] && val >= currptr[c+1] &&
+                      val >= currptr[c-step-1] && val >= currptr[c-step] && val >= currptr[c-step+1] &&
+                      val >= currptr[c+step-1] && val >= currptr[c+step] && val >= currptr[c+step+1] &&
+                      val >= nextptr[c] && val >= nextptr[c-1] && val >= nextptr[c+1] &&
+                      val >= nextptr[c-step-1] && val >= nextptr[c-step] && val >= nextptr[c-step+1] &&
+                      val >= nextptr[c+step-1] && val >= nextptr[c+step] && val >= nextptr[c+step+1] &&
+                      val >= prevptr[c] && val >= prevptr[c-1] && val >= prevptr[c+1] &&
+                      val >= prevptr[c-step-1] && val >= prevptr[c-step] && val >= prevptr[c-step+1] &&
+                      val >= prevptr[c+step-1] && val >= prevptr[c+step] && val >= prevptr[c+step+1]) ||
+                     (val < 0 && val <= currptr[c-1] && val <= currptr[c+1] &&
+                      val <= currptr[c-step-1] && val <= currptr[c-step] && val <= currptr[c-step+1] &&
+                      val <= currptr[c+step-1] && val <= currptr[c+step] && val <= currptr[c+step+1] &&
+                      val <= nextptr[c] && val <= nextptr[c-1] && val <= nextptr[c+1] &&
+                      val <= nextptr[c-step-1] && val <= nextptr[c-step] && val <= nextptr[c-step+1] &&
+                      val <= nextptr[c+step-1] && val <= nextptr[c+step] && val <= nextptr[c+step+1] &&
+                      val <= prevptr[c] && val <= prevptr[c-1] && val <= prevptr[c+1] &&
+                      val <= prevptr[c-step-1] && val <= prevptr[c-step] && val <= prevptr[c-step+1] &&
+                      val <= prevptr[c+step-1] && val <= prevptr[c+step] && val <= prevptr[c+step+1])))
+                {
+                    CV_TRACE_REGION("pixel_candidate");
+
+                    int r1 = r, c1 = c, layer = i;
+                    if( !adjustLocalExtrema(dog_pyr, kpt, o, layer, r1, c1,
+                                            nOctaveLayers, (float)contrastThreshold,
+                                            (float)edgeThreshold, (float)sigma) )
+                        continue;
+                    float scl_octv = kpt.size*0.5f/(1 << o);
+                    float omax = calcOrientationHist(gauss_pyr[o*(nOctaveLayers+3) + layer],
+                                                     Point(c1, r1),
+                                                     cvRound(SIFT_ORI_RADIUS * scl_octv),
+                                                     SIFT_ORI_SIG_FCTR * scl_octv,
+                                                     hist, n);
+                    float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO);
+                    for( int j = 0; j < n; j++ )
+                    {
+                        int l = j > 0 ? j - 1 : n - 1;
+                        int r2 = j < n-1 ? j + 1 : 0;
+
+                        if( hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr )
+                        {
+                            float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]);
+                            bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin;
+                            kpt.angle = 360.f - (float)((360.f/n) * bin);
+                            if(std::abs(kpt.angle - 360.f) < FLT_EPSILON)
+                                kpt.angle = 0.f;
+                            {
+                                tls_kpts->push_back(kpt);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+private:
+    int o, i;
+    int threshold;
+    int idx, step, cols;
+    int nOctaveLayers;
+    double contrastThreshold;
+    double edgeThreshold;
+    double sigma;
+    const std::vector<Mat>& gauss_pyr;
+    const std::vector<Mat>& dog_pyr;
+    TLSData<std::vector<KeyPoint> > &tls_kpts_struct;
+};
+
+//
+// Detects features at extrema in DoG scale space. Bad features are discarded
+// based on contrast and ratio of principal curvatures.
+void SIFT_Impl::findScaleSpaceExtrema( const std::vector<Mat>& gauss_pyr, const std::vector<Mat>& dog_pyr,
+                                       std::vector<KeyPoint>& keypoints ) const
+{
+    CV_TRACE_FUNCTION();
+
+    const int nOctaves = (int)gauss_pyr.size()/(nOctaveLayers + 3);
+    const int threshold = cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE);
+
+    keypoints.clear();
+    TLSDataAccumulator<std::vector<KeyPoint> > tls_kpts_struct;
+
+    for( int o = 0; o < nOctaves; o++ )
+        for( int i = 1; i <= nOctaveLayers; i++ )
+        {
+            const int idx = o*(nOctaveLayers+2)+i;
+            const Mat& img = dog_pyr[idx];
+            const int step = (int)img.step1();
+            const int rows = img.rows, cols = img.cols;
+
+            parallel_for_(Range(SIFT_IMG_BORDER, rows-SIFT_IMG_BORDER),
+                          findScaleSpaceExtremaComputer(
+                              o, i, threshold, idx, step, cols,
+                              nOctaveLayers,
+                              contrastThreshold,
+                              edgeThreshold,
+                              sigma,
+                              gauss_pyr, dog_pyr, tls_kpts_struct));
+        }
+
+    std::vector<std::vector<KeyPoint>*> kpt_vecs;
+    tls_kpts_struct.gather(kpt_vecs);
+    for (size_t i = 0; i < kpt_vecs.size(); ++i) {
+        keypoints.insert(keypoints.end(), kpt_vecs[i]->begin(), kpt_vecs[i]->end());
+    }
+}
+
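
The orientation loop above refines each histogram peak with a parabola fit through the peak and its two circular neighbors; a keypoint is emitted for every peak within 80% (SIFT_ORI_PEAK_RATIO) of the maximum, so one location can yield several keypoints with different angles. The fit in isolation:

```python
n = 36
hist = [0.0] * n
hist[9:12] = [2.0, 5.0, 4.0]        # a peak at bin j = 10

j, l, r2 = 10, 9, 11
bin_ = j + 0.5 * (hist[l] - hist[r2]) / (hist[l] - 2 * hist[j] + hist[r2])
angle = 360.0 - (360.0 / n) * bin_  # -> bin 10.25, angle 257.5 degrees
print(bin_, angle)
```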
+static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float scl,
+                                int d, int n, float* dst )
+{
+    CV_TRACE_FUNCTION();
+
+    Point pt(cvRound(ptf.x), cvRound(ptf.y));
+    float cos_t = cosf(ori*(float)(CV_PI/180));
+    float sin_t = sinf(ori*(float)(CV_PI/180));
+    float bins_per_rad = n / 360.f;
+    float exp_scale = -1.f/(d * d * 0.5f);
+    float hist_width = SIFT_DESCR_SCL_FCTR * scl;
+    int radius = cvRound(hist_width * 1.4142135623730951f * (d + 1) * 0.5f);
+    // Clip the radius to the diagonal of the image to avoid autobuffer too large exception
+    radius = std::min(radius, (int) sqrt(((double) img.cols)*img.cols + ((double) img.rows)*img.rows));
+    cos_t /= hist_width;
+    sin_t /= hist_width;
+
+    int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2);
+    int rows = img.rows, cols = img.cols;
+
+    AutoBuffer<float> buf(len*6 + histlen);
+    float *X = buf.data(), *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len;
+    float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len;
+
+    for( i = 0; i < d+2; i++ )
+    {
+        for( j = 0; j < d+2; j++ )
+            for( k = 0; k < n+2; k++ )
+                hist[(i*(d+2) + j)*(n+2) + k] = 0.;
+    }
+
+    for( i = -radius, k = 0; i <= radius; i++ )
+        for( j = -radius; j <= radius; j++ )
+        {
+            // Calculate sample's histogram array coords rotated relative to ori.
+            // Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e.
+            // r_rot = 1.5) have full weight placed in row 1 after interpolation.
+            float c_rot = j * cos_t - i * sin_t;
+            float r_rot = j * sin_t + i * cos_t;
+            float rbin = r_rot + d/2 - 0.5f;
+            float cbin = c_rot + d/2 - 0.5f;
+            int r = pt.y + i, c = pt.x + j;
+
+            if( rbin > -1 && rbin < d && cbin > -1 && cbin < d &&
+                r > 0 && r < rows - 1 && c > 0 && c < cols - 1 )
+            {
+                float dx = (float)(img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1));
+                float dy = (float)(img.at<sift_wt>(r-1, c) - img.at<sift_wt>(r+1, c));
+                X[k] = dx; Y[k] = dy; RBin[k] = rbin; CBin[k] = cbin;
+                W[k] = (c_rot * c_rot + r_rot * r_rot)*exp_scale;
+                k++;
+            }
+        }
+
+    len = k;
+    cv::hal::fastAtan2(Y, X, Ori, len, true);
+    cv::hal::magnitude32f(X, Y, Mag, len);
+    cv::hal::exp32f(W, W, len);
+
+    k = 0;
+#if CV_AVX2
+    {
+        int CV_DECL_ALIGNED(32) idx_buf[8];
+        float CV_DECL_ALIGNED(32) rco_buf[64];
+        const __m256 __ori = _mm256_set1_ps(ori);
+        const __m256 __bins_per_rad = _mm256_set1_ps(bins_per_rad);
+        const __m256i __n = _mm256_set1_epi32(n);
+        for( ; k <= len - 8; k+=8 )
+        {
+            __m256 __rbin = _mm256_loadu_ps(&RBin[k]);
+            __m256 __cbin = _mm256_loadu_ps(&CBin[k]);
+            __m256 __obin = _mm256_mul_ps(_mm256_sub_ps(_mm256_loadu_ps(&Ori[k]), __ori), __bins_per_rad);
+            __m256 __mag = _mm256_mul_ps(_mm256_loadu_ps(&Mag[k]), _mm256_loadu_ps(&W[k]));
+
+            __m256 __r0 = _mm256_floor_ps(__rbin);
+            __rbin = _mm256_sub_ps(__rbin, __r0);
+            __m256 __c0 = _mm256_floor_ps(__cbin);
+            __cbin = _mm256_sub_ps(__cbin, __c0);
+            __m256 __o0 = _mm256_floor_ps(__obin);
+            __obin = _mm256_sub_ps(__obin, __o0);
+
+            __m256i __o0i = _mm256_cvtps_epi32(__o0);
+            __o0i = _mm256_add_epi32(__o0i, _mm256_and_si256(__n, _mm256_cmpgt_epi32(_mm256_setzero_si256(), __o0i)));
+            __o0i = _mm256_sub_epi32(__o0i, _mm256_andnot_si256(_mm256_cmpgt_epi32(__n, __o0i), __n));
+
+            __m256 __v_r1 = _mm256_mul_ps(__mag, __rbin);
+            __m256 __v_r0 = _mm256_sub_ps(__mag, __v_r1);
+
+            __m256 __v_rc11 = _mm256_mul_ps(__v_r1, __cbin);
+            __m256 __v_rc10 = _mm256_sub_ps(__v_r1, __v_rc11);
+
+            __m256 __v_rc01 = _mm256_mul_ps(__v_r0, __cbin);
+            __m256 __v_rc00 = _mm256_sub_ps(__v_r0, __v_rc01);
+
+            __m256 __v_rco111 = _mm256_mul_ps(__v_rc11, __obin);
+            __m256 __v_rco110 = _mm256_sub_ps(__v_rc11, __v_rco111);
+
+            __m256 __v_rco101 = _mm256_mul_ps(__v_rc10, __obin);
+            __m256 __v_rco100 = _mm256_sub_ps(__v_rc10, __v_rco101);
+
+            __m256 __v_rco011 = _mm256_mul_ps(__v_rc01, __obin);
+            __m256 __v_rco010 = _mm256_sub_ps(__v_rc01, __v_rco011);
+
+            __m256 __v_rco001 = _mm256_mul_ps(__v_rc00, __obin);
+            __m256 __v_rco000 = _mm256_sub_ps(__v_rc00, __v_rco001);
+
+            __m256i __one = _mm256_set1_epi32(1);
+            __m256i __idx = _mm256_add_epi32(
+                _mm256_mullo_epi32(
+                    _mm256_add_epi32(
+                        _mm256_mullo_epi32(_mm256_add_epi32(_mm256_cvtps_epi32(__r0), __one), _mm256_set1_epi32(d + 2)),
+                        _mm256_add_epi32(_mm256_cvtps_epi32(__c0), __one)),
+                    _mm256_set1_epi32(n + 2)),
+                __o0i);
+
+            _mm256_store_si256((__m256i *)idx_buf, __idx);
+
+            _mm256_store_ps(&(rco_buf[0]),  __v_rco000);
+            _mm256_store_ps(&(rco_buf[8]),  __v_rco001);
+            _mm256_store_ps(&(rco_buf[16]), __v_rco010);
+            _mm256_store_ps(&(rco_buf[24]), __v_rco011);
+            _mm256_store_ps(&(rco_buf[32]), __v_rco100);
+            _mm256_store_ps(&(rco_buf[40]), __v_rco101);
+            _mm256_store_ps(&(rco_buf[48]), __v_rco110);
+            _mm256_store_ps(&(rco_buf[56]), __v_rco111);
+            #define HIST_SUM_HELPER(id)                                  \
+               hist[idx_buf[(id)]] += rco_buf[(id)];                     \
+               hist[idx_buf[(id)]+1] += rco_buf[8 + (id)];               \
+               hist[idx_buf[(id)]+(n+2)] += rco_buf[16 + (id)];          \
+               hist[idx_buf[(id)]+(n+3)] += rco_buf[24 + (id)];          \
+               hist[idx_buf[(id)]+(d+2)*(n+2)] += rco_buf[32 + (id)];    \
+               hist[idx_buf[(id)]+(d+2)*(n+2)+1] += rco_buf[40 + (id)];  \
+               hist[idx_buf[(id)]+(d+3)*(n+2)] += rco_buf[48 + (id)];    \
+               hist[idx_buf[(id)]+(d+3)*(n+2)+1] += rco_buf[56 + (id)];
+
+            HIST_SUM_HELPER(0);
+            HIST_SUM_HELPER(1);
+            HIST_SUM_HELPER(2);
+            HIST_SUM_HELPER(3);
+            HIST_SUM_HELPER(4);
+            HIST_SUM_HELPER(5);
+            HIST_SUM_HELPER(6);
+            HIST_SUM_HELPER(7);
+
+            #undef HIST_SUM_HELPER
+        }
+    }
+#endif
+    for( ; k < len; k++ )
+    {
+        float rbin = RBin[k], cbin = CBin[k];
+        float obin = (Ori[k] - ori)*bins_per_rad;
+        float mag = Mag[k]*W[k];
+
+        int r0 = cvFloor( rbin );
+        int c0 = cvFloor( cbin );
+        int o0 = cvFloor( obin );
+        rbin -= r0;
+        cbin -= c0;
+        obin -= o0;
+
+        if( o0 < 0 )
+            o0 += n;
+        if( o0 >= n )
+            o0 -= n;
+
+        // histogram update using tri-linear interpolation
+        float v_r1 = mag*rbin, v_r0 = mag - v_r1;
+        float v_rc11 = v_r1*cbin, v_rc10 = v_r1 - v_rc11;
+        float v_rc01 = v_r0*cbin, v_rc00 = v_r0 - v_rc01;
+        float v_rco111 = v_rc11*obin, v_rco110 = v_rc11 - v_rco111;
+        float v_rco101 = v_rc10*obin, v_rco100 = v_rc10 - v_rco101;
+        float v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011;
+        float v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001;
+
+        int idx = ((r0+1)*(d+2) + c0+1)*(n+2) + o0;
+        hist[idx] += v_rco000;
+        hist[idx+1] += v_rco001;
+        hist[idx+(n+2)] += v_rco010;
+        hist[idx+(n+3)] += v_rco011;
+        hist[idx+(d+2)*(n+2)] += v_rco100;
+        hist[idx+(d+2)*(n+2)+1] += v_rco101;
+        hist[idx+(d+3)*(n+2)] += v_rco110;
+        hist[idx+(d+3)*(n+2)+1] += v_rco111;
+    }
+
+    // finalize histogram, since the orientation histograms are circular
+    for( i = 0; i < d; i++ )
+        for( j = 0; j < d; j++ )
+        {
+            int idx = ((i+1)*(d+2) + (j+1))*(n+2);
+            hist[idx] += hist[idx+n];
+            hist[idx+1] += hist[idx+n+1];
+            for( k = 0; k < n; k++ )
+                dst[(i*d + j)*n + k] = hist[idx+k];
+        }
+    // copy histogram to the descriptor,
+    // apply hysteresis thresholding
+    // and scale the result, so that it can be easily converted
+    // to byte array
+    float nrm2 = 0;
+    len = d*d*n;
+    k = 0;
+#if CV_AVX2
+    {
+        float CV_DECL_ALIGNED(32) nrm2_buf[8];
+        __m256 __nrm2 = _mm256_setzero_ps();
+        __m256 __dst;
+        for( ; k <= len - 8; k += 8 )
+        {
+            __dst = _mm256_loadu_ps(&dst[k]);
+#if CV_FMA3
+            __nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2);
+#else
+            __nrm2 = _mm256_add_ps(__nrm2, _mm256_mul_ps(__dst, __dst));
+#endif
+        }
+        _mm256_store_ps(nrm2_buf, __nrm2);
+        nrm2 = nrm2_buf[0] + nrm2_buf[1] + nrm2_buf[2] + nrm2_buf[3] +
+               nrm2_buf[4] + nrm2_buf[5] + nrm2_buf[6] + nrm2_buf[7];
+    }
+#endif
+    for( ; k < len; k++ )
+        nrm2 += dst[k]*dst[k];
+
+    float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR;
+
+    i = 0, nrm2 = 0;
+#if 0 //CV_AVX2
+    // This code cannot be enabled because it sums nrm2 in a different order,
+    // thus producing slightly different results
+    {
+        float CV_DECL_ALIGNED(32) nrm2_buf[8];
+        __m256 __dst;
+        __m256 __nrm2 = _mm256_setzero_ps();
+        __m256 __thr = _mm256_set1_ps(thr);
+        for( ; i <= len - 8; i += 8 )
+        {
+            __dst = _mm256_loadu_ps(&dst[i]);
+            __dst = _mm256_min_ps(__dst, __thr);
+            _mm256_storeu_ps(&dst[i], __dst);
+#if CV_FMA3
+            __nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2);
+#else
+            __nrm2 = _mm256_add_ps(__nrm2, _mm256_mul_ps(__dst, __dst));
+#endif
+        }
+        _mm256_store_ps(nrm2_buf, __nrm2);
+        nrm2 = nrm2_buf[0] + nrm2_buf[1] + nrm2_buf[2] + nrm2_buf[3] +
+               nrm2_buf[4] + nrm2_buf[5] + nrm2_buf[6] + nrm2_buf[7];
+    }
+#endif
+    for( ; i < len; i++ )
+    {
+        float val = std::min(dst[i], thr);
+        dst[i] = val;
+        nrm2 += val*val;
+    }
+    nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON);
+
+#if 1
+    k = 0;
+
+class calcDescriptorsComputer : public ParallelLoopBody
+{
+public:
+    calcDescriptorsComputer(const std::vector<Mat>& _gpyr,
+                            const std::vector<KeyPoint>& _keypoints,
+                            Mat& _descriptors,
+                            int _nOctaveLayers,
+                            int _firstOctave)
+        : gpyr(_gpyr),
+          keypoints(_keypoints),
+          descriptors(_descriptors),
+          nOctaveLayers(_nOctaveLayers),
+          firstOctave(_firstOctave) { }
+
+    void operator()( const cv::Range& range ) const CV_OVERRIDE
+    {
+        CV_TRACE_FUNCTION();
+
+        const int begin = range.start;
+        const int end = range.end;
+
+        static const int d = SIFT_DESCR_WIDTH, n = SIFT_DESCR_HIST_BINS;
+
+        for ( int i = begin; i<end; i++ )
+        {
+            KeyPoint kpt = keypoints[i];
+            int octave, layer;
+            float scale;
+            unpackOctave(kpt, octave, layer, scale);
+            CV_Assert(octave >= firstOctave && layer <= nOctaveLayers+2);
+            float size=kpt.size*scale;
+            Point2f ptf(kpt.pt.x*scale, kpt.pt.y*scale);
+            const Mat& img = gpyr[(octave - firstOctave)*(nOctaveLayers + 3) + layer];
+
+            float angle = 360.f - kpt.angle;
+            if(std::abs(angle - 360.f) < FLT_EPSILON)
+                angle = 0.f;
+            calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr<float>((int)i));
+        }
+    }
+private:
+    const std::vector<Mat>& gpyr;
+    const std::vector<KeyPoint>& keypoints;
+    Mat& descriptors;
+    int nOctaveLayers;
+    int firstOctave;
+};
+
+static void calcDescriptors(const std::vector<Mat>& gpyr, const std::vector<KeyPoint>& keypoints,
+                            Mat& descriptors, int nOctaveLayers, int firstOctave )
+{
+    CV_TRACE_FUNCTION();
+    parallel_for_(Range(0, static_cast<int>(keypoints.size())), calcDescriptorsComputer(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave));
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////
+
+SIFT_Impl::SIFT_Impl( int _nfeatures, int _nOctaveLayers,
+           double _contrastThreshold, double _edgeThreshold, double _sigma )
+    : nfeatures(_nfeatures), nOctaveLayers(_nOctaveLayers),
+    contrastThreshold(_contrastThreshold), edgeThreshold(_edgeThreshold), sigma(_sigma)
+{
+}
+
+int SIFT_Impl::descriptorSize() const
+{
+    return SIFT_DESCR_WIDTH*SIFT_DESCR_WIDTH*SIFT_DESCR_HIST_BINS;
+}
+
+int SIFT_Impl::descriptorType() const
+{
+    return CV_32F;
+}
+
+int SIFT_Impl::defaultNorm() const
+{
+    return NORM_L2;
+}
+
+
+void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask,
+                      std::vector<KeyPoint>& keypoints,
+                      OutputArray _descriptors,
+                      bool useProvidedKeypoints)
+{
+    CV_TRACE_FUNCTION();
+
+    int firstOctave = -1, actualNOctaves = 0, actualNLayers = 0;
+    Mat image = _image.getMat(), mask = _mask.getMat();
+
+    if( image.empty() || image.depth() != CV_8U )
+        CV_Error( Error::StsBadArg, "image is empty or has incorrect depth (!=CV_8U)" );
+
+    if( !mask.empty() && mask.type() != CV_8UC1 )
+        CV_Error( Error::StsBadArg, "mask has incorrect type (!=CV_8UC1)" );
+
+    if( useProvidedKeypoints )
+    {
+        firstOctave = 0;
+        int maxOctave = INT_MIN;
+        for( size_t i = 0; i < keypoints.size(); i++ )
+        {
+            int octave, layer;
+            float scale;
+            unpackOctave(keypoints[i], octave, layer, scale);
+            firstOctave = std::min(firstOctave, octave);
+            maxOctave = std::max(maxOctave, octave);
+            actualNLayers = std::max(actualNLayers, layer-2);
+        }
+
+        firstOctave = std::min(firstOctave, 0);
+        CV_Assert( firstOctave >= -1 && actualNLayers <= nOctaveLayers );
+        actualNOctaves = maxOctave - firstOctave + 1;
+    }
+
+    Mat base = createInitialImage(image, firstOctave < 0, (float)sigma);
+    std::vector<Mat> gpyr;
+    int nOctaves = actualNOctaves > 0 ? actualNOctaves : cvRound(std::log( (double)std::min( base.cols, base.rows ) ) / std::log(2.) - 2) - firstOctave;
+
+    //double t, tf = getTickFrequency();
+    //t = (double)getTickCount();
+    buildGaussianPyramid(base, gpyr, nOctaves);
+
+    //t = (double)getTickCount() - t;
+    //printf("pyramid construction time: %g\n", t*1000./tf);
+
+    if( !useProvidedKeypoints )
+    {
+        std::vector<Mat> dogpyr;
+        buildDoGPyramid(gpyr, dogpyr);
+        //t = (double)getTickCount();
+        findScaleSpaceExtrema(gpyr, dogpyr, keypoints);
+        KeyPointsFilter::removeDuplicatedSorted( keypoints );
+
+        if( nfeatures > 0 )
+            KeyPointsFilter::retainBest(keypoints, nfeatures);
+        //t = (double)getTickCount() - t;
+        //printf("keypoint detection time: %g\n", t*1000./tf);
+
+        if( firstOctave < 0 )
+            for( size_t i = 0; i < keypoints.size(); i++ )
+            {
+                KeyPoint& kpt = keypoints[i];
+                float scale = 1.f/(float)(1 << -firstOctave);
+                kpt.octave = (kpt.octave & ~255) | ((kpt.octave + firstOctave) & 255);
+                kpt.pt *= scale;
+                kpt.size *= scale;
+            }
+
+        if( !mask.empty() )
+            KeyPointsFilter::runByPixelsMask( keypoints, mask );
+    }
+    else
+    {
+        // filter keypoints by mask
+        //KeyPointsFilter::runByPixelsMask( keypoints, mask );
+    }
+
+    if( _descriptors.needed() )
+    {
+        //t = (double)getTickCount();
+        int dsize = descriptorSize();
+        _descriptors.create((int)keypoints.size(), dsize, CV_32F);
+        Mat descriptors = _descriptors.getMat();
+
+        calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave);
+        //t = (double)getTickCount() - t;
+        //printf("descriptor extraction time: %g\n", t*1000./tf);
+    }
+}
+
+}
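With this file in place, SIFT lives in the core `features2d` module rather than `xfeatures2d`. A minimal usage sketch, assuming OpenCV >= 4.4 and a placeholder image path:

```cpp
#include <opencv2/features2d.hpp>
#include <opencv2/imgcodecs.hpp>
#include <vector>

int main()
{
    // "input.png" is a placeholder; any readable 8-bit image works.
    cv::Mat img = cv::imread("input.png", cv::IMREAD_GRAYSCALE);

    cv::Ptr<cv::SIFT> sift = cv::SIFT::create();
    std::vector<cv::KeyPoint> keypoints;
    cv::Mat descriptors;
    sift->detectAndCompute(img, cv::noArray(), keypoints, descriptors);

    // descriptors is keypoints.size() x 128, CV_32F,
    // matching descriptorSize()/descriptorType() above.
    return 0;
}
```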
diff --git a/modules/features2d/test/test_descriptors_invariance.cpp b/modules/features2d/test/test_descriptors_invariance.cpp
index 1487ee9d94..ca1f9b92bf 100644
--- a/modules/features2d/test/test_descriptors_invariance.cpp
+++ b/modules/features2d/test/test_descriptors_invariance.cpp
@@ -17,6 +17,9 @@ const static std::string IMAGE_BIKES = "detectors_descriptors_evaluation/images_
 * Descriptors's rotation invariance check
 */

+INSTANTIATE_TEST_CASE_P(SIFT, DescriptorRotationInvariance,
+                        Value(IMAGE_TSUKUBA, SIFT::create(), SIFT::create(), 0.98f));
+
 INSTANTIATE_TEST_CASE_P(BRISK, DescriptorRotationInvariance,
                         Value(IMAGE_TSUKUBA, BRISK::create(), BRISK::create(), 0.99f));

@@ -33,6 +36,10 @@ INSTANTIATE_TEST_CASE_P(AKAZE_DESCRIPTOR_KAZE, DescriptorRotationInvariance,
 * Descriptor's scale invariance check
 */

+// TODO: Expected: (descInliersRatio) >= (minInliersRatio), actual: 0.330378 vs 0.78
+INSTANTIATE_TEST_CASE_P(DISABLED_SIFT, DescriptorScaleInvariance,
+                        Value(IMAGE_BIKES, SIFT::create(), SIFT::create(), 0.78f));
+
 INSTANTIATE_TEST_CASE_P(AKAZE, DescriptorScaleInvariance,
                         Value(IMAGE_BIKES, AKAZE::create(), AKAZE::create(), 0.6f));

diff --git a/modules/features2d/test/test_descriptors_regression.cpp b/modules/features2d/test/test_descriptors_regression.cpp
index ffacbe3f43..1a750feb8c 100644
--- a/modules/features2d/test/test_descriptors_regression.cpp
+++ b/modules/features2d/test/test_descriptors_regression.cpp
@@ -18,6 +18,13 @@ namespace opencv_test { namespace {
 * Tests registrations *
 \****************************************************************************************/

+TEST( Features2d_DescriptorExtractor_SIFT, regression )
+{
+    CV_DescriptorExtractorTest<L2<float> > test( "descriptor-sift", 1.0f,
+                                                 SIFT::create() );
+    test.safe_run();
+}
+
 TEST( Features2d_DescriptorExtractor_BRISK, regression )
 {
     CV_DescriptorExtractorTest<Hamming> test( "descriptor-brisk",
@@ -64,7 +71,7 @@ TEST( Features2d_DescriptorExtractor_AKAZE_DESCRIPTOR_KAZE, regression )
     test.safe_run();
 }

-TEST( Features2d_DescriptorExtractor, batch )
+TEST( Features2d_DescriptorExtractor, batch_ORB )
 {
     string path = string(cvtest::TS::ptr()->get_data_path() + "detectors_descriptors_evaluation/images_datasets/graf");
     vector<Mat> imgs, descriptors;
@@ -92,6 +99,35 @@ TEST( Features2d_DescriptorExtractor, batch )
     }
 }

+TEST( Features2d_DescriptorExtractor, batch_SIFT )
+{
+    string path = string(cvtest::TS::ptr()->get_data_path() + "detectors_descriptors_evaluation/images_datasets/graf");
+    vector<Mat> imgs, descriptors;
+    vector<vector<KeyPoint> > keypoints;
+    int i, n = 6;
+    Ptr<SIFT> sift = SIFT::create();
+
+    for( i = 0; i < n; i++ )
+    {
+        string imgname = format("%s/img%d.png", path.c_str(), i+1);
+        Mat img = imread(imgname, 0);
+        imgs.push_back(img);
+    }
+
+    sift->detect(imgs, keypoints);
+    sift->compute(imgs, keypoints, descriptors);
+
+    ASSERT_EQ((int)keypoints.size(), n);
+    ASSERT_EQ((int)descriptors.size(), n);
+
+    for( i = 0; i < n; i++ )
+    {
+        EXPECT_GT((int)keypoints[i].size(), 100);
+        EXPECT_GT(descriptors[i].rows, 100);
+    }
+}
+
+
 class DescriptorImage : public TestWithParam<std::string>
 {
 protected:
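The new `batch_SIFT` test drives the multi-image overloads of `Feature2D::detect` and `Feature2D::compute`, which produce one keypoint vector and one descriptor `Mat` per input image. A standalone sketch of that API (file names are placeholders, not the graf dataset the test uses):

```cpp
#include <opencv2/features2d.hpp>
#include <opencv2/imgcodecs.hpp>
#include <vector>

int main()
{
    // Placeholder file names; any set of readable 8-bit images works.
    std::vector<cv::Mat> imgs = {
        cv::imread("img1.png", cv::IMREAD_GRAYSCALE),
        cv::imread("img2.png", cv::IMREAD_GRAYSCALE)
    };

    cv::Ptr<cv::SIFT> sift = cv::SIFT::create();
    std::vector<std::vector<cv::KeyPoint> > keypoints;
    std::vector<cv::Mat> descriptors;

    sift->detect(imgs, keypoints);               // one keypoint list per image
    sift->compute(imgs, keypoints, descriptors); // one descriptor Mat per image
    return 0;
}
```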
 * Tests registrations *
 \****************************************************************************************/

diff --git a/modules/features2d/test/test_detectors_invariance.cpp b/modules/features2d/test/test_detectors_invariance.cpp
index be89f0c65f..e45ae8363f 100644
--- a/modules/features2d/test/test_detectors_invariance.cpp
+++ b/modules/features2d/test/test_detectors_invariance.cpp
@@ -17,6 +17,9 @@ const static std::string IMAGE_BIKES = "detectors_descriptors_evaluation/images_
 * Detector's rotation invariance check
 */

+INSTANTIATE_TEST_CASE_P(SIFT, DetectorRotationInvariance,
+                        Value(IMAGE_TSUKUBA, SIFT::create(), 0.45f, 0.70f));
+
 INSTANTIATE_TEST_CASE_P(BRISK, DetectorRotationInvariance,
                         Value(IMAGE_TSUKUBA, BRISK::create(), 0.45f, 0.76f));

@@ -33,6 +36,10 @@ INSTANTIATE_TEST_CASE_P(AKAZE_DESCRIPTOR_KAZE, DetectorRotationInvariance,
 * Detector's scale invariance check
 */

+// TODO: Expected: (keyPointMatchesRatio) >= (minKeyPointMatchesRatio), actual: 0.596752 vs 0.69
+INSTANTIATE_TEST_CASE_P(DISABLED_SIFT, DetectorScaleInvariance,
+                        Value(IMAGE_BIKES, SIFT::create(), 0.69f, 0.98f));
+
 INSTANTIATE_TEST_CASE_P(BRISK, DetectorScaleInvariance,
                         Value(IMAGE_BIKES, BRISK::create(), 0.08f, 0.49f));

diff --git a/modules/features2d/test/test_detectors_regression.cpp b/modules/features2d/test/test_detectors_regression.cpp
index 6542783d40..37e0c8f2bc 100644
--- a/modules/features2d/test/test_detectors_regression.cpp
+++ b/modules/features2d/test/test_detectors_regression.cpp
@@ -18,6 +18,12 @@ namespace opencv_test { namespace {
 * Tests registrations *
 \****************************************************************************************/

+TEST( Features2d_Detector_SIFT, regression )
+{
+    CV_FeatureDetectorTest test( "detector-sift", SIFT::create() );
+    test.safe_run();
+}
+
 TEST( Features2d_Detector_BRISK, regression )
 {
     CV_FeatureDetectorTest test( "detector-brisk", BRISK::create() );
diff --git a/modules/features2d/test/test_keypoints.cpp b/modules/features2d/test/test_keypoints.cpp
index b09d7ebf60..c169c97163 100644
--- a/modules/features2d/test/test_keypoints.cpp
+++ b/modules/features2d/test/test_keypoints.cpp
@@ -177,4 +177,11 @@ TEST(Features2d_Detector_Keypoints_AKAZE, validation)
     test_mldb.safe_run();
 }

+TEST(Features2d_Detector_Keypoints_SIFT, validation)
+{
+    CV_FeatureDetectorKeypointsTest test(SIFT::create());
+    test.safe_run();
+}
+
+
 }} // namespace
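The invariance suites above warp a reference image and require a minimum fraction of keypoints (or descriptor matches) to survive; the trailing float arguments are those minimum ratios. A rough sketch of the kind of measurement the rotation check makes, using a hypothetical `rotationSurvivalRatio` helper rather than the actual test harness:

```cpp
#include <opencv2/features2d.hpp>
#include <opencv2/imgproc.hpp>
#include <cmath>
#include <vector>

// Fraction of keypoints that are re-detected near their rotated position.
// A crude stand-in for what DetectorRotationInvariance measures.
static double rotationSurvivalRatio(const cv::Mat& img,
                                    const cv::Ptr<cv::Feature2D>& det,
                                    double angleDeg)
{
    cv::Point2f center(img.cols * 0.5f, img.rows * 0.5f);
    cv::Mat R = cv::getRotationMatrix2D(center, angleDeg, 1.0);  // 2x3, CV_64F
    cv::Mat rotated;
    cv::warpAffine(img, rotated, R, img.size());

    std::vector<cv::KeyPoint> kp0, kp1;
    det->detect(img, kp0);
    det->detect(rotated, kp1);
    if (kp0.empty())
        return 0.0;

    int matched = 0;
    for (const cv::KeyPoint& k : kp0)
    {
        // Map the original keypoint through the same affine transform.
        float x = (float)(R.at<double>(0,0)*k.pt.x + R.at<double>(0,1)*k.pt.y + R.at<double>(0,2));
        float y = (float)(R.at<double>(1,0)*k.pt.x + R.at<double>(1,1)*k.pt.y + R.at<double>(1,2));
        for (const cv::KeyPoint& m : kp1)
        {
            float dx = m.pt.x - x, dy = m.pt.y - y;
            if (std::sqrt(dx*dx + dy*dy) < 3.f) { matched++; break; }
        }
    }
    return (double)matched / kp0.size();
}
```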
+++ b/modules/videoio/doc/videoio_overview.markdown
diff --git a/modules/videoio/doc/videoio_overview.markdown b/modules/videoio/doc/videoio_overview.markdown
index 843de0e2b7..6c798bbdc5 100644
--- a/modules/videoio/doc/videoio_overview.markdown
+++ b/modules/videoio/doc/videoio_overview.markdown
@@ -39,10 +39,10 @@ For example to grab from default camera using Direct Show as backend

 ```cpp
 //declare a capture object
-cv::VideoCapture cap(0 + cv::CAP_DSHOW);
+cv::VideoCapture cap(0, cv::CAP_DSHOW);

 //or specify the apiPreference with open
-cap.open(0 + cv::CAP_DSHOW);
+cap.open(0, cv::CAP_DSHOW);
 ```

 If you want to grab from a file using the Direct Show as backend:
diff --git a/modules/videoio/src/cap_aravis.cpp b/modules/videoio/src/cap_aravis.cpp
index 0d7acee0b0..2040c9c250 100644
--- a/modules/videoio/src/cap_aravis.cpp
+++ b/modules/videoio/src/cap_aravis.cpp
@@ -59,7 +59,7 @@
 // Please obvserve, that jumbo frames are required when high fps & 16bit data is selected.
 // (camera, switches/routers and the computer this software is running on)
 //
-// Basic usage: VideoCapture cap(CAP_ARAVIS + <camera id>);
+// Basic usage: VideoCapture cap(<camera id>, CAP_ARAVIS);
 //
 // Supported properties:
 //  read/write
diff --git a/samples/cpp/stitching_detailed.cpp b/samples/cpp/stitching_detailed.cpp
index 609099817b..e5b63de943 100644
--- a/samples/cpp/stitching_detailed.cpp
+++ b/samples/cpp/stitching_detailed.cpp
@@ -431,10 +431,11 @@ int main(int argc, char* argv[])
     {
         finder = xfeatures2d::SURF::create();
     }
-    else if (features_type == "sift") {
-        finder = xfeatures2d::SIFT::create();
-    }
 #endif
+    else if (features_type == "sift")
+    {
+        finder = SIFT::create();
+    }
     else
     {
         cout << "Unknown 2D features type: '" << features_type << "'.\n";
diff --git a/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/src/Utils.cpp b/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/src/Utils.cpp
index 23ea221eb4..6b1d553330 100644
--- a/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/src/Utils.cpp
+++ b/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/src/Utils.cpp
@@ -323,15 +323,8 @@ void createFeatures(const std::string &featureName, int numKeypoints, cv::Ptr