From 575a609b80cd787c993c3608d0897c3e7aed567f Mon Sep 17 00:00:00 2001 From: Vahid Nikoofard Date: Tue, 14 Apr 2020 00:23:53 -0300 Subject: [PATCH 01/17] Update calcBackProject_Demo1.py To round a ndarray it's necessary to use np.round() instead to Built-in Python round() --- .../back_projection/calcBackProject_Demo1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/python/tutorial_code/Histograms_Matching/back_projection/calcBackProject_Demo1.py b/samples/python/tutorial_code/Histograms_Matching/back_projection/calcBackProject_Demo1.py index eb0bdabd6c..fc85aca339 100644 --- a/samples/python/tutorial_code/Histograms_Matching/back_projection/calcBackProject_Demo1.py +++ b/samples/python/tutorial_code/Histograms_Matching/back_projection/calcBackProject_Demo1.py @@ -31,7 +31,7 @@ def Hist_and_Backproj(val): histImg = np.zeros((h, w, 3), dtype=np.uint8) for i in range(bins): - cv.rectangle(histImg, (i*bin_w, h), ( (i+1)*bin_w, h - int(round( hist[i]*h/255.0 )) ), (0, 0, 255), cv.FILLED) + cv.rectangle(histImg, (i*bin_w, h), ( (i+1)*bin_w, h - int(np.round( hist[i]*h/255.0 )) ), (0, 0, 255), cv.FILLED) cv.imshow('Histogram', histImg) ## [Draw the histogram] From b27ae9c63bcfc2b58f169deec7985ddaab3845b7 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 12 May 2020 15:50:31 +0300 Subject: [PATCH 02/17] Switch v1::Multiply to v0::Multiply --- modules/dnn/src/layers/batch_norm_layer.cpp | 2 +- modules/dnn/src/layers/scale_layer.cpp | 2 +- modules/dnn/test/test_onnx_importer.cpp | 11 ++--------- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index 109f141352..1b77234899 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -373,7 +373,7 @@ public: shape[1] = weights_.total(); auto weight = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), weights_.data); auto bias = 
std::make_shared(ngraph::element::f32, ngraph::Shape(shape), bias_.data); - auto scale_node = std::make_shared(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY); + auto scale_node = std::make_shared(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY); auto scale_shift = std::make_shared(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY); return Ptr(new InfEngineNgraphNode(scale_shift)); } diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 5cae154be8..a53618f156 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -248,7 +248,7 @@ public: auto weight = blobs.empty() ? ieInpNode1 : std::make_shared(ngraph::element::f32, ngraph::Shape(shape), blobs[0].data); - node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); + node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); } if (hasBias || !hasWeights) { diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index cfffc9629a..181d32d861 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -299,17 +299,11 @@ TEST_P(Test_ONNX_layers, BatchNormalization3D) TEST_P(Test_ONNX_layers, BatchNormalizationUnfused) { - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); testONNXModels("frozenBatchNorm2d"); } TEST_P(Test_ONNX_layers, BatchNormalizationSubgraph) { - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); testONNXModels("batch_norm_subgraph"); } @@ -351,9 +345,8 @@ TEST_P(Test_ONNX_layers, Expand) TEST_P(Test_ONNX_layers, ExpandHW) { - // ngraph::op::v1::Multiply bug - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == 
DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); testONNXModels("expand_hw"); } From df305e83fa19189d85cfcef68253aef612fef437 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Wed, 13 May 2020 22:15:22 +0300 Subject: [PATCH 03/17] Fix BatchNorm reinitialization after fusion --- modules/dnn/src/layers/batch_norm_layer.cpp | 9 ++++ modules/dnn/test/test_layers.cpp | 57 +++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index 109f141352..ded8ae051d 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -94,6 +94,15 @@ public: dstWeightsData[i] = w; dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale; } + // We will use blobs to store origin weights and bias to restore them in case of reinitialization. + weights_.copyTo(blobs[0].reshape(1, 1)); + bias_.copyTo(blobs[1].reshape(1, 1)); + } + + virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE + { + blobs[0].reshape(1, 1).copyTo(weights_); + blobs[1].reshape(1, 1).copyTo(bias_); } void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 8124764fc1..c31b9f3720 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -1780,4 +1780,61 @@ TEST_P(Layer_Test_Slice, variable_input_shape) INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Slice, dnnBackendsAndTargets()); +typedef testing::TestWithParam > Layer_Test_BatchNorm; +TEST_P(Layer_Test_BatchNorm, fusion) +{ + // This tests reinitializes network by forwarding different batch size input. + // We check BatchNorm layer weights restoring after fusion. 
+ int backendId = get<0>(GetParam()); + int targetId = get<1>(GetParam()); + const int ch = 4; + + Mat mean(1, ch, CV_32F), var(1, ch, CV_32F), weights(1, ch, CV_32F); + randu(mean, 0, 1); + randu(var, 0, 1); + randu(weights, 0, 1); + + Net net; + { + LayerParams lp; + lp.type = "BatchNorm"; + lp.name = "bn"; + lp.set("has_weight", false); + lp.set("has_bias", false); + lp.blobs.push_back(mean); + lp.blobs.push_back(var); + net.addLayerToPrev(lp.name, lp.type, lp); + } + { + LayerParams lp; + lp.type = "Scale"; + lp.name = "scale"; + lp.set("has_bias", false); + lp.blobs.push_back(weights); + net.addLayerToPrev(lp.name, lp.type, lp); + } + + Mat inp(4, 5, CV_32FC(ch)); + randu(inp, 0, 1); + + net.setPreferableBackend(backendId); + net.setPreferableTarget(targetId); + + net.setInput(blobFromImage(inp)); + Mat ref = net.forward(); + + net.setInput(blobFromImages(std::vector(2, inp))); + Mat out = net.forward(); + + for (int i = 0; i < 2; ++i) + { + std::vector ranges(4, Range::all()); + ranges[0].start = i; + ranges[0].end = i + 1; + normAssert(out(ranges), ref); + } +} + +INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_BatchNorm, dnnBackendsAndTargets()); + }} // namespace From fa349b7a4ec5272782380e8eaf1486d57847f3b6 Mon Sep 17 00:00:00 2001 From: Sauhaarda Chowdhuri Date: Wed, 13 May 2020 13:14:05 -0700 Subject: [PATCH 04/17] Fix #17279 Documentation Error Update documentation to solve #17279. Simple documentation bug. 
--- doc/js_tutorials/js_setup/js_nodejs/js_nodejs.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/js_tutorials/js_setup/js_nodejs/js_nodejs.markdown b/doc/js_tutorials/js_setup/js_nodejs/js_nodejs.markdown index 75ae205e13..9ff4a05d24 100644 --- a/doc/js_tutorials/js_setup/js_nodejs/js_nodejs.markdown +++ b/doc/js_tutorials/js_setup/js_nodejs/js_nodejs.markdown @@ -146,7 +146,7 @@ npm install canvas jsdom @code{.js} const { Canvas, createCanvas, Image, ImageData, loadImage } = require('canvas'); const { JSDOM } = require('jsdom'); -const { writeFileSync } = require('fs'); +const { writeFileSync, existsSync, mkdirSync } = require("fs"); // This is our program. This time we use JavaScript async / await and promises to handle asynchronicity. (async () => { From 35245cb76e548daac9a7afd20217836c533dfa0b Mon Sep 17 00:00:00 2001 From: Tomoaki Teshima Date: Thu, 14 May 2020 05:44:14 +0900 Subject: [PATCH 05/17] fix test failure on Mali T760 and Mali T628 --- modules/features2d/test/ocl/test_feature2d.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/features2d/test/ocl/test_feature2d.cpp b/modules/features2d/test/ocl/test_feature2d.cpp index 618db332c2..7024854ce1 100644 --- a/modules/features2d/test/ocl/test_feature2d.cpp +++ b/modules/features2d/test/ocl/test_feature2d.cpp @@ -51,7 +51,7 @@ OCL_TEST_P(Feature2DFixture, KeypointsSame) for (size_t i = 0; i < keypoints.size(); ++i) { EXPECT_GE(KeyPoint::overlap(keypoints[i], ukeypoints[i]), 0.95); - EXPECT_NEAR(keypoints[i].angle, ukeypoints[i].angle, 0.001); + EXPECT_NEAR(keypoints[i].angle, ukeypoints[i].angle, 0.05); } } From b4a6aa335d8c793397529570dc1c5b5c7be578ab Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Wed, 13 May 2020 23:51:52 +0300 Subject: [PATCH 06/17] TensorFlow bilinear resize downscale --- modules/dnn/src/layers/resize_layer.cpp | 6 +-- .../src/tensorflow/tf_graph_simplifier.cpp | 51 ++++++++++++++++++- 
modules/dnn/src/tensorflow/tf_importer.cpp | 8 +-- modules/dnn/test/test_tf_importer.cpp | 1 + 4 files changed, 57 insertions(+), 9 deletions(-) diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index c86fa7f717..09e68eee47 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -19,8 +19,8 @@ namespace cv { namespace dnn { class ResizeLayerImpl : public ResizeLayer { public: - ResizeLayerImpl(const LayerParams& params) : zoomFactorWidth(params.get("zoom_factor_x", params.get("zoom_factor", 0))), - zoomFactorHeight(params.get("zoom_factor_y", params.get("zoom_factor", 0))), + ResizeLayerImpl(const LayerParams& params) : zoomFactorWidth(params.get("zoom_factor_x", params.get("zoom_factor", 0))), + zoomFactorHeight(params.get("zoom_factor_y", params.get("zoom_factor", 0))), scaleWidth(0), scaleHeight(0) { setParamsFrom(params); @@ -223,7 +223,7 @@ public: protected: int outWidth, outHeight; - const int zoomFactorWidth, zoomFactorHeight; + const float zoomFactorWidth, zoomFactorHeight; String interpolation; float scaleWidth, scaleHeight; bool alignCorners; diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp index 1afed2cf46..99b3d7ac2f 100644 --- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp +++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp @@ -495,8 +495,9 @@ public: ResizeBilinearSubgraph() { int input = addNodeToMatch(""); + int shapeSource = addNodeToMatch(""); - int shape = addNodeToMatch("Shape", input); + int shape = addNodeToMatch("Shape", shapeSource); int stack = addNodeToMatch("Const"); int stack_1 = addNodeToMatch("Const"); int stack_2 = addNodeToMatch("Const"); @@ -504,7 +505,7 @@ public: int factorY = addNodeToMatch("Const"); int mul = addNodeToMatch("Mul", strided_slice, factorY); - shape = addNodeToMatch("Shape", input); + shape = addNodeToMatch("Shape", shapeSource); stack = 
addNodeToMatch("Const"); stack_1 = addNodeToMatch("Const"); stack_2 = addNodeToMatch("Const"); @@ -519,6 +520,51 @@ public: } }; +// In case of resizing by factor. +class ResizeBilinearSubgraphDown : public TFSubgraph +{ +public: + ResizeBilinearSubgraphDown() + { + int input = addNodeToMatch(""); + int shapeSource = addNodeToMatch(""); + + int shape = addNodeToMatch("Shape", shapeSource); + int stack = addNodeToMatch("Const"); + int stack_1 = addNodeToMatch("Const"); + int stack_2 = addNodeToMatch("Const"); + int strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2); + int factorY = addNodeToMatch("Const"); + int div = addNodeToMatch("RealDiv", addNodeToMatch("Cast", strided_slice), factorY); + int cast = addNodeToMatch("Cast", div); + + shape = addNodeToMatch("Shape", shapeSource); + stack = addNodeToMatch("Const"); + stack_1 = addNodeToMatch("Const"); + stack_2 = addNodeToMatch("Const"); + strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2); + int factorX = addNodeToMatch("Const"); + int div_1 = addNodeToMatch("RealDiv", addNodeToMatch("Cast", strided_slice), factorX); + int cast_1 = addNodeToMatch("Cast", div_1); + + int pack = addNodeToMatch("Pack", cast, cast_1); + + addNodeToMatch("ResizeBilinear", input, pack); + setFusedNode("ResizeBilinear", input, factorY, factorX); + } + + virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef* fusedNode, + std::vector& inputNodes) CV_OVERRIDE + { + + for (int i = 1; i < 3; ++i) + { + tensorflow::TensorProto* factor = inputNodes[i]->mutable_attr()->at("value").mutable_tensor(); + factor->set_double_val(0, 1.0 / factor->double_val(0)); + } + } +}; + // In case of resizing by factor. 
class UpsamplingKerasSubgraph : public TFSubgraph { @@ -702,6 +748,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net) subgraphs.push_back(Ptr(new PReLUSubgraph(true))); subgraphs.push_back(Ptr(new PReLUSubgraph(false))); subgraphs.push_back(Ptr(new FlattenProdSubgraph())); + subgraphs.push_back(Ptr(new ResizeBilinearSubgraphDown())); for (int i = 0; i < net.node_size(); ++i) { diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 9fd611fd0a..e684b94e46 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1932,10 +1932,10 @@ void TFImporter::populateNet(Net dstNet) { Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1)); Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2)); - CV_CheckTypeEQ(factorHeight.type(), CV_32SC1, ""); CV_CheckEQ(factorHeight.total(), (size_t)1, ""); - CV_CheckTypeEQ(factorWidth.type(), CV_32SC1, ""); CV_CheckEQ(factorWidth.total(), (size_t)1, ""); - layerParams.set("zoom_factor_x", factorWidth.at(0)); - layerParams.set("zoom_factor_y", factorHeight.at(0)); + factorHeight.convertTo(factorHeight, CV_32F); + factorWidth.convertTo(factorWidth, CV_32F); + layerParams.set("zoom_factor_x", factorWidth.at(0)); + layerParams.set("zoom_factor_y", factorHeight.at(0)); } else CV_Assert(layer.input_size() == 2 || layer.input_size() == 3); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 642b5158b1..b20b2a58ff 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -969,6 +969,7 @@ TEST_P(Test_TensorFlow_layers, resize_bilinear) { runTensorFlowNet("resize_bilinear"); runTensorFlowNet("resize_bilinear_factor"); + runTensorFlowNet("resize_bilinear_down"); } TEST_P(Test_TensorFlow_layers, tf2_dense) From c5a2d283674b802a93959a235cfff9c4b9da1374 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 13 May 2020 14:44:23 +0300 
Subject: [PATCH 07/17] Determine SSD input shape --- samples/dnn/tf_text_graph_ssd.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/samples/dnn/tf_text_graph_ssd.py b/samples/dnn/tf_text_graph_ssd.py index bf1d788d41..46a9064738 100644 --- a/samples/dnn/tf_text_graph_ssd.py +++ b/samples/dnn/tf_text_graph_ssd.py @@ -234,6 +234,12 @@ def createSSDGraph(modelPath, configPath, outputPath): # Connect input node to the first layer assert(graph_def.node[0].op == 'Placeholder') + try: + input_shape = graph_def.node[0].attr['shape']['shape'][0]['dim'] + input_shape[1]['size'] = image_height + input_shape[2]['size'] = image_width + except: + print("Input shapes are undefined") # assert(graph_def.node[1].op == 'Conv2D') weights = graph_def.node[1].input[-1] for i in range(len(graph_def.node[1].input)): From 400a781ebfe960acd87e3166eb3f8ab1997b5fe2 Mon Sep 17 00:00:00 2001 From: Nicolas Roduit Date: Fri, 8 May 2020 12:22:04 +0200 Subject: [PATCH 08/17] Prefer addall instead of iteration for performance --- .../src/java/org/opencv/utils/Converters.java | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/modules/java/generator/src/java/org/opencv/utils/Converters.java b/modules/java/generator/src/java/org/opencv/utils/Converters.java index 94675da183..35beecbc4b 100644 --- a/modules/java/generator/src/java/org/opencv/utils/Converters.java +++ b/modules/java/generator/src/java/org/opencv/utils/Converters.java @@ -519,8 +519,7 @@ public class Converters { Mat res; int lCount = (pts != null) ? pts.size() : 0; if (lCount > 0) { - for (MatOfPoint vpt : pts) - mats.add(vpt); + mats.addAll(pts); res = vector_Mat_to_Mat(mats); } else { res = new Mat(); @@ -568,8 +567,7 @@ public class Converters { Mat res; int lCount = (pts != null) ? 
pts.size() : 0; if (lCount > 0) { - for (MatOfPoint2f vpt : pts) - mats.add(vpt); + mats.addAll(pts); res = vector_Mat_to_Mat(mats); } else { res = new Mat(); @@ -600,8 +598,7 @@ public class Converters { Mat res; int lCount = (pts != null) ? pts.size() : 0; if (lCount > 0) { - for (MatOfPoint3f vpt : pts) - mats.add(vpt); + mats.addAll(pts); res = vector_Mat_to_Mat(mats); } else { res = new Mat(); @@ -614,8 +611,7 @@ public class Converters { Mat res; int lCount = (kps != null) ? kps.size() : 0; if (lCount > 0) { - for (MatOfKeyPoint vkp : kps) - mats.add(vkp); + mats.addAll(kps); res = vector_Mat_to_Mat(mats); } else { res = new Mat(); @@ -714,8 +710,7 @@ public class Converters { Mat res; int lCount = (lvdm != null) ? lvdm.size() : 0; if (lCount > 0) { - for (MatOfDMatch vdm : lvdm) - mats.add(vdm); + mats.addAll(lvdm); res = vector_Mat_to_Mat(mats); } else { res = new Mat(); @@ -746,8 +741,7 @@ public class Converters { Mat res; int lCount = (lvb != null) ? lvb.size() : 0; if (lCount > 0) { - for (MatOfByte vb : lvb) - mats.add(vb); + mats.addAll(lvb); res = vector_Mat_to_Mat(mats); } else { res = new Mat(); From cddd7f10d5689bb9a7f4d4639611fd3092bf408f Mon Sep 17 00:00:00 2001 From: Ganesh Kathiresan Date: Thu, 14 May 2020 22:00:01 +0530 Subject: [PATCH 09/17] Merge pull request #17224 from ganesh-k13:bugfix/calib3d/17201 * Fixed indexing in prefilter * Initialised prefilter * Initialised prefilter with value initialisation * Added TC to trigger different Mem Allocs in BufferBM * Optimize cases with only needed conditions --- modules/calib3d/src/stereobm.cpp | 5 +- modules/calib3d/test/test_stereomatching.cpp | 51 +++++++++++++++++++- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/modules/calib3d/src/stereobm.cpp b/modules/calib3d/src/stereobm.cpp index afc404bffe..96c8d0662d 100644 --- a/modules/calib3d/src/stereobm.cpp +++ b/modules/calib3d/src/stereobm.cpp @@ -347,7 +347,8 @@ public: htext(nstripes, NULL), cbuf0(nstripes, NULL), 
sad_short(nstripes, NULL), - hsad_short(nstripes, NULL) + hsad_short(nstripes, NULL), + prefilter() { const int wsz = params.SADWindowSize; const int ndisp = params.numDisparities; @@ -379,7 +380,7 @@ public: if (params.useNormPrefilter()) { for (size_t i = 0; i < 2; ++i) - area.allocate(prefilter[0], width + params.preFilterSize + 2); + area.allocate(prefilter[i], width + params.preFilterSize + 2); } area.commit(); diff --git a/modules/calib3d/test/test_stereomatching.cpp b/modules/calib3d/test/test_stereomatching.cpp index 94fc9718cc..e92c170c00 100644 --- a/modules/calib3d/test/test_stereomatching.cpp +++ b/modules/calib3d/test/test_stereomatching.cpp @@ -809,6 +809,55 @@ protected: } }; +TEST(Calib3d_StereoBM, regression) { CV_StereoBMTest test; test.safe_run(); } + +/* < preFilter, < preFilterCap, SADWindowSize > >*/ +typedef tuple < int, tuple < int, int > > BufferBM_Params_t; + +typedef testing::TestWithParam< BufferBM_Params_t > Calib3d_StereoBM_BufferBM; + +const int preFilters[] = +{ + StereoBM::PREFILTER_NORMALIZED_RESPONSE, + StereoBM::PREFILTER_XSOBEL +}; + +const tuple < int, int > useShortsConditions[] = +{ + make_tuple(30, 19), + make_tuple(32, 23) +}; + +TEST_P(Calib3d_StereoBM_BufferBM, memAllocsTest) +{ + const int preFilter = get<0>(GetParam()); + const int preFilterCap = get<0>(get<1>(GetParam())); + const int SADWindowSize = get<1>(get<1>(GetParam())); + + String path = cvtest::TS::ptr()->get_data_path() + "cv/stereomatching/datasets/teddy/"; + Mat leftImg = imread(path + "im2.png", 0); + ASSERT_FALSE(leftImg.empty()); + Mat rightImg = imread(path + "im6.png", 0); + ASSERT_FALSE(rightImg.empty()); + Mat leftDisp; + { + Ptr bm = StereoBM::create(16,9); + bm->setPreFilterType(preFilter); + bm->setPreFilterCap(preFilterCap); + bm->setBlockSize(SADWindowSize); + bm->compute( leftImg, rightImg, leftDisp); + + ASSERT_FALSE(leftDisp.empty()); + } +} + +INSTANTIATE_TEST_CASE_P(/*nothing*/, Calib3d_StereoBM_BufferBM, + testing::Combine( + 
testing::ValuesIn(preFilters), + testing::ValuesIn(useShortsConditions) + ) + ); + //----------------------------------- StereoSGBM test ----------------------------------------------------- class CV_StereoSGBMTest : public CV_StereoMatchingTest @@ -869,8 +918,6 @@ protected: } }; - -TEST(Calib3d_StereoBM, regression) { CV_StereoBMTest test; test.safe_run(); } TEST(Calib3d_StereoSGBM, regression) { CV_StereoSGBMTest test; test.safe_run(); } TEST(Calib3d_StereoSGBM_HH4, regression) From 58426c80a3dd67e92423ce4a94dcfd55af08d202 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 14 May 2020 17:13:29 +0000 Subject: [PATCH 10/17] samples: fix OpenCL events leaks --- samples/opencl/opencl-opencv-interop.cpp | 25 +++++++++--------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/samples/opencl/opencl-opencv-interop.cpp b/samples/opencl/opencl-opencv-interop.cpp index 3d1357668f..3d6fec09ae 100644 --- a/samples/opencl/opencl-opencv-interop.cpp +++ b/samples/opencl/opencl-opencv-interop.cpp @@ -468,7 +468,6 @@ private: cl_kernel m_kernelImg; cl_mem m_img_src; // used as src in case processing of cl image cl_mem m_mem_obj; - cl_event m_event; }; @@ -498,7 +497,6 @@ App::App(CommandLineParser& cmd) m_kernelImg = 0; m_img_src = 0; m_mem_obj = 0; - m_event = 0; } // ctor @@ -529,11 +527,6 @@ App::~App() m_mem_obj = 0; } - if (m_event) - { - clReleaseEvent(m_event); - } - if (m_kernelBuf) { clReleaseKernel(m_kernelBuf); @@ -775,11 +768,13 @@ int App::process_frame_with_open_cl(cv::Mat& frame, bool use_buffer, cl_mem* mem size_t origin[] = { 0, 0, 0 }; size_t region[] = { (size_t)frame.cols, (size_t)frame.rows, 1 }; - res = clEnqueueCopyImage(m_queue, m_img_src, mem, origin, origin, region, 0, 0, &m_event); + cl_event asyncEvent = 0; + res = clEnqueueCopyImage(m_queue, m_img_src, mem, origin, origin, region, 0, 0, &asyncEvent); if (CL_SUCCESS != res) return -1; - res = clWaitForEvents(1, &m_event); + res = clWaitForEvents(1, &asyncEvent); + 
clReleaseEvent(asyncEvent); if (CL_SUCCESS != res) return -1; @@ -795,19 +790,17 @@ int App::process_frame_with_open_cl(cv::Mat& frame, bool use_buffer, cl_mem* mem } } - m_event = clCreateUserEvent(m_context, &res); - if (0 == m_event || CL_SUCCESS != res) - return -1; - // process left half of frame in OpenCL size_t size[] = { (size_t)frame.cols / 2, (size_t)frame.rows }; - res = clEnqueueNDRangeKernel(m_queue, kernel, 2, 0, size, 0, 0, 0, &m_event); + cl_event asyncEvent = 0; + res = clEnqueueNDRangeKernel(m_queue, kernel, 2, 0, size, 0, 0, 0, &asyncEvent); if (CL_SUCCESS != res) return -1; - res = clWaitForEvents(1, &m_event); + res = clWaitForEvents(1, &asyncEvent); + clReleaseEvent(asyncEvent); if (CL_SUCCESS != res) - return - 1; + return -1; mem_obj[0] = mem; From 7d1094b7e102d4c44cfa09171f97b4f50fc850d7 Mon Sep 17 00:00:00 2001 From: tweenietomatoes Date: Fri, 15 May 2020 14:41:55 +0300 Subject: [PATCH 11/17] Important single character fix --- samples/_winpack_build_sample.cmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/_winpack_build_sample.cmd b/samples/_winpack_build_sample.cmd index f67e192bc1..96f4e5b13f 100644 --- a/samples/_winpack_build_sample.cmd +++ b/samples/_winpack_build_sample.cmd @@ -78,7 +78,7 @@ IF ERRORLEVEL 1 ( POPD cl /? 
>NUL 2>NUL Date: Thu, 14 May 2020 12:48:37 -0700 Subject: [PATCH 12/17] Add target conditionals for Mac Catalyst --- modules/videoio/src/cap_avfoundation.mm | 2 +- modules/videoio/src/cap_ios_abstract_camera.mm | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/videoio/src/cap_avfoundation.mm b/modules/videoio/src/cap_avfoundation.mm index 9d4fc66e8b..6592341bc3 100644 --- a/modules/videoio/src/cap_avfoundation.mm +++ b/modules/videoio/src/cap_avfoundation.mm @@ -383,7 +383,7 @@ int CvCaptureCAM::startCaptureDevice(int cameraNum) { [mCaptureDecompressedVideoOutput setVideoSettings:pixelBufferOptions]; mCaptureDecompressedVideoOutput.alwaysDiscardsLateVideoFrames = YES; -#if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR +#if (TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR) && !TARGET_OS_MACCATALYST mCaptureDecompressedVideoOutput.minFrameDuration = CMTimeMake(1, 30); #endif diff --git a/modules/videoio/src/cap_ios_abstract_camera.mm b/modules/videoio/src/cap_ios_abstract_camera.mm index 032ab850fa..1f698424d8 100644 --- a/modules/videoio/src/cap_ios_abstract_camera.mm +++ b/modules/videoio/src/cap_ios_abstract_camera.mm @@ -299,11 +299,13 @@ } else { +#if !TARGET_OS_MACCATALYST // Deprecated in 6.0; here for backward compatibility if ([self.captureVideoPreviewLayer isOrientationSupported]) { [self.captureVideoPreviewLayer setOrientation:self.defaultAVCaptureVideoOrientation]; } +#endif } if (parentView != nil) { From d81ac52ce263e6edde17d85cb25ac0b678411fe0 Mon Sep 17 00:00:00 2001 From: Chris Ballinger Date: Thu, 14 May 2020 13:43:03 -0700 Subject: [PATCH 13/17] Remove linking against AssetsLibrary framework --- modules/imgcodecs/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/imgcodecs/CMakeLists.txt b/modules/imgcodecs/CMakeLists.txt index 50354769e6..92f380ea22 100644 --- a/modules/imgcodecs/CMakeLists.txt +++ b/modules/imgcodecs/CMakeLists.txt @@ -105,7 +105,7 @@ file(GLOB imgcodecs_ext_hdrs if(IOS) 
list(APPEND imgcodecs_srcs ${CMAKE_CURRENT_LIST_DIR}/src/ios_conversions.mm) - list(APPEND IMGCODECS_LIBRARIES "-framework Accelerate" "-framework CoreGraphics" "-framework QuartzCore" "-framework AssetsLibrary") + list(APPEND IMGCODECS_LIBRARIES "-framework Accelerate" "-framework CoreGraphics" "-framework QuartzCore") endif() if(APPLE_FRAMEWORK) list(APPEND IMGCODECS_LIBRARIES "-framework UIKit") From a1b09a3734c78a3fb87d32d112b042d010d5ecd1 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 16 Apr 2020 17:37:08 +0000 Subject: [PATCH 14/17] imgproc(perf): add GaussianBlur cases for SIFT --- modules/imgproc/perf/opencl/perf_filters.cpp | 43 +++++++++++++++++--- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/modules/imgproc/perf/opencl/perf_filters.cpp b/modules/imgproc/perf/opencl/perf_filters.cpp index ab065d9751..b4e29ae67f 100644 --- a/modules/imgproc/perf/opencl/perf_filters.cpp +++ b/modules/imgproc/perf/opencl/perf_filters.cpp @@ -238,15 +238,13 @@ OCL_PERF_TEST_P(ScharrFixture, Scharr, ///////////// GaussianBlur //////////////////////// -typedef FilterFixture GaussianBlurFixture; +typedef FilterFixture OCL_GaussianBlurFixture; -OCL_PERF_TEST_P(GaussianBlurFixture, GaussianBlur, - ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES, OCL_PERF_ENUM(3, 5, 7))) +PERF_TEST_P_(OCL_GaussianBlurFixture, GaussianBlur) { - const FilterParams params = GetParam(); + const FilterParams& params = GetParam(); const Size srcSize = get<0>(params); const int type = get<1>(params), ksize = get<2>(params); - const double eps = CV_MAT_DEPTH(type) <= CV_32S ? 
2 + DBL_EPSILON : 3e-4; checkDeviceMaxMemoryAllocSize(srcSize, type); @@ -255,9 +253,42 @@ OCL_PERF_TEST_P(GaussianBlurFixture, GaussianBlur, OCL_TEST_CYCLE() cv::GaussianBlur(src, dst, Size(ksize, ksize), 1, 1, cv::BORDER_CONSTANT); - SANITY_CHECK(dst, eps); + SANITY_CHECK_NOTHING(); } +INSTANTIATE_TEST_CASE_P(/*nothing*/, OCL_GaussianBlurFixture, + ::testing::Combine( + OCL_TEST_SIZES, + OCL_TEST_TYPES, + OCL_PERF_ENUM(3, 5, 7) + ) +); + +INSTANTIATE_TEST_CASE_P(SIFT, OCL_GaussianBlurFixture, + ::testing::Combine( + ::testing::Values(sz1080p), + ::testing::Values(CV_32FC1), + OCL_PERF_ENUM(11, 13, 17, 21, 27) + ) +); + +INSTANTIATE_TEST_CASE_P(DISABLED_FULL, OCL_GaussianBlurFixture, + ::testing::Combine( + ::testing::Values(sz1080p), + ::testing::Values( + CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4, + CV_8SC1, CV_8SC2, CV_8SC3, CV_8SC4, + CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4, + CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4, + CV_32SC1, CV_32SC2, CV_32SC3, CV_32SC4, + CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4, + CV_64FC1, CV_64FC2, CV_64FC3, CV_64FC4 + ), + OCL_PERF_ENUM(3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29) + ) +); + + ///////////// Filter2D //////////////////////// typedef FilterFixture Filter2DFixture; From a3b109eca0e7656a05224eacbdcd15fdd0e11204 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 16 Apr 2020 18:17:22 +0000 Subject: [PATCH 15/17] imgproc: enable GaussianBlur IPP parallel processing --- modules/core/include/opencv2/core/private.hpp | 2 -- modules/imgproc/CMakeLists.txt | 6 +++++ modules/imgproc/src/smooth.dispatch.cpp | 24 +++++++++++++++---- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 24f7fc69b8..4af6e3b443 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -208,8 +208,6 @@ T* allocSingletonNew() { return new(allocSingletonNewBuffer(sizeof(T))) T(); } #define 
IPP_DISABLE_HOUGH 1 // improper integration/results #define IPP_DISABLE_FILTER2D_BIG_MASK 1 // different results on masks > 7x7 -#define IPP_DISABLE_GAUSSIANBLUR_PARALLEL 1 // not supported (2017u2 / 2017u3) - // Temporary disabled named IPP region. Performance #define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations #define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653) diff --git a/modules/imgproc/CMakeLists.txt b/modules/imgproc/CMakeLists.txt index a74c883cd3..3b45482481 100644 --- a/modules/imgproc/CMakeLists.txt +++ b/modules/imgproc/CMakeLists.txt @@ -12,3 +12,9 @@ ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX) ocv_add_dispatched_file(undistort SSE2 AVX2) ocv_define_module(imgproc opencv_core WRAP java python js) + +ocv_check_environment_variables(OPENCV_IPP_GAUSSIAN_BLUR) +option(OPENCV_IPP_GAUSSIAN_BLUR "Enable IPP optimizations for GaussianBlur (+8Mb in binary size)" OFF) +if(OPENCV_IPP_GAUSSIAN_BLUR) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/smooth.dispatch.cpp "ENABLE_IPP_GAUSSIAN_BLUR=1") +endif() diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp index 4ac7df8b4d..65122d20e2 100644 --- a/modules/imgproc/src/smooth.dispatch.cpp +++ b/modules/imgproc/src/smooth.dispatch.cpp @@ -470,9 +470,14 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize, #endif -#if 0 //defined HAVE_IPP +#if defined ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option + +#define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1 +#define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1 +#define IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH 1 + // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling -#if IPP_DISABLE_GAUSSIANBLUR_PARALLEL +#if IPP_VERSION_X100 < 201900 #define IPP_GAUSSIANBLUR_PARALLEL 0 #else #define IPP_GAUSSIANBLUR_PARALLEL 1 @@ -555,6 +560,14 @@ static bool 
ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, return false; const int threads = ippiSuggestThreadsNum(iwDst, 2); + + if (IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH && (threads == 1 && ksize.width > 25)) + return false; + if (IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH && (threads == 1 && src.type() == CV_16SC4)) + return false; + if (IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH && (threads == 1 && src.type() == CV_32FC4)) + return false; + if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) { bool ok; ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok); @@ -655,8 +668,6 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, CV_OVX_RUN(true, openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) - //CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)); - if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix())) { std::vector fkx, fky; @@ -681,6 +692,11 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, } } +#if defined ENABLE_IPP_GAUSSIAN_BLUR + // IPP is not bit-exact to OpenCV implementation + CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)); +#endif + sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType); } From 07c56f149ff611bb5d2a154b661f3d38983aa659 Mon Sep 17 00:00:00 2001 From: dmallia17 Date: Mon, 18 May 2020 11:53:17 -0400 Subject: [PATCH 16/17] Merge pull request #17313 from hunter-college-ossd-spr-2020:revise-knn-tutorials * Revise and expand kNN Python tutorials * Correct NPTEL link --- .../py_knn_opencv/py_knn_opencv.markdown | 64 ++++----- .../py_knn_understanding.markdown | 123 +++++++++--------- 2 files changed, 94 insertions(+), 93 deletions(-) diff --git a/doc/py_tutorials/py_ml/py_knn/py_knn_opencv/py_knn_opencv.markdown b/doc/py_tutorials/py_ml/py_knn/py_knn_opencv/py_knn_opencv.markdown index 0b4c2bd744..e876ddf3e6 100644 --- 
a/doc/py_tutorials/py_ml/py_knn/py_knn_opencv/py_knn_opencv.markdown +++ b/doc/py_tutorials/py_ml/py_knn/py_knn_opencv/py_knn_opencv.markdown @@ -4,20 +4,20 @@ OCR of Hand-written Data using kNN {#tutorial_py_knn_opencv} Goal ---- -In this chapter - - We will use our knowledge on kNN to build a basic OCR application. - - We will try with Digits and Alphabets data available that comes with OpenCV. +In this chapter: + - We will use our knowledge on kNN to build a basic OCR (Optical Character Recognition) application. + - We will try our application on Digits and Alphabets data that comes with OpenCV. OCR of Hand-written Digits -------------------------- -Our goal is to build an application which can read the handwritten digits. For this we need some -train_data and test_data. OpenCV comes with an image digits.png (in the folder +Our goal is to build an application which can read handwritten digits. For this we need some +training data and some test data. OpenCV comes with an image digits.png (in the folder opencv/samples/data/) which has 5000 handwritten digits (500 for each digit). Each digit is -a 20x20 image. So our first step is to split this image into 5000 different digits. For each digit, -we flatten it into a single row with 400 pixels. That is our feature set, ie intensity values of all -pixels. It is the simplest feature set we can create. We use first 250 samples of each digit as -train_data, and next 250 samples as test_data. So let's prepare them first. +a 20x20 image. So our first step is to split this image into 5000 different digit images. Then for each digit (20x20 image), +we flatten it into a single row with 400 pixels. That is our feature set, i.e. intensity values of all +pixels. It is the simplest feature set we can create. We use the first 250 samples of each digit as +training data, and the other 250 samples as test data. So let's prepare them first. 
@code{.py} import numpy as np import cv2 as cv @@ -28,10 +28,10 @@ gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY) # Now we split the image to 5000 cells, each 20x20 size cells = [np.hsplit(row,100) for row in np.vsplit(gray,50)] -# Make it into a Numpy array. It size will be (50,100,20,20) +# Make it into a Numpy array: its size will be (50,100,20,20) x = np.array(cells) -# Now we prepare train_data and test_data. +# Now we prepare the training data and test data train = x[:,:50].reshape(-1,400).astype(np.float32) # Size = (2500,400) test = x[:,50:100].reshape(-1,400).astype(np.float32) # Size = (2500,400) @@ -40,7 +40,7 @@ k = np.arange(10) train_labels = np.repeat(k,250)[:,np.newaxis] test_labels = train_labels.copy() -# Initiate kNN, train the data, then test it with test data for k=1 +# Initiate kNN, train it on the training data, then test it with the test data with k=5 knn = cv.ml.KNearest_create() knn.train(train, cv.ml.ROW_SAMPLE, train_labels) ret,result,neighbours,dist = knn.findNearest(test,k=5) @@ -52,13 +52,15 @@ correct = np.count_nonzero(matches) accuracy = correct*100.0/result.size print( accuracy ) @endcode -So our basic OCR app is ready. This particular example gave me an accuracy of 91%. One option -improve accuracy is to add more data for training, especially the wrong ones. So instead of finding -this training data every time I start application, I better save it, so that next time, I directly -read this data from a file and start classification. You can do it with the help of some Numpy -functions like np.savetxt, np.savez, np.load etc. Please check their docs for more details. +So our basic OCR app is ready. This particular example gave me an accuracy of 91%. One option to +improve accuracy is to add more data for training, especially for the digits where we had more errors. 
+ +Instead of finding +this training data every time I start the application, I had better save it, so that the next time, I can directly +read this data from a file and start classification. This can be done with the help of some NumPy +functions like np.savetxt, np.savez, np.load, etc. Please check the NumPy docs for more details. @code{.py} -# save the data +# Save the data np.savez('knn_data.npz',train=train, train_labels=train_labels) # Now load the data @@ -71,36 +73,36 @@ In my system, it takes around 4.4 MB of memory. Since we are using intensity val features, it would be better to convert the data to np.uint8 first and then save it. It takes only 1.1 MB in this case. Then while loading, you can convert back into float32. -OCR of English Alphabets +OCR of the English Alphabet ------------------------ -Next we will do the same for English alphabets, but there is a slight change in data and feature +Next we will do the same for the English alphabet, but there is a slight change in data and feature set. Here, instead of images, OpenCV comes with a data file, letter-recognition.data in opencv/samples/cpp/ folder. If you open it, you will see 20000 lines which may, on first sight, look -like garbage. Actually, in each row, first column is an alphabet which is our label. Next 16 numbers -following it are its different features. These features are obtained from [UCI Machine Learning +like garbage. Actually, in each row, the first column is a letter which is our label. The next 16 numbers +following it are the different features. These features are obtained from the [UCI Machine Learning Repository](http://archive.ics.uci.edu/ml/). You can find the details of these features in [this page](http://archive.ics.uci.edu/ml/datasets/Letter+Recognition). -There are 20000 samples available, so we take first 10000 data as training samples and remaining -10000 as test samples. We should change the alphabets to ascii characters because we can't work with -alphabets directly. 
+There are 20000 samples available, so we take the first 10000 as training samples and the remaining +10000 as test samples. We should convert the letters to numbers because we can't work with +letters directly. @code{.py} import cv2 as cv import numpy as np -# Load the data, converters convert the letter to a number +# Load the data and convert the letters to numbers data= np.loadtxt('letter-recognition.data', dtype= 'float32', delimiter = ',', converters= {0: lambda ch: ord(ch)-ord('A')}) -# split the data to two, 10000 each for train and test +# Split the dataset in two, with 10000 samples each for training and test sets train, test = np.vsplit(data,2) -# split trainData and testData to features and responses +# Split trainData and testData into features and responses responses, trainData = np.hsplit(train,[1]) labels, testData = np.hsplit(test,[1]) -# Initiate the kNN, classify, measure accuracy. +# Initiate the kNN, classify, measure accuracy knn = cv.ml.KNearest_create() knn.train(trainData, cv.ml.ROW_SAMPLE, responses) ret, result, neighbours, dist = knn.findNearest(testData, k=5) @@ -110,10 +112,12 @@ accuracy = correct*100.0/10000 print( accuracy ) @endcode It gives me an accuracy of 93.22%. Again, if you want to increase accuracy, you can iteratively add -error data in each level. +more data. Additional Resources -------------------- +1. [Wikipedia article on Optical character recognition](https://en.wikipedia.org/wiki/Optical_character_recognition) Exercises --------- +1. Here we used k=5. What happens if you try other values of k? Can you find a value that maximizes accuracy (minimizes the number of errors)? 
\ No newline at end of file diff --git a/doc/py_tutorials/py_ml/py_knn/py_knn_understanding/py_knn_understanding.markdown b/doc/py_tutorials/py_ml/py_knn/py_knn_understanding/py_knn_understanding.markdown index 30e53be87e..9f76e0f808 100644 --- a/doc/py_tutorials/py_ml/py_knn/py_knn_understanding/py_knn_understanding.markdown +++ b/doc/py_tutorials/py_ml/py_knn/py_knn_understanding/py_knn_understanding.markdown @@ -4,61 +4,55 @@ Understanding k-Nearest Neighbour {#tutorial_py_knn_understanding} Goal ---- -In this chapter, we will understand the concepts of k-Nearest Neighbour (kNN) algorithm. +In this chapter, we will understand the concepts of the k-Nearest Neighbour (kNN) algorithm. Theory ------ -kNN is one of the simplest of classification algorithms available for supervised learning. The idea -is to search for closest match of the test data in feature space. We will look into it with below +kNN is one of the simplest classification algorithms available for supervised learning. The idea +is to search for the closest match(es) of the test data in the feature space. We will look into it with the below image. ![image](images/knn_theory.png) -In the image, there are two families, Blue Squares and Red Triangles. We call each family as -**Class**. Their houses are shown in their town map which we call feature space. *(You can consider -a feature space as a space where all datas are projected. For example, consider a 2D coordinate -space. Each data has two features, x and y coordinates. You can represent this data in your 2D -coordinate space, right? Now imagine if there are three features, you need 3D space. Now consider N -features, where you need N-dimensional space, right? This N-dimensional space is its feature space. -In our image, you can consider it as a 2D case with two features)*. +In the image, there are two families: Blue Squares and Red Triangles. We refer to each family as +a **Class**. 
Their houses are shown in their town map which we call the **Feature Space**. You can consider +a feature space as a space where all data are projected. For example, consider a 2D coordinate +space. Each datum has two features, an x coordinate and a y coordinate. You can represent this datum in your 2D +coordinate space, right? Now imagine that there are three features: you will need a 3D space. Now consider N +features: you need N-dimensional space, right? This N-dimensional space is its feature space. +In our image, you can consider it as a 2D case with two features. -Now a new member comes into the town and creates a new home, which is shown as green circle. He -should be added to one of these Blue/Red families. We call that process, **Classification**. What we -do? Since we are dealing with kNN, let us apply this algorithm. +Now consider what happens if a new member comes into the town and creates a new home, which is shown as the green circle. He +should be added to one of these Blue or Red families (or *classes*). We call that process **Classification**. How exactly should this new member be classified? Since we are dealing with kNN, let us apply the algorithm. -One method is to check who is his nearest neighbour. From the image, it is clear it is the Red -Triangle family. So he is also added into Red Triangle. This method is called simply **Nearest -Neighbour**, because classification depends only on the nearest neighbour. +One simple method is to check who is his nearest neighbour. From the image, it is clear that it is a member of the Red +Triangle family. So he is classified as a Red Triangle. This method is called simply **Nearest Neighbour** classification, because classification depends only on the *nearest neighbour*. -But there is a problem with that. Red Triangle may be the nearest. But what if there are lot of Blue -Squares near to him? Then Blue Squares have more strength in that locality than Red Triangle. 
So -just checking nearest one is not sufficient. Instead we check some k nearest families. Then whoever -is majority in them, the new guy belongs to that family. In our image, let's take k=3, ie 3 nearest -families. He has two Red and one Blue (there are two Blues equidistant, but since k=3, we take only +But there is a problem with this approach! Red Triangle may be the nearest neighbour, but what if there are also a lot of Blue +Squares nearby? Then Blue Squares have more strength in that locality than Red Triangles, so +just checking the nearest one is not sufficient. Instead we may want to check some **k** nearest families. Then whichever family is the majority amongst them, the new guy should belong to that family. In our image, let's take k=3, i.e. consider the 3 nearest +neighbours. The new member has two Red neighbours and one Blue neighbour (there are two Blues equidistant, but since k=3, we can take only one of them), so again he should be added to Red family. But what if we take k=7? Then he has 5 Blue -families and 2 Red families. Great!! Now he should be added to Blue family. So it all changes with -value of k. More funny thing is, what if k = 4? He has 2 Red and 2 Blue neighbours. It is a tie !!! -So better take k as an odd number. So this method is called **k-Nearest Neighbour** since -classification depends on k nearest neighbours. +neighbours and 2 Red neighbours and should be added to the Blue family. The result will vary with the selected +value of k. Note that if k is not an odd number, we can get a tie, as would happen in the above case with k=4. We would see that our new member has 2 Red and 2 Blue neighbours as his four nearest neighbours and we would need to choose a method for breaking the tie to perform classification. So to reiterate, this method is called **k-Nearest Neighbour** since +classification depends on the *k nearest neighbours*. 
Again, in kNN, it is true we are considering k neighbours, but we are giving equal importance to -all, right? Is it justice? For example, take the case of k=4. We told it is a tie. But see, the 2 -Red families are more closer to him than the other 2 Blue families. So he is more eligible to be -added to Red. So how do we mathematically explain that? We give some weights to each family -depending on their distance to the new-comer. For those who are near to him get higher weights while -those are far away get lower weights. Then we add total weights of each family separately. Whoever -gets highest total weights, new-comer goes to that family. This is called **modified kNN**. +all, right? Is this justified? For example, take the tied case of k=4. As we can see, the 2 +Red neighbours are actually closer to the new member than the other 2 Blue neighbours, so he is more eligible to be +added to the Red family. How do we mathematically explain that? We give some weights to each neighbour +depending on their distance to the new-comer: those who are nearer to him get higher weights, while +those that are farther away get lower weights. Then we add the total weights of each family separately and classify the new-comer as part of whichever family +received higher total weights. This is called **modified kNN** or **weighted kNN**. So what are some important things you see here? -- You need to have information about all the houses in town, right? Because, we have to check - the distance from new-comer to all the existing houses to find the nearest neighbour. If there - are plenty of houses and families, it takes lots of memory, and more time for calculation - also. -- There is almost zero time for any kind of training or preparation. +- Because we have to check + the distance from the new-comer to all the existing houses to find the nearest neighbour(s), you need to have information about all of the houses in town, right? 
If there are plenty of houses and families, it takes a lot of memory, and also more time for calculation. +- There is almost zero time for any kind of "training" or preparation. Our "learning" involves only memorizing (storing) the data, before testing and classifying. -Now let's see it in OpenCV. +Now let's see this algorithm at work in OpenCV. kNN in OpenCV ------------- @@ -67,11 +61,11 @@ We will do a simple example here, with two families (classes), just like above. chapter, we will do an even better example. So here, we label the Red family as **Class-0** (so denoted by 0) and Blue family as **Class-1** -(denoted by 1). We create 25 families or 25 training data, and label them either Class-0 or Class-1. -We do all these with the help of Random Number Generator in Numpy. +(denoted by 1). We create 25 neighbours or 25 training data, and label each of them as either part of Class-0 or Class-1. +We can do this with the help of a Random Number Generator from NumPy. -Then we plot it with the help of Matplotlib. Red families are shown as Red Triangles and Blue -families are shown as Blue Squares. +Then we can plot it with the help of Matplotlib. Red neighbours are shown as Red Triangles and Blue +neighbours are shown as Blue Squares. 
@code{.py} import cv2 as cv import numpy as np @@ -80,36 +74,36 @@ import matplotlib.pyplot as plt # Feature set containing (x,y) values of 25 known/training data trainData = np.random.randint(0,100,(25,2)).astype(np.float32) -# Labels each one either Red or Blue with numbers 0 and 1 +# Label each one either Red or Blue with numbers 0 and 1 responses = np.random.randint(0,2,(25,1)).astype(np.float32) -# Take Red families and plot them +# Take Red neighbours and plot them red = trainData[responses.ravel()==0] plt.scatter(red[:,0],red[:,1],80,'r','^') -# Take Blue families and plot them +# Take Blue neighbours and plot them blue = trainData[responses.ravel()==1] plt.scatter(blue[:,0],blue[:,1],80,'b','s') plt.show() @endcode -You will get something similar to our first image. Since you are using random number generator, you -will be getting different data each time you run the code. +You will get something similar to our first image. Since you are using a random number generator, you +will get different data each time you run the code. -Next initiate the kNN algorithm and pass the trainData and responses to train the kNN (It constructs -a search tree). +Next initiate the kNN algorithm and pass the trainData and responses to train the kNN. (Under the hood, it constructs +a search tree: see the Additional Resources section below for more information on this.) -Then we will bring one new-comer and classify him to a family with the help of kNN in OpenCV. Before -going to kNN, we need to know something on our test data (data of new comers). Our data should be a +Then we will bring one new-comer and classify him as belonging to a family with the help of kNN in OpenCV. Before +running kNN, we need to know something about our test data (data of new-comers). Our data should be a floating point array with size \f$number \; of \; testdata \times number \; of \; features\f$. Then we -find the nearest neighbours of new-comer. We can specify how many neighbours we want. 
It returns: +find the nearest neighbours of the new-comer. We can specify *k*: how many neighbours we want. (Here we used 3.) It returns: --# The label given to new-comer depending upon the kNN theory we saw earlier. If you want Nearest - Neighbour algorithm, just specify k=1 where k is the number of neighbours. -2. The labels of k-Nearest Neighbours. -3. Corresponding distances from new-comer to each nearest neighbour. +1. The label given to the new-comer depending upon the kNN theory we saw earlier. If you want the *Nearest + Neighbour* algorithm, just specify k=1. +2. The labels of the k-Nearest Neighbours. +3. The corresponding distances from the new-comer to each nearest neighbour. -So let's see how it works. New comer is marked in green color. +So let's see how it works. The new-comer is marked in green. @code{.py} newcomer = np.random.randint(0,100,(1,2)).astype(np.float32) plt.scatter(newcomer[:,0],newcomer[:,1],80,'g','o') @@ -124,21 +118,21 @@ print( "distance: {}\n".format(dist) ) plt.show() @endcode -I got the result as follows: +I got the following results: @code{.py} result: [[ 1.]] neighbours: [[ 1. 1. 1.]] distance: [[ 53. 58. 61.]] @endcode -It says our new-comer got 3 neighbours, all from Blue family. Therefore, he is labelled as Blue -family. It is obvious from plot below: +It says that our new-comer's 3 nearest neighbours are all from the Blue family. Therefore, he is labelled as part of the Blue +family. It is obvious from the plot below: ![image](images/knn_simple.png) -If you have large number of data, you can just pass it as array. Corresponding results are also +If you have multiple new-comers (test data), you can just pass them as an array. Corresponding results are also obtained as arrays. @code{.py} -# 10 new comers +# 10 new-comers newcomers = np.random.randint(0,100,(10,2)).astype(np.float32) ret, results,neighbours,dist = knn.findNearest(newcomer, 3) # The results also will contain 10 labels. 
@@ -146,8 +140,11 @@ ret, results,neighbours,dist = knn.findNearest(newcomer, 3) Additional Resources -------------------- --# [NPTEL notes on Pattern Recognition, Chapter - 11](http://www.nptel.iitm.ac.in/courses/106108057/12) +1. [NPTEL notes on Pattern Recognition, Chapter + 11](https://nptel.ac.in/courses/106/108/106108057/) +2. [Wikipedia article on Nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search) +3. [Wikipedia article on k-d tree](https://en.wikipedia.org/wiki/K-d_tree) Exercises --------- +1. Try repeating the above with more classes and different choices of k. Does choosing k become harder with more classes in the same 2D feature space? \ No newline at end of file From b5035ce9912240e68dc9439257d3621aca27b0d5 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Fri, 15 May 2020 00:53:38 +0300 Subject: [PATCH 17/17] Increase test threshold for YOLOv3 on OCL FP16 --- modules/dnn/test/test_common.impl.hpp | 7 ++++++- modules/dnn/test/test_darknet_importer.cpp | 13 ++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/dnn/test/test_common.impl.hpp b/modules/dnn/test/test_common.impl.hpp index ee5c1e958b..fdd1fe20cb 100644 --- a/modules/dnn/test/test_common.impl.hpp +++ b/modules/dnn/test/test_common.impl.hpp @@ -106,6 +106,7 @@ void normAssertDetections( int testClassId = testClassIds[i]; const cv::Rect2d& testBox = testBoxes[i]; bool matched = false; + double topIoU = 0; for (int j = 0; j < refBoxes.size() && !matched; ++j) { if (!matchedRefBoxes[j] && testClassId == refClassIds[j] && @@ -113,7 +114,8 @@ void normAssertDetections( { double interArea = (testBox & refBoxes[j]).area(); double iou = interArea / (testBox.area() + refBoxes[j].area() - interArea); - if (std::abs(iou - 1.0) < boxes_iou_diff) + topIoU = std::max(topIoU, iou); + if (1.0 - iou < boxes_iou_diff) { matched = true; matchedRefBoxes[j] = true; @@ -121,8 +123,11 @@ void normAssertDetections( } } if (!matched) + { std::cout << 
cv::format("Unmatched prediction: class %d score %f box ", testClassId, testScore) << testBox << std::endl; + std::cout << "Highest IoU: " << topIoU << std::endl; + } EXPECT_TRUE(matched) << comment; } diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index de5ff7f0c4..244bc80157 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -464,7 +464,7 @@ TEST_P(Test_Darknet_nets, YOLOv3) 1, 2, 0.997412f, 0.647584f, 0.459939f, 0.821038f, 0.663947f); // a car double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : 8e-5; - double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.018 : 3e-4; + double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.042 : 3e-4; std::string config_file = "yolov3.cfg"; std::string weights_file = "yolov3.weights"; @@ -487,15 +487,10 @@ TEST_P(Test_Darknet_nets, YOLOv3) #if defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { - if (INF_ENGINE_VER_MAJOR_LE(2018050000) && target == DNN_TARGET_OPENCL) + if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - else if (INF_ENGINE_VER_MAJOR_EQ(2019020000)) - { - if (target == DNN_TARGET_OPENCL) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - if (target == DNN_TARGET_OPENCL_FP16) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } + else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000)) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); else if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);