From d773691848f6850eb1b21b2b6031a5f64c321efa Mon Sep 17 00:00:00 2001 From: AleksandrPanov Date: Wed, 4 Aug 2021 15:37:20 +0300 Subject: [PATCH 1/9] add note about hierarchy in python version --- modules/imgproc/include/opencv2/imgproc.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index a1cfff991d..5e66b14e3b 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -3951,6 +3951,7 @@ hierarchy[i][0] , hierarchy[i][1] , hierarchy[i][2] , and hierarchy[i][3] are se in contours of the next and previous contours at the same hierarchical level, the first child contour and the parent contour, respectively. If for the contour i there are no next, previous, parent, or nested contours, the corresponding elements of hierarchy[i] will be negative. +@note In Python, hierarchy is nested inside a top level array. Use hierarchy[0][i] to access hierarchical elements of i-th contour. @param mode Contour retrieval mode, see #RetrievalModes @param method Contour approximation method, see #ContourApproximationModes @param offset Optional offset by which every contour point is shifted. This is useful if the From 2a177052de55c85554194a2464a91e6e09c7f768 Mon Sep 17 00:00:00 2001 From: SamFC10 Date: Mon, 9 Aug 2021 12:08:55 +0530 Subject: [PATCH 2/9] fix bug in prior-box variances --- .../dnn/src/layers/detection_output_layer.cpp | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 8374d74293..614b3a6462 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -456,7 +456,7 @@ public: // Retrieve all prior bboxes std::vector priorBBoxes; std::vector > priorVariances; - GetPriorBBoxes(priorData, numPriors, _bboxesNormalized, priorBBoxes, priorVariances); + GetPriorBBoxes(priorData, numPriors, _bboxesNormalized, _varianceEncodedInTarget, priorBBoxes, priorVariances); // Decode all loc predictions to bboxes util::NormalizedBBox clipBounds; @@ -750,7 +750,7 @@ public: CV_Assert(prior_bboxes.size() == prior_variances.size()); CV_Assert(prior_bboxes.size() == bboxes.size()); size_t num_bboxes = prior_bboxes.size(); - CV_Assert(num_bboxes == 0 || prior_variances[0].size() == 4); + CV_Assert(num_bboxes == 0 || prior_variances[0].size() == 4 || variance_encoded_in_target); decode_bboxes.clear(); decode_bboxes.resize(num_bboxes); if(variance_encoded_in_target) { @@ -802,12 +802,13 @@ public: } // Get prior bounding boxes from prior_data - // prior_data: 1 x 2 x num_priors * 4 x 1 blob. + // prior_data: 1 x 1 x num_priors * 4 x 1 blob or 1 x 2 x num_priors * 4 x 1 blob. // num_priors: number of priors. // prior_bboxes: stores all the prior bboxes in the format of util::NormalizedBBox. // prior_variances: stores all the variances needed by prior bboxes. 
static void GetPriorBBoxes(const float* priorData, const int& numPriors, - bool normalized_bbox, std::vector& priorBBoxes, + bool normalized_bbox, bool variance_encoded_in_target, + std::vector& priorBBoxes, std::vector >& priorVariances) { priorBBoxes.clear(); priorBBoxes.resize(numPriors); @@ -823,13 +824,16 @@ public: bbox.set_size(BBoxSize(bbox, normalized_bbox)); } - for (int i = 0; i < numPriors; ++i) + if (!variance_encoded_in_target) { - int startIdx = (numPriors + i) * 4; - // not needed here: priorVariances[i].clear(); - for (int j = 0; j < 4; ++j) + for (int i = 0; i < numPriors; ++i) { - priorVariances[i].push_back(priorData[startIdx + j]); + int startIdx = (numPriors + i) * 4; + // not needed here: priorVariances[i].clear(); + for (int j = 0; j < 4; ++j) + { + priorVariances[i].push_back(priorData[startIdx + j]); + } } } } From 739ff84732f6385a43b32b37d875921b9073d009 Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Mon, 9 Aug 2021 13:28:33 +0300 Subject: [PATCH 3/9] add Max layer to TFImporter --- modules/dnn/src/tensorflow/tf_importer.cpp | 26 +++++++++++++++++----- modules/dnn/test/test_tf_importer.cpp | 22 ++++++++++++++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index fa33211a50..ca9d7c5e21 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -647,7 +647,7 @@ const TFImporter::DispatchMap TFImporter::buildDispatchMap() dispatch["PriorBox"] = &TFImporter::parsePriorBox; dispatch["Softmax"] = &TFImporter::parseSoftmax; dispatch["CropAndResize"] = &TFImporter::parseCropAndResize; - dispatch["Mean"] = dispatch["Sum"] = &TFImporter::parseMean; + dispatch["Mean"] = dispatch["Sum"] = dispatch["Max"] = &TFImporter::parseMean; dispatch["Pack"] = &TFImporter::parsePack; dispatch["ClipByValue"] = &TFImporter::parseClipByValue; dispatch["LeakyRelu"] = &TFImporter::parseLeakyRelu; @@ -657,6 +657,7 @@ const TFImporter::DispatchMap TFImporter::buildDispatchMap() return dispatch; } +// "Conv2D" "SpaceToBatchND" "DepthwiseConv2dNative" "Pad" "MirrorPad" "Conv3D" void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer_, LayerParams& layerParams) { tensorflow::NodeDef layer = layer_; @@ -876,6 +877,7 @@ void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::N data_layouts[name] = DATA_LAYOUT_NHWC; } +// "BiasAdd" "Add" "AddV2" "Sub" "AddN" void TFImporter::parseBias(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1087,6 +1089,7 @@ void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeD } } +// "Flatten" "Squeeze" void TFImporter::parseFlatten(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1245,6 +1248,7 @@ void TFImporter::parseLrn(tensorflow::GraphDef& net, const tensorflow::NodeDef& connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } +// "Concat" "ConcatV2" void TFImporter::parseConcat(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1295,6 +1299,7 @@ void TFImporter::parseConcat(tensorflow::GraphDef& net, const tensorflow::NodeDe } } +// "MaxPool" "MaxPool3D" void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, 
LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1316,6 +1321,7 @@ void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeD connectToAllBlobs(layer_id, dstNet, parsePin(inputName), id, num_inputs); } +// "AvgPool" "AvgPool3D" void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1502,6 +1508,7 @@ void TFImporter::parseStridedSlice(tensorflow::GraphDef& net, const tensorflow:: connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); } +// "Mul" "RealDiv" void TFImporter::parseMul(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1659,6 +1666,7 @@ void TFImporter::parseMul(tensorflow::GraphDef& net, const tensorflow::NodeDef& } } +// "FusedBatchNorm" "FusedBatchNormV3" void TFImporter::parseFusedBatchNorm(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { // op: "FusedBatchNorm" @@ -1918,6 +1926,7 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod data_layouts[name] = DATA_LAYOUT_UNKNOWN; } +// "ResizeNearestNeighbor" "ResizeBilinear" "FusedResizeAndPadConv2D" void TFImporter::parseResize(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer_, LayerParams& layerParams) { tensorflow::NodeDef layer = layer_; @@ -2106,6 +2115,7 @@ void TFImporter::parseCropAndResize(tensorflow::GraphDef& net, const tensorflow: connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); } +// "Mean" "Sum" "Max" void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { // Computes the mean of elements across dimensions of a tensor. @@ -2124,7 +2134,12 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& const std::string& name = layer.name(); const std::string& type = layer.op(); const int num_inputs = layer.input_size(); + std::string pool_type = cv::toLowerCase(type); + if (pool_type == "mean") + { + pool_type = "ave"; + } CV_CheckGT(num_inputs, 0, ""); Mat indices = getTensorContent(getConstBlob(layer, value_id, 1)); @@ -2161,7 +2176,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& LayerParams avgLp; std::string avgName = name + "/avg"; CV_Assert(layer_id.find(avgName) == layer_id.end()); - avgLp.set("pool", type == "Mean" ? "ave" : "sum"); + avgLp.set("pool", pool_type); // pooling kernel H x 1 avgLp.set("global_pooling_h", true); avgLp.set("kernel_w", 1); @@ -2202,7 +2217,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& int axis = toNCHW(indices.at(0)); if (axis == 2 || axis == 3) { - layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set("pool", pool_type); layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1); layerParams.set(axis == 2 ? "global_pooling_h" : "global_pooling_w", true); int id = dstNet.addLayer(name, "Pooling", layerParams); @@ -2234,7 +2249,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& Pin inpId = parsePin(layer.input(0)); addPermuteLayer(order, name + "/nhwc", inpId); - layerParams.set("pool", type == "Mean" ? 
"ave" : "sum"); + layerParams.set("pool", pool_type); layerParams.set("kernel_h", 1); layerParams.set("global_pooling_w", true); int id = dstNet.addLayer(name, "Pooling", layerParams); @@ -2264,7 +2279,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation."); - layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set("pool", pool_type); layerParams.set("global_pooling", true); int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; @@ -2368,6 +2383,7 @@ void TFImporter::parseLeakyRelu(tensorflow::GraphDef& net, const tensorflow::Nod connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } +// "Abs" "Tanh" "Sigmoid" "Relu" "Elu" "Exp" "Identity" "Relu6" void TFImporter::parseActivation(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 4f7840f9e4..68d6e88a66 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -128,6 +128,13 @@ TEST_P(Test_TensorFlow_layers, reduce_mean) runTensorFlowNet("global_pool_by_axis"); } +TEST_P(Test_TensorFlow_layers, reduce_max) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + runTensorFlowNet("max_pool_by_axis"); +} + TEST_P(Test_TensorFlow_layers, reduce_sum) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) @@ -135,11 +142,21 @@ TEST_P(Test_TensorFlow_layers, reduce_sum) runTensorFlowNet("sum_pool_by_axis"); } +TEST_P(Test_TensorFlow_layers, reduce_max_channel) +{ + runTensorFlowNet("reduce_max_channel"); +} + TEST_P(Test_TensorFlow_layers, reduce_sum_channel) { runTensorFlowNet("reduce_sum_channel"); } +TEST_P(Test_TensorFlow_layers, reduce_max_channel_keep_dims) +{ + runTensorFlowNet("reduce_max_channel", false, 0.0, 0.0, false, "_keep_dims"); +} + TEST_P(Test_TensorFlow_layers, reduce_sum_channel_keep_dims) { runTensorFlowNet("reduce_sum_channel", false, 0.0, 0.0, false, "_keep_dims"); @@ -386,6 +403,11 @@ TEST_P(Test_TensorFlow_layers, pooling_reduce_mean) runTensorFlowNet("reduce_mean"); // an average pooling over all spatial dimensions. } +TEST_P(Test_TensorFlow_layers, pooling_reduce_max) +{ + runTensorFlowNet("reduce_max"); // a MAX pooling over all spatial dimensions. +} + TEST_P(Test_TensorFlow_layers, pooling_reduce_sum) { runTensorFlowNet("reduce_sum"); // a SUM pooling over all spatial dimensions. 
From 992b47b9916f9dbdfee16ed1a59ba64cda0779bb Mon Sep 17 00:00:00 2001 From: AleksandrPanov Date: Tue, 10 Aug 2021 18:53:28 +0300 Subject: [PATCH 4/9] add 19769 and 19769_lightweight tests --- modules/imgproc/test/test_convhull.cpp | 73 ++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/modules/imgproc/test/test_convhull.cpp b/modules/imgproc/test/test_convhull.cpp index f1d739b0e0..dee3769762 100644 --- a/modules/imgproc/test/test_convhull.cpp +++ b/modules/imgproc/test/test_convhull.cpp @@ -2384,5 +2384,78 @@ TEST(Imgproc_minAreaRect, reproducer_18157) EXPECT_TRUE(checkMinAreaRect(rr, contour)) << rr.center << " " << rr.size << " " << rr.angle; } +TEST(Imgproc_minAreaRect, reproducer_19769_lightweight) +{ + const int N = 23; + float pts_[N][2] = { + {1325, 732}, {1248, 808}, {582, 1510}, {586, 1524}, + {595, 1541}, {599, 1547}, {789, 1745}, {829, 1786}, + {997, 1958}, {1116, 2074}, {1207, 2066}, {1216, 2058}, + {1231, 2044}, {1265, 2011}, {2036, 1254}, {2100, 1191}, + {2169, 1123}, {2315, 979}, {2395, 900}, {2438, 787}, + {2434, 782}, {2416, 762}, {2266, 610} + }; + Mat contour(N, 1, CV_32FC2, (void*)pts_); + + RotatedRect rr = cv::minAreaRect(contour); + + EXPECT_TRUE(checkMinAreaRect(rr, contour)) << rr.center << " " << rr.size << " " << rr.angle; +} + +TEST(Imgproc_minAreaRect, reproducer_19769) +{ + const int N = 169; + float pts_[N][2] = { + {1854, 227}, {1850, 228}, {1847, 229}, {1835, 235}, + {1832, 237}, {1829, 239}, {1825, 242}, {1818, 248}, + {1807, 258}, {1759, 306}, {1712, 351}, {1708, 356}, + {1658, 404}, {1655, 408}, {1602, 459}, {1599, 463}, + {1542, 518}, {1477, 582}, {1402, 656}, {1325, 732}, + {1248, 808}, {1161, 894}, {1157, 898}, {1155, 900}, + {1068, 986}, {1060, 995}, {1058, 997}, {957, 1097}, + {956, 1097}, {814, 1238}, {810, 1242}, {805, 1248}, + {610, 1442}, {603, 1450}, {599, 1455}, {596, 1459}, + {594, 1462}, {592, 1465}, {590, 1470}, {588, 1472}, + {586, 1476}, {586, 1478}, {584, 1481}, {583, 1485}, + {582, 1490}, {582, 1510}, {583, 1515}, {584, 1518}, + {585, 1521}, {586, 1524}, {593, 1538}, {595, 1541}, + {597, 1544}, {599, 1547}, {603, 1552}, {609, 1559}, + {623, 1574}, {645, 1597}, {677, 1630}, {713, 1667}, + {753, 1707}, {789, 1744}, {789, 1745}, {829, 1786}, + {871, 1828}, {909, 1867}, {909, 1868}, {950, 1910}, + {953, 1912}, {997, 1958}, {1047, 2009}, {1094, 2056}, + {1105, 2066}, {1110, 2070}, {1113, 2072}, {1116, 2074}, + {1119, 2076}, {1122, 2077}, {1124, 2079}, {1130, 2082}, + {1133, 2083}, {1136, 2084}, {1139, 2085}, {1142, 2086}, + {1148, 2087}, {1166, 2087}, {1170, 2086}, {1174, 2085}, + {1177, 2084}, {1180, 2083}, {1188, 2079}, {1190, 2077}, + {1193, 2076}, {1196, 2074}, {1199, 2072}, {1202, 2070}, + {1207, 2066}, {1216, 2058}, {1231, 2044}, {1265, 2011}, + {1314, 1962}, {1360, 1917}, {1361, 1917}, {1408, 1871}, + {1457, 1822}, {1508, 1773}, {1512, 1768}, {1560, 1722}, + {1617, 1665}, {1671, 1613}, {1730, 1554}, {1784, 1502}, + {1786, 1500}, {1787, 1498}, {1846, 1440}, {1850, 1437}, + {1908, 1380}, {1974, 1314}, {2034, 1256}, {2036, 1254}, + {2100, 1191}, {2169, 1123}, {2242, 1051}, {2315, 979}, + {2395, 900}, {2426, 869}, {2435, 859}, {2438, 855}, + {2440, 852}, {2442, 849}, {2443, 846}, {2445, 844}, + {2446, 842}, {2446, 840}, {2448, 837}, {2449, 834}, + {2450, 829}, {2450, 814}, {2449, 809}, {2448, 806}, + {2447, 803}, {2442, 793}, {2440, 790}, {2438, 787}, + {2434, 782}, {2428, 775}, {2416, 762}, {2411, 758}, + {2342, 688}, {2340, 686}, {2338, 684}, {2266, 610}, + {2260, 605}, {2170, 513}, {2075, 417}, {2073, 415}, + 
{2069, 412}, {1955, 297}, {1955, 296}, {1913, 254}, + {1904, 246}, {1897, 240}, {1894, 238}, {1891, 236}, + {1888, 234}, {1880, 230}, {1877, 229}, {1874, 228}, + {1870, 227} + }; + Mat contour(N, 1, CV_32FC2, (void*)pts_); + + RotatedRect rr = cv::minAreaRect(contour); + + EXPECT_TRUE(checkMinAreaRect(rr, contour)) << rr.center << " " << rr.size << " " << rr.angle; +} + }} // namespace /* End of file. */ From 9d61c181434a6903fa15e4915b9fffed65ebcae8 Mon Sep 17 00:00:00 2001 From: utibenkei Date: Sun, 8 Aug 2021 01:08:31 +0900 Subject: [PATCH 5/9] fix testSaveLoad --- modules/ml/misc/java/test/MLTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ml/misc/java/test/MLTest.java b/modules/ml/misc/java/test/MLTest.java index 2b08543a84..504805dffa 100644 --- a/modules/ml/misc/java/test/MLTest.java +++ b/modules/ml/misc/java/test/MLTest.java @@ -36,7 +36,7 @@ public class MLTest extends OpenCVTestCase { String filename = OpenCVTestRunner.getTempFileName("yml"); saved.save(filename); SVM loaded = SVM.load(filename); - assertTrue(saved.isTrained()); + assertTrue(loaded.isTrained()); } } From 8199967b3189fb9aa711afc4e815cd13f312b7ae Mon Sep 17 00:00:00 2001 From: AleksandrPanov Date: Wed, 11 Aug 2021 19:08:52 +0300 Subject: [PATCH 6/9] fix choose minimum angle in rotatingCalipers --- modules/imgproc/src/rotcalipers.cpp | 55 ++++++++++++++++++----------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/modules/imgproc/src/rotcalipers.cpp b/modules/imgproc/src/rotcalipers.cpp index 527f71a247..e3d81c7e0c 100644 --- a/modules/imgproc/src/rotcalipers.cpp +++ b/modules/imgproc/src/rotcalipers.cpp @@ -88,6 +88,32 @@ enum { CALIPERS_MAXHEIGHT=0, CALIPERS_MINAREARECT=1, CALIPERS_MAXDIST=2 }; // Notes: //F*/ +static void rotate90CCW(const cv::Point2f& in, cv::Point2f &out) +{ + out.x = -in.y; + out.y = in.x; +} + +static void rotate90CW(const cv::Point2f& in, cv::Point2f &out) +{ + out.x = in.y; + out.y = -in.x; +} + +static void rotate180(const cv::Point2f& in, cv::Point2f &out) +{ + out.x = -in.x; + out.y = -in.y; +} + +/* return true if first vector is to the right (clockwise) of the second */ +static bool firstVecIsRight(const cv::Point2f& vec1, const cv::Point2f &vec2) +{ + cv::Point2f tmp; + rotate90CW(vec1, tmp); + return tmp.x * vec2.x + tmp.y * vec2.y < 0; +} + /* we will use usual cartesian coordinates */ static void rotatingCalipers( const Point2f* points, int n, int mode, float* out ) { @@ -100,6 +126,7 @@ static void rotatingCalipers( const Point2f* points, int n, int mode, float* out Point2f* vect = (Point2f*)(inv_vect_length + n); int left = 0, bottom = 0, right = 0, top = 0; int seq[4] = { -1, -1, -1, -1 }; + Point2f rot_vect[4]; /* rotating calipers sides will always have coordinates (a,b) (-b,a) (-a,-b) (b, -a) @@ -179,32 +206,18 @@ static void rotatingCalipers( const Point2f* points, int n, int mode, float* out /* all of edges will be checked while rotating calipers by 90 degrees */ for( k = 0; k < n; k++ ) { - /* sinus of minimal angle */ - /*float sinus;*/ - - /* compute cosine of angle between calipers side and polygon edge */ - /* dp - dot product */ - float dp[4] = { - +base_a * vect[seq[0]].x + base_b * vect[seq[0]].y, - -base_b * vect[seq[1]].x + base_a * vect[seq[1]].y, - -base_a * vect[seq[2]].x - base_b * vect[seq[2]].y, - +base_b * vect[seq[3]].x - base_a * vect[seq[3]].y, - }; - - float maxcos = dp[0] * inv_vect_length[seq[0]]; - /* number of calipers edges, that has minimal angle with edge */ int main_element = 0; - /* choose 
minimal angle */ - for ( i = 1; i < 4; ++i ) + /* choose minimum angle between calipers side and polygon edge by dot product sign */ + rot_vect[0] = vect[seq[0]]; + rotate90CW(vect[seq[1]], rot_vect[1]); + rotate180(vect[seq[2]], rot_vect[2]); + rotate90CCW(vect[seq[3]], rot_vect[3]); + for (i = 1; i < 4; i++) { - float cosalpha = dp[i] * inv_vect_length[seq[i]]; - if (cosalpha > maxcos) - { + if (firstVecIsRight(rot_vect[i], rot_vect[main_element])) main_element = i; - maxcos = cosalpha; - } } /*rotate calipers*/ From 25cd7c7c509826e42214d00ca4e73ac096abd382 Mon Sep 17 00:00:00 2001 From: AleksandrPanov Date: Thu, 12 Aug 2021 14:40:40 +0300 Subject: [PATCH 7/9] add note about Python's dsize to doc --- modules/imgproc/include/opencv2/imgproc.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 5e66b14e3b..f7583c1926 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -2223,7 +2223,7 @@ enlarge an image, it will generally look best with c#INTER_CUBIC (slow) or #INTE @param src input image. @param dst output image; it has the size dsize (when it is non-zero) or the size computed from src.size(), fx, and fy; the type of dst is the same as of src. -@param dsize output image size; if it equals zero, it is computed as: +@param dsize output image size; if it equals zero (`None` in Python), it is computed as: \f[\texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}\f] Either dsize or both fx and fy must be non-zero. @param fx scale factor along the horizontal axis; when it equals 0, it is computed as From 4300bb2e1f5eadd9b6eb1244ab2ed0250c2418b2 Mon Sep 17 00:00:00 2001 From: Iyad Ahmed Date: Thu, 12 Aug 2021 16:51:02 +0000 Subject: [PATCH 8/9] Merge pull request #20541 from iyadahmed:video_capture_timeout_prop * VideoCapture timeout set/get * Common formatting for enum values * Fix enum values wrongly in videoio.hpp * Define timeout enum values in public api and align with master --- modules/videoio/include/opencv2/videoio.hpp | 2 ++ modules/videoio/src/cap_ffmpeg_api.hpp | 4 ++- modules/videoio/src/cap_ffmpeg_impl.hpp | 33 +++++++++++++++++---- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index ba9c18bd97..aa247dd84e 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -179,6 +179,8 @@ enum VideoCaptureProperties { CAP_PROP_BITRATE =47, //!< (read-only) Video bitrate in kbits/s CAP_PROP_ORIENTATION_META=48, //!< (read-only) Frame rotation defined by stream meta (applicable for FFmpeg back-end only) CAP_PROP_ORIENTATION_AUTO=49, //!< if true - rotates output frames of CvCapture considering video file's metadata (applicable for FFmpeg back-end only) (https://github.com/opencv/opencv/issues/15499) + CAP_PROP_OPEN_TIMEOUT_MSEC=53, + CAP_PROP_READ_TIMEOUT_MSEC=54, #ifndef CV_DOXYGEN CV__CAP_PROP_LATEST #endif diff --git a/modules/videoio/src/cap_ffmpeg_api.hpp b/modules/videoio/src/cap_ffmpeg_api.hpp index 984d36f23c..e618765539 100644 --- a/modules/videoio/src/cap_ffmpeg_api.hpp +++ b/modules/videoio/src/cap_ffmpeg_api.hpp @@ -30,7 +30,9 @@ enum CV_FFMPEG_CAP_PROP_CODEC_PIXEL_FORMAT=46, CV_FFMPEG_CAP_PROP_BITRATE=47, CV_FFMPEG_CAP_PROP_ORIENTATION_META=48, - CV_FFMPEG_CAP_PROP_ORIENTATION_AUTO=49 + CV_FFMPEG_CAP_PROP_ORIENTATION_AUTO=49, + 
CV_FFMPEG_CAP_PROP_OPEN_TIMEOUT_MSEC=53, + CV_FFMPEG_CAP_PROP_READ_TIMEOUT_MSEC=54 }; typedef struct CvCapture_FFMPEG CvCapture_FFMPEG; diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 4164ab941c..937d348215 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -186,8 +186,8 @@ extern "C" { #endif #if USE_AV_INTERRUPT_CALLBACK -#define LIBAVFORMAT_INTERRUPT_OPEN_TIMEOUT_MS 30000 -#define LIBAVFORMAT_INTERRUPT_READ_TIMEOUT_MS 30000 +#define LIBAVFORMAT_INTERRUPT_OPEN_DEFAULT_TIMEOUT_MS 30000 +#define LIBAVFORMAT_INTERRUPT_READ_DEFAULT_TIMEOUT_MS 30000 #ifdef _WIN32 // http://stackoverflow.com/questions/5404277/porting-clock-gettime-to-windows @@ -534,6 +534,8 @@ struct CvCapture_FFMPEG AVDictionary *dict; #endif #if USE_AV_INTERRUPT_CALLBACK + int open_timeout_ms; + int read_timeout_ms; AVInterruptCallbackMetadata interrupt_metadata; #endif @@ -568,6 +570,11 @@ void CvCapture_FFMPEG::init() frame_number = 0; eps_zero = 0.000025; +#if USE_AV_INTERRUPT_CALLBACK + open_timeout_ms = LIBAVFORMAT_INTERRUPT_OPEN_DEFAULT_TIMEOUT_MS; + read_timeout_ms = LIBAVFORMAT_INTERRUPT_READ_DEFAULT_TIMEOUT_MS; +#endif + rotation_angle = 0; #if (LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0)) @@ -923,7 +930,7 @@ bool CvCapture_FFMPEG::open( const char* _filename ) #if USE_AV_INTERRUPT_CALLBACK /* interrupt callback */ - interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_OPEN_TIMEOUT_MS; + interrupt_metadata.timeout_after_ms = open_timeout_ms; get_monotonic_time(&interrupt_metadata.value); ic = avformat_alloc_context(); @@ -1227,7 +1234,7 @@ bool CvCapture_FFMPEG::grabFrame() #if USE_AV_INTERRUPT_CALLBACK // activate interrupt callback get_monotonic_time(&interrupt_metadata.value); - interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_READ_TIMEOUT_MS; + interrupt_metadata.timeout_after_ms = read_timeout_ms; #endif // get the next frame @@ -1483,6 +1490,12 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const #else return 0; #endif +#if USE_AV_INTERRUPT_CALLBACK + case CV_FFMPEG_CAP_PROP_OPEN_TIMEOUT_MSEC: + return static_cast(open_timeout_ms); + case CV_FFMPEG_CAP_PROP_READ_TIMEOUT_MSEC: + return static_cast(read_timeout_ms); +#endif // USE_AV_INTERRUPT_CALLBACK default: break; } @@ -1677,6 +1690,14 @@ bool CvCapture_FFMPEG::setProperty( int property_id, double value ) return false; #endif break; +#if USE_AV_INTERRUPT_CALLBACK + case CV_FFMPEG_CAP_PROP_OPEN_TIMEOUT_MSEC: + open_timeout_ms = (int)value; + break; + case CV_FFMPEG_CAP_PROP_READ_TIMEOUT_MSEC: + read_timeout_ms = (int)value; + break; +#endif // USE_AV_INTERRUPT_CALLBACK default: return false; } @@ -3114,7 +3135,7 @@ bool InputMediaStream_FFMPEG::open(const char* fileName, int* codec, int* chroma #if USE_AV_INTERRUPT_CALLBACK /* interrupt callback */ - interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_OPEN_TIMEOUT_MS; + interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_OPEN_DEFAULT_TIMEOUT_MS; get_monotonic_time(&interrupt_metadata.value); ctx_ = avformat_alloc_context(); @@ -3241,7 +3262,7 @@ bool InputMediaStream_FFMPEG::read(unsigned char** data, int* size, int* endOfFi #if USE_AV_INTERRUPT_CALLBACK // activate interrupt callback get_monotonic_time(&interrupt_metadata.value); - interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_READ_TIMEOUT_MS; + interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_READ_DEFAULT_TIMEOUT_MS; #endif // free last packet if exist From 
cfb36443fb02586aac34cb14f6f67b551e29cb68 Mon Sep 17 00:00:00 2001 From: Julia Bareeva <34717687+JulieBar@users.noreply.github.com> Date: Fri, 13 Aug 2021 15:41:00 +0300 Subject: [PATCH 9/9] Merge pull request #20506 from JulieBar:lstm_activations * Support activations(Sigmoid, Tanh) for LSTM * fix warning --- modules/dnn/src/layers/recurrent_layers.cpp | 49 ++++++++++++++++++--- modules/dnn/src/onnx/onnx_importer.cpp | 38 +++++++++++++--- modules/dnn/test/test_onnx_importer.cpp | 5 +++ 3 files changed, 79 insertions(+), 13 deletions(-) diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index a6715aefca..9088c13390 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -80,12 +80,31 @@ static void sigmoid(const Mat &src, Mat &dst) cv::pow(1 + dst, -1, dst); } +typedef void (*ActivationFunction)(const Mat &src, Mat &dst); +static ActivationFunction get_activation_function(const String& activation) { + // most used activations for PyTorch and TF : Tanh, Sigmoid + // if you need to support more optional activations use std::map instead + if (activation == "Tanh") + { + return tanh; + } + else if (activation == "Sigmoid") + { + return sigmoid; + } + else + { + CV_Error(Error::StsNotImplemented, + cv::format("Activation function [%s] for layer LSTM is not supported", activation.c_str())); + } +} + class LSTMLayerImpl CV_FINAL : public LSTMLayer { int numTimeStamps, numSamples; bool allocated; - MatShape outTailShape; //shape of single output sample + MatShape outTailShape; //shape of single output sample MatShape outTsShape; //shape of N output samples bool useTimestampDim; @@ -95,6 +114,10 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer bool reverse; // If true, go in negative direction along the time axis bool bidirectional; // If true, produces both forward and reversed directions along time axis + ActivationFunction f_activation; + ActivationFunction g_activation; + ActivationFunction h_activation; + public: LSTMLayerImpl(const LayerParams& params) @@ -145,6 +168,20 @@ public: reverse = params.get("reverse", false); CV_Assert(!reverse || !bidirectional); + // read activations + DictValue activations = params.get("activations", ""); + if (activations.size() == 1) // if activations wasn't specified use default + { + f_activation = sigmoid; + g_activation = tanh; + h_activation = tanh; + } else { + CV_Assert(activations.size() == 3); + f_activation = get_activation_function(activations.getStringValue(0)); + g_activation = get_activation_function(activations.getStringValue(1)); + h_activation = get_activation_function(activations.getStringValue(2)); + } + allocated = false; outTailShape.clear(); } @@ -339,15 +376,15 @@ public: Mat gatesIF = gates.colRange(0, 2*numOut); gemm(cInternal, blobs[5], 1, gateI, 1, gateI); gemm(cInternal, blobs[6], 1, gateF, 1, gateF); - sigmoid(gatesIF, gatesIF); + f_activation(gatesIF, gatesIF); } else { Mat gatesIFO = gates.colRange(0, 3*numOut); - sigmoid(gatesIFO, gatesIFO); + f_activation(gatesIFO, gatesIFO); } - tanh(gateG, gateG); + g_activation(gateG, gateG); //compute c_t multiply(gateF, cInternal, gateF); // f_t (*) c_{t-1} @@ -362,11 +399,11 @@ public: if (usePeephole) { gemm(cInternal, blobs[7], 1, gateO, 1, gateO); - sigmoid(gateO, gateO); + f_activation(gateO, gateO); } //compute h_t - tanh(cInternal, hInternal); + h_activation(cInternal, hInternal); multiply(gateO, hInternal, hInternal); //save results in output blobs diff --git 
a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index b833b2ea44..32b56278bd 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -244,6 +244,10 @@ static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protob return DictValue::arrayInt(&dst[0], src.size()); } +static DictValue parseStr(const ::google::protobuf::RepeatedPtrField< ::std::string>& src) { + return DictValue::arrayString(src.begin(), static_cast(src.size())); +} + LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto) { LayerParams lp; @@ -301,6 +305,10 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); lp.set("dilation", parse(attribute_proto.ints())); } + else if(attribute_name == "activations" && node_proto.op_type() == "LSTM") + { + lp.set(attribute_name, parseStr(attribute_proto.strings())); + } else if (attribute_proto.has_i()) { ::google::protobuf::int64 src = attribute_proto.i(); @@ -997,18 +1005,32 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr lstmParams.name += "/lstm"; // https://pytorch.org/docs/stable/nn.html#lstm - CV_Assert(node_proto.input_size() == 7); + CV_Assert(node_proto.input_size() >= 7); Mat Wx = getBlob(node_proto, 1); Mat Wh = getBlob(node_proto, 2); Mat b = getBlob(node_proto, 3); - Mat h0 = getBlob(node_proto, 5); - Mat c0 = getBlob(node_proto, 6); - - b = b.reshape(1, b.size[0]); const int numHidden = lstmParams.get("hidden_size"); const int numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM. const int numFeatures = Wx.size[2]; + + Mat h0, c0; + if (!node_proto.input(5).empty()) { + h0 = getBlob(node_proto, 5); + h0 = h0.reshape(1, h0.size[0] * h0.size[1]); + } else { + // initial_h attribute can be empty in case of keras2onnx producer. fill it with zeros + h0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1); + } + if (!node_proto.input(6).empty()) { + c0 = getBlob(node_proto, 6); + c0 = c0.reshape(1, c0.size[0] * c0.size[1]); + } else { + // initial_c attribute can be empty in case of keras2onnx producer. 
fill it with zeros + c0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1); + } + + b = b.reshape(1, b.size[0]); Mat bx = b.colRange(0, b.cols / 2); Mat bh = b.colRange(b.cols / 2, b.cols); b = bx + bh; @@ -1036,8 +1058,7 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr } Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); - h0 = h0.reshape(1, h0.size[0] * h0.size[1]); - c0 = c0.reshape(1, c0.size[0] * c0.size[1]); + lstmParams.blobs.resize(5); lstmParams.blobs[0] = Wh; @@ -1045,6 +1066,9 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr lstmParams.blobs[2] = b; lstmParams.blobs[3] = h0; lstmParams.blobs[4] = c0; + + // read direction attribute + lstmParams.set("reverse", lstmParams.get("direction", "") == "reverse"); lstmParams.set("bidirectional", lstmParams.get("direction", "") == "bidirectional"); node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 05f77730af..a446a37c79 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -665,6 +665,11 @@ TEST_P(Test_ONNX_layers, Split_EltwiseMax) testONNXModels("split_max"); } +TEST_P(Test_ONNX_layers, LSTM_Activations) +{ + testONNXModels("lstm_cntk_tanh", pb, 0, 0, false, false); +} + TEST_P(Test_ONNX_layers, LSTM) { testONNXModels("lstm", npy, 0, 0, false, false);
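Patch 8 (#20541) turns the previously hard-coded FFmpeg interrupt timeouts into regular VideoCapture properties. A minimal usage sketch follows, assuming an FFmpeg-enabled build; the URL and timeout values are illustrative. The open timeout is consulted while the stream is being opened, so it is best supplied at open time (the parameter-list constructor shown here is available in recent 4.x builds), whereas the read timeout can also be adjusted on an already opened capture.

    #include <opencv2/videoio.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    int main()
    {
        const std::string url = "rtsp://example.com/stream";  // illustrative source

        // Fail fast instead of blocking for the 30-second defaults.
        std::vector<int> params = {
            cv::CAP_PROP_OPEN_TIMEOUT_MSEC, 5000,
            cv::CAP_PROP_READ_TIMEOUT_MSEC, 5000
        };
        cv::VideoCapture cap(url, cv::CAP_FFMPEG, params);
        if (!cap.isOpened())
        {
            std::cerr << "open failed or timed out" << std::endl;
            return 1;
        }

        // The read timeout can be changed later through the ordinary property API.
        cap.set(cv::CAP_PROP_READ_TIMEOUT_MSEC, 2000);
        std::cout << "read timeout: " << cap.get(cv::CAP_PROP_READ_TIMEOUT_MSEC) << " ms" << std::endl;

        cv::Mat frame;
        if (!cap.read(frame))  // false if grabbing was interrupted by the timeout
            std::cerr << "read failed or timed out" << std::endl;
        return 0;
    }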