From 543e0302d341703637e48063848f67b4f12000e9 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 24 Dec 2019 15:43:19 +0300 Subject: [PATCH] Support global pooling by axis --- .../dnn/include/opencv2/dnn/all_layers.hpp | 1 + modules/dnn/src/layers/pooling_layer.cpp | 11 ++++ modules/dnn/src/tensorflow/tf_importer.cpp | 52 ++++++++++++------- 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index b251b4adb3..d62b1f0bc7 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -251,6 +251,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; bool globalPooling; + int global_axis; bool computeMaxIdx; String padMode; bool ceilMode; diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 5727e2b3f9..c4b452c5ac 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -95,6 +95,8 @@ public: else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); + global_axis = params.has("global_axis") ? params.get("global_axis") : -1; + getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode); if (kernel_size.size() == 2) { kernel = Size(kernel_size[1], kernel_size[0]); @@ -149,6 +151,9 @@ public: if (globalPooling) { kernel = Size(inp[1], inp[0]); kernel_size = std::vector(inp.begin(), inp.end()); + } else if (global_axis != -1) { + kernel_size[global_axis] = inp[global_axis]; + kernel = Size(kernel_size[1], kernel_size[0]); } getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); @@ -1037,6 +1042,12 @@ virtual Ptr initNgraph(const std::vector >& inp outShape[0] = inputs[1][0]; // Number of proposals; outShape[1] = psRoiOutChannels; } + else if (global_axis != -1) + { + CV_Assert(global_axis >= 0 && global_axis < inpShape.size()); + outShape[2 + global_axis] = 1; + } + int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1); CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX)); diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 6fbaf98f96..426f8f8da4 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1944,14 +1944,13 @@ void TFImporter::populateNet(Net dstNet) layer_id[flattenName] = flattenId; connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0); - LayerParams reshapeLp; std::string reshapeName = name + "/reshape"; CV_Assert(layer_id.find(reshapeName) == layer_id.end()); - reshapeLp.set("axis", 0); + reshapeLp.set("axis", indices.at(0)); reshapeLp.set("num_axes", 1); - std::vector newShape = {1, 1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); + int newShape[] = {1, 1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3)); int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp); layer_id[reshapeName] = reshapeId; @@ -1961,23 +1960,38 @@ void TFImporter::populateNet(Net dstNet) std::string avgName = name + "/avg"; CV_Assert(layer_id.find(avgName) == layer_id.end()); avgLp.set("pool", "ave"); - avgLp.set("kernel_h", 3); // TODO: node.shape[0] - avgLp.set("kernel_w", 1); + // pooling kernel H x 1 + avgLp.set("global_axis", 0); + avgLp.set("kernel_size", 1); int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); layer_id[avgName] = avgId; - // one input only connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); - LayerParams reshapeLp2; - std::string reshapeName2 = name; - CV_Assert(layer_id.find(reshapeName2) == layer_id.end()); - newShape = {2, 20, 314, 253}; // TODO: remove out shapes + LayerParams sliceLp; + std::string sliceName = name + "/slice"; + CV_Assert(layer_id.find(sliceName) == layer_id.end()); + sliceLp.set("axis", indices.at(0)); + int begin[] = {0}; + int size[] = {1}; + sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1)); + sliceLp.set("size", DictValue::arrayInt(&size[0], 1)); + int sliceId = dstNet.addLayer(sliceName, "Slice", sliceLp); + layer_id[sliceName] = sliceId; + connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0); - reshapeLp2.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", indices.at(0)); + squeezeLp.set("end_axis", indices.at(0) + 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(sliceName), squeezeId, 0); - int reshapeId2 = dstNet.addLayer(reshapeName2, "Reshape", reshapeLp2); - layer_id[reshapeName2] = reshapeId2; - connect(layer_id, dstNet, Pin(avgName), reshapeId2, 0); + int id = dstNet.addLayer(name, "Reshape", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, Pin(avgName), id, 0); + connect(layer_id, dstNet, Pin(squeezeName), id, 1); } else { if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); @@ -2021,13 +2035,15 @@ void TFImporter::populateNet(Net dstNet) std::string base_name = name + "/reshape_"; std::vector reshape_names; for (int i = 0; i < num; i++) { - std::string reshape_name = base_name + std::to_string(i); + std::ostringstream ss; + ss << i; + std::string reshape_name = base_name + ss.str(); reshape_names.push_back(reshape_name); LayerParams reshapeLP; reshapeLP.set("axis", dim); reshapeLP.set("num_axes", 1); - std::vector outShape = {1, -1}; - reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + int outShape[] = {1, -1}; + reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2)); int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP); layer_id[reshape_name] = id; connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0);