From 6e33769e56c520b0c17f1a971f9513fc75a9fae9 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 23 Dec 2019 15:47:20 +0300 Subject: [PATCH 01/25] Add human parsing demo --- modules/dnn/src/tensorflow/tf_importer.cpp | 120 ++++++++++++--- samples/dnn/human_parsing.py | 165 +++++++++++++++++++++ 2 files changed, 265 insertions(+), 20 deletions(-) create mode 100644 samples/dnn/human_parsing.py diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index b1d7178798..6fbaf98f96 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1935,34 +1935,114 @@ void TFImporter::populateNet(Net dstNet) Mat indices = getTensorContent(getConstBlob(layer, value_id, 1)); CV_Assert(indices.type() == CV_32SC1); - if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) - CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); - - layerParams.set("pool", "ave"); - layerParams.set("global_pooling", true); - - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; - - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - - // There are two attributes, "keepdims" and a deprecated "keep_dims". - bool keepDims = false; - if (hasLayerAttr(layer, "keepdims")) - keepDims = getLayerAttr(layer, "keepdims").b(); - else if (hasLayerAttr(layer, "keep_dims")) - keepDims = getLayerAttr(layer, "keep_dims").b(); - - if (!keepDims) + if (indices.total() == 1 && indices.at(0) == 0) { LayerParams flattenLp; std::string flattenName = name + "/flatten"; CV_Assert(layer_id.find(flattenName) == layer_id.end()); int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); layer_id[flattenName] = flattenId; - connect(layer_id, dstNet, Pin(name), flattenId, 0); + connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0); + + + LayerParams reshapeLp; + std::string reshapeName = name + "/reshape"; + CV_Assert(layer_id.find(reshapeName) == layer_id.end()); + reshapeLp.set("axis", 0); + reshapeLp.set("num_axes", 1); + std::vector newShape = {1, 1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); + + int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp); + layer_id[reshapeName] = reshapeId; + connect(layer_id, dstNet, Pin(flattenName), reshapeId, 0); + + LayerParams avgLp; + std::string avgName = name + "/avg"; + CV_Assert(layer_id.find(avgName) == layer_id.end()); + avgLp.set("pool", "ave"); + avgLp.set("kernel_h", 3); // TODO: node.shape[0] + avgLp.set("kernel_w", 1); + int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); + layer_id[avgName] = avgId; + // one input only + connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); + + LayerParams reshapeLp2; + std::string reshapeName2 = name; + CV_Assert(layer_id.find(reshapeName2) == layer_id.end()); + newShape = {2, 20, 314, 253}; // TODO: remove out shapes + + reshapeLp2.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); + + int reshapeId2 = dstNet.addLayer(reshapeName2, "Reshape", reshapeLp2); + layer_id[reshapeName2] = reshapeId2; + connect(layer_id, dstNet, Pin(avgName), reshapeId2, 0); + } else { + if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) + CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); + + layerParams.set("pool", "ave"); + layerParams.set("global_pooling", true); + + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; + + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + + // There are two attributes, "keepdims" and a deprecated "keep_dims". + bool keepDims = false; + if (hasLayerAttr(layer, "keepdims")) + keepDims = getLayerAttr(layer, "keepdims").b(); + else if (hasLayerAttr(layer, "keep_dims")) + keepDims = getLayerAttr(layer, "keep_dims").b(); + + if (!keepDims) + { + LayerParams flattenLp; + std::string flattenName = name + "/flatten"; + CV_Assert(layer_id.find(flattenName) == layer_id.end()); + int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); + layer_id[flattenName] = flattenId; + connect(layer_id, dstNet, Pin(name), flattenId, 0); + } } } + else if (type == "Pack") + { + CV_Assert(hasLayerAttr(layer, "axis")); + int dim = (int)getLayerAttr(layer, "axis").i(); + if (dim != 0) + CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation."); + + CV_Assert(hasLayerAttr(layer, "N")); + int num = (int)getLayerAttr(layer, "N").i(); + CV_Assert(layer.input_size() == num); + std::string base_name = name + "/reshape_"; + std::vector reshape_names; + for (int i = 0; i < num; i++) { + std::string reshape_name = base_name + std::to_string(i); + reshape_names.push_back(reshape_name); + LayerParams reshapeLP; + reshapeLP.set("axis", dim); + reshapeLP.set("num_axes", 1); + std::vector outShape = {1, -1}; + reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP); + layer_id[reshape_name] = id; + connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0); + } + + layerParams.set("axis", dim); + int id = dstNet.addLayer(name, "Concat", layerParams); + layer_id[name] = id; + + for (int li = 0; li < num; li++) { + Pin inp = parsePin(reshape_names[li]); + connect(layer_id, dstNet, inp, id, li); + } + + } else if (type == "ClipByValue") { // op: "ClipByValue" diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py new file mode 100644 index 0000000000..84d0663871 --- /dev/null +++ b/samples/dnn/human_parsing.py @@ -0,0 +1,165 @@ +import cv2 as cv +import numpy as np +import argparse + + +backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, + cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) +targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) + +parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('--input', '-i', help='Path to input image. Skip this argument to capture frames from a camera.') +parser.add_argument('--model', '-m', required=True, help='Path to pb model.') +parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, + help="Choose one of computation backends: " + "%d: automatically (by default), " + "%d: Halide language (http://halide-lang.org/), " + "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "%d: OpenCV implementation" % backends) +parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, + help='Choose one of target computation devices: ' + '%d: CPU target (by default), ' + '%d: OpenCL, ' + '%d: OpenCL fp16 (half-float precision), ' + '%d: VPU' % targets) + +# To get pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view +# For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet +# Change script evaluate_parsing_JPPNet-s2.py for human parsing +# 1. Remove preprocessing to create image_batch_origin: +# - with tf.name_scope("create_inputs"): +# ... +# Add +# - image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input') +# +# 2. Create input +# image = cv2.imread(path/to/image) +# image_rev = np.flip(image, axis=1) +# image_h, image_w = image.shape[:2] +# input = np.stack([image, image_rev], axis=0) +# +# 3. Hardcode image_h and image_w shapes to determine output shapes +# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, [image_h, image_w]), +# tf.image.resize_images(parsing_out1_075, [image_h, image_w]), +# tf.image.resize_images(parsing_out1_125, [image_h, image_w])]), axis=0) +# Do similarly with parsing_out2, parsing_out3 +# 4. Remove postprocessing +# - parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) +# +# 5. To save model after sess.run(...) add: +# - input_graph_def = tf.get_default_graph().as_graph_def() +# - output_node = "Mean_3" +# - output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) +# - +# - output_graph = "LIP_JPPNet.pb" +# - with tf.gfile.GFile(output_graph, "wb") as f: +# - f.write(output_graph_def.SerializeToString()) + + + +def preprocess(image_path): + """ + Create 4-dimensional blob from image and flip image + :param image_path: path to input image + """ + image = cv.imread(image_path) + image_rev = np.flip(image, axis=1) + input = cv.dnn.blobFromImages([image, image_rev], mean=(104.00698793, 116.66876762, 122.67891434)) + return input + + +def run_net(input, model_path, backend, target): + """ + Read network and infer model + :param model_path: path to JPPNet model + """ + net = cv.dnn.readNet(model_path) + net.setPreferableBackend(backend) + net.setPreferableTarget(target) + net.setInput(input) + out = net.forward() + return out + + +def postprocess(out): + """ + Create a grayscale human segmentation + :param out: network output + """ + # LIP classes + # 0 Background + # 1 Hat + # 2 Hair + # 3 Glove + # 4 Sunglasses + # 5 UpperClothes + # 6 Dress + # 7 Coat + # 8 Socks + # 9 Pants + # 10 Jumpsuits + # 11 Scarf + # 12 Skirt + # 13 Face + # 14 LeftArm + # 15 RightArm + # 16 LeftLeg + # 17 RightLeg + # 18 LeftShoe + # 19 RightShoe + head_output, tail_output = np.split(out, indices_or_sections=[1], axis=0) + head_output = head_output.squeeze(0) + tail_output = tail_output.squeeze(0) + tail_list = np.split(tail_output, indices_or_sections=list(range(1, 20)), axis=0) + tail_list = [arr.squeeze(0) for arr in tail_list] + tail_list_rev = [tail_list[i] for i in range(14)] + tail_list_rev.extend([tail_list[15], tail_list[14], tail_list[17], tail_list[16], tail_list[19], tail_list[18]]) + tail_output_rev = np.stack(tail_list_rev, axis=0) + tail_output_rev = np.flip(tail_output_rev, axis=2) + raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=False) + raw_output_all = np.expand_dims(raw_output_all, axis=0) + raw_output_all = np.argmax(raw_output_all, axis=1) + raw_output_all = raw_output_all.transpose(1, 2, 0) + return raw_output_all + + +def decode_labels(gray_image): + """ + Colorize image according to labels + :param gray_image: grayscale human segmentation result + """ + height, width, _ = gray_image.shape + colors = [(0, 0, 0), (128, 0, 0), (255, 0, 0), (0, 85, 0), (170, 0, 51), (255, 85, 0), + (0, 0, 85), (0, 119, 221), (85, 85, 0), (0, 85, 85), (85, 51, 0), (52, 86, 128), + (0, 128, 0), (0, 0, 255), (51, 170, 221), (0, 255, 255),(85, 255, 170), + (170, 255, 85), (255, 255, 0), (255, 170, 0)] + + segm = np.stack([colors[idx] for idx in gray_image.flatten()]) + segm = segm.reshape(height, width, 3).astype(np.uint8) + segm = cv.cvtColor(segm, cv.COLOR_BGR2RGB) + return segm + + +def parse_human(image_path, model_path, backend, target): + """ + Prepare input for execution, run net and postprocess output to parse human. + :param image_path: path to input image + :param model_path: path to JPPNet model + :param backend: name of computation backend + :param target: name of computation target + """ + input = preprocess(image_path) + output = run_net(input, model_path, backend, target) + grayscale_out = postprocess(output) + segmentation = decode_labels(grayscale_out) + return segmentation + + +if __name__ == '__main__': + args, _ = parser.parse_known_args() + output = parse_human(args.input, args.model, args.backend, args.target) + winName = 'Deep learning human parsing in OpenCV' + cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) + cv.imshow(winName, output) + cv.waitKey() From ffa72fc9793ceabfed9d339aa9238758abe47979 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 24 Dec 2019 09:45:27 +0300 Subject: [PATCH 02/25] Refactoring --- samples/dnn/human_parsing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 84d0663871..467a19a3b1 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -117,8 +117,7 @@ def postprocess(out): tail_list_rev.extend([tail_list[15], tail_list[14], tail_list[17], tail_list[16], tail_list[19], tail_list[18]]) tail_output_rev = np.stack(tail_list_rev, axis=0) tail_output_rev = np.flip(tail_output_rev, axis=2) - raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=False) - raw_output_all = np.expand_dims(raw_output_all, axis=0) + raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=True) raw_output_all = np.argmax(raw_output_all, axis=1) raw_output_all = raw_output_all.transpose(1, 2, 0) return raw_output_all From 543e0302d341703637e48063848f67b4f12000e9 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 24 Dec 2019 15:43:19 +0300 Subject: [PATCH 03/25] Support global pooling by axis --- .../dnn/include/opencv2/dnn/all_layers.hpp | 1 + modules/dnn/src/layers/pooling_layer.cpp | 11 ++++ modules/dnn/src/tensorflow/tf_importer.cpp | 52 ++++++++++++------- 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index b251b4adb3..d62b1f0bc7 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -251,6 +251,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; bool globalPooling; + int global_axis; bool computeMaxIdx; String padMode; bool ceilMode; diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 5727e2b3f9..c4b452c5ac 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -95,6 +95,8 @@ public: else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); + global_axis = params.has("global_axis") ? params.get("global_axis") : -1; + getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode); if (kernel_size.size() == 2) { kernel = Size(kernel_size[1], kernel_size[0]); @@ -149,6 +151,9 @@ public: if (globalPooling) { kernel = Size(inp[1], inp[0]); kernel_size = std::vector(inp.begin(), inp.end()); + } else if (global_axis != -1) { + kernel_size[global_axis] = inp[global_axis]; + kernel = Size(kernel_size[1], kernel_size[0]); } getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); @@ -1037,6 +1042,12 @@ virtual Ptr initNgraph(const std::vector >& inp outShape[0] = inputs[1][0]; // Number of proposals; outShape[1] = psRoiOutChannels; } + else if (global_axis != -1) + { + CV_Assert(global_axis >= 0 && global_axis < inpShape.size()); + outShape[2 + global_axis] = 1; + } + int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1); CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX)); diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 6fbaf98f96..426f8f8da4 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1944,14 +1944,13 @@ void TFImporter::populateNet(Net dstNet) layer_id[flattenName] = flattenId; connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0); - LayerParams reshapeLp; std::string reshapeName = name + "/reshape"; CV_Assert(layer_id.find(reshapeName) == layer_id.end()); - reshapeLp.set("axis", 0); + reshapeLp.set("axis", indices.at(0)); reshapeLp.set("num_axes", 1); - std::vector newShape = {1, 1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); + int newShape[] = {1, 1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3)); int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp); layer_id[reshapeName] = reshapeId; @@ -1961,23 +1960,38 @@ void TFImporter::populateNet(Net dstNet) std::string avgName = name + "/avg"; CV_Assert(layer_id.find(avgName) == layer_id.end()); avgLp.set("pool", "ave"); - avgLp.set("kernel_h", 3); // TODO: node.shape[0] - avgLp.set("kernel_w", 1); + // pooling kernel H x 1 + avgLp.set("global_axis", 0); + avgLp.set("kernel_size", 1); int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); layer_id[avgName] = avgId; - // one input only connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); - LayerParams reshapeLp2; - std::string reshapeName2 = name; - CV_Assert(layer_id.find(reshapeName2) == layer_id.end()); - newShape = {2, 20, 314, 253}; // TODO: remove out shapes + LayerParams sliceLp; + std::string sliceName = name + "/slice"; + CV_Assert(layer_id.find(sliceName) == layer_id.end()); + sliceLp.set("axis", indices.at(0)); + int begin[] = {0}; + int size[] = {1}; + sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1)); + sliceLp.set("size", DictValue::arrayInt(&size[0], 1)); + int sliceId = dstNet.addLayer(sliceName, "Slice", sliceLp); + layer_id[sliceName] = sliceId; + connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0); - reshapeLp2.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", indices.at(0)); + squeezeLp.set("end_axis", indices.at(0) + 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(sliceName), squeezeId, 0); - int reshapeId2 = dstNet.addLayer(reshapeName2, "Reshape", reshapeLp2); - layer_id[reshapeName2] = reshapeId2; - connect(layer_id, dstNet, Pin(avgName), reshapeId2, 0); + int id = dstNet.addLayer(name, "Reshape", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, Pin(avgName), id, 0); + connect(layer_id, dstNet, Pin(squeezeName), id, 1); } else { if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); @@ -2021,13 +2035,15 @@ void TFImporter::populateNet(Net dstNet) std::string base_name = name + "/reshape_"; std::vector reshape_names; for (int i = 0; i < num; i++) { - std::string reshape_name = base_name + std::to_string(i); + std::ostringstream ss; + ss << i; + std::string reshape_name = base_name + ss.str(); reshape_names.push_back(reshape_name); LayerParams reshapeLP; reshapeLP.set("axis", dim); reshapeLP.set("num_axes", 1); - std::vector outShape = {1, -1}; - reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + int outShape[] = {1, -1}; + reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2)); int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP); layer_id[reshape_name] = id; connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0); From cf477f7e9faac694c404472274aa9b5b0c6f7627 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 24 Dec 2019 16:42:00 +0300 Subject: [PATCH 04/25] Fix global axis --- modules/dnn/src/layers/pooling_layer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index c4b452c5ac..55abcec0bf 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -95,8 +95,6 @@ public: else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); - global_axis = params.has("global_axis") ? params.get("global_axis") : -1; - getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode); if (kernel_size.size() == 2) { kernel = Size(kernel_size[1], kernel_size[0]); @@ -126,6 +124,7 @@ public: CV_Error(Error::StsBadArg, "Cannot determine pooling type"); setParamsFrom(params); ceilMode = params.get("ceil_mode", true); + global_axis = params.get("global_axis", -1); spatialScale = params.get("spatial_scale", 1); avePoolPaddedArea = params.get("ave_pool_padded_area", true); } From 752653c70b2a0173e99b2323f279e63ef59918e2 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Sat, 28 Dec 2019 18:03:40 +0300 Subject: [PATCH 05/25] Update global pooling --- .../dnn/include/opencv2/dnn/all_layers.hpp | 2 +- modules/dnn/src/layers/pooling_layer.cpp | 28 +++++++++++++++---- modules/dnn/src/tensorflow/tf_importer.cpp | 3 +- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index d62b1f0bc7..73c85cad1c 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -251,7 +251,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; bool globalPooling; - int global_axis; + std::vector isGlobalPooling; bool computeMaxIdx; String padMode; bool ceilMode; diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 55abcec0bf..aae9730c1a 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -122,9 +122,17 @@ public: } else CV_Error(Error::StsBadArg, "Cannot determine pooling type"); + setParamsFrom(params); ceilMode = params.get("ceil_mode", true); - global_axis = params.get("global_axis", -1); + if (params.has("is_global_pooling")) + { + const DictValue &global_axis = params.get("is_global_pooling"); + int size = global_axis.size(); + isGlobalPooling.resize(size); + for (int i = 0; i < size; i++) + isGlobalPooling[i] = global_axis.get(i); + } spatialScale = params.get("spatial_scale", 1); avePoolPaddedArea = params.get("ave_pool_padded_area", true); } @@ -150,8 +158,12 @@ public: if (globalPooling) { kernel = Size(inp[1], inp[0]); kernel_size = std::vector(inp.begin(), inp.end()); - } else if (global_axis != -1) { - kernel_size[global_axis] = inp[global_axis]; + } else if (!isGlobalPooling.empty()) { + for (int i = 0; i < isGlobalPooling.size(); i++) + { + if (isGlobalPooling[i]) + kernel_size[i] = inp[i]; + } kernel = Size(kernel_size[1], kernel_size[0]); } @@ -1041,10 +1053,14 @@ virtual Ptr initNgraph(const std::vector >& inp outShape[0] = inputs[1][0]; // Number of proposals; outShape[1] = psRoiOutChannels; } - else if (global_axis != -1) + else if (!isGlobalPooling.empty()) { - CV_Assert(global_axis >= 0 && global_axis < inpShape.size()); - outShape[2 + global_axis] = 1; + CV_Assert(isGlobalPooling.size() == inpShape.size()); + for (int i = 0; i < isGlobalPooling.size(); i++) + { + if (isGlobalPooling[i]) + outShape[2 + i] = 1; + } } int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1); diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 426f8f8da4..f757efef5c 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1961,7 +1961,8 @@ void TFImporter::populateNet(Net dstNet) CV_Assert(layer_id.find(avgName) == layer_id.end()); avgLp.set("pool", "ave"); // pooling kernel H x 1 - avgLp.set("global_axis", 0); + bool isGlobalPooling[] = {true, false}; + avgLp.set("is_global_pooling", DictValue::arrayInt(&isGlobalPooling[0], 2)); avgLp.set("kernel_size", 1); int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); layer_id[avgName] = avgId; From 7eba3a7c9638c8a3f008e3595d60cbba1391b290 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Thu, 9 Jan 2020 13:59:35 +0300 Subject: [PATCH 06/25] Add pack description --- .../dnn/include/opencv2/dnn/all_layers.hpp | 2 +- modules/dnn/src/layers/layers_common.cpp | 20 +++++++-- modules/dnn/src/layers/layers_common.hpp | 2 +- modules/dnn/src/layers/pooling_layer.cpp | 42 +++++++------------ modules/dnn/src/tensorflow/tf_importer.cpp | 16 +++---- 5 files changed, 43 insertions(+), 39 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 73c85cad1c..2b9de0b663 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -250,7 +250,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN std::vector pads_begin, pads_end; CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; - bool globalPooling; + CV_DEPRECATED_EXTERNAL bool globalPooling; std::vector isGlobalPooling; bool computeMaxIdx; String padMode; diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index f119c12ac0..266d2cf45f 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -144,14 +144,26 @@ void getStrideAndPadding(const LayerParams ¶ms, std::vector& pads_be } } -void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kernel, bool &globalPooling, +void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& globalPooling, std::vector& pads_begin, std::vector& pads_end, std::vector& strides, cv::String &padMode) { - globalPooling = params.has("global_pooling") && - params.get("global_pooling"); + bool is_global = params.get("global_pooling", false); + globalPooling = std::vector(3, is_global); + if (params.has("global_d")) + { + globalPooling[0] = params.get("global_d"); + } + else if (params.has("global_h")) + { + globalPooling[1] = params.get("global_h"); + } + else if (params.has("global_w")) + { + globalPooling[2] = params.get("global_w"); + } - if (globalPooling) + if (is_global) { util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); if(params.has("kernel_h") || params.has("kernel_w") || params.has("kernel_size")) diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp index b574d7eed0..81e7bdd11c 100644 --- a/modules/dnn/src/layers/layers_common.hpp +++ b/modules/dnn/src/layers/layers_common.hpp @@ -63,7 +63,7 @@ void getConvolutionKernelParams(const LayerParams ¶ms, std::vector& std::vector& pads_end, std::vector& strides, std::vector& dilations, cv::String &padMode, std::vector& adjust_pads); -void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kernel, bool &globalPooling, +void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& globalPooling, std::vector& pads_begin, std::vector& pads_end, std::vector& strides, cv::String &padMode); void getConvPoolOutParams(const std::vector& inp, const std::vector& kernel, diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index aae9730c1a..c881cc7c8d 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -79,6 +79,7 @@ public: { computeMaxIdx = true; globalPooling = false; + isGlobalPooling = std::vector(3, false); stride = Size(1, 1); pad_t = pad_l = pad_b = pad_r = 0; @@ -95,7 +96,8 @@ public: else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); - getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode); + getPoolingKernelParams(params, kernel_size, isGlobalPooling, pads_begin, pads_end, strides, padMode); + globalPooling = std::accumulate(isGlobalPooling.begin(), isGlobalPooling.end(), 0) == 3; if (kernel_size.size() == 2) { kernel = Size(kernel_size[1], kernel_size[0]); stride = Size(strides[1], strides[0]); @@ -125,14 +127,7 @@ public: setParamsFrom(params); ceilMode = params.get("ceil_mode", true); - if (params.has("is_global_pooling")) - { - const DictValue &global_axis = params.get("is_global_pooling"); - int size = global_axis.size(); - isGlobalPooling.resize(size); - for (int i = 0; i < size; i++) - isGlobalPooling[i] = global_axis.get(i); - } + spatialScale = params.get("spatial_scale", 1); avePoolPaddedArea = params.get("ave_pool_padded_area", true); } @@ -155,17 +150,14 @@ public: inp.push_back(inputs[0].size[i]); out.push_back(outputs[0].size[i]); } - if (globalPooling) { - kernel = Size(inp[1], inp[0]); - kernel_size = std::vector(inp.begin(), inp.end()); - } else if (!isGlobalPooling.empty()) { - for (int i = 0; i < isGlobalPooling.size(); i++) - { - if (isGlobalPooling[i]) - kernel_size[i] = inp[i]; - } - kernel = Size(kernel_size[1], kernel_size[0]); + kernel_size.resize(out.size()); + int diff_size = isGlobalPooling.size() - kernel_size.size(); + for (int i = 0; i < kernel_size.size(); i++) + { + if (isGlobalPooling[i + diff_size]) + kernel_size[i] = inp[i]; } + kernel = Size(kernel_size[1], kernel_size[0]); getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); if (pads_begin.size() == 2) { @@ -1053,14 +1045,12 @@ virtual Ptr initNgraph(const std::vector >& inp outShape[0] = inputs[1][0]; // Number of proposals; outShape[1] = psRoiOutChannels; } - else if (!isGlobalPooling.empty()) + + int diff_size = isGlobalPooling.size() - (outShape.size() - 2); + for (int i = 2; i < outShape.size(); i++) { - CV_Assert(isGlobalPooling.size() == inpShape.size()); - for (int i = 0; i < isGlobalPooling.size(); i++) - { - if (isGlobalPooling[i]) - outShape[2 + i] = 1; - } + if (isGlobalPooling[i - 2 + diff_size]) + outShape[i] = 1; } int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1); diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index f757efef5c..565002d637 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1961,8 +1961,7 @@ void TFImporter::populateNet(Net dstNet) CV_Assert(layer_id.find(avgName) == layer_id.end()); avgLp.set("pool", "ave"); // pooling kernel H x 1 - bool isGlobalPooling[] = {true, false}; - avgLp.set("is_global_pooling", DictValue::arrayInt(&isGlobalPooling[0], 2)); + avgLp.set("global_h", true); avgLp.set("kernel_size", 1); int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); layer_id[avgName] = avgId; @@ -2025,6 +2024,12 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Pack") { + // op: tf.stack(list of tensors, axis=0) + // Join a list of inputs along a new axis. + // The "axis" specifies the index of the new axis in the dimensions of the output. + // Example: given a list with "N" tensors of shape (C, H, W): + // if axis == 0 then the output tensor will have the shape (N, C, H, W), + // if axis == 1 then the output tensor will have the shape (C, N, H, W). CV_Assert(hasLayerAttr(layer, "axis")); int dim = (int)getLayerAttr(layer, "axis").i(); if (dim != 0) @@ -2054,11 +2059,8 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, "Concat", layerParams); layer_id[name] = id; - for (int li = 0; li < num; li++) { - Pin inp = parsePin(reshape_names[li]); - connect(layer_id, dstNet, inp, id, li); - } - + for (int li = 0; li < num; li++) + connect(layer_id, dstNet, Pin(reshape_names[li]), id, li); } else if (type == "ClipByValue") { From a33d50084dac5e23e956c6592c23b4c288ab2458 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 10 Jan 2020 09:01:57 +0300 Subject: [PATCH 07/25] Add global_pooling_dim flags --- modules/dnn/src/layers/layers_common.cpp | 18 ++++++------------ modules/dnn/src/tensorflow/tf_importer.cpp | 2 +- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index 266d2cf45f..2f8f1091c8 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -150,18 +150,12 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern { bool is_global = params.get("global_pooling", false); globalPooling = std::vector(3, is_global); - if (params.has("global_d")) - { - globalPooling[0] = params.get("global_d"); - } - else if (params.has("global_h")) - { - globalPooling[1] = params.get("global_h"); - } - else if (params.has("global_w")) - { - globalPooling[2] = params.get("global_w"); - } + if (params.has("global_pooling_d")) + globalPooling[0] = params.get("global_pooling_d"); + else if (params.has("global_pooling_h")) + globalPooling[1] = params.get("global_pooling_h"); + else if (params.has("global_pooling_w")) + globalPooling[2] = params.get("global_pooling_w"); if (is_global) { diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 565002d637..b73982eb89 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1961,7 +1961,7 @@ void TFImporter::populateNet(Net dstNet) CV_Assert(layer_id.find(avgName) == layer_id.end()); avgLp.set("pool", "ave"); // pooling kernel H x 1 - avgLp.set("global_h", true); + avgLp.set("global_pooling_h", true); avgLp.set("kernel_size", 1); int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); layer_id[avgName] = avgId; From e9e3af0aaa18b93ad885890de41f6c35e8852b4c Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 10 Jan 2020 11:22:19 +0300 Subject: [PATCH 08/25] Add global pool by axis test --- modules/dnn/src/layers/slice_layer.cpp | 7 +++---- modules/dnn/test/test_tf_importer.cpp | 7 +++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index d7cafd7f93..b29833c5bb 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -114,10 +114,9 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { return backendId == DNN_BACKEND_OPENCV || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && -#ifdef HAVE_INF_ENGINE - INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && -#endif + (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && sliceRanges.size() == 1) || + (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && + sliceRanges.size() == 1 && sliceRanges[0].size() == 4); } diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 8826fa09ff..ecfc1635e0 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -121,6 +121,13 @@ public: } }; +TEST_P(Test_TensorFlow_layers, reduce_mean) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + runTensorFlowNet("global_pool_by_axis"); +} + TEST_P(Test_TensorFlow_layers, conv) { runTensorFlowNet("single_conv"); From 4625337179d07aafe8b80f666950a06e9fcafd93 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 10 Jan 2020 12:41:31 +0300 Subject: [PATCH 09/25] Add docs reduce mean --- modules/dnn/src/layers/layers_common.cpp | 4 +- modules/dnn/src/layers/pooling_layer.cpp | 2 - modules/dnn/src/layers/slice_layer.cpp | 4 +- modules/dnn/src/tensorflow/tf_importer.cpp | 59 ++++++++++++++-------- 4 files changed, 42 insertions(+), 27 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index 2f8f1091c8..f26c9778ec 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -152,9 +152,9 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern globalPooling = std::vector(3, is_global); if (params.has("global_pooling_d")) globalPooling[0] = params.get("global_pooling_d"); - else if (params.has("global_pooling_h")) + if (params.has("global_pooling_h")) globalPooling[1] = params.get("global_pooling_h"); - else if (params.has("global_pooling_w")) + if (params.has("global_pooling_w")) globalPooling[2] = params.get("global_pooling_w"); if (is_global) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index c881cc7c8d..04c2e65b4b 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -124,10 +124,8 @@ public: } else CV_Error(Error::StsBadArg, "Cannot determine pooling type"); - setParamsFrom(params); ceilMode = params.get("ceil_mode", true); - spatialScale = params.get("spatial_scale", 1); avePoolPaddedArea = params.get("ave_pool_padded_area", true); } diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index b29833c5bb..662ade8f14 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -116,7 +116,9 @@ public: return backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && sliceRanges.size() == 1) || (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && - +#ifdef HAVE_INF_ENGINE + INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && +#endif sliceRanges.size() == 1 && sliceRanges[0].size() == 4); } diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index b73982eb89..b3527d1092 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1932,9 +1932,29 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Mean") { + // Computes the mean of elements across dimensions of a tensor. + // If keepdims is false (default) reduces input_tensor along the dimensions given in axis, + // else the reduced dimensions are retained with length 1. + // if indices = [1, 2] in NHWC layout we use global pooling: NxCxHxW --Pooling--> NxCx1x1 + // if keepdims is false we use Flatten after Pooling: out_shape = NxC + // if indices = [0] we use a global pooling by indices. + // To return correct shape, we use Reshape after Pooling. To determine input shape use Slice for input, + // if keepdims is false we use Flatten after Slice. + // Example: input_shape = NxCxHxW + // determine out shape: NxCxHxW --Slice--> 1xCxHxW + // out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW) + // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape + Mat indices = getTensorContent(getConstBlob(layer, value_id, 1)); CV_Assert(indices.type() == CV_32SC1); + // There are two attributes, "keepdims" and a deprecated "keep_dims". + bool keepDims = false; + if (hasLayerAttr(layer, "keepdims")) + keepDims = getLayerAttr(layer, "keepdims").b(); + else if (hasLayerAttr(layer, "keep_dims")) + keepDims = getLayerAttr(layer, "keep_dims").b(); + if (indices.total() == 1 && indices.at(0) == 0) { LayerParams flattenLp; @@ -1968,49 +1988,44 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); LayerParams sliceLp; - std::string sliceName = name + "/slice"; - CV_Assert(layer_id.find(sliceName) == layer_id.end()); + std::string layerShapeName = name + "/slice"; + CV_Assert(layer_id.find(layerShapeName) == layer_id.end()); sliceLp.set("axis", indices.at(0)); int begin[] = {0}; int size[] = {1}; sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1)); sliceLp.set("size", DictValue::arrayInt(&size[0], 1)); - int sliceId = dstNet.addLayer(sliceName, "Slice", sliceLp); - layer_id[sliceName] = sliceId; + int sliceId = dstNet.addLayer(layerShapeName, "Slice", sliceLp); + layer_id[layerShapeName] = sliceId; connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0); - LayerParams squeezeLp; - std::string squeezeName = name + "/squeeze"; - CV_Assert(layer_id.find(squeezeName) == layer_id.end()); - squeezeLp.set("axis", indices.at(0)); - squeezeLp.set("end_axis", indices.at(0) + 1); - int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); - layer_id[squeezeName] = squeezeId; - connect(layer_id, dstNet, Pin(sliceName), squeezeId, 0); + if (!keepDims) + { + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", indices.at(0)); + squeezeLp.set("end_axis", indices.at(0) + 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0); + layerShapeName = squeezeName; + } int id = dstNet.addLayer(name, "Reshape", layerParams); layer_id[name] = id; connect(layer_id, dstNet, Pin(avgName), id, 0); - connect(layer_id, dstNet, Pin(squeezeName), id, 1); + connect(layer_id, dstNet, Pin(layerShapeName), id, 1); } else { if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); layerParams.set("pool", "ave"); layerParams.set("global_pooling", true); - int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - // There are two attributes, "keepdims" and a deprecated "keep_dims". - bool keepDims = false; - if (hasLayerAttr(layer, "keepdims")) - keepDims = getLayerAttr(layer, "keepdims").b(); - else if (hasLayerAttr(layer, "keep_dims")) - keepDims = getLayerAttr(layer, "keep_dims").b(); - if (!keepDims) { LayerParams flattenLp; From 9ed372b297178db85e9da2bb8ecacf88541a781a Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 10 Jan 2020 14:09:08 +0300 Subject: [PATCH 10/25] Update get memory shapes --- modules/dnn/src/layers/pooling_layer.cpp | 35 ++++++++++++------------ 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 04c2e65b4b..1fec982dea 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -149,11 +149,12 @@ public: out.push_back(outputs[0].size[i]); } kernel_size.resize(out.size()); - int diff_size = isGlobalPooling.size() - kernel_size.size(); for (int i = 0; i < kernel_size.size(); i++) { - if (isGlobalPooling[i + diff_size]) - kernel_size[i] = inp[i]; + int pool_idx = isGlobalPooling.size() - 1 - i; + int kernel_idx = kernel_size.size() - 1 - i; + if (isGlobalPooling[pool_idx]) + kernel_size[kernel_idx] = inp[kernel_idx]; } kernel = Size(kernel_size[1], kernel_size[0]); @@ -1001,20 +1002,27 @@ virtual Ptr initNgraph(const std::vector >& inp std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); std::vector outShape(inputs[0].begin(), inputs[0].begin() + 2); - if (globalPooling) + std::vector local_kernel = kernel_size.empty() ? + std::vector(inpShape.begin(), inpShape.end()) : kernel_size; + + for (int i = 0; i < local_kernel.size(); i++) { - outShape.push_back(1); - outShape.push_back(1); + int pool_idx = isGlobalPooling.size() - 1 - i; + int kernel_idx = local_kernel.size() - 1 - i; + if (isGlobalPooling[pool_idx]) + local_kernel[kernel_idx] = inpShape[kernel_idx]; } - else if (type == ROI || type == PSROI) + + + if (type == ROI || type == PSROI) { outShape.push_back(pooledSize.height); outShape.push_back(pooledSize.width); } else if (padMode.empty()) { - for (int i = 0; i < kernel_size.size(); i++) { - float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - kernel_size[i]) / strides[i]; + for (int i = 0; i < local_kernel.size(); i++) { + float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i]; outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); } @@ -1029,7 +1037,7 @@ virtual Ptr initNgraph(const std::vector >& inp } else { - getConvPoolOutParams(inpShape, kernel_size, strides, padMode, std::vector(kernel_size.size(), 1), outShape); + getConvPoolOutParams(inpShape, local_kernel, strides, padMode, std::vector(local_kernel.size(), 1), outShape); } if (type == ROI) { @@ -1044,13 +1052,6 @@ virtual Ptr initNgraph(const std::vector >& inp outShape[1] = psRoiOutChannels; } - int diff_size = isGlobalPooling.size() - (outShape.size() - 2); - for (int i = 2; i < outShape.size(); i++) - { - if (isGlobalPooling[i - 2 + diff_size]) - outShape[i] = 1; - } - int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1); CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX)); From ea31a14cc514b41962590606afc79f1534ab5645 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 10 Jan 2020 16:33:17 +0300 Subject: [PATCH 11/25] Update sample --- samples/dnn/human_parsing.py | 49 +++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 467a19a3b1..43c495200a 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -3,8 +3,8 @@ import numpy as np import argparse -backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, - cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) +backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, + cv.dnn.DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', @@ -36,26 +36,27 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, # 2. Create input # image = cv2.imread(path/to/image) # image_rev = np.flip(image, axis=1) -# image_h, image_w = image.shape[:2] # input = np.stack([image, image_rev], axis=0) # -# 3. Hardcode image_h and image_w shapes to determine output shapes -# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, [image_h, image_w]), -# tf.image.resize_images(parsing_out1_075, [image_h, image_w]), -# tf.image.resize_images(parsing_out1_125, [image_h, image_w])]), axis=0) -# Do similarly with parsing_out2, parsing_out3 -# 4. Remove postprocessing -# - parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) +# 3. Hardcode image_h and image_w shapes to determine output shapes. +# We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py. +# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE), +# tf.image.resize_images(parsing_out1_075, INPUT_SIZE), +# tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0) +# Do similarly with parsing_out2, parsing_out3 +# 4. Remove postprocessing. Last net operation: +# raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) +# Change: +# parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) # # 5. To save model after sess.run(...) add: -# - input_graph_def = tf.get_default_graph().as_graph_def() -# - output_node = "Mean_3" -# - output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) -# - -# - output_graph = "LIP_JPPNet.pb" -# - with tf.gfile.GFile(output_graph, "wb") as f: -# - f.write(output_graph_def.SerializeToString()) - +# input_graph_def = tf.get_default_graph().as_graph_def() +# output_node = "Mean_3" +# output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) +# +# output_graph = "LIP_JPPNet.pb" +# with tf.gfile.GFile(output_graph, "wb") as f: +# f.write(output_graph_def.SerializeToString()) def preprocess(image_path): @@ -73,6 +74,8 @@ def run_net(input, model_path, backend, target): """ Read network and infer model :param model_path: path to JPPNet model + :param backend: computation backend + :param target: computation device """ net = cv.dnn.readNet(model_path) net.setPreferableBackend(backend) @@ -82,10 +85,11 @@ def run_net(input, model_path, backend, target): return out -def postprocess(out): +def postprocess(out, input_shape): """ Create a grayscale human segmentation :param out: network output + :param input_shape: input image width and height """ # LIP classes # 0 Background @@ -111,6 +115,10 @@ def postprocess(out): head_output, tail_output = np.split(out, indices_or_sections=[1], axis=0) head_output = head_output.squeeze(0) tail_output = tail_output.squeeze(0) + + head_output = np.stack([cv.resize(img, dsize=input_shape) for img in head_output[:, ...]]) + tail_output = np.stack([cv.resize(img, dsize=input_shape) for img in tail_output[:, ...]]) + tail_list = np.split(tail_output, indices_or_sections=list(range(1, 20)), axis=0) tail_list = [arr.squeeze(0) for arr in tail_list] tail_list_rev = [tail_list[i] for i in range(14)] @@ -149,8 +157,9 @@ def parse_human(image_path, model_path, backend, target): :param target: name of computation target """ input = preprocess(image_path) + input_h, input_w = input.shape[2:] output = run_net(input, model_path, backend, target) - grayscale_out = postprocess(output) + grayscale_out = postprocess(output, (input_w, input_h)) segmentation = decode_labels(grayscale_out) return segmentation From bb91e6999b353ee18e502d0253f7ea43329df80b Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 20 Jan 2020 10:22:01 +0300 Subject: [PATCH 12/25] Fix demo --- samples/dnn/human_parsing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 43c495200a..74f644af29 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -3,8 +3,7 @@ import numpy as np import argparse -backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, - cv.dnn.DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, cv.dnn.DNN_BACKEND_OPENCV) +backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', @@ -14,7 +13,6 @@ parser.add_argument('--model', '-m', required=True, help='Path to pb model.') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " - "%d: Halide language (http://halide-lang.org/), " "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " "%d: OpenCV implementation" % backends) parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, @@ -23,6 +21,7 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, '%d: OpenCL, ' '%d: OpenCL fp16 (half-float precision), ' '%d: VPU' % targets) +args, _ = parser.parse_known_args() # To get pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view # For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet @@ -165,7 +164,6 @@ def parse_human(image_path, model_path, backend, target): if __name__ == '__main__': - args, _ = parser.parse_known_args() output = parse_human(args.input, args.model, args.backend, args.target) winName = 'Deep learning human parsing in OpenCV' cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) From 08ba63da02a940cd8ba89e05da8471e941704c74 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 20 Jan 2020 15:03:17 +0300 Subject: [PATCH 13/25] Add global pool flags --- modules/dnn/src/layers/layers_common.cpp | 93 ++++++++++++++++++---- modules/dnn/src/layers/pooling_layer.cpp | 45 ++++++----- modules/dnn/src/tensorflow/tf_importer.cpp | 2 +- 3 files changed, 101 insertions(+), 39 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index f26c9778ec..4675a380d4 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -157,27 +157,86 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern if (params.has("global_pooling_w")) globalPooling[2] = params.get("global_pooling_w"); - if (is_global) - { - util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); - if(params.has("kernel_h") || params.has("kernel_w") || params.has("kernel_size")) - { - CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified"); - } - for (int i = 0; i < pads_begin.size(); i++) { - if (pads_begin[i] != 0 || pads_end[i] != 0) - CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0"); - } - for (int i = 0; i < strides.size(); i++) { - if (strides[i] != 1) - CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1"); - } - } - else + is_global = globalPooling[0] || globalPooling[1] || globalPooling[2]; + if (!is_global) { util::getKernelSize(params, kernel); util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size()); } + else + { + if ((globalPooling[0] && params.has("kernel_d")) || + (globalPooling[1] && params.has("kernel_h")) || + (globalPooling[2] && params.has("kernel_w")) || + params.has("kernel_size")) { + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified"); + } + + kernel.resize(3, 1); + pads_begin.resize(3, 0); + pads_end.resize(3, 0); + strides.resize(3, 1); + if (params.has("kernel_d")) + kernel[0] = params.get("kernel_d"); + if (params.has("kernel_h")) + kernel[1] = params.get("kernel_h"); + if (params.has("kernel_w")) + kernel[2] = params.get("kernel_w"); + + if (params.has("pad_t")) + pads_begin[1] = params.get("pad_t"); + if (params.has("pad_l")) + pads_begin[2] = params.get("pad_l"); + if (params.has("pad_b")) + pads_end[1] = params.get("pad_b"); + if (params.has("pad_r")) + pads_end[2] = params.get("pad_r"); + if (params.has("pad_h")) { + pads_begin[1] = params.get("pad_h"); + pads_end[1] = params.get("pad_h"); + } + if (params.has("pad_w")) { + pads_begin[2] = params.get("pad_w"); + pads_end[2] = params.get("pad_w"); + } + if (params.has("pad")) { + DictValue param = params.get("pad"); + if (param.size() == 1) { + std::fill(pads_begin.begin(), pads_begin.end(), param.get(0)); + pads_end = pads_begin; + } else if (param.size() <= pads_begin.size()) { + for (int i = param.size() - 1, j = pads_begin.size() - 1; i >= 0; i--, j--) { + pads_begin[j] = param.get(i); + } + pads_end = pads_begin; + } else { + for (int i = param.size() - 1, j = pads_begin.size() - 1; i >= param.size() / 2; i--, j--) { + pads_begin[j] = param.get(i); + } + for (int i = param.size() / 2 - 1, j = pads_end.size() / 2 - 1; i >= 0; i--, j--) { + pads_end[j] = param.get(i); + } + } + } + + if (params.has("stride_h")) + strides[1] = params.get("stride_h"); + if (params.has("stride_w")) + strides[2] = params.get("stride_w"); + if (params.has("stride")) { + DictValue param = params.get("stride"); + for (int i = param.size() - 1, j = strides.size() - 1; i >= 0; i--, j--) { + strides[j] = param.get(i); + } + if (param.size() == 1) + std::fill(strides.begin() + 1, strides.end(), strides[0]); + } + + for (int i = 0; i < pads_begin.size(); i++) { + if ((pads_begin[i] != 0 || pads_end[i] != 0 || strides[i] != 1) && globalPooling[i]) + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0 and strides must be = 1"); + } + } } void getConvolutionKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& pads_begin, diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 1fec982dea..eef091dd42 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -148,17 +148,24 @@ public: inp.push_back(inputs[0].size[i]); out.push_back(outputs[0].size[i]); } - kernel_size.resize(out.size()); - for (int i = 0; i < kernel_size.size(); i++) - { - int pool_idx = isGlobalPooling.size() - 1 - i; - int kernel_idx = kernel_size.size() - 1 - i; - if (isGlobalPooling[pool_idx]) - kernel_size[kernel_idx] = inp[kernel_idx]; - } - kernel = Size(kernel_size[1], kernel_size[0]); + if (kernel_size.size() > inp.size()) { + kernel_size.erase(kernel_size.begin()); + strides.erase(strides.begin()); + pads_begin.erase(pads_begin.begin()); + pads_end.erase(pads_end.begin()); + } + kernel_size.resize(out.size()); + + for (int i = 0; i < inp.size(); i++) + { + int idx = isGlobalPooling.size() - inp.size() + i; + if (isGlobalPooling[idx]) + kernel_size[i] = inp[i]; + } + kernel = Size(kernel_size.back(), kernel_size[kernel_size.size() - 2]); getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); + if (pads_begin.size() == 2) { pad_t = pads_begin[0]; pad_l = pads_begin[1]; @@ -1005,15 +1012,11 @@ virtual Ptr initNgraph(const std::vector >& inp std::vector local_kernel = kernel_size.empty() ? std::vector(inpShape.begin(), inpShape.end()) : kernel_size; - for (int i = 0; i < local_kernel.size(); i++) - { - int pool_idx = isGlobalPooling.size() - 1 - i; - int kernel_idx = local_kernel.size() - 1 - i; - if (isGlobalPooling[pool_idx]) - local_kernel[kernel_idx] = inpShape[kernel_idx]; + for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { + if (isGlobalPooling[j]) + local_kernel[j] = inpShape[i]; } - if (type == ROI || type == PSROI) { outShape.push_back(pooledSize.height); @@ -1021,17 +1024,17 @@ virtual Ptr initNgraph(const std::vector >& inp } else if (padMode.empty()) { - for (int i = 0; i < local_kernel.size(); i++) { - float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i]; + for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { + float dst = (float)(inpShape[i] + pads_begin[j] + pads_end[j] - local_kernel[j]) / strides[j]; outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); } // If we have padding, ensure that the last pooling starts strictly // inside the image (instead of at the padding); otherwise clip the last. - for (int i = 0; i < pads_end.size(); i++) { - if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { + for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { + if (pads_end[j] && (outShape[2 + i] - 1) * strides[j] >= inpShape[i] + pads_end[j]) { --outShape[2 + i]; - CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); + CV_Assert((outShape[2 + i] - 1) * strides[j] < inpShape[i] + pads_end[j]); } } } diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index b3527d1092..fe8eb4a637 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1982,7 +1982,7 @@ void TFImporter::populateNet(Net dstNet) avgLp.set("pool", "ave"); // pooling kernel H x 1 avgLp.set("global_pooling_h", true); - avgLp.set("kernel_size", 1); + avgLp.set("kernel_w", 1); int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); layer_id[avgName] = avgId; connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); From 97455f1593a9448a0de4015e9270b51f563a9afe Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 20 Jan 2020 17:31:58 +0300 Subject: [PATCH 14/25] Remove useless condition --- modules/dnn/src/layers/layers_common.cpp | 40 +++++++++--------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index 4675a380d4..dbe69b4b81 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -149,13 +149,10 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern std::vector& strides, cv::String &padMode) { bool is_global = params.get("global_pooling", false); - globalPooling = std::vector(3, is_global); - if (params.has("global_pooling_d")) - globalPooling[0] = params.get("global_pooling_d"); - if (params.has("global_pooling_h")) - globalPooling[1] = params.get("global_pooling_h"); - if (params.has("global_pooling_w")) - globalPooling[2] = params.get("global_pooling_w"); + globalPooling.resize(3); + globalPooling[0] = params.get("global_pooling_d", is_global); + globalPooling[1] = params.get("global_pooling_h", is_global); + globalPooling[2] = params.get("global_pooling_w", is_global); is_global = globalPooling[0] || globalPooling[1] || globalPooling[2]; if (!is_global) @@ -172,25 +169,18 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified"); } - kernel.resize(3, 1); + kernel.resize(3); pads_begin.resize(3, 0); pads_end.resize(3, 0); strides.resize(3, 1); - if (params.has("kernel_d")) - kernel[0] = params.get("kernel_d"); - if (params.has("kernel_h")) - kernel[1] = params.get("kernel_h"); - if (params.has("kernel_w")) - kernel[2] = params.get("kernel_w"); + kernel[0] = params.get("kernel_d", 1); + kernel[1] = params.get("kernel_h", 1); + kernel[2] = params.get("kernel_w", 1); - if (params.has("pad_t")) - pads_begin[1] = params.get("pad_t"); - if (params.has("pad_l")) - pads_begin[2] = params.get("pad_l"); - if (params.has("pad_b")) - pads_end[1] = params.get("pad_b"); - if (params.has("pad_r")) - pads_end[2] = params.get("pad_r"); + pads_begin[1] = params.get("pad_t", 0); + pads_begin[2] = params.get("pad_l", 0); + pads_end[1] = params.get("pad_b", 0); + pads_end[2] = params.get("pad_r", 0); if (params.has("pad_h")) { pads_begin[1] = params.get("pad_h"); pads_end[1] = params.get("pad_h"); @@ -219,10 +209,8 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern } } - if (params.has("stride_h")) - strides[1] = params.get("stride_h"); - if (params.has("stride_w")) - strides[2] = params.get("stride_w"); + strides[1] = params.get("stride_h", 1); + strides[2] = params.get("stride_w", 1); if (params.has("stride")) { DictValue param = params.get("stride"); for (int i = param.size() - 1, j = strides.size() - 1; i >= 0; i--, j--) { From d825caf18e1c173ecbc203a83c8c790c9e776873 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 21 Jan 2020 10:09:24 +0300 Subject: [PATCH 15/25] Update check params --- modules/dnn/src/layers/layers_common.cpp | 55 +++--------------------- modules/dnn/src/layers/pooling_layer.cpp | 9 ++-- 2 files changed, 10 insertions(+), 54 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index dbe69b4b81..cd34748398 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -162,6 +162,7 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern } else { + util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); if ((globalPooling[0] && params.has("kernel_d")) || (globalPooling[1] && params.has("kernel_h")) || (globalPooling[2] && params.has("kernel_w")) || @@ -170,60 +171,18 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern } kernel.resize(3); - pads_begin.resize(3, 0); - pads_end.resize(3, 0); - strides.resize(3, 1); kernel[0] = params.get("kernel_d", 1); kernel[1] = params.get("kernel_h", 1); kernel[2] = params.get("kernel_w", 1); - pads_begin[1] = params.get("pad_t", 0); - pads_begin[2] = params.get("pad_l", 0); - pads_end[1] = params.get("pad_b", 0); - pads_end[2] = params.get("pad_r", 0); - if (params.has("pad_h")) { - pads_begin[1] = params.get("pad_h"); - pads_end[1] = params.get("pad_h"); + for (int i = 0, j = globalPooling.size() - pads_begin.size(); i < pads_begin.size(); i++, j++) { + if ((pads_begin[i] != 0 || pads_end[i] != 0) && globalPooling[j]) + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0"); } - if (params.has("pad_w")) { - pads_begin[2] = params.get("pad_w"); - pads_end[2] = params.get("pad_w"); + for (int i = 0, j = globalPooling.size() - strides.size(); i < strides.size(); i++, j++) { + if (strides[i] != 1 && globalPooling[j]) + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1"); } - if (params.has("pad")) { - DictValue param = params.get("pad"); - if (param.size() == 1) { - std::fill(pads_begin.begin(), pads_begin.end(), param.get(0)); - pads_end = pads_begin; - } else if (param.size() <= pads_begin.size()) { - for (int i = param.size() - 1, j = pads_begin.size() - 1; i >= 0; i--, j--) { - pads_begin[j] = param.get(i); - } - pads_end = pads_begin; - } else { - for (int i = param.size() - 1, j = pads_begin.size() - 1; i >= param.size() / 2; i--, j--) { - pads_begin[j] = param.get(i); - } - for (int i = param.size() / 2 - 1, j = pads_end.size() / 2 - 1; i >= 0; i--, j--) { - pads_end[j] = param.get(i); - } - } - } - - strides[1] = params.get("stride_h", 1); - strides[2] = params.get("stride_w", 1); - if (params.has("stride")) { - DictValue param = params.get("stride"); - for (int i = param.size() - 1, j = strides.size() - 1; i >= 0; i--, j--) { - strides[j] = param.get(i); - } - if (param.size() == 1) - std::fill(strides.begin() + 1, strides.end(), strides[0]); - } - - for (int i = 0; i < pads_begin.size(); i++) { - if ((pads_begin[i] != 0 || pads_end[i] != 0 || strides[i] != 1) && globalPooling[i]) - CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0 and strides must be = 1"); - } } } diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index eef091dd42..8d43dc3ebf 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -151,9 +151,6 @@ public: if (kernel_size.size() > inp.size()) { kernel_size.erase(kernel_size.begin()); - strides.erase(strides.begin()); - pads_begin.erase(pads_begin.begin()); - pads_end.erase(pads_end.begin()); } kernel_size.resize(out.size()); @@ -1025,16 +1022,16 @@ virtual Ptr initNgraph(const std::vector >& inp else if (padMode.empty()) { for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { - float dst = (float)(inpShape[i] + pads_begin[j] + pads_end[j] - local_kernel[j]) / strides[j]; + float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[j]) / strides[i]; outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); } // If we have padding, ensure that the last pooling starts strictly // inside the image (instead of at the padding); otherwise clip the last. for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { - if (pads_end[j] && (outShape[2 + i] - 1) * strides[j] >= inpShape[i] + pads_end[j]) { + if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { --outShape[2 + i]; - CV_Assert((outShape[2 + i] - 1) * strides[j] < inpShape[i] + pads_end[j]); + CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); } } } From fada959b4b0271e4cc19179f3b79076506ddfaab Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 21 Jan 2020 10:28:50 +0300 Subject: [PATCH 16/25] Fix comment --- modules/dnn/src/layers/layers_common.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index cd34748398..e9eb9fa649 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -155,12 +155,7 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern globalPooling[2] = params.get("global_pooling_w", is_global); is_global = globalPooling[0] || globalPooling[1] || globalPooling[2]; - if (!is_global) - { - util::getKernelSize(params, kernel); - util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size()); - } - else + if (is_global) { util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); if ((globalPooling[0] && params.has("kernel_d")) || @@ -184,6 +179,11 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1"); } } + else + { + util::getKernelSize(params, kernel); + util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size()); + } } void getConvolutionKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& pads_begin, From 832ca0734d4532acd7d92007a3e24dbd493048f4 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 22 Jan 2020 10:52:40 +0300 Subject: [PATCH 17/25] Refactoring --- .../dnn/include/opencv2/dnn/all_layers.hpp | 2 +- modules/dnn/src/layers/pooling_layer.cpp | 22 ++++++------ samples/dnn/human_parsing.py | 36 +++++++++---------- 3 files changed, 29 insertions(+), 31 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 2b9de0b663..efbc8b131e 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -250,7 +250,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN std::vector pads_begin, pads_end; CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; - CV_DEPRECATED_EXTERNAL bool globalPooling; + CV_DEPRECATED_EXTERNAL bool globalPooling; //!< Flag is true if at least one of the axes is global pooled. std::vector isGlobalPooling; bool computeMaxIdx; String padMode; diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 8d43dc3ebf..3e1fafb338 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -97,7 +97,7 @@ public: CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); getPoolingKernelParams(params, kernel_size, isGlobalPooling, pads_begin, pads_end, strides, padMode); - globalPooling = std::accumulate(isGlobalPooling.begin(), isGlobalPooling.end(), 0) == 3; + globalPooling = isGlobalPooling[0] || isGlobalPooling[1] || isGlobalPooling[2]; if (kernel_size.size() == 2) { kernel = Size(kernel_size[1], kernel_size[0]); stride = Size(strides[1], strides[0]); @@ -149,18 +149,16 @@ public: out.push_back(outputs[0].size[i]); } - if (kernel_size.size() > inp.size()) { - kernel_size.erase(kernel_size.begin()); - } - kernel_size.resize(out.size()); + if (globalPooling) { + std::vector finalKernel; + for (int i = 0; i < inp.size(); i++) { + int idx = isGlobalPooling.size() - inp.size() + i; + finalKernel.push_back(isGlobalPooling[idx] ? inp[i] : kernel_size[idx]); + } + kernel_size = finalKernel; + kernel = Size(kernel_size[1], kernel_size[0]); + } - for (int i = 0; i < inp.size(); i++) - { - int idx = isGlobalPooling.size() - inp.size() + i; - if (isGlobalPooling[idx]) - kernel_size[i] = inp[i]; - } - kernel = Size(kernel_size.back(), kernel_size[kernel_size.size() - 2]); getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); if (pads_begin.size() == 2) { diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 74f644af29..4a51c35af7 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -6,23 +6,6 @@ import argparse backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument('--input', '-i', help='Path to input image. Skip this argument to capture frames from a camera.') -parser.add_argument('--model', '-m', required=True, help='Path to pb model.') -parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, - help="Choose one of computation backends: " - "%d: automatically (by default), " - "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " - "%d: OpenCV implementation" % backends) -parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, - help='Choose one of target computation devices: ' - '%d: CPU target (by default), ' - '%d: OpenCL, ' - '%d: OpenCL fp16 (half-float precision), ' - '%d: VPU' % targets) -args, _ = parser.parse_known_args() - # To get pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view # For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet # Change script evaluate_parsing_JPPNet-s2.py for human parsing @@ -147,7 +130,7 @@ def decode_labels(gray_image): return segm -def parse_human(image_path, model_path, backend, target): +def parse_human(image_path, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU): """ Prepare input for execution, run net and postprocess output to parse human. :param image_path: path to input image @@ -164,6 +147,23 @@ def parse_human(image_path, model_path, backend, target): if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--input', '-i', help='Path to input image. Skip this argument to capture frames from a camera.') + parser.add_argument('--model', '-m', required=True, help='Path to pb model.') + parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, + help="Choose one of computation backends: " + "%d: automatically (by default), " + "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "%d: OpenCV implementation" % backends) + parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, + help='Choose one of target computation devices: ' + '%d: CPU target (by default), ' + '%d: OpenCL, ' + '%d: OpenCL fp16 (half-float precision), ' + '%d: VPU' % targets) + args, _ = parser.parse_known_args() + output = parse_human(args.input, args.model, args.backend, args.target) winName = 'Deep learning human parsing in OpenCV' cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) From 35c24480ae4633d3d6b0e5bae95d336b4ba0ac55 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 22 Jan 2020 13:36:29 +0300 Subject: [PATCH 18/25] Fix axis --- modules/dnn/src/tensorflow/tf_importer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index fe8eb4a637..ef0588c9df 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1967,7 +1967,7 @@ void TFImporter::populateNet(Net dstNet) LayerParams reshapeLp; std::string reshapeName = name + "/reshape"; CV_Assert(layer_id.find(reshapeName) == layer_id.end()); - reshapeLp.set("axis", indices.at(0)); + reshapeLp.set("axis", 0); reshapeLp.set("num_axes", 1); int newShape[] = {1, 1, -1}; reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3)); @@ -1990,7 +1990,7 @@ void TFImporter::populateNet(Net dstNet) LayerParams sliceLp; std::string layerShapeName = name + "/slice"; CV_Assert(layer_id.find(layerShapeName) == layer_id.end()); - sliceLp.set("axis", indices.at(0)); + sliceLp.set("axis", 0); int begin[] = {0}; int size[] = {1}; sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1)); @@ -2004,8 +2004,8 @@ void TFImporter::populateNet(Net dstNet) LayerParams squeezeLp; std::string squeezeName = name + "/squeeze"; CV_Assert(layer_id.find(squeezeName) == layer_id.end()); - squeezeLp.set("axis", indices.at(0)); - squeezeLp.set("end_axis", indices.at(0) + 1); + squeezeLp.set("axis", 0); + squeezeLp.set("end_axis", 1); int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); layer_id[squeezeName] = squeezeId; connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0); From 7e5b5390bac5243a8d98b5f8a7573fc334426ef8 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 22 Jan 2020 14:57:54 +0300 Subject: [PATCH 19/25] Fix comments --- modules/dnn/include/opencv2/dnn/all_layers.hpp | 2 +- modules/dnn/src/tensorflow/tf_importer.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index efbc8b131e..0c964df06b 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -250,7 +250,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN std::vector pads_begin, pads_end; CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; - CV_DEPRECATED_EXTERNAL bool globalPooling; //!< Flag is true if at least one of the axes is global pooled. + bool globalPooling; //!< Flag is true if at least one of the axes is global pooled. std::vector isGlobalPooling; bool computeMaxIdx; String padMode; diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index ef0588c9df..7dffb1c04f 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -2054,12 +2054,11 @@ void TFImporter::populateNet(Net dstNet) int num = (int)getLayerAttr(layer, "N").i(); CV_Assert(layer.input_size() == num); std::string base_name = name + "/reshape_"; - std::vector reshape_names; + std::vector reshape_ids; for (int i = 0; i < num; i++) { std::ostringstream ss; ss << i; std::string reshape_name = base_name + ss.str(); - reshape_names.push_back(reshape_name); LayerParams reshapeLP; reshapeLP.set("axis", dim); reshapeLP.set("num_axes", 1); @@ -2067,6 +2066,7 @@ void TFImporter::populateNet(Net dstNet) reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2)); int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP); layer_id[reshape_name] = id; + reshape_ids.push_back(id); connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0); } @@ -2075,7 +2075,7 @@ void TFImporter::populateNet(Net dstNet) layer_id[name] = id; for (int li = 0; li < num; li++) - connect(layer_id, dstNet, Pin(reshape_names[li]), id, li); + dstNet.connect(reshape_ids[li], 0, id, li); } else if (type == "ClipByValue") { From 55b03dcaba72a626b0a75e62066fe346f1e68774 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 22 Jan 2020 17:36:17 +0300 Subject: [PATCH 20/25] Refactoring --- modules/dnn/src/layers/layers_common.cpp | 3 +-- modules/dnn/src/layers/pooling_layer.cpp | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index e9eb9fa649..78f91a69d6 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -154,8 +154,7 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern globalPooling[1] = params.get("global_pooling_h", is_global); globalPooling[2] = params.get("global_pooling_w", is_global); - is_global = globalPooling[0] || globalPooling[1] || globalPooling[2]; - if (is_global) + if (globalPooling[0] || globalPooling[1] || globalPooling[2]) { util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); if ((globalPooling[0] && params.has("kernel_d")) || diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 3e1fafb338..ff62b6e55d 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -148,7 +148,6 @@ public: inp.push_back(inputs[0].size[i]); out.push_back(outputs[0].size[i]); } - if (globalPooling) { std::vector finalKernel; for (int i = 0; i < inp.size(); i++) { @@ -160,7 +159,6 @@ public: } getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); - if (pads_begin.size() == 2) { pad_t = pads_begin[0]; pad_l = pads_begin[1]; @@ -1004,14 +1002,15 @@ virtual Ptr initNgraph(const std::vector >& inp std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); std::vector outShape(inputs[0].begin(), inputs[0].begin() + 2); - std::vector local_kernel = kernel_size.empty() ? - std::vector(inpShape.begin(), inpShape.end()) : kernel_size; + std::vector local_kernel = kernel_size.size() > inpShape.size() ? + std::vector(kernel_size.begin() + 1, kernel_size.end()) : kernel_size; - for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { - if (isGlobalPooling[j]) - local_kernel[j] = inpShape[i]; + if (globalPooling) { + for (int i = 0, j = kernel_size.size() - inpShape.size(); i < inpShape.size(); i++, j++) { + if (isGlobalPooling[j]) + local_kernel[i] = inpShape[i]; + } } - if (type == ROI || type == PSROI) { outShape.push_back(pooledSize.height); @@ -1019,14 +1018,14 @@ virtual Ptr initNgraph(const std::vector >& inp } else if (padMode.empty()) { - for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { - float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[j]) / strides[i]; + for (int i = 0; i < pads_end.size(); i++) { + float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i]; outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); } // If we have padding, ensure that the last pooling starts strictly // inside the image (instead of at the padding); otherwise clip the last. - for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { + for (int i = 0; i < inpShape.size(); i++) { if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { --outShape[2 + i]; CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); @@ -1049,7 +1048,6 @@ virtual Ptr initNgraph(const std::vector >& inp outShape[0] = inputs[1][0]; // Number of proposals; outShape[1] = psRoiOutChannels; } - int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1); CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX)); From d9474648f05399682072078a97756fbcc54d7d98 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 24 Jan 2020 11:00:06 +0300 Subject: [PATCH 21/25] Fix diff --- modules/dnn/src/layers/pooling_layer.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index ff62b6e55d..326e61ba64 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -1002,15 +1002,16 @@ virtual Ptr initNgraph(const std::vector >& inp std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); std::vector outShape(inputs[0].begin(), inputs[0].begin() + 2); - std::vector local_kernel = kernel_size.size() > inpShape.size() ? - std::vector(kernel_size.begin() + 1, kernel_size.end()) : kernel_size; - + std::vector local_kernel; if (globalPooling) { - for (int i = 0, j = kernel_size.size() - inpShape.size(); i < inpShape.size(); i++, j++) { - if (isGlobalPooling[j]) - local_kernel[i] = inpShape[i]; + for (int i = 0; i < inpShape.size(); i++) { + int idx = isGlobalPooling.size() - inpShape.size() + i; + local_kernel.push_back(isGlobalPooling[idx] ? inpShape[i] : kernel_size[idx]); } + } else { + local_kernel = kernel_size; } + if (type == ROI || type == PSROI) { outShape.push_back(pooledSize.height); @@ -1018,14 +1019,14 @@ virtual Ptr initNgraph(const std::vector >& inp } else if (padMode.empty()) { - for (int i = 0; i < pads_end.size(); i++) { + for (int i = 0; i < local_kernel.size(); i++) { float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i]; outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); } // If we have padding, ensure that the last pooling starts strictly // inside the image (instead of at the padding); otherwise clip the last. - for (int i = 0; i < inpShape.size(); i++) { + for (int i = 0; i < pads_end.size(); i++) { if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { --outShape[2 + i]; CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); From 4b351120222f5fde688f37a465c78e9ef6668787 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 24 Jan 2020 16:30:10 +0300 Subject: [PATCH 22/25] Update sample --- samples/dnn/human_parsing.py | 76 ++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 4a51c35af7..5bfe19aee7 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -1,45 +1,11 @@ +import argparse import cv2 as cv import numpy as np -import argparse backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -# To get pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view -# For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet -# Change script evaluate_parsing_JPPNet-s2.py for human parsing -# 1. Remove preprocessing to create image_batch_origin: -# - with tf.name_scope("create_inputs"): -# ... -# Add -# - image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input') -# -# 2. Create input -# image = cv2.imread(path/to/image) -# image_rev = np.flip(image, axis=1) -# input = np.stack([image, image_rev], axis=0) -# -# 3. Hardcode image_h and image_w shapes to determine output shapes. -# We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py. -# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE), -# tf.image.resize_images(parsing_out1_075, INPUT_SIZE), -# tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0) -# Do similarly with parsing_out2, parsing_out3 -# 4. Remove postprocessing. Last net operation: -# raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) -# Change: -# parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) -# -# 5. To save model after sess.run(...) add: -# input_graph_def = tf.get_default_graph().as_graph_def() -# output_node = "Mean_3" -# output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) -# -# output_graph = "LIP_JPPNet.pb" -# with tf.gfile.GFile(output_graph, "wb") as f: -# f.write(output_graph_def.SerializeToString()) - def preprocess(image_path): """ @@ -149,8 +115,9 @@ def parse_human(image_path, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, targe if __name__ == '__main__': parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input', '-i', help='Path to input image. Skip this argument to capture frames from a camera.') - parser.add_argument('--model', '-m', required=True, help='Path to pb model.') + parser.add_argument('--input', '-i', help='Path to input image.') + parser.add_argument('--model', '-m', required=True, help='Path to pb model + (https://drive.google.com/open?id=1XHvo111Gj1ZGoNUJt4Y4OsShrt_eUT34).') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " @@ -169,3 +136,38 @@ if __name__ == '__main__': cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) cv.imshow(winName, output) cv.waitKey() + + +# To get original .meta pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view +# For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet +# Change script evaluate_parsing_JPPNet-s2.py for human parsing +# 1. Remove preprocessing to create image_batch_origin: +# - with tf.name_scope("create_inputs"): +# ... +# Add +# - image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input') +# +# 2. Create input +# image = cv2.imread(path/to/image) +# image_rev = np.flip(image, axis=1) +# input = np.stack([image, image_rev], axis=0) +# +# 3. Hardcode image_h and image_w shapes to determine output shapes. +# We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py. +# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE), +# tf.image.resize_images(parsing_out1_075, INPUT_SIZE), +# tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0) +# Do similarly with parsing_out2, parsing_out3 +# 4. Remove postprocessing. Last net operation: +# raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) +# Change: +# parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) +# +# 5. To save model after sess.run(...) add: +# input_graph_def = tf.get_default_graph().as_graph_def() +# output_node = "Mean_3" +# output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) +# +# output_graph = "LIP_JPPNet.pb" +# with tf.gfile.GFile(output_graph, "wb") as f: +# f.write(output_graph_def.SerializeToString()) From 5790810c3ab0aa9cd451716e54923077ec6746a0 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 27 Jan 2020 10:07:48 +0300 Subject: [PATCH 23/25] Change link --- samples/dnn/human_parsing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 5bfe19aee7..788ec95e59 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -115,9 +115,8 @@ def parse_human(image_path, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, targe if __name__ == '__main__': parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input', '-i', help='Path to input image.') - parser.add_argument('--model', '-m', required=True, help='Path to pb model - (https://drive.google.com/open?id=1XHvo111Gj1ZGoNUJt4Y4OsShrt_eUT34).') + parser.add_argument('--input', '-i', required=True, help='Path to input image.') + parser.add_argument('--model', '-m', required=True, help='Path to pb model(https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0).') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " From 4a19ac5aca22a29fd3d00ee39c56df19b00c310e Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 27 Jan 2020 16:18:14 +0300 Subject: [PATCH 24/25] Move instruction --- modules/dnn/include/opencv2/dnn/dnn.hpp | 4 +- samples/dnn/human_parsing.py | 80 +++++++++++++------------ 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 94e2ada3f1..f5b5b9a101 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -47,9 +47,9 @@ #include "opencv2/core/async.hpp" #if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_EXPERIMENTAL_NS -#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_34_v15 { +#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_34_v16 { #define CV__DNN_EXPERIMENTAL_NS_END } -namespace cv { namespace dnn { namespace experimental_dnn_34_v15 { } using namespace experimental_dnn_34_v15; }} +namespace cv { namespace dnn { namespace experimental_dnn_34_v16 { } using namespace experimental_dnn_34_v16; }} #else #define CV__DNN_EXPERIMENTAL_NS_BEGIN #define CV__DNN_EXPERIMENTAL_NS_END diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 788ec95e59..47e1a68473 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -1,6 +1,47 @@ +#!/usr/bin/env python +''' + You can download the converted pb model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0 + or convert the model yourself. + + Follow these steps if you want to convert the original model yourself: + To get original .meta pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view + For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet + Change script evaluate_parsing_JPPNet-s2.py for human parsing + 1. Remove preprocessing to create image_batch_origin: + with tf.name_scope("create_inputs"): + ... + Add + image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input') + + 2. Create input + image = cv2.imread(path/to/image) + image_rev = np.flip(image, axis=1) + input = np.stack([image, image_rev], axis=0) + + 3. Hardcode image_h and image_w shapes to determine output shapes. + We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py. + parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE), + tf.image.resize_images(parsing_out1_075, INPUT_SIZE), + tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0) + Do similarly with parsing_out2, parsing_out3 + 4. Remove postprocessing. Last net operation: + raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) + Change: + parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) + + 5. To save model after sess.run(...) add: + input_graph_def = tf.get_default_graph().as_graph_def() + output_node = "Mean_3" + output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) + + output_graph = "LIP_JPPNet.pb" + with tf.gfile.GFile(output_graph, "wb") as f: + f.write(output_graph_def.SerializeToString())' +''' + import argparse -import cv2 as cv import numpy as np +import cv2 as cv backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) @@ -116,7 +157,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--input', '-i', required=True, help='Path to input image.') - parser.add_argument('--model', '-m', required=True, help='Path to pb model(https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0).') + parser.add_argument('--model', '-m', required=True, help='Path to pb model.') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " @@ -135,38 +176,3 @@ if __name__ == '__main__': cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) cv.imshow(winName, output) cv.waitKey() - - -# To get original .meta pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view -# For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet -# Change script evaluate_parsing_JPPNet-s2.py for human parsing -# 1. Remove preprocessing to create image_batch_origin: -# - with tf.name_scope("create_inputs"): -# ... -# Add -# - image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input') -# -# 2. Create input -# image = cv2.imread(path/to/image) -# image_rev = np.flip(image, axis=1) -# input = np.stack([image, image_rev], axis=0) -# -# 3. Hardcode image_h and image_w shapes to determine output shapes. -# We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py. -# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE), -# tf.image.resize_images(parsing_out1_075, INPUT_SIZE), -# tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0) -# Do similarly with parsing_out2, parsing_out3 -# 4. Remove postprocessing. Last net operation: -# raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) -# Change: -# parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) -# -# 5. To save model after sess.run(...) add: -# input_graph_def = tf.get_default_graph().as_graph_def() -# output_node = "Mean_3" -# output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) -# -# output_graph = "LIP_JPPNet.pb" -# with tf.gfile.GFile(output_graph, "wb") as f: -# f.write(output_graph_def.SerializeToString()) From 24166ac40226981aeefb551f267ac0762cdf4e25 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 27 Jan 2020 17:59:58 +0300 Subject: [PATCH 25/25] Fix indentation --- samples/dnn/human_parsing.py | 64 ++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 47e1a68473..c4ac11bad2 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -1,42 +1,42 @@ #!/usr/bin/env python ''' - You can download the converted pb model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0 - or convert the model yourself. +You can download the converted pb model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0 +or convert the model yourself. - Follow these steps if you want to convert the original model yourself: - To get original .meta pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view - For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet - Change script evaluate_parsing_JPPNet-s2.py for human parsing - 1. Remove preprocessing to create image_batch_origin: - with tf.name_scope("create_inputs"): - ... - Add - image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input') +Follow these steps if you want to convert the original model yourself: + To get original .meta pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view + For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet + Change script evaluate_parsing_JPPNet-s2.py for human parsing + 1. Remove preprocessing to create image_batch_origin: + with tf.name_scope("create_inputs"): + ... + Add + image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input') - 2. Create input - image = cv2.imread(path/to/image) - image_rev = np.flip(image, axis=1) - input = np.stack([image, image_rev], axis=0) + 2. Create input + image = cv2.imread(path/to/image) + image_rev = np.flip(image, axis=1) + input = np.stack([image, image_rev], axis=0) - 3. Hardcode image_h and image_w shapes to determine output shapes. - We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py. - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE), - tf.image.resize_images(parsing_out1_075, INPUT_SIZE), - tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0) - Do similarly with parsing_out2, parsing_out3 - 4. Remove postprocessing. Last net operation: - raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) - Change: - parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) + 3. Hardcode image_h and image_w shapes to determine output shapes. + We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py. + parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE), + tf.image.resize_images(parsing_out1_075, INPUT_SIZE), + tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0) + Do similarly with parsing_out2, parsing_out3 + 4. Remove postprocessing. Last net operation: + raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) + Change: + parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) - 5. To save model after sess.run(...) add: - input_graph_def = tf.get_default_graph().as_graph_def() - output_node = "Mean_3" - output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) + 5. To save model after sess.run(...) add: + input_graph_def = tf.get_default_graph().as_graph_def() + output_node = "Mean_3" + output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) - output_graph = "LIP_JPPNet.pb" - with tf.gfile.GFile(output_graph, "wb") as f: - f.write(output_graph_def.SerializeToString())' + output_graph = "LIP_JPPNet.pb" + with tf.gfile.GFile(output_graph, "wb") as f: + f.write(output_graph_def.SerializeToString())' ''' import argparse