From 6e33769e56c520b0c17f1a971f9513fc75a9fae9 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 23 Dec 2019 15:47:20 +0300 Subject: [PATCH 01/42] Add human parsing demo --- modules/dnn/src/tensorflow/tf_importer.cpp | 120 ++++++++++++--- samples/dnn/human_parsing.py | 165 +++++++++++++++++++++ 2 files changed, 265 insertions(+), 20 deletions(-) create mode 100644 samples/dnn/human_parsing.py diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index b1d7178798..6fbaf98f96 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1935,34 +1935,114 @@ void TFImporter::populateNet(Net dstNet) Mat indices = getTensorContent(getConstBlob(layer, value_id, 1)); CV_Assert(indices.type() == CV_32SC1); - if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) - CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); - - layerParams.set("pool", "ave"); - layerParams.set("global_pooling", true); - - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; - - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - - // There are two attributes, "keepdims" and a deprecated "keep_dims". - bool keepDims = false; - if (hasLayerAttr(layer, "keepdims")) - keepDims = getLayerAttr(layer, "keepdims").b(); - else if (hasLayerAttr(layer, "keep_dims")) - keepDims = getLayerAttr(layer, "keep_dims").b(); - - if (!keepDims) + if (indices.total() == 1 && indices.at(0) == 0) { LayerParams flattenLp; std::string flattenName = name + "/flatten"; CV_Assert(layer_id.find(flattenName) == layer_id.end()); int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); layer_id[flattenName] = flattenId; - connect(layer_id, dstNet, Pin(name), flattenId, 0); + connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0); + + + LayerParams reshapeLp; + std::string reshapeName = name + "/reshape"; + CV_Assert(layer_id.find(reshapeName) == layer_id.end()); + reshapeLp.set("axis", 0); + reshapeLp.set("num_axes", 1); + std::vector newShape = {1, 1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); + + int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp); + layer_id[reshapeName] = reshapeId; + connect(layer_id, dstNet, Pin(flattenName), reshapeId, 0); + + LayerParams avgLp; + std::string avgName = name + "/avg"; + CV_Assert(layer_id.find(avgName) == layer_id.end()); + avgLp.set("pool", "ave"); + avgLp.set("kernel_h", 3); // TODO: node.shape[0] + avgLp.set("kernel_w", 1); + int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); + layer_id[avgName] = avgId; + // one input only + connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); + + LayerParams reshapeLp2; + std::string reshapeName2 = name; + CV_Assert(layer_id.find(reshapeName2) == layer_id.end()); + newShape = {2, 20, 314, 253}; // TODO: remove out shapes + + reshapeLp2.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); + + int reshapeId2 = dstNet.addLayer(reshapeName2, "Reshape", reshapeLp2); + layer_id[reshapeName2] = reshapeId2; + connect(layer_id, dstNet, Pin(avgName), reshapeId2, 0); + } else { + if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) + CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); + + layerParams.set("pool", "ave"); + layerParams.set("global_pooling", true); + + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; + + connect(layer_id, dstNet, 
parsePin(layer.input(0)), id, 0); + + // There are two attributes, "keepdims" and a deprecated "keep_dims". + bool keepDims = false; + if (hasLayerAttr(layer, "keepdims")) + keepDims = getLayerAttr(layer, "keepdims").b(); + else if (hasLayerAttr(layer, "keep_dims")) + keepDims = getLayerAttr(layer, "keep_dims").b(); + + if (!keepDims) + { + LayerParams flattenLp; + std::string flattenName = name + "/flatten"; + CV_Assert(layer_id.find(flattenName) == layer_id.end()); + int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); + layer_id[flattenName] = flattenId; + connect(layer_id, dstNet, Pin(name), flattenId, 0); + } } } + else if (type == "Pack") + { + CV_Assert(hasLayerAttr(layer, "axis")); + int dim = (int)getLayerAttr(layer, "axis").i(); + if (dim != 0) + CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation."); + + CV_Assert(hasLayerAttr(layer, "N")); + int num = (int)getLayerAttr(layer, "N").i(); + CV_Assert(layer.input_size() == num); + std::string base_name = name + "/reshape_"; + std::vector reshape_names; + for (int i = 0; i < num; i++) { + std::string reshape_name = base_name + std::to_string(i); + reshape_names.push_back(reshape_name); + LayerParams reshapeLP; + reshapeLP.set("axis", dim); + reshapeLP.set("num_axes", 1); + std::vector outShape = {1, -1}; + reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP); + layer_id[reshape_name] = id; + connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0); + } + + layerParams.set("axis", dim); + int id = dstNet.addLayer(name, "Concat", layerParams); + layer_id[name] = id; + + for (int li = 0; li < num; li++) { + Pin inp = parsePin(reshape_names[li]); + connect(layer_id, dstNet, inp, id, li); + } + + } else if (type == "ClipByValue") { // op: "ClipByValue" diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py new file mode 100644 index 0000000000..84d0663871 --- /dev/null +++ b/samples/dnn/human_parsing.py @@ -0,0 +1,165 @@ +import cv2 as cv +import numpy as np +import argparse + + +backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, + cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) +targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) + +parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('--input', '-i', help='Path to input image. 
Skip this argument to capture frames from a camera.') +parser.add_argument('--model', '-m', required=True, help='Path to pb model.') +parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, + help="Choose one of computation backends: " + "%d: automatically (by default), " + "%d: Halide language (http://halide-lang.org/), " + "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "%d: OpenCV implementation" % backends) +parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, + help='Choose one of target computation devices: ' + '%d: CPU target (by default), ' + '%d: OpenCL, ' + '%d: OpenCL fp16 (half-float precision), ' + '%d: VPU' % targets) + +# To get the pre-trained model, download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view +# To convert the .meta model to .pb correctly, download the original repository https://github.com/Engineering-Course/LIP_JPPNet +# Change the script evaluate_parsing_JPPNet-s2.py for human parsing as follows: +# 1. Remove preprocessing to create image_batch_origin: +# - with tf.name_scope("create_inputs"): +# ... +# Add +# - image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input') +# +# 2. Create input +# image = cv2.imread(path/to/image) +# image_rev = np.flip(image, axis=1) +# image_h, image_w = image.shape[:2] +# input = np.stack([image, image_rev], axis=0) +# +# 3. Hardcode image_h and image_w shapes to determine output shapes +# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, [image_h, image_w]), +# tf.image.resize_images(parsing_out1_075, [image_h, image_w]), +# tf.image.resize_images(parsing_out1_125, [image_h, image_w])]), axis=0) +# Do similarly with parsing_out2, parsing_out3 +# 4. Remove postprocessing +# - parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) +# +# 5. To save model after sess.run(...) 
add: +# - input_graph_def = tf.get_default_graph().as_graph_def() +# - output_node = "Mean_3" +# - output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) +# - +# - output_graph = "LIP_JPPNet.pb" +# - with tf.gfile.GFile(output_graph, "wb") as f: +# - f.write(output_graph_def.SerializeToString()) + + + +def preprocess(image_path): + """ + Create 4-dimensional blob from image and flip image + :param image_path: path to input image + """ + image = cv.imread(image_path) + image_rev = np.flip(image, axis=1) + input = cv.dnn.blobFromImages([image, image_rev], mean=(104.00698793, 116.66876762, 122.67891434)) + return input + + +def run_net(input, model_path, backend, target): + """ + Read network and infer model + :param model_path: path to JPPNet model + """ + net = cv.dnn.readNet(model_path) + net.setPreferableBackend(backend) + net.setPreferableTarget(target) + net.setInput(input) + out = net.forward() + return out + + +def postprocess(out): + """ + Create a grayscale human segmentation + :param out: network output + """ + # LIP classes + # 0 Background + # 1 Hat + # 2 Hair + # 3 Glove + # 4 Sunglasses + # 5 UpperClothes + # 6 Dress + # 7 Coat + # 8 Socks + # 9 Pants + # 10 Jumpsuits + # 11 Scarf + # 12 Skirt + # 13 Face + # 14 LeftArm + # 15 RightArm + # 16 LeftLeg + # 17 RightLeg + # 18 LeftShoe + # 19 RightShoe + head_output, tail_output = np.split(out, indices_or_sections=[1], axis=0) + head_output = head_output.squeeze(0) + tail_output = tail_output.squeeze(0) + tail_list = np.split(tail_output, indices_or_sections=list(range(1, 20)), axis=0) + tail_list = [arr.squeeze(0) for arr in tail_list] + tail_list_rev = [tail_list[i] for i in range(14)] + tail_list_rev.extend([tail_list[15], tail_list[14], tail_list[17], tail_list[16], tail_list[19], tail_list[18]]) + tail_output_rev = np.stack(tail_list_rev, axis=0) + tail_output_rev = np.flip(tail_output_rev, axis=2) + raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=False) + raw_output_all = np.expand_dims(raw_output_all, axis=0) + raw_output_all = np.argmax(raw_output_all, axis=1) + raw_output_all = raw_output_all.transpose(1, 2, 0) + return raw_output_all + + +def decode_labels(gray_image): + """ + Colorize image according to labels + :param gray_image: grayscale human segmentation result + """ + height, width, _ = gray_image.shape + colors = [(0, 0, 0), (128, 0, 0), (255, 0, 0), (0, 85, 0), (170, 0, 51), (255, 85, 0), + (0, 0, 85), (0, 119, 221), (85, 85, 0), (0, 85, 85), (85, 51, 0), (52, 86, 128), + (0, 128, 0), (0, 0, 255), (51, 170, 221), (0, 255, 255),(85, 255, 170), + (170, 255, 85), (255, 255, 0), (255, 170, 0)] + + segm = np.stack([colors[idx] for idx in gray_image.flatten()]) + segm = segm.reshape(height, width, 3).astype(np.uint8) + segm = cv.cvtColor(segm, cv.COLOR_BGR2RGB) + return segm + + +def parse_human(image_path, model_path, backend, target): + """ + Prepare input for execution, run net and postprocess output to parse human. 
+ :param image_path: path to input image + :param model_path: path to JPPNet model + :param backend: name of computation backend + :param target: name of computation target + """ + input = preprocess(image_path) + output = run_net(input, model_path, backend, target) + grayscale_out = postprocess(output) + segmentation = decode_labels(grayscale_out) + return segmentation + + +if __name__ == '__main__': + args, _ = parser.parse_known_args() + output = parse_human(args.input, args.model, args.backend, args.target) + winName = 'Deep learning human parsing in OpenCV' + cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) + cv.imshow(winName, output) + cv.waitKey() From ffa72fc9793ceabfed9d339aa9238758abe47979 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 24 Dec 2019 09:45:27 +0300 Subject: [PATCH 02/42] Refactoring --- samples/dnn/human_parsing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 84d0663871..467a19a3b1 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -117,8 +117,7 @@ def postprocess(out): tail_list_rev.extend([tail_list[15], tail_list[14], tail_list[17], tail_list[16], tail_list[19], tail_list[18]]) tail_output_rev = np.stack(tail_list_rev, axis=0) tail_output_rev = np.flip(tail_output_rev, axis=2) - raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=False) - raw_output_all = np.expand_dims(raw_output_all, axis=0) + raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=True) raw_output_all = np.argmax(raw_output_all, axis=1) raw_output_all = raw_output_all.transpose(1, 2, 0) return raw_output_all From 543e0302d341703637e48063848f67b4f12000e9 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 24 Dec 2019 15:43:19 +0300 Subject: [PATCH 03/42] Support global pooling by axis --- .../dnn/include/opencv2/dnn/all_layers.hpp | 1 + modules/dnn/src/layers/pooling_layer.cpp | 11 ++++ modules/dnn/src/tensorflow/tf_importer.cpp | 52 ++++++++++++------- 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index b251b4adb3..d62b1f0bc7 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -251,6 +251,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; bool globalPooling; + int global_axis; bool computeMaxIdx; String padMode; bool ceilMode; diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 5727e2b3f9..c4b452c5ac 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -95,6 +95,8 @@ public: else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); + global_axis = params.has("global_axis") ? 
params.get("global_axis") : -1; + getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode); if (kernel_size.size() == 2) { kernel = Size(kernel_size[1], kernel_size[0]); @@ -149,6 +151,9 @@ public: if (globalPooling) { kernel = Size(inp[1], inp[0]); kernel_size = std::vector(inp.begin(), inp.end()); + } else if (global_axis != -1) { + kernel_size[global_axis] = inp[global_axis]; + kernel = Size(kernel_size[1], kernel_size[0]); } getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); @@ -1037,6 +1042,12 @@ virtual Ptr initNgraph(const std::vector >& inp outShape[0] = inputs[1][0]; // Number of proposals; outShape[1] = psRoiOutChannels; } + else if (global_axis != -1) + { + CV_Assert(global_axis >= 0 && global_axis < inpShape.size()); + outShape[2 + global_axis] = 1; + } + int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1); CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX)); diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 6fbaf98f96..426f8f8da4 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1944,14 +1944,13 @@ void TFImporter::populateNet(Net dstNet) layer_id[flattenName] = flattenId; connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0); - LayerParams reshapeLp; std::string reshapeName = name + "/reshape"; CV_Assert(layer_id.find(reshapeName) == layer_id.end()); - reshapeLp.set("axis", 0); + reshapeLp.set("axis", indices.at(0)); reshapeLp.set("num_axes", 1); - std::vector newShape = {1, 1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); + int newShape[] = {1, 1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3)); int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp); layer_id[reshapeName] = reshapeId; @@ -1961,23 +1960,38 @@ void TFImporter::populateNet(Net dstNet) std::string avgName = name + "/avg"; CV_Assert(layer_id.find(avgName) == layer_id.end()); avgLp.set("pool", "ave"); - avgLp.set("kernel_h", 3); // TODO: node.shape[0] - avgLp.set("kernel_w", 1); + // pooling kernel H x 1 + avgLp.set("global_axis", 0); + avgLp.set("kernel_size", 1); int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); layer_id[avgName] = avgId; - // one input only connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); - LayerParams reshapeLp2; - std::string reshapeName2 = name; - CV_Assert(layer_id.find(reshapeName2) == layer_id.end()); - newShape = {2, 20, 314, 253}; // TODO: remove out shapes + LayerParams sliceLp; + std::string sliceName = name + "/slice"; + CV_Assert(layer_id.find(sliceName) == layer_id.end()); + sliceLp.set("axis", indices.at(0)); + int begin[] = {0}; + int size[] = {1}; + sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1)); + sliceLp.set("size", DictValue::arrayInt(&size[0], 1)); + int sliceId = dstNet.addLayer(sliceName, "Slice", sliceLp); + layer_id[sliceName] = sliceId; + connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0); - reshapeLp2.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", indices.at(0)); + squeezeLp.set("end_axis", indices.at(0) + 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(sliceName), squeezeId, 0); - int 
reshapeId2 = dstNet.addLayer(reshapeName2, "Reshape", reshapeLp2); - layer_id[reshapeName2] = reshapeId2; - connect(layer_id, dstNet, Pin(avgName), reshapeId2, 0); + int id = dstNet.addLayer(name, "Reshape", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, Pin(avgName), id, 0); + connect(layer_id, dstNet, Pin(squeezeName), id, 1); } else { if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); @@ -2021,13 +2035,15 @@ void TFImporter::populateNet(Net dstNet) std::string base_name = name + "/reshape_"; std::vector reshape_names; for (int i = 0; i < num; i++) { - std::string reshape_name = base_name + std::to_string(i); + std::ostringstream ss; + ss << i; + std::string reshape_name = base_name + ss.str(); reshape_names.push_back(reshape_name); LayerParams reshapeLP; reshapeLP.set("axis", dim); reshapeLP.set("num_axes", 1); - std::vector outShape = {1, -1}; - reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + int outShape[] = {1, -1}; + reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2)); int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP); layer_id[reshape_name] = id; connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0); From cf477f7e9faac694c404472274aa9b5b0c6f7627 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 24 Dec 2019 16:42:00 +0300 Subject: [PATCH 04/42] Fix global axis --- modules/dnn/src/layers/pooling_layer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index c4b452c5ac..55abcec0bf 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -95,8 +95,6 @@ public: else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); - global_axis = params.has("global_axis") ? 
params.get("global_axis") : -1; - getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode); if (kernel_size.size() == 2) { kernel = Size(kernel_size[1], kernel_size[0]); @@ -126,6 +124,7 @@ public: CV_Error(Error::StsBadArg, "Cannot determine pooling type"); setParamsFrom(params); ceilMode = params.get("ceil_mode", true); + global_axis = params.get("global_axis", -1); spatialScale = params.get("spatial_scale", 1); avePoolPaddedArea = params.get("ave_pool_padded_area", true); } From 752653c70b2a0173e99b2323f279e63ef59918e2 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Sat, 28 Dec 2019 18:03:40 +0300 Subject: [PATCH 05/42] Update global pooling --- .../dnn/include/opencv2/dnn/all_layers.hpp | 2 +- modules/dnn/src/layers/pooling_layer.cpp | 28 +++++++++++++++---- modules/dnn/src/tensorflow/tf_importer.cpp | 3 +- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index d62b1f0bc7..73c85cad1c 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -251,7 +251,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; bool globalPooling; - int global_axis; + std::vector isGlobalPooling; bool computeMaxIdx; String padMode; bool ceilMode; diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 55abcec0bf..aae9730c1a 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -122,9 +122,17 @@ public: } else CV_Error(Error::StsBadArg, "Cannot determine pooling type"); + setParamsFrom(params); ceilMode = params.get("ceil_mode", true); - global_axis = params.get("global_axis", -1); + if (params.has("is_global_pooling")) + { + const DictValue &global_axis = params.get("is_global_pooling"); + int size = global_axis.size(); + isGlobalPooling.resize(size); + for (int i = 0; i < size; i++) + isGlobalPooling[i] = global_axis.get(i); + } spatialScale = params.get("spatial_scale", 1); avePoolPaddedArea = params.get("ave_pool_padded_area", true); } @@ -150,8 +158,12 @@ public: if (globalPooling) { kernel = Size(inp[1], inp[0]); kernel_size = std::vector(inp.begin(), inp.end()); - } else if (global_axis != -1) { - kernel_size[global_axis] = inp[global_axis]; + } else if (!isGlobalPooling.empty()) { + for (int i = 0; i < isGlobalPooling.size(); i++) + { + if (isGlobalPooling[i]) + kernel_size[i] = inp[i]; + } kernel = Size(kernel_size[1], kernel_size[0]); } @@ -1041,10 +1053,14 @@ virtual Ptr initNgraph(const std::vector >& inp outShape[0] = inputs[1][0]; // Number of proposals; outShape[1] = psRoiOutChannels; } - else if (global_axis != -1) + else if (!isGlobalPooling.empty()) { - CV_Assert(global_axis >= 0 && global_axis < inpShape.size()); - outShape[2 + global_axis] = 1; + CV_Assert(isGlobalPooling.size() == inpShape.size()); + for (int i = 0; i < isGlobalPooling.size(); i++) + { + if (isGlobalPooling[i]) + outShape[2 + i] = 1; + } } int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 
2 : 1); diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 426f8f8da4..f757efef5c 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1961,7 +1961,8 @@ void TFImporter::populateNet(Net dstNet) CV_Assert(layer_id.find(avgName) == layer_id.end()); avgLp.set("pool", "ave"); // pooling kernel H x 1 - avgLp.set("global_axis", 0); + bool isGlobalPooling[] = {true, false}; + avgLp.set("is_global_pooling", DictValue::arrayInt(&isGlobalPooling[0], 2)); avgLp.set("kernel_size", 1); int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); layer_id[avgName] = avgId; From 7eba3a7c9638c8a3f008e3595d60cbba1391b290 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Thu, 9 Jan 2020 13:59:35 +0300 Subject: [PATCH 06/42] Add pack description --- .../dnn/include/opencv2/dnn/all_layers.hpp | 2 +- modules/dnn/src/layers/layers_common.cpp | 20 +++++++-- modules/dnn/src/layers/layers_common.hpp | 2 +- modules/dnn/src/layers/pooling_layer.cpp | 42 +++++++------------ modules/dnn/src/tensorflow/tf_importer.cpp | 16 +++---- 5 files changed, 43 insertions(+), 39 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 73c85cad1c..2b9de0b663 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -250,7 +250,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN std::vector pads_begin, pads_end; CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; - bool globalPooling; + CV_DEPRECATED_EXTERNAL bool globalPooling; std::vector isGlobalPooling; bool computeMaxIdx; String padMode; diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index f119c12ac0..266d2cf45f 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -144,14 +144,26 @@ void getStrideAndPadding(const LayerParams &params, std::vector<size_t>& pads_be } } -void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling, +void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling, std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode) { - globalPooling = params.has("global_pooling") && - params.get<bool>("global_pooling"); + bool is_global = params.get<bool>("global_pooling", false); + globalPooling = std::vector<bool>(3, is_global); + if (params.has("global_d")) + { + globalPooling[0] = params.get<bool>("global_d"); + } + else if (params.has("global_h")) + { + globalPooling[1] = params.get<bool>("global_h"); + } + else if (params.has("global_w")) + { + globalPooling[2] = params.get<bool>("global_w"); + } - if (globalPooling) + if (is_global) { util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); if(params.has("kernel_h") || params.has("kernel_w") || params.has("kernel_size")) diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp index b574d7eed0..81e7bdd11c 100644 --- a/modules/dnn/src/layers/layers_common.hpp +++ b/modules/dnn/src/layers/layers_common.hpp @@ -63,7 +63,7 @@ void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads); -void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling, +void 
getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling, std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode); void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel, diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index aae9730c1a..c881cc7c8d 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -79,6 +79,7 @@ public: { computeMaxIdx = true; globalPooling = false; + isGlobalPooling = std::vector<bool>(3, false); stride = Size(1, 1); pad_t = pad_l = pad_b = pad_r = 0; @@ -95,7 +96,8 @@ public: else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); - getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode); + getPoolingKernelParams(params, kernel_size, isGlobalPooling, pads_begin, pads_end, strides, padMode); + globalPooling = std::accumulate(isGlobalPooling.begin(), isGlobalPooling.end(), 0) == 3; if (kernel_size.size() == 2) { kernel = Size(kernel_size[1], kernel_size[0]); stride = Size(strides[1], strides[0]); @@ -125,14 +127,7 @@ public: setParamsFrom(params); ceilMode = params.get("ceil_mode", true); - if (params.has("is_global_pooling")) - { - const DictValue &global_axis = params.get("is_global_pooling"); - int size = global_axis.size(); - isGlobalPooling.resize(size); - for (int i = 0; i < size; i++) - isGlobalPooling[i] = global_axis.get(i); - } + spatialScale = params.get("spatial_scale", 1); avePoolPaddedArea = params.get("ave_pool_padded_area", true); } @@ -155,17 +150,14 @@ public: inp.push_back(inputs[0].size[i]); out.push_back(outputs[0].size[i]); } - if (globalPooling) { - kernel = Size(inp[1], inp[0]); - kernel_size = std::vector(inp.begin(), inp.end()); - } else if (!isGlobalPooling.empty()) { - for (int i = 0; i < isGlobalPooling.size(); i++) - { - if (isGlobalPooling[i]) - kernel_size[i] = inp[i]; - } - kernel = Size(kernel_size[1], kernel_size[0]); + kernel_size.resize(out.size()); + int diff_size = isGlobalPooling.size() - kernel_size.size(); + for (int i = 0; i < kernel_size.size(); i++) + { + if (isGlobalPooling[i + diff_size]) + kernel_size[i] = inp[i]; } + kernel = Size(kernel_size[1], kernel_size[0]); getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); if (pads_begin.size() == 2) { @@ -1053,14 +1045,12 @@ virtual Ptr initNgraph(const std::vector >& inp outShape[0] = inputs[1][0]; // Number of proposals; outShape[1] = psRoiOutChannels; } - else if (!isGlobalPooling.empty()) + + int diff_size = isGlobalPooling.size() - (outShape.size() - 2); + for (int i = 2; i < outShape.size(); i++) { - CV_Assert(isGlobalPooling.size() == inpShape.size()); - for (int i = 0; i < isGlobalPooling.size(); i++) - { - if (isGlobalPooling[i]) - outShape[2 + i] = 1; - } + if (isGlobalPooling[i - 2 + diff_size]) + outShape[i] = 1; } int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 
2 : 1); diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index f757efef5c..565002d637 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1961,8 +1961,7 @@ void TFImporter::populateNet(Net dstNet) CV_Assert(layer_id.find(avgName) == layer_id.end()); avgLp.set("pool", "ave"); // pooling kernel H x 1 - bool isGlobalPooling[] = {true, false}; - avgLp.set("is_global_pooling", DictValue::arrayInt(&isGlobalPooling[0], 2)); + avgLp.set("global_h", true); avgLp.set("kernel_size", 1); int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); layer_id[avgName] = avgId; connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); @@ -2025,6 +2024,12 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Pack") { + // op: tf.stack(list of tensors, axis=0) + // Join a list of inputs along a new axis. + // The "axis" specifies the index of the new axis in the dimensions of the output. + // Example: given a list with "N" tensors of shape (C, H, W): + // if axis == 0 then the output tensor will have the shape (N, C, H, W), + // if axis == 1 then the output tensor will have the shape (C, N, H, W). CV_Assert(hasLayerAttr(layer, "axis")); int dim = (int)getLayerAttr(layer, "axis").i(); if (dim != 0) CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation."); @@ -2054,11 +2059,8 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, "Concat", layerParams); layer_id[name] = id; - for (int li = 0; li < num; li++) { - Pin inp = parsePin(reshape_names[li]); - connect(layer_id, dstNet, inp, id, li); - } - + for (int li = 0; li < num; li++) + connect(layer_id, dstNet, Pin(reshape_names[li]), id, li); } else if (type == "ClipByValue") { From a33d50084dac5e23e956c6592c23b4c288ab2458 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 10 Jan 2020 09:01:57 +0300 Subject: [PATCH 07/42] Add global_pooling_dim flags --- modules/dnn/src/layers/layers_common.cpp | 18 ++++++------------ modules/dnn/src/tensorflow/tf_importer.cpp | 2 +- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index 266d2cf45f..2f8f1091c8 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -150,18 +150,12 @@ void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kern { bool is_global = params.get<bool>("global_pooling", false); globalPooling = std::vector<bool>(3, is_global); - if (params.has("global_d")) - { - globalPooling[0] = params.get<bool>("global_d"); - } - else if (params.has("global_h")) - { - globalPooling[1] = params.get<bool>("global_h"); - } - else if (params.has("global_w")) - { - globalPooling[2] = params.get<bool>("global_w"); - } + if (params.has("global_pooling_d")) + globalPooling[0] = params.get<bool>("global_pooling_d"); + else if (params.has("global_pooling_h")) + globalPooling[1] = params.get<bool>("global_pooling_h"); + else if (params.has("global_pooling_w")) + globalPooling[2] = params.get<bool>("global_pooling_w"); if (is_global) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 565002d637..b73982eb89 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1961,7 +1961,7 @@ void TFImporter::populateNet(Net dstNet) CV_Assert(layer_id.find(avgName) == layer_id.end()); avgLp.set("pool", "ave"); // pooling kernel H x 1 - avgLp.set("global_h", true); + avgLp.set("global_pooling_h", true); avgLp.set("kernel_size", 1); int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); layer_id[avgName] 
= avgId; From e9e3af0aaa18b93ad885890de41f6c35e8852b4c Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 10 Jan 2020 11:22:19 +0300 Subject: [PATCH 08/42] Add global pool by axis test --- modules/dnn/src/layers/slice_layer.cpp | 7 +++---- modules/dnn/test/test_tf_importer.cpp | 7 +++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index d7cafd7f93..b29833c5bb 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -114,10 +114,9 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { return backendId == DNN_BACKEND_OPENCV || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && -#ifdef HAVE_INF_ENGINE - INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && -#endif + (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && sliceRanges.size() == 1) || + (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && + sliceRanges.size() == 1 && sliceRanges[0].size() == 4); } diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 8826fa09ff..ecfc1635e0 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -121,6 +121,13 @@ public: } }; +TEST_P(Test_TensorFlow_layers, reduce_mean) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + runTensorFlowNet("global_pool_by_axis"); +} + TEST_P(Test_TensorFlow_layers, conv) { runTensorFlowNet("single_conv"); From 4625337179d07aafe8b80f666950a06e9fcafd93 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 10 Jan 2020 12:41:31 +0300 Subject: [PATCH 09/42] Add docs reduce mean --- modules/dnn/src/layers/layers_common.cpp | 4 +- modules/dnn/src/layers/pooling_layer.cpp | 2 - modules/dnn/src/layers/slice_layer.cpp | 4 +- modules/dnn/src/tensorflow/tf_importer.cpp | 59 ++++++++++++++-------- 4 files changed, 42 insertions(+), 27 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index 2f8f1091c8..f26c9778ec 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -152,9 +152,9 @@ void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kern globalPooling = std::vector<bool>(3, is_global); if (params.has("global_pooling_d")) globalPooling[0] = params.get<bool>("global_pooling_d"); - else if (params.has("global_pooling_h")) + if (params.has("global_pooling_h")) globalPooling[1] = params.get<bool>("global_pooling_h"); - else if (params.has("global_pooling_w")) + if (params.has("global_pooling_w")) globalPooling[2] = params.get<bool>("global_pooling_w"); if (is_global) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index c881cc7c8d..04c2e65b4b 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -124,10 +124,8 @@ public: } else CV_Error(Error::StsBadArg, "Cannot determine pooling type"); - setParamsFrom(params); ceilMode = params.get("ceil_mode", true); - spatialScale = params.get("spatial_scale", 1); avePoolPaddedArea = params.get("ave_pool_padded_area", true); } diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index b29833c5bb..662ade8f14 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -116,7 +116,9 @@ 
public: return backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && sliceRanges.size() == 1) || (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && - +#ifdef HAVE_INF_ENGINE + INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && +#endif sliceRanges.size() == 1 && sliceRanges[0].size() == 4); } diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index b73982eb89..b3527d1092 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1932,9 +1932,29 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Mean") { + // Computes the mean of elements across dimensions of a tensor. + // If keepdims is false (default) reduces input_tensor along the dimensions given in axis, + // else the reduced dimensions are retained with length 1. + // if indices = [1, 2] in NHWC layout we use global pooling: NxCxHxW --Pooling--> NxCx1x1 + // if keepdims is false we use Flatten after Pooling: out_shape = NxC + // if indices = [0] we use a global pooling by indices. + // To return correct shape, we use Reshape after Pooling. To determine input shape use Slice for input, + // if keepdims is false we use Flatten after Slice. + // Example: input_shape = NxCxHxW + // determine out shape: NxCxHxW --Slice--> 1xCxHxW + // out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW) + // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape + Mat indices = getTensorContent(getConstBlob(layer, value_id, 1)); CV_Assert(indices.type() == CV_32SC1); + // There are two attributes, "keepdims" and a deprecated "keep_dims". + bool keepDims = false; + if (hasLayerAttr(layer, "keepdims")) + keepDims = getLayerAttr(layer, "keepdims").b(); + else if (hasLayerAttr(layer, "keep_dims")) + keepDims = getLayerAttr(layer, "keep_dims").b(); + if (indices.total() == 1 && indices.at(0) == 0) { LayerParams flattenLp; @@ -1968,49 +1988,44 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); LayerParams sliceLp; - std::string sliceName = name + "/slice"; - CV_Assert(layer_id.find(sliceName) == layer_id.end()); + std::string layerShapeName = name + "/slice"; + CV_Assert(layer_id.find(layerShapeName) == layer_id.end()); sliceLp.set("axis", indices.at(0)); int begin[] = {0}; int size[] = {1}; sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1)); sliceLp.set("size", DictValue::arrayInt(&size[0], 1)); - int sliceId = dstNet.addLayer(sliceName, "Slice", sliceLp); - layer_id[sliceName] = sliceId; + int sliceId = dstNet.addLayer(layerShapeName, "Slice", sliceLp); + layer_id[layerShapeName] = sliceId; connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0); - LayerParams squeezeLp; - std::string squeezeName = name + "/squeeze"; - CV_Assert(layer_id.find(squeezeName) == layer_id.end()); - squeezeLp.set("axis", indices.at(0)); - squeezeLp.set("end_axis", indices.at(0) + 1); - int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); - layer_id[squeezeName] = squeezeId; - connect(layer_id, dstNet, Pin(sliceName), squeezeId, 0); + if (!keepDims) + { + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", indices.at(0)); + squeezeLp.set("end_axis", indices.at(0) + 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + 
connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0); + layerShapeName = squeezeName; + } int id = dstNet.addLayer(name, "Reshape", layerParams); layer_id[name] = id; connect(layer_id, dstNet, Pin(avgName), id, 0); - connect(layer_id, dstNet, Pin(squeezeName), id, 1); + connect(layer_id, dstNet, Pin(layerShapeName), id, 1); } else { if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); layerParams.set("pool", "ave"); layerParams.set("global_pooling", true); - int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - // There are two attributes, "keepdims" and a deprecated "keep_dims". - bool keepDims = false; - if (hasLayerAttr(layer, "keepdims")) - keepDims = getLayerAttr(layer, "keepdims").b(); - else if (hasLayerAttr(layer, "keep_dims")) - keepDims = getLayerAttr(layer, "keep_dims").b(); - if (!keepDims) { LayerParams flattenLp; From 9ed372b297178db85e9da2bb8ecacf88541a781a Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 10 Jan 2020 14:09:08 +0300 Subject: [PATCH 10/42] Update get memory shapes --- modules/dnn/src/layers/pooling_layer.cpp | 35 ++++++++++++------------ 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 04c2e65b4b..1fec982dea 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -149,11 +149,12 @@ public: out.push_back(outputs[0].size[i]); } kernel_size.resize(out.size()); - int diff_size = isGlobalPooling.size() - kernel_size.size(); for (int i = 0; i < kernel_size.size(); i++) { - if (isGlobalPooling[i + diff_size]) - kernel_size[i] = inp[i]; + int pool_idx = isGlobalPooling.size() - 1 - i; + int kernel_idx = kernel_size.size() - 1 - i; + if (isGlobalPooling[pool_idx]) + kernel_size[kernel_idx] = inp[kernel_idx]; } kernel = Size(kernel_size[1], kernel_size[0]); @@ -1001,20 +1002,27 @@ virtual Ptr initNgraph(const std::vector >& inp std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); std::vector outShape(inputs[0].begin(), inputs[0].begin() + 2); - if (globalPooling) + std::vector local_kernel = kernel_size.empty() ? + std::vector(inpShape.begin(), inpShape.end()) : kernel_size; + + for (int i = 0; i < local_kernel.size(); i++) { - outShape.push_back(1); - outShape.push_back(1); + int pool_idx = isGlobalPooling.size() - 1 - i; + int kernel_idx = local_kernel.size() - 1 - i; + if (isGlobalPooling[pool_idx]) + local_kernel[kernel_idx] = inpShape[kernel_idx]; } - else if (type == ROI || type == PSROI) + + + if (type == ROI || type == PSROI) { outShape.push_back(pooledSize.height); outShape.push_back(pooledSize.width); } else if (padMode.empty()) { - for (int i = 0; i < kernel_size.size(); i++) { - float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - kernel_size[i]) / strides[i]; + for (int i = 0; i < local_kernel.size(); i++) { + float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i]; outShape.push_back(1 + (ceilMode ? 
ceil(dst) : floor(dst))); } @@ -1029,7 +1037,7 @@ virtual Ptr initNgraph(const std::vector >& inp } else { - getConvPoolOutParams(inpShape, kernel_size, strides, padMode, std::vector(kernel_size.size(), 1), outShape); + getConvPoolOutParams(inpShape, local_kernel, strides, padMode, std::vector(local_kernel.size(), 1), outShape); } if (type == ROI) { @@ -1044,13 +1052,6 @@ virtual Ptr initNgraph(const std::vector >& inp outShape[1] = psRoiOutChannels; } - int diff_size = isGlobalPooling.size() - (outShape.size() - 2); - for (int i = 2; i < outShape.size(); i++) - { - if (isGlobalPooling[i - 2 + diff_size]) - outShape[i] = 1; - } - int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1); CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX)); From ea31a14cc514b41962590606afc79f1534ab5645 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 10 Jan 2020 16:33:17 +0300 Subject: [PATCH 11/42] Update sample --- samples/dnn/human_parsing.py | 49 +++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 467a19a3b1..43c495200a 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -3,8 +3,8 @@ import numpy as np import argparse -backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, - cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) +backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, + cv.dnn.DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', @@ -36,26 +36,27 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, # 2. Create input # image = cv2.imread(path/to/image) # image_rev = np.flip(image, axis=1) -# image_h, image_w = image.shape[:2] # input = np.stack([image, image_rev], axis=0) # -# 3. Hardcode image_h and image_w shapes to determine output shapes -# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, [image_h, image_w]), -# tf.image.resize_images(parsing_out1_075, [image_h, image_w]), -# tf.image.resize_images(parsing_out1_125, [image_h, image_w])]), axis=0) -# Do similarly with parsing_out2, parsing_out3 -# 4. Remove postprocessing -# - parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) +# 3. Hardcode image_h and image_w shapes to determine output shapes. +# We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py. +# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE), +# tf.image.resize_images(parsing_out1_075, INPUT_SIZE), +# tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0) +# Do similarly with parsing_out2, parsing_out3 +# 4. Remove postprocessing. Last net operation: +# raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) +# Change: +# parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) # # 5. To save model after sess.run(...) 
add: -# - input_graph_def = tf.get_default_graph().as_graph_def() -# - output_node = "Mean_3" -# - output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) -# - -# - output_graph = "LIP_JPPNet.pb" -# - with tf.gfile.GFile(output_graph, "wb") as f: -# - f.write(output_graph_def.SerializeToString()) - +# input_graph_def = tf.get_default_graph().as_graph_def() +# output_node = "Mean_3" +# output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) +# +# output_graph = "LIP_JPPNet.pb" +# with tf.gfile.GFile(output_graph, "wb") as f: +# f.write(output_graph_def.SerializeToString()) def preprocess(image_path): @@ -73,6 +74,8 @@ def run_net(input, model_path, backend, target): """ Read network and infer model :param model_path: path to JPPNet model + :param backend: computation backend + :param target: computation device """ net = cv.dnn.readNet(model_path) net.setPreferableBackend(backend) @@ -82,10 +85,11 @@ def run_net(input, model_path, backend, target): return out -def postprocess(out): +def postprocess(out, input_shape): """ Create a grayscale human segmentation :param out: network output + :param input_shape: input image width and height """ # LIP classes # 0 Background @@ -111,6 +115,10 @@ def postprocess(out): head_output, tail_output = np.split(out, indices_or_sections=[1], axis=0) head_output = head_output.squeeze(0) tail_output = tail_output.squeeze(0) + + head_output = np.stack([cv.resize(img, dsize=input_shape) for img in head_output[:, ...]]) + tail_output = np.stack([cv.resize(img, dsize=input_shape) for img in tail_output[:, ...]]) + tail_list = np.split(tail_output, indices_or_sections=list(range(1, 20)), axis=0) tail_list = [arr.squeeze(0) for arr in tail_list] tail_list_rev = [tail_list[i] for i in range(14)] @@ -149,8 +157,9 @@ def parse_human(image_path, model_path, backend, target): :param target: name of computation target """ input = preprocess(image_path) + input_h, input_w = input.shape[2:] output = run_net(input, model_path, backend, target) - grayscale_out = postprocess(output) + grayscale_out = postprocess(output, (input_w, input_h)) segmentation = decode_labels(grayscale_out) return segmentation From 4ecbcf0885472631b21a459b722403f6a7efed04 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 16 Jan 2020 15:06:34 +0300 Subject: [PATCH 12/42] imgproc: copy sumpixels.simd.hpp --- modules/imgproc/src/{sumpixels.cpp => sumpixels.simd.hpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename modules/imgproc/src/{sumpixels.cpp => sumpixels.simd.hpp} (100%) mode change 100755 => 100644 diff --git a/modules/imgproc/src/sumpixels.cpp b/modules/imgproc/src/sumpixels.simd.hpp old mode 100755 new mode 100644 similarity index 100% rename from modules/imgproc/src/sumpixels.cpp rename to modules/imgproc/src/sumpixels.simd.hpp From c6a622542d59b8f949812038b8b24f664e580729 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 16 Jan 2020 15:07:48 +0300 Subject: [PATCH 13/42] imgproc: copy sumpixels.dispatch.cpp --- modules/imgproc/src/{sumpixels.cpp => sumpixels.dispatch.cpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename modules/imgproc/src/{sumpixels.cpp => sumpixels.dispatch.cpp} (100%) diff --git a/modules/imgproc/src/sumpixels.cpp b/modules/imgproc/src/sumpixels.dispatch.cpp similarity index 100% rename from modules/imgproc/src/sumpixels.cpp rename to modules/imgproc/src/sumpixels.dispatch.cpp From b4316af83496e9e4dbfdf680244fa84bd0bc174e Mon Sep 17 00:00:00 2001 
From: Alexander Alekhin Date: Fri, 17 Jan 2020 16:49:46 +0300 Subject: [PATCH 14/42] imgproc: rename sumpixels.avx512_skx.{cpp,hpp} --- .../src/{sumpixels.avx512_skx.cpp => sumpixels.avx512_skx.hpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename modules/imgproc/src/{sumpixels.avx512_skx.cpp => sumpixels.avx512_skx.hpp} (100%) diff --git a/modules/imgproc/src/sumpixels.avx512_skx.cpp b/modules/imgproc/src/sumpixels.avx512_skx.hpp similarity index 100% rename from modules/imgproc/src/sumpixels.avx512_skx.cpp rename to modules/imgproc/src/sumpixels.avx512_skx.hpp From 09b3383a7e1072ac831eaf71fc56a33941de4db9 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 17 Jan 2020 16:54:08 +0300 Subject: [PATCH 15/42] imgproc: dispatch sumpixels (integral) --- modules/imgproc/CMakeLists.txt | 1 + modules/imgproc/src/sumpixels.avx512_skx.hpp | 19 +- modules/imgproc/src/sumpixels.dispatch.cpp | 479 ++++++------------- modules/imgproc/src/sumpixels.hpp | 25 - modules/imgproc/src/sumpixels.simd.hpp | 459 ++---------------- 5 files changed, 202 insertions(+), 781 deletions(-) delete mode 100644 modules/imgproc/src/sumpixels.hpp diff --git a/modules/imgproc/CMakeLists.txt b/modules/imgproc/CMakeLists.txt index f26ea0b3bf..a74c883cd3 100644 --- a/modules/imgproc/CMakeLists.txt +++ b/modules/imgproc/CMakeLists.txt @@ -9,5 +9,6 @@ ocv_add_dispatched_file(color_yuv SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(median_blur SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(morph SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2) +ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX) ocv_add_dispatched_file(undistort SSE2 AVX2) ocv_define_module(imgproc opencv_core WRAP java python js) diff --git a/modules/imgproc/src/sumpixels.avx512_skx.hpp b/modules/imgproc/src/sumpixels.avx512_skx.hpp index 804b48d8c5..3c9c90c658 100644 --- a/modules/imgproc/src/sumpixels.avx512_skx.hpp +++ b/modules/imgproc/src/sumpixels.avx512_skx.hpp @@ -2,14 +2,13 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2019, Intel Corporation, all rights reserved. -#include "precomp.hpp" -#include "sumpixels.hpp" +// Copyright (C) 2019-2020, Intel Corporation, all rights reserved. #include "opencv2/core/hal/intrin.hpp" +namespace cv { namespace hal { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN -namespace cv { namespace { // Anonymous namespace to avoid exposing the implementation classes // @@ -432,16 +431,14 @@ __m512d IntegralCalculator < 4 > ::calculate_integral(const __m512i src_longs, c } // end of anonymous namespace -namespace opt_AVX512_SKX { - -// This is the implementation for the external callers interface entry point. 
-// It should be the only function called into this file from outside -// Any new implementations should be directed from here +static void calculate_integral_avx512(const uchar *src, size_t _srcstep, double *sum, size_t _sumstep, double *sqsum, size_t _sqsumstep, int width, int height, int cn) { + CV_INSTRUMENT_REGION(); + switch(cn){ case 1: { IntegralCalculator< 1 > calculator; @@ -466,5 +463,5 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep, } -} // end namespace opt_AVX512_SXK -} // end namespace cv +CV_CPU_OPTIMIZATION_NAMESPACE_END +}} // end namespace cv::hal diff --git a/modules/imgproc/src/sumpixels.dispatch.cpp b/modules/imgproc/src/sumpixels.dispatch.cpp index 89337f3507..b828ec70c0 100755 --- a/modules/imgproc/src/sumpixels.dispatch.cpp +++ b/modules/imgproc/src/sumpixels.dispatch.cpp @@ -10,7 +10,7 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000-2008,2019 Intel Corporation, all rights reserved. +// Copyright (C) 2000-2020 Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Copyright (C) 2014, Itseez Inc., all rights reserved. // Third party copyrights are property of their respective owners. @@ -44,210 +44,157 @@ #include "precomp.hpp" #include "opencl_kernels_imgproc.hpp" #include "opencv2/core/hal/intrin.hpp" -#include "sumpixels.hpp" -namespace cv -{ +#include "sumpixels.simd.hpp" +#include "sumpixels.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content -template -struct Integral_SIMD + +namespace cv { + +#ifdef HAVE_OPENCL + +static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth ) { - bool operator()(const T *, size_t, - ST *, size_t, - QT *, size_t, - ST *, size_t, - int, int, int) const - { + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if ( (_src.type() != CV_8UC1) || + !(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F))) return false; - } -}; + static const int tileSize = 16; -template <> -struct Integral_SIMD { - Integral_SIMD() {}; + String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s", + ocl::typeToStr(sdepth), tileSize, + doubleSupport ? 
" -D DOUBLE_SUPPORT" : ""); + ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); + if (kcols.empty()) + return false; - bool operator()(const uchar *src, size_t _srcstep, - double *sum, size_t _sumstep, - double *sqsum, size_t _sqsumstep, - double *tilted, size_t _tiltedstep, - int width, int height, int cn) const + UMat src = _src.getUMat(); + Size src_size = src.size(); + Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); + UMat buf(bufsize, sdepth); + kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf)); + size_t gt = src.cols, lt = tileSize; + if (!kcols.run(1, >, <, false)) + return false; + + ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); + if (krows.empty()) + return false; + + Size sumsize(src_size.width + 1, src_size.height + 1); + _sum.create(sumsize, sdepth); + UMat sum = _sum.getUMat(); + + krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum)); + gt = src.rows; + return krows.run(1, >, <, false); +} + +static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth ) +{ + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) ) + return false; + + static const int tileSize = 16; + + String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s", + ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth), + tileSize, + doubleSupport ? " -D DOUBLE_SUPPORT" : ""); + + ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); + if (kcols.empty()) + return false; + + UMat src = _src.getUMat(); + Size src_size = src.size(); + Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); + UMat buf(bufsize, sdepth); + UMat buf_sq(bufsize, sqdepth); + kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq)); + size_t gt = src.cols, lt = tileSize; + if (!kcols.run(1, >, <, false)) + return false; + + ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); + if (krows.empty()) + return false; + + Size sumsize(src_size.width + 1, src_size.height + 1); + _sum.create(sumsize, sdepth); + UMat sum = _sum.getUMat(); + _sqsum.create(sumsize, sqdepth); + UMat sum_sq = _sqsum.getUMat(); + + krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq)); + gt = src.rows; + return krows.run(1, >, <, false); +} + +#endif // HAVE_OPENCL + +#ifdef HAVE_IPP + +static bool ipp_integral( + int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn) +{ + CV_INSTRUMENT_REGION_IPP(); + + IppiSize size = {width, height}; + + if(cn > 1) + return false; + if(tilted) { -#if CV_TRY_AVX512_SKX - CV_UNUSED(_tiltedstep); - // TODO: Add support for 1 channel input (WIP) - if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && (cn <= 4)){ - opt_AVX512_SKX::calculate_integral_avx512(src, _srcstep, sum, _sumstep, - sqsum, _sqsumstep, width, height, cn); - return true; - } -#else - // Avoid warnings in some builds - CV_UNUSED(src); 
CV_UNUSED(_srcstep); CV_UNUSED(sum); CV_UNUSED(_sumstep); - CV_UNUSED(sqsum); CV_UNUSED(_sqsumstep); CV_UNUSED(tilted); CV_UNUSED(_tiltedstep); - CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn); -#endif + CV_UNUSED(tstep); return false; } -}; - -#if CV_SIMD && CV_SIMD_WIDTH <= 64 - -template <> -struct Integral_SIMD -{ - Integral_SIMD() {} - - bool operator()(const uchar * src, size_t _srcstep, - int * sum, size_t _sumstep, - double * sqsum, size_t, - int * tilted, size_t, - int width, int height, int cn) const + if(!sqsum) { - if (sqsum || tilted || cn != 1) + if(depth == CV_8U && sdepth == CV_32S) + return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0; + else if(depth == CV_8UC1 && sdepth == CV_32F) + return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0; + else if(depth == CV_32FC1 && sdepth == CV_32F) + return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0; + else return false; - - // the first iteration - memset(sum, 0, (width + 1) * sizeof(int)); - - // the others - for (int i = 0; i < height; ++i) - { - const uchar * src_row = src + _srcstep * i; - int * prev_sum_row = (int *)((uchar *)sum + _sumstep * i) + 1; - int * sum_row = (int *)((uchar *)sum + _sumstep * (i + 1)) + 1; - - sum_row[-1] = 0; - - v_int32 prev = vx_setzero_s32(); - int j = 0; - for ( ; j + v_uint16::nlanes <= width; j += v_uint16::nlanes) - { - v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j)); - v_int32 el4l, el4h; -#if CV_AVX2 && CV_SIMD_WIDTH == 32 - __m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2)); - vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4)); - vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8)); - __m256i shmask = _mm256_set1_epi32(7); - el4l.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_low(vsum)), prev.val); - el4h.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_high(vsum)), _mm256_permutevar8x32_epi32(el4l.val, shmask)); - prev.val = _mm256_permutevar8x32_epi32(el4h.val, shmask); -#else - el8 += v_rotate_left<1>(el8); - el8 += v_rotate_left<2>(el8); -#if CV_SIMD_WIDTH >= 32 - el8 += v_rotate_left<4>(el8); -#if CV_SIMD_WIDTH == 64 - el8 += v_rotate_left<8>(el8); -#endif -#endif - v_expand(el8, el4l, el4h); - el4l += prev; - el4h += el4l; - - prev = v_broadcast_element(el4h); -#endif - v_store(sum_row + j , el4l + vx_load(prev_sum_row + j )); - v_store(sum_row + j + v_int32::nlanes, el4h + vx_load(prev_sum_row + j + v_int32::nlanes)); - } - - for (int v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) - sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; - } - vx_cleanup(); - - return true; } -}; - -template <> -struct Integral_SIMD -{ - Integral_SIMD() {} - - bool operator()(const uchar * src, size_t _srcstep, - float * sum, size_t _sumstep, - double * sqsum, size_t, - float * tilted, size_t, - int width, int height, int cn) const + else { - if (sqsum || tilted || cn != 1) + if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S) + return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; + else if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F) + return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, 
(int)sqsumstep, size, 0, 0) >= 0; + else if(depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F) + return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; + else return false; - - // the first iteration - memset(sum, 0, (width + 1) * sizeof(int)); - - // the others - for (int i = 0; i < height; ++i) - { - const uchar * src_row = src + _srcstep * i; - float * prev_sum_row = (float *)((uchar *)sum + _sumstep * i) + 1; - float * sum_row = (float *)((uchar *)sum + _sumstep * (i + 1)) + 1; - - sum_row[-1] = 0; - - v_float32 prev = vx_setzero_f32(); - int j = 0; - for (; j + v_uint16::nlanes <= width; j += v_uint16::nlanes) - { - v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j)); - v_float32 el4l, el4h; -#if CV_AVX2 && CV_SIMD_WIDTH == 32 - __m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2)); - vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4)); - vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8)); - __m256i shmask = _mm256_set1_epi32(7); - el4l.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_low(vsum))), prev.val); - el4h.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_high(vsum))), _mm256_permutevar8x32_ps(el4l.val, shmask)); - prev.val = _mm256_permutevar8x32_ps(el4h.val, shmask); -#else - el8 += v_rotate_left<1>(el8); - el8 += v_rotate_left<2>(el8); -#if CV_SIMD_WIDTH >= 32 - el8 += v_rotate_left<4>(el8); -#if CV_SIMD_WIDTH == 64 - el8 += v_rotate_left<8>(el8); -#endif -#endif - v_int32 el4li, el4hi; - v_expand(el8, el4li, el4hi); - el4l = v_cvt_f32(el4li) + prev; - el4h = v_cvt_f32(el4hi) + el4l; - - prev = v_broadcast_element(el4h); -#endif - v_store(sum_row + j , el4l + vx_load(prev_sum_row + j )); - v_store(sum_row + j + v_float32::nlanes, el4h + vx_load(prev_sum_row + j + v_float32::nlanes)); - } - - for (float v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) - sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; - } - vx_cleanup(); - - return true; } -}; +} -#endif +#endif // HAVE_IPP -template +namespace hal { + +template static void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep, QT* sqsum, size_t _sqsumstep, ST* tilted, size_t _tiltedstep, int width, int height, int cn ) { int x, y, k; - if (Integral_SIMD()(src, _srcstep, - sum, _sumstep, - sqsum, _sqsumstep, - tilted, _tiltedstep, - width, height, cn)) - return; - int srcstep = (int)(_srcstep/sizeof(T)); int sumstep = (int)(_sumstep/sizeof(ST)); int tiltedstep = (int)(_tiltedstep/sizeof(ST)); @@ -401,157 +348,36 @@ void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep, } } - -#ifdef HAVE_OPENCL - -static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth ) +static bool integral_SIMD( + int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn) { - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + CV_INSTRUMENT_REGION(); - if ( (_src.type() != CV_8UC1) || - !(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F))) - return false; - - static const int tileSize = 16; - - String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s", - ocl::typeToStr(sdepth), tileSize, - doubleSupport ? 
" -D DOUBLE_SUPPORT" : ""); - - ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (kcols.empty()) - return false; - - UMat src = _src.getUMat(); - Size src_size = src.size(); - Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); - UMat buf(bufsize, sdepth); - kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf)); - size_t gt = src.cols, lt = tileSize; - if (!kcols.run(1, >, <, false)) - return false; - - ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (krows.empty()) - return false; - - Size sumsize(src_size.width + 1, src_size.height + 1); - _sum.create(sumsize, sdepth); - UMat sum = _sum.getUMat(); - - krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum)); - gt = src.rows; - return krows.run(1, >, <, false); + CV_CPU_DISPATCH(integral_SIMD, (depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn), + CV_CPU_DISPATCH_MODES_ALL); } -static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth ) +void integral( + int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn) { - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + CV_INSTRUMENT_REGION(); - if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) ) - return false; - - static const int tileSize = 16; - - String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s", - ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth), - tileSize, - doubleSupport ? 
" -D DOUBLE_SUPPORT" : ""); - - ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (kcols.empty()) - return false; - - UMat src = _src.getUMat(); - Size src_size = src.size(); - Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); - UMat buf(bufsize, sdepth); - UMat buf_sq(bufsize, sqdepth); - kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq)); - size_t gt = src.cols, lt = tileSize; - if (!kcols.run(1, >, <, false)) - return false; - - ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (krows.empty()) - return false; - - Size sumsize(src_size.width + 1, src_size.height + 1); - _sum.create(sumsize, sdepth); - UMat sum = _sum.getUMat(); - _sqsum.create(sumsize, sqdepth); - UMat sum_sq = _sqsum.getUMat(); - - krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq)); - gt = src.rows; - return krows.run(1, >, <, false); -} - -#endif - -} - -#if defined(HAVE_IPP) -namespace cv -{ -static bool ipp_integral( - int depth, int sdepth, int sqdepth, - const uchar* src, size_t srcstep, - uchar* sum, size_t sumstep, - uchar* sqsum, size_t sqsumstep, - uchar* tilted, size_t tstep, - int width, int height, int cn) -{ - CV_INSTRUMENT_REGION_IPP(); - - IppiSize size = {width, height}; - - if(cn > 1) - return false; - if(tilted) - { - CV_UNUSED(tstep); - return false; - } - - if(!sqsum) - { - if(depth == CV_8U && sdepth == CV_32S) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0; - else if(depth == CV_8UC1 && sdepth == CV_32F) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0; - else if(depth == CV_32FC1 && sdepth == CV_32F) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0; - else - return false; - } - else - { - if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else if(depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else - return false; - } -} -} -#endif - -namespace cv { namespace hal { - -void integral(int depth, int sdepth, int sqdepth, - const uchar* src, size_t srcstep, - uchar* sum, size_t sumstep, - uchar* sqsum, size_t sqsumstep, - uchar* tilted, size_t tstep, - int width, int height, int cn) -{ CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn); CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn)); + if (integral_SIMD(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, 
sqsumstep, tilted, tstep, width, height, cn)) + return; + #define ONE_CALL(A, B, C) integral_((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn) if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F ) @@ -579,14 +405,14 @@ void integral(int depth, int sdepth, int sqdepth, else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F ) ONE_CALL(double, double, double); else - CV_Error( CV_StsUnsupportedFormat, "" ); + CV_Error(Error::StsUnsupportedFormat, ""); #undef ONE_CALL } -}} // cv::hal:: +} // namespace hal -void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth ) +void integral(InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth ) { CV_INSTRUMENT_REGION(); @@ -624,20 +450,21 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output src.cols, src.rows, cn); } -void cv::integral( InputArray src, OutputArray sum, int sdepth ) +void integral( InputArray src, OutputArray sum, int sdepth ) { CV_INSTRUMENT_REGION(); integral( src, sum, noArray(), noArray(), sdepth ); } -void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth ) +void integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth ) { CV_INSTRUMENT_REGION(); integral( src, sum, sqsum, noArray(), sdepth, sqdepth ); } +} // namespace CV_IMPL void cvIntegral( const CvArr* image, CvArr* sumImage, diff --git a/modules/imgproc/src/sumpixels.hpp b/modules/imgproc/src/sumpixels.hpp deleted file mode 100644 index 8d5ab0a851..0000000000 --- a/modules/imgproc/src/sumpixels.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -// -// Copyright (C) 2019, Intel Corporation, all rights reserved. -#ifndef OPENCV_IMGPROC_SUM_PIXELS_HPP -#define OPENCV_IMGPROC_SUM_PIXELS_HPP - -namespace cv -{ - -namespace opt_AVX512_SKX -{ -#if CV_TRY_AVX512_SKX - void calculate_integral_avx512( - const uchar *src, size_t _srcstep, - double *sum, size_t _sumstep, - double *sqsum, size_t _sqsumstep, - int width, int height, int cn); - -#endif -} // end namespace opt_AVX512_SKX -} // end namespace cv - -#endif diff --git a/modules/imgproc/src/sumpixels.simd.hpp b/modules/imgproc/src/sumpixels.simd.hpp index 89337f3507..c8d60a0040 100644 --- a/modules/imgproc/src/sumpixels.simd.hpp +++ b/modules/imgproc/src/sumpixels.simd.hpp @@ -10,7 +10,7 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000-2008,2019 Intel Corporation, all rights reserved. +// Copyright (C) 2000-2020 Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Copyright (C) 2014, Itseez Inc., all rights reserved. // Third party copyrights are property of their respective owners. 
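// The hunk below moves these kernels behind OpenCV's CPU dispatch machinery:
// sumpixels.simd.hpp is compiled once per enabled instruction set inside the
// CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN/END markers, and the dispatcher in
// sumpixels.dispatch.cpp picks an implementation at run time through
// CV_CPU_DISPATCH. A simplified, hypothetical illustration of that idea (the
// real code is generated by the macros from sumpixels.simd_declarations.hpp,
// and the names below are illustrative):
namespace opt_AVX2     { bool integral_SIMD_impl(const uchar* src, size_t step); } // AVX2 build of the .simd.hpp
namespace cpu_baseline { bool integral_SIMD_impl(const uchar* src, size_t step); } // baseline build

static bool integral_SIMD_dispatch(const uchar* src, size_t step)
{
    if (cv::checkHardwareSupport(CV_CPU_AVX2))     // run-time CPU feature check
        return opt_AVX2::integral_SIMD_impl(src, step);
    return cpu_baseline::integral_SIMD_impl(src, step);
}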
@@ -41,13 +41,26 @@ // //M*/ -#include "precomp.hpp" -#include "opencl_kernels_imgproc.hpp" #include "opencv2/core/hal/intrin.hpp" -#include "sumpixels.hpp" -namespace cv -{ +#if CV_AVX512_SKX +#include "sumpixels.avx512_skx.hpp" +#endif + +namespace cv { namespace hal { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +// forward declarations +bool integral_SIMD( + int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn); + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +namespace { template struct Integral_SIMD @@ -62,7 +75,7 @@ struct Integral_SIMD } }; - +#if CV_AVX512_SKX template <> struct Integral_SIMD { Integral_SIMD() {}; @@ -74,24 +87,19 @@ struct Integral_SIMD { double *tilted, size_t _tiltedstep, int width, int height, int cn) const { -#if CV_TRY_AVX512_SKX CV_UNUSED(_tiltedstep); // TODO: Add support for 1 channel input (WIP) - if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && (cn <= 4)){ - opt_AVX512_SKX::calculate_integral_avx512(src, _srcstep, sum, _sumstep, - sqsum, _sqsumstep, width, height, cn); + if (!tilted && (cn <= 4)) + { + calculate_integral_avx512(src, _srcstep, sum, _sumstep, + sqsum, _sqsumstep, width, height, cn); return true; } -#else - // Avoid warnings in some builds - CV_UNUSED(src); CV_UNUSED(_srcstep); CV_UNUSED(sum); CV_UNUSED(_sumstep); - CV_UNUSED(sqsum); CV_UNUSED(_sqsumstep); CV_UNUSED(tilted); CV_UNUSED(_tiltedstep); - CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn); -#endif return false; } }; +#endif #if CV_SIMD && CV_SIMD_WIDTH <= 64 @@ -157,8 +165,6 @@ struct Integral_SIMD for (int v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; } - vx_cleanup(); - return true; } }; @@ -226,333 +232,26 @@ struct Integral_SIMD for (float v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; } - vx_cleanup(); - return true; } }; #endif -template -void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep, - QT* sqsum, size_t _sqsumstep, ST* tilted, size_t _tiltedstep, - int width, int height, int cn ) +} // namespace anon + +bool integral_SIMD( + int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn) { - int x, y, k; + CV_INSTRUMENT_REGION(); - if (Integral_SIMD()(src, _srcstep, - sum, _sumstep, - sqsum, _sqsumstep, - tilted, _tiltedstep, - width, height, cn)) - return; - - int srcstep = (int)(_srcstep/sizeof(T)); - int sumstep = (int)(_sumstep/sizeof(ST)); - int tiltedstep = (int)(_tiltedstep/sizeof(ST)); - int sqsumstep = (int)(_sqsumstep/sizeof(QT)); - - width *= cn; - - memset( sum, 0, (width+cn)*sizeof(sum[0])); - sum += sumstep + cn; - - if( sqsum ) - { - memset( sqsum, 0, (width+cn)*sizeof(sqsum[0])); - sqsum += sqsumstep + cn; - } - - if( tilted ) - { - memset( tilted, 0, (width+cn)*sizeof(tilted[0])); - tilted += tiltedstep + cn; - } - - if( sqsum == 0 && tilted == 0 ) - { - for( y = 0; y < height; y++, src += srcstep - cn, sum += sumstep - cn ) - { - for( k = 0; k < cn; k++, src++, sum++ ) - { - ST s = sum[-cn] = 0; - for( x = 0; x < width; x += cn ) - { - s += src[x]; - sum[x] = sum[x - sumstep] + s; - } - } - } - } - else if( tilted == 0 ) - { - for( y = 0; y < height; y++, src += srcstep - cn, - sum += sumstep - cn, sqsum += sqsumstep - cn ) - { - for( k 
= 0; k < cn; k++, src++, sum++, sqsum++ ) - { - ST s = sum[-cn] = 0; - QT sq = sqsum[-cn] = 0; - for( x = 0; x < width; x += cn ) - { - T it = src[x]; - s += it; - sq += (QT)it*it; - ST t = sum[x - sumstep] + s; - QT tq = sqsum[x - sqsumstep] + sq; - sum[x] = t; - sqsum[x] = tq; - } - } - } - } - else - { - AutoBuffer _buf(width+cn); - ST* buf = _buf.data(); - ST s; - QT sq; - for( k = 0; k < cn; k++, src++, sum++, tilted++, buf++ ) - { - sum[-cn] = tilted[-cn] = 0; - - for( x = 0, s = 0, sq = 0; x < width; x += cn ) - { - T it = src[x]; - buf[x] = tilted[x] = it; - s += it; - sq += (QT)it*it; - sum[x] = s; - if( sqsum ) - sqsum[x] = sq; - } - - if( width == cn ) - buf[cn] = 0; - - if( sqsum ) - { - sqsum[-cn] = 0; - sqsum++; - } - } - - for( y = 1; y < height; y++ ) - { - src += srcstep - cn; - sum += sumstep - cn; - tilted += tiltedstep - cn; - buf += -cn; - - if( sqsum ) - sqsum += sqsumstep - cn; - - for( k = 0; k < cn; k++, src++, sum++, tilted++, buf++ ) - { - T it = src[0]; - ST t0 = s = it; - QT tq0 = sq = (QT)it*it; - - sum[-cn] = 0; - if( sqsum ) - sqsum[-cn] = 0; - tilted[-cn] = tilted[-tiltedstep]; - - sum[0] = sum[-sumstep] + t0; - if( sqsum ) - sqsum[0] = sqsum[-sqsumstep] + tq0; - tilted[0] = tilted[-tiltedstep] + t0 + buf[cn]; - - for( x = cn; x < width - cn; x += cn ) - { - ST t1 = buf[x]; - buf[x - cn] = t1 + t0; - t0 = it = src[x]; - tq0 = (QT)it*it; - s += t0; - sq += tq0; - sum[x] = sum[x - sumstep] + s; - if( sqsum ) - sqsum[x] = sqsum[x - sqsumstep] + sq; - t1 += buf[x + cn] + t0 + tilted[x - tiltedstep - cn]; - tilted[x] = t1; - } - - if( width > cn ) - { - ST t1 = buf[x]; - buf[x - cn] = t1 + t0; - t0 = it = src[x]; - tq0 = (QT)it*it; - s += t0; - sq += tq0; - sum[x] = sum[x - sumstep] + s; - if( sqsum ) - sqsum[x] = sqsum[x - sqsumstep] + sq; - tilted[x] = t0 + t1 + tilted[x - tiltedstep - cn]; - buf[x] = t0; - } - - if( sqsum ) - sqsum++; - } - } - } -} - - -#ifdef HAVE_OPENCL - -static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth ) -{ - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - - if ( (_src.type() != CV_8UC1) || - !(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F))) - return false; - - static const int tileSize = 16; - - String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s", - ocl::typeToStr(sdepth), tileSize, - doubleSupport ? 
" -D DOUBLE_SUPPORT" : ""); - - ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (kcols.empty()) - return false; - - UMat src = _src.getUMat(); - Size src_size = src.size(); - Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); - UMat buf(bufsize, sdepth); - kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf)); - size_t gt = src.cols, lt = tileSize; - if (!kcols.run(1, >, <, false)) - return false; - - ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (krows.empty()) - return false; - - Size sumsize(src_size.width + 1, src_size.height + 1); - _sum.create(sumsize, sdepth); - UMat sum = _sum.getUMat(); - - krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum)); - gt = src.rows; - return krows.run(1, >, <, false); -} - -static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth ) -{ - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - - if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) ) - return false; - - static const int tileSize = 16; - - String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s", - ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth), - tileSize, - doubleSupport ? " -D DOUBLE_SUPPORT" : ""); - - ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (kcols.empty()) - return false; - - UMat src = _src.getUMat(); - Size src_size = src.size(); - Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); - UMat buf(bufsize, sdepth); - UMat buf_sq(bufsize, sqdepth); - kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq)); - size_t gt = src.cols, lt = tileSize; - if (!kcols.run(1, >, <, false)) - return false; - - ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (krows.empty()) - return false; - - Size sumsize(src_size.width + 1, src_size.height + 1); - _sum.create(sumsize, sdepth); - UMat sum = _sum.getUMat(); - _sqsum.create(sumsize, sqdepth); - UMat sum_sq = _sqsum.getUMat(); - - krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq)); - gt = src.rows; - return krows.run(1, >, <, false); -} - -#endif - -} - -#if defined(HAVE_IPP) -namespace cv -{ -static bool ipp_integral( - int depth, int sdepth, int sqdepth, - const uchar* src, size_t srcstep, - uchar* sum, size_t sumstep, - uchar* sqsum, size_t sqsumstep, - uchar* tilted, size_t tstep, - int width, int height, int cn) -{ - CV_INSTRUMENT_REGION_IPP(); - - IppiSize size = {width, height}; - - if(cn > 1) - return false; - if(tilted) - { - CV_UNUSED(tstep); - return false; - } - - if(!sqsum) - { - if(depth == CV_8U && sdepth == CV_32S) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0; - else if(depth == CV_8UC1 && sdepth == CV_32F) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0; - else if(depth == CV_32FC1 && sdepth == CV_32F) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, 
(Ipp32f*)sum, (int)sumstep, size) >= 0; - else - return false; - } - else - { - if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else if(depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else - return false; - } -} -} -#endif - -namespace cv { namespace hal { - -void integral(int depth, int sdepth, int sqdepth, - const uchar* src, size_t srcstep, - uchar* sum, size_t sumstep, - uchar* sqsum, size_t sqsumstep, - uchar* tilted, size_t tstep, - int width, int height, int cn) -{ - CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn); - CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn)); - -#define ONE_CALL(A, B, C) integral_((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn) +#define ONE_CALL(T, ST, QT) \ + return Integral_SIMD()((const T*)src, srcstep, (ST*)sum, sumstep, (QT*)sqsum, sqsumstep, (ST*)tilted, tstep, width, height, cn) if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F ) ONE_CALL(uchar, int, double); @@ -579,89 +278,11 @@ void integral(int depth, int sdepth, int sqdepth, else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F ) ONE_CALL(double, double, double); else - CV_Error( CV_StsUnsupportedFormat, "" ); + return false; #undef ONE_CALL } +#endif +CV_CPU_OPTIMIZATION_NAMESPACE_END }} // cv::hal:: - -void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth ) -{ - CV_INSTRUMENT_REGION(); - - int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - if( sdepth <= 0 ) - sdepth = depth == CV_8U ? CV_32S : CV_64F; - if ( sqdepth <= 0 ) - sqdepth = CV_64F; - sdepth = CV_MAT_DEPTH(sdepth), sqdepth = CV_MAT_DEPTH(sqdepth); - - CV_OCL_RUN(_sum.isUMat() && !_tilted.needed(), - (_sqsum.needed() ? 
ocl_integral(_src, _sum, _sqsum, sdepth, sqdepth) : ocl_integral(_src, _sum, sdepth))); - - Size ssize = _src.size(), isize(ssize.width + 1, ssize.height + 1); - _sum.create( isize, CV_MAKETYPE(sdepth, cn) ); - Mat src = _src.getMat(), sum =_sum.getMat(), sqsum, tilted; - - if( _sqsum.needed() ) - { - _sqsum.create( isize, CV_MAKETYPE(sqdepth, cn) ); - sqsum = _sqsum.getMat(); - }; - - if( _tilted.needed() ) - { - _tilted.create( isize, CV_MAKETYPE(sdepth, cn) ); - tilted = _tilted.getMat(); - } - - hal::integral(depth, sdepth, sqdepth, - src.ptr(), src.step, - sum.ptr(), sum.step, - sqsum.ptr(), sqsum.step, - tilted.ptr(), tilted.step, - src.cols, src.rows, cn); -} - -void cv::integral( InputArray src, OutputArray sum, int sdepth ) -{ - CV_INSTRUMENT_REGION(); - - integral( src, sum, noArray(), noArray(), sdepth ); -} - -void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth ) -{ - CV_INSTRUMENT_REGION(); - - integral( src, sum, sqsum, noArray(), sdepth, sqdepth ); -} - - -CV_IMPL void -cvIntegral( const CvArr* image, CvArr* sumImage, - CvArr* sumSqImage, CvArr* tiltedSumImage ) -{ - cv::Mat src = cv::cvarrToMat(image), sum = cv::cvarrToMat(sumImage), sum0 = sum; - cv::Mat sqsum0, sqsum, tilted0, tilted; - cv::Mat *psqsum = 0, *ptilted = 0; - - if( sumSqImage ) - { - sqsum0 = sqsum = cv::cvarrToMat(sumSqImage); - psqsum = &sqsum; - } - - if( tiltedSumImage ) - { - tilted0 = tilted = cv::cvarrToMat(tiltedSumImage); - ptilted = &tilted; - } - cv::integral( src, sum, psqsum ? cv::_OutputArray(*psqsum) : cv::_OutputArray(), - ptilted ? cv::_OutputArray(*ptilted) : cv::_OutputArray(), sum.depth() ); - - CV_Assert( sum.data == sum0.data && sqsum.data == sqsum0.data && tilted.data == tilted0.data ); -} - -/* End of file. 
*/ From bb91e6999b353ee18e502d0253f7ea43329df80b Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 20 Jan 2020 10:22:01 +0300 Subject: [PATCH 16/42] Fix demo --- samples/dnn/human_parsing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 43c495200a..74f644af29 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -3,8 +3,7 @@ import numpy as np import argparse -backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, - cv.dnn.DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, cv.dnn.DNN_BACKEND_OPENCV) +backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', @@ -14,7 +13,6 @@ parser.add_argument('--model', '-m', required=True, help='Path to pb model.') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " - "%d: Halide language (http://halide-lang.org/), " "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " "%d: OpenCV implementation" % backends) parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, @@ -23,6 +21,7 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, '%d: OpenCL, ' '%d: OpenCL fp16 (half-float precision), ' '%d: VPU' % targets) +args, _ = parser.parse_known_args() # To get pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view # For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet @@ -165,7 +164,6 @@ def parse_human(image_path, model_path, backend, target): if __name__ == '__main__': - args, _ = parser.parse_known_args() output = parse_human(args.input, args.model, args.backend, args.target) winName = 'Deep learning human parsing in OpenCV' cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) From 08ba63da02a940cd8ba89e05da8471e941704c74 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 20 Jan 2020 15:03:17 +0300 Subject: [PATCH 17/42] Add global pool flags --- modules/dnn/src/layers/layers_common.cpp | 93 ++++++++++++++++++---- modules/dnn/src/layers/pooling_layer.cpp | 45 ++++++----- modules/dnn/src/tensorflow/tf_importer.cpp | 2 +- 3 files changed, 101 insertions(+), 39 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index f26c9778ec..4675a380d4 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -157,27 +157,86 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern if (params.has("global_pooling_w")) globalPooling[2] = params.get("global_pooling_w"); - if (is_global) - { - util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); - if(params.has("kernel_h") || params.has("kernel_w") || params.has("kernel_size")) - { - CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified"); - } - for (int i = 0; i < pads_begin.size(); i++) { - if (pads_begin[i] != 0 || pads_end[i] != 0) - CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads 
must be = 0"); - } - for (int i = 0; i < strides.size(); i++) { - if (strides[i] != 1) - CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1"); - } - } - else + is_global = globalPooling[0] || globalPooling[1] || globalPooling[2]; + if (!is_global) { util::getKernelSize(params, kernel); util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size()); } + else + { + if ((globalPooling[0] && params.has("kernel_d")) || + (globalPooling[1] && params.has("kernel_h")) || + (globalPooling[2] && params.has("kernel_w")) || + params.has("kernel_size")) { + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified"); + } + + kernel.resize(3, 1); + pads_begin.resize(3, 0); + pads_end.resize(3, 0); + strides.resize(3, 1); + if (params.has("kernel_d")) + kernel[0] = params.get("kernel_d"); + if (params.has("kernel_h")) + kernel[1] = params.get("kernel_h"); + if (params.has("kernel_w")) + kernel[2] = params.get("kernel_w"); + + if (params.has("pad_t")) + pads_begin[1] = params.get("pad_t"); + if (params.has("pad_l")) + pads_begin[2] = params.get("pad_l"); + if (params.has("pad_b")) + pads_end[1] = params.get("pad_b"); + if (params.has("pad_r")) + pads_end[2] = params.get("pad_r"); + if (params.has("pad_h")) { + pads_begin[1] = params.get("pad_h"); + pads_end[1] = params.get("pad_h"); + } + if (params.has("pad_w")) { + pads_begin[2] = params.get("pad_w"); + pads_end[2] = params.get("pad_w"); + } + if (params.has("pad")) { + DictValue param = params.get("pad"); + if (param.size() == 1) { + std::fill(pads_begin.begin(), pads_begin.end(), param.get(0)); + pads_end = pads_begin; + } else if (param.size() <= pads_begin.size()) { + for (int i = param.size() - 1, j = pads_begin.size() - 1; i >= 0; i--, j--) { + pads_begin[j] = param.get(i); + } + pads_end = pads_begin; + } else { + for (int i = param.size() - 1, j = pads_begin.size() - 1; i >= param.size() / 2; i--, j--) { + pads_begin[j] = param.get(i); + } + for (int i = param.size() / 2 - 1, j = pads_end.size() / 2 - 1; i >= 0; i--, j--) { + pads_end[j] = param.get(i); + } + } + } + + if (params.has("stride_h")) + strides[1] = params.get("stride_h"); + if (params.has("stride_w")) + strides[2] = params.get("stride_w"); + if (params.has("stride")) { + DictValue param = params.get("stride"); + for (int i = param.size() - 1, j = strides.size() - 1; i >= 0; i--, j--) { + strides[j] = param.get(i); + } + if (param.size() == 1) + std::fill(strides.begin() + 1, strides.end(), strides[0]); + } + + for (int i = 0; i < pads_begin.size(); i++) { + if ((pads_begin[i] != 0 || pads_end[i] != 0 || strides[i] != 1) && globalPooling[i]) + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0 and strides must be = 1"); + } + } } void getConvolutionKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& pads_begin, diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 1fec982dea..eef091dd42 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -148,17 +148,24 @@ public: inp.push_back(inputs[0].size[i]); out.push_back(outputs[0].size[i]); } - kernel_size.resize(out.size()); - for (int i = 0; i < kernel_size.size(); i++) - { - int pool_idx = isGlobalPooling.size() - 1 - i; - int kernel_idx = kernel_size.size() - 1 - i; - if (isGlobalPooling[pool_idx]) - kernel_size[kernel_idx] = inp[kernel_idx]; - } - kernel = Size(kernel_size[1], kernel_size[0]); 
+ if (kernel_size.size() > inp.size()) { + kernel_size.erase(kernel_size.begin()); + strides.erase(strides.begin()); + pads_begin.erase(pads_begin.begin()); + pads_end.erase(pads_end.begin()); + } + kernel_size.resize(out.size()); + + for (int i = 0; i < inp.size(); i++) + { + int idx = isGlobalPooling.size() - inp.size() + i; + if (isGlobalPooling[idx]) + kernel_size[i] = inp[i]; + } + kernel = Size(kernel_size.back(), kernel_size[kernel_size.size() - 2]); getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); + if (pads_begin.size() == 2) { pad_t = pads_begin[0]; pad_l = pads_begin[1]; @@ -1005,15 +1012,11 @@ virtual Ptr initNgraph(const std::vector >& inp std::vector local_kernel = kernel_size.empty() ? std::vector(inpShape.begin(), inpShape.end()) : kernel_size; - for (int i = 0; i < local_kernel.size(); i++) - { - int pool_idx = isGlobalPooling.size() - 1 - i; - int kernel_idx = local_kernel.size() - 1 - i; - if (isGlobalPooling[pool_idx]) - local_kernel[kernel_idx] = inpShape[kernel_idx]; + for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { + if (isGlobalPooling[j]) + local_kernel[j] = inpShape[i]; } - if (type == ROI || type == PSROI) { outShape.push_back(pooledSize.height); @@ -1021,17 +1024,17 @@ virtual Ptr initNgraph(const std::vector >& inp } else if (padMode.empty()) { - for (int i = 0; i < local_kernel.size(); i++) { - float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i]; + for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { + float dst = (float)(inpShape[i] + pads_begin[j] + pads_end[j] - local_kernel[j]) / strides[j]; outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); } // If we have padding, ensure that the last pooling starts strictly // inside the image (instead of at the padding); otherwise clip the last. 
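// The shape computation above reduces to the usual pooling formula plus the
// clipping rule from the preceding comment. A scalar sketch for one axis,
// assuming explicit padding (empty padMode); illustration only:
static int pooledSize1D(int in, int padBegin, int padEnd, int kernel,
                        int stride, bool ceilMode)
{
    float dst = (float)(in + padBegin + padEnd - kernel) / stride;
    int out = 1 + (int)(ceilMode ? std::ceil(dst) : std::floor(dst));
    // Make sure the last window starts strictly inside the image rather than
    // in the right/bottom padding; otherwise drop it.
    if (padEnd > 0 && (out - 1) * stride >= in + padEnd)
        --out;
    return out;
}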
- for (int i = 0; i < pads_end.size(); i++) { - if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { + for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { + if (pads_end[j] && (outShape[2 + i] - 1) * strides[j] >= inpShape[i] + pads_end[j]) { --outShape[2 + i]; - CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); + CV_Assert((outShape[2 + i] - 1) * strides[j] < inpShape[i] + pads_end[j]); } } } diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index b3527d1092..fe8eb4a637 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1982,7 +1982,7 @@ void TFImporter::populateNet(Net dstNet) avgLp.set("pool", "ave"); // pooling kernel H x 1 avgLp.set("global_pooling_h", true); - avgLp.set("kernel_size", 1); + avgLp.set("kernel_w", 1); int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); layer_id[avgName] = avgId; connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); From 97455f1593a9448a0de4015e9270b51f563a9afe Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 20 Jan 2020 17:31:58 +0300 Subject: [PATCH 18/42] Remove useless condition --- modules/dnn/src/layers/layers_common.cpp | 40 +++++++++--------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index 4675a380d4..dbe69b4b81 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -149,13 +149,10 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern std::vector& strides, cv::String &padMode) { bool is_global = params.get("global_pooling", false); - globalPooling = std::vector(3, is_global); - if (params.has("global_pooling_d")) - globalPooling[0] = params.get("global_pooling_d"); - if (params.has("global_pooling_h")) - globalPooling[1] = params.get("global_pooling_h"); - if (params.has("global_pooling_w")) - globalPooling[2] = params.get("global_pooling_w"); + globalPooling.resize(3); + globalPooling[0] = params.get("global_pooling_d", is_global); + globalPooling[1] = params.get("global_pooling_h", is_global); + globalPooling[2] = params.get("global_pooling_w", is_global); is_global = globalPooling[0] || globalPooling[1] || globalPooling[2]; if (!is_global) @@ -172,25 +169,18 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified"); } - kernel.resize(3, 1); + kernel.resize(3); pads_begin.resize(3, 0); pads_end.resize(3, 0); strides.resize(3, 1); - if (params.has("kernel_d")) - kernel[0] = params.get("kernel_d"); - if (params.has("kernel_h")) - kernel[1] = params.get("kernel_h"); - if (params.has("kernel_w")) - kernel[2] = params.get("kernel_w"); + kernel[0] = params.get("kernel_d", 1); + kernel[1] = params.get("kernel_h", 1); + kernel[2] = params.get("kernel_w", 1); - if (params.has("pad_t")) - pads_begin[1] = params.get("pad_t"); - if (params.has("pad_l")) - pads_begin[2] = params.get("pad_l"); - if (params.has("pad_b")) - pads_end[1] = params.get("pad_b"); - if (params.has("pad_r")) - pads_end[2] = params.get("pad_r"); + pads_begin[1] = params.get("pad_t", 0); + pads_begin[2] = params.get("pad_l", 0); + pads_end[1] = params.get("pad_b", 0); + pads_end[2] = params.get("pad_r", 0); if (params.has("pad_h")) { pads_begin[1] = 
params.get("pad_h"); pads_end[1] = params.get("pad_h"); @@ -219,10 +209,8 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern } } - if (params.has("stride_h")) - strides[1] = params.get("stride_h"); - if (params.has("stride_w")) - strides[2] = params.get("stride_w"); + strides[1] = params.get("stride_h", 1); + strides[2] = params.get("stride_w", 1); if (params.has("stride")) { DictValue param = params.get("stride"); for (int i = param.size() - 1, j = strides.size() - 1; i >= 0; i--, j--) { From d825caf18e1c173ecbc203a83c8c790c9e776873 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 21 Jan 2020 10:09:24 +0300 Subject: [PATCH 19/42] Update check params --- modules/dnn/src/layers/layers_common.cpp | 55 +++--------------------- modules/dnn/src/layers/pooling_layer.cpp | 9 ++-- 2 files changed, 10 insertions(+), 54 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index dbe69b4b81..cd34748398 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -162,6 +162,7 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern } else { + util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); if ((globalPooling[0] && params.has("kernel_d")) || (globalPooling[1] && params.has("kernel_h")) || (globalPooling[2] && params.has("kernel_w")) || @@ -170,60 +171,18 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern } kernel.resize(3); - pads_begin.resize(3, 0); - pads_end.resize(3, 0); - strides.resize(3, 1); kernel[0] = params.get("kernel_d", 1); kernel[1] = params.get("kernel_h", 1); kernel[2] = params.get("kernel_w", 1); - pads_begin[1] = params.get("pad_t", 0); - pads_begin[2] = params.get("pad_l", 0); - pads_end[1] = params.get("pad_b", 0); - pads_end[2] = params.get("pad_r", 0); - if (params.has("pad_h")) { - pads_begin[1] = params.get("pad_h"); - pads_end[1] = params.get("pad_h"); + for (int i = 0, j = globalPooling.size() - pads_begin.size(); i < pads_begin.size(); i++, j++) { + if ((pads_begin[i] != 0 || pads_end[i] != 0) && globalPooling[j]) + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0"); } - if (params.has("pad_w")) { - pads_begin[2] = params.get("pad_w"); - pads_end[2] = params.get("pad_w"); + for (int i = 0, j = globalPooling.size() - strides.size(); i < strides.size(); i++, j++) { + if (strides[i] != 1 && globalPooling[j]) + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1"); } - if (params.has("pad")) { - DictValue param = params.get("pad"); - if (param.size() == 1) { - std::fill(pads_begin.begin(), pads_begin.end(), param.get(0)); - pads_end = pads_begin; - } else if (param.size() <= pads_begin.size()) { - for (int i = param.size() - 1, j = pads_begin.size() - 1; i >= 0; i--, j--) { - pads_begin[j] = param.get(i); - } - pads_end = pads_begin; - } else { - for (int i = param.size() - 1, j = pads_begin.size() - 1; i >= param.size() / 2; i--, j--) { - pads_begin[j] = param.get(i); - } - for (int i = param.size() / 2 - 1, j = pads_end.size() / 2 - 1; i >= 0; i--, j--) { - pads_end[j] = param.get(i); - } - } - } - - strides[1] = params.get("stride_h", 1); - strides[2] = params.get("stride_w", 1); - if (params.has("stride")) { - DictValue param = params.get("stride"); - for (int i = param.size() - 1, j = strides.size() - 1; i >= 0; i--, j--) { - strides[j] = param.get(i); - } - if (param.size() == 1) - std::fill(strides.begin() + 1, strides.end(), 
strides[0]); - } - - for (int i = 0; i < pads_begin.size(); i++) { - if ((pads_begin[i] != 0 || pads_end[i] != 0 || strides[i] != 1) && globalPooling[i]) - CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0 and strides must be = 1"); - } } } diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index eef091dd42..8d43dc3ebf 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -151,9 +151,6 @@ public: if (kernel_size.size() > inp.size()) { kernel_size.erase(kernel_size.begin()); - strides.erase(strides.begin()); - pads_begin.erase(pads_begin.begin()); - pads_end.erase(pads_end.begin()); } kernel_size.resize(out.size()); @@ -1025,16 +1022,16 @@ virtual Ptr initNgraph(const std::vector >& inp else if (padMode.empty()) { for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { - float dst = (float)(inpShape[i] + pads_begin[j] + pads_end[j] - local_kernel[j]) / strides[j]; + float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[j]) / strides[i]; outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); } // If we have padding, ensure that the last pooling starts strictly // inside the image (instead of at the padding); otherwise clip the last. for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { - if (pads_end[j] && (outShape[2 + i] - 1) * strides[j] >= inpShape[i] + pads_end[j]) { + if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { --outShape[2 + i]; - CV_Assert((outShape[2 + i] - 1) * strides[j] < inpShape[i] + pads_end[j]); + CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); } } } From fada959b4b0271e4cc19179f3b79076506ddfaab Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 21 Jan 2020 10:28:50 +0300 Subject: [PATCH 20/42] Fix comment --- modules/dnn/src/layers/layers_common.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index cd34748398..e9eb9fa649 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -155,12 +155,7 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern globalPooling[2] = params.get("global_pooling_w", is_global); is_global = globalPooling[0] || globalPooling[1] || globalPooling[2]; - if (!is_global) - { - util::getKernelSize(params, kernel); - util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size()); - } - else + if (is_global) { util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); if ((globalPooling[0] && params.has("kernel_d")) || @@ -184,6 +179,11 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1"); } } + else + { + util::getKernelSize(params, kernel); + util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size()); + } } void getConvolutionKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& pads_begin, From 832ca0734d4532acd7d92007a3e24dbd493048f4 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 22 Jan 2020 10:52:40 +0300 Subject: [PATCH 21/42] Refactoring --- .../dnn/include/opencv2/dnn/all_layers.hpp | 2 +- modules/dnn/src/layers/pooling_layer.cpp | 22 ++++++------ samples/dnn/human_parsing.py | 36 
+++++++++---------- 3 files changed, 29 insertions(+), 31 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 2b9de0b663..efbc8b131e 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -250,7 +250,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN std::vector pads_begin, pads_end; CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; - CV_DEPRECATED_EXTERNAL bool globalPooling; + CV_DEPRECATED_EXTERNAL bool globalPooling; //!< Flag is true if at least one of the axes is global pooled. std::vector isGlobalPooling; bool computeMaxIdx; String padMode; diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 8d43dc3ebf..3e1fafb338 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -97,7 +97,7 @@ public: CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); getPoolingKernelParams(params, kernel_size, isGlobalPooling, pads_begin, pads_end, strides, padMode); - globalPooling = std::accumulate(isGlobalPooling.begin(), isGlobalPooling.end(), 0) == 3; + globalPooling = isGlobalPooling[0] || isGlobalPooling[1] || isGlobalPooling[2]; if (kernel_size.size() == 2) { kernel = Size(kernel_size[1], kernel_size[0]); stride = Size(strides[1], strides[0]); @@ -149,18 +149,16 @@ public: out.push_back(outputs[0].size[i]); } - if (kernel_size.size() > inp.size()) { - kernel_size.erase(kernel_size.begin()); - } - kernel_size.resize(out.size()); + if (globalPooling) { + std::vector finalKernel; + for (int i = 0; i < inp.size(); i++) { + int idx = isGlobalPooling.size() - inp.size() + i; + finalKernel.push_back(isGlobalPooling[idx] ? inp[i] : kernel_size[idx]); + } + kernel_size = finalKernel; + kernel = Size(kernel_size[1], kernel_size[0]); + } - for (int i = 0; i < inp.size(); i++) - { - int idx = isGlobalPooling.size() - inp.size() + i; - if (isGlobalPooling[idx]) - kernel_size[i] = inp[i]; - } - kernel = Size(kernel_size.back(), kernel_size[kernel_size.size() - 2]); getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); if (pads_begin.size() == 2) { diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 74f644af29..4a51c35af7 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -6,23 +6,6 @@ import argparse backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument('--input', '-i', help='Path to input image. 
Skip this argument to capture frames from a camera.') -parser.add_argument('--model', '-m', required=True, help='Path to pb model.') -parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, - help="Choose one of computation backends: " - "%d: automatically (by default), " - "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " - "%d: OpenCV implementation" % backends) -parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, - help='Choose one of target computation devices: ' - '%d: CPU target (by default), ' - '%d: OpenCL, ' - '%d: OpenCL fp16 (half-float precision), ' - '%d: VPU' % targets) -args, _ = parser.parse_known_args() - # To get pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view # For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet # Change script evaluate_parsing_JPPNet-s2.py for human parsing @@ -147,7 +130,7 @@ def decode_labels(gray_image): return segm -def parse_human(image_path, model_path, backend, target): +def parse_human(image_path, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU): """ Prepare input for execution, run net and postprocess output to parse human. :param image_path: path to input image @@ -164,6 +147,23 @@ def parse_human(image_path, model_path, backend, target): if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--input', '-i', help='Path to input image. Skip this argument to capture frames from a camera.') + parser.add_argument('--model', '-m', required=True, help='Path to pb model.') + parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, + help="Choose one of computation backends: " + "%d: automatically (by default), " + "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "%d: OpenCV implementation" % backends) + parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, + help='Choose one of target computation devices: ' + '%d: CPU target (by default), ' + '%d: OpenCL, ' + '%d: OpenCL fp16 (half-float precision), ' + '%d: VPU' % targets) + args, _ = parser.parse_known_args() + output = parse_human(args.input, args.model, args.backend, args.target) winName = 'Deep learning human parsing in OpenCV' cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) From 35c24480ae4633d3d6b0e5bae95d336b4ba0ac55 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 22 Jan 2020 13:36:29 +0300 Subject: [PATCH 22/42] Fix axis --- modules/dnn/src/tensorflow/tf_importer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index fe8eb4a637..ef0588c9df 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1967,7 +1967,7 @@ void TFImporter::populateNet(Net dstNet) LayerParams reshapeLp; std::string reshapeName = name + "/reshape"; CV_Assert(layer_id.find(reshapeName) == layer_id.end()); - reshapeLp.set("axis", indices.at(0)); + reshapeLp.set("axis", 0); reshapeLp.set("num_axes", 1); int newShape[] = {1, 1, -1}; reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3)); @@ -1990,7 +1990,7 @@ void 
TFImporter::populateNet(Net dstNet) LayerParams sliceLp; std::string layerShapeName = name + "/slice"; CV_Assert(layer_id.find(layerShapeName) == layer_id.end()); - sliceLp.set("axis", indices.at(0)); + sliceLp.set("axis", 0); int begin[] = {0}; int size[] = {1}; sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1)); @@ -2004,8 +2004,8 @@ void TFImporter::populateNet(Net dstNet) LayerParams squeezeLp; std::string squeezeName = name + "/squeeze"; CV_Assert(layer_id.find(squeezeName) == layer_id.end()); - squeezeLp.set("axis", indices.at(0)); - squeezeLp.set("end_axis", indices.at(0) + 1); + squeezeLp.set("axis", 0); + squeezeLp.set("end_axis", 1); int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); layer_id[squeezeName] = squeezeId; connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0); From 7e5b5390bac5243a8d98b5f8a7573fc334426ef8 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 22 Jan 2020 14:57:54 +0300 Subject: [PATCH 23/42] Fix comments --- modules/dnn/include/opencv2/dnn/all_layers.hpp | 2 +- modules/dnn/src/tensorflow/tf_importer.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index efbc8b131e..0c964df06b 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -250,7 +250,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN std::vector pads_begin, pads_end; CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; - CV_DEPRECATED_EXTERNAL bool globalPooling; //!< Flag is true if at least one of the axes is global pooled. + bool globalPooling; //!< Flag is true if at least one of the axes is global pooled. 
std::vector isGlobalPooling; bool computeMaxIdx; String padMode; diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index ef0588c9df..7dffb1c04f 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -2054,12 +2054,11 @@ void TFImporter::populateNet(Net dstNet) int num = (int)getLayerAttr(layer, "N").i(); CV_Assert(layer.input_size() == num); std::string base_name = name + "/reshape_"; - std::vector reshape_names; + std::vector reshape_ids; for (int i = 0; i < num; i++) { std::ostringstream ss; ss << i; std::string reshape_name = base_name + ss.str(); - reshape_names.push_back(reshape_name); LayerParams reshapeLP; reshapeLP.set("axis", dim); reshapeLP.set("num_axes", 1); @@ -2067,6 +2066,7 @@ void TFImporter::populateNet(Net dstNet) reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2)); int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP); layer_id[reshape_name] = id; + reshape_ids.push_back(id); connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0); } @@ -2075,7 +2075,7 @@ void TFImporter::populateNet(Net dstNet) layer_id[name] = id; for (int li = 0; li < num; li++) - connect(layer_id, dstNet, Pin(reshape_names[li]), id, li); + dstNet.connect(reshape_ids[li], 0, id, li); } else if (type == "ClipByValue") { From 814a5cec414858ddbe7ae54da4535529cb712377 Mon Sep 17 00:00:00 2001 From: Miguel Pari Soto Date: Wed, 22 Jan 2020 11:48:33 -0300 Subject: [PATCH 24/42] feat(android): add support for android modules selection --- platforms/android/build_sdk.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/platforms/android/build_sdk.py b/platforms/android/build_sdk.py index f9b4c1a8f3..8c0ef8978f 100755 --- a/platforms/android/build_sdk.py +++ b/platforms/android/build_sdk.py @@ -226,6 +226,9 @@ class Builder: if self.ninja_path != 'ninja': cmake_vars['CMAKE_MAKE_PROGRAM'] = self.ninja_path + if self.config.modules_list is not None: + cmd.append("-DBUILD_LIST='%s'" % self.config.modules_list) + if self.config.extra_modules_path is not None: cmd.append("-DOPENCV_EXTRA_MODULES_PATH='%s'" % self.config.extra_modules_path) @@ -374,6 +377,7 @@ if __name__ == "__main__": parser.add_argument('--ndk_path', help="Path to Android NDK to use for build") parser.add_argument('--sdk_path', help="Path to Android SDK to use for build") parser.add_argument('--use_android_buildtools', action="store_true", help='Use cmake/ninja build tools from Android SDK') + parser.add_argument("--modules_list", help="List of modules to include for build") parser.add_argument("--extra_modules_path", help="Path to extra modules to use for build") parser.add_argument('--sign_with', help="Certificate to sign the Manager apk") parser.add_argument('--build_doc', action="store_true", help="Build javadoc") From 6670e6b0bbff52641723c710f1c7d9eb516afa72 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 15 Jan 2020 15:54:18 +0300 Subject: [PATCH 25/42] dnn: prevent unloading of InferenceEngine plugins --- modules/dnn/src/op_inf_engine.cpp | 36 ++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index c4e3f0830f..a5319e1a63 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -563,11 +563,45 @@ static std::map& getShar return sharedPlugins; } #else -InferenceEngine::Core& getCore() +static bool init_IE_plugins() +{ + // load and hold IE plugins + static 
InferenceEngine::Core* init_core = new InferenceEngine::Core(); // 'delete' is never called + (void)init_core->GetAvailableDevices(); + return true; +} +static InferenceEngine::Core& create_IE_Core_instance() { static InferenceEngine::Core core; return core; } +static InferenceEngine::Core& create_IE_Core_pointer() +{ + // load and hold IE plugins + static InferenceEngine::Core* core = new InferenceEngine::Core(); // 'delete' is never called + return *core; +} +InferenceEngine::Core& getCore() +{ + // to make happy memory leak tools use: + // - OPENCV_DNN_INFERENCE_ENGINE_HOLD_PLUGINS=0 + // - OPENCV_DNN_INFERENCE_ENGINE_CORE_LIFETIME_WORKAROUND=0 + static bool param_DNN_INFERENCE_ENGINE_HOLD_PLUGINS = utils::getConfigurationParameterBool("OPENCV_DNN_INFERENCE_ENGINE_HOLD_PLUGINS", true); + static bool init_IE_plugins_ = param_DNN_INFERENCE_ENGINE_HOLD_PLUGINS && init_IE_plugins(); CV_UNUSED(init_IE_plugins_); + + static bool param_DNN_INFERENCE_ENGINE_CORE_LIFETIME_WORKAROUND = + utils::getConfigurationParameterBool("OPENCV_DNN_INFERENCE_ENGINE_CORE_LIFETIME_WORKAROUND", +#ifdef _WIN32 + true +#else + false +#endif + ); + static InferenceEngine::Core& core = param_DNN_INFERENCE_ENGINE_CORE_LIFETIME_WORKAROUND + ? create_IE_Core_pointer() + : create_IE_Core_instance(); + return core; +} #endif #if !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT) From a6359e49d21cab9988934515af254ca047bfd122 Mon Sep 17 00:00:00 2001 From: Gourav Roy Date: Mon, 16 Dec 2019 11:12:32 +0530 Subject: [PATCH 26/42] Added tutorial for text skewness correction in C++ and Python. --- samples/cpp/text_skewness_correction.cpp | 74 ++++++++++++++++++++++ samples/python/text_skewness_correction.py | 58 +++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 samples/cpp/text_skewness_correction.cpp create mode 100644 samples/python/text_skewness_correction.py diff --git a/samples/cpp/text_skewness_correction.cpp b/samples/cpp/text_skewness_correction.cpp new file mode 100644 index 0000000000..15df92e641 --- /dev/null +++ b/samples/cpp/text_skewness_correction.cpp @@ -0,0 +1,74 @@ +/* +This tutorial demonstrates how to correct the skewness in a text. +The program takes as input a skewed source image and shows non skewed text. 
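+The steps are: binarize the text with Otsu thresholding, erode away speckle
+noise, fit a minimum-area rotated rectangle (cv::minAreaRect) around the
+remaining foreground pixels, and rotate the image back by that rectangle's
+angle with cv::warpAffine.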
+
+*/
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include <iostream>
+#include <iomanip>
+#include <sstream>
+
+using namespace cv;
+using namespace std;
+
+
+int main( int argc, char** argv )
+{
+    CommandLineParser parser(argc, argv, "{@input | imageTextR.png | input image}");
+
+    // Load image from the disk
+    Mat image = imread( samples::findFile( parser.get<String>("@input") ), IMREAD_COLOR);
+    if (image.empty())
+    {
+        cout << "Cannot load the image " + parser.get<String>("@input") << endl;
+        return -1;
+    }
+
+    Mat gray;
+    cvtColor(image, gray, COLOR_BGR2GRAY);
+
+    //Threshold the image, setting all foreground pixels to 255 and all background pixels to 0
+    Mat thresh;
+    threshold(gray, thresh, 0, 255, THRESH_BINARY_INV | THRESH_OTSU);
+
+    // Applying erode filter to remove random noise
+    int erosion_size = 1;
+    Mat element = getStructuringElement( MORPH_RECT, Size(2*erosion_size+1, 2*erosion_size+1), Point(erosion_size, erosion_size) );
+    erode(thresh, thresh, element);
+
+    cv::Mat coords;
+    findNonZero(thresh, coords);
+
+    RotatedRect box = minAreaRect(coords);
+    float angle = box.angle;
+
+    // The cv::minAreaRect function returns values in the range [-90, 0)
+    // if the angle is less than -45 we need to add 90 to it
+    if (angle < -45.0f)
+    {
+        angle = (90.0f + angle);
+    }
+
+    //Obtaining the rotation matrix
+    Point2f center((image.cols) / 2.0f, (image.rows) / 2.0f);
+    Mat M = getRotationMatrix2D(center, angle, 1.0f);
+    Mat rotated;
+
+    // Rotating the image by required angle
+    stringstream angle_to_str;
+    angle_to_str << fixed << setprecision(2) << angle;
+    warpAffine(image, rotated, M, image.size(), INTER_CUBIC, BORDER_REPLICATE);
+    putText(rotated, "Angle " + angle_to_str.str() + " degrees", Point(10, 30), FONT_HERSHEY_SIMPLEX, 0.7, Scalar(0, 0, 255), 2);
+    cout << "[INFO] angle: " << angle_to_str.str() << endl;
+
+    //Show the image
+    imshow("Input", image);
+    imshow("Rotated", rotated);
+    waitKey(0);
+    return 0;
+}
diff --git a/samples/python/text_skewness_correction.py b/samples/python/text_skewness_correction.py
new file mode 100644
index 0000000000..c8ee33b39d
--- /dev/null
+++ b/samples/python/text_skewness_correction.py
@@ -0,0 +1,58 @@
+'''
+Text skewness correction
+This tutorial demonstrates how to correct the skewness in a text.
+The program takes as input a skewed source image and shows non skewed text.
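+cv.minAreaRect reports angles in the range [-90, 0); values below -45 are
+shifted by +90 before rotating so that the applied correction stays small.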
+ +Usage: + python text_skewness_correction.py --image "Image path" +''' + +import numpy as np +import cv2 as cv +import sys +import argparse + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--image", required=True, help="path to input image file") + args = vars(parser.parse_args()) + + # load the image from disk + image = cv.imread(cv.samples.findFile(args["image"])) + if image is None: + print("can't read image " + args["image"]) + sys.exit(-1) + gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY) + + # threshold the image, setting all foreground pixels to + # 255 and all background pixels to 0 + thresh = cv.threshold(gray, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)[1] + + # Applying erode filter to remove random noise + erosion_size = 1 + element = cv.getStructuringElement(cv.MORPH_RECT, (2 * erosion_size + 1, 2 * erosion_size + 1), (erosion_size, erosion_size) ) + thresh = cv.erode(thresh, element) + + coords = cv.findNonZero(thresh) + angle = cv.minAreaRect(coords)[-1] + # the `cv.minAreaRect` function returns values in the + # range [-90, 0) if the angle is less than -45 we need to add 90 to it + if angle < -45: + angle = (90 + angle) + + (h, w) = image.shape[:2] + center = (w // 2, h // 2) + M = cv.getRotationMatrix2D(center, angle, 1.0) + rotated = cv.warpAffine(image, M, (w, h), flags=cv.INTER_CUBIC, borderMode=cv.BORDER_REPLICATE) + cv.putText(rotated, "Angle: {:.2f} degrees".format(angle), (10, 30), cv.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) + + # show the output image + print("[INFO] angle: {:.2f}".format(angle)) + cv.imshow("Input", image) + cv.imshow("Rotated", rotated) + cv.waitKey(0) + + +if __name__ == "__main__": + main() From 55b03dcaba72a626b0a75e62066fe346f1e68774 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 22 Jan 2020 17:36:17 +0300 Subject: [PATCH 27/42] Refactoring --- modules/dnn/src/layers/layers_common.cpp | 3 +-- modules/dnn/src/layers/pooling_layer.cpp | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index e9eb9fa649..78f91a69d6 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -154,8 +154,7 @@ void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kern globalPooling[1] = params.get("global_pooling_h", is_global); globalPooling[2] = params.get("global_pooling_w", is_global); - is_global = globalPooling[0] || globalPooling[1] || globalPooling[2]; - if (is_global) + if (globalPooling[0] || globalPooling[1] || globalPooling[2]) { util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); if ((globalPooling[0] && params.has("kernel_d")) || diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 3e1fafb338..ff62b6e55d 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -148,7 +148,6 @@ public: inp.push_back(inputs[0].size[i]); out.push_back(outputs[0].size[i]); } - if (globalPooling) { std::vector finalKernel; for (int i = 0; i < inp.size(); i++) { @@ -160,7 +159,6 @@ public: } getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); - if (pads_begin.size() == 2) { pad_t = pads_begin[0]; pad_l = pads_begin[1]; @@ -1004,14 +1002,15 @@ virtual Ptr initNgraph(const std::vector >& inp std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); std::vector outShape(inputs[0].begin(), inputs[0].begin() 
+ 2); - std::vector local_kernel = kernel_size.empty() ? - std::vector(inpShape.begin(), inpShape.end()) : kernel_size; + std::vector local_kernel = kernel_size.size() > inpShape.size() ? + std::vector(kernel_size.begin() + 1, kernel_size.end()) : kernel_size; - for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { - if (isGlobalPooling[j]) - local_kernel[j] = inpShape[i]; + if (globalPooling) { + for (int i = 0, j = kernel_size.size() - inpShape.size(); i < inpShape.size(); i++, j++) { + if (isGlobalPooling[j]) + local_kernel[i] = inpShape[i]; + } } - if (type == ROI || type == PSROI) { outShape.push_back(pooledSize.height); @@ -1019,14 +1018,14 @@ virtual Ptr initNgraph(const std::vector >& inp } else if (padMode.empty()) { - for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { - float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[j]) / strides[i]; + for (int i = 0; i < pads_end.size(); i++) { + float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i]; outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); } // If we have padding, ensure that the last pooling starts strictly // inside the image (instead of at the padding); otherwise clip the last. - for (int i = 0, j = local_kernel.size() - inpShape.size(); i < inpShape.size(); i++, j++) { + for (int i = 0; i < inpShape.size(); i++) { if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { --outShape[2 + i]; CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); @@ -1049,7 +1048,6 @@ virtual Ptr initNgraph(const std::vector >& inp outShape[0] = inputs[1][0]; // Number of proposals; outShape[1] = psRoiOutChannels; } - int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 
2 : 1); CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX)); From a3ae69893cf7c916020694ca1944e41c96ad2d99 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Thu, 23 Jan 2020 15:10:42 +0300 Subject: [PATCH 28/42] Extend nGraph Deconvolution layer support --- modules/dnn/src/layers/convolution_layer.cpp | 28 +++++--------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index e291d6ef6d..6bb8994a3c 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -1325,19 +1325,6 @@ public: const int group = numOutput / outGroupCn; if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - if (padMode.empty()) { - for (int i = 0; i < adjust_pads.size(); i++) { - if (pads_end[i] < adjust_pads[i]) - return false; - } - } else if (padMode == "SAME") { - for (int i = 0; i < adjust_pads.size(); i++) { - if (kernel_size[i] < pads_begin[i] + 1 + adjust_pads[i]) - return false; - } - } else if (padMode == "VALID") - return false; - return group == 1; } @@ -2042,20 +2029,16 @@ public: ieWeights = std::make_shared(ngraph::element::f32, kernel_shape, newWeights.data); } std::vector paddings_end; - if (padMode.empty()) - { - for (int i = 0; i < pads_end.size(); i++) { - paddings_end.push_back(pads_end[i] - adjust_pads[i]); - } - } - else if (padMode == "SAME") + if (padMode == "SAME") { for (int i = 0; i < pads_begin.size(); i++) { paddings_end.push_back(kernel_size[i] - pads_begin[i] - 1 - adjust_pads[i]); } + adjust_pads = std::vector(pads_begin.size(), 0); } else { paddings_end = pads_end; } + ngraph::op::PadType pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::EXPLICIT; auto deconv = std::make_shared( ieInpNode, @@ -2063,7 +2046,10 @@ public: ngraph::Strides(strides), ngraph::CoordinateDiff(std::vector(pads_begin.begin(), pads_begin.end())), ngraph::CoordinateDiff(std::vector(paddings_end.begin(), paddings_end.end())), - ngraph::Strides(dilations)); + ngraph::Strides(dilations), + pad_type, + ngraph::CoordinateDiff(std::vector(adjust_pads.begin(), adjust_pads.end()))); + if (hasBias() || fusedBias) { std::vector shape(deconv->get_shape().size(), 1); From 504cd8a9f5c2fdf184fa1bb06550b3ce0b0ea4b8 Mon Sep 17 00:00:00 2001 From: Ganesh Kathiresan Date: Thu, 23 Jan 2020 17:53:03 +0530 Subject: [PATCH 29/42] Merge pull request #16405 from ganesh-k13:bugfix/solvepnp-crash Added type check for solvePnPGeneric | Issue: #16049 * Added type check * Added checks before type fix * Tests for 16049 * calib3d: update solvePnP regression check (16049) --- .../calib3d/misc/python/test/test_solvepnp.py | 20 +++++++++++++++++++ modules/calib3d/src/solvepnp.cpp | 5 ++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/modules/calib3d/misc/python/test/test_solvepnp.py b/modules/calib3d/misc/python/test/test_solvepnp.py index b751e15c74..f7765c0384 100644 --- a/modules/calib3d/misc/python/test/test_solvepnp.py +++ b/modules/calib3d/misc/python/test/test_solvepnp.py @@ -39,6 +39,26 @@ class solvepnp_test(NewOpenCVTests): obj_points, img_points, cameraMatrix, distCoeffs, reprojectionError=r ) + def test_regression_16049(self): + obj_points = np.array([[0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32) + img_points = np.array( + [[[700, 400], [700, 600], [900, 600], [900, 400]]], dtype=np.float32 + ) + + cameraMatrix = np.array( + [[712.0634, 0, 800], [0, 712.540, 500], [0, 0, 1]], dtype=np.float32 
+        )
+        distCoeffs = np.array([[0, 0, 0, 0]], dtype=np.float32)
+        x, r, t, e = cv.solvePnPGeneric(
+            obj_points, img_points, cameraMatrix, distCoeffs
+        )
+        if e is None:
+            # noArray() is supported, see https://github.com/opencv/opencv/issues/16049
+            pass
+        else:
+            eDump = cv.utils.dumpInputArray(e)
+            self.assertEqual(eDump, "InputArray: empty()=false kind=0x00010000 flags=0x01010000 total(-1)=1 dims(-1)=2 size(-1)=1x1 type(-1)=CV_32FC1")
+
 if __name__ == '__main__':
     NewOpenCVTests.bootstrap()
diff --git a/modules/calib3d/src/solvepnp.cpp b/modules/calib3d/src/solvepnp.cpp
index d2fb3976c5..0fcd670abc 100644
--- a/modules/calib3d/src/solvepnp.cpp
+++ b/modules/calib3d/src/solvepnp.cpp
@@ -1009,7 +1009,10 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints,
     if (reprojectionError.needed())
     {
-        int type = reprojectionError.type();
+        int type = (reprojectionError.fixedType() || !reprojectionError.empty())
+                ? reprojectionError.type()
+                : (max(_ipoints.depth(), _opoints.depth()) == CV_64F ? CV_64F : CV_32F);
+
         reprojectionError.create(solutions, 1, type);
         CV_CheckType(reprojectionError.type(), type == CV_32FC1 || type == CV_64FC1,
                      "Type of reprojectionError must be CV_32FC1 or CV_64FC1!");
From 0687cffe2163a16210421f6e5d9260e38abdbc8a Mon Sep 17 00:00:00 2001
From: Liubov Batanina
Date: Thu, 23 Jan 2020 15:32:16 +0300
Subject: [PATCH 30/42] Support logSoftMax

---
 modules/dnn/src/layers/softmax_layer.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp
index 75e31006de..d7ffef0bbf 100644
--- a/modules/dnn/src/layers/softmax_layer.cpp
+++ b/modules/dnn/src/layers/softmax_layer.cpp
@@ -92,7 +92,8 @@ public:
     {
         return backendId == DNN_BACKEND_OPENCV ||
                (backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1) ||
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && !logSoftMax);
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
+               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && !logSoftMax);
     }
 
 #ifdef HAVE_OPENCL
@@ -330,6 +331,9 @@ public:
         auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
         int axis = clamp(axisRaw, ieInpNode->get_shape().size());
         auto softmax = std::make_shared<ngraph::op::v1::Softmax>(ieInpNode, axis);
+        if (logSoftMax)
+            return Ptr<BackendNode>(new InfEngineNgraphNode(std::make_shared<ngraph::op::v0::Log>(softmax)));
+
         return Ptr<BackendNode>(new InfEngineNgraphNode(softmax));
     }
 #endif  // HAVE_DNN_NGRAPH
From d0e02cccba94a1e39ef3f17b2b0546751091b20a Mon Sep 17 00:00:00 2001
From: Alexander Alekhin
Date: Thu, 23 Jan 2020 18:43:36 +0300
Subject: [PATCH 31/42] samples(dnn): avoid 'async' keyword (Python 3.7+)

---
 samples/dnn/object_detection.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py
index 8126ee58aa..d4ea40f935 100644
--- a/samples/dnn/object_detection.py
+++ b/samples/dnn/object_detection.py
@@ -43,6 +43,7 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU,
                     '%d: OpenCL fp16 (half-float precision), '
                     '%d: VPU' % targets)
 parser.add_argument('--async', type=int, default=0,
+                    dest='asyncN',
                     help='Number of asynchronous forwards at the same time. 
' 'Choose 0 for synchronous mode') args, _ = parser.parse_known_args() @@ -231,8 +232,8 @@ def processingThreadBody(): try: frame = framesQueue.get_nowait() - if args.async: - if len(futureOutputs) == args.async: + if args.asyncN: + if len(futureOutputs) == args.asyncN: frame = None # Skip the frame else: framesQueue.queue.clear() # Skip the rest of frames @@ -256,7 +257,7 @@ def processingThreadBody(): frame = cv.resize(frame, (inpWidth, inpHeight)) net.setInput(np.array([[inpHeight, inpWidth, 1.6]], dtype=np.float32), 'im_info') - if args.async: + if args.asyncN: futureOutputs.append(net.forwardAsync()) else: outs = net.forward(outNames) From d9474648f05399682072078a97756fbcc54d7d98 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 24 Jan 2020 11:00:06 +0300 Subject: [PATCH 32/42] Fix diff --- modules/dnn/src/layers/pooling_layer.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index ff62b6e55d..326e61ba64 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -1002,15 +1002,16 @@ virtual Ptr initNgraph(const std::vector >& inp std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); std::vector outShape(inputs[0].begin(), inputs[0].begin() + 2); - std::vector local_kernel = kernel_size.size() > inpShape.size() ? - std::vector(kernel_size.begin() + 1, kernel_size.end()) : kernel_size; - + std::vector local_kernel; if (globalPooling) { - for (int i = 0, j = kernel_size.size() - inpShape.size(); i < inpShape.size(); i++, j++) { - if (isGlobalPooling[j]) - local_kernel[i] = inpShape[i]; + for (int i = 0; i < inpShape.size(); i++) { + int idx = isGlobalPooling.size() - inpShape.size() + i; + local_kernel.push_back(isGlobalPooling[idx] ? inpShape[i] : kernel_size[idx]); } + } else { + local_kernel = kernel_size; } + if (type == ROI || type == PSROI) { outShape.push_back(pooledSize.height); @@ -1018,14 +1019,14 @@ virtual Ptr initNgraph(const std::vector >& inp } else if (padMode.empty()) { - for (int i = 0; i < pads_end.size(); i++) { + for (int i = 0; i < local_kernel.size(); i++) { float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i]; outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); } // If we have padding, ensure that the last pooling starts strictly // inside the image (instead of at the padding); otherwise clip the last. 
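         // i.e. keep (outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i],
         // dropping the last output element when its window would start beyond the padded input.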
- for (int i = 0; i < inpShape.size(); i++) { + for (int i = 0; i < pads_end.size(); i++) { if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { --outShape[2 + i]; CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); From 4b351120222f5fde688f37a465c78e9ef6668787 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Fri, 24 Jan 2020 16:30:10 +0300 Subject: [PATCH 33/42] Update sample --- samples/dnn/human_parsing.py | 76 ++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index 4a51c35af7..5bfe19aee7 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -1,45 +1,11 @@ +import argparse import cv2 as cv import numpy as np -import argparse backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -# To get pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view -# For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet -# Change script evaluate_parsing_JPPNet-s2.py for human parsing -# 1. Remove preprocessing to create image_batch_origin: -# - with tf.name_scope("create_inputs"): -# ... -# Add -# - image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input') -# -# 2. Create input -# image = cv2.imread(path/to/image) -# image_rev = np.flip(image, axis=1) -# input = np.stack([image, image_rev], axis=0) -# -# 3. Hardcode image_h and image_w shapes to determine output shapes. -# We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py. -# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE), -# tf.image.resize_images(parsing_out1_075, INPUT_SIZE), -# tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0) -# Do similarly with parsing_out2, parsing_out3 -# 4. Remove postprocessing. Last net operation: -# raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) -# Change: -# parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) -# -# 5. To save model after sess.run(...) add: -# input_graph_def = tf.get_default_graph().as_graph_def() -# output_node = "Mean_3" -# output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) -# -# output_graph = "LIP_JPPNet.pb" -# with tf.gfile.GFile(output_graph, "wb") as f: -# f.write(output_graph_def.SerializeToString()) - def preprocess(image_path): """ @@ -149,8 +115,9 @@ def parse_human(image_path, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, targe if __name__ == '__main__': parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input', '-i', help='Path to input image. 
Skip this argument to capture frames from a camera.') - parser.add_argument('--model', '-m', required=True, help='Path to pb model.') + parser.add_argument('--input', '-i', help='Path to input image.') + parser.add_argument('--model', '-m', required=True, help='Path to pb model + (https://drive.google.com/open?id=1XHvo111Gj1ZGoNUJt4Y4OsShrt_eUT34).') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " @@ -169,3 +136,38 @@ if __name__ == '__main__': cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) cv.imshow(winName, output) cv.waitKey() + + +# To get original .meta pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view +# For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet +# Change script evaluate_parsing_JPPNet-s2.py for human parsing +# 1. Remove preprocessing to create image_batch_origin: +# - with tf.name_scope("create_inputs"): +# ... +# Add +# - image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input') +# +# 2. Create input +# image = cv2.imread(path/to/image) +# image_rev = np.flip(image, axis=1) +# input = np.stack([image, image_rev], axis=0) +# +# 3. Hardcode image_h and image_w shapes to determine output shapes. +# We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py. +# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE), +# tf.image.resize_images(parsing_out1_075, INPUT_SIZE), +# tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0) +# Do similarly with parsing_out2, parsing_out3 +# 4. Remove postprocessing. Last net operation: +# raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) +# Change: +# parsing_ = sess.run(raw_output, feed_dict={'input:0': input}) +# +# 5. To save model after sess.run(...) 
add: +# input_graph_def = tf.get_default_graph().as_graph_def() +# output_node = "Mean_3" +# output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node) +# +# output_graph = "LIP_JPPNet.pb" +# with tf.gfile.GFile(output_graph, "wb") as f: +# f.write(output_graph_def.SerializeToString()) From ffaf15d0d95c4e40649e3be4bb1d455b5e5fb22b Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 24 Jan 2020 15:23:50 +0300 Subject: [PATCH 34/42] cmake: reuse OpenCV NEON detection in carotene - use carotene if NEON available only --- 3rdparty/carotene/hal/CMakeLists.txt | 16 ---------------- CMakeLists.txt | 10 +++++++--- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/3rdparty/carotene/hal/CMakeLists.txt b/3rdparty/carotene/hal/CMakeLists.txt index 556adf978c..a87f7a0949 100644 --- a/3rdparty/carotene/hal/CMakeLists.txt +++ b/3rdparty/carotene/hal/CMakeLists.txt @@ -58,22 +58,6 @@ function(compile_carotene) endif() add_subdirectory("${CAROTENE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}/carotene") - - if(ARM OR AARCH64) - if(CMAKE_BUILD_TYPE) - set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE}) - endif() - check_cxx_compiler_flag("-mfpu=neon" CXX_HAS_MFPU_NEON) - check_c_compiler_flag("-mfpu=neon" C_HAS_MFPU_NEON) - if(${CXX_HAS_MFPU_NEON} AND ${C_HAS_MFPU_NEON} AND NOT "${CMAKE_CXX_FLAGS} " MATCHES "-mfpu=neon[^ ]*") - get_target_property(old_flags "carotene_objs" COMPILE_FLAGS) - if(old_flags) - set_target_properties("carotene_objs" PROPERTIES COMPILE_FLAGS "${old_flags} -mfpu=neon") - else() - set_target_properties("carotene_objs" PROPERTIES COMPILE_FLAGS "-mfpu=neon") - endif() - endif() - endif() endfunction() compile_carotene() diff --git a/CMakeLists.txt b/CMakeLists.txt index f4a2568db6..3a7a6f1d7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -797,9 +797,13 @@ endif() foreach(hal ${OpenCV_HAL}) if(hal STREQUAL "carotene") - add_subdirectory(3rdparty/carotene/hal) - ocv_hal_register(CAROTENE_HAL_LIBRARIES CAROTENE_HAL_HEADERS CAROTENE_HAL_INCLUDE_DIRS) - list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION})") + if(";${CPU_BASELINE_FINAL};" MATCHES ";NEON;") + add_subdirectory(3rdparty/carotene/hal) + ocv_hal_register(CAROTENE_HAL_LIBRARIES CAROTENE_HAL_HEADERS CAROTENE_HAL_INCLUDE_DIRS) + list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION})") + else() + message(STATUS "Carotene: NEON is not available, disabling carotene...") + endif() elseif(hal STREQUAL "openvx") add_subdirectory(3rdparty/openvx) ocv_hal_register(OPENVX_HAL_LIBRARIES OPENVX_HAL_HEADERS OPENVX_HAL_INCLUDE_DIRS) From 4d2da2debebbf85e4569ce1dfa898cee7af2746c Mon Sep 17 00:00:00 2001 From: Chip Kerchner <49959681+ChipKerchner@users.noreply.github.com> Date: Fri, 24 Jan 2020 10:00:49 -0500 Subject: [PATCH 35/42] Merge pull request #16375 from ChipKerchner:vectorizeMultTranspose * Reduce LLC loads, stores and multiplies on MulTransposed - 8% faster on VSX * Add is_same method so c++11 is not required * Remove trailing whitespaces. * Change is_same to DataType depth check --- modules/core/src/matmul.simd.hpp | 153 +++++++++++++++++++++++-------- 1 file changed, 116 insertions(+), 37 deletions(-) diff --git a/modules/core/src/matmul.simd.hpp b/modules/core/src/matmul.simd.hpp index 5c60d3097d..38973ea1a4 100644 --- a/modules/core/src/matmul.simd.hpp +++ b/modules/core/src/matmul.simd.hpp @@ -2078,6 +2078,10 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double deltastep = deltastep ? 
4 : 0;
         }
 
+#if CV_SIMD_64F
+    v_float64x2 v_scale = v_setall_f64(scale);
+#endif
+
     if( !delta )
         for( i = 0; i < size.width; i++, tdst += dststep )
         {
@@ -2086,22 +2090,41 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double
             for( j = i; j <= size.width - 4; j += 4 )
             {
-                double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
-                const sT *tsrc = src + j;
-
-                for( k = 0; k < size.height; k++, tsrc += srcstep )
+#if CV_SIMD_64F
+                if (DataType<sT>::depth == CV_64F && DataType<dT>
::depth == CV_64F)
                 {
-                    double a = col_buf[k];
-                    s0 += a * tsrc[0];
-                    s1 += a * tsrc[1];
-                    s2 += a * tsrc[2];
-                    s3 += a * tsrc[3];
-                }
+                    v_float64x2 s0 = v_setzero_f64(), s1 = v_setzero_f64();
+                    const double *tsrc = (double*)(src + j);
-                tdst[j] = (dT)(s0*scale);
-                tdst[j+1] = (dT)(s1*scale);
-                tdst[j+2] = (dT)(s2*scale);
-                tdst[j+3] = (dT)(s3*scale);
+                    for( k = 0; k < size.height; k++, tsrc += srcstep )
+                    {
+                        v_float64x2 a = v_setall_f64((double)col_buf[k]);
+                        s0 += a * v_load(tsrc+0);
+                        s1 += a * v_load(tsrc+2);
+                    }
+
+                    v_store((double*)(tdst+j), s0*v_scale);
+                    v_store((double*)(tdst+j+2), s1*v_scale);
+                } else
+#endif
+                {
+                    double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+                    const sT *tsrc = src + j;
+
+                    for( k = 0; k < size.height; k++, tsrc += srcstep )
+                    {
+                        double a = col_buf[k];
+                        s0 += a * tsrc[0];
+                        s1 += a * tsrc[1];
+                        s2 += a * tsrc[2];
+                        s3 += a * tsrc[3];
+                    }
+
+                    tdst[j] = (dT)(s0*scale);
+                    tdst[j+1] = (dT)(s1*scale);
+                    tdst[j+2] = (dT)(s2*scale);
+                    tdst[j+3] = (dT)(s3*scale);
+                }
             }
 
             for( ; j < size.width; j++ )
@@ -2127,23 +2150,45 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double
             for( j = i; j <= size.width - 4; j += 4 )
             {
-                double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
-                const sT *tsrc = src + j;
-                const dT *d = delta_buf ? delta_buf : delta + j;
-
-                for( k = 0; k < size.height; k++, tsrc+=srcstep, d+=deltastep )
+#if CV_SIMD_64F
+                if (DataType<sT>::depth == CV_64F && DataType<dT>
::depth == CV_64F)
                 {
-                    double a = col_buf[k];
-                    s0 += a * (tsrc[0] - d[0]);
-                    s1 += a * (tsrc[1] - d[1]);
-                    s2 += a * (tsrc[2] - d[2]);
-                    s3 += a * (tsrc[3] - d[3]);
-                }
+                    v_float64x2 s0 = v_setzero_f64(), s1 = v_setzero_f64();
+                    const double *tsrc = (double*)(src + j);
+                    const double *d = (double*)(delta_buf ? delta_buf : delta + j);
-                tdst[j] = (dT)(s0*scale);
-                tdst[j+1] = (dT)(s1*scale);
-                tdst[j+2] = (dT)(s2*scale);
-                tdst[j+3] = (dT)(s3*scale);
+                    for( k = 0; k < size.height; k++, tsrc+=srcstep, d+=deltastep )
+                    {
+                        v_float64x2 a = v_setall_f64((double)col_buf[k]);
+                        s0 += a * (v_load(tsrc+0) - v_load(d+0));
+                        s1 += a * (v_load(tsrc+2) - v_load(d+2));
+                    }
+
+                    v_store((double*)(tdst+j), s0*v_scale);
+                    v_store((double*)(tdst+j+2), s1*v_scale);
+                }
+                else
+#endif
+
+                {
+                    double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+                    const sT *tsrc = src + j;
+                    const dT *d = delta_buf ? delta_buf : delta + j;
+
+                    for( k = 0; k < size.height; k++, tsrc+=srcstep, d+=deltastep )
+                    {
+                        double a = col_buf[k];
+                        s0 += a * (tsrc[0] - d[0]);
+                        s1 += a * (tsrc[1] - d[1]);
+                        s2 += a * (tsrc[2] - d[2]);
+                        s3 += a * (tsrc[3] - d[3]);
+                    }
+
+                    tdst[j] = (dT)(s0*scale);
+                    tdst[j+1] = (dT)(s1*scale);
+                    tdst[j+2] = (dT)(s2*scale);
+                    tdst[j+3] = (dT)(s3*scale);
+                }
             }
 
             for( ; j < size.width; j++ )
@@ -2182,10 +2227,25 @@ MulTransposedL(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double
                 double s = 0;
                 const sT *tsrc1 = src + i*srcstep;
                 const sT *tsrc2 = src + j*srcstep;
+#if CV_SIMD_64F
+                if (DataType<sT>::depth == CV_64F && DataType<dT>
::depth == CV_64F)
+                {
+                    const double *v_tsrc1 = (double *)(tsrc1);
+                    const double *v_tsrc2 = (double *)(tsrc2);
+                    v_float64x2 v_s = v_setzero_f64();
-                for( k = 0; k <= size.width - 4; k += 4 )
-                    s += (double)tsrc1[k]*tsrc2[k] + (double)tsrc1[k+1]*tsrc2[k+1] +
-                         (double)tsrc1[k+2]*tsrc2[k+2] + (double)tsrc1[k+3]*tsrc2[k+3];
+                    for( k = 0; k <= size.width - 4; k += 4 )
+                        v_s += (v_load(v_tsrc1+k) * v_load(v_tsrc2+k)) +
+                               (v_load(v_tsrc1+k+2) * v_load(v_tsrc2+k+2));
+                    s += v_reduce_sum(v_s);
+                }
+                else
+#endif
+                {
+                    for( k = 0; k <= size.width - 4; k += 4 )
+                        s += (double)tsrc1[k]*tsrc2[k] + (double)tsrc1[k+1]*tsrc2[k+1] +
+                             (double)tsrc1[k+2]*tsrc2[k+2] + (double)tsrc1[k+3]*tsrc2[k+3];
+                }
                 for( ; k < size.width; k++ )
                     s += (double)tsrc1[k] * tsrc2[k];
                 tdst[j] = (dT)(s*scale);
@@ -2220,11 +2280,30 @@ MulTransposedL(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double
                     delta_buf[2] = delta_buf[3] = tdelta2[0];
                     tdelta2 = delta_buf;
                 }
-                for( k = 0; k <= size.width-4; k += 4, tdelta2 += delta_shift )
-                    s += (double)row_buf[k]*(tsrc2[k] - tdelta2[0]) +
-                         (double)row_buf[k+1]*(tsrc2[k+1] - tdelta2[1]) +
-                         (double)row_buf[k+2]*(tsrc2[k+2] - tdelta2[2]) +
-                         (double)row_buf[k+3]*(tsrc2[k+3] - tdelta2[3]);
+#if CV_SIMD_64F
+                if (DataType<sT>::depth == CV_64F && DataType<dT>
::depth == CV_64F) + { + const double *v_tsrc2 = (double *)(tsrc2); + const double *v_tdelta2 = (double *)(tdelta2); + const double *v_row_buf = (double *)(row_buf); + v_float64x2 v_s = v_setzero_f64(); + + for( k = 0; k <= size.width - 4; k += 4, v_tdelta2 += delta_shift ) + v_s += ((v_load(v_tsrc2+k) - v_load(v_tdelta2)) * v_load(v_row_buf+k)) + + ((v_load(v_tsrc2+k+2) - v_load(v_tdelta2+2)) * v_load(v_row_buf+k+2)); + s += v_reduce_sum(v_s); + + tdelta2 = (const dT *)(v_tdelta2); + } + else +#endif + { + for( k = 0; k <= size.width-4; k += 4, tdelta2 += delta_shift ) + s += (double)row_buf[k]*(tsrc2[k] - tdelta2[0]) + + (double)row_buf[k+1]*(tsrc2[k+1] - tdelta2[1]) + + (double)row_buf[k+2]*(tsrc2[k+2] - tdelta2[2]) + + (double)row_buf[k+3]*(tsrc2[k+3] - tdelta2[3]); + } for( ; k < size.width; k++, tdelta2++ ) s += (double)row_buf[k]*(tsrc2[k] - tdelta2[0]); tdst[j] = (dT)(s*scale); From e83438c23df7c21dd5a23107dc8ad5fca95887e0 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 26 Jan 2020 00:00:25 +0000 Subject: [PATCH 36/42] core(build): fix i386 compilation --- modules/core/src/parallel_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/src/parallel_impl.cpp b/modules/core/src/parallel_impl.cpp index 90424a6990..8638af9d67 100644 --- a/modules/core/src/parallel_impl.cpp +++ b/modules/core/src/parallel_impl.cpp @@ -52,7 +52,7 @@ DECLARE_CV_PAUSE #endif #ifndef CV_PAUSE # if defined __GNUC__ && (defined __i386__ || defined __x86_64__) -# if !defined(__SSE__) +# if !defined(__SSE2__) static inline void cv_non_sse_mm_pause() { __asm__ __volatile__ ("rep; nop"); } # define _mm_pause cv_non_sse_mm_pause # endif From 0853085ec629cdd819f8ad08dcd8b322be20642d Mon Sep 17 00:00:00 2001 From: Pierre Letessier Date: Sun, 26 Jan 2020 08:19:09 +0100 Subject: [PATCH 37/42] Merge pull request #16190 from pletessier:videocapture_skip_frames Videocapture skip frames * enable skipping frames * update videoio_skip test --- modules/videoio/src/cap_ffmpeg_impl.hpp | 23 ++++++++++++ modules/videoio/test/test_ffmpeg.cpp | 48 +++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index fdc6b31b19..c69cd74509 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -966,6 +966,29 @@ bool CvCapture_FFMPEG::open( const char* _filename ) enc->thread_count = get_number_of_cpus(); //#endif +#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(52, 123, 0) + AVDictionaryEntry* avdiscard_entry = av_dict_get(dict, "avdiscard", NULL, 0); + + if (avdiscard_entry != 0) { + if(strcmp(avdiscard_entry->value, "all") == 0) + enc->skip_frame = AVDISCARD_ALL; + else if (strcmp(avdiscard_entry->value, "bidir") == 0) + enc->skip_frame = AVDISCARD_BIDIR; + else if (strcmp(avdiscard_entry->value, "default") == 0) + enc->skip_frame = AVDISCARD_DEFAULT; + else if (strcmp(avdiscard_entry->value, "none") == 0) + enc->skip_frame = AVDISCARD_NONE; +#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(54, 59, 100) + else if (strcmp(avdiscard_entry->value, "nonintra") == 0) + enc->skip_frame = AVDISCARD_NONINTRA; +#endif + else if (strcmp(avdiscard_entry->value, "nonkey") == 0) + enc->skip_frame = AVDISCARD_NONKEY; + else if (strcmp(avdiscard_entry->value, "nonref") == 0) + enc->skip_frame = AVDISCARD_NONREF; + } +#endif + #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(53, 2, 0) #define AVMEDIA_TYPE_VIDEO CODEC_TYPE_VIDEO #endif diff --git 
a/modules/videoio/test/test_ffmpeg.cpp b/modules/videoio/test/test_ffmpeg.cpp index 6519c57751..10f41daf7a 100644 --- a/modules/videoio/test/test_ffmpeg.cpp +++ b/modules/videoio/test/test_ffmpeg.cpp @@ -303,6 +303,54 @@ const videoio_container_params_t videoio_container_params[] = INSTANTIATE_TEST_CASE_P(/**/, videoio_container, testing::ValuesIn(videoio_container_params)); +typedef tuple videoio_skip_params_t; +typedef testing::TestWithParam< videoio_skip_params_t > videoio_skip; + +TEST_P(videoio_skip, DISABLED_read) // optional test, may fail in some configurations +{ +#if CV_VERSION_MAJOR >= 4 + if (!videoio_registry::hasBackend(CAP_FFMPEG)) + throw SkipTestException("Backend was not found"); +#endif + + const string path = get<0>(GetParam()); + const string env = get<1>(GetParam()); + const int expectedFrameNumber = get<2>(GetParam()); + + #ifdef _WIN32 + _putenv_s("OPENCV_FFMPEG_CAPTURE_OPTIONS", env.c_str()); + #else + setenv("OPENCV_FFMPEG_CAPTURE_OPTIONS", env.c_str(), 1); + #endif + VideoCapture container(findDataFile(path), CAP_FFMPEG); + #ifdef _WIN32 + _putenv_s("OPENCV_FFMPEG_CAPTURE_OPTIONS", ""); + #else + setenv("OPENCV_FFMPEG_CAPTURE_OPTIONS", "", 1); + #endif + + ASSERT_TRUE(container.isOpened()); + + Mat reference; + int nframes = 0, n_err = 0; + while (container.isOpened()) + { + if (container.read(reference)) + nframes++; + else if (++n_err > 3) + break; + } + EXPECT_EQ(expectedFrameNumber, nframes); +} + +const videoio_skip_params_t videoio_skip_params[] = +{ + videoio_skip_params_t("video/big_buck_bunny.mp4", "", 125), + videoio_skip_params_t("video/big_buck_bunny.mp4", "avdiscard;nonkey", 11) +}; + +INSTANTIATE_TEST_CASE_P(/**/, videoio_skip, testing::ValuesIn(videoio_skip_params)); + //========================================================================== //////////////////////////////// Parallel VideoWriters and VideoCaptures //////////////////////////////////// From acc089ca64273b37b1a248bc68288efaffa1e92c Mon Sep 17 00:00:00 2001 From: Polina Smolnikova <43805563+rayonnant14@users.noreply.github.com> Date: Sun, 26 Jan 2020 22:18:42 +0300 Subject: [PATCH 38/42] Merge pull request #15338 from rayonnant14:my_detect_and_decode_3.4 QR-Code detector : multiple detection * change in qr-codes detection * change in qr-codes detection * change in test * change in test * add multiple detection * multiple detection * multiple detect * add parallel implementation * add functional for performance tests * change in test * add perftest * returned implementation for 1 qr-code, added support for vector and vector> in MultipleDetectAndDecode * deleted all lambda expressions * changing in triangle sort * fixed warnings * fixed errors * add java and python tests * change in java tests * change in java and python tests * change in perf test * change in qrcode.cpp * add spaces * change in qrcode.cpp * change in qrcode.cpp * change in qrcode.cpp * change in java tests * change in java tests * solved problems * solved problems * change in java and python tests * change in python tests * change in python tests * change in python tests * change in methods name * deleted sample qrcode_multi, change in qrcode.cpp * change in perf tests * change in objdetect.hpp * deleted code duplication in sample qrcode.cpp * returned spaces * added spaces * deleted draw function * change in qrcode.cpp * change in qrcode.cpp * deleted all draw functions * objdetect(QR): extractVerticalLines * objdetect(QR): whitespaces * objdetect(QR): simplify operations, avoid duplicated code * change in interface, 
additional checks in java and python tests, added new key in sample for saving original image from camera * fix warnings and errors in python test * fix * write in file with space key * solved error with empty mat check in python test * correct path to test image * deleted spaces * solved error with check empty mat in python tests * added check of empty vector of points * samples: rework qrcode.cpp * objdetect(QR): fix API, input parameters must be first * objdetect(QR): test/fix points layout --- modules/java/generator/gen_java.py | 5 +- .../objdetect/include/opencv2/objdetect.hpp | 60 +- .../misc/java/test/QRCodeDetectorTest.java | 20 + .../misc/python/test/test_qrcode_detect.py | 36 +- .../objdetect/perf/perf_qrcode_pipeline.cpp | 57 + modules/objdetect/src/qrcode.cpp | 1479 +++++++++++++++-- modules/objdetect/test/test_qrcode.cpp | 153 +- samples/cpp/qrcode.cpp | 332 +++- 8 files changed, 1887 insertions(+), 255 deletions(-) diff --git a/modules/java/generator/gen_java.py b/modules/java/generator/gen_java.py index d98a98f8c5..ea1b89edd6 100755 --- a/modules/java/generator/gen_java.py +++ b/modules/java/generator/gen_java.py @@ -914,7 +914,10 @@ class JavaWrapperGenerator(object): c_epilogue.append("Mat* _retval_ = new Mat();") c_epilogue.append(fi.ctype+"_to_Mat(_ret_val_vector_, *_retval_);") else: - c_epilogue.append("return " + fi.ctype + "_to_List(env, _ret_val_vector_);") + if ret: + c_epilogue.append("jobject _retval_ = " + fi.ctype + "_to_List(env, _ret_val_vector_);") + else: + c_epilogue.append("return " + fi.ctype + "_to_List(env, _ret_val_vector_);") if fi.classname: if not fi.ctype: # c-tor retval = fi.fullClass(isCPP=True) + "* _retval_ = " diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp index cc9c4e1a18..5bd6a11002 100644 --- a/modules/objdetect/include/opencv2/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect.hpp @@ -694,8 +694,8 @@ public: CV_WRAP bool detect(InputArray img, OutputArray points) const; /** @brief Decodes QR code in image once it's found by the detect() method. - Returns UTF8-encoded output string or empty string if the code cannot be decoded. + Returns UTF8-encoded output string or empty string if the code cannot be decoded. @param img grayscale or color (BGR) image containing QR code. @param points Quadrangle vertices found by detect() method (or some other algorithm). @param straight_qrcode The optional output image containing rectified and binarized QR code @@ -705,11 +705,44 @@ public: /** @brief Both detects and decodes QR code @param img grayscale or color (BGR) image containing QR code. - @param points opiotnal output array of vertices of the found QR code quadrangle. Will be empty if not found. + @param points optional output array of vertices of the found QR code quadrangle. Will be empty if not found. @param straight_qrcode The optional output image containing rectified and binarized QR code */ CV_WRAP cv::String detectAndDecode(InputArray img, OutputArray points=noArray(), OutputArray straight_qrcode = noArray()); + /** @brief Detects QR codes in image and returns the vector of the quadrangles containing the codes. + @param img grayscale or color (BGR) image containing (or not) QR codes. + @param points Output vector of vector of vertices of the minimum-area quadrangle containing the codes. + */ + CV_WRAP + bool detectMulti(InputArray img, OutputArray points) const; + + /** @brief Decodes QR codes in image once it's found by the detect() method. 
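+    A minimal usage sketch, assuming `img` already holds an image with several QR codes:
+    @code
+    QRCodeDetector detector;
+    std::vector<Point2f> points;
+    if (detector.detectMulti(img, points))
+    {
+        std::vector<std::string> decoded_info;
+        detector.decodeMulti(img, points, decoded_info);
+    }
+    @endcode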
+ @param img grayscale or color (BGR) image containing QR codes. + @param decoded_info UTF8-encoded output vector of string or empty vector of string if the codes cannot be decoded. + @param points vector of Quadrangle vertices found by detect() method (or some other algorithm). + @param straight_qrcode The optional output vector of images containing rectified and binarized QR codes + */ + CV_WRAP + bool decodeMulti( + InputArray img, InputArray points, + CV_OUT std::vector& decoded_info, + OutputArrayOfArrays straight_qrcode = noArray() + ) const; + + /** @brief Both detects and decodes QR codes + @param img grayscale or color (BGR) image containing QR codes. + @param decoded_info UTF8-encoded output vector of string or empty vector of string if the codes cannot be decoded. + @param points optional output vector of vertices of the found QR code quadrangles. Will be empty if not found. + @param straight_qrcode The optional output vector of images containing rectified and binarized QR codes + */ + CV_WRAP + bool detectAndDecodeMulti( + InputArray img, CV_OUT std::vector& decoded_info, + OutputArray points = noArray(), + OutputArrayOfArrays straight_qrcode = noArray() + ) const; + protected: struct Impl; Ptr p; @@ -731,6 +764,29 @@ CV_EXPORTS bool detectQRCode(InputArray in, std::vector &points, double e */ CV_EXPORTS bool decodeQRCode(InputArray in, InputArray points, std::string &decoded_info, OutputArray straight_qrcode = noArray()); +/** @brief Detect QR codes in image and return vector of minimum area of quadrangle that describes QR codes. + @param in Matrix of the type CV_8UC1 containing an image where QR codes are detected. + @param points Output vector of vertices of quadrangles of minimal area that describes QR codes. + @param eps_x Epsilon neighborhood, which allows you to determine the horizontal pattern of the scheme 1:1:3:1:1 according to QR code standard. + @param eps_y Epsilon neighborhood, which allows you to determine the vertical pattern of the scheme 1:1:3:1:1 according to QR code standard. + */ +CV_EXPORTS +bool detectQRCodeMulti( + InputArray in, std::vector &points, + double eps_x = 0.2, double eps_y = 0.1); + +/** @brief Decode QR codes in image and return text that is encrypted in QR code. + @param in Matrix of the type CV_8UC1 containing an image where QR code are detected. + @param points Input vector of vertices of quadrangles of minimal area that describes QR codes. + @param decoded_info vector of String information that is encrypted in QR codes. + @param straight_qrcode vector of Matrixes of the type CV_8UC1 containing an binary straight QR codes. + */ +CV_EXPORTS +bool decodeQRCodeMulti( + InputArray in, InputArray points, + CV_OUT std::vector &decoded_info, + OutputArrayOfArrays straight_qrcode = noArray()); + //! 
@} objdetect } diff --git a/modules/objdetect/misc/java/test/QRCodeDetectorTest.java b/modules/objdetect/misc/java/test/QRCodeDetectorTest.java index 9879772e42..cd8be409aa 100644 --- a/modules/objdetect/misc/java/test/QRCodeDetectorTest.java +++ b/modules/objdetect/misc/java/test/QRCodeDetectorTest.java @@ -1,9 +1,11 @@ package org.opencv.test.objdetect; +import java.util.List; import org.opencv.core.Mat; import org.opencv.objdetect.QRCodeDetector; import org.opencv.imgcodecs.Imgcodecs; import org.opencv.test.OpenCVTestCase; +import java.util.ArrayList; public class QRCodeDetectorTest extends OpenCVTestCase { @@ -21,9 +23,27 @@ public class QRCodeDetectorTest extends OpenCVTestCase { public void testDetectAndDecode() { Mat img = Imgcodecs.imread(testDataPath + "/cv/qrcode/link_ocv.jpg"); + assertFalse(img.empty()); QRCodeDetector detector = new QRCodeDetector(); + assertNotNull(detector); String output = detector.detectAndDecode(img); assertEquals(output, "https://opencv.org/"); } + public void testDetectAndDecodeMulti() { + Mat img = Imgcodecs.imread(testDataPath + "/cv/qrcode/multiple/6_qrcodes.png"); + assertFalse(img.empty()); + QRCodeDetector detector = new QRCodeDetector(); + assertNotNull(detector); + List < String > output = new ArrayList< String >(); + boolean result = detector.detectAndDecodeMulti(img, output); + assertTrue(result); + assertEquals(output.size(), 6); + assertEquals(output.get(0), "SKIP"); + assertEquals(output.get(1), "EXTRA"); + assertEquals(output.get(2), "TWO STEPS FORWARD"); + assertEquals(output.get(3), "STEP BACK"); + assertEquals(output.get(4), "QUESTION"); + assertEquals(output.get(5), "STEP FORWARD"); + } } diff --git a/modules/objdetect/misc/python/test/test_qrcode_detect.py b/modules/objdetect/misc/python/test/test_qrcode_detect.py index 18f7ed79ed..8a95c8bce5 100644 --- a/modules/objdetect/misc/python/test/test_qrcode_detect.py +++ b/modules/objdetect/misc/python/test/test_qrcode_detect.py @@ -11,8 +11,42 @@ import cv2 as cv from tests_common import NewOpenCVTests class qrcode_detector_test(NewOpenCVTests): + + def test_detect(self): + img = cv.imread(os.path.join(self.extraTestDataPath, 'cv/qrcode/link_ocv.jpg')) + self.assertFalse(img is None) + detector = cv.QRCodeDetector() + retval, points = detector.detect(img) + self.assertTrue(retval) + self.assertEqual(points.shape, (1, 4, 2)) + def test_detect_and_decode(self): img = cv.imread(os.path.join(self.extraTestDataPath, 'cv/qrcode/link_ocv.jpg')) + self.assertFalse(img is None) detector = cv.QRCodeDetector() retval, points, straight_qrcode = detector.detectAndDecode(img) - self.assertEqual(retval, "https://opencv.org/"); + self.assertEqual(retval, "https://opencv.org/") + self.assertEqual(points.shape, (1, 4, 2)) + + def test_detect_multi(self): + img = cv.imread(os.path.join(self.extraTestDataPath, 'cv/qrcode/multiple/6_qrcodes.png')) + self.assertFalse(img is None) + detector = cv.QRCodeDetector() + retval, points = detector.detectMulti(img) + self.assertTrue(retval) + self.assertEqual(points.shape, (6, 4, 2)) + + def test_detect_and_decode_multi(self): + img = cv.imread(os.path.join(self.extraTestDataPath, 'cv/qrcode/multiple/6_qrcodes.png')) + self.assertFalse(img is None) + detector = cv.QRCodeDetector() + retval, decoded_data, points, straight_qrcode = detector.detectAndDecodeMulti(img) + self.assertTrue(retval) + self.assertEqual(len(decoded_data), 6) + self.assertEqual(decoded_data[0], "TWO STEPS FORWARD") + self.assertEqual(decoded_data[1], "EXTRA") + self.assertEqual(decoded_data[2], 
"SKIP") + self.assertEqual(decoded_data[3], "STEP FORWARD") + self.assertEqual(decoded_data[4], "STEP BACK") + self.assertEqual(decoded_data[5], "QUESTION") + self.assertEqual(points.shape, (6, 4, 2)) diff --git a/modules/objdetect/perf/perf_qrcode_pipeline.cpp b/modules/objdetect/perf/perf_qrcode_pipeline.cpp index da5f278c66..44ab54f4b2 100644 --- a/modules/objdetect/perf/perf_qrcode_pipeline.cpp +++ b/modules/objdetect/perf/perf_qrcode_pipeline.cpp @@ -53,6 +53,56 @@ PERF_TEST_P_(Perf_Objdetect_QRCode, decode) } #endif +typedef ::perf::TestBaseWithParam< std::string > Perf_Objdetect_QRCode_Multi; + +PERF_TEST_P_(Perf_Objdetect_QRCode_Multi, detectMulti) +{ + const std::string name_current_image = GetParam(); + const std::string root = "cv/qrcode/multiple/"; + + std::string image_path = findDataFile(root + name_current_image); + Mat src = imread(image_path); + ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path; + std::vector corners; + QRCodeDetector qrcode; + TEST_CYCLE() ASSERT_TRUE(qrcode.detectMulti(src, corners)); + SANITY_CHECK(corners); +} + +#ifdef HAVE_QUIRC +PERF_TEST_P_(Perf_Objdetect_QRCode_Multi, decodeMulti) +{ + const std::string name_current_image = GetParam(); + const std::string root = "cv/qrcode/multiple/"; + + std::string image_path = findDataFile(root + name_current_image); + Mat src = imread(image_path); + ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path; + QRCodeDetector qrcode; + std::vector corners; + ASSERT_TRUE(qrcode.detectMulti(src, corners)); + std::vector straight_barcode; + std::vector< cv::String > decoded_info; + TEST_CYCLE() + { + ASSERT_TRUE(qrcode.decodeMulti(src, corners, decoded_info, straight_barcode)); + for(size_t i = 0; i < decoded_info.size(); i++) + { + ASSERT_FALSE(decoded_info[i].empty()); + } + } + std::vector < std::vector< uint8_t > > decoded_info_uint8_t; + for(size_t i = 0; i < decoded_info.size(); i++) + { + std::vector< uint8_t > tmp(decoded_info[i].begin(), decoded_info[i].end()); + decoded_info_uint8_t.push_back(tmp); + } + SANITY_CHECK(decoded_info_uint8_t); + SANITY_CHECK(straight_barcode); + +} +#endif + INSTANTIATE_TEST_CASE_P(/*nothing*/, Perf_Objdetect_QRCode, ::testing::Values( "version_1_down.jpg", "version_1_left.jpg", "version_1_right.jpg", "version_1_up.jpg", "version_1_top.jpg", @@ -61,6 +111,13 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/, Perf_Objdetect_QRCode, ) ); +INSTANTIATE_TEST_CASE_P(/*nothing*/, Perf_Objdetect_QRCode_Multi, + ::testing::Values( + "2_qrcodes.png", "3_close_qrcodes.png", "3_qrcodes.png", "4_qrcodes.png", + "5_qrcodes.png", "6_qrcodes.png", "7_qrcodes.png", "8_close_qrcodes.png" + ) +); + typedef ::perf::TestBaseWithParam< tuple< std::string, Size > > Perf_Objdetect_Not_QRCode; PERF_TEST_P_(Perf_Objdetect_Not_QRCode, detect) diff --git a/modules/objdetect/src/qrcode.cpp b/modules/objdetect/src/qrcode.cpp index 88929b0820..3467803a25 100644 --- a/modules/objdetect/src/qrcode.cpp +++ b/modules/objdetect/src/qrcode.cpp @@ -22,6 +22,48 @@ namespace cv { using std::vector; +static bool checkQRInputImage(InputArray img, Mat& gray) +{ + CV_Assert(!img.empty()); + CV_CheckDepthEQ(img.depth(), CV_8U, ""); + + if (img.cols() <= 20 || img.rows() <= 20) + { + return false; // image data is not enough for providing reliable results + } + int incn = img.channels(); + CV_Check(incn, incn == 1 || incn == 3 || incn == 3, ""); + if (incn == 3 || incn == 4) + { + cvtColor(img, gray, COLOR_BGR2GRAY); + } + else + { + gray = img.getMat(); + } + return true; +} + +static void 
updatePointsResult(OutputArray points_, const vector& points) +{ + if (points_.needed()) + { + int N = int(points.size() / 4); + if (N > 0) + { + Mat m_p(N, 4, CV_32FC2, (void*)&points[0]); + int points_type = points_.fixedType() ? points_.type() : CV_32FC2; + m_p.reshape(2, points_.rows()).convertTo(points_, points_type); // Mat layout: N x 4 x 2cn + } + else + { + points_.release(); + } + } +} + + + class QRDetect { public: @@ -35,6 +77,7 @@ public: protected: vector searchHorizontalLines(); vector separateVerticalLines(const vector &list_lines); + vector extractVerticalLines(const vector &list_lines, double eps); void fixationPoints(vector &local_point); vector getQuadrilateral(vector angle_list); bool testBypassRoute(vector hull, int start, int finish); @@ -112,7 +155,7 @@ vector QRDetect::searchHorizontalLines() { if (bin_barcode_row[x] == future_pixel) { - future_pixel = 255 - future_pixel; + future_pixel = static_cast(~future_pixel); pixels_position.push_back(x); } } @@ -125,7 +168,7 @@ vector QRDetect::searchHorizontalLines() test_lines[3] = static_cast(pixels_position[i + 2] - pixels_position[i + 1]); test_lines[4] = static_cast(pixels_position[i + 3] - pixels_position[i + 2]); - double length = 0.0, weight = 0.0; + double length = 0.0, weight = 0.0; // TODO avoid 'double' calculations for (size_t j = 0; j < test_lines_size; j++) { length += test_lines[j]; } @@ -152,96 +195,115 @@ vector QRDetect::searchHorizontalLines() vector QRDetect::separateVerticalLines(const vector &list_lines) { CV_TRACE_FUNCTION(); - vector result; - int temp_length; - vector point2f_result; - uint8_t next_pixel; - vector test_lines; - for (int coeff_epsilon = 1; coeff_epsilon < 10; coeff_epsilon++) { - result.clear(); - temp_length = 0; - point2f_result.clear(); - - for (size_t pnt = 0; pnt < list_lines.size(); pnt++) + vector point2f_result = extractVerticalLines(list_lines, eps_horizontal * coeff_epsilon); + if (!point2f_result.empty()) { - const int x = cvRound(list_lines[pnt][0] + list_lines[pnt][2] * 0.5); - const int y = cvRound(list_lines[pnt][1]); - - // --------------- Search vertical up-lines --------------- // - - test_lines.clear(); - uint8_t future_pixel_up = 255; - - for (int j = y; j < bin_barcode.rows - 1; j++) - { - next_pixel = bin_barcode.ptr(j + 1)[x]; - temp_length++; - if (next_pixel == future_pixel_up) - { - future_pixel_up = 255 - future_pixel_up; - test_lines.push_back(temp_length); - temp_length = 0; - if (test_lines.size() == 3) { break; } - } - } - - // --------------- Search vertical down-lines --------------- // - - uint8_t future_pixel_down = 255; - for (int j = y; j >= 1; j--) - { - next_pixel = bin_barcode.ptr(j - 1)[x]; - temp_length++; - if (next_pixel == future_pixel_down) - { - future_pixel_down = 255 - future_pixel_down; - test_lines.push_back(temp_length); - temp_length = 0; - if (test_lines.size() == 6) { break; } - } - } - - // --------------- Compute vertical lines --------------- // - - if (test_lines.size() == 6) - { - double length = 0.0, weight = 0.0; - - for (size_t i = 0; i < test_lines.size(); i++) { length += test_lines[i]; } - - CV_Assert(length > 0); - for (size_t i = 0; i < test_lines.size(); i++) - { - if (i % 3 != 0) { weight += fabs((test_lines[i] / length) - 1.0/ 7.0); } - else { weight += fabs((test_lines[i] / length) - 3.0/14.0); } - } - - if(weight < eps_horizontal * coeff_epsilon) - { - result.push_back(list_lines[pnt]); - } - } - } - if (result.size() > 2) - { - for (size_t i = 0; i < result.size(); i++) - { - point2f_result.push_back( - 
Point2f(static_cast(result[i][0] + result[i][2] * 0.5), - static_cast(result[i][1]))); - } - vector centers; Mat labels; - double compactness; - compactness = kmeans(point2f_result, 3, labels, - TermCriteria( TermCriteria::EPS + TermCriteria::COUNT, 10, 0.1), - 3, KMEANS_PP_CENTERS, centers); - if (compactness == 0) { continue; } - if (compactness > 0) { break; } + double compactness = kmeans( + point2f_result, 3, labels, + TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 10, 0.1), + 3, KMEANS_PP_CENTERS, centers); + if (compactness == 0) + continue; + if (compactness > 0) + { + return point2f_result; + } + } + } + return vector(); // nothing +} + +vector QRDetect::extractVerticalLines(const vector &list_lines, double eps) +{ + CV_TRACE_FUNCTION(); + vector result; + vector test_lines; test_lines.reserve(6); + + for (size_t pnt = 0; pnt < list_lines.size(); pnt++) + { + const int x = cvRound(list_lines[pnt][0] + list_lines[pnt][2] * 0.5); + const int y = cvRound(list_lines[pnt][1]); + + // --------------- Search vertical up-lines --------------- // + + test_lines.clear(); + uint8_t future_pixel_up = 255; + + int temp_length_up = 0; + for (int j = y; j < bin_barcode.rows - 1; j++) + { + uint8_t next_pixel = bin_barcode.ptr(j + 1)[x]; + temp_length_up++; + if (next_pixel == future_pixel_up) + { + future_pixel_up = static_cast(~future_pixel_up); + test_lines.push_back(temp_length_up); + temp_length_up = 0; + if (test_lines.size() == 3) + break; + } + } + + // --------------- Search vertical down-lines --------------- // + + int temp_length_down = 0; + uint8_t future_pixel_down = 255; + for (int j = y; j >= 1; j--) + { + uint8_t next_pixel = bin_barcode.ptr(j - 1)[x]; + temp_length_down++; + if (next_pixel == future_pixel_down) + { + future_pixel_down = static_cast(~future_pixel_down); + test_lines.push_back(temp_length_down); + temp_length_down = 0; + if (test_lines.size() == 6) + break; + } + } + + // --------------- Compute vertical lines --------------- // + + if (test_lines.size() == 6) + { + double length = 0.0, weight = 0.0; // TODO avoid 'double' calculations + + for (size_t i = 0; i < test_lines.size(); i++) + length += test_lines[i]; + + CV_Assert(length > 0); + for (size_t i = 0; i < test_lines.size(); i++) + { + if (i % 3 != 0) + { + weight += fabs((test_lines[i] / length) - 1.0/ 7.0); + } + else + { + weight += fabs((test_lines[i] / length) - 3.0/14.0); + } + } + + if (weight < eps) + { + result.push_back(list_lines[pnt]); + } + } + } + + vector point2f_result; + if (result.size() > 2) + { + for (size_t i = 0; i < result.size(); i++) + { + point2f_result.push_back( + Point2f(static_cast(result[i][0] + result[i][2] * 0.5), + static_cast(result[i][1]))); } } return point2f_result; @@ -302,19 +364,25 @@ void QRDetect::fixationPoints(vector &local_point) for (size_t k = 0; k < list_line_iter.size(); k++) { + LineIterator& li = list_line_iter[k]; uint8_t future_pixel = 255, count_index = 0; - for(int j = 0; j < list_line_iter[k].count; j++, ++list_line_iter[k]) + for(int j = 0; j < li.count; j++, ++li) { - if (list_line_iter[k].pos().x >= bin_barcode.cols || - list_line_iter[k].pos().y >= bin_barcode.rows) { break; } - const uint8_t value = bin_barcode.at(list_line_iter[k].pos()); + const Point p = li.pos(); + if (p.x >= bin_barcode.cols || + p.y >= bin_barcode.rows) + { + break; + } + + const uint8_t value = bin_barcode.at(p); if (value == future_pixel) { - future_pixel = 255 - future_pixel; + future_pixel = static_cast(~future_pixel); count_index++; if (count_index == 3) { - 
list_area_pnt.push_back(list_line_iter[k].pos()); + list_area_pnt.push_back(p); break; } } @@ -454,7 +522,7 @@ bool QRDetect::computeTransformationPoints() next_pixel = bin_barcode.ptr(cvRound(localization_points[i].y))[index + 1]; if (next_pixel == future_pixel) { - future_pixel = 255 - future_pixel; + future_pixel = static_cast(~future_pixel); count_test_lines++; if (count_test_lines == 2) { @@ -623,11 +691,12 @@ vector QRDetect::getQuadrilateral(vector angle_list) angle_list[(i + 1) % angle_size]); for(int j = 0; j < line_iter.count; j++, ++line_iter) { - value = bin_barcode.at(line_iter.pos()); - mask_value = mask.at(line_iter.pos() + Point(1, 1)); + Point p = line_iter.pos(); + value = bin_barcode.at(p); + mask_value = mask.at(p + Point(1, 1)); if (value == 0 && mask_value == 0) { - floodFill(fill_bin_barcode, mask, line_iter.pos(), 255, + floodFill(fill_bin_barcode, mask, p, 255, 0, Scalar(), Scalar(), FLOODFILL_MASK_ONLY); } } @@ -868,26 +937,16 @@ void QRCodeDetector::setEpsY(double epsY) { p->epsY = epsY; } bool QRCodeDetector::detect(InputArray in, OutputArray points) const { - Mat inarr = in.getMat(); - CV_Assert(!inarr.empty()); - CV_Assert(inarr.depth() == CV_8U); - if (inarr.cols <= 20 || inarr.rows <= 20) - return false; // image data is not enough for providing reliable results - - int incn = inarr.channels(); - if( incn == 3 || incn == 4 ) - { - Mat gray; - cvtColor(inarr, gray, COLOR_BGR2GRAY); - inarr = gray; - } + Mat inarr; + if (!checkQRInputImage(in, inarr)) + return false; QRDetect qrdet; qrdet.init(inarr, p->epsX, p->epsY); if (!qrdet.localization()) { return false; } if (!qrdet.computeTransformationPoints()) { return false; } vector pnts2f = qrdet.getTransformationPoints(); - Mat(pnts2f).convertTo(points, points.fixedType() ? 
points.type() : CV_32FC2); + updatePointsResult(points, pnts2f); return true; } @@ -925,24 +984,7 @@ void QRDecode::init(const Mat &src, const vector &points) { CV_TRACE_FUNCTION(); vector bbox = points; - double coeff_expansion; - const int min_side = std::min(src.size().width, src.size().height); - if (min_side > 512) - { - coeff_expansion = min_side / 512; - const int width = cvRound(src.size().width / coeff_expansion); - const int height = cvRound(src.size().height / coeff_expansion); - Size new_size(width, height); - resize(src, original, new_size, 0, 0, INTER_AREA); - for (size_t i = 0; i < bbox.size(); i++) - { - bbox[i] /= static_cast(coeff_expansion); - } - } - else - { - original = src.clone(); - } + original = src.clone(); intermediate = Mat::zeros(original.size(), CV_8UC1); original_points = bbox; version = 0; @@ -1008,7 +1050,11 @@ bool QRDecode::versionDefinition() for(int j = 0; j < line_iter.count; j++, ++line_iter) { const uint8_t value = intermediate.at(line_iter.pos()); - if (value == 0) { black_point = line_iter.pos(); break; } + if (value == 0) + { + black_point = line_iter.pos(); + break; + } } Mat mask = Mat::zeros(intermediate.rows + 2, intermediate.cols + 2, CV_8UC1); @@ -1041,7 +1087,7 @@ bool QRDecode::versionDefinition() { if (intermediate_row[i] == future_pixel) { - future_pixel = 255 - future_pixel; + future_pixel = static_cast(~future_pixel); transition_x++; } } @@ -1052,11 +1098,10 @@ bool QRDecode::versionDefinition() const uint8_t value = intermediate.at(Point(j, remote_point.x)); if (value == future_pixel) { - future_pixel = 255 - future_pixel; + future_pixel = static_cast(~future_pixel); transition_y++; } } - version = saturate_cast((std::min(transition_x, transition_y) - 1) * 0.25 - 1); if ( !( 0 < version && version <= 40 ) ) { return false; } version_size = 21 + (version - 1) * 4; @@ -1177,19 +1222,9 @@ bool decodeQRCode(InputArray in, InputArray points, std::string &decoded_info, O cv::String QRCodeDetector::decode(InputArray in, InputArray points, OutputArray straight_qrcode) { - Mat inarr = in.getMat(); - CV_Assert(!inarr.empty()); - CV_Assert(inarr.depth() == CV_8U); - if (inarr.cols <= 20 || inarr.rows <= 20) - return cv::String(); // image data is not enough for providing reliable results - - int incn = inarr.channels(); - if( incn == 3 || incn == 4 ) - { - Mat gray; - cvtColor(inarr, gray, COLOR_BGR2GRAY); - inarr = gray; - } + Mat inarr; + if (!checkQRInputImage(in, inarr)) + return std::string(); vector src_points; points.copyTo(src_points); @@ -1216,34 +1251,1150 @@ cv::String QRCodeDetector::detectAndDecode(InputArray in, OutputArray points_, OutputArray straight_qrcode) { - Mat inarr = in.getMat(); - CV_Assert(!inarr.empty()); - CV_Assert(inarr.depth() == CV_8U); - if (inarr.cols <= 20 || inarr.rows <= 20) - return cv::String(); // image data is not enough for providing reliable results - - int incn = inarr.channels(); - if( incn == 3 || incn == 4 ) + Mat inarr; + if (!checkQRInputImage(in, inarr)) { - Mat gray; - cvtColor(inarr, gray, COLOR_BGR2GRAY); - inarr = gray; + points_.release(); + return std::string(); } vector points; bool ok = detect(inarr, points); - if( points_.needed() ) + if (!ok) { - if( ok ) - Mat(points).copyTo(points_); - else - points_.release(); + points_.release(); + return std::string(); } - std::string decoded_info; - if( ok ) - decoded_info = decode(inarr, points, straight_qrcode); + updatePointsResult(points_, points); + std::string decoded_info = decode(inarr, points, straight_qrcode); return decoded_info; } 
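
Before the multi-QR detector below, a usage note. The hunks above consolidate input validation into checkQRInputImage() and corner packing into updatePointsResult(); the hunks that follow add the multi-QR entry points. As an illustration only (not part of the patch), a minimal caller of the new API could look like the sketch below. It assumes the public header declares detectAndDecodeMulti() with the trailing points and straight_qrcode arguments defaulted to noArray() (the header change is not shown in this fragment), and "qrcodes.png" is a placeholder path.

    #include <opencv2/objdetect.hpp>
    #include <opencv2/imgcodecs.hpp>
    #include <iostream>

    int main()
    {
        cv::Mat img = cv::imread("qrcodes.png");  // placeholder input image
        if (img.empty())
            return 1;

        cv::QRCodeDetector qrcode;
        std::vector<std::string> decoded_info;
        std::vector<cv::Point2f> corners;  // filled with 4 consecutive corners per code

        // Per the decodeMulti() implementation below, the call returns true when at
        // least one code was detected; individual strings may still be empty if a
        // particular code could not be decoded.
        if (qrcode.detectAndDecodeMulti(img, decoded_info, corners))
        {
            for (size_t i = 0; i < decoded_info.size(); i++)
                std::cout << "QR[" << i << "]: '" << decoded_info[i] << "'" << std::endl;
        }
        return 0;
    }

The flat corners vector follows the layout produced by updatePointsResult(): one group of four points per detected code, i.e. corners.size() == 4 * decoded_info.size().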
+class QRDetectMulti : public QRDetect +{ +public: + void init(const Mat& src, double eps_vertical_ = 0.2, double eps_horizontal_ = 0.1); + bool localization(); + bool computeTransformationPoints(const size_t cur_ind); + vector< vector < Point2f > > getTransformationPoints() { return transformation_points;} +protected: + int findNumberLocalizationPoints(vector& tmp_localization_points); + void findQRCodeContours(vector& tmp_localization_points, vector< vector< Point2f > >& true_points_group, const int& num_qrcodes); + bool checkSets(vector >& true_points_group, vector >& true_points_group_copy, + vector& tmp_localization_points); + void deleteUsedPoints(vector >& true_points_group, vector >& loc, + vector& tmp_localization_points); + void fixationPoints(vector &local_point); + bool checkPoints(const vector& quadrangle_points); + bool checkPointsInsideQuadrangle(const vector& quadrangle_points); + bool checkPointsInsideTriangle(const vector& triangle_points); + + Mat bin_barcode_fullsize, bin_barcode_temp; + vector not_resized_loc_points; + vector resized_loc_points; + vector< vector< Point2f > > localization_points, transformation_points; + struct compareDistanse_y + { + bool operator()(const Point2f& a, const Point2f& b) const + { + return a.y < b.y; + } + }; + struct compareSquare + { + const vector& points; + compareSquare(const vector& points_) : points(points_) {} + bool operator()(const Vec3i& a, const Vec3i& b) const; + }; + Mat original; + class ParallelSearch : public ParallelLoopBody + { + public: + ParallelSearch(vector< vector< Point2f > >& true_points_group_, + vector< vector< Point2f > >& loc_, int iter_, int* end_, + vector< vector< Vec3i > >& all_points_, + QRDetectMulti& cl_) + : + true_points_group(true_points_group_), + loc(loc_), + iter(iter_), + end(end_), + all_points(all_points_), + cl(cl_) + { + } + void operator()(const Range& range) const CV_OVERRIDE; + vector< vector< Point2f > >& true_points_group; + vector< vector< Point2f > >& loc; + int iter; + int* end; + vector< vector< Vec3i > >& all_points; + QRDetectMulti& cl; + }; +}; + +void QRDetectMulti::ParallelSearch::operator()(const Range& range) const +{ + for (int s = range.start; s < range.end; s++) + { + bool flag = false; + for (int r = iter; r < end[s]; r++) + { + if (flag) + break; + + size_t x = iter + s; + size_t k = r - iter; + vector triangle; + + for (int l = 0; l < 3; l++) + { + triangle.push_back(true_points_group[s][all_points[s][k][l]]); + } + + if (cl.checkPointsInsideTriangle(triangle)) + { + bool flag_for_break = false; + cl.fixationPoints(triangle); + if (triangle.size() == 3) + { + cl.localization_points[x] = triangle; + if (cl.purpose == cl.SHRINKING) + { + + for (size_t j = 0; j < 3; j++) + { + cl.localization_points[x][j] *= cl.coeff_expansion; + } + } + else if (cl.purpose == cl.ZOOMING) + { + for (size_t j = 0; j < 3; j++) + { + cl.localization_points[x][j] /= cl.coeff_expansion; + } + } + for (size_t i = 0; i < 3; i++) + { + for (size_t j = i + 1; j < 3; j++) + { + if (norm(cl.localization_points[x][i] - cl.localization_points[x][j]) < 10) + { + cl.localization_points[x].clear(); + flag_for_break = true; + break; + } + } + if (flag_for_break) + break; + } + if ((!flag_for_break) + && (cl.localization_points[x].size() == 3) + && (cl.computeTransformationPoints(x)) + && (cl.checkPointsInsideQuadrangle(cl.transformation_points[x])) + && (cl.checkPoints(cl.transformation_points[x]))) + { + for (int l = 0; l < 3; l++) + { + loc[s][all_points[s][k][l]].x = -1; + } + + flag = true; + break; + 
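+                        // NOTE (editorial comment, not in the original patch): corners that
+                        // were successfully consumed by a transformation are flagged with the
+                        // sentinel x = -1 in 'loc'; deleteUsedPoints() later reads this flag
+                        // to decide which localization points return to the shared pool.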
} + } + if (flag) + { + break; + } + else + { + cl.transformation_points[x].clear(); + cl.localization_points[x].clear(); + } + } + } + } } + +void QRDetectMulti::init(const Mat& src, double eps_vertical_, double eps_horizontal_) +{ + CV_TRACE_FUNCTION(); + + CV_Assert(!src.empty()); + const double min_side = std::min(src.size().width, src.size().height); + if (min_side < 512.0) + { + purpose = ZOOMING; + coeff_expansion = 512.0 / min_side; + const int width = cvRound(src.size().width * coeff_expansion); + const int height = cvRound(src.size().height * coeff_expansion); + Size new_size(width, height); + resize(src, barcode, new_size, 0, 0, INTER_LINEAR); + } + else if (min_side > 512.0) + { + purpose = SHRINKING; + coeff_expansion = min_side / 512.0; + const int width = cvRound(src.size().width / coeff_expansion); + const int height = cvRound(src.size().height / coeff_expansion); + Size new_size(width, height); + resize(src, barcode, new_size, 0, 0, INTER_AREA); + } + else + { + purpose = UNCHANGED; + coeff_expansion = 1.0; + barcode = src.clone(); + } + + eps_vertical = eps_vertical_; + eps_horizontal = eps_horizontal_; + adaptiveThreshold(barcode, bin_barcode, 255, ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY, 83, 2); + adaptiveThreshold(src, bin_barcode_fullsize, 255, ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY, 83, 2); +} + +void QRDetectMulti::fixationPoints(vector &local_point) +{ + CV_TRACE_FUNCTION(); + + Point2f v0(local_point[1] - local_point[2]); + Point2f v1(local_point[0] - local_point[2]); + Point2f v2(local_point[1] - local_point[0]); + + double cos_angles[3], norm_triangl[3]; + norm_triangl[0] = norm(v0); + norm_triangl[1] = norm(v1); + norm_triangl[2] = norm(v2); + + cos_angles[0] = v2.dot(-v1) / (norm_triangl[1] * norm_triangl[2]); + cos_angles[1] = v2.dot(v0) / (norm_triangl[0] * norm_triangl[2]); + cos_angles[2] = v1.dot(v0) / (norm_triangl[0] * norm_triangl[1]); + + const double angle_barrier = 0.85; + if (fabs(cos_angles[0]) > angle_barrier || fabs(cos_angles[1]) > angle_barrier || fabs(cos_angles[2]) > angle_barrier) + { + local_point.clear(); + return; + } + + size_t i_min_cos = + (cos_angles[0] < cos_angles[1] && cos_angles[0] < cos_angles[2]) ? 0 : + (cos_angles[1] < cos_angles[0] && cos_angles[1] < cos_angles[2]) ? 
1 : 2; + + size_t index_max = 0; + double max_area = std::numeric_limits::min(); + for (size_t i = 0; i < local_point.size(); i++) + { + const size_t current_index = i % 3; + const size_t left_index = (i + 1) % 3; + const size_t right_index = (i + 2) % 3; + + const Point2f current_point(local_point[current_index]); + const Point2f left_point(local_point[left_index]); + const Point2f right_point(local_point[right_index]); + const Point2f central_point(intersectionLines( + current_point, + Point2f(static_cast((local_point[left_index].x + local_point[right_index].x) * 0.5), + static_cast((local_point[left_index].y + local_point[right_index].y) * 0.5)), + Point2f(0, static_cast(bin_barcode_temp.rows - 1)), + Point2f(static_cast(bin_barcode_temp.cols - 1), + static_cast(bin_barcode_temp.rows - 1)))); + + + vector list_area_pnt; + list_area_pnt.push_back(current_point); + + vector list_line_iter; + list_line_iter.push_back(LineIterator(bin_barcode_temp, current_point, left_point)); + list_line_iter.push_back(LineIterator(bin_barcode_temp, current_point, central_point)); + list_line_iter.push_back(LineIterator(bin_barcode_temp, current_point, right_point)); + + for (size_t k = 0; k < list_line_iter.size(); k++) + { + LineIterator& li = list_line_iter[k]; + uint8_t future_pixel = 255, count_index = 0; + for (int j = 0; j < li.count; j++, ++li) + { + Point p = li.pos(); + if (p.x >= bin_barcode_temp.cols || + p.y >= bin_barcode_temp.rows) + { + break; + } + + const uint8_t value = bin_barcode_temp.at(p); + if (value == future_pixel) + { + future_pixel = static_cast(~future_pixel); + count_index++; + if (count_index == 3) + { + list_area_pnt.push_back(p); + break; + } + } + } + } + + const double temp_check_area = contourArea(list_area_pnt); + if (temp_check_area > max_area) + { + index_max = current_index; + max_area = temp_check_area; + } + + } + if (index_max == i_min_cos) + { + std::swap(local_point[0], local_point[index_max]); + } + else + { + local_point.clear(); + return; + } + + const Point2f rpt = local_point[0], bpt = local_point[1], gpt = local_point[2]; + Matx22f m(rpt.x - bpt.x, rpt.y - bpt.y, gpt.x - rpt.x, gpt.y - rpt.y); + if (determinant(m) > 0) + { + std::swap(local_point[1], local_point[2]); + } +} + +bool QRDetectMulti::checkPoints(const vector& quadrangle_points) +{ + if (quadrangle_points.size() != 4) + return false; + vector quadrangle = quadrangle_points; + std::sort(quadrangle.begin(), quadrangle.end(), compareDistanse_y()); + LineIterator it1(bin_barcode_fullsize, quadrangle[1], quadrangle[0]); + LineIterator it2(bin_barcode_fullsize, quadrangle[2], quadrangle[0]); + LineIterator it3(bin_barcode_fullsize, quadrangle[1], quadrangle[3]); + LineIterator it4(bin_barcode_fullsize, quadrangle[2], quadrangle[3]); + vector list_line_iter; + list_line_iter.push_back(it1); + list_line_iter.push_back(it2); + list_line_iter.push_back(it3); + list_line_iter.push_back(it4); + int count_w = 0; + int count_b = 0; + for (int j = 0; j < 3; j +=2) + { + LineIterator& li = list_line_iter[j]; + LineIterator& li2 = list_line_iter[j + 1]; + for (int i = 0; i < li.count; i++) + { + + Point pt1 = li.pos(); + Point pt2 = li2.pos(); + LineIterator it0(bin_barcode_fullsize, pt1, pt2); + for (int r = 0; r < it0.count; r++) + { + int pixel = bin_barcode.at(it0.pos().y , it0.pos().x); + if (pixel == 255) + { + count_w++; + } + if (pixel == 0) + { + count_b++; + } + it0++; + } + li++; + li2++; + } + } + + double frac = double(count_b) / double(count_w); + double bottom_bound = 0.76; + double upper_bound = 
1.24; + if ((frac <= bottom_bound) || (frac >= upper_bound)) + return false; + return true; +} + +bool QRDetectMulti::checkPointsInsideQuadrangle(const vector& quadrangle_points) +{ + if (quadrangle_points.size() != 4) + return false; + + int count = 0; + for (size_t i = 0; i < not_resized_loc_points.size(); i++) + { + if (pointPolygonTest(quadrangle_points, not_resized_loc_points[i], true) > 0) + { + count++; + } + } + if (count == 3) + return true; + else + return false; +} + +bool QRDetectMulti::checkPointsInsideTriangle(const vector& triangle_points) +{ + if (triangle_points.size() != 3) + return false; + double eps = 3; + for (size_t i = 0; i < resized_loc_points.size(); i++) + { + if (pointPolygonTest( triangle_points, resized_loc_points[i], true ) > 0) + { + if ((abs(resized_loc_points[i].x - triangle_points[0].x) > eps) + && (abs(resized_loc_points[i].x - triangle_points[1].x) > eps) + && (abs(resized_loc_points[i].x - triangle_points[2].x) > eps)) + { + return false; + } + } + } + return true; +} + +bool QRDetectMulti::compareSquare::operator()(const Vec3i& a, const Vec3i& b) const +{ + Point2f a0 = points[a[0]]; + Point2f a1 = points[a[1]]; + Point2f a2 = points[a[2]]; + Point2f b0 = points[b[0]]; + Point2f b1 = points[b[1]]; + Point2f b2 = points[b[2]]; + return fabs((a1.x - a0.x) * (a2.y - a0.y) - (a2.x - a0.x) * (a1.y - a0.y)) < + fabs((b1.x - b0.x) * (b2.y - b0.y) - (b2.x - b0.x) * (b1.y - b0.y)); +} + +int QRDetectMulti::findNumberLocalizationPoints(vector& tmp_localization_points) +{ + size_t number_possible_purpose = 1; + if (purpose == SHRINKING) + number_possible_purpose = 2; + Mat tmp_shrinking = bin_barcode; + int tmp_num_points = 0; + int num_points = -1; + for (eps_horizontal = 0.1; eps_horizontal < 0.4; eps_horizontal += 0.1) + { + tmp_num_points = 0; + num_points = -1; + if (purpose == SHRINKING) + number_possible_purpose = 2; + else + number_possible_purpose = 1; + for (size_t k = 0; k < number_possible_purpose; k++) + { + if (k == 1) + bin_barcode = bin_barcode_fullsize; + vector list_lines_x = searchHorizontalLines(); + if (list_lines_x.empty()) + { + if (k == 0) + { + k = 1; + bin_barcode = bin_barcode_fullsize; + list_lines_x = searchHorizontalLines(); + if (list_lines_x.empty()) + break; + } + else + break; + } + vector list_lines_y = extractVerticalLines(list_lines_x, eps_horizontal); + if (list_lines_y.size() < 3) + { + if (k == 0) + { + k = 1; + bin_barcode = bin_barcode_fullsize; + list_lines_x = searchHorizontalLines(); + if (list_lines_x.empty()) + break; + list_lines_y = extractVerticalLines(list_lines_x, eps_horizontal); + if (list_lines_y.size() < 3) + break; + } + else + break; + } + vector index_list_lines_y; + for (size_t i = 0; i < list_lines_y.size(); i++) + index_list_lines_y.push_back(-1); + num_points = 0; + for (size_t i = 0; i < list_lines_y.size() - 1; i++) + { + for (size_t j = i; j < list_lines_y.size(); j++ ) + { + + double points_distance = norm(list_lines_y[i] - list_lines_y[j]); + if (points_distance <= 10) + { + if ((index_list_lines_y[i] == -1) && (index_list_lines_y[j] == -1)) + { + index_list_lines_y[i] = num_points; + index_list_lines_y[j] = num_points; + num_points++; + } + else if (index_list_lines_y[i] != -1) + index_list_lines_y[j] = index_list_lines_y[i]; + else if (index_list_lines_y[j] != -1) + index_list_lines_y[i] = index_list_lines_y[j]; + } + } + } + for (size_t i = 0; i < index_list_lines_y.size(); i++) + { + if (index_list_lines_y[i] == -1) + { + index_list_lines_y[i] = num_points; + num_points++; + } + } + if 
((tmp_num_points < num_points) && (k == 1)) + { + purpose = UNCHANGED; + tmp_num_points = num_points; + bin_barcode = bin_barcode_fullsize; + coeff_expansion = 1.0; + } + if ((tmp_num_points < num_points) && (k == 0)) + { + tmp_num_points = num_points; + } + } + + if ((tmp_num_points < 3) && (tmp_num_points >= 1)) + { + const double min_side = std::min(bin_barcode_fullsize.size().width, bin_barcode_fullsize.size().height); + if (min_side > 512) + { + bin_barcode = tmp_shrinking; + purpose = SHRINKING; + coeff_expansion = min_side / 512.0; + } + if (min_side < 512) + { + bin_barcode = tmp_shrinking; + purpose = ZOOMING; + coeff_expansion = 512 / min_side; + } + } + else + break; + } + if (purpose == SHRINKING) + bin_barcode = tmp_shrinking; + num_points = tmp_num_points; + vector list_lines_x = searchHorizontalLines(); + if (list_lines_x.empty()) + return num_points; + vector list_lines_y = extractVerticalLines(list_lines_x, eps_horizontal); + if (list_lines_y.size() < 3) + return num_points; + if (num_points < 3) + return num_points; + + Mat labels; + kmeans(list_lines_y, num_points, labels, + TermCriteria( TermCriteria::EPS + TermCriteria::COUNT, 10, 0.1), + num_points, KMEANS_PP_CENTERS, tmp_localization_points); + bin_barcode_temp = bin_barcode.clone(); + if (purpose == SHRINKING) + { + const int width = cvRound(bin_barcode.size().width * coeff_expansion); + const int height = cvRound(bin_barcode.size().height * coeff_expansion); + Size new_size(width, height); + Mat intermediate; + resize(bin_barcode, intermediate, new_size, 0, 0, INTER_LINEAR); + bin_barcode = intermediate.clone(); + } + else if (purpose == ZOOMING) + { + const int width = cvRound(bin_barcode.size().width / coeff_expansion); + const int height = cvRound(bin_barcode.size().height / coeff_expansion); + Size new_size(width, height); + Mat intermediate; + resize(bin_barcode, intermediate, new_size, 0, 0, INTER_LINEAR); + bin_barcode = intermediate.clone(); + } + else + { + bin_barcode = bin_barcode_fullsize.clone(); + } + return num_points; +} + +void QRDetectMulti::findQRCodeContours(vector& tmp_localization_points, + vector< vector< Point2f > >& true_points_group, const int& num_qrcodes) +{ + Mat gray, blur_image, threshold_output; + Mat bar = barcode; + const int width = cvRound(bin_barcode.size().width); + const int height = cvRound(bin_barcode.size().height); + Size new_size(width, height); + resize(bar, bar, new_size, 0, 0, INTER_LINEAR); + blur(bar, blur_image, Size(3, 3)); + threshold(blur_image, threshold_output, 50, 255, THRESH_BINARY); + + vector< vector< Point > > contours; + vector hierarchy; + findContours(threshold_output, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0)); + vector all_contours_points; + for (size_t i = 0; i < contours.size(); i++) + { + for (size_t j = 0; j < contours[i].size(); j++) + { + all_contours_points.push_back(contours[i][j]); + } + } + Mat qrcode_labels; + vector clustered_localization_points; + int count_contours = num_qrcodes; + if (all_contours_points.size() < size_t(num_qrcodes)) + count_contours = (int)all_contours_points.size(); + kmeans(all_contours_points, count_contours, qrcode_labels, + TermCriteria( TermCriteria::EPS + TermCriteria::COUNT, 10, 0.1), + count_contours, KMEANS_PP_CENTERS, clustered_localization_points); + + vector< vector< Point2f > > qrcode_clusters(count_contours); + for (int i = 0; i < count_contours; i++) + for (int j = 0; j < int(all_contours_points.size()); j++) + { + if (qrcode_labels.at(j, 0) == i) + { + 
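+            // NOTE (editorial comment, not in the original patch): qrcode_labels holds
+            // the kmeans cluster index assigned to each contour point, so this loop
+            // regroups all_contours_points into one candidate point set per expected
+            // QR code before the convex hull of each cluster is taken below.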
qrcode_clusters[i].push_back(all_contours_points[j]); + } + } + vector< vector< Point2f > > hull(count_contours); + for (size_t i = 0; i < qrcode_clusters.size(); i++) + convexHull(Mat(qrcode_clusters[i]), hull[i]); + not_resized_loc_points = tmp_localization_points; + resized_loc_points = tmp_localization_points; + if (purpose == SHRINKING) + { + for (size_t j = 0; j < not_resized_loc_points.size(); j++) + { + not_resized_loc_points[j] *= coeff_expansion; + } + } + else if (purpose == ZOOMING) + { + for (size_t j = 0; j < not_resized_loc_points.size(); j++) + { + not_resized_loc_points[j] /= coeff_expansion; + } + } + + true_points_group.resize(hull.size()); + + for (size_t j = 0; j < hull.size(); j++) + { + for (size_t i = 0; i < not_resized_loc_points.size(); i++) + { + if (pointPolygonTest(hull[j], not_resized_loc_points[i], true) > 0) + { + true_points_group[j].push_back(tmp_localization_points[i]); + tmp_localization_points[i].x = -1; + } + + } + } + vector copy; + for (size_t j = 0; j < tmp_localization_points.size(); j++) + { + if (tmp_localization_points[j].x != -1) + copy.push_back(tmp_localization_points[j]); + } + tmp_localization_points = copy; +} + +bool QRDetectMulti::checkSets(vector >& true_points_group, vector >& true_points_group_copy, + vector& tmp_localization_points) +{ + for (size_t i = 0; i < true_points_group.size(); i++) + { + if (true_points_group[i].size() < 3) + { + for (size_t j = 0; j < true_points_group[i].size(); j++) + tmp_localization_points.push_back(true_points_group[i][j]); + true_points_group[i].clear(); + } + } + vector< vector< Point2f > > temp_for_copy; + for (size_t i = 0; i < true_points_group.size(); i++) + { + if (true_points_group[i].size() != 0) + temp_for_copy.push_back(true_points_group[i]); + } + true_points_group = temp_for_copy; + if (true_points_group.size() == 0) + { + true_points_group.push_back(tmp_localization_points); + tmp_localization_points.clear(); + } + if (true_points_group.size() == 0) + return false; + if (true_points_group[0].size() < 3) + return false; + + + int* set_size = new int[true_points_group.size()]; + for (size_t i = 0; i < true_points_group.size(); i++) + { + set_size[i] = int(0.5 * (true_points_group[i].size() - 2 ) * (true_points_group[i].size() - 1)); + } + vector< vector< Vec3i > > all_points(true_points_group.size()); + for (size_t i = 0; i < true_points_group.size(); i++) + all_points[i].resize(set_size[i]); + int cur_cluster = 0; + for (size_t i = 0; i < true_points_group.size(); i++) + { + cur_cluster = 0; + for (size_t j = 1; j < true_points_group[i].size() - 1; j++) + for (size_t k = j + 1; k < true_points_group[i].size(); k++) + { + all_points[i][cur_cluster][0] = 0; + all_points[i][cur_cluster][1] = int(j); + all_points[i][cur_cluster][2] = int(k); + cur_cluster++; + } + } + + for (size_t i = 0; i < true_points_group.size(); i++) + { + std::sort(all_points[i].begin(), all_points[i].end(), compareSquare(true_points_group[i])); + } + if (true_points_group.size() == 1) + { + int check_number = 35; + if (set_size[0] > check_number) + set_size[0] = check_number; + all_points[0].resize(set_size[0]); + } + int iter = (int)localization_points.size(); + localization_points.resize(iter + true_points_group.size()); + transformation_points.resize(iter + true_points_group.size()); + + true_points_group_copy = true_points_group; + int* end = new int[true_points_group.size()]; + for (size_t i = 0; i < true_points_group.size(); i++) + end[i] = iter + set_size[i]; + ParallelSearch parallelSearch(true_points_group, + 
true_points_group_copy, iter, end, all_points, *this); + parallel_for_(Range(0, (int)true_points_group.size()), parallelSearch); + + return true; +} + +void QRDetectMulti::deleteUsedPoints(vector >& true_points_group, vector >& loc, + vector& tmp_localization_points) +{ + size_t iter = localization_points.size() - true_points_group.size() ; + for (size_t s = 0; s < true_points_group.size(); s++) + { + if (localization_points[iter + s].empty()) + loc[s][0].x = -2; + + if (loc[s].size() == 3) + { + + if ((true_points_group.size() > 1) || ((true_points_group.size() == 1) && (tmp_localization_points.size() != 0)) ) + { + for (size_t j = 0; j < true_points_group[s].size(); j++) + { + if (loc[s][j].x != -1) + { + loc[s][j].x = -1; + tmp_localization_points.push_back(true_points_group[s][j]); + } + } + } + } + vector for_copy; + for (size_t j = 0; j < loc[s].size(); j++) + { + if ((loc[s][j].x != -1) && (loc[s][j].x != -2) ) + { + for_copy.push_back(true_points_group[s][j]); + } + if ((loc[s][j].x == -2) && (true_points_group.size() > 1)) + { + tmp_localization_points.push_back(true_points_group[s][j]); + } + } + true_points_group[s] = for_copy; + } + + vector< vector< Point2f > > for_copy_loc; + vector< vector< Point2f > > for_copy_trans; + + + for (size_t i = 0; i < localization_points.size(); i++) + { + if ((localization_points[i].size() == 3) && (transformation_points[i].size() == 4)) + { + for_copy_loc.push_back(localization_points[i]); + for_copy_trans.push_back(transformation_points[i]); + } + } + localization_points = for_copy_loc; + transformation_points = for_copy_trans; +} + +bool QRDetectMulti::localization() +{ + CV_TRACE_FUNCTION(); + vector tmp_localization_points; + int num_points = findNumberLocalizationPoints(tmp_localization_points); + if (num_points < 3) + return false; + int num_qrcodes = divUp(num_points, 3); + vector > true_points_group; + findQRCodeContours(tmp_localization_points, true_points_group, num_qrcodes); + for (int q = 0; q < num_qrcodes; q++) + { + vector > loc; + size_t iter = localization_points.size(); + + if (!checkSets(true_points_group, loc, tmp_localization_points)) + break; + deleteUsedPoints(true_points_group, loc, tmp_localization_points); + if ((localization_points.size() - iter) == 1) + q--; + if (((localization_points.size() - iter) == 0) && (tmp_localization_points.size() == 0) && (true_points_group.size() == 1) ) + break; + } + if ((transformation_points.size() == 0) || (localization_points.size() == 0)) + return false; + return true; +} + +bool QRDetectMulti::computeTransformationPoints(const size_t cur_ind) +{ + CV_TRACE_FUNCTION(); + + if (localization_points[cur_ind].size() != 3) + { + return false; + } + + vector locations, non_zero_elem[3], newHull; + vector new_non_zero_elem[3]; + for (size_t i = 0; i < 3 ; i++) + { + Mat mask = Mat::zeros(bin_barcode.rows + 2, bin_barcode.cols + 2, CV_8UC1); + uint8_t next_pixel, future_pixel = 255; + int localization_point_x = cvRound(localization_points[cur_ind][i].x); + int localization_point_y = cvRound(localization_points[cur_ind][i].y); + int count_test_lines = 0, index = localization_point_x; + for (; index < bin_barcode.cols - 1; index++) + { + next_pixel = bin_barcode.at(localization_point_y, index + 1); + if (next_pixel == future_pixel) + { + future_pixel = static_cast(~future_pixel); + count_test_lines++; + + if (count_test_lines == 2) + { + // TODO avoid drawing functions + floodFill(bin_barcode, mask, + Point(index + 1, localization_point_y), 255, + 0, Scalar(), Scalar(), FLOODFILL_MASK_ONLY); 
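+                    // NOTE (editorial comment, not in the original patch):
+                    // FLOODFILL_MASK_ONLY leaves bin_barcode itself untouched and writes
+                    // the filled finder-pattern blob into 'mask'; findNonZero() on the
+                    // mask ROI below then recovers every pixel of that blob.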
+ break; + } + } + + } + Mat mask_roi = mask(Range(1, bin_barcode.rows - 1), Range(1, bin_barcode.cols - 1)); + findNonZero(mask_roi, non_zero_elem[i]); + newHull.insert(newHull.end(), non_zero_elem[i].begin(), non_zero_elem[i].end()); + } + convexHull(newHull, locations); + for (size_t i = 0; i < locations.size(); i++) + { + for (size_t j = 0; j < 3; j++) + { + for (size_t k = 0; k < non_zero_elem[j].size(); k++) + { + if (locations[i] == non_zero_elem[j][k]) + { + new_non_zero_elem[j].push_back(locations[i]); + } + } + } + } + + if (new_non_zero_elem[0].size() == 0) + return false; + + double pentagon_diag_norm = -1; + Point2f down_left_edge_point, up_right_edge_point, up_left_edge_point; + for (size_t i = 0; i < new_non_zero_elem[1].size(); i++) + { + for (size_t j = 0; j < new_non_zero_elem[2].size(); j++) + { + double temp_norm = norm(new_non_zero_elem[1][i] - new_non_zero_elem[2][j]); + if (temp_norm > pentagon_diag_norm) + { + down_left_edge_point = new_non_zero_elem[1][i]; + up_right_edge_point = new_non_zero_elem[2][j]; + pentagon_diag_norm = temp_norm; + } + } + } + + if (down_left_edge_point == Point2f(0, 0) || + up_right_edge_point == Point2f(0, 0)) + { + return false; + } + + double max_area = -1; + up_left_edge_point = new_non_zero_elem[0][0]; + + for (size_t i = 0; i < new_non_zero_elem[0].size(); i++) + { + vector list_edge_points; + list_edge_points.push_back(new_non_zero_elem[0][i]); + list_edge_points.push_back(down_left_edge_point); + list_edge_points.push_back(up_right_edge_point); + + double temp_area = fabs(contourArea(list_edge_points)); + if (max_area < temp_area) + { + up_left_edge_point = new_non_zero_elem[0][i]; + max_area = temp_area; + } + } + + Point2f down_max_delta_point, up_max_delta_point; + double norm_down_max_delta = -1, norm_up_max_delta = -1; + for (size_t i = 0; i < new_non_zero_elem[1].size(); i++) + { + double temp_norm_delta = norm(up_left_edge_point - new_non_zero_elem[1][i]) + norm(down_left_edge_point - new_non_zero_elem[1][i]); + if (norm_down_max_delta < temp_norm_delta) + { + down_max_delta_point = new_non_zero_elem[1][i]; + norm_down_max_delta = temp_norm_delta; + } + } + + + for (size_t i = 0; i < new_non_zero_elem[2].size(); i++) + { + double temp_norm_delta = norm(up_left_edge_point - new_non_zero_elem[2][i]) + norm(up_right_edge_point - new_non_zero_elem[2][i]); + if (norm_up_max_delta < temp_norm_delta) + { + up_max_delta_point = new_non_zero_elem[2][i]; + norm_up_max_delta = temp_norm_delta; + } + } + vector tmp_transformation_points; + tmp_transformation_points.push_back(down_left_edge_point); + tmp_transformation_points.push_back(up_left_edge_point); + tmp_transformation_points.push_back(up_right_edge_point); + tmp_transformation_points.push_back(intersectionLines( + down_left_edge_point, down_max_delta_point, + up_right_edge_point, up_max_delta_point)); + transformation_points[cur_ind] = tmp_transformation_points; + + vector quadrilateral = getQuadrilateral(transformation_points[cur_ind]); + transformation_points[cur_ind] = quadrilateral; + + return true; +} + +bool QRCodeDetector::detectMulti(InputArray in, OutputArray points) const +{ + Mat inarr; + if (!checkQRInputImage(in, inarr)) + { + points.release(); + return false; + } + + QRDetectMulti qrdet; + qrdet.init(inarr, p->epsX, p->epsY); + if (!qrdet.localization()) + { + points.release(); + return false; + } + vector< vector< Point2f > > pnts2f = qrdet.getTransformationPoints(); + vector trans_points; + for(size_t i = 0; i < pnts2f.size(); i++) + for(size_t j = 0; j < 
pnts2f[i].size(); j++) + trans_points.push_back(pnts2f[i][j]); + + updatePointsResult(points, trans_points); + + return true; +} + +bool detectQRCodeMulti(InputArray in, vector< Point > &points, double eps_x, double eps_y) +{ + QRCodeDetector qrdetector; + qrdetector.setEpsX(eps_x); + qrdetector.setEpsY(eps_y); + return qrdetector.detectMulti(in, points); +} + +class ParallelDecodeProcess : public ParallelLoopBody +{ +public: + ParallelDecodeProcess(Mat& inarr_, vector& qrdec_, vector& decoded_info_, + vector& straight_barcode_, vector< vector< Point2f > >& src_points_) + : inarr(inarr_), qrdec(qrdec_), decoded_info(decoded_info_) + , straight_barcode(straight_barcode_), src_points(src_points_) + { + // nothing + } + void operator()(const Range& range) const CV_OVERRIDE + { + for (int i = range.start; i < range.end; i++) + { + qrdec[i].init(inarr, src_points[i]); + bool ok = qrdec[i].fullDecodingProcess(); + if (ok) + { + decoded_info[i] = qrdec[i].getDecodeInformation(); + straight_barcode[i] = qrdec[i].getStraightBarcode(); + } + else if (std::min(inarr.size().width, inarr.size().height) > 512) + { + const int min_side = std::min(inarr.size().width, inarr.size().height); + double coeff_expansion = min_side / 512; + const int width = cvRound(inarr.size().width / coeff_expansion); + const int height = cvRound(inarr.size().height / coeff_expansion); + Size new_size(width, height); + Mat inarr2; + resize(inarr, inarr2, new_size, 0, 0, INTER_AREA); + for (size_t j = 0; j < 4; j++) + { + src_points[i][j] /= static_cast(coeff_expansion); + } + qrdec[i].init(inarr2, src_points[i]); + ok = qrdec[i].fullDecodingProcess(); + if (ok) + { + decoded_info[i] = qrdec[i].getDecodeInformation(); + straight_barcode[i] = qrdec[i].getStraightBarcode(); + } + } + if (decoded_info[i].empty()) + decoded_info[i] = ""; + } + } + +private: + Mat& inarr; + vector& qrdec; + vector& decoded_info; + vector& straight_barcode; + vector< vector< Point2f > >& src_points; + +}; + +bool QRCodeDetector::decodeMulti( + InputArray img, + InputArray points, + CV_OUT std::vector& decoded_info, + OutputArrayOfArrays straight_qrcode + ) const +{ + Mat inarr; + if (!checkQRInputImage(img, inarr)) + return false; + CV_Assert(points.size().width > 0); + CV_Assert((points.size().width % 4) == 0); + vector< vector< Point2f > > src_points ; + Mat qr_points = points.getMat(); + for (int i = 0; i < points.size().width ; i += 4) + { + vector tempMat = qr_points.colRange(i, i + 4); + if (contourArea(tempMat) > 0.0) + { + src_points.push_back(tempMat); + } + } + CV_Assert(src_points.size() > 0); + vector qrdec(src_points.size()); + vector straight_barcode(src_points.size()); + vector info(src_points.size()); + ParallelDecodeProcess parallelDecodeProcess(inarr, qrdec, info, straight_barcode, src_points); + parallel_for_(Range(0, int(src_points.size())), parallelDecodeProcess); + vector for_copy; + for (size_t i = 0; i < straight_barcode.size(); i++) + { + if (!(straight_barcode[i].empty())) + for_copy.push_back(straight_barcode[i]); + } + straight_barcode = for_copy; + vector tmp_straight_qrcodes; + if (straight_qrcode.needed()) + { + for (size_t i = 0; i < straight_barcode.size(); i++) + { + Mat tmp_straight_qrcode; + tmp_straight_qrcodes.push_back(tmp_straight_qrcode); + straight_barcode[i].convertTo(((OutputArray)tmp_straight_qrcodes[i]), + ((OutputArray)tmp_straight_qrcodes[i]).fixedType() ? 
+ ((OutputArray)tmp_straight_qrcodes[i]).type() : CV_32FC2); + } + straight_qrcode.createSameSize(tmp_straight_qrcodes, CV_32FC2); + straight_qrcode.assign(tmp_straight_qrcodes); + } + decoded_info.clear(); + for (size_t i = 0; i < info.size(); i++) + { + decoded_info.push_back(info[i]); + } + if (!decoded_info.empty()) + return true; + else + return false; +} + +bool QRCodeDetector::detectAndDecodeMulti( + InputArray img, + CV_OUT std::vector& decoded_info, + OutputArray points_, + OutputArrayOfArrays straight_qrcode + ) const +{ + Mat inarr; + if (!checkQRInputImage(img, inarr)) + { + points_.release(); + return false; + } + + vector points; + bool ok = detectMulti(inarr, points); + if (!ok) + { + points_.release(); + return false; + } + updatePointsResult(points_, points); + decoded_info.clear(); + ok = decodeMulti(inarr, points, decoded_info, straight_qrcode); + return ok; +} + +bool decodeQRCodeMulti( + InputArray in, InputArray points, + vector &decoded_info, OutputArrayOfArrays straight_qrcode) +{ + QRCodeDetector qrcode; + vector info; + bool ok = qrcode.decodeMulti(in, points, info, straight_qrcode); + for (size_t i = 0; i < info.size(); i++) + decoded_info.push_back(info[i]); + return ok; +} + +} // namespace diff --git a/modules/objdetect/test/test_qrcode.cpp b/modules/objdetect/test/test_qrcode.cpp index 8c02f3db0c..d26323ea76 100644 --- a/modules/objdetect/test/test_qrcode.cpp +++ b/modules/objdetect/test/test_qrcode.cpp @@ -21,7 +21,11 @@ std::string qrcode_images_close[] = { std::string qrcode_images_monitor[] = { "monitor_1.png", "monitor_2.png", "monitor_3.png", "monitor_4.png", "monitor_5.png" }; -// #define UPDATE_QRCODE_TEST_DATA +std::string qrcode_images_multiple[] = { + "2_qrcodes.png", "3_close_qrcodes.png", "3_qrcodes.png", "4_qrcodes.png", + "5_qrcodes.png", "6_qrcodes.png", "7_qrcodes.png", "8_close_qrcodes.png" +}; +//#define UPDATE_QRCODE_TEST_DATA #ifdef UPDATE_QRCODE_TEST_DATA TEST(Objdetect_QRCode, generate_test_data) @@ -134,6 +138,66 @@ TEST(Objdetect_QRCode_Monitor, generate_test_data) file_config.release(); } + +TEST(Objdetect_QRCode_Multi, generate_test_data) +{ + const std::string root = "qrcode/multiple/"; + const std::string dataset_config = findDataFile(root + "dataset_config.json"); + FileStorage file_config(dataset_config, FileStorage::WRITE); + + file_config << "multiple_images" << "[:"; + size_t multiple_count = sizeof(qrcode_images_multiple) / sizeof(qrcode_images_multiple[0]); + for (size_t i = 0; i < multiple_count; i++) + { + file_config << "{:" << "image_name" << qrcode_images_multiple[i]; + std::string image_path = findDataFile(root + qrcode_images_multiple[i]); + Mat src = imread(image_path); + + ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path; + std::vector corners; + EXPECT_TRUE(detectQRCodeMulti(src, corners)); +#ifdef HAVE_QUIRC + std::vector decoded_info; + std::vector straight_barcode; + EXPECT_TRUE(decodeQRCodeMulti(src, corners, decoded_info, straight_barcode)); +#endif + file_config << "x" << "[:"; + for(size_t j = 0; j < corners.size(); j += 4) + { + file_config << "[:"; + for (size_t k = 0; k < 4; k++) + { + file_config << corners[j + k].x; + } + file_config << "]"; + } + file_config << "]"; + file_config << "y" << "[:"; + for(size_t j = 0; j < corners.size(); j += 4) + { + file_config << "[:"; + for (size_t k = 0; k < 4; k++) + { + file_config << corners[j + k].y; + } + file_config << "]"; + } + file_config << "]"; + file_config << "info"; + file_config << "[:"; + + for(size_t j = 0; j < decoded_info.size(); 
j++) + { + file_config << decoded_info[j]; + } + file_config << "]"; + file_config << "}"; + } + + file_config << "]"; + file_config.release(); +} + #else typedef testing::TestWithParam< std::string > Objdetect_QRCode; @@ -326,9 +390,96 @@ TEST_P(Objdetect_QRCode_Monitor, regression) } } +typedef testing::TestWithParam < std::string > Objdetect_QRCode_Multi; +TEST_P(Objdetect_QRCode_Multi, regression) +{ + const std::string name_current_image = GetParam(); + const std::string root = "qrcode/multiple/"; + const int pixels_error = 3; + + std::string image_path = findDataFile(root + name_current_image); + Mat src = imread(image_path); + ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path; + QRCodeDetector qrcode; + std::vector corners; +#ifdef HAVE_QUIRC + std::vector decoded_info; + std::vector straight_barcode; + EXPECT_TRUE(qrcode.detectAndDecodeMulti(src, decoded_info, corners, straight_barcode)); + ASSERT_FALSE(corners.empty()); + ASSERT_FALSE(decoded_info.empty()); +#else + ASSERT_TRUE(qrcode.detectMulti(src, corners)); +#endif + + const std::string dataset_config = findDataFile(root + "dataset_config.json"); + FileStorage file_config(dataset_config, FileStorage::READ); + ASSERT_TRUE(file_config.isOpened()) << "Can't read validation data: " << dataset_config; + { + FileNode images_list = file_config["multiple_images"]; + size_t images_count = static_cast(images_list.size()); + ASSERT_GT(images_count, 0u) << "Can't find validation data entries in 'test_images': " << dataset_config; + for (size_t index = 0; index < images_count; index++) + { + FileNode config = images_list[(int)index]; + std::string name_test_image = config["image_name"]; + if (name_test_image == name_current_image) + { + for(int j = 0; j < int(corners.size()); j += 4) + { + bool ok = false; + for (int k = 0; k < int(corners.size() / 4); k++) + { + int count_eq_points = 0; + for (int i = 0; i < 4; i++) + { + int x = config["x"][k][i]; + int y = config["y"][k][i]; + if(((abs(corners[j + i].x - x)) <= pixels_error) && ((abs(corners[j + i].y - y)) <= pixels_error)) + count_eq_points++; + } + if (count_eq_points == 4) + { + ok = true; + break; + } + } + EXPECT_TRUE(ok); + } + +#ifdef HAVE_QUIRC + size_t count_eq_info = 0; + for(int i = 0; i < int(decoded_info.size()); i++) + { + for(int j = 0; j < int(decoded_info.size()); j++) + { + std::string original_info = config["info"][j]; + if(original_info == decoded_info[i]) + { + count_eq_info++; + break; + } + } + } + EXPECT_EQ(decoded_info.size(), count_eq_info); +#endif + + return; // done + } + } + std::cerr + << "Not found results for '" << name_current_image + << "' image in config file:" << dataset_config << std::endl + << "Re-run tests with enabled UPDATE_QRCODE_TEST_DATA macro to update test data." 
+ << std::endl; + } +} + + INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode, testing::ValuesIn(qrcode_images_name)); INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode_Close, testing::ValuesIn(qrcode_images_close)); INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode_Monitor, testing::ValuesIn(qrcode_images_monitor)); +INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode_Multi, testing::ValuesIn(qrcode_images_multiple)); TEST(Objdetect_QRCode_basic, not_found_qrcode) { diff --git a/samples/cpp/qrcode.cpp b/samples/cpp/qrcode.cpp index 1e938f66f1..af332d307c 100644 --- a/samples/cpp/qrcode.cpp +++ b/samples/cpp/qrcode.cpp @@ -2,23 +2,45 @@ #include "opencv2/imgproc.hpp" #include "opencv2/highgui.hpp" #include "opencv2/videoio.hpp" +#include "opencv2/imgcodecs.hpp" #include #include using namespace std; using namespace cv; -static void drawQRCodeContour(Mat &color_image, vector transform); -static void drawFPS(Mat &color_image, double fps); -static int liveQRCodeDetect(const string& out_file); -static int imageQRCodeDetect(const string& in_file, const string& out_file); +static int liveQRCodeDetect(); +static int imageQRCodeDetect(const string& in_file); + +static bool g_modeMultiQR = false; +static bool g_detectOnly = false; + +static string g_out_file_name, g_out_file_ext; +static int g_save_idx = 0; + +static bool g_saveDetections = false; +static bool g_saveAll = false; + +static string getQRModeString() +{ + std::ostringstream out; + out << "QR" + << (g_modeMultiQR ? " multi" : "") + << (g_detectOnly ? " detector" : " decoder"); + return out.str(); +} int main(int argc, char *argv[]) { const string keys = "{h help ? | | print help messages }" - "{i in | | input path to file for detect (with parameter - show image, otherwise - camera)}" - "{o out | | output path to file (save image, work with -i parameter) }"; + "{i in | | input image path (also switches to image detection mode) }" + "{detect | false | detect QR code only (skip decoding) }" + "{m multi | | use detect for multiple qr-codes }" + "{o out | qr_code.png | path to result file }" + "{save_detections | false | save all QR detections (video mode only) }" + "{save_all | false | save all processed frames (video mode only) }" + ; CommandLineParser cmd_parser(argc, argv, keys); cmd_parser.about("This program detects the QR-codes from camera or images using the OpenCV library."); @@ -28,32 +50,51 @@ int main(int argc, char *argv[]) return 0; } - string in_file_name = cmd_parser.get("in"); // input path to image - string out_file_name; - if (cmd_parser.has("out")) - out_file_name = cmd_parser.get("out"); // output path to image + string in_file_name = cmd_parser.get("in"); // path to input image + if (cmd_parser.has("out")) + { + std::string fpath = cmd_parser.get("out"); // path to output image + std::string::size_type idx = fpath.rfind('.'); + if (idx != std::string::npos) + { + g_out_file_name = fpath.substr(0, idx); + g_out_file_ext = fpath.substr(idx); + } + else + { + g_out_file_name = fpath; + g_out_file_ext = ".png"; + } + } if (!cmd_parser.check()) { cmd_parser.printErrors(); return -1; } + g_modeMultiQR = cmd_parser.has("multi") && cmd_parser.get("multi"); + g_detectOnly = cmd_parser.has("detect") && cmd_parser.get("detect"); + + g_saveDetections = cmd_parser.has("save_detections") && cmd_parser.get("save_detections"); + g_saveAll = cmd_parser.has("save_all") && cmd_parser.get("save_all"); + int return_code = 0; if (in_file_name.empty()) { - return_code = liveQRCodeDetect(out_file_name); + return_code = liveQRCodeDetect(); } else { - 
return_code = imageQRCodeDetect(samples::findFile(in_file_name), out_file_name); + return_code = imageQRCodeDetect(samples::findFile(in_file_name)); } return return_code; } -void drawQRCodeContour(Mat &color_image, vector transform) +static +void drawQRCodeContour(Mat &color_image, const vector& corners) { - if (!transform.empty()) + if (!corners.empty()) { double show_radius = (color_image.rows > color_image.cols) ? (2.813 * color_image.rows) / color_image.cols @@ -61,127 +102,246 @@ void drawQRCodeContour(Mat &color_image, vector transform) double contour_radius = show_radius * 0.4; vector< vector > contours; - contours.push_back(transform); + contours.push_back(corners); drawContours(color_image, contours, 0, Scalar(211, 0, 148), cvRound(contour_radius)); RNG rng(1000); for (size_t i = 0; i < 4; i++) { Scalar color = Scalar(rng.uniform(0,255), rng.uniform(0, 255), rng.uniform(0, 255)); - circle(color_image, transform[i], cvRound(show_radius), color, -1); + circle(color_image, corners[i], cvRound(show_radius), color, -1); } } } +static void drawFPS(Mat &color_image, double fps) { ostringstream convert; - convert << cvRound(fps) << " FPS (QR detection)"; + convert << cv::format("%.2f", fps) << " FPS (" << getQRModeString() << ")"; putText(color_image, convert.str(), Point(25, 25), FONT_HERSHEY_DUPLEX, 1, Scalar(0, 0, 255), 2); } -int liveQRCodeDetect(const string& out_file) +static +void drawQRCodeResults(Mat& frame, const vector& corners, const vector& decode_info, double fps) { - VideoCapture cap(0); - if(!cap.isOpened()) + if (!corners.empty()) { - cout << "Cannot open a camera" << endl; - return -4; + for (size_t i = 0; i < corners.size(); i += 4) + { + size_t qr_idx = i / 4; + vector qrcode_contour(corners.begin() + i, corners.begin() + i + 4); + drawQRCodeContour(frame, qrcode_contour); + + cout << "QR[" << qr_idx << "] @ " << Mat(qrcode_contour).reshape(2, 1) << ": "; + if (decode_info.size() > qr_idx) + { + if (!decode_info[qr_idx].empty()) + cout << "'" << decode_info[qr_idx] << "'" << endl; + else + cout << "can't decode QR code" << endl; + } + else + { + cout << "decode information is not available (disabled)" << endl; + } + } + } + else + { + cout << "QR code is not detected" << endl; } - QRCodeDetector qrcode; - TickMeter total; - for(;;) + drawFPS(frame, fps); +} + +static +void runQR( + QRCodeDetector& qrcode, const Mat& input, + vector& corners, vector& decode_info + // +global: bool g_modeMultiQR, bool g_detectOnly +) +{ + if (!g_modeMultiQR) { - Mat frame, src, straight_barcode; - string decode_info; - vector transform; + if (!g_detectOnly) + { + String decode_info1 = qrcode.detectAndDecode(input, corners); + decode_info.push_back(decode_info1); + } + else + { + bool detection_result = qrcode.detect(input, corners); + CV_UNUSED(detection_result); + } + } + else + { + if (!g_detectOnly) + { + bool result_detection = qrcode.detectAndDecodeMulti(input, decode_info, corners); + CV_UNUSED(result_detection); + } + else + { + bool result_detection = qrcode.detectMulti(input, corners); + CV_UNUSED(result_detection); + } + } +} + +static +double processQRCodeDetection(QRCodeDetector& qrcode, const Mat& input, Mat& result, vector& corners) +{ + if (input.channels() == 1) + cvtColor(input, result, COLOR_GRAY2BGR); + else + input.copyTo(result); + + cout << "Run " << getQRModeString() + << " on image: " << input.size() << " (" << typeToString(input.type()) << ")" + << endl; + + TickMeter timer; + + vector decode_info; + timer.start(); + runQR(qrcode, input, corners, decode_info); + 
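+    // NOTE (editorial comment, not in the original patch): only the runQR() call is
+    // timed, so the reported FPS excludes drawing and console output below.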
timer.stop(); + + double fps = 1 / timer.getTimeSec(); + drawQRCodeResults(result, corners, decode_info, fps); + + return fps; +} + +int liveQRCodeDetect() +{ + VideoCapture cap(0); + + if (!cap.isOpened()) + { + cout << "Cannot open a camera" << endl; + return 2; + } + + cout << "Press 'm' to switch between detectAndDecode and detectAndDecodeMulti" << endl; + cout << "Press 'd' to switch between decoder and detector" << endl; + cout << "Press ' ' (space) to save result into images" << endl; + cout << "Press 'ESC' to exit" << endl; + QRCodeDetector qrcode; + + for (;;) + { + Mat frame; cap >> frame; if (frame.empty()) { cout << "End of video stream" << endl; break; } - cvtColor(frame, src, COLOR_BGR2GRAY); - total.start(); - bool result_detection = qrcode.detect(src, transform); - if (result_detection) + bool forceSave = g_saveAll; + + Mat result; + + try { - decode_info = qrcode.decode(src, transform, straight_barcode); - if (!decode_info.empty()) { cout << decode_info << endl; } + vector corners; + double fps = processQRCodeDetection(qrcode, frame, result, corners); + cout << "FPS: " << fps << endl; + forceSave |= (g_saveDetections && !corners.empty()); + //forceSave |= fps < 1.0; + } + catch (const cv::Exception& e) + { + cerr << "ERROR exception: " << e.what() << endl; + forceSave = true; } - total.stop(); - double fps = 1 / total.getTimeSec(); - total.reset(); - if (result_detection) { drawQRCodeContour(frame, transform); } - drawFPS(frame, fps); + if (!result.empty()) + imshow("QR code", result); - imshow("Live QR code detector", frame); - char c = (char)waitKey(30); + int code = waitKey(1); + if (code < 0 && !forceSave) + continue; // timeout + char c = (char)code; + if (c == ' ' || forceSave) + { + string fsuffix = cv::format("-%05d", g_save_idx++); + + string fname_input = g_out_file_name + fsuffix + "_input.png"; + cout << "Saving QR code detection input: '" << fname_input << "' ..." << endl; + imwrite(fname_input, frame); + + string fname = g_out_file_name + fsuffix + g_out_file_ext; + cout << "Saving QR code detection result: '" << fname << "' ..." << endl; + imwrite(fname, result); + + cout << "Saved" << endl; + } + if (c == 'm') + { + g_modeMultiQR = !g_modeMultiQR; + cout << "Switching QR code mode ==> " << (g_modeMultiQR ? "detectAndDecodeMulti" : "detectAndDecode") << endl; + } + if (c == 'd') + { + g_detectOnly = !g_detectOnly; + cout << "Switching QR decoder mode ==> " << (g_detectOnly ? "detect" : "decode") << endl; + } if (c == 27) + { + cout << "'ESC' is pressed. Exiting..." << endl; break; - if (c == ' ' && !out_file.empty()) - imwrite(out_file, frame); // TODO write original frame too + } } + cout << "Exit." 
<< endl; + return 0; } -int imageQRCodeDetect(const string& in_file, const string& out_file) +int imageQRCodeDetect(const string& in_file) { - Mat color_src = imread(in_file, IMREAD_COLOR), src; - cvtColor(color_src, src, COLOR_BGR2GRAY); - Mat straight_barcode; - string decoded_info; - vector transform; const int count_experiments = 10; - double transform_time = 0.0; - bool result_detection = false; - TickMeter total; + + Mat input = imread(in_file, IMREAD_COLOR); + cout << "Run " << getQRModeString() + << " on image: " << input.size() << " (" << typeToString(input.type()) << ")" + << endl; + QRCodeDetector qrcode; + vector corners; + vector decode_info; + + TickMeter timer; for (size_t i = 0; i < count_experiments; i++) { - total.start(); - transform.clear(); - result_detection = qrcode.detect(src, transform); - total.stop(); - transform_time += total.getTimeSec(); - total.reset(); - if (!result_detection) - continue; + corners.clear(); + decode_info.clear(); - total.start(); - decoded_info = qrcode.decode(src, transform, straight_barcode); - total.stop(); - transform_time += total.getTimeSec(); - total.reset(); + timer.start(); + runQR(qrcode, input, corners, decode_info); + timer.stop(); } - double fps = count_experiments / transform_time; - if (!result_detection) - cout << "QR code not found" << endl; - if (decoded_info.empty()) - cout << "QR code cannot be decoded" << endl; - - drawQRCodeContour(color_src, transform); - drawFPS(color_src, fps); - - cout << "Input image file path: " << in_file << endl; - cout << "Output image file path: " << out_file << endl; - cout << "Size: " << color_src.size() << endl; + double fps = count_experiments / timer.getTimeSec(); cout << "FPS: " << fps << endl; - cout << "Decoded info: " << decoded_info << endl; - if (!out_file.empty()) + Mat result; input.copyTo(result); + drawQRCodeResults(result, corners, decode_info, fps); + + imshow("QR", result); waitKey(1); + + if (!g_out_file_name.empty()) { - imwrite(out_file, color_src); + string out_file = g_out_file_name + g_out_file_ext; + cout << "Saving result: " << out_file << endl; + imwrite(out_file, result); } - for(;;) - { - imshow("Detect QR code on image", color_src); - if (waitKey(0) == 27) - break; - } + cout << "Press any key to exit ..." << endl; + waitKey(0); + cout << "Exit." 
From 5790810c3ab0aa9cd451716e54923077ec6746a0 Mon Sep 17 00:00:00 2001
From: Liubov Batanina
Date: Mon, 27 Jan 2020 10:07:48 +0300
Subject: [PATCH 39/42] Change link

---
 samples/dnn/human_parsing.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py
index 5bfe19aee7..788ec95e59 100644
--- a/samples/dnn/human_parsing.py
+++ b/samples/dnn/human_parsing.py
@@ -115,9 +115,8 @@ def parse_human(image_path, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, targe
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet',
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument('--input', '-i', help='Path to input image.')
-    parser.add_argument('--model', '-m', required=True, help='Path to pb model
-                        (https://drive.google.com/open?id=1XHvo111Gj1ZGoNUJt4Y4OsShrt_eUT34).')
+    parser.add_argument('--input', '-i', required=True, help='Path to input image.')
+    parser.add_argument('--model', '-m', required=True, help='Path to pb model (https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0).')
     parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                         help="Choose one of computation backends: "
                         "%d: automatically (by default), "
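
The Dropbox link above replaces the stale Google Drive one and points at the converted
lip_jppnet_384.pb. A quick smoke test of that file through cv.dnn, as a sketch only: the
file names are hypothetical, the two-image batch mirrors the conversion notes that a later
patch moves into the script's docstring, and the mean values are assumed to be the IMG_MEAN
constants from the LIP_JPPNet repository:

    import numpy as np
    import cv2 as cv

    net = cv.dnn.readNet('lip_jppnet_384.pb')       # hypothetical local path
    img = cv.imread('person.jpg')                   # hypothetical input image

    # The frozen graph expects a batch of two: the image and its horizontal flip.
    blob = cv.dnn.blobFromImages([img, np.flip(img, axis=1)],
                                 mean=(104.00698793, 116.66876762, 122.67891434))
    net.setInput(blob)
    out = net.forward()                             # assumed layout: 2 x 20 x 384 x 384 scores
    labels = out[0].argmax(axis=0)                  # per-pixel class ids, unflipped image
    print('classes present:', np.unique(labels))
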
From 4af55e64e924deb0ecc5412b341da5215706bee7 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin
Date: Mon, 13 Jan 2020 17:09:24 +0300
Subject: [PATCH 40/42] cmake: support OPENCV_ABI_SKIP_MODULES_LIST

backporting of commit f163778da7132304ad9f1bf3f3743660042ba406
---
 cmake/OpenCVGenABI.cmake | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cmake/OpenCVGenABI.cmake b/cmake/OpenCVGenABI.cmake
index 4aaa16a6a1..57905a26e6 100644
--- a/cmake/OpenCVGenABI.cmake
+++ b/cmake/OpenCVGenABI.cmake
@@ -2,6 +2,8 @@ if (NOT GENERATE_ABI_DESCRIPTOR)
   return()
 endif()
 
+set(OPENCV_ABI_SKIP_MODULES_LIST "" CACHE STRING "List of modules to exclude from ABI checker")
+
 set(filename "opencv_abi.xml")
 set(path1 "${CMAKE_BINARY_DIR}/${filename}")
 
@@ -28,6 +30,7 @@ foreach(mod ${OPENCV_MODULES_BUILD})
   string(REGEX REPLACE "^opencv_" "" mod "${mod}")
   if(NOT OPENCV_MODULE_opencv_${mod}_CLASS STREQUAL "PUBLIC"
       OR NOT "${OPENCV_MODULE_opencv_${mod}_LOCATION}" STREQUAL "${OpenCV_SOURCE_DIR}/modules/${mod}" # opencv_contrib
+      OR ";${mod};" MATCHES ";${OPENCV_ABI_SKIP_MODULES_LIST};"
   )
     # headers
     foreach(h ${OPENCV_MODULE_opencv_${mod}_HEADERS})

From 4a19ac5aca22a29fd3d00ee39c56df19b00c310e Mon Sep 17 00:00:00 2001
From: Liubov Batanina
Date: Mon, 27 Jan 2020 16:18:14 +0300
Subject: [PATCH 41/42] Move instruction

---
 modules/dnn/include/opencv2/dnn/dnn.hpp |  4 +-
 samples/dnn/human_parsing.py            | 80 +++++++++++++------------
 2 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index 94e2ada3f1..f5b5b9a101 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -47,9 +47,9 @@
 #include "opencv2/core/async.hpp"
 
 #if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_EXPERIMENTAL_NS
-#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_34_v15 {
+#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_34_v16 {
 #define CV__DNN_EXPERIMENTAL_NS_END }
-namespace cv { namespace dnn { namespace experimental_dnn_34_v15 { } using namespace experimental_dnn_34_v15; }}
+namespace cv { namespace dnn { namespace experimental_dnn_34_v16 { } using namespace experimental_dnn_34_v16; }}
 #else
 #define CV__DNN_EXPERIMENTAL_NS_BEGIN
 #define CV__DNN_EXPERIMENTAL_NS_END
diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py
index 788ec95e59..47e1a68473 100644
--- a/samples/dnn/human_parsing.py
+++ b/samples/dnn/human_parsing.py
@@ -1,6 +1,47 @@
+#!/usr/bin/env python
+'''
+    You can download the converted pb model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0
+    or convert the model yourself.
+
+    Follow these steps if you want to convert the original model yourself:
+    To get original .meta pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view
+    To correctly convert the .meta model to .pb, download the original repository https://github.com/Engineering-Course/LIP_JPPNet
+    Change script evaluate_parsing_JPPNet-s2.py for human parsing
+    1. Remove preprocessing to create image_batch_origin:
+        with tf.name_scope("create_inputs"):
+        ...
+        Add
+        image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input')
+
+    2. Create input
+        image = cv2.imread(path/to/image)
+        image_rev = np.flip(image, axis=1)
+        input = np.stack([image, image_rev], axis=0)
+
+    3. Hardcode image_h and image_w shapes to determine output shapes.
+    We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py.
+        parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE),
+                                                tf.image.resize_images(parsing_out1_075, INPUT_SIZE),
+                                                tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0)
+    Do similarly with parsing_out2, parsing_out3
+    4. Remove postprocessing. Last net operation:
+        raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0)
+    Change:
+        parsing_ = sess.run(raw_output, feed_dict={'input:0': input})
+
+    5. To save the model after sess.run(...) add:
+        input_graph_def = tf.get_default_graph().as_graph_def()
+        output_node = ["Mean_3"]
+        output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node)
+
+        output_graph = "LIP_JPPNet.pb"
+        with tf.gfile.GFile(output_graph, "wb") as f:
+            f.write(output_graph_def.SerializeToString())
+'''
+
 import argparse
-import cv2 as cv
 import numpy as np
+import cv2 as cv
 
 backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE,
             cv.dnn.DNN_BACKEND_OPENCV)
@@ -116,7 +157,7 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet',
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--input', '-i', required=True, help='Path to input image.')
-    parser.add_argument('--model', '-m', required=True, help='Path to pb model (https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0).')
+    parser.add_argument('--model', '-m', required=True, help='Path to pb model.')
     parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                         help="Choose one of computation backends: "
                         "%d: automatically (by default), "
@@ -135,38 +176,3 @@ if __name__ == '__main__':
     cv.namedWindow(winName, cv.WINDOW_AUTOSIZE)
     cv.imshow(winName, output)
     cv.waitKey()
-
-
-# To get original .meta pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view
-# For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet
-# Change script evaluate_parsing_JPPNet-s2.py for human parsing
-# 1. Remove preprocessing to create image_batch_origin:
-# -    with tf.name_scope("create_inputs"):
-# ...
-# Add
-# -    image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input')
-#
-# 2. Create input
-# image = cv2.imread(path/to/image)
-# image_rev = np.flip(image, axis=1)
-# input = np.stack([image, image_rev], axis=0)
-#
-# 3. Hardcode image_h and image_w shapes to determine output shapes.
-# We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py.
-# -    parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE),
-#                                 tf.image.resize_images(parsing_out1_075, INPUT_SIZE),
-#                                 tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0)
-# Do similarly with parsing_out2, parsing_out3
-# 4. Remove postprocessing. Last net operation:
-# raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0)
-# Change:
-# parsing_ = sess.run(raw_output, feed_dict={'input:0': input})
-#
-# 5. To save model after sess.run(...) add:
-# input_graph_def = tf.get_default_graph().as_graph_def()
-# output_node = "Mean_3"
-# output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node)
-#
-# output_graph = "LIP_JPPNet.pb"
-# with tf.gfile.GFile(output_graph, "wb") as f:
-# f.write(output_graph_def.SerializeToString())
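
The conversion recipe moved into the docstring above condenses into a single standalone
script. The sketch below assumes TensorFlow 1.x (as used by LIP_JPPNet) and a hypothetical
checkpoint path; note that tf.graph_util.convert_variables_to_constants expects a list of
output node names, which is why the docstring passes ["Mean_3"] rather than a bare string:

    import numpy as np
    import cv2 as cv
    import tensorflow as tf  # TensorFlow 1.x API

    # Build the two-image batch the 'input' placeholder expects (step 2).
    image = cv.imread('person.jpg')  # hypothetical input image
    batch = np.stack([image, np.flip(image, axis=1)], axis=0).astype(np.float32)

    with tf.Session() as sess:
        # Restore the pre-trained checkpoint (hypothetical path).
        saver = tf.train.import_meta_graph('checkpoint/model.meta')
        saver.restore(sess, 'checkpoint/model')

        # Run the final reduce_mean node once, mirroring step 4.
        parsing_ = sess.run('Mean_3:0', feed_dict={'input:0': batch})

        # Freeze variables into constants and serialize the graph (step 5).
        graph_def = tf.get_default_graph().as_graph_def()
        frozen = tf.graph_util.convert_variables_to_constants(sess, graph_def, ['Mean_3'])
        with tf.gfile.GFile('LIP_JPPNet.pb', 'wb') as f:
            f.write(frozen.SerializeToString())
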
From 24166ac40226981aeefb551f267ac0762cdf4e25 Mon Sep 17 00:00:00 2001
From: Liubov Batanina
Date: Mon, 27 Jan 2020 17:59:58 +0300
Subject: [PATCH 42/42] Fix indentation

---
 samples/dnn/human_parsing.py | 64 ++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py
index 47e1a68473..c4ac11bad2 100644
--- a/samples/dnn/human_parsing.py
+++ b/samples/dnn/human_parsing.py
@@ -1,42 +1,42 @@
 #!/usr/bin/env python
 '''
-    You can download the converted pb model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0
-    or convert the model yourself.
+You can download the converted pb model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0
+or convert the model yourself.
 
-    Follow these steps if you want to convert the original model yourself:
-    To get original .meta pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view
-    To correctly convert the .meta model to .pb, download the original repository https://github.com/Engineering-Course/LIP_JPPNet
-    Change script evaluate_parsing_JPPNet-s2.py for human parsing
-    1. Remove preprocessing to create image_batch_origin:
-        with tf.name_scope("create_inputs"):
-        ...
-        Add
-        image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input')
+Follow these steps if you want to convert the original model yourself:
+    To get original .meta pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view
+    To correctly convert the .meta model to .pb, download the original repository https://github.com/Engineering-Course/LIP_JPPNet
+    Change script evaluate_parsing_JPPNet-s2.py for human parsing
+    1. Remove preprocessing to create image_batch_origin:
+        with tf.name_scope("create_inputs"):
+        ...
+        Add
+        image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input')
 
-    2. Create input
-        image = cv2.imread(path/to/image)
-        image_rev = np.flip(image, axis=1)
-        input = np.stack([image, image_rev], axis=0)
+    2. Create input
+        image = cv2.imread(path/to/image)
+        image_rev = np.flip(image, axis=1)
+        input = np.stack([image, image_rev], axis=0)
 
-    3. Hardcode image_h and image_w shapes to determine output shapes.
-    We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py.
-        parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE),
-                                                tf.image.resize_images(parsing_out1_075, INPUT_SIZE),
-                                                tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0)
-    Do similarly with parsing_out2, parsing_out3
-    4. Remove postprocessing. Last net operation:
-        raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0)
-    Change:
-        parsing_ = sess.run(raw_output, feed_dict={'input:0': input})
+    3. Hardcode image_h and image_w shapes to determine output shapes.
+       We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py.
+        parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE),
+                                                tf.image.resize_images(parsing_out1_075, INPUT_SIZE),
+                                                tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0)
+       Do similarly with parsing_out2, parsing_out3
+    4. Remove postprocessing. Last net operation:
+        raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0)
+       Change:
+        parsing_ = sess.run(raw_output, feed_dict={'input:0': input})
 
-    5. To save the model after sess.run(...) add:
-        input_graph_def = tf.get_default_graph().as_graph_def()
-        output_node = ["Mean_3"]
-        output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node)
+    5. To save the model after sess.run(...) add:
+        input_graph_def = tf.get_default_graph().as_graph_def()
+        output_node = ["Mean_3"]
+        output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node)
 
-        output_graph = "LIP_JPPNet.pb"
-        with tf.gfile.GFile(output_graph, "wb") as f:
-            f.write(output_graph_def.SerializeToString())
+        output_graph = "LIP_JPPNet.pb"
+        with tf.gfile.GFile(output_graph, "wb") as f:
+            f.write(output_graph_def.SerializeToString())
 '''
 
 import argparse