From 5dae27865244c0ff1ade47fcdd579457a394d6fc Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 7 Nov 2020 18:25:48 +0000 Subject: [PATCH 1/3] bindings: "inline namespace" --- modules/python/src2/hdr_parser.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/python/src2/hdr_parser.py b/modules/python/src2/hdr_parser.py index eba7000d47..d8b04b43ce 100755 --- a/modules/python/src2/hdr_parser.py +++ b/modules/python/src2/hdr_parser.py @@ -658,6 +658,10 @@ class CppHeaderParser(object): stack_top = self.block_stack[-1] context = stack_top[self.BLOCK_TYPE] + if stmt.startswith('inline namespace'): + # emulate anonymous namespace + return "namespace", "", True, None + stmt_type = "" if end_token == "{": stmt_type = "block" From 41c2669476ba81d684bfd81f83d83c6cb96db027 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 12 Nov 2020 19:47:54 +0000 Subject: [PATCH 2/3] java: robust code generation - the same generated code from Python2/3 - avoid randomized output due to unpredictable dict/set order --- modules/java/generator/gen_java.py | 45 ++++++++++++++++-------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/modules/java/generator/gen_java.py b/modules/java/generator/gen_java.py index 03075c5ae7..8e5c69e788 100755 --- a/modules/java/generator/gen_java.py +++ b/modules/java/generator/gen_java.py @@ -105,7 +105,7 @@ T_CPP_MODULE = Template(read_contents(os.path.join(SCRIPT_DIR, 'templates/cpp_mo class GeneralInfo(): def __init__(self, type, decl, namespaces): - self.namespace, self.classpath, self.classname, self.name = self.parseName(decl[0], namespaces) + self.symbol_id, self.namespace, self.classpath, self.classname, self.name = self.parseName(decl[0], namespaces) # parse doxygen comments self.params={} @@ -141,13 +141,13 @@ class GeneralInfo(): break pieces = localName.split(".") if len(pieces) > 2: # ... 
- return spaceName, ".".join(pieces[:-1]), pieces[-2], pieces[-1] + return name, spaceName, ".".join(pieces[:-1]), pieces[-2], pieces[-1] elif len(pieces) == 2: # . - return spaceName, pieces[0], pieces[0], pieces[1] + return name, spaceName, pieces[0], pieces[0], pieces[1] elif len(pieces) == 1: # - return spaceName, "", "", pieces[0] + return name, spaceName, "", "", pieces[0] else: - return spaceName, "", "" # error?! + return name, spaceName, "", "" # error?! def fullName(self, isCPP=False): result = ".".join([self.fullClass(), self.name]) @@ -249,8 +249,8 @@ class ClassInfo(GeneralInfo): def getAllMethods(self): result = [] - result.extend([fi for fi in sorted(self.methods) if fi.isconstructor]) - result.extend([fi for fi in sorted(self.methods) if not fi.isconstructor]) + result += [fi for fi in self.methods if fi.isconstructor] + result += [fi for fi in self.methods if not fi.isconstructor] return result def addMethod(self, fi): @@ -369,7 +369,7 @@ class JavaWrapperGenerator(object): self.clear() def clear(self): - self.namespaces = set(["cv"]) + self.namespaces = ["cv"] self.classes = { "Mat" : ClassInfo([ 'class Mat', '', [], [] ], self.namespaces) } self.module = "" self.Module = "" @@ -512,9 +512,9 @@ class JavaWrapperGenerator(object): includes.append('#include "' + hdr + '"') for hdr in srcfiles: decls = parser.parse(hdr) - self.namespaces = parser.namespaces + self.namespaces = sorted(parser.namespaces) logging.info("\n\n===== Header: %s =====", hdr) - logging.info("Namespaces: %s", parser.namespaces) + logging.info("Namespaces: %s", sorted(parser.namespaces)) if decls: includes.append('#include "' + hdr + '"') else: @@ -536,7 +536,7 @@ class JavaWrapperGenerator(object): moduleCppCode = StringIO() package_path = os.path.join(output_java_path, module) mkdir_p(package_path) - for ci in self.classes.values(): + for ci in sorted(self.classes.values(), key=lambda x: x.symbol_id): if ci.name == "Mat": continue ci.initCodeStreams(self.Module) @@ -560,7 
+560,7 @@ class JavaWrapperGenerator(object): report.write("\n".join(self.ported_func_list)) report.write("\n\nSKIPPED FUNCs LIST (%i of %i):\n\n" % (len(self.skipped_func_list), total_count)) report.write("".join(self.skipped_func_list)) - for i in self.def_args_hist.keys(): + for i in sorted(self.def_args_hist.keys()): report.write("\n%i def args - %i funcs" % (i, self.def_args_hist[i])) return report.getvalue() @@ -1028,10 +1028,11 @@ JNIEXPORT $rtype JNICALL Java_org_opencv_${module}_${clazz}_$fname if ci.consts: enumTypes = set(map(lambda c: c.enumType, ci.consts)) grouped_consts = {enumType: [c for c in ci.consts if c.enumType == enumType] for enumType in enumTypes} - for typeName, consts in grouped_consts.items(): + for typeName in sorted(grouped_consts.keys(), key=lambda x: str(x) if x is not None else ""): + consts = grouped_consts[typeName] logging.info("%s", consts) if typeName: - typeName = typeName.rsplit(".", 1)[-1] + typeNameShort = typeName.rsplit(".", 1)[-1] ###################### Utilize Java enums ###################### # ci.j_code.write(""" # public enum {1} {{ @@ -1045,9 +1046,9 @@ JNIEXPORT $rtype JNICALL Java_org_opencv_${module}_${clazz}_$fname # ) ################################################################ ci.j_code.write(""" - // C++: enum {1} + // C++: enum {1} ({2}) public static final int - {0};\n\n""".format((",\n"+" "*12).join(["%s = %s" % (c.name, const_value(c.value)) for c in consts]), typeName) + {0};\n\n""".format((",\n"+" "*12).join(["%s = %s" % (c.name, const_value(c.value)) for c in consts]), typeNameShort, typeName) ) else: ci.j_code.write(""" @@ -1072,10 +1073,12 @@ JNIEXPORT $rtype JNICALL Java_org_opencv_${module}_${clazz}_$fname # manual ports if ci.name in ManualFuncs: - for func in ManualFuncs[ci.name].keys(): - ci.j_code.write ( "\n".join(ManualFuncs[ci.name][func]["j_code"]) ) - ci.jn_code.write( "\n".join(ManualFuncs[ci.name][func]["jn_code"]) ) - ci.cpp_code.write( 
"\n".join(ManualFuncs[ci.name][func]["cpp_code"]) ) + for func in sorted(ManualFuncs[ci.name].keys()): + logging.info("manual function: %s", func) + fn = ManualFuncs[ci.name][func] + ci.j_code.write("\n".join(fn["j_code"])) + ci.jn_code.write("\n".join(fn["jn_code"])) + ci.cpp_code.write("\n".join(fn["cpp_code"])) if ci.name != self.Module or ci.base: # finalize() @@ -1303,7 +1306,7 @@ if __name__ == "__main__": # initialize logger logging.basicConfig(filename='gen_java.log', format=None, filemode='w', level=logging.INFO) handler = logging.StreamHandler() - handler.setLevel(logging.WARNING) + handler.setLevel(os.environ.get('LOG_LEVEL', logging.WARNING)) logging.getLogger().addHandler(handler) # parse command line parameters From 61144f935efaae03d506ab2b54ee02b3bc1a4452 Mon Sep 17 00:00:00 2001 From: Sergei Slashchinin <62052793+sl-sergei@users.noreply.github.com> Date: Sat, 14 Nov 2020 01:22:10 +0300 Subject: [PATCH 3/3] Merge pull request #18783 from sl-sergei:fix_conv1d Add support for Conv1D on OpenCV backend * Add support for Conv1D on OpenCV backend * disable tests on other targets/backends * Fix formatting * Restore comment * Remove unnecessary flag and fix test logic * Fix perf test * fix braces * Fix indentation, assert check and remove unnecessary condition * Remove unnecessary changes * Add test cases for variable weights and bias * dnn(conv): fallback on OpenCV+CPU instead of failures * coding style --- modules/dnn/perf/perf_convolution.cpp | 4 +- modules/dnn/perf/perf_convolution1d.cpp | 163 +++++++++++++ modules/dnn/perf/perf_convolution3d.cpp | 4 +- modules/dnn/src/layers/convolution_layer.cpp | 228 ++++++++++++++----- modules/dnn/src/onnx/onnx_importer.cpp | 8 +- modules/dnn/test/test_onnx_importer.cpp | 61 ++++- modules/dnn/test/test_tf_importer.cpp | 2 - 7 files changed, 402 insertions(+), 68 deletions(-) create mode 100644 modules/dnn/perf/perf_convolution1d.cpp diff --git a/modules/dnn/perf/perf_convolution.cpp 
b/modules/dnn/perf/perf_convolution.cpp index 7d51cd300f..c2a3a66ab9 100644 --- a/modules/dnn/perf/perf_convolution.cpp +++ b/modules/dnn/perf/perf_convolution.cpp @@ -533,7 +533,7 @@ struct ConvParamID CONV_100 = 100, CONV_LAST = sizeof(testConvolutionConfigs) / sizeof(testConvolutionConfigs[0]) }; - int val_; \ + int val_; ConvParamID(int val = 0) : val_(val) {} operator int() const { return val_; } static ::testing::internal::ParamGenerator all() @@ -546,7 +546,7 @@ struct ConvParamID ConvParamID v_[NUM]; for (int i = 0; i < NUM; ++i) { v_[i] = ConvParamID(i); } // reduce generated code size return ::testing::ValuesIn(v_, v_ + NUM); } -}; \ +}; static inline void PrintTo(const ConvParamID& v, std::ostream* os) { CV_Assert((int)v >= 0); CV_Assert((int)v < ConvParamID::CONV_LAST); diff --git a/modules/dnn/perf/perf_convolution1d.cpp b/modules/dnn/perf/perf_convolution1d.cpp new file mode 100644 index 0000000000..c35cbd503f --- /dev/null +++ b/modules/dnn/perf/perf_convolution1d.cpp @@ -0,0 +1,163 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "perf_precomp.hpp" +#include + +namespace opencv_test { + +struct Conv1DParam_t { + int kernel; + struct BlobShape { int dims[3]; } shapeIn; + int outCN; + int groups; + int stride; + int dilation; + int pad[2]; + const char* padMode; + bool hasBias; + double declared_flops; +}; +// Details: #12142 +static const Conv1DParam_t testConvolution1DConfigs[] = { + {3, {{1, 6, 10}}, 6, 1, 1, 1, {0, 0}, "VALID", true, 1776.}, + {3, {{1, 2, 19}}, 2, 2, 2, 1, {1, 1}, "", true, 260.}, + {3, {{1, 2, 25}}, 2, 2, 1, 1, {2, 2}, "SAME", false, 650.}, +}; + +struct Conv1DParamID +{ + enum { + CONV_0 = 0, + CONV_LAST = sizeof(testConvolution1DConfigs) / sizeof(testConvolution1DConfigs[0]) + }; + int val_; + Conv1DParamID(int val = 0) : val_(val) {} + operator int() const { return val_; } + static ::testing::internal::ParamGenerator all() + { + enum { NUM = (int)CONV_LAST }; + Conv1DParamID v_[NUM]; for (int i = 0; i < NUM; ++i) { v_[i] = Conv1DParamID(i); } // reduce generated code size + return ::testing::ValuesIn(v_, v_ + NUM); + } +}; +static inline void PrintTo(const Conv1DParamID& v, std::ostream* os) +{ + CV_Assert((int)v >= 0); CV_Assert((int)v < Conv1DParamID::CONV_LAST); + const Conv1DParam_t& p = testConvolution1DConfigs[(int)v]; + + *os << "GFLOPS=" << cv::format("%.3f", p.declared_flops * 1e-9) + << ", K=[" << p.kernel << "]" + << ", IN={" << p.shapeIn.dims[0] << ", " << p.shapeIn.dims[1] << ", " << p.shapeIn.dims[2] << "}" + << ", OCN=" << p.outCN; + if (p.groups > 1) + *os << ", G=" << p.groups; + if (p.stride != 1) + *os << ", S=" << p.stride; + if (p.dilation != 1) + *os << ", D=" << p.dilation; + if (p.pad[0] != 0 && p.pad[1] != 0 ) + *os << ", P=(" << p.pad[0] << ", " << p.pad[1] << ")"; + if (!((std::string)p.padMode).empty()) + *os << ", PM=" << ((std::string)p.padMode); + if (p.hasBias) + *os << ", BIAS"; +} + + +typedef tuple > Conv1DTestParam_t; +typedef TestBaseWithParam Conv1D; + +PERF_TEST_P_(Conv1D, conv1d) +{ + int test_id = 
(int)get<0>(GetParam()); + ASSERT_GE(test_id, 0); ASSERT_LT(test_id, Conv1DParamID::CONV_LAST); + const Conv1DParam_t& params = testConvolution1DConfigs[test_id]; + double declared_flops = params.declared_flops; + + DictValue kernel = DictValue::arrayInt(&params.kernel, 1); + DictValue stride = DictValue::arrayInt(&params.stride, 1); + DictValue pad = DictValue::arrayInt(&params.pad[0], 2); + DictValue dilation = DictValue::arrayInt(&params.dilation, 1); + + MatShape inputShape = MatShape(params.shapeIn.dims, params.shapeIn.dims + 3); + int outChannels = params.outCN; + int groups = params.groups; + std::string padMode(params.padMode); + + bool hasBias = params.hasBias; + Backend backendId = get<0>(get<1>(GetParam())); + Target targetId = get<1>(get<1>(GetParam())); + + if (targetId != DNN_TARGET_CPU) + throw SkipTestException("Only CPU is supported"); + + int inChannels = inputShape[1]; + + int sz[] = {outChannels, inChannels / groups, params.kernel}; + Mat weights(3, &sz[0], CV_32F); + randu(weights, -1.0f, 1.0f); + + LayerParams lp; + lp.set("kernel_size", kernel); + lp.set("pad", pad); + if (!padMode.empty()) + lp.set("pad_mode", padMode); + + lp.set("stride", stride); + lp.set("dilation", dilation); + lp.set("num_output", outChannels); + lp.set("group", groups); + lp.set("bias_term", hasBias); + lp.type = "Convolution"; + lp.name = "testLayer"; + lp.blobs.push_back(weights); + + if (hasBias) + { + Mat bias(1, outChannels, CV_32F); + randu(bias, -1.0f, 1.0f); + lp.blobs.push_back(bias); + } + + int inpSz[] = {1, inChannels, inputShape[2]}; + Mat input(3, &inpSz[0], CV_32F); + randu(input, -1.0f, 1.0f); + + Net net; + net.addLayerToPrev(lp.name, lp.type, lp); + + net.setInput(input); + net.setPreferableBackend(backendId); + net.setPreferableTarget(targetId); + + // warmup + Mat output = net.forward(); + + MatShape netInputShape = shape(input); + size_t weightsMemory = 0, blobsMemory = 0; + net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory); + int64 flops = 
net.getFLOPS(netInputShape); + CV_Assert(flops > 0); + + std::cout + << "IN=" << divUp(input.total() * input.elemSize(), 1u<<10) << " Kb " << netInputShape + << " OUT=" << divUp(output.total() * output.elemSize(), 1u<<10) << " Kb " << shape(output) + << " Weights(parameters): " << divUp(weightsMemory, 1u<<10) << " Kb" + << " MFLOPS=" << flops * 1e-6 << std::endl; + + TEST_CYCLE() + { + Mat res = net.forward(); + } + EXPECT_NEAR(flops, declared_flops, declared_flops * 1e-6); + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(/**/, Conv1D, Combine( + Conv1DParamID::all(), + dnnBackendsAndTargets(false, false) // defined in ../test/test_common.hpp +)); + +} // namespace diff --git a/modules/dnn/perf/perf_convolution3d.cpp b/modules/dnn/perf/perf_convolution3d.cpp index 1f512b2a15..0cf4ce26a3 100644 --- a/modules/dnn/perf/perf_convolution3d.cpp +++ b/modules/dnn/perf/perf_convolution3d.cpp @@ -46,7 +46,7 @@ struct Conv3DParamID CONV_100 = 16, CONV_LAST = sizeof(testConvolution3DConfigs) / sizeof(testConvolution3DConfigs[0]) }; - int val_; \ + int val_; Conv3DParamID(int val = 0) : val_(val) {} operator int() const { return val_; } static ::testing::internal::ParamGenerator all() @@ -59,7 +59,7 @@ struct Conv3DParamID Conv3DParamID v_[NUM]; for (int i = 0; i < NUM; ++i) { v_[i] = Conv3DParamID(i); } // reduce generated code size return ::testing::ValuesIn(v_, v_ + NUM); } -}; \ +}; static inline void PrintTo(const Conv3DParamID& v, std::ostream* os) { CV_Assert((int)v >= 0); CV_Assert((int)v < Conv3DParamID::CONV_LAST); diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 473c07b755..c8245c487d 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -113,17 +113,22 @@ public: MatSize weightShape = blobs.empty() ? 
inputs[1].size : blobs[0].size; CV_Assert(inputs[0].dims == outputs[0].dims); + if (weightShape.dims() == 3) + { + kernel_size.assign(1, kernel_size[0]); + strides.assign(1, strides[0]); + } CV_Assert(weightShape.dims() == kernel_size.size() + 2); for (int i = 0; i < kernel_size.size(); i++) { CV_Assert(weightShape[i + 2] == kernel_size[i]); } const Mat &input = inputs[0]; - CV_Assert((input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S)); + CV_Assert(((input.dims == 3 && kernel_size.size() == 1) || input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S)); for (size_t i = 0; i < outputs.size(); i++) { CV_Assert(inputs[i].type() == input.type()); - CV_Assert((inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]); + CV_Assert(((input.dims == 3 && kernel_size.size() == 1) || inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]); for (int j = 0; j < inputs[i].dims; j++) { CV_Assert(inputs[i].size[j] == input.size[j]); } @@ -261,19 +266,26 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + size_t ksize = kernel_size.size(); #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - if (kernel_size.size() == 3) + if (ksize == 1) + return false; + if (ksize == 3) return preferableTarget == DNN_TARGET_CPU; if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableTarget != DNN_TARGET_MYRIAD) && blobs.empty()) return false; return (preferableTarget != DNN_TARGET_MYRIAD || dilation.width == dilation.height); } - else #endif - return (kernel_size.size() == 3 && preferableTarget == DNN_TARGET_CPU && backendId == DNN_BACKEND_OPENCV) || - (kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_HALIDE && !blobs.empty()))); + if (backendId == DNN_BACKEND_OPENCV) + return ksize >= 1 && ksize <= 3; 
+#ifdef HAVE_HALIDE + if (backendId == DNN_BACKEND_HALIDE) + return ksize == 2 && !blobs.empty(); +#endif + return false; } bool getMemoryShapes(const std::vector &inputs, @@ -325,18 +337,27 @@ public: inputs_arr.getMatVector(inputs); // prepare weightsMat where each row is aligned and has enough zero padding on the right to // use vectorized (i.e. with intrinsics) loops without tail processing - Mat wm = blobs.empty() ? inputs[1].reshape(1, numOutput) : blobs[0].reshape(1, numOutput); - if( wm.step1() % VEC_ALIGN != 0 ) + if (!blobs.empty()) { - int newcols = (int)alignSize(wm.step1(), VEC_ALIGN); - Mat wm_buffer = Mat(numOutput, newcols, wm.type()); - Mat wm_padding = wm_buffer.colRange(wm.cols, newcols); - wm_padding.setTo(Scalar::all(0.)); - Mat wm_aligned = wm_buffer.colRange(0, wm.cols); - wm.copyTo(wm_aligned); - wm = wm_aligned; + Mat wm = blobs[0].reshape(1, numOutput); + if( wm.step1() % VEC_ALIGN != 0 ) + { + int newcols = (int)alignSize(wm.step1(), VEC_ALIGN); + Mat wm_buffer = Mat(numOutput, newcols, wm.type()); + Mat wm_padding = wm_buffer.colRange(wm.cols, newcols); + wm_padding.setTo(Scalar::all(0.)); + Mat wm_aligned = wm_buffer.colRange(0, wm.cols); + wm.copyTo(wm_aligned); + wm = wm_aligned; + } + weightsMat = wm; } - weightsMat = wm; + else + { + // initialized in .forward() + weightsMat.release(); + } + weightsMultipliers.assign(numOutput, 1.0); Mat biasMat = hasBias() ? 
blobs[1].reshape(1, numOutput) : Mat(); @@ -678,8 +699,11 @@ public: { size_t karea = std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies()); - CV_Assert_N( - (input.dims == 4 || input.dims == 5) && (input.dims == output.dims), + bool isConv1D = input.dims == 3; + bool isConv2D = input.dims == 4; + bool isConv3D = input.dims == 5; + CV_CheckEQ(static_cast(kernel_size.size()), input.dims - 2, ""); + CV_Assert_N(input.dims == output.dims, input.size[0] == output.size[0], weights.rows == output.size[1], weights.cols == (input.size[1]/ngroups)*karea, @@ -689,12 +713,15 @@ public: input.isContinuous(), output.isContinuous(), biasvec.size() == (size_t)output.size[1]+2); + CV_Check(weights.step1(), weights.step1() % VEC_ALIGN == 0, ""); + CV_CheckType(weights.type(), CV_32FC1, ""); ParallelConv p; p.input_ = &input; p.weights_ = &weights; p.output_ = &output; - for( int i = 0; i < 4; i++ ) p.outShape[i] = output.size[i]; + int max_ind = isConv1D? 3: 4; + for( int i = 0; i < max_ind; i++ ) p.outShape[i] = output.size[i]; p.outShape[1] /= ngroups; p.kernel_size = kernel_size; p.strides = strides; p.dilations = dilations; @@ -706,20 +733,19 @@ public: int inpCnAll = input.size[1]; int depth = (input.dims == 5) ? input.size[2] : 1; int width = input.size[input.dims - 1]; - int height = input.size[input.dims - 2]; + int height = isConv1D? 1 : input.size[input.dims - 2]; int inpCn = inpCnAll / ngroups; - bool isConv2D = kernel_size.size() == 2; - - p.is1x1_ = isConv2D && kernel_size[0] == 1 && kernel_size[1] == 1 && - pads_begin[0] == 0 && pads_begin[1] == 0; + p.is1x1_ = (isConv2D && kernel_size[0] == 1 && kernel_size[1] == 1 && + pads_begin[0] == 0 && pads_begin[1] == 0) || + (isConv1D && pads_begin[0] == 0 && kernel_size[0] == 1); p.useAVX = checkHardwareSupport(CPU_AVX) && isConv2D; p.useAVX2 = checkHardwareSupport(CPU_AVX2) && isConv2D; p.useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX && isConv2D; - int kernel_d = !isConv2D? 
kernel_size[0] : 1; - int kernel_h = kernel_size[kernel_size.size() - 2]; + int kernel_d = isConv3D? kernel_size[0] : 1; + int kernel_h = isConv1D? 1 : kernel_size[kernel_size.size() - 2]; int kernel_w = kernel_size.back(); int blk_size_cn0 = cvCeil(800./(kernel_w*kernel_h)); @@ -729,14 +755,20 @@ public: ncn = std::min(ncn, inpCn); p.blk_size_cn = ncn; - int dil_d = !isConv2D? dilations[0] : 1; - int dil_h = dilations[dilations.size() - 2]; + int dil_d = isConv3D? dilations[0] : 1; + int dil_h = isConv1D? 1 : dilations[dilations.size() - 2]; int dil_w = dilations.back(); p.ofstab_.resize(karea * ncn); int* ofstab = &p.ofstab_[0]; - if (isConv2D) + if (isConv1D) + { + for( int k = 0; k < ncn; k++ ) + for( int k_c = 0; k_c < kernel_w; k_c++ ) + ofstab[k*kernel_w + k_c] = k*width + k_c*dil_w; + } + else if (isConv2D) { for( int k = 0; k < ncn; k++ ) for( int k_r = 0; k_r < kernel_h; k_r++ ) @@ -765,34 +797,36 @@ public: { const int valign = ConvolutionLayerImpl::VEC_ALIGN; int ngroups = ngroups_, batchSize = input_->size[0]*ngroups; + bool isConv1D = input_->dims == 3; bool isConv2D = input_->dims == 4; + bool isConv3D = input_->dims == 5; int outW = output_->size[output_->dims - 1]; - int outH = output_->size[output_->dims - 2]; + int outH = isConv1D? 1 : output_->size[output_->dims - 2]; int outCn = output_->size[1]/ngroups; - int depth = !isConv2D? input_->size[2] : 1; - int height = input_->size[input_->dims - 2]; + int depth = isConv3D? input_->size[2] : 1; + int height = isConv1D? 1 : input_->size[input_->dims - 2]; int width = input_->size[input_->dims - 1]; int inpCn = input_->size[1]/ngroups; const int nstripes = nstripes_; - int kernel_d = !isConv2D? kernel_size[0] : 1; - int kernel_h = kernel_size[kernel_size.size() - 2]; + int kernel_d = isConv3D? kernel_size[0] : 1; + int kernel_h = isConv1D? 1 : kernel_size[kernel_size.size() - 2]; int kernel_w = kernel_size.back(); int karea = kernel_w*kernel_h*kernel_d; - int pad_d = !isConv2D? 
pads_begin[0] : 0; - int pad_t = pads_begin[pads_begin.size() - 2]; + int pad_d = isConv3D? pads_begin[0] : 0; + int pad_t = isConv1D? 0 : pads_begin[pads_begin.size() - 2]; int pad_l = pads_begin.back(); - int stride_d = !isConv2D? strides[0] : 0; - int stride_h = strides[strides.size() - 2]; + int stride_d = isConv3D? strides[0] : 0; + int stride_h = isConv1D? 0 : strides[strides.size() - 2]; int stride_w = strides.back(); - int dilation_d = !isConv2D? dilations[0] : 1; - int dilation_h = dilations[dilations.size() - 2]; + int dilation_d = isConv3D? dilations[0] : 1; + int dilation_h = isConv1D? 1 : dilations[dilations.size() - 2]; int dilation_w = dilations.back(); int i, j, k, d; @@ -1032,7 +1066,71 @@ public: // do im2row for a part of input tensor float* rowbuf = rowbuf0; - if (isConv2D) + if (isConv1D) + { + for( ofs = ofs0; ofs < ofs1; out_j = 0, ++out_i ) + { + int delta = std::min(ofs1 - ofs, outW - out_j); + int out_j1 = out_j + delta; + + int in_j = out_j * stride_w - pad_l; + const float* imgptr = data_inp0 + cn0*width + in_j; + ofs += delta; + + // do im2row for a part of input tensor + if( is1x1 ) + { + for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w ) + { + for( k = 0; k < vsz; k++ ) + rowbuf[k] = imgptr[k*inpPlaneSize]; + } + } + else + { + for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w, in_j += stride_w ) + { + // this condition should be true for most of the tensor elements, i.e. + // most of the time the kernel aperture is inside the tensor X-Y plane. 
+ if( out_j + 2 <= out_j1 && 0 <= in_j && in_j + stride_w*2 <= width - (kernel_w-1)*dilation_w ) + { + for( k = 0; k < vsz; k++ ) + { + int k1 = ofstab[k]; + float v0 = imgptr[k1]; + float v1 = imgptr[k1 + stride_w]; + rowbuf[k] = v0; + rowbuf[k+vsz_a] = v1; + } + out_j++; + rowbuf += vsz_a; + imgptr += stride_w; + in_j += stride_w; + } + else + { + int i0 = std::max(0, (-in_j + dilation_w-1)/dilation_w); + int i1 = std::min(kernel_w, (width - in_j + dilation_w-1)/dilation_w); + + // here some non-continuous sub-row of the row will not be + // filled from the tensor; we need to make sure that the uncovered + // elements are explicitly set to 0's. the easiest way is to + // set all the elements to 0's before the loop. + memset(rowbuf, 0, vsz*sizeof(rowbuf[0])); + for( k = 0; k < ncn; k++ ) + { + for( i = i0; i < i1; i++ ) + { + int imgofs = k*width + i*dilation_w; + rowbuf[k*kernel_w + i] = imgptr[imgofs]; + } + } + } + } + } + } + } + else if (isConv2D) { if( is1x1 && stride_w == 1 && stride_h == 1 ) { @@ -1265,9 +1363,12 @@ public: vs12 = v_setzero_f32(), vs13 = v_setzero_f32(); for( k = 0; k < vsz; k += 4, rptr += 4 ) { - v_float32x4 w0 = v_load_aligned(wptr0 + k), w1 = v_load_aligned(wptr1 + k); - v_float32x4 r0 = v_load_aligned(rptr), r1 = v_load_aligned(rptr + vsz_a), - r2 = v_load_aligned(rptr + vsz_a*2), r3 = v_load_aligned(rptr + vsz_a*3); + v_float32x4 w0 = v_load_aligned(wptr0 + k); + v_float32x4 w1 = v_load_aligned(wptr1 + k); + v_float32x4 r0 = v_load_aligned(rptr); + v_float32x4 r1 = v_load_aligned(rptr + vsz_a); + v_float32x4 r2 = v_load_aligned(rptr + vsz_a*2); + v_float32x4 r3 = v_load_aligned(rptr + vsz_a*3); vs00 += w0*r0; vs01 += w0*r1; @@ -1337,6 +1438,12 @@ public: #ifdef HAVE_OPENCL bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) { + if (kernel_size.size() != 2) + { + // no OpenCL optimizations, see .supportedBacked() + return false; + } + std::vector inputs; std::vector outputs; @@ -1520,26 
+1627,35 @@ public: if (blobs.empty()) { Mat wm = inputs[1].reshape(1, outCn); - if( wm.step1() % VEC_ALIGN != 0 ) + if (wm.data != weightsMat.data) { - wm.copyTo(weightsMat); + int newcols = (int)alignSize(wm.step1(), VEC_ALIGN); + Mat wm_buffer = Mat(numOutput, newcols, wm.type()); + Mat wm_padding = wm_buffer.colRange(wm.cols, newcols); + wm_padding.setTo(Scalar::all(0.)); + weightsMat = wm_buffer.colRange(0, wm.cols); + + wm.copyTo((const Mat&)weightsMat); if (inputs.size() > 2) { Mat biasMat = inputs[2].reshape(1, outCn); biasMat.col(0).copyTo(biasvec); - biasvec.resize(outCn + 2); - } - else - { - biasvec.resize(outCn + 2, 0); } + biasvec.resize(outCn + 2, 0); } } - - /*printf("conv %s: input (%d x %d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n", - name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], inputs[0].size[3], - kernel.width, kernel.height, pad.width, pad.height, - stride.width, stride.height, dilation.width, dilation.height);*/ + /*if (inputs[0].dims > 3) { + printf("conv %s: input (%d x %d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n", + name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], inputs[0].size[3], + kernel.width, kernel.height, pad.width, pad.height, + stride.width, stride.height, dilation.width, dilation.height); + } + else { + printf("conv %s: input (%d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n", + name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], + kernel.width, kernel.height, pad.width, pad.height, + stride.width, stride.height, dilation.width, dilation.height); + }*/ int inpGroupCn = blobs.empty() ? 
inputs[1].size[1] : blobs[0].size[1]; CV_Assert_N(inputs.size() >= (size_t)1, inputs[0].size[1] % inpGroupCn == 0, outputs.size() == 1, inputs[0].data != outputs[0].data); diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 56683f4c14..9443336305 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -200,12 +200,12 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot if(attribute_name == "kernel_shape") { - CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); lp.set("kernel_size", parse(attribute_proto.ints())); } else if(attribute_name == "strides") { - CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); lp.set("stride", parse(attribute_proto.ints())); } else if(attribute_name == "pads") @@ -229,7 +229,7 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot else { // Convolution or pooling. 
- CV_Assert(attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6); + CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6); lp.set("pad", parse(attribute_proto.ints())); } } @@ -244,7 +244,7 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot } else if(attribute_name == "dilations") { - CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); lp.set("dilation", parse(attribute_proto.ints())); } else if (attribute_proto.has_i()) diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 993ba56be4..5c6de55da5 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -183,9 +183,14 @@ TEST_P(Test_ONNX_layers, Convolution3D) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif - if (target != DNN_TARGET_CPU) - throw SkipTestException("Only CPU is supported"); testONNXModels("conv3d"); +} + +TEST_P(Test_ONNX_layers, Convolution3D_bias) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testONNXModels("conv3d_bias"); } @@ -648,6 +653,58 @@ TEST_P(Test_ONNX_layers, ResizeOpset11_Torch1_6) testONNXModels("resize_opset11_torch1.6"); } +TEST_P(Test_ONNX_layers, Conv1d) +{ + testONNXModels("conv1d"); +} + +TEST_P(Test_ONNX_layers, Conv1d_bias) +{ + testONNXModels("conv1d_bias"); +} + +TEST_P(Test_ONNX_layers, Conv1d_variable_weight) +{ + String basename = "conv1d_variable_w"; + Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); + ASSERT_FALSE(net.empty()); + + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + + Mat input = blobFromNPY(_tf("data/input_" + 
basename + "_0.npy")); + Mat weights = blobFromNPY(_tf("data/input_" + basename + "_1.npy")); + Mat ref = blobFromNPY(_tf("data/output_" + basename + ".npy")); + + net.setInput(input, "0"); + net.setInput(weights, "1"); + + Mat out = net.forward(); + normAssert(ref, out, "", default_l1, default_lInf); +} + +TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias) +{ + String basename = "conv1d_variable_wb"; + Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); + ASSERT_FALSE(net.empty()); + + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + + Mat input = blobFromNPY(_tf("data/input_" + basename + "_0.npy")); + Mat weights = blobFromNPY(_tf("data/input_" + basename + "_1.npy")); + Mat bias = blobFromNPY(_tf("data/input_" + basename + "_2.npy")); + Mat ref = blobFromNPY(_tf("data/output_" + basename + ".npy")); + + net.setInput(input, "0"); + net.setInput(weights, "1"); + net.setInput(bias, "bias"); + + Mat out = net.forward(); + normAssert(ref, out, "", default_l1, default_lInf); +} + INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets()); class Test_ONNX_nets : public Test_ONNX_layers diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 68b720a375..e9c1562b4c 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -173,8 +173,6 @@ TEST_P(Test_TensorFlow_layers, Convolution3D) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); // Only CPU on DLIE backend is supported if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // Only CPU on DLIE backend is supported - if (target != DNN_TARGET_CPU) - throw SkipTestException("Only CPU is supported"); runTensorFlowNet("conv3d"); }