diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index c6fe6d05bc..72064843b9 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -210,7 +210,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN class CV_EXPORTS BaseConvolutionLayer : public Layer { public: - Size kernel, stride, pad, dilation, adjustPad; + CV_DEPRECATED_EXTERNAL Size kernel, stride, pad, dilation, adjustPad; + std::vector adjust_pads; + std::vector kernel_size, strides, dilations; + std::vector pads_begin, pads_end; String padMode; int numOutput; }; @@ -243,9 +246,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN { public: int type; - Size kernel, stride; - int pad_l, pad_t, pad_r, pad_b; - CV_DEPRECATED_EXTERNAL Size pad; + std::vector kernel_size, strides; + std::vector pads_begin, pads_end; + CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; + CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; bool globalPooling; bool computeMaxIdx; String padMode; diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 2cc3f9a640..9bffdbe74a 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2263,6 +2263,7 @@ struct Net::Impl if (isAsync) CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode"); + CV_Assert(layer->supportBackend(DNN_BACKEND_OPENCV)); if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) { std::vector umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers); diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 7aaa8bc989..3b298e616d 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -66,31 +66,34 @@ public: BaseConvolutionLayerImpl(const LayerParams ¶ms) { setParamsFrom(params); - int pad_t = 0, pad_l = 0, pad_r = 0, pad_b = 0; - getConvolutionKernelParams(params, kernel.height, kernel.width, pad_t, - pad_l, pad_b, pad_r, stride.height, stride.width, dilation.height, - dilation.width, padMode); - - if (pad_t != pad_b || pad_l != pad_r) - CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); - - pad.width = pad_l; - pad.height = pad_t; + getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode); numOutput = params.get("num_output"); int ngroups = params.get("group", 1); - - adjustPad.height = params.get("adj_h", 0); - adjustPad.width = params.get("adj_w", 0); - CV_Assert(numOutput % ngroups == 0); - CV_Assert(adjustPad.width < stride.width && - adjustPad.height < stride.height); + if (kernel_size.size() == 2) { + kernel = Size(kernel_size[1], kernel_size[0]); + stride = Size(strides[1], strides[0]); + for (int i = 0; i < pads_begin.size(); i++) { + if (pads_begin[i] != pads_end[i]) + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); + } + pad = Size(pads_begin[1], pads_begin[0]); + dilation = Size(dilations[1], dilations[0]); + + adjust_pads.push_back(params.get("adj_h", 0)); + adjust_pads.push_back(params.get("adj_w", 0)); + + adjustPad.height = adjust_pads[0]; + adjustPad.width = adjust_pads[1]; + CV_Assert(adjustPad.width < stride.width && + adjustPad.height < stride.height); + } newWeightAndBias = false; } - void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE + virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE { std::vector inputs, outputs; inputs_arr.getMatVector(inputs); @@ -98,31 +101,38 @@ public: CV_Assert(inputs.size() > 0); - CV_Assert(blobs.size() >= 1 && blobs.size() <= 2); - CV_Assert(blobs[0].dims == 4 && blobs[0].size[3] == kernel.width && blobs[0].size[2] == kernel.height); + CV_Assert(blobs.size() == 1 || blobs.size() == 2); + CV_Assert(inputs[0].dims == outputs[0].dims); + CV_Assert(blobs[0].dims == kernel_size.size() + 2); + for (int i = 0; i < kernel_size.size(); i++) { + CV_Assert(blobs[0].size[i + 2] == kernel_size[i]); + } const Mat &input = inputs[0]; - CV_Assert(input.dims == 4 && (input.type() == CV_32F || input.type() == CV_64F || input.type() == CV_16S)); + CV_Assert((input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S)); for (size_t i = 0; i < inputs.size(); i++) { CV_Assert(inputs[i].type() == input.type()); - CV_Assert(inputs[i].dims == 4 && inputs[i].size[1] == input.size[1]); - CV_Assert(inputs[i].size[2] == input.size[2] && inputs[i].size[3] == input.size[3]); + CV_Assert((inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]); + for (int j = 0; j < inputs[i].dims; j++) { + CV_Assert(inputs[i].size[j] == input.size[j]); + } } - Size outSize = Size(outputs[0].size[3], outputs[0].size[2]); - - int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width; - - getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize, - kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r); - - - if (pad_t != pad_b || pad_l != pad_r) - CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); - - pad.width = pad_l; - pad.height = pad_t; + std::vector inpShape; + std::vector outShape; + for (int i = 2; i < inputs[0].dims; i++) { + inpShape.push_back(inputs[0].size[i]); + outShape.push_back(outputs[0].size[i]); + } + getConvPoolPaddings(inpShape, outShape, kernel_size, strides, padMode, dilations, pads_begin, pads_end); + if (pads_begin.size() == 2) { + for (int i = 0; i < pads_begin.size(); i++) { + if (pads_begin[i] != pads_end[i]) + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); + } + pad = Size(pads_begin[1], pads_begin[0]); + } } bool hasBias() const @@ -134,8 +144,8 @@ public: bool is1x1() const { return (kernel.height == 1 && kernel.width == 1) && - (stride.height == 1 && stride.width == 1) && - (dilation.height == 1 && dilation.width == 1); + (stride.height == 1 && stride.width == 1) && + (dilation.height == 1 && dilation.width == 1); } virtual bool tryFuse(Ptr& top) CV_OVERRIDE @@ -237,12 +247,14 @@ public: #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE) { + if (kernel_size.size() == 3) + return preferableTarget == DNN_TARGET_CPU; return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R4) || (preferableTarget != DNN_TARGET_MYRIAD || dilation.width == dilation.height); } else #endif - return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; + return (kernel_size.size() == 2) && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE); } bool getMemoryShapes(const std::vector &inputs, @@ -256,21 +268,23 @@ public: internals.clear(); - int inpCn = inputs[0][1]; - int inpH = inputs[0][2]; - int inpW = inputs[0][3]; + CV_Assert(inputs.size() != 0); + std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); int outCn = blobs[0].size[0]; - Size out; + std::vector outShape; + outShape.push_back(inputs[0][0]); + outShape.push_back(outCn); + int inpCn = inputs[0][1]; if (padMode.empty()) { - out.height = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1; - out.width = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1; + for (int i = 0; i < inpShape.size(); i++) + outShape.push_back((inpShape[i] + pads_begin[i] + pads_end[i] - dilations[i] * (kernel_size[i] - 1) - 1) / strides[i] + 1); } else { - getConvPoolOutParams(Size(inpW, inpH), kernel, stride, padMode, dilation, out); + getConvPoolOutParams(inpShape, kernel_size, strides, padMode, dilations, outShape); } int ngroups = inpCn / blobs[0].size[1]; @@ -279,8 +293,7 @@ public: "be multiple of %d but got %d", blobs[0].size[1], inpCn)); CV_Assert(ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0); - int dims[] = {inputs[0][0], outCn, out.height, out.width}; - outputs.resize(inputs.size(), shape(dims, 4)); + outputs.resize(1, outShape); return false; } @@ -451,25 +464,28 @@ public: { #ifdef HAVE_INF_ENGINE InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]); - CV_Assert(input->dims.size() == 4); + CV_Assert(input->dims.size() == 4 || input->dims.size() == 5); - const int inpCn = input->dims[2]; // NOTE: input->dims are reversed (whcn) + const int inpCn = input->dims[input->dims.size() - 2]; // NOTE: input->dims are reversed (WHIO or WHDIO) const int outCn = blobs[0].size[0]; const int inpGroupCn = blobs[0].size[1]; const int group = inpCn / inpGroupCn; - auto ieWeights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW); + InferenceEngine::Layout layout = (input->dims.size() == 4) ? InferenceEngine::Layout::OIHW : + InferenceEngine::Layout::NCDHW; + + auto ieWeights = wrapToInfEngineBlob(blobs[0], layout); if (newWeightAndBias) { if (weightsMat.isContinuous()) { Mat fusedWeights = weightsMat.reshape(1, blobs[0].dims, blobs[0].size); - ieWeights = wrapToInfEngineBlob(fusedWeights, InferenceEngine::Layout::OIHW); + ieWeights = wrapToInfEngineBlob(fusedWeights, layout); } else { ieWeights = InferenceEngine::make_shared_blob( - InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW, + InferenceEngine::Precision::FP32, layout, ieWeights->dims()); ieWeights->allocate(); @@ -488,11 +504,11 @@ public: #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5) InferenceEngine::Builder::ConvolutionLayer ieLayer(name); - ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width}); - ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width}); - ieLayer.setDilation({(size_t)dilation.height, (size_t)dilation.width}); - ieLayer.setPaddingsBegin({(size_t)pad.height, (size_t)pad.width}); - ieLayer.setPaddingsEnd({(size_t)pad.height, (size_t)pad.width}); + ieLayer.setKernel(kernel_size); + ieLayer.setStrides(strides); + ieLayer.setDilation(dilations); + ieLayer.setPaddingsBegin(pads_begin); + ieLayer.setPaddingsEnd(pads_end); ieLayer.setGroup((size_t)group); ieLayer.setOutDepth((size_t)outCn); @@ -1085,6 +1101,10 @@ public: CV_Assert_N(inputs.size() == (size_t)1, inputs[0].size[1] % blobs[0].size[1] == 0, outputs.size() == 1, inputs[0].data != outputs[0].data); + if (inputs[0].dims == 5) { + CV_Error(Error::StsNotImplemented, "Convolution3D layer is not supported on OCV backend"); + } + int ngroups = inputs[0].size[1]/blobs[0].size[1]; CV_Assert(outputs[0].size[1] % ngroups == 0); int outCn = blobs[0].size[0]; @@ -1157,6 +1177,9 @@ public: #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE) { + if (kernel_size.size() == 3) + CV_Error(Error::StsNotImplemented, "Unsupported deconvolution3D layer"); + if (INF_ENGINE_RELEASE >= 2018050000 && (adjustPad.height || adjustPad.width)) return false; @@ -1172,7 +1195,7 @@ public: } else #endif // HAVE_INF_ENGINE - return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; + return kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE); } bool getMemoryShapes(const std::vector &inputs, @@ -1183,39 +1206,36 @@ public: CV_Assert(!hasBias() || blobs[1].total() == (size_t)numOutput); CV_Assert(inputs.size() != 0); - int inpCn = inputs[0][1]; - int inpH = inputs[0][2]; - int inpW = inputs[0][3]; - - int outH = -1, outW = -1; + int outCn = numOutput; + std::vector outShape; + outShape.push_back(inputs[0][0]); // batch + outShape.push_back(outCn); if (padMode.empty()) { - outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height; - outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width; + for (int i = 0; i < kernel_size.size(); i++) + outShape.push_back(strides[i] * (inputs[0][2 + i] - 1) + kernel_size[i] - pads_begin[i] - pads_end[i] + adjust_pads[i]); } else if (padMode == "VALID") { - outH = stride.height * (inpH - 1) + kernel.height + adjustPad.height; - outW = stride.width * (inpW - 1) + kernel.width + adjustPad.width; + for (int i = 0; i < kernel_size.size(); i++) + outShape.push_back(strides[i] * (inputs[0][2 + i] - 1) + kernel_size[i] + adjust_pads[i]); } else if (padMode == "SAME") { - outH = stride.height * (inpH - 1) + 1 + adjustPad.height; - outW = stride.width * (inpW - 1) + 1 + adjustPad.width; + for (int i = 0; i < kernel_size.size(); i++) + outShape.push_back(strides[i] * (inputs[0][2 + i] - 1) + 1 + adjust_pads[i]); } else CV_Error(Error::StsError, "Unsupported padding mode " + padMode); - int outCn = numOutput; - CV_Assert(outCn % blobs[0].size[1] == 0); int ngroups = outCn / blobs[0].size[1]; + int inpCn = inputs[0][1]; CV_Assert(inpCn % ngroups == 0 && outCn % ngroups == 0); CV_Assert(blobs[0].size[0] == inpCn); - int dims[] = {inputs[0][0], outCn, outH, outW}; - outputs.resize(inputs.size(), shape(dims, 4)); + outputs.resize(1, outShape); if (!is1x1()) internals.push_back(computeColRowShape(inputs[0], outputs[0])); @@ -1231,16 +1251,20 @@ public: inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); - int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width; - getConvPoolPaddings(Size(outputs[0].size[3], outputs[0].size[2]), - Size(inputs[0].size[3], inputs[0].size[2]), - kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r); - - if (pad_t != pad_b || pad_l != pad_r) - CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); - - pad.width = pad_l; - pad.height = pad_t; + std::vector inpShape; + std::vector outShape; + for (int i = 2; i < inputs[0].dims; i++) { + inpShape.push_back(inputs[0].size[i]); + outShape.push_back(outputs[0].size[i]); + } + getConvPoolPaddings(outShape, inpShape, kernel_size, strides, padMode, dilations, pads_begin, pads_end); + if (pads_begin.size() == 2) { + for (int i = 0; i < pads_begin.size(); i++) { + if (pads_begin[i] != pads_end[i]) + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in deconvolution layer"); + } + pad = Size(pads_begin[1], pads_begin[0]); + } weightsMultipliers.assign(numOutput, 1.0); if (weightsMat.empty()) @@ -1760,11 +1784,11 @@ public: InferenceEngine::Builder::DeconvolutionLayer ieLayer(name); - ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width}); - ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width}); - ieLayer.setDilation({(size_t)dilation.height, (size_t)dilation.width}); - ieLayer.setPaddingsBegin({(size_t)pad.height, (size_t)pad.width}); - ieLayer.setPaddingsEnd({(size_t)pad.height, (size_t)pad.width}); + ieLayer.setKernel(kernel_size); + ieLayer.setStrides(strides); + ieLayer.setDilation(dilations); + ieLayer.setPaddingsBegin(pads_begin); + ieLayer.setPaddingsEnd(pads_end); ieLayer.setGroup((size_t)group); ieLayer.setOutDepth((size_t)numOutput); diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index 2dbb12109d..627f79c784 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -57,20 +57,19 @@ std::string makeName(const std::string& str1, const std::string& str2) } bool getParameter(const LayerParams ¶ms, const std::string& nameBase, const std::string& nameAll, - int ¶meterH, int ¶meterW, bool hasDefault = false, const int& defaultValue = 0) + std::vector& parameter, bool hasDefault = false, const std::vector& defaultValue = std::vector(2, 0)) { std::string nameH = makeName(nameBase, std::string("_h")); std::string nameW = makeName(nameBase, std::string("_w")); std::string nameAll_ = nameAll; - if(nameAll_ == "") - { + if (nameAll_ == "") nameAll_ = nameBase; - } if (params.has(nameH) && params.has(nameW)) { - parameterH = params.get(nameH); - parameterW = params.get(nameW); + CV_Assert(params.get(nameH) >= 0 && params.get(nameW) >= 0); + parameter.push_back(params.get(nameH)); + parameter.push_back(params.get(nameW)); return true; } else @@ -78,26 +77,19 @@ bool getParameter(const LayerParams ¶ms, const std::string& nameBase, const if (params.has(nameAll_)) { DictValue param = params.get(nameAll_); - parameterH = param.get(0); - if (param.size() == 1) - { - parameterW = parameterH; - } - else if (param.size() == 2) - { - parameterW = param.get(1); - } - else - { - return false; + for (int i = 0; i < param.size(); i++) { + CV_Assert(param.get(i) >= 0); + parameter.push_back(param.get(i)); } + if (parameter.size() == 1) + parameter.resize(2, parameter[0]); return true; } else { - if(hasDefault) + if (hasDefault) { - parameterH = parameterW = defaultValue; + parameter = defaultValue; return true; } else @@ -108,30 +100,38 @@ bool getParameter(const LayerParams ¶ms, const std::string& nameBase, const } } -void getKernelSize(const LayerParams ¶ms, int &kernelH, int &kernelW) +void getKernelSize(const LayerParams ¶ms, std::vector& kernel) { - if(!util::getParameter(params, "kernel", "kernel_size", kernelH, kernelW)) - { + if (!util::getParameter(params, "kernel", "kernel_size", kernel)) CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified"); - } - CV_Assert(kernelH > 0 && kernelW > 0); + for (int i = 0; i < kernel.size(); i++) + CV_Assert(kernel[i] > 0); } -void getStrideAndPadding(const LayerParams ¶ms, int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode) +void getStrideAndPadding(const LayerParams ¶ms, std::vector& pads_begin, std::vector& pads_end, + std::vector& strides, cv::String& padMode, size_t kernel_size = 2) { if (params.has("pad_l") && params.has("pad_t") && params.has("pad_r") && params.has("pad_b")) { - padT = params.get("pad_t"); - padL = params.get("pad_l"); - padB = params.get("pad_b"); - padR = params.get("pad_r"); + CV_Assert(params.get("pad_t") >= 0 && params.get("pad_l") >= 0 && + params.get("pad_b") >= 0 && params.get("pad_r") >= 0); + pads_begin.push_back(params.get("pad_t")); + pads_begin.push_back(params.get("pad_l")); + pads_end.push_back(params.get("pad_b")); + pads_end.push_back(params.get("pad_r")); } else { - util::getParameter(params, "pad", "pad", padT, padL, true, 0); - padB = padT; - padR = padL; + util::getParameter(params, "pad", "pad", pads_begin, true, std::vector(kernel_size, 0)); + if (pads_begin.size() < 4) + pads_end = pads_begin; + else + { + pads_end = std::vector(pads_begin.begin() + pads_begin.size() / 2, pads_begin.end()); + pads_begin.resize(pads_begin.size() / 2); + } + CV_Assert(pads_begin.size() == pads_end.size()); } - util::getParameter(params, "stride", "stride", strideH, strideW, true, 1); + util::getParameter(params, "stride", "stride", strides, true, std::vector(kernel_size, 1)); padMode = ""; if (params.has("pad_mode")) @@ -139,15 +139,16 @@ void getStrideAndPadding(const LayerParams ¶ms, int &padT, int &padL, int &p padMode = params.get("pad_mode"); } - CV_Assert(padT >= 0 && padL >= 0 && padB >= 0 && padR >= 0 && strideH > 0 && strideW > 0); + for (int i = 0; i < strides.size(); i++) + CV_Assert(strides[i] > 0); } } - -void getPoolingKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, bool &globalPooling, - int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String &padMode) +void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kernel, bool &globalPooling, + std::vector& pads_begin, std::vector& pads_end, + std::vector& strides, cv::String &padMode) { - util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode); + util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); globalPooling = params.has("global_pooling") && params.get("global_pooling"); @@ -158,25 +159,30 @@ void getPoolingKernelParams(const LayerParams ¶ms, int &kernelH, int &kernel { CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified"); } - if(padT != 0 || padL != 0 || padB != 0 || padR != 0 || strideH != 1 || strideW != 1) - { - CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0, and stride_h and stride_w must be = 1"); + for (int i = 0; i < pads_begin.size(); i++) { + if (pads_begin[i] != 0 || pads_end[i] != 0) + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0"); + } + for (int i = 0; i < strides.size(); i++) { + if (strides[i] != 1) + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1"); } } else { - util::getKernelSize(params, kernelH, kernelW); + util::getKernelSize(params, kernel); } } -void getConvolutionKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR, - int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String &padMode) +void getConvolutionKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& pads_begin, + std::vector& pads_end, std::vector& strides, std::vector& dilations, cv::String &padMode) { - util::getKernelSize(params, kernelH, kernelW); - util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode); - util::getParameter(params, "dilation", "dilation", dilationH, dilationW, true, 1); + util::getKernelSize(params, kernel); + util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size()); + util::getParameter(params, "dilation", "dilation", dilations, true, std::vector(kernel.size(), 1)); - CV_Assert(dilationH > 0 && dilationW > 0); + for (int i = 0; i < dilations.size(); i++) + CV_Assert(dilations[i] > 0); } // From TensorFlow code: @@ -188,19 +194,19 @@ void getConvolutionKernelParams(const LayerParams ¶ms, int &kernelH, int &ke // We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top // and Pc - Pc/2 on the bottom. When Pr or Pc is odd, this means // we pad more on the right and bottom than on the top and left. -void getConvPoolOutParams(const Size& inp, const Size &kernel, - const Size &stride, const String &padMode, - const Size &dilation, Size& out) +void getConvPoolOutParams(const std::vector& inp, const std::vector& kernel, + const std::vector& stride, const String &padMode, + const std::vector& dilation, std::vector& out) { if (padMode == "VALID") { - out.height = (inp.height - (dilation.height * (kernel.height - 1) + 1) + stride.height) / stride.height; - out.width = (inp.width - (dilation.width * (kernel.width - 1) + 1) + stride.width) / stride.width; + for (int i = 0; i < inp.size(); i++) + out.push_back((inp[i] - dilation[i] * (kernel[i] - 1) - 1 + stride[i]) / stride[i]); } else if (padMode == "SAME") { - out.height = (inp.height - 1 + stride.height) / stride.height; - out.width = (inp.width - 1 + stride.width) / stride.width; + for (int i = 0; i < inp.size(); i++) + out.push_back((inp[i] - 1 + stride[i]) / stride[i]); } else { @@ -208,22 +214,26 @@ void getConvPoolOutParams(const Size& inp, const Size &kernel, } } -void getConvPoolPaddings(const Size& inp, const Size& out, - const Size &kernel, const Size &stride, - const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR) +void getConvPoolPaddings(const std::vector& inp, const std::vector& out, + const std::vector& kernel, const std::vector& strides, + const String &padMode, const std::vector& dilation, + std::vector& pads_begin, std::vector& pads_end) { if (padMode == "VALID") { - padT = padL = padB = padR = 0; + pads_begin.assign(kernel.size(), 0); + pads_end.assign(kernel.size(), 0); } else if (padMode == "SAME") { - int Ph = std::max(0, (out.height - 1) * stride.height + (dilation.height * (kernel.height - 1) + 1) - inp.height); - int Pw = std::max(0, (out.width - 1) * stride.width + (dilation.width * (kernel.width - 1) + 1) - inp.width); - // For odd values of total padding, add more padding at the 'right' - // side of the given dimension. - padT= padB = Ph / 2; - padL = padR = Pw / 2; + CV_Assert_N(kernel.size() == dilation.size(), kernel.size() == strides.size(), + kernel.size() == inp.size(), kernel.size() == out.size()); + pads_begin.resize(kernel.size()); + pads_end.resize(kernel.size()); + for (int i = 0; i < pads_begin.size(); i++) { + int pad = ((out[i] - 1) * strides[i] + dilation[i] * (kernel[i] - 1) + 1 - inp[i]) / 2; + pads_begin[i] = pads_end[i] = std::max(0, pad); + } } } diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp index 7fce183d6e..fd1e430a54 100644 --- a/modules/dnn/src/layers/layers_common.hpp +++ b/modules/dnn/src/layers/layers_common.hpp @@ -59,22 +59,20 @@ namespace cv { namespace dnn { +void getConvolutionKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& pads_begin, + std::vector& pads_end, std::vector& strides, std::vector& dilations, cv::String &padMode); -void getConvolutionKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR, - int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String& padMode); +void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kernel, bool &globalPooling, + std::vector& pads_begin, std::vector& pads_end, std::vector& strides, cv::String &padMode); -void getPoolingKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, bool &globalPooling, - int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode); - -void getConvPoolOutParams(const Size& inp, const Size &kernel, - const Size &stride, const String &padMode, - const Size &dilation, Size& out); - - -void getConvPoolPaddings(const Size& inp, const Size& out, - const Size &kernel, const Size &stride, - const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR); +void getConvPoolOutParams(const std::vector& inp, const std::vector& kernel, + const std::vector& stride, const String &padMode, + const std::vector& dilation, std::vector& out); + void getConvPoolPaddings(const std::vector& inp, const std::vector& out, + const std::vector& kernel, const std::vector& strides, + const String &padMode, const std::vector& dilation, + std::vector& pads_begin, std::vector& pads_end); } } diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 28945c7440..78946b4b63 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -72,6 +72,7 @@ public: computeMaxIdx = true; globalPooling = false; stride = Size(1, 1); + pad_t = pad_l = pad_b = pad_r = 0; if (params.has("pool") || params.has("kernel_size") || params.has("kernel_w") || params.has("kernel_h")) @@ -86,11 +87,17 @@ public: else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); - getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling, - pad_t, pad_l, pad_b, pad_r, stride.height, stride.width, padMode); + getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode); + if (kernel_size.size() == 2) { + kernel = Size(kernel_size[1], kernel_size[0]); + stride = Size(strides[1], strides[0]); + pad = Size(pads_begin[1], pads_begin[0]); - pad.width = pad_l; - pad.height = pad_t; + pad_t = pads_begin[0]; + pad_l = pads_begin[1]; + pad_b = pads_end[0]; + pad_r = pads_end[1]; + } } else if (params.has("pooled_w") || params.has("pooled_h")) { @@ -125,17 +132,24 @@ public: CV_Assert(!inputs.empty()); - cv::Size inp(inputs[0].size[3], inputs[0].size[2]), - out(outputs[0].size[3], outputs[0].size[2]); - - if(globalPooling) - { - kernel = inp; + std::vector inp; + std::vector out; + for (int i = 2; i < inputs[0].dims; i++) { + inp.push_back(inputs[0].size[i]); + out.push_back(outputs[0].size[i]); + } + if (globalPooling) { + kernel = Size(inp[1], inp[0]); + kernel_size = std::vector(inp.begin(), inp.end()); } - getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad_t, pad_l, pad_b, pad_r); - pad.width = pad_l; - pad.height = pad_t; + getConvPoolPaddings(inp, out, kernel_size, strides, padMode, std::vector(kernel_size.size(), 1), pads_begin, pads_end); + if (pads_begin.size() == 2) { + pad_t = pads_begin[0]; + pad_l = pads_begin[1]; + pad_b = pads_end[0]; + pad_r = pads_end[1]; + } #ifdef HAVE_OPENCL poolOp.release(); @@ -148,6 +162,8 @@ public: if (backendId == DNN_BACKEND_INFERENCE_ENGINE) { #ifdef HAVE_INF_ENGINE + if (kernel_size.size() == 3) + return preferableTarget == DNN_TARGET_CPU; if (preferableTarget == DNN_TARGET_MYRIAD) { if (type == MAX && (pad_l == 1 && pad_t == 1) && stride == Size(2, 2) ) { return !isMyriadX(); @@ -161,9 +177,9 @@ public: #endif } else - return backendId == DNN_BACKEND_OPENCV || + return (kernel_size.empty() || kernel_size.size() == 2) && (backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_HALIDE && haveHalide() && - (type == MAX || (type == AVE && !pad_t && !pad_l && !pad_b && !pad_r))); + (type == MAX || (type == AVE && !pad_t && !pad_l && !pad_b && !pad_r)))); } #ifdef HAVE_OPENCL @@ -269,10 +285,12 @@ public: if (type == MAX || type == AVE) { InferenceEngine::Builder::PoolingLayer ieLayer(name); - ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width}); - ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width}); - ieLayer.setPaddingsBegin({(size_t)pad_t, (size_t)pad_l}); - ieLayer.setPaddingsEnd({(size_t)pad_b, (size_t)pad_r}); + + ieLayer.setKernel(kernel_size); + ieLayer.setStrides(strides); + ieLayer.setPaddingsBegin(pads_begin); + ieLayer.setPaddingsEnd(pads_end); + ieLayer.setPoolingType(type == MAX ? InferenceEngine::Builder::PoolingLayer::PoolingType::MAX : InferenceEngine::Builder::PoolingLayer::PoolingType::AVG); @@ -916,59 +934,56 @@ public: std::vector &internals) const CV_OVERRIDE { CV_Assert(inputs.size() != 0); - Size in(inputs[0][3], inputs[0][2]), out; + + std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); + std::vector outShape(inputs[0].begin(), inputs[0].begin() + 2); if (globalPooling) { - out.height = 1; - out.width = 1; + outShape.push_back(1); + outShape.push_back(1); } else if (type == ROI || type == PSROI) { - out.height = pooledSize.height; - out.width = pooledSize.width; + outShape.push_back(pooledSize.height); + outShape.push_back(pooledSize.width); } else if (padMode.empty()) { - float height = (float)(in.height + pad_t + pad_b - kernel.height) / stride.height; - float width = (float)(in.width + pad_l + pad_r - kernel.width) / stride.width; - out.height = 1 + (ceilMode ? ceil(height) : floor(height)); - out.width = 1 + (ceilMode ? ceil(width) : floor(width)); + for (int i = 0; i < kernel_size.size(); i++) { + float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - kernel_size[i]) / strides[i]; + outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); + } - if (pad_r || pad_b) - { - // If we have padding, ensure that the last pooling starts strictly - // inside the image (instead of at the padding); otherwise clip the last. - if ((out.height - 1) * stride.height >= in.height + pad_b) - --out.height; - if ((out.width - 1) * stride.width >= in.width + pad_r) - --out.width; - CV_Assert((out.height - 1) * stride.height < in.height + pad_b); - CV_Assert((out.width - 1) * stride.width < in.width + pad_r); + // If we have padding, ensure that the last pooling starts strictly + // inside the image (instead of at the padding); otherwise clip the last. + for (int i = 0; i < pads_end.size(); i++) { + if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { + --outShape[2 + i]; + CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); + } } } else { - getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out); + getConvPoolOutParams(inpShape, kernel_size, strides, padMode, std::vector(kernel_size.size(), 1), outShape); } - - int dims[] = {inputs[0][0], inputs[0][1], out.height, out.width}; if (type == ROI) { CV_Assert(inputs.size() == 2); - dims[0] = inputs[1][0]; // Number of proposals; + outShape[0] = inputs[1][0]; // Number of proposals; } else if (type == PSROI) { CV_Assert(inputs.size() == 2); CV_Assert(psRoiOutChannels * pooledSize.width * pooledSize.height == inputs[0][1]); - dims[0] = inputs[1][0]; // Number of proposals; - dims[1] = psRoiOutChannels; + outShape[0] = inputs[1][0]; // Number of proposals; + outShape[1] = psRoiOutChannels; } - int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1); CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX)); - outputs.assign(numOutputs, shape(dims, 4)); + + outputs.assign(numOutputs, outShape); return false; } diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 7396e00fdc..e722b4a735 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -184,6 +184,12 @@ std::map ONNXImporter::getGraphTensors( return layers_weights; } +static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) { + std::vector dst(src.size()); + convertInt64ToInt32(src, dst, src.size()); + return DictValue::arrayInt(&dst[0], src.size()); +} + LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto) { LayerParams lp; @@ -194,15 +200,13 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot if(attribute_name == "kernel_shape") { - CV_Assert(attribute_proto.ints_size() == 2); - lp.set("kernel_h", saturate_cast(attribute_proto.ints(0))); - lp.set("kernel_w", saturate_cast(attribute_proto.ints(1))); + CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + lp.set("kernel_size", parse(attribute_proto.ints())); } else if(attribute_name == "strides") { - CV_Assert(attribute_proto.ints_size() == 2); - lp.set("stride_h", saturate_cast(attribute_proto.ints(0))); - lp.set("stride_w", saturate_cast(attribute_proto.ints(1))); + CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + lp.set("stride", parse(attribute_proto.ints())); } else if(attribute_name == "pads") { @@ -225,11 +229,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot else { // Convolution or pooling. - CV_Assert(attribute_proto.ints_size() == 4); - lp.set("pad_t", saturate_cast(attribute_proto.ints(0))); - lp.set("pad_l", saturate_cast(attribute_proto.ints(1))); - lp.set("pad_b", saturate_cast(attribute_proto.ints(2))); - lp.set("pad_r", saturate_cast(attribute_proto.ints(3))); + CV_Assert(attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6); + lp.set("pad", parse(attribute_proto.ints())); } } else if(attribute_name == "auto_pad") @@ -243,9 +244,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot } else if(attribute_name == "dilations") { - CV_Assert(attribute_proto.ints_size() == 2); - lp.set("dilation_h", saturate_cast(attribute_proto.ints(0))); - lp.set("dilation_w", saturate_cast(attribute_proto.ints(1))); + CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + lp.set("dilation", parse(attribute_proto.ints())); } else if (attribute_proto.has_i()) { @@ -270,10 +270,7 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot } else if (attribute_proto.ints_size() > 0) { - const ::google::protobuf::RepeatedField< ::google::protobuf::int64> src = attribute_proto.ints(); - std::vector dst(attribute_proto.ints_size()); - convertInt64ToInt32(src, dst, attribute_proto.ints_size()); - lp.set(attribute_proto.name(), DictValue::arrayInt(&dst[0], attribute_proto.ints_size())); + lp.set(attribute_proto.name(), parse(attribute_proto.ints())); } else if (attribute_proto.has_t()) { @@ -305,19 +302,6 @@ Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto, return constBlob->second; } - -bool ONNXImporter::isCeilMode(const LayerParams& layerParams) { - if (!layerParams.has("pad_mode")) { - if (layerParams.has("pad_h")) { - return layerParams.get("pad_h") != layerParams.get("pad_b") || - layerParams.get("pad_w") != layerParams.get("pad_r"); - } - else - return false; // all pads == 0 - } - return true; -} - void ONNXImporter::populateNet(Net dstNet) { CV_Assert(model_proto.has_graph()); @@ -384,13 +368,13 @@ void ONNXImporter::populateNet(Net dstNet) { layerParams.type = "Pooling"; layerParams.set("pool", "MAX"); - layerParams.set("ceil_mode", isCeilMode(layerParams)); + layerParams.set("ceil_mode", layerParams.has("pad_mode")); } else if (layer_type == "AveragePool") { layerParams.type = "Pooling"; layerParams.set("pool", "AVE"); - layerParams.set("ceil_mode", isCeilMode(layerParams)); + layerParams.set("ceil_mode", layerParams.has("pad_mode")); layerParams.set("ave_pool_padded_area", framework_name == "pytorch"); } else if (layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool") @@ -600,8 +584,9 @@ void ONNXImporter::populateNet(Net dstNet) if (outShape.size() != 4) CV_Error(Error::StsNotImplemented, "Output shape must have 4 elements."); - const int strideY = layerParams.get("stride_h", 1); - const int strideX = layerParams.get("stride_w", 1); + DictValue stride = layerParams.get("stride"); + const int strideY = stride.getIntValue(0); + const int strideX = stride.getIntValue(1); const int outH = outShape.getIntValue(2); const int outW = outShape.getIntValue(3); @@ -612,15 +597,13 @@ void ONNXImporter::populateNet(Net dstNet) } else if (layerParams.get("pad_mode") == "VALID") { - if (!layerParams.has("kernel_h") || !layerParams.has("kernel_w")) + if (!layerParams.has("kernel_size")) CV_Error(Error::StsNotImplemented, - "Required attributes 'kernel_h' and 'kernel_w' are not present."); + "Required attribute 'kernel_size' is not present."); - int kernelH = layerParams.get("kernel_h"); - int kernelW = layerParams.get("kernel_w"); - - layerParams.set("adj_w", (outW - kernelW) % strideX); - layerParams.set("adj_h", (outH - kernelH) % strideY); + DictValue kernel = layerParams.get("kernel_size"); + layerParams.set("adj_h", (outH - kernel.getIntValue(0)) % strideY); + layerParams.set("adj_w", (outW - kernel.getIntValue(1)) % strideX); } } else if (layerParams.has("output_padding")) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index ef0b196f44..a7a681c140 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -51,6 +51,7 @@ enum DataLayout { DATA_LAYOUT_NHWC, DATA_LAYOUT_NCHW, + DATA_LAYOUT_NDHWC, DATA_LAYOUT_UNKNOWN, DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d) }; @@ -258,6 +259,8 @@ static int getDataLayout(const tensorflow::NodeDef& layer) return DATA_LAYOUT_NHWC; else if (format == "NCHW" || format == "channels_first") return DATA_LAYOUT_NCHW; + else if (format == "NDHWC") + return DATA_LAYOUT_NDHWC; else CV_Error(Error::StsParseError, "Unknown data_format value: " + format); } @@ -281,21 +284,34 @@ void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer) if (hasLayerAttr(layer, "strides")) { const tensorflow::AttrValue& val = getLayerAttr(layer, "strides"); - int dimX, dimY, dimC; + int dimX, dimY, dimC, dimD; int layout = getDataLayout(layer); if (layout == DATA_LAYOUT_NCHW) { dimC = 1; dimY = 2; dimX = 3; } + else if (layout == DATA_LAYOUT_NDHWC) + { + dimD = 1; dimY = 2; dimX = 3; dimC = 4; + } else { dimY = 1; dimX = 2; dimC = 3; } - if (val.list().i_size() != 4 || + if (!(val.list().i_size() == 4 || val.list().i_size() == 5) || val.list().i(0) != 1 || val.list().i(dimC) != 1) CV_Error(Error::StsError, "Unsupported strides"); - layerParams.set("stride_h", static_cast(val.list().i(dimY))); - layerParams.set("stride_w", static_cast(val.list().i(dimX))); + if (layout == DATA_LAYOUT_NDHWC) { + int strides[] = {static_cast(val.list().i(dimD)), + static_cast(val.list().i(dimY)), + static_cast(val.list().i(dimX))}; + layerParams.set("stride", DictValue::arrayInt(strides, 3)); + } + else + { + layerParams.set("stride_h", static_cast(val.list().i(dimY))); + layerParams.set("stride_w", static_cast(val.list().i(dimX))); + } } } @@ -318,21 +334,35 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer) if (hasLayerAttr(layer, "ksize")) { const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize"); - int dimX, dimY, dimC; + int dimX, dimY, dimC, dimD; int layout = getDataLayout(layer); if (layout == DATA_LAYOUT_NCHW) { dimC = 1; dimY = 2; dimX = 3; } + else if (layout == DATA_LAYOUT_NDHWC) + { + dimD = 1; dimY = 2; dimX = 3; dimC = 4; + } else { dimY = 1; dimX = 2; dimC = 3; } - if (val.list().i_size() != 4 || + if (!(val.list().i_size() == 4 || val.list().i_size() == 5) || val.list().i(0) != 1 || val.list().i(dimC) != 1) CV_Error(Error::StsError, "Unsupported ksize"); - layerParams.set("kernel_h", static_cast(val.list().i(dimY))); - layerParams.set("kernel_w", static_cast(val.list().i(dimX))); + + if (layout == DATA_LAYOUT_NDHWC) { + int kernel[] = {static_cast(val.list().i(dimD)), + static_cast(val.list().i(dimY)), + static_cast(val.list().i(dimX))}; + layerParams.set("kernel_size", DictValue::arrayInt(kernel, 3)); + } + else + { + layerParams.set("kernel_h", static_cast(val.list().i(dimY))); + layerParams.set("kernel_w", static_cast(val.list().i(dimX))); + } } else { @@ -456,12 +486,26 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &ds // TODO: other blob types CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT || tensor.dtype() == tensorflow::DT_HALF); - CV_Assert(dims == 4); + CV_Assert(dims == 4 || dims == 5); - // REORDER kernel HWIO to OIHW - swap(shape[0], shape[2]); // IWHO - swap(shape[1], shape[3]); // IOHW - swap(shape[0], shape[1]); // OIHW + int out_c, input_c, depth, height, width; + if (dims == 4) + { + // REORDER kernel HWIO to OIHW + swap(shape[0], shape[2]); // IWHO + swap(shape[1], shape[3]); // IOHW + swap(shape[0], shape[1]); // OIHW + depth = 1; height = shape[2]; width = shape[3]; + } + else + { + // REORDER kernel DHWIO to OIDHW + swap(shape[0], shape[4]); // OHWID + swap(shape[1], shape[3]); // OIWHD + swap(shape[2], shape[4]); // OIDHW + depth = shape[2]; height = shape[3]; width = shape[4]; + } + out_c = shape[0]; input_c = shape[1]; dstBlob.create(shape, CV_32F); @@ -472,17 +516,20 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &ds float *dstData = dstBlob.ptr(); const float *data = reinterpret_cast(tensorContent.data); - int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3]; - int total = out_c*input_c*height*width; - for(int i_oc = 0; i_oc < out_c; i_oc++) { - for(int i_ic = 0; i_ic < input_c; i_ic++) { - for(int i_h = 0; i_h < height; i_h++) { - for(int i_w = 0; i_w < width; i_w++) { - int dst_i = input_c*height*width*i_oc + height*width*i_ic + width*i_h + i_w; - int src_i = out_c*input_c*width*i_h + out_c*input_c*i_w + out_c*i_ic + i_oc; - CV_Assert(dst_i < total); - CV_Assert(src_i < total); - dstData[dst_i] = data[src_i]; + int total = out_c * input_c * depth * height * width; + for (int i_oc = 0; i_oc < out_c; i_oc++) { + for (int i_ic = 0; i_ic < input_c; i_ic++) { + for (int i_d = 0; i_d < depth; i_d++) { + for (int i_h = 0; i_h < height; i_h++) { + for (int i_w = 0; i_w < width; i_w++) { + int dst_i = input_c * depth * height * width * i_oc + + depth * height * width * i_ic + height * width * i_d + width * i_h + i_w; + int src_i = out_c * input_c * width * height * i_d + + out_c * input_c * width * i_h + out_c * input_c * i_w + out_c * i_ic + i_oc; + CV_Assert(dst_i < total); + CV_Assert(src_i < total); + dstData[dst_i] = data[src_i]; + } } } } @@ -745,7 +792,7 @@ void TFImporter::populateNet(Net dstNet) int predictedLayout = predictOutputDataLayout(net, layer, data_layouts); data_layouts[name] = predictedLayout; - if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad") + if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad" || type == "Conv3D") { // The first node of dilated convolution subgraph. // Extract input node, dilation rate and paddings. @@ -917,9 +964,9 @@ void TFImporter::populateNet(Net dstNet) { layerParams.blobs[0] = sharedWeightsIt->second; } + Mat weights = layerParams.blobs[0]; + layerParams.set("kernel_size", DictValue::arrayInt(&weights.size[2], weights.dims - 2)); - layerParams.set("kernel_h", layerParams.blobs[0].size[2]); - layerParams.set("kernel_w", layerParams.blobs[0].size[3]); layerParams.set("num_output", layerParams.blobs[0].size[0]); setStrides(layerParams, layer); @@ -1290,7 +1337,7 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, inp, id, ii - from); } } - else if (type == "MaxPool") + else if (type == "MaxPool" || type == "MaxPool3D") { layerParams.set("pool", "max"); @@ -1303,11 +1350,10 @@ void TFImporter::populateNet(Net dstNet) connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size()); } - else if (type == "AvgPool") + else if (type == "AvgPool" || type == "AvgPool3D") { layerParams.set("pool", "ave"); layerParams.set("ave_pool_padded_area", false); - setKSize(layerParams, layer); setStrides(layerParams, layer); setPadding(layerParams, layer); diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 992a9f31cf..c0782e96b2 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -81,6 +81,13 @@ TEST_P(Test_ONNX_layers, Convolution) testONNXModels("convolution"); } +TEST_P(Test_ONNX_layers, Convolution3D) +{ + if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU) + throw SkipTestException("Only DLIE backend on CPU is supported"); + testONNXModels("conv3d"); + testONNXModels("conv3d_bias"); +} TEST_P(Test_ONNX_layers, Two_convolution) { @@ -138,6 +145,20 @@ TEST_P(Test_ONNX_layers, AveragePooling) testONNXModels("average_pooling"); } +TEST_P(Test_ONNX_layers, MaxPooling3D) +{ + if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU) + throw SkipTestException("Only DLIE backend on CPU is supported"); + testONNXModels("max_pool3d"); +} + +TEST_P(Test_ONNX_layers, AvePooling3D) +{ + if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU) + throw SkipTestException("Only DLIE backend on CPU is supported"); + testONNXModels("ave_pool3d"); +} + TEST_P(Test_ONNX_layers, BatchNormalization) { testONNXModels("batch_norm"); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 8b750bbb44..ff2e14b8b6 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -131,6 +131,13 @@ TEST_P(Test_TensorFlow_layers, conv) runTensorFlowNet("conv_pool_nchw"); } +TEST_P(Test_TensorFlow_layers, Convolution3D) +{ + if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU) + throw SkipTestException("Only DLIE backend on CPU is supported"); + runTensorFlowNet("conv3d"); +} + TEST_P(Test_TensorFlow_layers, padding) { runTensorFlowNet("padding_valid"); @@ -212,6 +219,20 @@ TEST_P(Test_TensorFlow_layers, ave_pool_same) runTensorFlowNet("ave_pool_same"); } +TEST_P(Test_TensorFlow_layers, MaxPooling3D) +{ + if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU) + throw SkipTestException("Only DLIE backend on CPU is supported"); + runTensorFlowNet("max_pool3d"); +} + +TEST_P(Test_TensorFlow_layers, AvePooling3D) +{ + if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU) + throw SkipTestException("Only DLIE backend on CPU is supported"); + runTensorFlowNet("ave_pool3d"); +} + TEST_P(Test_TensorFlow_layers, deconvolution) { runTensorFlowNet("deconvolution");