diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 0b12f949bc..caa471806f 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -67,7 +67,7 @@ public:
     BaseConvolutionLayerImpl(const LayerParams &params)
     {
         setParamsFrom(params);
-        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode);
+        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads);
 
         numOutput = params.get<int>("num_output");
         int ngroups = params.get<int>("group", 1);
@@ -83,14 +83,14 @@ public:
             pad = Size(pads_begin[1], pads_begin[0]);
             dilation = Size(dilations[1], dilations[0]);
 
-            adjust_pads.push_back(params.get<int>("adj_h", 0));
-            adjust_pads.push_back(params.get<int>("adj_w", 0));
-
             adjustPad.height = adjust_pads[0];
             adjustPad.width = adjust_pads[1];
-            CV_Assert(adjustPad.width < stride.width &&
-                      adjustPad.height < stride.height);
         }
+
+        for (int i = 0; i < adjust_pads.size(); i++) {
+            CV_Assert(adjust_pads[i] < strides[i]);
+        }
+
         fusedWeights = false;
         fusedBias = false;
     }
@@ -1241,29 +1241,39 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
 #ifdef HAVE_INF_ENGINE
-        const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW layout
+        const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW or IODHW layout
         const int group = numOutput / outGroupCn;
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
         {
-            if (kernel_size.size() == 3)
-                CV_Error(Error::StsNotImplemented, "Unsupported deconvolution3D layer");
+            if (kernel_size.size() == 3 && preferableTarget != DNN_TARGET_CPU) {
+                return false;
+            }
 
-            if (adjustPad.height || adjustPad.width)
+            if (std::accumulate(adjust_pads.begin(), adjust_pads.end(), 0, std::plus<size_t>()) > 0)
             {
                 if (padMode.empty())
                 {
                     if (preferableTarget != DNN_TARGET_CPU && group != 1)
                     {
-                        if ((adjustPad.height && pad.height) || (adjustPad.width && pad.width))
+                        for (int i = 0; i < adjust_pads.size(); i++) {
+                            if (adjust_pads[i] && pads_begin[i])
+                                return false;
+                        }
+                    }
+                    for (int i = 0; i < adjust_pads.size(); i++) {
+                        if (pads_end[i] < adjust_pads[i])
                             return false;
                     }
-                    return pad.width >= adjustPad.width && pad.height >= adjustPad.height;
+                    return true;
                 }
                 else if (padMode == "SAME")
                 {
-                    return kernel.width >= pad.width + 1 + adjustPad.width &&
-                           kernel.height >= pad.height + 1 + adjustPad.height;
+                    for (int i = 0; i < adjust_pads.size(); i++) {
+                        if (kernel_size[i] < pads_begin[i] + 1 + adjust_pads[i])
+                            return false;
+                    }
+                    return true;
                 }
                 else if (padMode == "VALID")
                     return false;
@@ -1274,7 +1284,7 @@ public:
                 return preferableTarget == DNN_TARGET_CPU;
             }
             if (preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16)
-                return dilation.width == 1 && dilation.height == 1;
+                return std::accumulate(dilations.begin(), dilations.end(), 1, std::multiplies<size_t>()) == 1;
             return true;
         }
         else
@@ -1861,11 +1871,14 @@ public:
 #ifdef HAVE_INF_ENGINE
     virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &) CV_OVERRIDE
     {
-        auto ieWeights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
+        InferenceEngine::Layout layout = blobs[0].dims == 5? InferenceEngine::Layout::NCDHW :
+                                                             InferenceEngine::Layout::OIHW;
+
+        auto ieWeights = wrapToInfEngineBlob(blobs[0], layout);
         if (fusedWeights)
         {
             ieWeights = InferenceEngine::make_shared_blob<float>(
-                                InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW,
+                                InferenceEngine::Precision::FP32, layout,
                                 ieWeights->dims());
             ieWeights->allocate();
 
@@ -1874,7 +1887,7 @@ public:
             transpose(weightsMat, newWeights);
         }
 
-        const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW layout
+        const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW or IODHW layout
         const int group = numOutput / outGroupCn;
 
         InferenceEngine::Builder::DeconvolutionLayer ieLayer(name);
@@ -1886,12 +1899,19 @@ public:
 
         if (padMode.empty())
         {
-            ieLayer.setPaddingsEnd({pads_end[0] - adjust_pads[0], pads_end[1] - adjust_pads[1]});
+            std::vector<size_t> paddings_end;
+            for (int i = 0; i < pads_end.size(); i++) {
+                paddings_end.push_back(pads_end[i] - adjust_pads[i]);
+            }
+            ieLayer.setPaddingsEnd(paddings_end);
         }
         else if (padMode == "SAME")
         {
-            ieLayer.setPaddingsEnd({kernel_size[0] - pads_begin[0] - 1 - adjust_pads[0],
-                                    kernel_size[1] - pads_begin[1] - 1 - adjust_pads[1]});
+            std::vector<size_t> paddings_end;
+            for (int i = 0; i < pads_begin.size(); i++) {
+                paddings_end.push_back(kernel_size[i] - pads_begin[i] - 1 - adjust_pads[i]);
+            }
+            ieLayer.setPaddingsEnd(paddings_end);
         }
         ieLayer.setGroup((size_t)group);
         ieLayer.setOutDepth((size_t)numOutput);
@@ -1911,10 +1931,12 @@ public:
 
         float flops = 0;
         int outChannels = blobs[0].size[0];
+        size_t karea = std::accumulate(kernel_size.begin(), kernel_size.end(),
+                                       1, std::multiplies<size_t>());
 
         for (int i = 0; i < inputs.size(); i++)
        {
-            flops += CV_BIG_INT(2)*outChannels*kernel.area()*total(inputs[i]);
+            flops += CV_BIG_INT(2)*outChannels*karea*total(inputs[i]);
         }
 
         return flops;
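For context on the new per-axis asserts: adjust_pads carries the ONNX output_padding values, which disambiguate the output extent of a transposed convolution, and an adjustment of a full stride or more would instead correspond to a larger input. A minimal sketch of the per-axis relation, using a hypothetical helper that is not part of this patch:

    // Per-axis output size of a deconvolution (ONNX ConvTranspose convention).
    // The adjustment must stay below the stride, which is the invariant
    // asserted in BaseConvolutionLayerImpl above.
    size_t deconvOutSize(size_t in, size_t kernel, size_t stride, size_t dilation,
                         size_t padBegin, size_t padEnd, size_t adjust)
    {
        CV_Assert(adjust < stride);
        return stride * (in - 1) + dilation * (kernel - 1) + 1 + adjust - padBegin - padEnd;
    }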
diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp
index 29d863d2ad..2f5f486155 100644
--- a/modules/dnn/src/layers/layers_common.cpp
+++ b/modules/dnn/src/layers/layers_common.cpp
@@ -175,11 +175,13 @@ void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kern
 }
 
 void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
-                                std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations, cv::String &padMode)
+                                std::vector<size_t>& pads_end, std::vector<size_t>& strides,
+                                std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads)
 {
     util::getKernelSize(params, kernel);
     util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
     util::getParameter(params, "dilation", "dilation", dilations, true, std::vector<size_t>(kernel.size(), 1));
+    util::getParameter(params, "adj", "adj", adjust_pads, true, std::vector<size_t>(kernel.size(), 0));
 
     for (int i = 0; i < dilations.size(); i++)
         CV_Assert(dilations[i] > 0);
diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp
index 26c1ce62d5..b574d7eed0 100644
--- a/modules/dnn/src/layers/layers_common.hpp
+++ b/modules/dnn/src/layers/layers_common.hpp
@@ -60,7 +60,8 @@ namespace cv
 namespace dnn
 {
 void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
-                                std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations, cv::String &padMode);
+                                std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
+                                cv::String &padMode, std::vector<size_t>& adjust_pads);
 
 void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling,
                             std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);
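A hypothetical call site illustrating the extended signature: adjust_pads is now parsed inside getConvolutionKernelParams from the layer's "adj" attribute and defaults to zeros of the kernel's rank, so 2D and 3D layers are handled uniformly:

    std::vector<size_t> kernel, pads_begin, pads_end, strides, dilations, adjust_pads;
    cv::String padMode;
    getConvolutionKernelParams(params, kernel, pads_begin, pads_end,
                               strides, dilations, padMode, adjust_pads);
    // With a 3D kernel and no "adj" attribute, adjust_pads == {0, 0, 0}.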
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 5164fbeccb..c5745c9b8b 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -682,42 +682,37 @@ void ONNXImporter::populateNet(Net dstNet)
             layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get<int>("group", 1));
             layerParams.set("bias_term", node_proto.input_size() == 3);
 
+            if (!layerParams.has("kernel_size"))
+                CV_Error(Error::StsNotImplemented,
+                         "Required attribute 'kernel_size' is not present.");
+
             if (layerParams.has("output_shape"))
             {
                 const DictValue& outShape = layerParams.get("output_shape");
+                DictValue strides = layerParams.get("stride");
+                DictValue kernel = layerParams.get("kernel_size");
 
-                if (outShape.size() != 4)
-                    CV_Error(Error::StsNotImplemented, "Output shape must have 4 elements.");
-
-                DictValue stride = layerParams.get("stride");
-                const int strideY = stride.getIntValue(0);
-                const int strideX = stride.getIntValue(1);
-                const int outH = outShape.getIntValue(2);
-                const int outW = outShape.getIntValue(3);
-
-                if (layerParams.get<String>("pad_mode") == "SAME")
+                String padMode;
+                std::vector<int> adjust_pads;
+                if (layerParams.has("pad_mode"))
                 {
-                    layerParams.set("adj_w", (outW - 1) % strideX);
-                    layerParams.set("adj_h", (outH - 1) % strideY);
-                }
-                else if (layerParams.get<String>("pad_mode") == "VALID")
-                {
-                    if (!layerParams.has("kernel_size"))
-                        CV_Error(Error::StsNotImplemented,
-                                 "Required attribute 'kernel_size' is not present.");
+                    padMode = toUpperCase(layerParams.get<String>("pad_mode"));
+                    if (padMode != "SAME" && padMode != "VALID")
+                        CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
 
-                    DictValue kernel = layerParams.get("kernel_size");
-                    layerParams.set("adj_h", (outH - kernel.getIntValue(0)) % strideY);
-                    layerParams.set("adj_w", (outW - kernel.getIntValue(1)) % strideX);
+                    for (int i = 0; i < strides.size(); i++)
+                    {
+                        int sz = outShape.get<int>(2 + i);
+                        int stride = strides.get<int>(i);
+                        adjust_pads.push_back(padMode == "SAME"? (sz - 1) % stride :
+                                                                 (sz - kernel.get<int>(i)) % stride);
+                    }
+                    layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size()));
                 }
             }
             else if (layerParams.has("output_padding"))
             {
-                const DictValue& adj_pad = layerParams.get("output_padding");
-                if (adj_pad.size() != 2)
-                    CV_Error(Error::StsNotImplemented, "Deconvolution3D layer is not supported");
-                layerParams.set("adj_w", adj_pad.get<int>(1));
-                layerParams.set("adj_h", adj_pad.get<int>(0));
+                replaceLayerParam(layerParams, "output_padding", "adj");
             }
         }
         else if (layer_type == "Transpose")
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 5dd919fd7e..7ae94db3f3 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -127,6 +127,19 @@ TEST_P(Test_ONNX_layers, Deconvolution)
     testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false);
 }
 
+TEST_P(Test_ONNX_layers, Deconvolution3D)
+{
+#if defined(INF_ENGINE_RELEASE)
+    applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_2018R5);
+#endif
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    testONNXModels("deconv3d");
+    testONNXModels("deconv3d_bias");
+    testONNXModels("deconv3d_pad");
+    testONNXModels("deconv3d_adjpad");
+}
+
 TEST_P(Test_ONNX_layers, Dropout)
 {
     testONNXModels("dropout");
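For reference, the importer's "adj" derivation above reduces to the following standalone sketch (a hypothetical helper, called once per spatial axis with the values taken from output_shape, kernel_size and stride):

    // Recover ONNX output_padding ("adj") for one axis from an explicit
    // output_shape, mirroring the loop added to onnx_importer.cpp.
    int adjustFromOutputShape(int out, int kernel, int stride, bool sameMode)
    {
        return sameMode ? (out - 1) % stride        // pad_mode == "SAME"
                        : (out - kernel) % stride;  // pad_mode == "VALID"
    }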