diff --git a/modules/dnn/include/opencv2/dnn/shape_utils.hpp b/modules/dnn/include/opencv2/dnn/shape_utils.hpp
index 609809e110..b77333bd48 100644
--- a/modules/dnn/include/opencv2/dnn/shape_utils.hpp
+++ b/modules/dnn/include/opencv2/dnn/shape_utils.hpp
@@ -235,6 +235,27 @@ Range normalize_axis_range(const Range& r, int axisSize)
     return clamped;
 }
 
+// Returns true when every dimension of inputShape in [startPos, endPos) equals 1;
+// an empty range (startPos == endPos) yields true.
+static inline
+bool isAllOnes(const MatShape &inputShape, int startPos, int endPos)
+{
+    CV_Assert(!inputShape.empty());
+
+    CV_CheckGE((int) inputShape.size(), startPos, "");
+    CV_CheckGE(startPos, 0, "");
+    CV_CheckLE(startPos, endPos, "");
+    CV_CheckLE((size_t)endPos, inputShape.size(), "");
+
+    // Signed index: both bounds are ints validated above, so this avoids a
+    // signed/unsigned comparison against endPos.
+    for (int i = startPos; i < endPos; i++)
+    {
+        if (inputShape[i] != 1)
+            return false;
+    }
+    return true;
+}
 CV__DNN_EXPERIMENTAL_NS_END
 }
 }
diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp
index 77e1a1171a..f30bb6b43d 100644
--- a/modules/dnn/src/layers/eltwise_layer.cpp
+++ b/modules/dnn/src/layers/eltwise_layer.cpp
@@ -45,6 +45,7 @@
 #include "../op_halide.hpp"
 #include "../op_inf_engine.hpp"
 #include "../ie_ngraph.hpp"
+#include <opencv2/dnn/shape_utils.hpp>
 
 #ifdef HAVE_OPENCL
 #include "opencl_kernels_dnn.hpp"
@@ -90,6 +91,7 @@ public:
         : outputChannels(0)
     {
         setParamsFrom(params);
+        hasVecInput = false;
         op = SUM;
         if (params.has("operation"))
         {
@@ -149,6 +151,11 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+        // When a vector input has to be broadcast (same-channels mode), report
+        // support for the OpenCV backend only.
+        if (hasVecInput && channelsModeInput == ELTWISE_CHANNNELS_SAME)
+            return backendId == DNN_BACKEND_OPENCV;
+
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_HALIDE ||
                ((((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()))
@@ -197,9 +204,6 @@ public:
             {
                 CV_Assert(0 && "Internal error");
             }
-
-            for (size_t j = 2; j < dims; j++)
-                CV_Assert(inputs[0][j] == inputs[i][j]);
         }
 
         channelsMode = variableChannels ? channelsModeInput : ELTWISE_CHANNNELS_SAME;
@@ -207,9 +211,62 @@ public:
 
         outputs.assign(1, inputs[0]);
         outputs[0][1] = numChannels;
+
+        if (dims > 2)
+        {
+            // The first input whose trailing dims are not all ones is the reference;
+            // every other such input must match its trailing dims.
+            size_t fullIdx = 0;
+            bool isFullFound = false;
+            for (size_t i = 0; i < inputs.size(); i++)
+            {
+                bool allOnes = isAllOnes(inputs[i], 2, dims);
+                if (!allOnes && !isFullFound)
+                {
+                    fullIdx = i;
+                    isFullFound = true;
+                }
+
+                if (!allOnes && i != fullIdx)
+                {
+                    for (size_t j = 2; j < dims; j++)
+                    {
+                        CV_Assert(inputs[fullIdx][j] == inputs[i][j]);
+                    }
+                }
+            }
+
+            if (channelsModeInput == ELTWISE_CHANNNELS_SAME && isFullFound)
+            {
+                for (size_t j = 2; j < dims; j++)
+                {
+                    outputs[0][j] = inputs[fullIdx][j];
+                }
+            }
+        }
+
         return false;
     }
 
+    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
+    {
+        std::vector<Mat> inputs;
+        inputs_arr.getMatVector(inputs);
+
+        // Recompute the flag on every call so a re-finalize with different shapes
+        // cannot leave a stale 'true' behind. Broadcasting is needed only when an
+        // input with all-ones trailing dims is mixed with one that is not.
+        bool hasVec = false;
+        bool hasFull = false;
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            if (isAllOnes(shape(inputs[i].size), 2, inputs[i].dims))
+                hasVec = true;
+            else
+                hasFull = true;
+        }
+        hasVecInput = hasVec && hasFull;
+    }
 
     class EltwiseInvoker : public ParallelLoopBody
     {
@@ -502,6 +559,9 @@ public:
         if ((inputs_.depth() == CV_16S && op != SUM) || (channelsMode != ELTWISE_CHANNNELS_SAME))
             return false;
 
+        if (hasVecInput)
+            return false; // TODO not implemented yet: https://github.com/opencv/opencv/pull/19477
+
         inputs_.getUMatVector(inputs);
         outputs_.getUMatVector(outputs);
 
@@ -602,6 +662,53 @@ public:
 
         CV_Assert(outputs.size() == 1);
         const int nstripes = getNumThreads();
+
+        if (channelsModeInput == ELTWISE_CHANNNELS_SAME && inputs[0].dims > 2)
+        {
+            // Expand every input whose trailing dims are all ones to the output shape
+            // so that EltwiseInvoker sees equally sized inputs.
+            const MatShape outShape = shape(outputs[0].size);
+            int xSize = outShape[2];
+            for (size_t j = 3; j < outShape.size(); j++)
+                xSize *= outShape[j];
+
+            for (size_t i = 0; i < inputs.size(); i++)
+            {
+                const MatShape inpShape = shape(inputs[i].size);
+                if (!isAllOnes(inpShape, 2, inputs[i].dims))
+                    continue;
+
+                const Mat tmpInput = inputs[i];
+                // The copy below reads and writes elements as float.
+                CV_Assert(tmpInput.depth() == CV_32F);
+
+                const int dimVec[3] = {outShape[0], outShape[1], xSize};
+                std::vector<int> matSizesVec(&dimVec[0], &dimVec[0] + 3);
+                inputs[i] = Mat(matSizesVec, tmpInput.type());
+
+                // srcIdx addresses tmpInput (trailing indices stay 0),
+                // dstIdx addresses the 3-D (N, C, xSize) buffer.
+                std::vector<int> srcIdx(inpShape.size(), 0);
+                int dstIdx[3] = {0, 0, 0};
+
+                for (int n = 0; n < outShape[0]; n++)
+                {
+                    dstIdx[0] = srcIdx[0] = n;
+                    for (int c = 0; c < outShape[1]; c++)
+                    {
+                        dstIdx[1] = srcIdx[1] = c;
+                        const float value = tmpInput.at<float>(srcIdx.data());
+                        for (int x = 0; x < xSize; x++)
+                        {
+                            dstIdx[2] = x;
+                            inputs[i].at<float>(dstIdx) = value;
+                        }
+                    }
+                }
+                inputs[i] = inputs[i].reshape(0, outShape);
+            }
+        }
+
         EltwiseInvoker::run(*this,
                             &inputs[0], (int)inputs.size(), outputs[0],
                             nstripes);
@@ -739,6 +846,9 @@ public:
     }
 
     Ptr<ActivationLayer> activ;
+
+private:
+    bool hasVecInput; // set in finalize(): an all-ones-trailing-dims input must be broadcast
 };
 
 Ptr<EltwiseLayer> EltwiseLayer::create(const LayerParams& params)
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 6a1a44f03a..62a559a672 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -205,6 +205,11 @@ TEST_P(Test_TensorFlow_layers, eltwise)
     runTensorFlowNet("eltwise_sub");
 }
 
+TEST_P(Test_TensorFlow_layers, eltwise_add_vec)
+{
+    runTensorFlowNet("eltwise_add_vec");
+}
+
 TEST_P(Test_TensorFlow_layers, channel_broadcast)
 {
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)