diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp
index 54a53fd867..3a90081e17 100644
--- a/modules/dnn/src/darknet/darknet_io.cpp
+++ b/modules/dnn/src/darknet/darknet_io.cpp
@@ -128,7 +128,7 @@ namespace cv {
 
             void setConvolution(int kernel, int pad, int stride,
-                int filters_num, int channels_num, int use_batch_normalize, int use_relu)
+                int filters_num, int channels_num, int use_batch_normalize)
             {
                 cv::dnn::LayerParams conv_param =
                     getParamConvolution(kernel, pad, stride, filters_num);
@@ -168,27 +168,29 @@ namespace cv {
                     net->layers.push_back(lp);
                 }
 
-                if (use_relu)
-                {
-                    cv::dnn::LayerParams activation_param;
-                    activation_param.set("negative_slope", 0.1f);
-                    activation_param.name = "ReLU-name";
-                    activation_param.type = "ReLU";
-
-                    darknet::LayerParameter lp;
-                    std::string layer_name = cv::format("relu_%d", layer_id);
-                    lp.layer_name = layer_name;
-                    lp.layer_type = activation_param.type;
-                    lp.layerParams = activation_param;
-                    lp.bottom_indexes.push_back(last_layer);
-                    last_layer = layer_name;
-                    net->layers.push_back(lp);
-                }
-
                 layer_id++;
                 fused_layer_names.push_back(last_layer);
             }
 
+            void setReLU()
+            {
+                cv::dnn::LayerParams activation_param;
+                activation_param.set("negative_slope", 0.1f);
+                activation_param.name = "ReLU-name";
+                activation_param.type = "ReLU";
+
+                darknet::LayerParameter lp;
+                std::string layer_name = cv::format("relu_%d", layer_id);
+                lp.layer_name = layer_name;
+                lp.layer_type = activation_param.type;
+                lp.layerParams = activation_param;
+                lp.bottom_indexes.push_back(last_layer);
+                last_layer = layer_name;
+                net->layers.push_back(lp);
+
+                fused_layer_names.back() = last_layer;
+            }
+
             void setMaxpool(size_t kernel, size_t pad, size_t stride)
             {
                 cv::dnn::LayerParams maxpool_param;
@@ -409,12 +411,19 @@ namespace cv {
                 fused_layer_names.push_back(last_layer);
             }
 
-            void setShortcut(int from)
+            void setShortcut(int from, float alpha)
             {
                 cv::dnn::LayerParams shortcut_param;
                 shortcut_param.name = "Shortcut-name";
                 shortcut_param.type = "Eltwise";
 
+                if (alpha != 1)
+                {
+                    std::vector<float> coeffs(2, 1);
+                    coeffs[0] = alpha;
+                    shortcut_param.set("coeff", DictValue::arrayReal(&coeffs[0], coeffs.size()));
+                }
+
                 shortcut_param.set("op", "sum");
 
                 darknet::LayerParameter lp;
@@ -422,8 +431,8 @@ namespace cv {
                 lp.layer_name = layer_name;
                 lp.layer_type = shortcut_param.type;
                 lp.layerParams = shortcut_param;
-                lp.bottom_indexes.push_back(fused_layer_names.at(from));
                 lp.bottom_indexes.push_back(last_layer);
+                lp.bottom_indexes.push_back(fused_layer_names.at(from));
                 last_layer = layer_name;
                 net->layers.push_back(lp);
 
@@ -548,10 +557,7 @@ namespace cv {
                        int pad = getParam<int>(layer_params, "pad", 0);
                        int stride = getParam<int>(layer_params, "stride", 1);
                        int filters = getParam<int>(layer_params, "filters", -1);
-                        std::string activation = getParam<std::string>(layer_params, "activation", "linear");
                        bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
-                        if(activation != "linear" && activation != "leaky")
-                            CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
                        int flipped = getParam<int>(layer_params, "flipped", 0);
                        if (flipped == 1)
                            CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented");
@@ -563,7 +569,7 @@ namespace cv {
                        CV_Assert(current_channels > 0);
 
                        setParams.setConvolution(kernel_size, pad, stride, filters, current_channels,
-                            batch_normalize, activation == "leaky");
+                            batch_normalize);
 
                        current_channels = filters;
                    }
@@ -593,7 +599,7 @@ namespace cv {
                        current_channels = 0;
                        for (size_t k = 0; k < layers_vec.size(); ++k) {
-                            layers_vec[k] = layers_vec[k] > 0 ? layers_vec[k] : (layers_vec[k] + layers_counter);
+                            layers_vec[k] = layers_vec[k] >= 0 ? layers_vec[k] : (layers_vec[k] + layers_counter);
                            current_channels += net->out_channels_vec[layers_vec[k]];
                        }
 
@@ -631,13 +637,15 @@ namespace cv {
                    else if (layer_type == "shortcut")
                    {
                        std::string bottom_layer = getParam<std::string>(layer_params, "from", "");
+                        float alpha = getParam<float>(layer_params, "alpha", 1);
+                        float beta = getParam<float>(layer_params, "beta", 0);
+                        if (beta != 0)
+                            CV_Error(Error::StsNotImplemented, "Non-zero beta");
                        CV_Assert(!bottom_layer.empty());
                        int from = std::atoi(bottom_layer.c_str());
-                        from += layers_counter;
-                        current_channels = net->out_channels_vec[from];
-
-                        setParams.setShortcut(from);
+                        from = from < 0 ? from + layers_counter : from;
+                        setParams.setShortcut(from, alpha);
                    }
                    else if (layer_type == "upsample")
                    {
@@ -667,6 +675,15 @@ namespace cv {
                    else {
                        CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
                    }
+
+                    std::string activation = getParam<std::string>(layer_params, "activation", "linear");
+                    if (activation == "leaky")
+                    {
+                        setParams.setReLU();
+                    }
+                    else if (activation != "linear")
+                        CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
+
                    net->out_channels_vec[layers_counter] = current_channels;
                }
 
@@ -710,7 +727,6 @@ namespace cv {
                {
                    int kernel_size = getParam<int>(layer_params, "size", -1);
                    int filters = getParam<int>(layer_params, "filters", -1);
-                    std::string activation = getParam<std::string>(layer_params, "activation", "linear");
                    bool use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
 
                    CV_Assert(kernel_size > 0 && filters > 0);
@@ -754,14 +770,16 @@ namespace cv {
                        bn_blobs.push_back(biasData_mat);
                        setParams.setLayerBlobs(cv_layers_counter, bn_blobs);
                    }
-
-                    if(activation == "leaky")
-                        ++cv_layers_counter;
                }
                if (layer_type == "region" || layer_type == "yolo")
                {
                    ++cv_layers_counter; // For permute.
                }
+
+                std::string activation = getParam<std::string>(layer_params, "activation", "linear");
+                if(activation == "leaky")
+                    ++cv_layers_counter; // For ReLU
+
                current_channels = net->out_channels_vec[darknet_layers_counter];
            }
            return true;
diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp
index 6b5db7c6d7..e248b1622d 100644
--- a/modules/dnn/src/layers/eltwise_layer.cpp
+++ b/modules/dnn/src/layers/eltwise_layer.cpp
@@ -64,6 +64,7 @@ public:
         MAX = 2,
     } op;
     std::vector<float> coeffs;
+    bool variableChannels;
 
     EltwiseLayerImpl(const LayerParams& params)
     {
@@ -98,7 +99,7 @@ public:
     {
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_HALIDE ||
-               (backendId == DNN_BACKEND_INFERENCE_ENGINE &&
+               (backendId == DNN_BACKEND_INFERENCE_ENGINE && !variableChannels &&
                (preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()));
     }
 
@@ -108,33 +109,57 @@ public:
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() >= 2);
+        CV_Assert(inputs[0].size() >= 2);
        CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
        CV_Assert(op == SUM || coeffs.size() == 0);
 
+        int dims = inputs[0].size();
+        // Number of channels in output shape is determined by the first input tensor.
+        int numChannels = inputs[0][1];
        for (int i = 1; i < inputs.size(); i++)
        {
-            CV_Assert(inputs[0] == inputs[i]);
+            CV_Assert(inputs[0][0] == inputs[i][0]);
+
+            // It's allowed for channels axis to be different.
+            for (int j = 2; j < dims; j++)
+                CV_Assert(inputs[0][j] == inputs[i][j]);
        }
        outputs.assign(1, inputs[0]);
-
+        outputs[0][1] = numChannels;
        return false;
    }
 
+    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
+    {
+        std::vector<Mat> inputs;
+        inputs_arr.getMatVector(inputs);
+        variableChannels = false;
+        for (int i = 1; i < inputs.size(); ++i)
+        {
+            if (inputs[i].size[1] != inputs[0].size[1])
+            {
+                variableChannels = true;
+                break;
+            }
+        }
+    }
+
    class EltwiseInvoker : public ParallelLoopBody
    {
    public:
-        const Mat* srcs;
+        std::vector<const Mat*> srcs;
        int nsrcs;
        Mat* dst;
-        const std::vector<float>* coeffs;
+        std::vector<float> coeffs;
        EltwiseOp op;
        int nstripes;
        const ActivationLayer* activ;
        int channels;
        size_t planeSize;
 
-        EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {}
+        EltwiseInvoker() : nsrcs(0), dst(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {}
 
        static void run(const Mat* srcs, int nsrcs, Mat& dst,
                        const std::vector<float>& coeffs, EltwiseOp op,
@@ -143,15 +168,23 @@ public:
            CV_Check(dst.dims, 1 < dst.dims && dst.dims <= 5, ""); CV_CheckTypeEQ(dst.type(), CV_32FC1, ""); CV_Assert(dst.isContinuous());
            CV_Assert(coeffs.empty() || coeffs.size() == (size_t)nsrcs);
 
+            EltwiseInvoker p;
+            p.srcs.resize(nsrcs);
+            p.coeffs = coeffs;
            for( int i = 0; i < nsrcs; i++ )
            {
-                CV_Assert(srcs[i].size == dst.size &&
-                          srcs[i].type() == dst.type() &&
+                p.srcs[i] = srcs + i;
+                CV_Assert(srcs[i].type() == dst.type() &&
                          srcs[i].isContinuous());
+                // Sort srcs and coefficients in the order by number of channels
+                for( int j = i; j >= 1 && p.srcs[j - 1]->size[1] < p.srcs[j]->size[1]; j-- )
+                {
+                    std::swap(p.srcs[j - 1], p.srcs[j]);
+                    if (!p.coeffs.empty())
+                        std::swap(p.coeffs[j - 1], p.coeffs[j]);
+                }
            }
 
-            EltwiseInvoker p;
-            p.srcs = srcs;
            p.nsrcs = nsrcs;
            p.dst = &dst;
            p.op = op;
@@ -173,7 +206,8 @@ public:
                    break;
                }
            }
-            p.coeffs = simpleCoeffs ? 0 : &coeffs;
+            if (simpleCoeffs)
+                p.coeffs.clear();
            p.activ = activ;
 
            parallel_for_(Range(0, nstripes), p, nstripes);
@@ -185,8 +219,8 @@ public:
            size_t stripeSize = (total + nstripes - 1)/nstripes;
            size_t stripeStart = r.start*stripeSize;
            size_t stripeEnd = std::min(r.end*stripeSize, total);
-            int c, j, k, n = nsrcs;
-            const float* coeffsptr = coeffs && !coeffs->empty() ? &coeffs->at(0) : 0;
+            int c, j, k, n;
+            const float* coeffsptr = !coeffs.empty() ? &coeffs[0] : 0;
            float* dstptr0 = dst->ptr<float>();
            int blockSize0 = 1 << 12, blockSize;
 
@@ -201,14 +235,35 @@ public:
                for( c = 0; c < channels; c++ )
                {
                    size_t globalDelta = delta + (sampleIdx*channels + c)*planeSize;
-                    const float* srcptr0 = srcs[0].ptr<float>() + globalDelta;
+                    const float* srcptr0 = srcs[0]->ptr<float>() + globalDelta;
                    float* dstptr = dstptr0 + globalDelta;
-                    if( op == PROD )
+                    // This code assumes that srcs are sorted in descending order by channels.
+                    for (n = 1; n < nsrcs && c < srcs[n]->size[1]; ++n) {}
+
+                    if (n == 1)
+                    {
+                        if( !coeffsptr )
+                        {
+                            for( j = 0; j < blockSize; j++ )
+                            {
+                                dstptr[j] = srcptr0[j];
+                            }
+                        }
+                        else
+                        {
+                            float c0 = coeffsptr[0];
+                            for( j = 0; j < blockSize; j++ )
+                            {
+                                dstptr[j] = c0*srcptr0[j];
+                            }
+                        }
+                    }
+                    else if( op == PROD )
                    {
                        for( k = 1; k < n; k++ )
                        {
-                            const float* srcptr1 = srcs[k].ptr<float>() + globalDelta;
+                            const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
                            for( j = 0; j < blockSize; j++ )
                            {
                                dstptr[j] = srcptr0[j]*srcptr1[j];
@@ -220,7 +275,7 @@ public:
                    {
                        for( k = 1; k < n; k++ )
                        {
-                            const float* srcptr1 = srcs[k].ptr<float>() + globalDelta;
+                            const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
                            for( j = 0; j < blockSize; j++ )
                            {
                                dstptr[j] = std::max(srcptr0[j], srcptr1[j]);
@@ -232,7 +287,7 @@ public:
                    {
                        for( k = 1; k < n; k++ )
                        {
-                            const float* srcptr1 = srcs[k].ptr<float>() + globalDelta;
+                            const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
                            for( j = 0; j < blockSize; j++ )
                            {
                                dstptr[j] = srcptr0[j] + srcptr1[j];
@@ -245,7 +300,7 @@ public:
                        float c0 = coeffsptr[0];
                        for( k = 1; k < n; k++ )
                        {
-                            const float* srcptr1 = srcs[k].ptr<float>() + globalDelta;
+                            const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
                            float c1 = coeffsptr[k];
                            for( j = 0; j < blockSize; j++ )
                            {
@@ -272,7 +327,7 @@ public:
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
 
-        if (inputs_.depth() == CV_16S && op != SUM)
+        if ((inputs_.depth() == CV_16S && op != SUM) || variableChannels)
            return false;
 
        inputs_.getUMatVector(inputs);
diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp
index 26637ebbe6..6514302389 100644
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@@ -444,6 +444,9 @@ INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets());
 TEST_P(Test_Darknet_layers, shortcut)
 {
     testDarknetLayer("shortcut");
+    testDarknetLayer("shortcut_leaky");
+    testDarknetLayer("shortcut_unequal");
+    testDarknetLayer("shortcut_unequal_2");
 }
 
 TEST_P(Test_Darknet_layers, upsample)
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index 445c524639..35674814b1 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -1488,4 +1488,62 @@ TEST(Layer_Test_Convolution, relu_fusion)
     normAssert(input, output);
 }
 
+typedef testing::TestWithParam<tuple<bool, tuple<Backend, Target> > > Layer_Test_Eltwise_unequal;
+TEST_P(Layer_Test_Eltwise_unequal, Accuracy)
+{
+    bool weighted = get<0>(GetParam());
+    int backendId = get<0>(get<1>(GetParam()));
+    int targetId = get<1>(get<1>(GetParam()));
+
+    if (backendId == DNN_BACKEND_OPENCV && targetId == DNN_TARGET_OPENCL_FP16)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
+
+    Net net;
+    LayerParams lp;
+    lp.type = "Eltwise";
+    lp.name = "testLayer";
+
+    const int inpShapes[][4] = {{1, 4, 2, 2}, {1, 5, 2, 2}, {1, 3, 2, 2}};
+    std::vector<String> inpNames(3);
+    std::vector<Mat> inputs(3);
+    size_t numOutValues = 1*4*2*2;  // By the first input
+
+    std::vector<float> weights(3, 1);
+    if (weighted)
+    {
+        for (int i = 0; i < inputs.size(); ++i)
+            randu(Mat(1, 1, CV_32F, &weights[i]), -1, 1);
+        lp.set("coeff", DictValue::arrayReal(&weights[0], weights.size()));
+    }
+
+    int eltwiseId = net.addLayer(lp.name, lp.type, lp);
+    for (int i = 0; i < inputs.size(); ++i)
+    {
+        inputs[i].create(4, inpShapes[i], CV_32F);
+        randu(inputs[i], 0, 255);
+        inpNames[i] = format("input_%d", i);
+        net.connect(0, i, eltwiseId, i);
+    }
+    Mat ref(1, numOutValues, CV_32F, Scalar(0));
+
+    net.setInputsNames(inpNames);
+    for (int i = 0; i < inputs.size(); ++i)
+    {
+        net.setInput(inputs[i], inpNames[i]);
+        if (numOutValues >= inputs[i].total())
+            ref.colRange(0, inputs[i].total()) += weights[i] * inputs[i].reshape(1, 1);
+        else
+            ref += weights[i] * inputs[i].reshape(1, 1).colRange(0, numOutValues);
+    }
+
+    net.setPreferableBackend(backendId);
+    net.setPreferableTarget(targetId);
+    Mat out = net.forward();
+    normAssert(out.reshape(1, 1), ref);
+}
+INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Eltwise_unequal, Combine(
+    testing::Bool(),
+    dnnBackendsAndTargets()
+));
+
 }} // namespace