From 1f4fdfd5993cc194adbeeb022e653b83c7113666 Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev
Date: Fri, 12 Jan 2018 11:59:05 +0300
Subject: [PATCH] Untrainable version of Scale layer from Caffe

---
 .../dnn/include/opencv2/dnn/all_layers.hpp   |  1 +
 modules/dnn/src/layers/convolution_layer.cpp |  4 +-
 modules/dnn/src/layers/scale_layer.cpp       | 67 ++++++++++++----
 modules/dnn/test/test_caffe_importer.cpp     | 12 +--
 modules/dnn/test/test_layers.cpp             | 77 +++++++++++++++++++
 5 files changed, 137 insertions(+), 24 deletions(-)

diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index da0e34afcb..6741efaac1 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -490,6 +490,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     {
     public:
         bool hasBias;
+        int axis;
 
         static Ptr<ScaleLayer> create(const LayerParams& params);
     };
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index ba6a8a287d..7abde1397a 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -268,6 +268,8 @@ public:
 
     bool setScale(const Ptr<ScaleLayer>& layer)
     {
+        if (layer.empty() || layer->blobs.empty())
+            return false;
         scaleLayer = layer;
         // we will need to re-compute the weights with the scaling
         // coefficients taken into account
@@ -276,7 +278,7 @@ public:
         newWeightAndBias = true;
         fusedBias = false;
 #endif
-        return !scaleLayer.empty();
+        return true;
     }
 
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp
index 0ac5209ed4..7912f47619 100644
--- a/modules/dnn/src/layers/scale_layer.cpp
+++ b/modules/dnn/src/layers/scale_layer.cpp
@@ -26,6 +26,7 @@ public:
     {
         setParamsFrom(params);
         hasBias = params.get<bool>("bias_term", false);
+        axis = params.get<int>("axis", 1);
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -33,8 +34,8 @@
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const
     {
-        CV_Assert(blobs.size() == 1 + hasBias);
-        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
+        CV_Assert(inputs.size() == 2 && blobs.empty() || blobs.size() == 1 + hasBias);
+        outputs.assign(1, inputs[0]);
         return true;
     }
 
@@ -56,30 +57,62 @@
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+        CV_Assert(outputs.size() == 1, !blobs.empty() || inputs.size() == 2);
 
-        for (size_t ii = 0; ii < outputs.size(); ii++)
+        Mat &inpBlob = *inputs[0];
+        Mat &outBlob = outputs[0];
+        Mat &weights = blobs.empty() ? *inputs[1] : blobs[0];
+        Mat bias = hasBias ? blobs.back() : Mat();
+        MatShape inpShape = shape(inpBlob);
+        const int numWeights = weights.total();
+
+        int endAxis;
+        for (endAxis = axis + 1; endAxis <= inpBlob.dims; ++endAxis)
         {
-            Mat &inpBlob = *inputs[ii];
-            Mat &outBlob = outputs[ii];
+            if (total(inpShape, axis, endAxis) == numWeights)
+                break;
+        }
+        CV_Assert(total(inpShape, axis, endAxis) == numWeights,
+                  !hasBias || numWeights == bias.total(),
+                  inpBlob.type() == CV_32F && outBlob.type() == CV_32F);
 
-            CV_Assert(inpBlob.size[1] == blobs[0].total());
-            if (hasBias)
-                CV_Assert(inpBlob.size[1] == blobs[1].total());
+        int numSlices = total(inpShape, 0, axis);
+        float* inpData = (float*)inpBlob.data;
+        float* outData = (float*)outBlob.data;
 
-            CV_Assert(inpBlob.type() == CV_32F && outBlob.type() == CV_32F);
-
-            for( int cn = 0; cn < inpBlob.size[0]; cn++ )
+        if (endAxis != inpBlob.dims)
+        {
+            float* weightsData = (float*)weights.data;
+            float* biasesData = hasBias ? (float*)bias.data : 0;
+            int spatialSize = total(inpShape, endAxis);  // spatialSize != 1
+            for (int i = 0; i < numSlices; ++i)
             {
-                for (int n = 0; n < inpBlob.size[1]; n++)
+                for (int j = 0; j < numWeights; ++j)
                 {
-                    float w = blobs[0].at<float>(n);
-                    float b = hasBias ? blobs[1].at<float>(n) : 0;
-                    Mat outBlobPlane = slice(outBlob, cn, n);
-                    Mat inpBlobPlane = slice(inpBlob, cn, n);
-                    inpBlobPlane.convertTo(outBlobPlane, CV_32F, w, b);
+                    float w = weightsData[j];
+                    float b = hasBias ? biasesData[j] : 0;
+                    Mat inpSlice(1, spatialSize, CV_32F, inpData);
+                    Mat outSlice(1, spatialSize, CV_32F, outData);
+                    inpSlice.convertTo(outSlice, CV_32F, w, b);
+                    inpData += spatialSize;
+                    outData += spatialSize;
                 }
             }
         }
+        else
+        {
+            for (int i = 0; i < numSlices; ++i)
+            {
+                Mat inpSlice(weights.dims, weights.size, CV_32F, inpData);
+                Mat outSlice(weights.dims, weights.size, CV_32F, outData);
+                multiply(inpSlice, weights, outSlice);
+                if (hasBias)
+                    add(outSlice, bias, outSlice);
+
+                inpData += numWeights;
+                outData += numWeights;
+            }
+        }
     }
 
     virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node)
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index febf0357c3..200c2664c5 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -87,10 +87,10 @@ TEST(Test_Caffe, read_googlenet)
     ASSERT_FALSE(net.empty());
 }
 
-typedef testing::TestWithParam<tuple<bool> > Reproducibility_AlexNet;
+typedef testing::TestWithParam<bool> Reproducibility_AlexNet;
 TEST_P(Reproducibility_AlexNet, Accuracy)
 {
-    bool readFromMemory = get<0>(GetParam());
+    bool readFromMemory = GetParam();
     Net net;
     {
         const string proto = findDataFile("dnn/bvlc_alexnet.prototxt", false);
@@ -119,12 +119,12 @@ TEST_P(Reproducibility_AlexNet, Accuracy)
     normAssert(ref, out);
 }
 
-INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_AlexNet, testing::Values(true, false));
+INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_AlexNet, testing::Bool());
 
-typedef testing::TestWithParam<tuple<bool> > Reproducibility_OCL_AlexNet;
+typedef testing::TestWithParam<bool> Reproducibility_OCL_AlexNet;
 OCL_TEST_P(Reproducibility_OCL_AlexNet, Accuracy)
 {
-    bool readFromMemory = get<0>(GetParam());
+    bool readFromMemory = GetParam();
     Net net;
     {
         const string proto = findDataFile("dnn/bvlc_alexnet.prototxt", false);
@@ -156,7 +156,7 @@ OCL_TEST_P(Reproducibility_OCL_AlexNet, Accuracy)
     normAssert(ref, out);
 }
 
-OCL_INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_OCL_AlexNet, testing::Values(true, false));
+OCL_INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_OCL_AlexNet, testing::Bool());
 
 #if !defined(_WIN32) || defined(_WIN64)
 TEST(Reproducibility_FCN, Accuracy)
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index cd23541aed..5bf77eea10 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -627,4 +627,81 @@ OCL_TEST(Layer_Test_FasterRCNN_Proposal, Accuracy)
     EXPECT_EQ(countNonZero(out.rowRange(numDets, out.size[0])), 0);
 }
 
+typedef testing::TestWithParam<tuple<Vec4i, Vec2i, bool> > Scale_untrainable;
+TEST_P(Scale_untrainable, Accuracy)
+{
+    Vec4i inpShapeVec = get<0>(GetParam());
+    int axis = get<1>(GetParam())[0];
+    int weightsDims = get<1>(GetParam())[1];
+    bool testFusion = get<2>(GetParam());
+    const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
+
+    // Create a network with two inputs. Scale layer multiplies a first input to
+    // a second one. See http://caffe.berkeleyvision.org/tutorial/layers/scale.html
+    Net net;
+    // Check that this version of Scale layer won't be fused with Convolution layer.
+    if (testFusion)
+    {
+        LayerParams lp;
+        lp.set("kernel_size", 1);
+        lp.set("num_output", 3);
+        lp.set("group", 3);
+        lp.set("bias_term", false);
+        lp.type = "Convolution";
+        lp.name = "testConv";
+
+        std::vector<int> weightsShape(4);
+        weightsShape[0] = 3;  // #outChannels
+        weightsShape[1] = 1;  // #inpChannels / group
+        weightsShape[2] = 1;  // height
+        weightsShape[3] = 1;  // width
+        Mat weights(weightsShape, CV_32F);
+        weights.setTo(1);
+        lp.blobs.push_back(weights);
+        net.addLayerToPrev(lp.name, lp.type, lp);
+    }
+    LayerParams lp;
+    lp.type = "Scale";
+    lp.name = "testLayer";
+    lp.set("axis", axis);
+    int id = net.addLayerToPrev(lp.name, lp.type, lp);
+    net.connect(0, 1, id, 1);
+
+    Mat input(4, inpShape, CV_32F);
+    Mat weights(weightsDims, &inpShape[axis], CV_32F);
+    randu(input, -1, 1);
+    randu(weights, -1, 1);
+
+    std::vector<String> inpNames(2);
+    inpNames[0] = "scale_input";
+    inpNames[1] = "scale_weights";
+    net.setInputsNames(inpNames);
+    net.setInput(input, inpNames[0]);
+    net.setInput(weights, inpNames[1]);
+    Mat out = net.forward();
+
+    Mat ref(input.dims, input.size, CV_32F);
+    float* inpData = (float*)input.data;
+    float* refData = (float*)ref.data;
+    float* weightsData = (float*)weights.data;
+    int spatialSize = 1;
+    for (int i = axis + weightsDims; i < 4; ++i)
+        spatialSize *= inpShape[i];
+    for (int i = 0; i < ref.total(); ++i)
+    {
+        float w = weightsData[(i / spatialSize) % weights.total()];
+        refData[i] = inpData[i] * w;
+    }
+    normAssert(out, ref);
+}
+
+INSTANTIATE_TEST_CASE_P(Layer_Test, Scale_untrainable, Combine(
+/*input size*/  Values(Vec4i(2, 3, 4, 5)),
+/*axis, #dims*/ Values(Vec2i(0, 1), Vec2i(0, 2), Vec2i(0, 3), Vec2i(0, 4),
+                       Vec2i(1, 1), Vec2i(1, 2), Vec2i(1, 3),
+                       Vec2i(2, 1), Vec2i(2, 2),
+                       Vec2i(3, 1)),
+/*conv fusion*/ testing::Bool()
+));
+
 }
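
Reviewer note (not part of the patch): when blobs are empty, Scale now reads its
multipliers from a second network input instead of a learned blob. forward()
searches for the axis range [axis, endAxis) whose total equals weights.total();
if the weights do not span all trailing axes, each weight scales one contiguous
spatial slice via convertTo(), otherwise the layer falls back to a whole-slice
multiply() plus add() for the optional bias. The setScale() guard in
convolution_layer.cpp keeps this blob-less variant from being fused into a
preceding Convolution, which the Scale_untrainable test checks. A minimal usage
sketch, assuming a build with this patch applied; the layer and input names
below are arbitrary placeholders, not anything defined by the patch:

    #include <opencv2/dnn.hpp>
    #include <vector>
    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        // One Scale layer whose weights arrive at runtime as a second input.
        Net net;
        LayerParams lp;
        lp.type = "Scale";
        lp.name = "scaleLayer";    // hypothetical name
        lp.set("axis", 1);         // start scaling at the channels axis (Caffe default)
        int id = net.addLayerToPrev(lp.name, lp.type, lp);
        net.connect(0, 1, id, 1);  // second network input -> layer's weights input

        std::vector<String> inpNames(2);
        inpNames[0] = "data";      // hypothetical input names
        inpNames[1] = "weights";
        net.setInputsNames(inpNames);

        const int shape[] = {1, 3, 4, 4};
        Mat input(4, shape, CV_32F, Scalar(2));  // NCHW blob filled with 2
        const int wshape[] = {3};
        Mat weights(1, wshape, CV_32F);          // one multiplier per channel
        weights.setTo(Scalar(0.5));

        net.setInput(input, inpNames[0]);
        net.setInput(weights, inpNames[1]);
        Mat out = net.forward();   // each element becomes 2 * 0.5 = 1
        return 0;
    }

The wiring mirrors the Scale_untrainable test above: layer id 0 is the
network's implicit input layer, so its output #1 is the second blob passed via
setInput(), routed by net.connect() into the Scale layer's weights input.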