Merge pull request #10585 from dkurt:dnn_weightless_scale

Alexander Alekhin 2018-01-15 06:07:50 +00:00
commit 1255bd8d4b
5 changed files with 137 additions and 24 deletions

View File

@@ -490,6 +490,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     {
     public:
         bool hasBias;
+        int axis;
         static Ptr<ScaleLayer> create(const LayerParams& params);
     };
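The new axis field mirrors the axis parameter of Caffe's Scale layer. A minimal sketch of constructing the layer with the new parameter through the public LayerParams API; the keys "axis" and "bias_term" come from the importer code below, everything else here is illustrative:

    #include <opencv2/dnn.hpp>
    using namespace cv::dnn;

    int main()
    {
        LayerParams lp;
        lp.set("bias_term", false);  // no additive bias blob
        lp.set("axis", 1);           // start broadcasting at the channel axis (Caffe's default)
        Ptr<ScaleLayer> scale = ScaleLayer::create(lp);
        return 0;
    }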

View File

@@ -268,6 +268,8 @@ public:
     bool setScale(const Ptr<ScaleLayer>& layer)
     {
+        if (layer.empty() || layer->blobs.empty())
+            return false;
         scaleLayer = layer;
         // we will need to re-compute the weights with the scaling
         // coefficients taken into account
@@ -276,7 +278,7 @@ public:
         newWeightAndBias = true;
         fusedBias = false;
 #endif
-        return !scaleLayer.empty();
+        return true;
     }
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
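The effect of the new guard: a Scale layer that owns no blobs (its coefficients arrive at run time through a second input) can no longer be folded into a preceding convolution, because fusion bakes the coefficients into the convolution weights ahead of time. Restated as a standalone predicate; this is an illustration, not part of the patch:

    #include <opencv2/dnn.hpp>

    // Fusion is possible only when the Scale layer carries constant blobs.
    bool canFuseScale(const cv::Ptr<cv::dnn::ScaleLayer>& layer)
    {
        return !layer.empty() && !layer->blobs.empty();
    }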

View File

@@ -26,6 +26,7 @@ public:
     {
         setParamsFrom(params);
         hasBias = params.get<bool>("bias_term", false);
+        axis = params.get<int>("axis", 1);
     }
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -33,8 +34,8 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const
     {
-        CV_Assert(blobs.size() == 1 + hasBias);
-        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
+        CV_Assert(inputs.size() == 2 && blobs.empty() || blobs.size() == 1 + hasBias);
+        outputs.assign(1, inputs[0]);
         return true;
     }
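The relaxed assertion accepts two configurations: a trained layer (one weights blob plus an optional bias blob) or a weightless layer (no blobs, two inputs). In both cases the output simply takes the shape of the first input; scaling a 2x3x4x5 blob by a 3x4 weights blob still yields a 2x3x4x5 output. A compact restatement of the rule, illustrative only, with a plain int-vector standing in for MatShape:

    #include <vector>
    typedef std::vector<int> MatShape;

    bool scaleOutputShape(const std::vector<MatShape>& inputs,
                          std::vector<MatShape>& outputs)
    {
        // Weights (inputs[1], if present) only need to cover a contiguous
        // run of the input axes; the output always mirrors inputs[0].
        outputs.assign(1, inputs[0]);
        return true;
    }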
@@ -56,28 +57,60 @@ public:
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
-        for (size_t ii = 0; ii < outputs.size(); ii++)
+        CV_Assert(outputs.size() == 1, !blobs.empty() || inputs.size() == 2);
+
+        Mat &inpBlob = *inputs[0];
+        Mat &outBlob = outputs[0];
+        Mat &weights = blobs.empty() ? *inputs[1] : blobs[0];
+        Mat bias = hasBias ? blobs.back() : Mat();
+        MatShape inpShape = shape(inpBlob);
+        const int numWeights = weights.total();
+
+        int endAxis;
+        for (endAxis = axis + 1; endAxis <= inpBlob.dims; ++endAxis)
         {
-            Mat &inpBlob = *inputs[ii];
-            Mat &outBlob = outputs[ii];
-            CV_Assert(inpBlob.size[1] == blobs[0].total());
-            if (hasBias)
-                CV_Assert(inpBlob.size[1] == blobs[1].total());
-            CV_Assert(inpBlob.type() == CV_32F && outBlob.type() == CV_32F);
-            for( int cn = 0; cn < inpBlob.size[0]; cn++ )
-            {
-                for (int n = 0; n < inpBlob.size[1]; n++)
-                {
-                    float w = blobs[0].at<float>(n);
-                    float b = hasBias ? blobs[1].at<float>(n) : 0;
-                    Mat outBlobPlane = slice(outBlob, cn, n);
-                    Mat inpBlobPlane = slice(inpBlob, cn, n);
-                    inpBlobPlane.convertTo(outBlobPlane, CV_32F, w, b);
-                }
-            }
+            if (total(inpShape, axis, endAxis) == numWeights)
+                break;
         }
+        CV_Assert(total(inpShape, axis, endAxis) == numWeights,
+                  !hasBias || numWeights == bias.total(),
+                  inpBlob.type() == CV_32F && outBlob.type() == CV_32F);
+
+        int numSlices = total(inpShape, 0, axis);
+        float* inpData = (float*)inpBlob.data;
+        float* outData = (float*)outBlob.data;
+        if (endAxis != inpBlob.dims)
+        {
+            float* weightsData = (float*)weights.data;
+            float* biasesData = hasBias ? (float*)bias.data : 0;
+            int spatialSize = total(inpShape, endAxis);  // spatialSize != 1
+            for (int i = 0; i < numSlices; ++i)
+            {
+                for (int j = 0; j < numWeights; ++j)
+                {
+                    float w = weightsData[j];
+                    float b = hasBias ? biasesData[j] : 0;
+                    Mat inpSlice(1, spatialSize, CV_32F, inpData);
+                    Mat outSlice(1, spatialSize, CV_32F, outData);
+                    inpSlice.convertTo(outSlice, CV_32F, w, b);
+                    inpData += spatialSize;
+                    outData += spatialSize;
+                }
+            }
+        }
+        else
+        {
+            for (int i = 0; i < numSlices; ++i)
+            {
+                Mat inpSlice(weights.dims, weights.size, CV_32F, inpData);
+                Mat outSlice(weights.dims, weights.size, CV_32F, outData);
+                multiply(inpSlice, weights, outSlice);
+                if (hasBias)
+                    add(outSlice, bias, outSlice);
+                inpData += numWeights;
+                outData += numWeights;
+            }
+        }
     }
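To make the broadcasting concrete: for a 2x3x4x5 input with axis = 1 and 12 weights, the search above stops at endAxis = 3 because 3*4 = 12, so the weights cover axes 1..2. The layer then walks numSlices = 2 outer slices and applies each weight to a contiguous run of spatialSize = 5 floats. A self-contained sketch of that index arithmetic, using assumed shapes rather than patch code:

    #include <cstdio>

    int main()
    {
        const int shape[] = {2, 3, 4, 5};
        const int axis = 1, numWeights = 12, dims = 4;

        // Grow endAxis until axes [axis, endAxis) hold exactly numWeights elements.
        int endAxis = axis, covered = 1;
        while (covered != numWeights && endAxis < dims)
            covered *= shape[endAxis++];
        // endAxis == 3 here: the 12 weights span the 3x4 block of axes 1 and 2.

        int numSlices = 1;                   // product of axes before 'axis'
        for (int i = 0; i < axis; ++i) numSlices *= shape[i];
        int spatialSize = 1;                 // product of axes from endAxis on
        for (int i = endAxis; i < dims; ++i) spatialSize *= shape[i];

        std::printf("endAxis=%d numSlices=%d spatialSize=%d\n",
                    endAxis, numSlices, spatialSize);  // prints 3, 2, 5
        // forward() applies weight j to spatialSize consecutive floats,
        // repeated for each of the numSlices outer slices:
        // out[(s*numWeights + j)*spatialSize + k] = in[...] * w[j] (+ b[j])
        return 0;
    }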

View File

@@ -87,10 +87,10 @@ TEST(Test_Caffe, read_googlenet)
     ASSERT_FALSE(net.empty());
 }
-typedef testing::TestWithParam<tuple<bool> > Reproducibility_AlexNet;
+typedef testing::TestWithParam<bool> Reproducibility_AlexNet;
 TEST_P(Reproducibility_AlexNet, Accuracy)
 {
-    bool readFromMemory = get<0>(GetParam());
+    bool readFromMemory = GetParam();
     Net net;
     {
         const string proto = findDataFile("dnn/bvlc_alexnet.prototxt", false);
@@ -119,12 +119,12 @@ TEST_P(Reproducibility_AlexNet, Accuracy)
     normAssert(ref, out);
 }
-INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_AlexNet, testing::Values(true, false));
+INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_AlexNet, testing::Bool());
-typedef testing::TestWithParam<tuple<bool> > Reproducibility_OCL_AlexNet;
+typedef testing::TestWithParam<bool> Reproducibility_OCL_AlexNet;
 OCL_TEST_P(Reproducibility_OCL_AlexNet, Accuracy)
 {
-    bool readFromMemory = get<0>(GetParam());
+    bool readFromMemory = GetParam();
     Net net;
     {
         const string proto = findDataFile("dnn/bvlc_alexnet.prototxt", false);
@@ -156,7 +156,7 @@ OCL_TEST_P(Reproducibility_OCL_AlexNet, Accuracy)
     normAssert(ref, out);
 }
-OCL_INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_OCL_AlexNet, testing::Values(true, false));
+OCL_INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_OCL_AlexNet, testing::Bool());
 #if !defined(_WIN32) || defined(_WIN64)
 TEST(Reproducibility_FCN, Accuracy)
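The test changes are a mechanical cleanup: a bare bool parameter replaces the one-element tuple, and testing::Bool() replaces Values(true, false) (it generates false and true). The pattern in isolation, with a hypothetical fixture name:

    #include <gtest/gtest.h>

    typedef testing::TestWithParam<bool> FlagTest;  // hypothetical fixture
    TEST_P(FlagTest, Runs)
    {
        bool flag = GetParam();  // no get<0>(...) unwrapping needed
        SUCCEED() << "flag = " << flag;
    }
    INSTANTIATE_TEST_CASE_P(Demo, FlagTest, testing::Bool());  // false, true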

View File

@@ -627,4 +627,81 @@ OCL_TEST(Layer_Test_FasterRCNN_Proposal, Accuracy)
     EXPECT_EQ(countNonZero(out.rowRange(numDets, out.size[0])), 0);
 }
+typedef testing::TestWithParam<tuple<Vec4i, Vec2i, bool> > Scale_untrainable;
+TEST_P(Scale_untrainable, Accuracy)
+{
+    Vec4i inpShapeVec = get<0>(GetParam());
+    int axis = get<1>(GetParam())[0];
+    int weightsDims = get<1>(GetParam())[1];
+    bool testFusion = get<2>(GetParam());
+    const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
+
+    // Create a network with two inputs. The Scale layer multiplies the first
+    // input by the second one. See http://caffe.berkeleyvision.org/tutorial/layers/scale.html
+    Net net;
+    // Check that this version of the Scale layer is not fused with the Convolution layer.
+    if (testFusion)
+    {
+        LayerParams lp;
+        lp.set("kernel_size", 1);
+        lp.set("num_output", 3);
+        lp.set("group", 3);
+        lp.set("bias_term", false);
+        lp.type = "Convolution";
+        lp.name = "testConv";
+
+        std::vector<int> weightsShape(4);
+        weightsShape[0] = 3;  // #outChannels
+        weightsShape[1] = 1;  // #inpChannels / group
+        weightsShape[2] = 1;  // height
+        weightsShape[3] = 1;  // width
+        Mat weights(weightsShape, CV_32F);
+        weights.setTo(1);
+        lp.blobs.push_back(weights);
+        net.addLayerToPrev(lp.name, lp.type, lp);
+    }
+    LayerParams lp;
+    lp.type = "Scale";
+    lp.name = "testLayer";
+    lp.set("axis", axis);
+    int id = net.addLayerToPrev(lp.name, lp.type, lp);
+    net.connect(0, 1, id, 1);
+
+    Mat input(4, inpShape, CV_32F);
+    Mat weights(weightsDims, &inpShape[axis], CV_32F);
+    randu(input, -1, 1);
+    randu(weights, -1, 1);
+
+    std::vector<String> inpNames(2);
+    inpNames[0] = "scale_input";
+    inpNames[1] = "scale_weights";
+    net.setInputsNames(inpNames);
+    net.setInput(input, inpNames[0]);
+    net.setInput(weights, inpNames[1]);
+
+    Mat out = net.forward();
+
+    Mat ref(input.dims, input.size, CV_32F);
+    float* inpData = (float*)input.data;
+    float* refData = (float*)ref.data;
+    float* weightsData = (float*)weights.data;
+    int spatialSize = 1;
+    for (int i = axis + weightsDims; i < 4; ++i)
+        spatialSize *= inpShape[i];
+    for (int i = 0; i < ref.total(); ++i)
+    {
+        float w = weightsData[(i / spatialSize) % weights.total()];
+        refData[i] = inpData[i] * w;
+    }
+    normAssert(out, ref);
+}
+INSTANTIATE_TEST_CASE_P(Layer_Test, Scale_untrainable, Combine(
+/*input size*/  Values(Vec4i(2, 3, 4, 5)),
+/*axis, #dims*/ Values(Vec2i(0, 1), Vec2i(0, 2), Vec2i(0, 3), Vec2i(0, 4),
+                       Vec2i(1, 1), Vec2i(1, 2), Vec2i(1, 3),
+                       Vec2i(2, 1), Vec2i(2, 2),
+                       Vec2i(3, 1)),
+/*conv fusion*/ testing::Bool()
+));

 }
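A quick sanity check of the reference indexing in the test: with inpShape = {2, 3, 4, 5}, axis = 1 and a 3x4 weights blob (total 12), spatialSize is 5, so flat element i is scaled by weight (i / 5) % 12, the weight that covers that element's (channel, row) pair. A hypothetical driver, not part of the test:

    #include <cstdio>

    int main()
    {
        const int spatialSize = 5;   // width, the only axis past the weights
        const int numWeights = 12;   // 3 * 4
        const int idx[] = {0, 4, 5, 59, 60};
        for (int k = 0; k < 5; ++k)
            std::printf("element %d -> weight %d\n",
                        idx[k], (idx[k] / spatialSize) % numWeights);
        // prints weights 0, 0, 1, 11, 0: element 60 starts the second
        // batch slice, so the weight index wraps around.
        return 0;
    }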