diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index da0e34afcb..6741efaac1 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -490,6 +490,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     {
     public:
         bool hasBias;
+        int axis;
 
         static Ptr<ScaleLayer> create(const LayerParams& params);
     };
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index ba6a8a287d..7abde1397a 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -268,6 +268,8 @@ public:
 
     bool setScale(const Ptr<ScaleLayer>& layer)
     {
+        if (layer.empty() || layer->blobs.empty())
+            return false;
         scaleLayer = layer;
         // we will need to re-compute the weights with the scaling
         // coefficients taken into account
@@ -276,7 +278,7 @@ public:
         newWeightAndBias = true;
         fusedBias = false;
 #endif
-        return !scaleLayer.empty();
+        return true;
     }
 
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp
index 0ac5209ed4..7912f47619 100644
--- a/modules/dnn/src/layers/scale_layer.cpp
+++ b/modules/dnn/src/layers/scale_layer.cpp
@@ -26,6 +26,7 @@ public:
     {
         setParamsFrom(params);
         hasBias = params.get<bool>("bias_term", false);
+        axis = params.get<int>("axis", 1);
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -33,8 +34,8 @@ public:
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const
     {
-        CV_Assert(blobs.size() == 1 + hasBias);
-        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
+        CV_Assert(inputs.size() == 2 && blobs.empty() || blobs.size() == 1 + hasBias);
+        outputs.assign(1, inputs[0]);
         return true;
     }
 
@@ -56,30 +57,62 @@ public:
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+        CV_Assert(outputs.size() == 1, !blobs.empty() || inputs.size() == 2);
 
-        for (size_t ii = 0; ii < outputs.size(); ii++)
+        Mat &inpBlob = *inputs[0];
+        Mat &outBlob = outputs[0];
+        Mat &weights = blobs.empty() ? *inputs[1] : blobs[0];
+        Mat bias = hasBias ? blobs.back() : Mat();
+        MatShape inpShape = shape(inpBlob);
+        const int numWeights = weights.total();
+
+        int endAxis;
+        for (endAxis = axis + 1; endAxis <= inpBlob.dims; ++endAxis)
         {
-            Mat &inpBlob = *inputs[ii];
-            Mat &outBlob = outputs[ii];
+            if (total(inpShape, axis, endAxis) == numWeights)
+                break;
+        }
+        CV_Assert(total(inpShape, axis, endAxis) == numWeights,
+                  !hasBias || numWeights == bias.total(),
+                  inpBlob.type() == CV_32F && outBlob.type() == CV_32F);
 
-            CV_Assert(inpBlob.size[1] == blobs[0].total());
-            if (hasBias)
-                CV_Assert(inpBlob.size[1] == blobs[1].total());
+        int numSlices = total(inpShape, 0, axis);
+        float* inpData = (float*)inpBlob.data;
+        float* outData = (float*)outBlob.data;
 
-            CV_Assert(inpBlob.type() == CV_32F && outBlob.type() == CV_32F);
-
-            for( int cn = 0; cn < inpBlob.size[0]; cn++ )
+        if (endAxis != inpBlob.dims)
+        {
+            float* weightsData = (float*)weights.data;
+            float* biasesData = hasBias ? (float*)bias.data : 0;
+            int spatialSize = total(inpShape, endAxis);  // spatialSize != 1
+            for (int i = 0; i < numSlices; ++i)
             {
-                for (int n = 0; n < inpBlob.size[1]; n++)
+                for (int j = 0; j < numWeights; ++j)
                 {
-                    float w = blobs[0].at<float>(n);
-                    float b = hasBias ? blobs[1].at<float>(n) : 0;
-                    Mat outBlobPlane = slice(outBlob, cn, n);
-                    Mat inpBlobPlane = slice(inpBlob, cn, n);
-                    inpBlobPlane.convertTo(outBlobPlane, CV_32F, w, b);
+                    float w = weightsData[j];
+                    float b = hasBias ? biasesData[j] : 0;
+                    Mat inpSlice(1, spatialSize, CV_32F, inpData);
+                    Mat outSlice(1, spatialSize, CV_32F, outData);
+                    inpSlice.convertTo(outSlice, CV_32F, w, b);
+                    inpData += spatialSize;
+                    outData += spatialSize;
                 }
             }
         }
+        else
+        {
+            for (int i = 0; i < numSlices; ++i)
+            {
+                Mat inpSlice(weights.dims, weights.size, CV_32F, inpData);
+                Mat outSlice(weights.dims, weights.size, CV_32F, outData);
+                multiply(inpSlice, weights, outSlice);
+                if (hasBias)
+                    add(outSlice, bias, outSlice);
+
+                inpData += numWeights;
+                outData += numWeights;
+            }
+        }
     }
 
     virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node)
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index febf0357c3..200c2664c5 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -87,10 +87,10 @@ TEST(Test_Caffe, read_googlenet)
     ASSERT_FALSE(net.empty());
 }
 
-typedef testing::TestWithParam<tuple<bool> > Reproducibility_AlexNet;
+typedef testing::TestWithParam<bool> Reproducibility_AlexNet;
 TEST_P(Reproducibility_AlexNet, Accuracy)
 {
-    bool readFromMemory = get<0>(GetParam());
+    bool readFromMemory = GetParam();
     Net net;
     {
         const string proto = findDataFile("dnn/bvlc_alexnet.prototxt", false);
@@ -119,12 +119,12 @@ TEST_P(Reproducibility_AlexNet, Accuracy)
     normAssert(ref, out);
 }
 
-INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_AlexNet, testing::Values(true, false));
+INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_AlexNet, testing::Bool());
 
-typedef testing::TestWithParam<tuple<bool> > Reproducibility_OCL_AlexNet;
+typedef testing::TestWithParam<bool> Reproducibility_OCL_AlexNet;
 OCL_TEST_P(Reproducibility_OCL_AlexNet, Accuracy)
 {
-    bool readFromMemory = get<0>(GetParam());
+    bool readFromMemory = GetParam();
     Net net;
     {
         const string proto = findDataFile("dnn/bvlc_alexnet.prototxt", false);
@@ -156,7 +156,7 @@ OCL_TEST_P(Reproducibility_OCL_AlexNet, Accuracy)
     normAssert(ref, out);
 }
 
-OCL_INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_OCL_AlexNet, testing::Values(true, false));
+OCL_INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_OCL_AlexNet, testing::Bool());
 
 #if !defined(_WIN32) || defined(_WIN64)
 TEST(Reproducibility_FCN, Accuracy)
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index cd23541aed..5bf77eea10 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -627,4 +627,81 @@ OCL_TEST(Layer_Test_FasterRCNN_Proposal, Accuracy)
     EXPECT_EQ(countNonZero(out.rowRange(numDets, out.size[0])), 0);
 }
 
+typedef testing::TestWithParam<tuple<Vec4i, Vec2i, bool> > Scale_untrainable;
+TEST_P(Scale_untrainable, Accuracy)
+{
+    Vec4i inpShapeVec = get<0>(GetParam());
+    int axis = get<1>(GetParam())[0];
+    int weightsDims = get<1>(GetParam())[1];
+    bool testFusion = get<2>(GetParam());
+    const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
+
+    // Create a network with two inputs. Scale layer multiplies a first input to
+    // a second one. See http://caffe.berkeleyvision.org/tutorial/layers/scale.html
+    Net net;
+    // Check that this version of Scale layer won't be fused with Convolution layer.
+    if (testFusion)
+    {
+        LayerParams lp;
+        lp.set("kernel_size", 1);
+        lp.set("num_output", 3);
+        lp.set("group", 3);
+        lp.set("bias_term", false);
+        lp.type = "Convolution";
+        lp.name = "testConv";
+
+        std::vector<int> weightsShape(4);
+        weightsShape[0] = 3;  // #outChannels
+        weightsShape[1] = 1;  // #inpChannels / group
+        weightsShape[2] = 1;  // height
+        weightsShape[3] = 1;  // width
+        Mat weights(weightsShape, CV_32F);
+        weights.setTo(1);
+        lp.blobs.push_back(weights);
+        net.addLayerToPrev(lp.name, lp.type, lp);
+    }
+    LayerParams lp;
+    lp.type = "Scale";
+    lp.name = "testLayer";
+    lp.set("axis", axis);
+    int id = net.addLayerToPrev(lp.name, lp.type, lp);
+    net.connect(0, 1, id, 1);
+
+    Mat input(4, inpShape, CV_32F);
+    Mat weights(weightsDims, &inpShape[axis], CV_32F);
+    randu(input, -1, 1);
+    randu(weights, -1, 1);
+
+    std::vector<String> inpNames(2);
+    inpNames[0] = "scale_input";
+    inpNames[1] = "scale_weights";
+    net.setInputsNames(inpNames);
+    net.setInput(input, inpNames[0]);
+    net.setInput(weights, inpNames[1]);
+    Mat out = net.forward();
+
+    Mat ref(input.dims, input.size, CV_32F);
+    float* inpData = (float*)input.data;
+    float* refData = (float*)ref.data;
+    float* weightsData = (float*)weights.data;
+    int spatialSize = 1;
+    for (int i = axis + weightsDims; i < 4; ++i)
+        spatialSize *= inpShape[i];
+    for (int i = 0; i < ref.total(); ++i)
+    {
+        float w = weightsData[(i / spatialSize) % weights.total()];
+        refData[i] = inpData[i] * w;
+    }
+    normAssert(out, ref);
+}
+
+INSTANTIATE_TEST_CASE_P(Layer_Test, Scale_untrainable, Combine(
+/*input size*/     Values(Vec4i(2, 3, 4, 5)),
+/*axis, #dims*/    Values(Vec2i(0, 1), Vec2i(0, 2), Vec2i(0, 3), Vec2i(0, 4),
+                          Vec2i(1, 1), Vec2i(1, 2), Vec2i(1, 3),
+                          Vec2i(2, 1), Vec2i(2, 2),
+                          Vec2i(3, 1)),
+/*conv fusion*/    testing::Bool()
+));
+
 }
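
For reference, the broadcasting rule implemented by the reworked ScaleLayerImpl::forward() in this patch can be summarized by a short standalone sketch (plain C++, not part of the patch; the function name and the flattened-shape parameters are illustrative assumptions): the weights blob covers the input axes [axis, endAxis), every axis before `axis` repeats the weights, and every axis from endAxis onward shares a single scale/bias value.

// Standalone sketch of the Scale broadcasting scheme, assuming flattened shapes:
//   numSlices   = prod(inpShape[0..axis))       -- leading axes, weights repeated
//   numWeights  = prod(inpShape[axis..endAxis)) -- axes covered by the weights blob
//   spatialSize = prod(inpShape[endAxis..dims)) -- trailing axes, one value per weight
#include <cstddef>
#include <vector>

void scaleReference(const std::vector<float>& inp,
                    const std::vector<float>& weights,
                    const std::vector<float>& bias,  // empty when bias_term == false
                    std::size_t numSlices,
                    std::size_t spatialSize,
                    std::vector<float>& out)
{
    const std::size_t numWeights = weights.size();
    out.resize(inp.size());
    std::size_t p = 0;
    for (std::size_t i = 0; i < numSlices; ++i)          // leading axes: repeat the weights
        for (std::size_t j = 0; j < numWeights; ++j)     // axes covered by the weights blob
        {
            const float w = weights[j];
            const float b = bias.empty() ? 0.f : bias[j];
            // The endAxis == dims branch of forward() corresponds to spatialSize == 1 here.
            for (std::size_t s = 0; s < spatialSize; ++s, ++p)
                out[p] = inp[p] * w + b;
        }
}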