diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 8324fe9d05..01e0021ccb 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -483,15 +483,15 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * order is the same as in layersIds */ CV_WRAP void getLayersShapes(const std::vector& netInputShapes, - std::vector* layersIds, - std::vector >* inLayersShapes, - std::vector >* outLayersShapes) const; + CV_OUT std::vector& layersIds, + CV_OUT std::vector >& inLayersShapes, + CV_OUT std::vector >& outLayersShapes) const; /** @overload */ CV_WRAP void getLayersShapes(const MatShape& netInputShape, - std::vector* layersIds, - std::vector >* inLayersShapes, - std::vector >* outLayersShapes) const; + CV_OUT std::vector& layersIds, + CV_OUT std::vector >& inLayersShapes, + CV_OUT std::vector >& outLayersShapes) const; /** @brief Returns input and output shapes for layer with specified * id in loaded model; preliminary inferencing isn't necessary. @@ -504,14 +504,14 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN */ CV_WRAP void getLayerShapes(const MatShape& netInputShape, const int layerId, - std::vector* inLayerShapes, - std::vector* outLayerShapes) const; + CV_OUT std::vector& inLayerShapes, + CV_OUT std::vector& outLayerShapes) const; /** @overload */ CV_WRAP void getLayerShapes(const std::vector& netInputShapes, const int layerId, - std::vector* inLayerShapes, - std::vector* outLayerShapes) const; + CV_OUT std::vector& inLayerShapes, + CV_OUT std::vector& outLayerShapes) const; /** @brief Computes FLOP for whole loaded model with specified input shapes. * @param netInputShapes vector of shapes for all net inputs. @@ -566,11 +566,13 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * @param blobs output parameter to store resulting bytes for intermediate blobs. 
*/ CV_WRAP void getMemoryConsumption(const std::vector& netInputShapes, - CV_OUT std::vector& layerIds, CV_OUT std::vector& weights, + CV_OUT std::vector& layerIds, + CV_OUT std::vector& weights, CV_OUT std::vector& blobs) const; /** @overload */ CV_WRAP void getMemoryConsumption(const MatShape& netInputShape, - CV_OUT std::vector& layerIds, CV_OUT std::vector& weights, + CV_OUT std::vector& layerIds, + CV_OUT std::vector& weights, CV_OUT std::vector& blobs) const; /** @brief Enables or disables layer fusion in the network. @@ -578,6 +580,14 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN */ CV_WRAP void enableFusion(bool fusion); + /** @brief Returns overall time for inference and timings (in ticks) for layers. + * Indexes in returned vector correspond to layers ids. Some layers can be fused with others, + * in this case zero ticks count will be return for that skipped layers. + * @param timings vector for tick timings for all layers. + * @return overall ticks for model inference. + */ + CV_WRAP int64 getPerfProfile(CV_OUT std::vector& timings); + private: struct Impl; Ptr impl; diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 5529cad526..e4671ec5bf 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -633,7 +634,7 @@ struct Net::Impl inpl.layerInstance = netInputLayer; layerNameToId.insert(std::make_pair(inpl.name, inpl.id)); - lastLayerId = 1; + lastLayerId = 0; netWasAllocated = false; fusion = true; preferableBackend = DNN_BACKEND_DEFAULT; @@ -656,6 +657,7 @@ struct Net::Impl bool netWasAllocated; bool fusion; + std::vector layersTimings; void compileHalide() { @@ -716,6 +718,8 @@ struct Net::Impl it = layers.find(0); CV_Assert(it != layers.end()); it->second.skipFlags[DNN_BACKEND_DEFAULT] = true; + + layersTimings.clear(); } void setUpNet(const std::vector& blobsToKeep_ = std::vector()) @@ -1269,6 +1273,8 @@ struct Net::Impl allocateLayer(lid, layersShapes); } + 
layersTimings.resize(lastLayerId + 1, 0); + fuseLayers(blobsToKeep_); } @@ -1278,11 +1284,16 @@ struct Net::Impl Ptr layer = ld.layerInstance; + TickMeter tm; + tm.start(); + if (preferableBackend == DNN_BACKEND_DEFAULT || !layer->supportBackend(preferableBackend)) { if( !ld.skipFlags[DNN_BACKEND_DEFAULT] ) layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals); + else + tm.reset(); } else if (!ld.skipFlags[preferableBackend]) { @@ -1299,6 +1310,9 @@ struct Net::Impl } } + tm.stop(); + layersTimings[ld.id] = tm.getTimeTicks(); + ld.flag = 1; } @@ -1717,16 +1731,13 @@ std::vector Net::getUnconnectedOutLayers() const } void Net::getLayersShapes(const ShapesVec& netInputShapes, - std::vector* layersIds, - std::vector* inLayersShapes, - std::vector* outLayersShapes) const + std::vector& layersIds, + std::vector& inLayersShapes, + std::vector& outLayersShapes) const { - if ((layersIds || inLayersShapes || outLayersShapes) == false) - return; - - if (layersIds) layersIds->clear(); - if (inLayersShapes) inLayersShapes->clear(); - if (outLayersShapes) outLayersShapes->clear(); + layersIds.clear(); + inLayersShapes.clear(); + outLayersShapes.clear(); Impl::LayersShapesMap inOutShapes; impl->getLayersShapes(netInputShapes, inOutShapes); @@ -1734,19 +1745,16 @@ void Net::getLayersShapes(const ShapesVec& netInputShapes, for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin(); it != inOutShapes.end(); it++) { - if (layersIds) - layersIds->push_back(it->first); - if (inLayersShapes) - inLayersShapes->push_back(it->second.in); - if (outLayersShapes) - outLayersShapes->push_back(it->second.out); + layersIds.push_back(it->first); + inLayersShapes.push_back(it->second.in); + outLayersShapes.push_back(it->second.out); } } void Net::getLayersShapes(const MatShape& netInputShape, - std::vector* layerIds, - std::vector* inLayersShapes, - std::vector* outLayersShapes) const + std::vector& layerIds, + std::vector& inLayersShapes, + std::vector& outLayersShapes) const { 
getLayersShapes(ShapesVec(1, netInputShape), layerIds, inLayersShapes, outLayersShapes); @@ -1754,8 +1762,8 @@ void Net::getLayersShapes(const MatShape& netInputShape, void Net::getLayerShapes(const MatShape& netInputShape, const int layerId, - ShapesVec* inLayerShapes, - ShapesVec* outLayerShapes) const + ShapesVec& inLayerShapes, + ShapesVec& outLayerShapes) const { getLayerShapes(ShapesVec(1, netInputShape), layerId, inLayerShapes, outLayerShapes); @@ -1764,15 +1772,13 @@ void Net::getLayerShapes(const MatShape& netInputShape, void Net::getLayerShapes(const ShapesVec& netInputShapes, const int layerId, - ShapesVec* inLayerShapes, - ShapesVec* outLayerShapes) const + ShapesVec& inLayerShapes, + ShapesVec& outLayerShapes) const { LayerShapes shapes; impl->getLayerShapes(netInputShapes, layerId, shapes); - if (inLayerShapes) - *inLayerShapes = shapes.in; - if (outLayerShapes) - *outLayerShapes = shapes.out; + inLayerShapes = shapes.in; + outLayerShapes = shapes.out; } int64 Net::getFLOPS(const std::vector& netInputShapes) const @@ -1782,7 +1788,7 @@ int64 Net::getFLOPS(const std::vector& netInputShapes) const int64 flops = 0; std::vector ids; std::vector > inShapes, outShapes; - getLayersShapes(netInputShapes, &ids, &inShapes, &outShapes); + getLayersShapes(netInputShapes, ids, inShapes, outShapes); CV_Assert(inShapes.size() == outShapes.size()); CV_Assert(inShapes.size() == ids.size()); @@ -1867,8 +1873,8 @@ void Net::getMemoryConsumption(const int layerId, weights += weightsBlob.total()*weightsBlob.elemSize(); } - std::vector outLayerShapes; - getLayerShapes(netInputShapes, layerId, 0, &outLayerShapes); + ShapesVec inLayerShapes, outLayerShapes; + getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes); for(int i = 0; i < outLayerShapes.size(); i++) { blobs += total(outLayerShapes[i]) * sizeof(float); @@ -1917,9 +1923,9 @@ void Net::getMemoryConsumption(const std::vector& netInputShapes, weights.clear(); blobs.clear(); - std::vector > 
outLayerShapes; + std::vector > inLayerShapes, outLayerShapes; - getLayersShapes(netInputShapes, &layerIds, 0, &outLayerShapes); + getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes); for(int i = 0; i < layerIds.size(); i++) { @@ -1968,6 +1974,13 @@ void Net::setHalideScheduler(const String& scheduler) impl->halideConfigFile = scheduler; } +int64 Net::getPerfProfile(std::vector& timings) +{ + timings = std::vector(impl->layersTimings.begin() + 1, impl->layersTimings.end()); + int64 total = std::accumulate(timings.begin(), timings.end(), int64(0)); + return total; +} + ////////////////////////////////////////////////////////////////////////// Importer::~Importer() {} diff --git a/modules/java/generator/src/cpp/listconverters.cpp b/modules/java/generator/src/cpp/listconverters.cpp index a1d1140a45..150bdae091 100644 --- a/modules/java/generator/src/cpp/listconverters.cpp +++ b/modules/java/generator/src/cpp/listconverters.cpp @@ -56,4 +56,21 @@ void Copy_vector_String_to_List(JNIEnv* env, std::vector& vs, jobjec env->CallBooleanMethod(list, m_add, element); env->DeleteLocalRef(element); } -} \ No newline at end of file +} + +#if defined(HAVE_OPENCV_DNN) +void Copy_vector_MatShape_to_List(JNIEnv* env, std::vector& vs, jobject list) +{ + static jclass juArrayList = ARRAYLIST(env); + jmethodID m_clear = LIST_CLEAR(env, juArrayList); + jmethodID m_add = LIST_ADD(env, juArrayList); + + env->CallVoidMethod(list, m_clear); + for (std::vector::iterator it = vs.begin(); it != vs.end(); ++it) + { + jstring element = env->NewStringUTF(""); + env->CallBooleanMethod(list, m_add, element); + env->DeleteLocalRef(element); + } +} +#endif diff --git a/modules/java/generator/src/cpp/listconverters.hpp b/modules/java/generator/src/cpp/listconverters.hpp index 655f87afde..5de2c3a12e 100644 --- a/modules/java/generator/src/cpp/listconverters.hpp +++ b/modules/java/generator/src/cpp/listconverters.hpp @@ -16,4 +16,9 @@ std::vector List_to_vector_String(JNIEnv* env, jobject 
list); void Copy_vector_String_to_List(JNIEnv* env, std::vector& vs, jobject list); -#endif /* LISTCONVERTERS_HPP */ \ No newline at end of file +#if defined(HAVE_OPENCV_DNN) +#include "opencv2/dnn.hpp" +void Copy_vector_MatShape_to_List(JNIEnv* env, std::vector& vs, jobject list); +#endif + +#endif /* LISTCONVERTERS_HPP */ diff --git a/samples/dnn/ssd_mobilenet_object_detection.cpp b/samples/dnn/ssd_mobilenet_object_detection.cpp index c3e731cc50..3d785f53ac 100644 --- a/samples/dnn/ssd_mobilenet_object_detection.cpp +++ b/samples/dnn/ssd_mobilenet_object_detection.cpp @@ -101,17 +101,18 @@ int main(int argc, char** argv) //! [Prepare blob] //! [Set input blob] - net.setInput(inputBlob, "data"); //set the network input + net.setInput(inputBlob, "data"); //set the network input //! [Set input blob] - TickMeter tm; - tm.start(); //! [Make forward pass] - Mat detection = net.forward("detection_out"); //compute output - tm.stop(); - cout << "Inference time, ms: " << tm.getTimeMilli() << endl; + Mat detection = net.forward("detection_out"); //compute output //! [Make forward pass] + std::vector layersTimings; + double freq = getTickFrequency() / 1000; + double time = net.getPerfProfile(layersTimings) / freq; + cout << "Inference time, ms: " << time << endl; + Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr()); frame = frame(crop);