diff --git a/doc/pattern_tools/gen_pattern.py b/doc/pattern_tools/gen_pattern.py index a40b7887c5..720ebc7f91 100755 --- a/doc/pattern_tools/gen_pattern.py +++ b/doc/pattern_tools/gen_pattern.py @@ -16,110 +16,106 @@ python gen_pattern.py -o out.svg -r 11 -c 8 -T circles -s 20.0 -R 5.0 -u mm -w 2 -H, --help - show help """ +import argparse + from svgfig import * -import sys -import getopt class PatternMaker: - def __init__(self, cols,rows,output,units,square_size,radius_rate,page_width,page_height): - self.cols = cols - self.rows = rows - self.output = output - self.units = units - self.square_size = square_size - self.radius_rate = radius_rate - self.width = page_width - self.height = page_height - self.g = SVG("g") # the svg group container + def __init__(self, cols, rows, output, units, square_size, radius_rate, page_width, page_height): + self.cols = cols + self.rows = rows + self.output = output + self.units = units + self.square_size = square_size + self.radius_rate = radius_rate + self.width = page_width + self.height = page_height + self.g = SVG("g") # the svg group container - def makeCirclesPattern(self): - spacing = self.square_size - r = spacing / self.radius_rate - for x in range(1,self.cols+1): - for y in range(1,self.rows+1): - dot = SVG("circle", cx=x * spacing, cy=y * spacing, r=r, fill="black", stroke="none") - self.g.append(dot) + def make_circles_pattern(self): + spacing = self.square_size + r = spacing / self.radius_rate + for x in range(1, self.cols + 1): + for y in range(1, self.rows + 1): + dot = SVG("circle", cx=x * spacing, cy=y * spacing, r=r, fill="black", stroke="none") + self.g.append(dot) - def makeACirclesPattern(self): - spacing = self.square_size - r = spacing / self.radius_rate - for i in range(0,self.rows): - for j in range(0,self.cols): - dot = SVG("circle", cx= ((j*2 + i%2)*spacing) + spacing, cy=self.height - (i * spacing + spacing), r=r, fill="black", stroke="none") - self.g.append(dot) + def make_acircles_pattern(self): + spacing = self.square_size + r = spacing / self.radius_rate + for i in range(0, self.rows): + for j in range(0, self.cols): + dot = SVG("circle", cx=((j * 2 + i % 2) * spacing) + spacing, cy=self.height - (i * spacing + spacing), + r=r, fill="black", stroke="none") + self.g.append(dot) - def makeCheckerboardPattern(self): - spacing = self.square_size - xspacing = (self.width - self.cols * self.square_size) / 2.0 - yspacing = (self.height - self.rows * self.square_size) / 2.0 - for x in range(0,self.cols): - for y in range(0,self.rows): - if x%2 == y%2: - square = SVG("rect", x=x * spacing + xspacing, y=y * spacing + yspacing, width=spacing, height=spacing, fill="black", stroke="none") - self.g.append(square) + def make_checkerboard_pattern(self): + spacing = self.square_size + xspacing = (self.width - self.cols * self.square_size) / 2.0 + yspacing = (self.height - self.rows * self.square_size) / 2.0 + for x in range(0, self.cols): + for y in range(0, self.rows): + if x % 2 == y % 2: + square = SVG("rect", x=x * spacing + xspacing, y=y * spacing + yspacing, width=spacing, + height=spacing, fill="black", stroke="none") + self.g.append(square) - def save(self): - c = canvas(self.g,width="%d%s"%(self.width,self.units),height="%d%s"%(self.height,self.units),viewBox="0 0 %d %d"%(self.width,self.height)) - c.save(self.output) + def save(self): + c = canvas(self.g, width="%d%s" % (self.width, self.units), height="%d%s" % (self.height, self.units), + viewBox="0 0 %d %d" % (self.width, self.height)) + c.save(self.output) def main(): - # 
parse command line options, TODO use argparse for better doc - try: - opts, args = getopt.getopt(sys.argv[1:], "Ho:c:r:T:u:s:R:w:h:a:", ["help","output=","columns=","rows=", - "type=","units=","square_size=","radius_rate=", - "page_width=","page_height=", "page_size="]) - except getopt.error as msg: - print(msg) - print("for help use --help") - sys.exit(2) - output = "out.svg" - columns = 8 - rows = 11 - p_type = "circles" - units = "mm" - square_size = 20.0 - radius_rate = 5.0 - page_size = "A4" + # parse command line options + parser = argparse.ArgumentParser(description="generate camera-calibration pattern", add_help=False) + parser.add_argument("-H", "--help", help="show help", action="store_true", dest="show_help") + parser.add_argument("-o", "--output", help="output file", default="out.svg", action="store", dest="output") + parser.add_argument("-c", "--columns", help="pattern columns", default=8, action="store", dest="columns", + type=int) + parser.add_argument("-r", "--rows", help="pattern rows", default=11, action="store", dest="rows", type=int) + parser.add_argument("-T", "--type", help="type of pattern", default="circles", action="store", dest="p_type", + choices=["circles", "acircles", "checkerboard"]) + parser.add_argument("-u", "--units", help="length unit", default="mm", action="store", dest="units", + choices=["mm", "inches", "px", "m"]) + parser.add_argument("-s", "--square_size", help="size of squares in pattern", default=20.0, action="store", + dest="square_size", type=float) + parser.add_argument("-R", "--radius_rate", help="circles_radius = square_size/radius_rate", default=5.0, + action="store", dest="radius_rate", type=float) + parser.add_argument("-w", "--page_width", help="page width in units", default=216, action="store", + dest="page_width", type=int) + parser.add_argument("-h", "--page_height", help="page height in units", default=279, action="store", + dest="page_height", type=int) + parser.add_argument("-a", "--page_size", help="page size, supersedes -h -w arguments", default="A4", action="store", + dest="page_size", choices=["A0", "A1", "A2", "A3", "A4", "A5"]) + args = parser.parse_args() + + show_help = args.show_help + if show_help: + parser.print_help() + return + output = args.output + columns = args.columns + rows = args.rows + p_type = args.p_type + units = args.units + square_size = args.square_size + radius_rate = args.radius_rate + page_size = args.page_size # page size dict (ISO standard, mm) for easy lookup. 
format - size: [width, height] - page_sizes = {"A0": [840, 1188], "A1": [594, 840], "A2": [420, 594], "A3": [297, 420], "A4": [210, 297], "A5": [148, 210]} + page_sizes = {"A0": [840, 1188], "A1": [594, 840], "A2": [420, 594], "A3": [297, 420], "A4": [210, 297], + "A5": [148, 210]} page_width = page_sizes[page_size.upper()][0] page_height = page_sizes[page_size.upper()][1] - # process options - for o, a in opts: - if o in ("-H", "--help"): - print(__doc__) - sys.exit(0) - elif o in ("-r", "--rows"): - rows = int(a) - elif o in ("-c", "--columns"): - columns = int(a) - elif o in ("-o", "--output"): - output = a - elif o in ("-T", "--type"): - p_type = a - elif o in ("-u", "--units"): - units = a - elif o in ("-s", "--square_size"): - square_size = float(a) - elif o in ("-R", "--radius_rate"): - radius_rate = float(a) - elif o in ("-w", "--page_width"): - page_width = float(a) - elif o in ("-h", "--page_height"): - page_height = float(a) - elif o in ("-a", "--page_size"): - units = "mm" - page_size = a.upper() - page_width = page_sizes[page_size][0] - page_height = page_sizes[page_size][1] - pm = PatternMaker(columns,rows,output,units,square_size,radius_rate,page_width,page_height) - #dict for easy lookup of pattern type - mp = {"circles":pm.makeCirclesPattern,"acircles":pm.makeACirclesPattern,"checkerboard":pm.makeCheckerboardPattern} + pm = PatternMaker(columns, rows, output, units, square_size, radius_rate, page_width, page_height) + # dict for easy lookup of pattern type + mp = {"circles": pm.make_circles_pattern, "acircles": pm.make_acircles_pattern, + "checkerboard": pm.make_checkerboard_pattern} mp[p_type]() - #this should save pattern to output + # this should save pattern to output pm.save() + if __name__ == "__main__": main() diff --git a/doc/py_tutorials/py_gui/py_image_display/images/matplotlib_screenshot.jpg b/doc/py_tutorials/py_gui/py_image_display/images/matplotlib_screenshot.jpg deleted file mode 100644 index dcd84f3b87..0000000000 Binary files a/doc/py_tutorials/py_gui/py_image_display/images/matplotlib_screenshot.jpg and /dev/null differ diff --git a/doc/py_tutorials/py_gui/py_image_display/images/opencv_screenshot.jpg b/doc/py_tutorials/py_gui/py_image_display/images/opencv_screenshot.jpg deleted file mode 100644 index 2ef7d348d1..0000000000 Binary files a/doc/py_tutorials/py_gui/py_image_display/images/opencv_screenshot.jpg and /dev/null differ diff --git a/doc/py_tutorials/py_gui/py_image_display/py_image_display.markdown b/doc/py_tutorials/py_gui/py_image_display/py_image_display.markdown index edb957bd95..220df25525 100644 --- a/doc/py_tutorials/py_gui/py_image_display/py_image_display.markdown +++ b/doc/py_tutorials/py_gui/py_image_display/py_image_display.markdown @@ -1,153 +1,4 @@ Getting Started with Images {#tutorial_py_image_display} =========================== -Goals ------ - -- Here, you will learn how to read an image, how to display it, and how to save it back -- You will learn these functions : **cv.imread()**, **cv.imshow()** , **cv.imwrite()** -- Optionally, you will learn how to display images with Matplotlib - -Using OpenCV ------------- - -### Read an image - -Use the function **cv.imread()** to read an image. The image should be in the working directory or -a full path of image should be given. - -Second argument is a flag which specifies the way image should be read. - -- cv.IMREAD_COLOR : Loads a color image. Any transparency of image will be neglected. It is the - default flag. 
-- cv.IMREAD_GRAYSCALE : Loads image in grayscale mode -- cv.IMREAD_UNCHANGED : Loads image as such including alpha channel - -@note Instead of these three flags, you can simply pass integers 1, 0 or -1 respectively. - -See the code below: -@code{.py} -import numpy as np -import cv2 as cv - -# Load a color image in grayscale -img = cv.imread('messi5.jpg',0) -@endcode - -**warning** - -Even if the image path is wrong, it won't throw any error, but `print img` will give you `None` - -### Display an image - -Use the function **cv.imshow()** to display an image in a window. The window automatically fits to -the image size. - -First argument is a window name which is a string. Second argument is our image. You can create as -many windows as you wish, but with different window names. -@code{.py} -cv.imshow('image',img) -cv.waitKey(0) -cv.destroyAllWindows() -@endcode -A screenshot of the window will look like this (in Fedora-Gnome machine): - -![image](images/opencv_screenshot.jpg) - -**cv.waitKey()** is a keyboard binding function. Its argument is the time in milliseconds. The -function waits for specified milliseconds for any keyboard event. If you press any key in that time, -the program continues. If **0** is passed, it waits indefinitely for a key stroke. It can also be -set to detect specific key strokes like, if key a is pressed etc which we will discuss below. - -@note Besides binding keyboard events this function also processes many other GUI events, so you -MUST use it to actually display the image. - -**cv.destroyAllWindows()** simply destroys all the windows we created. If you want to destroy any -specific window, use the function **cv.destroyWindow()** where you pass the exact window name as -the argument. - -@note There is a special case where you can create an empty window and load an image to it later. In -that case, you can specify whether the window is resizable or not. It is done with the function -**cv.namedWindow()**. By default, the flag is cv.WINDOW_AUTOSIZE. But if you specify the flag to be -cv.WINDOW_NORMAL, you can resize window. It will be helpful when an image is too large in dimension -and when adding track bars to windows. - -See the code below: -@code{.py} -cv.namedWindow('image', cv.WINDOW_NORMAL) -cv.imshow('image',img) -cv.waitKey(0) -cv.destroyAllWindows() -@endcode -### Write an image - -Use the function **cv.imwrite()** to save an image. - -First argument is the file name, second argument is the image you want to save. -@code{.py} -cv.imwrite('messigray.png',img) -@endcode -This will save the image in PNG format in the working directory. - -### Sum it up - -Below program loads an image in grayscale, displays it, saves the image if you press 's' and exit, or -simply exits without saving if you press ESC key. -@code{.py} -import numpy as np -import cv2 as cv - -img = cv.imread('messi5.jpg',0) -cv.imshow('image',img) -k = cv.waitKey(0) -if k == 27: # wait for ESC key to exit - cv.destroyAllWindows() -elif k == ord('s'): # wait for 's' key to save and exit - cv.imwrite('messigray.png',img) - cv.destroyAllWindows() -@endcode - -**warning** - -If you are using a 64-bit machine, you will have to modify `k = cv.waitKey(0)` line as follows : -`k = cv.waitKey(0) & 0xFF` - -Using Matplotlib ----------------- - -Matplotlib is a plotting library for Python which gives you wide variety of plotting methods. You -will see them in coming articles. Here, you will learn how to display image with Matplotlib. You can -zoom images, save them, etc, using Matplotlib. 
-@code{.py} -import numpy as np -import cv2 as cv -from matplotlib import pyplot as plt - -img = cv.imread('messi5.jpg',0) -plt.imshow(img, cmap = 'gray', interpolation = 'bicubic') -plt.xticks([]), plt.yticks([]) # to hide tick values on X and Y axis -plt.show() -@endcode -A screen-shot of the window will look like this : - -![image](images/matplotlib_screenshot.jpg) - -@note Plenty of plotting options are available in Matplotlib. Please refer to Matplotlib docs for more -details. Some, we will see on the way. - -__warning__ - -Color image loaded by OpenCV is in BGR mode. But Matplotlib displays in RGB mode. So color images -will not be displayed correctly in Matplotlib if image is read with OpenCV. Please see the exercises -for more details. - -Additional Resources -------------------- - --# [Matplotlib Plotting Styles and Features](http://matplotlib.org/api/pyplot_api.html) - -Exercises --------- - --# There is some problem when you try to load color image in OpenCV and display it in Matplotlib. - Read [this discussion](http://stackoverflow.com/a/15074748/1134940) and understand it. +Tutorial content has been moved: @ref tutorial_display_image diff --git a/doc/py_tutorials/py_gui/py_table_of_contents_gui.markdown b/doc/py_tutorials/py_gui/py_table_of_contents_gui.markdown index 471d464b55..034aeb1b2a 100644 --- a/doc/py_tutorials/py_gui/py_table_of_contents_gui.markdown +++ b/doc/py_tutorials/py_gui/py_table_of_contents_gui.markdown @@ -1,7 +1,7 @@ Gui Features in OpenCV {#tutorial_py_table_of_contents_gui} ====================== -- @subpage tutorial_py_image_display +- @ref tutorial_display_image Learn to load an image, display it, and save it back diff --git a/doc/tutorials/introduction/display_image/display_image.markdown b/doc/tutorials/introduction/display_image/display_image.markdown index ba550580b6..72635a3229 100644 --- a/doc/tutorials/introduction/display_image/display_image.markdown +++ b/doc/tutorials/introduction/display_image/display_image.markdown @@ -1,63 +1,82 @@ -Load and Display an Image {#tutorial_display_image} -========================= +Getting Started with Images {#tutorial_display_image} =========================== Goal ---- In this tutorial you will learn how to: -- Load an image (using @ref cv::imread ) -- Create a named OpenCV window (using @ref cv::namedWindow ) -- Display an image in an OpenCV window (using @ref cv::imshow ) +- Read an image from file (using @ref cv::imread) +- Display an image in an OpenCV window (using @ref cv::imshow) +- Write an image to a file (using @ref cv::imwrite) Source Code ----------- -Download the source code from -[here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/introduction/display_image/display_image.cpp). +@add_toggle_cpp +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/introduction/display_image/display_image.cpp) + +- **Code at a glance:** + @include samples/cpp/tutorial_code/introduction/display_image/display_image.cpp +@end_toggle + +@add_toggle_python +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/master/samples/python/tutorial_code/introduction/display_image/display_image.py) + +- **Code at a glance:** + @include samples/python/tutorial_code/introduction/display_image/display_image.py +@end_toggle -@include cpp/tutorial_code/introduction/display_image/display_image.cpp Explanation ----------- -In OpenCV 2 we have multiple modules. 
Each one takes care of a different area or approach towards +@add_toggle_cpp +In OpenCV 3 we have multiple modules. Each one takes care of a different area or approach towards image processing. You could already observe this in the structure of the user guide of these tutorials itself. Before you use any of them you first need to include the header files where the content of each individual module is declared. You'll almost always end up using the: -- *core* section, as here are defined the basic building blocks of the library -- *highgui* module, as this contains the functions for input and output operations +- @ref core "core" section, which defines the basic building blocks of the library +- @ref imgcodecs "imgcodecs" module, which provides functions for reading and writing images +- @ref highgui "highgui" module, which contains the functions to show an image in a window + +We also include the *iostream* header to facilitate console input and output. + +By declaring `using namespace cv;`, the library functions can be accessed below without explicitly stating the namespace. @snippet cpp/tutorial_code/introduction/display_image/display_image.cpp includes +@end_toggle -We also include the *iostream* to facilitate console line output and input. To avoid data structure -and function name conflicts with other libraries, OpenCV has its own namespace: *cv*. To avoid the -need appending prior each of these the *cv::* keyword you can import the namespace in the whole file -by using the lines: +@add_toggle_python +As a first step, the OpenCV Python library is imported. +The proper way to do this is to additionally assign it the name *cv*, which is used in the following to reference the library. -@snippet cpp/tutorial_code/introduction/display_image/display_image.cpp namespace +@snippet samples/python/tutorial_code/introduction/display_image/display_image.py imports +@end_toggle -This is true for the STL library too (used for console I/O). Now, let's analyze the *main* function. -We start up assuring that we acquire a valid image name argument from the command line. Otherwise -take a picture by default: "HappyFish.jpg". +Now, let's analyze the main code. +As a first step, we read the image "starry_night.jpg" from the OpenCV samples. +In order to do so, a call to the @ref cv::imread function loads the image using the file path specified by the first argument. +The second argument is optional and specifies the format in which we want the image. This may be: -@snippet cpp/tutorial_code/introduction/display_image/display_image.cpp load +- IMREAD_COLOR loads the image in the BGR 8-bit format. This is the **default** that is used here. +- IMREAD_UNCHANGED loads the image as is (including the alpha channel if present) +- IMREAD_GRAYSCALE loads the image as a single-channel grayscale intensity image -Then create a *Mat* object that will store the data of the loaded image. - -@snippet cpp/tutorial_code/introduction/display_image/display_image.cpp mat - -Now we call the @ref cv::imread function which loads the image name specified by the first argument -(*argv[1]*). The second argument specifies the format in what we want the image. This may be: - -- IMREAD_UNCHANGED (\<0) loads the image as is (including the alpha channel if present) - IMREAD_GRAYSCALE ( 0) loads the image as an intensity one -- IMREAD_COLOR (\>0) loads the image in the RGB format +After reading in the image, the data will be stored in a @ref cv::Mat object. 
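+ +For instance, a grayscale load can be requested explicitly in Python (a small illustration, not part of the sample files; it assumes the same "starry_night.jpg" sample and `cv` import as above): +@code{.py} +gray = cv.imread(cv.samples.findFile("starry_night.jpg"), cv.IMREAD_GRAYSCALE) +@endcode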
+ +@add_toggle_cpp @snippet cpp/tutorial_code/introduction/display_image/display_image.cpp imread +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/introduction/display_image/display_image.py imread +@end_toggle @note OpenCV offers support for the image formats Windows bitmap (bmp), portable image formats (pbm, @@ -67,42 +86,38 @@ Now we call the @ref cv::imread function which loads the image name specified by Jasper), TIFF files (tiff, tif) and portable network graphics (png). Furthermore, OpenEXR is also a possibility. -After checking that the image data was loaded correctly, we want to display our image, so we create -an OpenCV window using the @ref cv::namedWindow function. These are automatically managed by OpenCV -once you create them. For this you need to specify its name and how it should handle the change of -the image it contains from a size point of view. It may be: +Afterwards, a check is executed to verify that the image was loaded correctly. +@add_toggle_cpp +@snippet cpp/tutorial_code/introduction/display_image/display_image.cpp empty +@end_toggle -- *WINDOW_AUTOSIZE* is the only supported one if you do not use the Qt backend. In this case the - window size will take up the size of the image it shows. No resize permitted! -- *WINDOW_NORMAL* on Qt you may use this to allow window resize. The image will resize itself - according to the current window size. By using the | operator you also need to specify if you - would like the image to keep its aspect ratio (*WINDOW_KEEPRATIO*) or not - (*WINDOW_FREERATIO*). +@add_toggle_python +@snippet samples/python/tutorial_code/introduction/display_image/display_image.py empty +@end_toggle -@snippet cpp/tutorial_code/introduction/display_image/display_image.cpp window - -Finally, to update the content of the OpenCV window with a new image use the @ref cv::imshow -function. Specify the OpenCV window name to update and the image to use during this operation: - -@snippet cpp/tutorial_code/introduction/display_image/display_image.cpp imshow +Then, the image is shown using a call to the @ref cv::imshow function. +The first argument is the title of the window and the second argument is the @ref cv::Mat object that will be shown. Because we want our window to be displayed until the user presses a key (otherwise the program would end far too quickly), we use the @ref cv::waitKey function whose only parameter is how long it should wait for user input (measured in milliseconds). Zero means to wait forever. +The return value is the key that was pressed. -@snippet cpp/tutorial_code/introduction/display_image/display_image.cpp wait +@add_toggle_cpp +@snippet cpp/tutorial_code/introduction/display_image/display_image.cpp imshow +@end_toggle -Result ------- +@add_toggle_python +@snippet samples/python/tutorial_code/introduction/display_image/display_image.py imshow +@end_toggle -- Compile your code and then run the executable giving an image path as argument. If you're on - Windows the executable will of course contain an *exe* extension too. Of course assure the image - file is near your program file. - @code{.sh} - ./DisplayImage HappyFish.jpg - @endcode -- You should get a nice window as the one shown below: +In the end, the image is written to a file if the pressed key was the "s" key. +For this, the cv::imwrite function is called with the file path and the cv::Mat object as arguments. 
- ![](images/Display_Image_Tutorial_Result.jpg) +@add_toggle_cpp +@snippet cpp/tutorial_code/introduction/display_image/display_image.cpp imsave +@end_toggle -@youtube{1OJEqpuaGc4} +@add_toggle_python +@snippet samples/python/tutorial_code/introduction/display_image/display_image.py imsave +@end_toggle diff --git a/doc/tutorials/introduction/images/Load_Save_Image_Result_1.jpg b/doc/tutorials/introduction/images/Load_Save_Image_Result_1.jpg deleted file mode 100644 index fcc7afa4e3..0000000000 Binary files a/doc/tutorials/introduction/images/Load_Save_Image_Result_1.jpg and /dev/null differ diff --git a/doc/tutorials/introduction/images/Load_Save_Image_Result_2.jpg b/doc/tutorials/introduction/images/Load_Save_Image_Result_2.jpg deleted file mode 100644 index e91b6a1a02..0000000000 Binary files a/doc/tutorials/introduction/images/Load_Save_Image_Result_2.jpg and /dev/null differ diff --git a/doc/tutorials/introduction/load_save_image/load_save_image.markdown b/doc/tutorials/introduction/load_save_image/load_save_image.markdown index ad452378d1..9f37538b7d 100644 --- a/doc/tutorials/introduction/load_save_image/load_save_image.markdown +++ b/doc/tutorials/introduction/load_save_image/load_save_image.markdown @@ -1,105 +1,4 @@ Load, Modify, and Save an Image {#tutorial_load_save_image} =============================== -@note - We assume that by now you know how to load an image using @ref cv::imread and to display it in a - window (using @ref cv::imshow ). Read the @ref tutorial_display_image tutorial otherwise. - -Goals ------ - -In this tutorial you will learn how to: - -- Load an image using @ref cv::imread -- Transform an image from BGR to Grayscale format by using @ref cv::cvtColor -- Save your transformed image in a file on disk (using @ref cv::imwrite ) - -Code ----- - -Here it is: -@code{.cpp} -#include - -using namespace cv; - -int main( int argc, char** argv ) -{ - char* imageName = argv[1]; - - Mat image; - image = imread( imageName, IMREAD_COLOR ); - - if( argc != 2 || !image.data ) - { - printf( " No image data \n " ); - return -1; - } - - Mat gray_image; - cvtColor( image, gray_image, COLOR_BGR2GRAY ); - - imwrite( "../../images/Gray_Image.jpg", gray_image ); - - namedWindow( imageName, WINDOW_AUTOSIZE ); - namedWindow( "Gray image", WINDOW_AUTOSIZE ); - - imshow( imageName, image ); - imshow( "Gray image", gray_image ); - - waitKey(0); - - return 0; -} -@endcode -Explanation ------------ - --# We begin by loading an image using @ref cv::imread , located in the path given by *imageName*. - For this example, assume you are loading a BGR image. --# Now we are going to convert our image from BGR to Grayscale format. OpenCV has a really nice - function to do this kind of transformations: - @code{.cpp} - cvtColor( image, gray_image, COLOR_BGR2GRAY ); - @endcode - As you can see, @ref cv::cvtColor takes as arguments: - - - a source image (*image*) - - a destination image (*gray_image*), in which we will save the converted image. - - an additional parameter that indicates what kind of transformation will be performed. In - this case we use **COLOR_BGR2GRAY** (because of @ref cv::imread has BGR default channel - order in case of color images). - --# So now we have our new *gray_image* and want to save it on disk (otherwise it will get lost - after the program ends). 
To save it, we will use a function analogous to @ref cv::imread : @ref - cv::imwrite - @code{.cpp} - imwrite( "../../images/Gray_Image.jpg", gray_image ); - @endcode - Which will save our *gray_image* as *Gray_Image.jpg* in the folder *images* located two levels - up of my current location. - --# Finally, let's check out the images. We create two windows and use them to show the original - image as well as the new one: - @code{.cpp} - namedWindow( imageName, WINDOW_AUTOSIZE ); - namedWindow( "Gray image", WINDOW_AUTOSIZE ); - - imshow( imageName, image ); - imshow( "Gray image", gray_image ); - @endcode --# Add the *waitKey(0)* function call for the program to wait forever for an user key press. - -Result ------- - -When you run your program you should get something like this: - -![](images/Load_Save_Image_Result_1.jpg) - -And if you check in your folder (in my case *images*), you should have a newly .jpg file named -*Gray_Image.jpg*: - -![](images/Load_Save_Image_Result_2.jpg) - -Congratulations, you are done with this tutorial! +Tutorial content has been moved: @ref tutorial_display_image diff --git a/doc/tutorials/introduction/table_of_content_introduction.markdown b/doc/tutorials/introduction/table_of_content_introduction.markdown index b6dc8bd48b..0f711427ef 100644 --- a/doc/tutorials/introduction/table_of_content_introduction.markdown +++ b/doc/tutorials/introduction/table_of_content_introduction.markdown @@ -144,19 +144,13 @@ Additionally you can find very basic sample source code to introduce you to the - @subpage tutorial_display_image - _Compatibility:_ \> OpenCV 2.0 + _Languages:_ C++, Python + + _Compatibility:_ \> OpenCV 3.4.4 _Author:_ Ana Huamán - We will learn how to display an image using OpenCV - -- @subpage tutorial_load_save_image - - _Compatibility:_ \> OpenCV 2.0 - - _Author:_ Ana Huamán - - We will learn how to save an Image in OpenCV...plus a small conversion to grayscale + We will learn how to read an image, display it in a window and write it to a file using OpenCV - @subpage tutorial_documentation diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index f7387a0c70..616422ab1d 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -4946,16 +4946,15 @@ void LayerFactory::registerLayer(const String &type, Constructor constructor) CV_TRACE_ARG_VALUE(type, "type", type.c_str()); cv::AutoLock lock(getLayerFactoryMutex()); - String type_ = toLowerCase(type); - LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_); + LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type); if (it != getLayerFactoryImpl().end()) { if (it->second.back() == constructor) - CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered"); + CV_Error(cv::Error::StsBadArg, "Layer \"" + type + "\" already was registered"); it->second.push_back(constructor); } - getLayerFactoryImpl().insert(std::make_pair(type_, std::vector(1, constructor))); + getLayerFactoryImpl().insert(std::make_pair(type, std::vector(1, constructor))); } void LayerFactory::unregisterLayer(const String &type) @@ -4964,9 +4963,8 @@ void LayerFactory::unregisterLayer(const String &type) CV_TRACE_ARG_VALUE(type, "type", type.c_str()); cv::AutoLock lock(getLayerFactoryMutex()); - String type_ = toLowerCase(type); - LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_); + LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type); if (it != getLayerFactoryImpl().end()) { if (it->second.size() > 1) @@ -4982,8 +4980,7 @@ Ptr 
LayerFactory::createLayerInstance(const String &type, LayerParams& pa CV_TRACE_ARG_VALUE(type, "type", type.c_str()); cv::AutoLock lock(getLayerFactoryMutex()); - String type_ = toLowerCase(type); - LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_); + LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type); if (it != getLayerFactoryImpl().end()) { diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index b083e2c586..e9bcfa6279 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -95,6 +95,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(LRN, LRNLayer); CV_DNN_REGISTER_LAYER_CLASS(InnerProduct, InnerProductLayer); CV_DNN_REGISTER_LAYER_CLASS(Softmax, SoftmaxLayer); + CV_DNN_REGISTER_LAYER_CLASS(SoftMax, SoftmaxLayer); // For compatibility. See https://github.com/opencv/opencv/issues/16877 CV_DNN_REGISTER_LAYER_CLASS(MVN, MVNLayer); CV_DNN_REGISTER_LAYER_CLASS(ReLU, ReLULayer); diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index 9fdcd32294..b17391f638 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -78,32 +78,34 @@ public: FullyConnectedLayerImpl(const LayerParams& params) { setParamsFrom(params); - CV_Assert(1 <= blobs.size() && blobs.size() <= 2); - - int numOutput = params.get<int>("num_output"); - int innerSize = (int)blobs[0].total() / numOutput; bias = params.get<bool>("bias_term", true); axis = params.get<int>("axis", 1); - - CV_Assert(blobs[0].dims >= 2 && (size_t)(innerSize * numOutput) == blobs[0].total()); - CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutput == blobs[1].total())); - - weightsMat = blobs[0] = blobs[0].reshape(1, numOutput); - int vecsize = weightsMat.cols; - if( vecsize % VEC_ALIGN != 0 ) + if (!blobs.empty()) { - int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN); - Mat weightsBuf(weightsMat.rows, vecsize_aligned, weightsMat.type()); - Mat wpadding = weightsBuf.colRange(vecsize, vecsize_aligned); - wpadding.setTo(Scalar::all(0.)); - weightsMat = weightsBuf.colRange(0, vecsize); - blobs[0].copyTo(weightsMat); - } + CV_Assert(1 <= blobs.size() && blobs.size() <= 2); + int numOutput = params.get<int>("num_output"); + int innerSize = (int)blobs[0].total() / numOutput; - if (bias) - biasMat = blobs[1] = blobs[1].reshape(1, 1); - else - biasMat = Mat::zeros(1, numOutput, weightsMat.type()); + CV_Assert(blobs[0].dims >= 2 && (size_t)(innerSize * numOutput) == blobs[0].total()); + CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutput == blobs[1].total())); + + weightsMat = blobs[0] = blobs[0].reshape(1, numOutput); + int vecsize = weightsMat.cols; + if (vecsize % VEC_ALIGN != 0) + { + int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN); + Mat weightsBuf(weightsMat.rows, vecsize_aligned, weightsMat.type()); + Mat wpadding = weightsBuf.colRange(vecsize, vecsize_aligned); + wpadding.setTo(Scalar::all(0.)); + weightsMat = weightsBuf.colRange(0, vecsize); + blobs[0].copyTo(weightsMat); + } + + if (bias) + biasMat = blobs[1] = blobs[1].reshape(1, 1); + else + biasMat = Mat::zeros(1, numOutput, weightsMat.type()); + } } bool getMemoryShapes(const std::vector<MatShape> &inputs, @@ -111,20 +113,35 @@ public: std::vector<MatShape> &outputs, std::vector<MatShape> &) const CV_OVERRIDE { - CV_Assert(inputs.size() == 1); - CV_Assert(1 <= blobs.size() && blobs.size() <= 2); - CV_Assert(blobs[0].dims == 2); + int numOutput, cAxis; + if (blobs.empty()) + { + CV_CheckEQ(inputs.size(), (size_t)2, 
""); + numOutput = inputs[1].back(); + cAxis = inputs[0].size() - 1; + CV_CheckEQ(numOutput, inputs[0][cAxis - 1], ""); + int dims = inputs[0].size(); + CV_CheckEQ(inputs[1].size(), (size_t)dims, ""); + CV_CheckGE(dims, 2, ""); + for (int i = 0; i < dims - 2; i++) + CV_CheckEQ(inputs[0][i], inputs[1][i], ""); + CV_CheckEQ(inputs[0].back(), inputs[1][dims - 2], ""); + } + else + { + CV_CheckEQ(inputs.size(), (size_t)1, ""); + CV_CheckEQ(blobs[0].dims, 2, ""); + numOutput = blobs[0].size[0]; + CV_Assert(!bias || (size_t)numOutput == blobs[1].total()); + cAxis = clamp(axis, inputs[0]); + } - int cAxis = clamp(axis, inputs[0]); - int numOutput = blobs[0].size[0]; MatShape outShape(cAxis + 1); for (int i = 0; i < cAxis; ++i) outShape[i] = inputs[0][i]; outShape.back() = numOutput; - outputs.resize(inputs.size(), outShape); - - CV_Assert(!bias || (size_t)numOutput == blobs[1].total()); + outputs.resize(1, outShape); return false; } @@ -133,7 +150,8 @@ public: return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1) || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && axis == 1); + (((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && !blobs.empty()) || + backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1); } virtual bool setActivation(const Ptr& layer) CV_OVERRIDE @@ -292,6 +310,51 @@ public: inps.getUMatVector(inputs); outs.getUMatVector(outputs); + if (inputs.size() == 2) + { + int dims = outputs[0].dims; + int m = inputs[0].size[dims - 2]; + int n = inputs[0].size[dims - 1]; + int k = inputs[1].size[dims - 1]; + int rows = inputs[0].total() / (m * n); + + MatShape sh_A = shape(rows, m * n); + MatShape sh_B = shape(rows, n * k); + MatShape sh_C = shape(rows, m * k); + UMat inp = inputs[0].reshape(1, sh_A.size(), &sh_A[0]); + UMat weight = inputs[1].reshape(1, sh_B.size(), &sh_B[0]); + UMat out = outputs[0].reshape(1, sh_C.size(), &sh_C[0]); + + UMat A, B, C, A_fp32, B_fp32, C_fp32; + for (int i = 0; i < rows; ++i) + { + A = inp.row(i).reshape(1, m); + B = weight.row(i).reshape(1, n); + C = out.row(i).reshape(1, m); + + if (use_half) + { + convertFp16(A, A_fp32); + convertFp16(B, B_fp32); + convertFp16(C, C_fp32); + } + else + { + A_fp32 = A; + B_fp32 = B; + C_fp32 = C; + } + cv::gemm(A_fp32, B_fp32, 1, noArray(), 0, C_fp32); + if (use_half) + { + convertFp16(A_fp32, A); + convertFp16(B_fp32, B); + convertFp16(C_fp32, C); + } + } + return true; + } + int axisCan = clamp(axis, inputs[0].dims); int numOutput = blobs[0].size[0]; int innerSize = blobs[0].size[1]; @@ -411,16 +474,42 @@ public: inputs_arr.getMatVector(input); outputs_arr.getMatVector(output); - int axisCan = clamp(axis, input[0].dims); - int outerSize = input[0].total(0, axisCan); - - for (size_t i = 0; i < input.size(); i++) + if (!blobs.empty()) { - Mat srcMat = input[i].reshape(1, outerSize); - Mat dstMat = output[i].reshape(1, outerSize); + int axisCan = clamp(axis, input[0].dims); + int outerSize = input[0].total(0, axisCan); - const int nstripes = getNumThreads(); - FullyConnected::run(srcMat, weightsMat, biasMat, dstMat, activ.get(), nstripes); + for (size_t i = 0; i < input.size(); i++) + { + Mat srcMat = input[i].reshape(1, outerSize); + Mat dstMat = output[i].reshape(1, outerSize); + + const int nstripes = getNumThreads(); + FullyConnected::run(srcMat, weightsMat, biasMat, dstMat, activ.get(), nstripes); + } + } + else + { + float* 
inpData = input[0].ptr<float>(); + float* weightData = input[1].ptr<float>(); + float* outData = output[0].ptr<float>(); + + int dims = output[0].dims; + int numSlice = output[0].total() / output[0].total(dims - 2); + int m = input[0].size[dims - 2]; + int n = input[0].size[dims - 1]; + int k = input[1].size[dims - 1]; + for (int i = 0; i < numSlice; i++) + { + Mat inpSlice(m, n, CV_32F, inpData); + Mat weightSlice(n, k, CV_32F, weightData); + Mat outSlice(m, k, CV_32F, outData); + + outSlice = inpSlice * weightSlice; + inpData += inpSlice.total(); + weightData += weightSlice.total(); + outData += outSlice.total(); + } } } @@ -489,20 +578,28 @@ public: const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node; - int batch = ieInpNode->get_shape()[0]; + std::shared_ptr<ngraph::Node> matmul; - std::vector<size_t> data = {(size_t)batch, (size_t)blobs[0].size[1]}; - auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data()); - auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true); + if (nodes.size() == 2) + { + auto& inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node; + matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, false, false); + } + else + { + std::vector<size_t> data = {(size_t)ieInpNode->get_shape()[0], (size_t)blobs[0].size[1]}; + auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data()); + auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true); + + std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]}; + auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data); + matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true); + } - std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]}; - auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data); - auto matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true); if (bias) { auto bias_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{(size_t)blobs[1].size[1]}, blobs[1].data); - auto fc = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node, ngraph::op::AutoBroadcastType::NUMPY); - return Ptr<BackendNode>(new InfEngineNgraphNode(fc)); + matmul = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node, ngraph::op::AutoBroadcastType::NUMPY); } return Ptr<BackendNode>(new InfEngineNgraphNode(matmul)); } diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp index 74fd44b7b2..275c82738b 100644 --- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp +++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp @@ -154,6 +154,73 @@ private: int axis; }; +class NormalizeSubgraph1 : public Subgraph +{ +public: + NormalizeSubgraph1() : axis(1) + { + input = addNodeToMatch(""); + norm = addNodeToMatch("ReduceL2", input); + addNodeToMatch("Div", input, norm); + setFusedNode("Normalize", input); + } + + virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId, + std::vector<int>& matchedNodesIds, + std::vector<int>& targetNodesIds) CV_OVERRIDE + { + if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds)) + { + Ptr<ImportNodeWrapper> norm = net->getNode(matchedNodesIds[0]); + opencv_onnx::NodeProto* node = norm.dynamicCast<ONNXNodeWrapper>()->node; + + for (int i = 0; i < node->attribute_size(); i++) + { + opencv_onnx::AttributeProto attr = node->attribute(i); + if (attr.name() != "axes") + continue; + if (attr.ints_size() != 1) + CV_Error(Error::StsNotImplemented, format("Unexpected number of axes: %d", attr.ints_size())); + axis = attr.ints(0); + return true; + } + CV_Error(Error::StsNotImplemented, "Missed axes attribute"); + } + return false; + } + + virtual void finalize(const Ptr<ImportGraphWrapper>&, + const Ptr<ImportNodeWrapper>& 
fusedNode, + std::vector<Ptr<ImportNodeWrapper> >&) CV_OVERRIDE + { + opencv_onnx::NodeProto* node = fusedNode.dynamicCast<ONNXNodeWrapper>()->node; + opencv_onnx::AttributeProto* axis_attr = node->add_attribute(); + axis_attr->set_name("axis"); + axis_attr->set_i(axis); + + opencv_onnx::AttributeProto* end_axis_attr = node->add_attribute(); + end_axis_attr->set_name("end_axis"); + end_axis_attr->set_i(axis); + } + +protected: + int input, norm; + int axis; +}; + + +class NormalizeSubgraph2 : public NormalizeSubgraph1 +{ +public: + NormalizeSubgraph2() : NormalizeSubgraph1() + { + int clip = addNodeToMatch("Clip", norm); + int shape = addNodeToMatch("Shape", input); + int expand = addNodeToMatch("Expand", clip, shape); + addNodeToMatch("Div", input, expand); + } +}; + class GatherCastSubgraph : public Subgraph { public: @@ -299,6 +366,8 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net) subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); + subgraphs.push_back(makePtr<NormalizeSubgraph1>()); + subgraphs.push_back(makePtr<NormalizeSubgraph2>()); simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs); } diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 7672c28ad5..f1ae16ed8b 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -391,19 +391,71 @@ void ONNXImporter::populateNet(Net dstNet) CV_Error(Error::StsNotImplemented, "Unsupported mode of ReduceMean operation."); MatShape inpShape = outShapes[node_proto.input(0)]; - if (inpShape.size() != 4 && inpShape.size() != 5) + DictValue axes = layerParams.get("axes"); + if (inpShape.size() == 3 && axes.size() <= 2) + { + int axis = axes.get<int>(0); + CV_CheckNE(axis, 0, ""); + outShapes[layerParams.name] = inpShape; + outShapes[layerParams.name][axis] = 1; + + LayerParams reshapeLp; + reshapeLp.name = layerParams.name + "/reshape"; + reshapeLp.type = "Reshape"; + CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); + reshapeLp.set("axis", 0); + reshapeLp.set("num_axes", 1); + int newShape[] = {1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2)); + + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(reshapeLp.name); + addLayer(dstNet, reshapeLp, proto, layer_id, outShapes); + + LayerParams avgLp; + avgLp.name = layerParams.name + "/avg"; + avgLp.type = "Pooling"; + CV_Assert(layer_id.find(avgLp.name) == layer_id.end()); + avgLp.set("pool", "ave"); + if (axes.size() == 2) + { + CV_CheckEQ(axes.get<int>(0), 1, "Unsupported ReduceMean mode"); + CV_CheckEQ(axes.get<int>(1), 2, "Unsupported ReduceMean mode"); + avgLp.set("global_pooling", true); + outShapes[layerParams.name][axes.get<int>(1)] = 1; + } + else + { + avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true); + avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1); + } + + node_proto.set_input(0, reshapeLp.name); + node_proto.set_output(0, avgLp.name); + addLayer(dstNet, avgLp, node_proto, layer_id, outShapes); + + layerParams.type = "Flatten"; + layerParams.set("axis", 0); + layerParams.set("end_axis", 1); + + node_proto.set_input(0, avgLp.name); + node_proto.set_output(0, layerParams.name); + } + else + { + if (inpShape.size() != 4 && inpShape.size() != 5) CV_Error(Error::StsNotImplemented, "Unsupported input shape of reduce_mean operation."); - DictValue axes = layerParams.get("axes"); - CV_Assert(axes.size() <= inpShape.size() - 2); - std::vector<int> kernel_size(inpShape.size() - 2, 1); - for (int i = 0; i < axes.size(); i++) { - int axis = axes.get<int>(i); - CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); - kernel_size[axis - 2] = inpShape[axis]; + CV_Assert(axes.size() <= inpShape.size() - 2); + std::vector<int> kernel_size(inpShape.size() - 2, 1); + for (int i = 0; i < axes.size(); i++) { + int axis = axes.get<int>(i); + CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); + kernel_size[axis - 2] = inpShape[axis]; + } + layerParams.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); } - - layerParams.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); } } else if (layer_type == "Slice") @@ -721,6 +773,19 @@ void ONNXImporter::populateNet(Net dstNet) layerParams.type = "ReLU"; replaceLayerParam(layerParams, "alpha", "negative_slope"); } + else if (layer_type == "Relu") + { + layerParams.type = "ReLU"; + } + else if (layer_type == "Elu") + { + layerParams.type = "ELU"; + } + else if (layer_type == "PRelu") + { + layerParams.type = "PReLU"; + layerParams.blobs.push_back(getBlob(node_proto, constBlobs, 1)); + } else if (layer_type == "LRN") { replaceLayerParam(layerParams, "size", "local_size"); @@ -816,10 +881,14 @@ void ONNXImporter::populateNet(Net dstNet) { CV_Assert(node_proto.input_size() == 2); layerParams.type = "InnerProduct"; - Mat blob = getBlob(node_proto, constBlobs, 1); - layerParams.blobs.push_back(blob.t()); layerParams.set("bias_term", false); - layerParams.set("num_output", layerParams.blobs[0].size[0]); + + if (constBlobs.find(node_proto.input(1)) != constBlobs.end()) + { + Mat blob = getBlob(node_proto, constBlobs, 1); + layerParams.blobs.push_back(blob.t()); + layerParams.set("num_output", layerParams.blobs[0].size[0]); + } } else if (layer_type == "Mul" || layer_type == "Div") { @@ -968,22 +1037,6 @@ void ONNXImporter::populateNet(Net dstNet) continue; } } - else if (layer_type == "ReduceL2") - { - CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); - CV_Assert(graph_proto.node_size() > li + 1 && graph_proto.node(li + 1).op_type() == "Div"); - ++li; - node_proto = graph_proto.node(li); - layerParams.name = node_proto.output(0); - layerParams.type = "Normalize"; - - DictValue axes_dict = layerParams.get("axes"); - if (axes_dict.size() != 1) - CV_Error(Error::StsNotImplemented, "Multidimensional reduceL2"); - int axis = axes_dict.getIntValue(0); - layerParams.set("axis",axis); - layerParams.set("end_axis", axis); - } else if (layer_type == "Squeeze") { CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); @@ -1071,6 +1124,78 @@ void ONNXImporter::populateNet(Net dstNet) layerParams.type = "Reshape"; layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); } + else if (layer_type == "Expand") + { + CV_CheckEQ(node_proto.input_size(), 2, ""); + CV_Assert(constBlobs.find(node_proto.input(1)) != constBlobs.end()); + 
Mat newShapeMat = getBlob(node_proto, constBlobs, 1); + MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total()); + + shapeIt = outShapes.find(node_proto.input(0)); + CV_Assert(shapeIt != outShapes.end()); + MatShape inpShape = shapeIt->second; + CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims"); + + std::vector<int> broadcast_axes; + for (int i = 0; i < targetShape.size(); i++) + { + if (targetShape[i] != inpShape[i]) + { + if (inpShape[i] == 1) + broadcast_axes.push_back(i); + else + CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i)); + } + } + + if (broadcast_axes.size() == 2 && + broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1) + { + LayerParams constParams; + constParams.name = layerParams.name + "/const"; + CV_Assert(layer_id.find(constParams.name) == layer_id.end()); + constParams.type = "Const"; + + Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr<int>(), CV_32F); + constParams.blobs.push_back(inp); + + opencv_onnx::NodeProto proto; + proto.add_output(constParams.name); + addLayer(dstNet, constParams, proto, layer_id, outShapes); + + layerParams.type = "Scale"; + layerParams.set("bias_term", false); + node_proto.set_input(0, constParams.name); + node_proto.set_input(1, shapeIt->first); + } + else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1) + { + String base_name = layerParams.name + "/copy_"; + std::vector<String> input_names; + for (int j = 0; j < targetShape[broadcast_axes[0]]; j++) + { + std::ostringstream ss; + ss << j; + LayerParams copyLP; + copyLP.name = base_name + ss.str(); + copyLP.type = "Identity"; + CV_Assert(layer_id.find(copyLP.name) == layer_id.end()); + input_names.push_back(copyLP.name); + + node_proto.set_output(0, copyLP.name); + addLayer(dstNet, copyLP, node_proto, layer_id, outShapes); + } + node_proto.clear_input(); + for (int i = 0; i < input_names.size(); i++) + { + node_proto.add_input(input_names[i]); + } + layerParams.set("axis", broadcast_axes[0]); + layerParams.type = "Concat"; + } + else + CV_Error(Error::StsNotImplemented, "Unsupported Expand op"); + } else if (layer_type == "Reshape") { CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape")); @@ -1285,10 +1410,10 @@ void ONNXImporter::populateNet(Net dstNet) layerParams.set("zoom_factor_x", scales.at(3)); } } - else if (layer_type == "LogSoftmax") + else if (layer_type == "SoftMax" || layer_type == "LogSoftmax") { layerParams.type = "Softmax"; - layerParams.set("log_softmax", true); + layerParams.set("log_softmax", layer_type == "LogSoftmax"); } else { diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp index ba81677800..85113a94c0 100644 --- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp +++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp @@ -682,6 +682,15 @@ void RemoveIdentityOps(tensorflow::GraphDef& net) IdentityOpsMap::iterator it = identity_ops.find(input_op_name); if (it != identity_ops.end()) { + // In case of Identity after Identity + while (true) + { + IdentityOpsMap::iterator nextIt = identity_ops.find(it->second); + if (nextIt != identity_ops.end()) + it = nextIt; + else + break; + } layer->set_input(input_id, it->second); } } @@ -847,7 +856,7 @@ void sortByExecutionOrder(tensorflow::GraphDef& net) nodesToAdd.push_back(i); else { - if (node.op() == "Merge" || node.op() == "RefMerge") + if (node.op() == "Merge" || node.op() == "RefMerge" || node.op() == "NoOp") { int 
numControlEdges = 0; for (int j = 0; j < numInputsInGraph; ++j) @@ -896,7 +905,7 @@ void removePhaseSwitches(tensorflow::GraphDef& net) { const tensorflow::NodeDef& node = net.node(i); nodesMap.insert(std::make_pair(node.name(), i)); - if (node.op() == "Switch" || node.op() == "Merge") + if (node.op() == "Switch" || node.op() == "Merge" || node.op() == "NoOp") { CV_Assert(node.input_size() > 0); // Replace consumers' inputs. @@ -914,7 +923,7 @@ void removePhaseSwitches(tensorflow::GraphDef& net) } } nodesToRemove.push_back(i); - if (node.op() == "Merge" || node.op() == "Switch") + if (node.op() == "Merge" || node.op() == "Switch" || node.op() == "NoOp") mergeOpSubgraphNodes.push(i); } } diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index d5b2a2a85b..1e7f07a478 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -865,15 +865,10 @@ struct TorchImporter layerParams.set("indices_blob_id", tensorParams["indices"].first); curModule->modules.push_back(newModule); } - else if (nnName == "SoftMax") + else if (nnName == "LogSoftMax" || nnName == "SoftMax") { - newModule->apiType = "SoftMax"; - curModule->modules.push_back(newModule); - } - else if (nnName == "LogSoftMax") - { - newModule->apiType = "SoftMax"; - layerParams.set("log_softmax", true); + newModule->apiType = "Softmax"; + layerParams.set("log_softmax", nnName == "LogSoftMax"); curModule->modules.push_back(newModule); } else if (nnName == "SpatialCrossMapLRN") diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index f56608a216..ce3ab23a77 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -442,7 +442,7 @@ TEST_P(SoftMax, Accuracy) Backend backendId = get<0>(get<1>(GetParam())); Target targetId = get<1>(get<1>(GetParam())); LayerParams lp; - lp.type = "SoftMax"; + lp.type = "Softmax"; lp.name = "testLayer"; int sz[] = {1, inChannels, 1, 1}; diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 914ad405e0..6cab54b9a0 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -70,7 +70,7 @@ public: { LayerParams lp; Net netSoftmax; - netSoftmax.addLayerToPrev("softmaxLayer", "SoftMax", lp); + netSoftmax.addLayerToPrev("softmaxLayer", "Softmax", lp); netSoftmax.setPreferableBackend(DNN_BACKEND_OPENCV); netSoftmax.setInput(out); @@ -186,6 +186,8 @@ TEST_P(Test_ONNX_layers, Shape) TEST_P(Test_ONNX_layers, ReduceMean) { testONNXModels("reduce_mean"); + testONNXModels("reduce_mean_axis1"); + testONNXModels("reduce_mean_axis2"); } TEST_P(Test_ONNX_layers, ReduceMean3D) @@ -332,6 +334,30 @@ TEST_P(Test_ONNX_layers, Multiplication) testONNXModels("mul"); } +TEST_P(Test_ONNX_layers, MatMul) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + + testONNXModels("matmul_2d"); + testONNXModels("matmul_3d"); + testONNXModels("matmul_4d"); +} + +TEST_P(Test_ONNX_layers, Expand) +{ + testONNXModels("expand_batch"); + testONNXModels("expand_channels"); +} + +TEST_P(Test_ONNX_layers, ExpandHW) +{ + // ngraph::op::v1::Multiply bug + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + testONNXModels("expand_hw"); +} + TEST_P(Test_ONNX_layers, Constant) { #if 
defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000) @@ -438,6 +464,7 @@ TEST_P(Test_ONNX_layers, Squeeze) TEST_P(Test_ONNX_layers, ReduceL2) { testONNXModels("reduceL2"); + testONNXModels("reduceL2_subgraph"); } TEST_P(Test_ONNX_layers, Split) diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index d2a5e67c32..395f751683 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -947,6 +947,11 @@ TEST_P(Test_TensorFlow_layers, resize_bilinear) runTensorFlowNet("resize_bilinear_factor"); } +TEST_P(Test_TensorFlow_layers, tf2_keras) +{ + runTensorFlowNet("tf2_dense"); +} + TEST_P(Test_TensorFlow_layers, squeeze) { #if defined(INF_ENGINE_RELEASE) diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl index 9d0244c2c9..907811cded 100644 --- a/modules/imgproc/src/opencl/hough_lines.cl +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -94,6 +94,7 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i { int theta_idx = get_group_id(1); int count_idx = get_local_id(0); + __local int l_accum[BUFFER_SIZE]; if (theta_idx > 0 && theta_idx < numangle + 1) { @@ -102,7 +103,6 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i sinVal *= irho; cosVal *= irho; - __local int l_accum[BUFFER_SIZE]; for (int i=count_idx; i #include + +using namespace cv; //! [includes] -//! [namespace] -using namespace cv; -using namespace std; -//! [namespace] - - -int main( int argc, char** argv ) +int main() { - //! [load] - String imageName( "HappyFish.jpg" ); // by default - if( argc > 1) - { - imageName = argv[1]; - } - //! [load] - - //! [mat] - Mat image; - //! [mat] - //! [imread] - image = imread( samples::findFile( imageName ), IMREAD_COLOR ); // Read the file + std::string image_path = samples::findFile("starry_night.jpg"); + Mat img = imread(image_path, IMREAD_COLOR); //! [imread] - if( image.empty() ) // Check for invalid input + //! [empty] + if(img.empty()) { - cout << "Could not open or find the image" << std::endl ; - return -1; + std::cout << "Could not read the image: " << image_path << std::endl; + return 1; } - - //! [window] - namedWindow( "Display window", WINDOW_AUTOSIZE ); // Create a window for display. - //! [window] + //! [empty] //! [imshow] - imshow( "Display window", image ); // Show our image inside it. + imshow("Display window", img); + int k = waitKey(0); // Wait for a keystroke in the window //! [imshow] - //! [wait] - waitKey(0); // Wait for a keystroke in the window - //! [wait] + //! [imsave] + if(k == 's') + { + imwrite("starry_night.png", img); + } + //! [imsave] + return 0; } diff --git a/samples/python/tutorial_code/introduction/display_image/display_image.py b/samples/python/tutorial_code/introduction/display_image/display_image.py new file mode 100644 index 0000000000..404d2839b0 --- /dev/null +++ b/samples/python/tutorial_code/introduction/display_image/display_image.py @@ -0,0 +1,19 @@ +## [imports] +import cv2 as cv +import sys +## [imports] +## [imread] +img = cv.imread(cv.samples.findFile("starry_night.jpg")) +## [imread] +## [empty] +if img is None: + sys.exit("Could not read the image.") +## [empty] +## [imshow] +cv.imshow("Display window", img) +k = cv.waitKey(0) +## [imshow] +## [imsave] +if k == ord("s"): + cv.imwrite("starry_night.png", img) +## [imsave]
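
For reference, a minimal usage sketch of the refactored PatternMaker API from the first hunk above (assuming gen_pattern.py and svgfig are importable; the output file name is illustrative):

    # a4_circles.svg is a hypothetical output name
    from gen_pattern import PatternMaker

    # 8 x 11 circle grid, 20 mm spacing, circle radius = spacing / 5, on an A4-sized canvas (210 x 297 mm)
    pm = PatternMaker(cols=8, rows=11, output="a4_circles.svg", units="mm",
                      square_size=20.0, radius_rate=5.0, page_width=210, page_height=297)
    pm.make_circles_pattern()
    pm.save()

The same pattern should be producible through the new argparse interface, e.g. python gen_pattern.py -o a4_circles.svg -r 11 -c 8 -T circles -s 20.0 -R 5.0 -u mm -a A4.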