Merge remote-tracking branch 'upstream/3.4' into merge-3.4

2025-08-06 14:36:36 +08:00 · 2020-11-17 21:56:26 +00:00 · 2020-11-17 21:56:26 +00:00 · ce8027c6fb
commit ce8027c6fb
parent 9acbfc6e05 301c078d19
39 changed files with 554 additions and 170 deletions
--- a/3rdparty/libjpeg-turbo/CMakeLists.txt
+++ b/3rdparty/libjpeg-turbo/CMakeLists.txt
@ -4,9 +4,9 @@ ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wsign-compare -Wshorten-6

 set(VERSION_MAJOR 2)
 set(VERSION_MINOR 0)
-set(VERSION_REVISION 5)
+set(VERSION_REVISION 6)
 set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_REVISION})
-set(LIBJPEG_TURBO_VERSION_NUMBER 2000005)
+set(LIBJPEG_TURBO_VERSION_NUMBER 2000006)

 string(TIMESTAMP BUILD "opencv-${OPENCV_VERSION}-libjpeg-turbo")
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
--- a/3rdparty/libjpeg-turbo/LICENSE.md
+++ b/3rdparty/libjpeg-turbo/LICENSE.md
@ -91,7 +91,7 @@ best of our understanding.
 The Modified (3-clause) BSD License
 ===================================

-Copyright (C)2009-2019 D. R. Commander.  All Rights Reserved.
+Copyright (C)2009-2020 D. R. Commander.  All Rights Reserved.
 Copyright (C)2015 Viktor Szathmáry.  All Rights Reserved.

 Redistribution and use in source and binary forms, with or without
--- a/3rdparty/libjpeg-turbo/README.ijg
+++ b/3rdparty/libjpeg-turbo/README.ijg
@ -223,12 +223,12 @@ https://www.iso.org/standard/54989.html and http://www.itu.int/rec/T-REC-T.871.
 A PDF file of the older JFIF 1.02 specification is available at
 http://www.w3.org/Graphics/JPEG/jfif3.pdf.

-The TIFF 6.0 file format specification can be obtained by FTP from
-ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz.  The JPEG incorporation scheme
-found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
-IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
-Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
-(Compression tag 7).  Copies of this Note can be obtained from
+The TIFF 6.0 file format specification can be obtained from
+http://mirrors.ctan.org/graphics/tiff/TIFF6.ps.gz.  The JPEG incorporation
+scheme found in the TIFF 6.0 spec of 3-June-92 has a number of serious
+problems.  IJG does not recommend use of the TIFF 6.0 design (TIFF Compression
+tag 6).  Instead, we recommend the JPEG design proposed by TIFF Technical Note
+#2 (Compression tag 7).  Copies of this Note can be obtained from
 http://www.ijg.org/files/.  It is expected that the next revision
 of the TIFF spec will replace the 6.0 JPEG design with the Note's design.
 Although IJG's own code does not support TIFF/JPEG, the free libtiff library
@ -243,14 +243,8 @@ The most recent released version can always be found there in
 directory "files".

 The JPEG FAQ (Frequently Asked Questions) article is a source of some
-general information about JPEG.
-It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/
-and other news.answers archive sites, including the official news.answers
-archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/.
-If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu
-with body
-        send usenet/news.answers/jpeg-faq/part1
-        send usenet/news.answers/jpeg-faq/part2
+general information about JPEG.  It is available at
+http://www.faqs.org/faqs/jpeg-faq.


 FILE FORMAT COMPATIBILITY
--- a/3rdparty/libjpeg-turbo/README.md
+++ b/3rdparty/libjpeg-turbo/README.md
@ -2,7 +2,7 @@ Background
 ==========

 libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
-baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and
+baseline JPEG compression and decompression on x86, x86-64, Arm, PowerPC, and
 MIPS systems, as well as progressive JPEG compression on x86 and x86-64
 systems.  On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
 all else being equal.  On other types of systems, libjpeg-turbo can still
@ -179,8 +179,8 @@ supported and which aren't.

 NOTE:  As of this writing, extensive research has been conducted into the
 usefulness of DCT scaling as a means of data reduction and SmartScale as a
-means of quality improvement.  The reader is invited to peruse the research at
-<http://www.libjpeg-turbo.org/About/SmartScale> and draw his/her own conclusions,
+means of quality improvement.  Readers are invited to peruse the research at
+<http://www.libjpeg-turbo.org/About/SmartScale> and draw their own conclusions,
 but it is the general belief of our project that these features have not
 demonstrated sufficient usefulness to justify inclusion in libjpeg-turbo.

@ -287,12 +287,13 @@ following reasons:
  (and slightly faster) floating point IDCT algorithm introduced in libjpeg
  v8a as opposed to the algorithm used in libjpeg v6b.  It should be noted,
  however, that this algorithm basically brings the accuracy of the floating
-  point IDCT in line with the accuracy of the slow integer IDCT.  The floating
-  point DCT/IDCT algorithms are mainly a legacy feature, and they do not
-  produce significantly more accuracy than the slow integer algorithms (to put
-  numbers on this, the typical difference in PNSR between the two algorithms
-  is less than 0.10 dB, whereas changing the quality level by 1 in the upper
-  range of the quality scale is typically more like a 1.0 dB difference.)
+  point IDCT in line with the accuracy of the accurate integer IDCT.  The
+  floating point DCT/IDCT algorithms are mainly a legacy feature, and they do
+  not produce significantly more accuracy than the accurate integer algorithms
+  (to put numbers on this, the typical difference in PNSR between the two
+  algorithms is less than 0.10 dB, whereas changing the quality level by 1 in
+  the upper range of the quality scale is typically more like a 1.0 dB
+  difference.)

 - If the floating point algorithms in libjpeg-turbo are not implemented using
  SIMD instructions on a particular platform, then the accuracy of the
@ -340,7 +341,7 @@ The algorithm used by the SIMD-accelerated quantization function cannot produce
 correct results whenever the fast integer forward DCT is used along with a JPEG
 quality of 98-100.  Thus, libjpeg-turbo must use the non-SIMD quantization
 function in those cases.  This causes performance to drop by as much as 40%.
-It is therefore strongly advised that you use the slow integer forward DCT
+It is therefore strongly advised that you use the accurate integer forward DCT
 whenever encoding images with a JPEG quality of 98 or higher.


--- a/3rdparty/libjpeg-turbo/src/jchuff.c
+++ b/3rdparty/libjpeg-turbo/src/jchuff.c
@ -34,10 +34,10 @@
 * memory footprint by 64k, which is important for some mobile applications
 * that create many isolated instances of libjpeg-turbo (web browsers, for
 * instance.)  This may improve performance on some mobile platforms as well.
- * This feature is enabled by default only on ARM processors, because some x86
+ * This feature is enabled by default only on Arm processors, because some x86
 * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
 * shown to have a significant performance impact even on the x86 chips that
- * have a fast implementation of it.  When building for ARMv6, you can
+ * have a fast implementation of it.  When building for Armv6, you can
 * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
 * flags (this defines __thumb__).
 */
--- a/3rdparty/libjpeg-turbo/src/jcinit.c
+++ b/3rdparty/libjpeg-turbo/src/jcinit.c
@ -1,8 +1,10 @@
 /*
 * jcinit.c
 *
+ * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -19,6 +21,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"


 /*
--- a/3rdparty/libjpeg-turbo/src/jcphuff.c
+++ b/3rdparty/libjpeg-turbo/src/jcphuff.c
@ -43,10 +43,10 @@
 * memory footprint by 64k, which is important for some mobile applications
 * that create many isolated instances of libjpeg-turbo (web browsers, for
 * instance.)  This may improve performance on some mobile platforms as well.
- * This feature is enabled by default only on ARM processors, because some x86
+ * This feature is enabled by default only on Arm processors, because some x86
 * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
 * shown to have a significant performance impact even on the x86 chips that
- * have a fast implementation of it.  When building for ARMv6, you can
+ * have a fast implementation of it.  When building for Armv6, you can
 * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
 * flags (this defines __thumb__).
 */
--- a/3rdparty/libjpeg-turbo/src/jctrans.c
+++ b/3rdparty/libjpeg-turbo/src/jctrans.c
@ -4,8 +4,8 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1995-1998, Thomas G. Lane.
 * Modified 2000-2009 by Guido Vollbeding.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -17,6 +17,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"


 /* Forward declarations */
--- a/3rdparty/libjpeg-turbo/src/jdapistd.c
+++ b/3rdparty/libjpeg-turbo/src/jdapistd.c
@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2015-2018, D. R. Commander.
+ * Copyright (C) 2010, 2015-2018, 2020, D. R. Commander.
 * Copyright (C) 2015, Google, Inc.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
@ -21,6 +21,8 @@
 #include "jinclude.h"
 #include "jdmainct.h"
 #include "jdcoefct.h"
+#include "jdmaster.h"
+#include "jdmerge.h"
 #include "jdsample.h"
 #include "jmemsys.h"

@ -316,6 +318,8 @@ LOCAL(void)
 read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
 {
  JDIMENSION n;
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+  JSAMPARRAY scanlines = NULL;
  void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                         JDIMENSION input_row, JSAMPARRAY output_buf,
                         int num_rows) = NULL;
@ -332,8 +336,13 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
    cinfo->cquantize->color_quantize = noop_quantize;
  }

+  if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
+    my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+    scanlines = &upsample->spare_row;
+  }
+
  for (n = 0; n < num_lines; n++)
-    jpeg_read_scanlines(cinfo, NULL, 1);
+    jpeg_read_scanlines(cinfo, scanlines, 1);

  if (color_convert)
    cinfo->cconvert->color_convert = color_convert;
@ -353,6 +362,12 @@ increment_simple_rowgroup_ctr(j_decompress_ptr cinfo, JDIMENSION rows)
 {
  JDIMENSION rows_left;
  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+
+  if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
+    read_and_discard_scanlines(cinfo, rows);
+    return;
+  }

  /* Increment the counter to the next row group after the skipped rows. */
  main_ptr->rowgroup_ctr += rows / cinfo->max_v_samp_factor;
@ -382,21 +397,27 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
 {
  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  my_master_ptr master = (my_master_ptr)cinfo->master;
  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
  JDIMENSION i, x;
  int y;
  JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row;
  JDIMENSION lines_to_skip, lines_to_read;

+  /* Two-pass color quantization is not supported. */
+  if (cinfo->quantize_colors && cinfo->two_pass_quantize)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
  if (cinfo->global_state != DSTATE_SCANNING)
    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);

  /* Do not skip past the bottom of the image. */
  if (cinfo->output_scanline + num_lines >= cinfo->output_height) {
+    num_lines = cinfo->output_height - cinfo->output_scanline;
    cinfo->output_scanline = cinfo->output_height;
    (*cinfo->inputctl->finish_input_pass) (cinfo);
    cinfo->inputctl->eoi_reached = TRUE;
-    return cinfo->output_height - cinfo->output_scanline;
+    return num_lines;
  }

  if (num_lines == 0)
@ -445,8 +466,10 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
    main_ptr->buffer_full = FALSE;
    main_ptr->rowgroup_ctr = 0;
    main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
-    upsample->next_row_out = cinfo->max_v_samp_factor;
-    upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+    if (!master->using_merged_upsample) {
+      upsample->next_row_out = cinfo->max_v_samp_factor;
+      upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+    }
  }

  /* Skipping is much simpler when context rows are not required. */
@ -458,8 +481,10 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
      cinfo->output_scanline += lines_left_in_iMCU_row;
      main_ptr->buffer_full = FALSE;
      main_ptr->rowgroup_ctr = 0;
-      upsample->next_row_out = cinfo->max_v_samp_factor;
-      upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+      if (!master->using_merged_upsample) {
+        upsample->next_row_out = cinfo->max_v_samp_factor;
+        upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+      }
    }
  }

@ -494,7 +519,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
      cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row;
      increment_simple_rowgroup_ctr(cinfo, lines_to_read);
    }
-    upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+    if (!master->using_merged_upsample)
+      upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
    return num_lines;
  }

@ -535,7 +561,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
   * bit odd, since "rows_to_go" seems to be redundantly keeping track of
   * output_scanline.
   */
-  upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+  if (!master->using_merged_upsample)
+    upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;

  /* Always skip the requested number of lines. */
  return num_lines;
--- a/3rdparty/libjpeg-turbo/src/jdcoefct.c
+++ b/3rdparty/libjpeg-turbo/src/jdcoefct.c
@ -6,7 +6,7 @@
 * libjpeg-turbo Modifications:
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright (C) 2010, 2015-2016, D. R. Commander.
- * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2015, 2020, Google, Inc.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -495,11 +495,13 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
      if (first_row && block_row == 0)
        prev_block_row = buffer_ptr;
      else
-        prev_block_row = buffer[block_row - 1];
+        prev_block_row = buffer[block_row - 1] +
+                         cinfo->master->first_MCU_col[ci];
      if (last_row && block_row == block_rows - 1)
        next_block_row = buffer_ptr;
      else
-        next_block_row = buffer[block_row + 1];
+        next_block_row = buffer[block_row + 1] +
+                         cinfo->master->first_MCU_col[ci];
      /* We fetch the surrounding DC values using a sliding-register approach.
       * Initialize all nine here so as to do the right thing on narrow pics.
       */
--- a/3rdparty/libjpeg-turbo/src/jdcolor.c
+++ b/3rdparty/libjpeg-turbo/src/jdcolor.c
@ -571,11 +571,10 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 * RGB565 conversion
 */

-#define PACK_SHORT_565_LE(r, g, b)  ((((r) << 8) & 0xF800) | \
-                                     (((g) << 3) & 0x7E0) | ((b) >> 3))
-#define PACK_SHORT_565_BE(r, g, b)  (((r) & 0xF8) | ((g) >> 5) | \
-                                     (((g) << 11) & 0xE000) | \
-                                     (((b) << 5) & 0x1F00))
+#define PACK_SHORT_565_LE(r, g, b) \
+  ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_SHORT_565_BE(r, g, b) \
+  (((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))

 #define PACK_TWO_PIXELS_LE(l, r)    ((r << 16) | l)
 #define PACK_TWO_PIXELS_BE(l, r)    ((l << 16) | r)
--- a/3rdparty/libjpeg-turbo/src/jdmerge.c
+++ b/3rdparty/libjpeg-turbo/src/jdmerge.c
@ -5,7 +5,7 @@
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * libjpeg-turbo Modifications:
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009, 2011, 2014-2015, D. R. Commander.
+ * Copyright (C) 2009, 2011, 2014-2015, 2020, D. R. Commander.
 * Copyright (C) 2013, Linaro Limited.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
@ -40,41 +40,13 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jdmerge.h"
 #include "jsimd.h"
 #include "jconfigint.h"

 #ifdef UPSAMPLE_MERGING_SUPPORTED


-/* Private subobject */
-
-typedef struct {
-  struct jpeg_upsampler pub;    /* public fields */
-
-  /* Pointer to routine to do actual upsampling/conversion of one row group */
-  void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                    JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
-
-  /* Private state for YCC->RGB conversion */
-  int *Cr_r_tab;                /* => table for Cr to R conversion */
-  int *Cb_b_tab;                /* => table for Cb to B conversion */
-  JLONG *Cr_g_tab;              /* => table for Cr to G conversion */
-  JLONG *Cb_g_tab;              /* => table for Cb to G conversion */
-
-  /* For 2:1 vertical sampling, we produce two output rows at a time.
-   * We need a "spare" row buffer to hold the second output row if the
-   * application provides just a one-row buffer; we also use the spare
-   * to discard the dummy last row if the image height is odd.
-   */
-  JSAMPROW spare_row;
-  boolean spare_full;           /* T if spare buffer is occupied */
-
-  JDIMENSION out_row_width;     /* samples per output row */
-  JDIMENSION rows_to_go;        /* counts rows remaining in image */
-} my_upsampler;
-
-typedef my_upsampler *my_upsample_ptr;
-
 #define SCALEBITS       16      /* speediest right-shift on some machines */
 #define ONE_HALF        ((JLONG)1 << (SCALEBITS - 1))
 #define FIX(x)          ((JLONG)((x) * (1L << SCALEBITS) + 0.5))
@ -189,7 +161,7 @@ typedef my_upsampler *my_upsample_ptr;
 LOCAL(void)
 build_ycc_rgb_table(j_decompress_ptr cinfo)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
  int i;
  JLONG x;
  SHIFT_TEMPS
@ -232,7 +204,7 @@ build_ycc_rgb_table(j_decompress_ptr cinfo)
 METHODDEF(void)
 start_pass_merged_upsample(j_decompress_ptr cinfo)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;

  /* Mark the spare buffer empty */
  upsample->spare_full = FALSE;
@ -254,7 +226,7 @@ merged_2v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                   JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 /* 2:1 vertical sampling case: may need a spare row. */
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
  JSAMPROW work_ptrs[2];
  JDIMENSION num_rows;          /* number of rows returned to caller */

@ -305,7 +277,7 @@ merged_1v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                   JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 /* 1:1 vertical sampling case: much easier, never need a spare row. */
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;

  /* Just do the upsampling. */
  (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
@ -420,11 +392,10 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 * RGB565 conversion
 */

-#define PACK_SHORT_565_LE(r, g, b)  ((((r) << 8) & 0xF800) | \
-                                     (((g) << 3) & 0x7E0) | ((b) >> 3))
-#define PACK_SHORT_565_BE(r, g, b)  (((r) & 0xF8) | ((g) >> 5) | \
-                                     (((g) << 11) & 0xE000) | \
-                                     (((b) << 5) & 0x1F00))
+#define PACK_SHORT_565_LE(r, g, b) \
+  ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_SHORT_565_BE(r, g, b) \
+  (((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))

 #define PACK_TWO_PIXELS_LE(l, r)    ((r << 16) | l)
 #define PACK_TWO_PIXELS_BE(l, r)    ((l << 16) | r)
@ -566,11 +537,11 @@ h2v2_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 GLOBAL(void)
 jinit_merged_upsampler(j_decompress_ptr cinfo)
 {
-  my_upsample_ptr upsample;
+  my_merged_upsample_ptr upsample;

-  upsample = (my_upsample_ptr)
+  upsample = (my_merged_upsample_ptr)
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                sizeof(my_upsampler));
+                                sizeof(my_merged_upsampler));
  cinfo->upsample = (struct jpeg_upsampler *)upsample;
  upsample->pub.start_pass = start_pass_merged_upsample;
  upsample->pub.need_context_rows = FALSE;
--- a/3rdparty/libjpeg-turbo/src/jdmerge.h
+++ b/3rdparty/libjpeg-turbo/src/jdmerge.h
@ -0,0 +1,47 @@
+/*
+ * jdmerge.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+#define JPEG_INTERNALS
+#include "jpeglib.h"
+
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_upsampler pub;    /* public fields */
+
+  /* Pointer to routine to do actual upsampling/conversion of one row group */
+  void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                    JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+
+  /* Private state for YCC->RGB conversion */
+  int *Cr_r_tab;                /* => table for Cr to R conversion */
+  int *Cb_b_tab;                /* => table for Cb to B conversion */
+  JLONG *Cr_g_tab;              /* => table for Cr to G conversion */
+  JLONG *Cb_g_tab;              /* => table for Cb to G conversion */
+
+  /* For 2:1 vertical sampling, we produce two output rows at a time.
+   * We need a "spare" row buffer to hold the second output row if the
+   * application provides just a one-row buffer; we also use the spare
+   * to discard the dummy last row if the image height is odd.
+   */
+  JSAMPROW spare_row;
+  boolean spare_full;           /* T if spare buffer is occupied */
+
+  JDIMENSION out_row_width;     /* samples per output row */
+  JDIMENSION rows_to_go;        /* counts rows remaining in image */
+} my_merged_upsampler;
+
+typedef my_merged_upsampler *my_merged_upsample_ptr;
+
+#endif /* UPSAMPLE_MERGING_SUPPORTED */
--- a/3rdparty/libjpeg-turbo/src/jdmrg565.c
+++ b/3rdparty/libjpeg-turbo/src/jdmrg565.c
@ -5,7 +5,7 @@
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * libjpeg-turbo Modifications:
 * Copyright (C) 2013, Linaro Limited.
- * Copyright (C) 2014-2015, 2018, D. R. Commander.
+ * Copyright (C) 2014-2015, 2018, 2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -19,7 +19,7 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                                  JDIMENSION in_row_group_ctr,
                                  JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
  register int y, cred, cgreen, cblue;
  int cb, cr;
  register JSAMPROW outptr;
@ -90,7 +90,7 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
                                   JDIMENSION in_row_group_ctr,
                                   JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
  register int y, cred, cgreen, cblue;
  int cb, cr;
  register JSAMPROW outptr;
@ -163,7 +163,7 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                                  JDIMENSION in_row_group_ctr,
                                  JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
  register int y, cred, cgreen, cblue;
  int cb, cr;
  register JSAMPROW outptr0, outptr1;
@ -259,7 +259,7 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
                                   JDIMENSION in_row_group_ctr,
                                   JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
  register int y, cred, cgreen, cblue;
  int cb, cr;
  register JSAMPROW outptr0, outptr1;
--- a/3rdparty/libjpeg-turbo/src/jdmrgext.c
+++ b/3rdparty/libjpeg-turbo/src/jdmrgext.c
@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2011, 2015, D. R. Commander.
+ * Copyright (C) 2011, 2015, 2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -25,7 +25,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                              JDIMENSION in_row_group_ctr,
                              JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
  register int y, cred, cgreen, cblue;
  int cb, cr;
  register JSAMPROW outptr;
@ -97,7 +97,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                              JDIMENSION in_row_group_ctr,
                              JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
  register int y, cred, cgreen, cblue;
  int cb, cr;
  register JSAMPROW outptr0, outptr1;
--- a/3rdparty/libjpeg-turbo/src/jdtrans.c
+++ b/3rdparty/libjpeg-turbo/src/jdtrans.c
@ -3,8 +3,8 @@
 *
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1995-1997, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -16,6 +16,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"


 /* Forward declarations */
--- a/3rdparty/libjpeg-turbo/src/jfdctint.c
+++ b/3rdparty/libjpeg-turbo/src/jfdctint.c
@ -4,11 +4,11 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1996, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2015, D. R. Commander.
+ * Copyright (C) 2015, 2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
- * This file contains a slow-but-accurate integer implementation of the
+ * This file contains a slower but more accurate integer implementation of the
 * forward DCT (Discrete Cosine Transform).
 *
 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
--- a/3rdparty/libjpeg-turbo/src/jidctint.c
+++ b/3rdparty/libjpeg-turbo/src/jidctint.c
@ -5,11 +5,11 @@
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * Modification developed 2002-2009 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2015, D. R. Commander.
+ * Copyright (C) 2015, 2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
- * This file contains a slow-but-accurate integer implementation of the
+ * This file contains a slower but more accurate integer implementation of the
 * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
 * must also perform dequantization of the input coefficients.
 *
--- a/3rdparty/libjpeg-turbo/src/jmorecfg.h
+++ b/3rdparty/libjpeg-turbo/src/jmorecfg.h
@ -5,7 +5,7 @@
 * Copyright (C) 1991-1997, Thomas G. Lane.
 * Modified 1997-2009 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2011, 2014-2015, 2018, D. R. Commander.
+ * Copyright (C) 2009, 2011, 2014-2015, 2018, 2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -273,9 +273,9 @@ typedef int boolean;

 /* Capability options common to encoder and decoder: */

-#define DCT_ISLOW_SUPPORTED     /* slow but accurate integer algorithm */
-#define DCT_IFAST_SUPPORTED     /* faster, less accurate integer method */
-#define DCT_FLOAT_SUPPORTED     /* floating-point: accurate, fast on fast HW */
+#define DCT_ISLOW_SUPPORTED     /* accurate integer method */
+#define DCT_IFAST_SUPPORTED     /* less accurate int method [legacy feature] */
+#define DCT_FLOAT_SUPPORTED     /* floating-point method [legacy feature] */

 /* Encoder capability options: */

--- a/3rdparty/libjpeg-turbo/src/jpegcomp.h
+++ b/3rdparty/libjpeg-turbo/src/jpegcomp.h
@ -1,7 +1,7 @@
 /*
 * jpegcomp.h
 *
- * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2010, 2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -19,6 +19,7 @@
 #define _min_DCT_v_scaled_size  min_DCT_v_scaled_size
 #define _jpeg_width  jpeg_width
 #define _jpeg_height  jpeg_height
+#define JERR_ARITH_NOTIMPL  JERR_NOT_COMPILED
 #else
 #define _DCT_scaled_size  DCT_scaled_size
 #define _DCT_h_scaled_size  DCT_scaled_size
--- a/3rdparty/libjpeg-turbo/src/jpeglib.h
+++ b/3rdparty/libjpeg-turbo/src/jpeglib.h
@ -5,7 +5,7 @@
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * Modified 2002-2009 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2013-2014, 2016-2017, D. R. Commander.
+ * Copyright (C) 2009-2011, 2013-2014, 2016-2017, 2020, D. R. Commander.
 * Copyright (C) 2015, Google, Inc.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
@ -244,9 +244,9 @@ typedef enum {
 /* DCT/IDCT algorithm options. */

 typedef enum {
-  JDCT_ISLOW,             /* slow but accurate integer algorithm */
-  JDCT_IFAST,             /* faster, less accurate integer method */
-  JDCT_FLOAT              /* floating-point: accurate, fast on fast HW */
+  JDCT_ISLOW,             /* accurate integer method */
+  JDCT_IFAST,             /* less accurate integer method [legacy feature] */
+  JDCT_FLOAT              /* floating-point method [legacy feature] */
 } J_DCT_METHOD;

 #ifndef JDCT_DEFAULT            /* may be overridden in jconfig.h */
--- a/3rdparty/libjpeg-turbo/src/jquant2.c
+++ b/3rdparty/libjpeg-turbo/src/jquant2.c
@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1996, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2014-2015, D. R. Commander.
+ * Copyright (C) 2009, 2014-2015, 2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -1145,7 +1145,7 @@ start_pass_2_quant(j_decompress_ptr cinfo, boolean is_pre_scan)
  int i;

  /* Only F-S dithering or no dithering is supported. */
-  /* If user asks for ordered dither, give him F-S. */
+  /* If user asks for ordered dither, give them F-S. */
  if (cinfo->dither_mode != JDITHER_NONE)
    cinfo->dither_mode = JDITHER_FS;

@ -1263,7 +1263,7 @@ jinit_2pass_quantizer(j_decompress_ptr cinfo)
    cquantize->sv_colormap = NULL;

  /* Only F-S dithering or no dithering is supported. */
-  /* If user asks for ordered dither, give him F-S. */
+  /* If user asks for ordered dither, give them F-S. */
  if (cinfo->dither_mode != JDITHER_NONE)
    cinfo->dither_mode = JDITHER_FS;

--- a/3rdparty/libjpeg-turbo/src/jversion.h
+++ b/3rdparty/libjpeg-turbo/src/jversion.h
@ -30,23 +30,25 @@
 * NOTE: It is our convention to place the authors in the following order:
 * - libjpeg-turbo authors (2009-) in descending order of the date of their
 *   most recent contribution to the project, then in ascending order of the
- *   date of their first contribution to the project
+ *   date of their first contribution to the project, then in alphabetical
+ *   order
 * - Upstream authors in descending order of the date of the first inclusion of
 *   their code
 */

 #define JCOPYRIGHT \
  "Copyright (C) 2009-2020 D. R. Commander\n" \
-  "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
+  "Copyright (C) 2015, 2020 Google, Inc.\n" \
+  "Copyright (C) 2019 Arm Limited\n" \
  "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
+  "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
  "Copyright (C) 2015 Intel Corporation\n" \
-  "Copyright (C) 2015 Google, Inc.\n" \
+  "Copyright (C) 2013-2014 Linaro Limited\n" \
  "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
-  "Copyright (C) 2013 Linaro Limited\n" \
+  "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
  "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
-  "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
  "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
-  "Copyright (C) 1991-2016 Thomas G. Lane, Guido Vollbeding"
+  "Copyright (C) 1991-2017 Thomas G. Lane, Guido Vollbeding"

 #define JCOPYRIGHT_SHORT \
  "Copyright (C) 1991-2020 The libjpeg-turbo Project and many others"
--- a/apps/interactive-calibration/parametersController.cpp
+++ b/apps/interactive-calibration/parametersController.cpp
@ -32,7 +32,7 @@ bool calib::parametersController::loadFromFile(const std::string &inputFileName)

    if(!reader.isOpened()) {
        std::cerr << "Warning: Unable to open " << inputFileName <<
-                     " Applicatioin stated with default advanced parameters" << std::endl;
+                     " Application started with default advanced parameters" << std::endl;
        return true;
    }

--- a/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown
+++ b/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown
@ -131,7 +131,7 @@ For properties you can read and change look into the documentation of the @ref c
 We want to check just how imperceptible our video converting operation went, therefore we need a
 system to check frame by frame the similarity or differences. The most common algorithm used for
 this is the PSNR (aka **Peak signal-to-noise ratio**). The simplest definition of this starts out
-from the *mean squad error*. Let there be two images: I1 and I2; with a two dimensional size i and
+from the *mean squared error*. Let there be two images: I1 and I2; with a two dimensional size i and
 j, composed of c number of channels.

 \f[MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}\f]
--- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h
+++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
@ -220,6 +220,11 @@ struct VZeroUpperGuard {
 #  define CV_VSX 1
 #endif

+#ifdef __F16C__
+#  include <immintrin.h>
+#  define CV_FP16 1
+#endif
+
 #endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)


--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@ -844,7 +844,7 @@ protected:
    float16_t() : w(0) {}
    explicit float16_t(float x)
    {
-    #if CV_AVX2
+    #if CV_FP16
        __m128 v = _mm_load_ss(&x);
        w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
    #else
@ -875,7 +875,7 @@ protected:

    operator float() const
    {
-    #if CV_AVX2
+    #if CV_FP16
        float f;
        _mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
        return f;
--- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp
@ -3121,18 +3121,39 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, un
 OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64)
 OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64)

+//
 // FP16
+//
+
 inline v_float32x8 v256_load_expand(const float16_t* ptr)
 {
+#if CV_FP16
    return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
+#else
+    float CV_DECL_ALIGNED(32) buf[8];
+    for (int i = 0; i < 8; i++)
+        buf[i] = (float)ptr[i];
+    return v256_load_aligned(buf);
+#endif
 }

 inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
 {
+#if CV_FP16
    __m128i ah = _mm256_cvtps_ph(a.val, 0);
    _mm_storeu_si128((__m128i*)ptr, ah);
+#else
+    float CV_DECL_ALIGNED(32) buf[8];
+    v_store_aligned(buf, a);
+    for (int i = 0; i < 8; i++)
+        ptr[i] = float16_t(buf[i]);
+#endif
 }

+//
+// end of FP16
+//
+
 inline void v256_cleanup() { _mm256_zeroall(); }

 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
--- a/modules/core/src/convert.simd.hpp
+++ b/modules/core/src/convert.simd.hpp
@ -5,6 +5,11 @@
 #include "precomp.hpp"
 #include "convert.hpp"

+#if !defined(OPENCV_SUPRESS_WARNING_AVX2_WITHOUT_FP16C) && \
+    (defined(__GNUC__) && defined(__AVX2__) && !defined(__F16C__))
+#warning "Non-optimal compiler flags: AVX2 without FP16. Generated code is very slow. Consider adding '-mf16c' compiler option."
+#endif
+
 namespace cv {
 namespace hal {
 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
--- a/modules/core/test/test_intrin.cpp
+++ b/modules/core/test/test_intrin.cpp
@ -126,9 +126,11 @@ DEFINE_SIMD_TESTS(256, AVX512_SKX)

 TEST(hal_intrin256, float16x16_FP16)
 {
+#if CV_TRY_FP16
    //CV_CPU_CALL_FP16_(test_hal_intrin_float16, ());
    CV_CPU_CALL_AVX2_(test_hal_intrin_float16, ());
-    throw SkipTestException("Unsupported hardware: FP16 is not available");
+#endif
+    throw SkipTestException("Unsupported: FP16 is not available");
 }


@ -142,8 +144,10 @@ namespace intrin512 {

 TEST(hal_intrin512, float16x32_FP16)
 {
+#if CV_TRY_FP16
    CV_CPU_CALL_AVX512_SKX_(test_hal_intrin_float16, ());
-    throw SkipTestException("Unsupported hardware: FP16 is not available");
+#endif
+    throw SkipTestException("Unsupported: FP16 is not available");
 }


--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@ -1902,21 +1902,21 @@ void test_hal_intrin_float64()
 #endif
 }

-#if CV_FP16
 void test_hal_intrin_float16()
 {
    DUMP_ENTRY(v_float16);
 #if CV_FP16
    TheTest<v_float32>()
        .test_loadstore_fp16_f32()
-#endif
 #if CV_SIMD_FP16
        .test_loadstore_fp16()
        .test_float_cvt_fp16()
 #endif
        ;
-}
+#else
+    std::cout << "SKIP: CV_FP16 is not available" << std::endl;
 #endif
+}

 /*#if defined(CV_CPU_DISPATCH_MODE_FP16) && CV_CPU_DISPATCH_MODE == FP16
 void test_hal_intrin_float16()
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@ -365,9 +365,12 @@ CV__DNN_INLINE_NS_BEGIN
                                     const int requiredOutputs,
                                     std::vector<MatShape> &outputs,
                                     std::vector<MatShape> &internals) const;
+
        virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                               const std::vector<MatShape> &outputs) const {CV_UNUSED(inputs); CV_UNUSED(outputs); return 0;}

+        virtual bool updateMemoryShapes(const std::vector<MatShape> &inputs);
+
        CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
        CV_PROP String type; //!< Type name which was used for creating layer by layer factory.
        CV_PROP int preferableTarget; //!< prefer target for layer forwarding
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -1182,6 +1182,7 @@ struct Net::Impl : public detail::NetImplBase
        preferableBackend = DNN_BACKEND_DEFAULT;
        preferableTarget = DNN_TARGET_CPU;
        skipInfEngineInit = false;
+        hasDynamicShapes = false;
    }

    Ptr<DataLayer> netInputLayer;
@ -1193,6 +1194,7 @@ struct Net::Impl : public detail::NetImplBase
    int preferableTarget;
    String halideConfigFile;
    bool skipInfEngineInit;
+    bool hasDynamicShapes;
    // Map host data to backend specific wrapper.
    std::map<void*, Ptr<BackendWrapper> > backendWrappers;

@ -3539,6 +3541,46 @@ struct Net::Impl : public detail::NetImplBase
        shapes = inOutShapes[layerId];
    }

+    void updateLayersShapes()
+    {
+        CV_Assert(!layers[0].outputBlobs.empty());
+        ShapesVec inputShapes;
+        for(int i = 0; i < layers[0].outputBlobs.size(); i++)
+        {
+            Mat& inp = layers[0].outputBlobs[i];
+            CV_Assert(inp.total());
+            if (preferableBackend == DNN_BACKEND_OPENCV &&
+                preferableTarget == DNN_TARGET_OPENCL_FP16)
+            {
+                layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
+            }
+            inputShapes.push_back(shape(inp));
+        }
+        LayersShapesMap layersShapes;
+        layersShapes[0].in = inputShapes;
+        for (MapIdToLayerData::iterator it = layers.begin();
+             it != layers.end(); it++)
+        {
+            int layerId = it->first;
+            std::vector<LayerPin>& inputLayerIds = it->second.inputBlobsId;
+            if (layersShapes[layerId].in.empty())
+            {
+                for(int i = 0; i < inputLayerIds.size(); i++)
+                {
+                    int inputLayerId = inputLayerIds[i].lid;
+                    LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId);
+                    if(inputIt == layersShapes.end() || inputIt->second.out.empty())
+                    {
+                        getLayerShapesRecursively(inputLayerId, layersShapes);
+                    }
+                    const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid];
+                    layersShapes[layerId].in.push_back(shape);
+                }
+                it->second.layerInstance->updateMemoryShapes(layersShapes[layerId].in);
+            }
+        }
+    }
+
    LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
    {
        return *std::max_element(pins.begin(), pins.end());
@ -3952,6 +3994,8 @@ int Net::addLayer(const String &name, const String &type, LayerParams &params)
    int id = ++impl->lastLayerId;
    impl->layerNameToId.insert(std::make_pair(name, id));
    impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
+    if (params.get<bool>("has_dynamic_shapes", false))
+        impl->hasDynamicShapes = true;

    return id;
 }
@ -4283,8 +4327,13 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
    bool oldShape = prevShape == blobShape;

    blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
-    if (!oldShape)
+    if (!oldShape) {
        ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
+        if (impl->hasDynamicShapes)
+        {
+            impl->updateLayersShapes();
+        }
+    }

    if (!ld.outputBlobsWrappers[pin.oid].empty())
    {
@ -5234,6 +5283,10 @@ bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
    return false;
 }

+bool Layer::updateMemoryShapes(const std::vector<MatShape> &inputs)
+{
+    return true;
+}
 //////////////////////////////////////////////////////////////////////////

 static Mutex& getLayerFactoryMutex()
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@ -98,6 +98,9 @@ public:
        stride = Size(1, 1);
        pad_t = pad_l = pad_b = pad_r = 0;

+        hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
+        shapesInitialized = !hasDynamicShapes;
+
        if (params.has("pool") || params.has("kernel_size") ||
            params.has("kernel_w") || params.has("kernel_h"))
        {
@ -1191,25 +1194,33 @@ public:
            outShape.push_back(pooledSize.height);
            outShape.push_back(pooledSize.width);
        }
-        else if (padMode.empty())
-        {
-            for (int i = 0; i < local_kernel.size(); i++) {
-                float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
-                outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
-            }
-
-            // If we have padding, ensure that the last pooling starts strictly
-            // inside the image (instead of at the padding); otherwise clip the last.
-            for (int i = 0; i < pads_end.size(); i++) {
-                if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) {
-                    --outShape[2 + i];
-                    CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]);
-                }
-            }
-        }
        else
        {
-            getConvPoolOutParams(inpShape, local_kernel, strides, padMode, std::vector<size_t>(local_kernel.size(), 1), outShape);
+            if (hasDynamicShapes && !shapesInitialized)
+            {
+                //Just copy input shapes for width and height to prevent errors on loading stage
+                for (int i = 0; i < inpShape.size(); i++)
+                    outShape.push_back(inpShape[i]);
+            }
+            else if (padMode.empty())
+            {
+                for (int i = 0; i < local_kernel.size(); i++) {
+                    float dst = (float) (inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
+                    outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
+                }
+
+                // If we have padding, ensure that the last pooling starts strictly
+                // inside the image (instead of at the padding); otherwise clip the last.
+                for (int i = 0; i < pads_end.size(); i++) {
+                    if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) {
+                        --outShape[2 + i];
+                        CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]);
+                    }
+                }
+            } else {
+                getConvPoolOutParams(inpShape, local_kernel, strides, padMode,
+                                     std::vector<size_t>(local_kernel.size(), 1), outShape);
+            }
        }
        if (type == ROI)
        {
@ -1231,6 +1242,14 @@ public:
        return false;
    }

+    bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
+    {
+        int dims = inputs[0].size();
+        CV_Assert(inputs[0][dims - 1] > 0 && inputs[0][dims - 2] > 0);
+        shapesInitialized = true;
+        return true;
+    }
+
    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
@ -1262,6 +1281,8 @@ private:
        ROI,   // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf
        PSROI  // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf
    };
+    bool hasDynamicShapes;
+    bool shapesInitialized;
 };

 Ptr<PoolingLayer> PoolingLayer::create(const LayerParams& params)
--- a/modules/dnn/src/layers/reshape_layer.cpp
+++ b/modules/dnn/src/layers/reshape_layer.cpp
@ -170,6 +170,9 @@ public:
        setParamsFrom(params);
        int axis = params.get<int>("axis", 0);
        int numAxes = params.get<int>("num_axes", -1);
+        hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
+        shapesInitialized = !hasDynamicShapes;
+
        CV_Assert(numAxes >= -1);
        newShapeRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes);

@ -182,6 +185,25 @@ public:
            for (i = 0; i < dims; i++)
                newShapeDesc[i] = paramShape.get<int>(i);
        }
+        if (hasDynamicShapes)
+        {
+            dynamicShapes.clear();
+            inputIndices.clear();
+            if (params.has("dynamic_axes")) {
+                CV_Assert(params.has("input_indices"));
+                const DictValue &dynamicAxes = params.get("dynamic_axes");
+                const DictValue &dynamicInputShapes = params.get("input_indices");
+                int i, dims = dynamicAxes.size();
+                CV_Assert(dims == dynamicInputShapes.size());
+                CV_Assert(dims > 0);
+                dynamicShapes.resize(dims);
+                inputIndices.resize(dims);
+                for (i = 0; i < dims; i++) {
+                    dynamicShapes[i] = dynamicAxes.get<int>(i);
+                    inputIndices[i] = dynamicInputShapes.get<int>(i);
+                }
+            }
+        }
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
@ -196,13 +218,21 @@ public:
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
+
        if (inputs.size() == 1 || inputs.size() == requiredOutputs)
        {
            outputs.clear();
            for (size_t i = 0; i < inputs.size(); i++)
            {
-                outputs.push_back(MatShape());
-                computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back());
+                if (hasDynamicShapes && !shapesInitialized)
+                {
+                    outputs.push_back(newShapeDesc);
+                }
+                else
+                {
+                    outputs.push_back(MatShape());
+                    computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back());
+                }
            }
        }
        else
@ -213,6 +243,19 @@ public:
        return true;
    }

+    bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
+    {
+        if (hasDynamicShapes)
+        {
+            for (int i = 0; i < dynamicShapes.size(); ++i)
+            {
+                newShapeDesc[dynamicShapes[i]] = inputs[0][inputIndices[i]];
+            }
+        }
+        shapesInitialized = true;
+        return true;
+    }
+
    void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        std::vector<Mat> outputs;
@ -310,6 +353,10 @@ public:

 private:
    std::vector<MatShape> outShapes;
+    std::vector<int> dynamicShapes; // Which axes shapes are dynamic and require reinitialization with new input
+    std::vector<int> inputIndices; // Which axes from input are needed to compute correct output shape
+    bool hasDynamicShapes;
+    bool shapesInitialized;
 };

 Ptr<ReshapeLayer> ReshapeLayer::create(const LayerParams& params)
--- a/modules/dnn/src/layers/slice_layer.cpp
+++ b/modules/dnn/src/layers/slice_layer.cpp
@ -72,6 +72,8 @@ public:
        setParamsFrom(params);
        axis = params.get<int>("axis", 1);
        num_split = params.get<int>("num_split", 0);
+        hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
+        shapesInitialized = !hasDynamicShapes;
        if (params.has("slice_point"))
        {
            CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
@ -150,7 +152,8 @@ public:
                CV_Assert(sliceRanges[i].size() <= inpShape.size());
                for (int j = 0; j < sliceRanges[i].size(); ++j)
                {
-                    outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
+                    if (shapesInitialized || inpShape[j] > 0)
+                        outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
                }
            }
        }
@ -165,6 +168,12 @@ public:
        return false;
    }

+    bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
+    {
+        shapesInitialized = true;
+        return true;
+    }
+
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
 #ifdef HAVE_OPENCL
@ -597,6 +606,8 @@ public:
 protected:
    // The actual non-negative values determined from @p sliceRanges depends on input size.
    std::vector<std::vector<Range> > finalSliceRanges;
+    bool hasDynamicShapes;
+    bool shapesInitialized;
 };

 class CropLayerImpl CV_FINAL : public SliceLayerImpl
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@ -260,6 +260,40 @@ public:
        addNodeToMatch("Cast", gather);
        setFusedNode("Gather", input, index);
    }
+
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
+                       std::vector<int>& matchedNodesIds,
+                       std::vector<int>& targetNodesIds) CV_OVERRIDE
+    {
+        bool retVal = Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds);
+        size_t matchedNodesNum = matchedNodesIds.size();
+        // Now we check if merging can be made for these Gather and Cast nodes
+        if (!retVal || matchedNodesNum < 2)
+            return retVal;
+        else {
+            int nodeToMatch = matchedNodesIds[matchedNodesNum - 1];
+            const Ptr<ImportNodeWrapper> node = net->getNode(nodeToMatch);
+            if (node->getType() == "Cast") {
+                int inpNodeId = matchedNodesIds[matchedNodesNum - 2];
+                const Ptr<ImportNodeWrapper> inpNode = net->getNode(inpNodeId);
+                if (inpNode->getType() == "Gather") {
+                    int numNodes = net->getNumNodes();
+                    std::string inpNodeName = node->getInputName(0);
+                    for (int i = 0; i < numNodes; ++i) {
+                        const Ptr<ImportNodeWrapper> node_to_check = net->getNode(i);
+                        int numInp = node_to_check->getNumInputs();
+                        for (int inp = 0; inp < numInp; ++inp) {
+                            if (i != nodeToMatch && inpNodeName == node_to_check->getInputName(0)) {
+                                // Another node has the same input node, so it cannot be merged.
+                                return false;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        return retVal;
+    }
 };

 class ExpandSubgraph : public Subgraph
@ -513,6 +547,19 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
        CV_Assert(!field.empty());
        Mat(sizes, CV_64FC1, (void*)field.data()).convertTo(blob, CV_32FC1);
    }
+    else if (datatype == opencv_onnx::TensorProto_DataType_INT32)
+    {
+        if (!tensor_proto.int32_data().empty())
+        {
+            const ::google::protobuf::RepeatedField<int32_t> field = tensor_proto.int32_data();
+            Mat(sizes, CV_32SC1, (void*)field.data()).copyTo(blob);
+        }
+        else
+        {
+            char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
+            Mat(sizes, CV_32SC1, val).copyTo(blob);
+        }
+    }
    else if (datatype == opencv_onnx::TensorProto_DataType_INT64)
    {
        blob.create(sizes, CV_32SC1);
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@ -64,6 +64,7 @@ public:
    ONNXImporter(Net& net, const char *onnxFile)
        : dstNet(net)
    {
+        hasDynamicShapes = false;
        CV_Assert(onnxFile);
        CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile);

@ -84,6 +85,7 @@ public:
    ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
        : dstNet(net)
    {
+        hasDynamicShapes = false;
        CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");

        struct _Buf : public std::streambuf
@ -115,6 +117,7 @@ protected:
    std::map<std::string, Mat> constBlobs;

    std::map<std::string, MatShape> outShapes;  // List of internal blobs shapes.
+    bool hasDynamicShapes;  // Whether the model has inputs with dynamic shapes
    typedef std::map<std::string, MatShape>::iterator IterShape_t;

    std::map<std::string, LayerInfo> layer_id;
@ -413,8 +416,10 @@ void ONNXImporter::populateNet()
        for (int j = 0; j < inpShape.size(); ++j)
        {
            inpShape[j] = tensorShape.dim(j).dim_value();
+            if (!tensorShape.dim(j).dim_param().empty())
+                hasDynamicShapes = true;
        }
-        if (!inpShape.empty())
+        if (!inpShape.empty() && !hasDynamicShapes)
        {
            inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size
        }
@ -461,6 +466,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)

        layerParams.name = name;
        layerParams.type = layer_type;
+        layerParams.set("has_dynamic_shapes", hasDynamicShapes);

        if (layer_type == "MaxPool")
        {
@ -551,11 +557,36 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)

                    CV_Assert(axes.size() <= inpShape.size() - 2);
                    std::vector<int> kernel_size(inpShape.size() - 2, 1);
-                    for (int i = 0; i < axes.size(); i++) {
-                        int axis = clamp(axes.get<int>(i), inpShape.size());
-                        CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
-                        kernel_size[axis - 2] = inpShape[axis];
+                    if (axes.size() == 1 && (clamp(axes.get<int>(0), inpShape.size()) <= 1))
+                    {
+                        int axis = clamp(axes.get<int>(0), inpShape.size());
+                        MatShape newShape = inpShape;
+                        newShape[axis + 1] = total(newShape, axis + 1);
+                        newShape.resize(axis + 2);
+                        newShape.insert(newShape.begin(), 2 - axis, 1);
+
+                        LayerParams reshapeLp;
+                        reshapeLp.type = "Reshape";
+                        reshapeLp.name = layerParams.name + "/reshape";
+                        CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
+                        reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size()));
+
+                        node_proto.set_output(0, reshapeLp.name);
+                        addLayer(reshapeLp, node_proto);
+
+                        kernel_size.resize(2);
+                        kernel_size[0] = inpShape[axis];
+                        node_proto.set_input(0, node_proto.output(0));
                    }
+                    else
+                    {
+                        for (int i = 0; i < axes.size(); i++) {
+                            int axis = clamp(axes.get<int>(i), inpShape.size());
+                            CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
+                            kernel_size[axis - 2] = inpShape[axis];
+                        }
+                    }
+
                    LayerParams poolLp = layerParams;
                    poolLp.name = layerParams.name + "/avg";
                    CV_Assert(layer_id.find(poolLp.name) == layer_id.end());
@ -1276,6 +1307,20 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
            {
                layerParams.type = "Reshape";
                layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
+                if (hasDynamicShapes)
+                {
+                    std::vector<int> dynamicAxes;
+                    std::vector<int> inputIndices;
+                    for (int index = 0; index < inpShape.size(); ++index)
+                    {
+                        if (!maskedAxes[index])
+                            inputIndices.push_back(index);
+                    }
+                    for (int index = 0; index < outShape.size(); ++index)
+                        dynamicAxes.push_back(index);
+                    layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
+                    layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
+                }
            }
            else
                layerParams.type = "Identity";
@ -1338,6 +1383,19 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
            outShape.insert(outShape.begin() + axis, 1);
            layerParams.type = "Reshape";
            layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
+            if (hasDynamicShapes)
+            {
+                std::vector<int> dynamicAxes;
+                std::vector<int> inputIndices;
+                for (int index = 0; index < outShape.size(); ++index) {
+                    if (index != axis)
+                        dynamicAxes.push_back(index);
+                }
+                for (int index = 0; index < inpShape.size(); ++index)
+                    inputIndices.push_back(index);
+                layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
+                layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
+            }
        }
        else if (layer_type == "Expand")
        {
@ -1625,6 +1683,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
                cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size());
                sliceLp.set("begin", paramBegin);
                sliceLp.set("end", paramEnd);
+                sliceLp.set("has_dynamic_shapes", hasDynamicShapes);

                if (inpShape.size() > 1)
                {
@ -1637,6 +1696,17 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
                    layerParams.type = "Reshape";
                    layerParams.set("axis", 0);
                    layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
+                    if (hasDynamicShapes)
+                    {
+                        std::vector<int> dynamicAxes;
+                        std::vector<int> inputIndices;
+                        for (int index = 0; index < inpShape.size(); ++index)
+                            dynamicAxes.push_back(index);
+                        for (int index = 0; index < inpShape.size(); ++index)
+                            inputIndices.push_back(index);
+                        layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
+                        layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
+                    }
                    node_proto.set_input(0, sliceLp.name);
                }
                else
@ -1676,7 +1746,11 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
            for (int i = 1; i < node_proto.input_size(); i++)
                CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());

-            String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
+            String interp_mode;
+            if (layerParams.has("coordinate_transformation_mode"))
+                interp_mode = layerParams.get<String>("coordinate_transformation_mode");
+            else
+                interp_mode = layerParams.get<String>("mode");
            CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");

            layerParams.set("align_corners", interp_mode == "align_corners");
@ -1688,16 +1762,23 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
                shapes.convertTo(shapes, CV_32S);
            int height = shapes.at<int>(2);
            int width  = shapes.at<int>(3);
-            if (node_proto.input_size() == 3)
+            if (hasDynamicShapes)
            {
-                IterShape_t shapeIt = outShapes.find(node_proto.input(0));
-                CV_Assert(shapeIt != outShapes.end());
-                MatShape scales = shapeIt->second;
-                height *= scales[2];
-                width  *= scales[3];
+                layerParams.set("zoom_factor_x", width);
+                layerParams.set("zoom_factor_y", height);
+            }
+            else
+            {
+                if (node_proto.input_size() == 3) {
+                    IterShape_t shapeIt = outShapes.find(node_proto.input(0));
+                    CV_Assert(shapeIt != outShapes.end());
+                    MatShape scales = shapeIt->second;
+                    height *= scales[2];
+                    width *= scales[3];
+                }
+                layerParams.set("width", width);
+                layerParams.set("height", height);
            }
-            layerParams.set("width", width);
-            layerParams.set("height", height);

            if (layerParams.get<String>("mode") == "linear") {
                layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@ -280,9 +280,11 @@ TEST_P(Test_ONNX_layers, ReduceSum)
    testONNXModels("reduce_sum");
 }

-TEST_P(Test_ONNX_layers, ReduceMaxGlobal)
+TEST_P(Test_ONNX_layers, ReduceMax)
 {
    testONNXModels("reduce_max");
+    testONNXModels("reduce_max_axis_0");
+    testONNXModels("reduce_max_axis_1");
 }

 TEST_P(Test_ONNX_layers, Scale)
@ -718,6 +720,10 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight)

 TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
    String basename = "conv1d_variable_wb";
    Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
    ASSERT_FALSE(net.empty());
@ -738,6 +744,37 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias)
    normAssert(ref, out, "", default_l1, default_lInf);
 }

+TEST_P(Test_ONNX_layers, GatherMultiOutput)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE);
+#endif
+
+    testONNXModels("gather_multi_output");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+    testONNXModels("squeeze_and_conv_dynamic_axes");
+    testONNXModels("unsqueeze_and_conv_dynamic_axes");
+    testONNXModels("gather_dynamic_axes");
+    testONNXModels("gather_scalar_dynamic_axes");
+    testONNXModels("slice_dynamic_axes");
+    testONNXModels("slice_opset_11_dynamic_axes");
+    testONNXModels("resize_opset11_torch1.6_dynamic_axes");
+    testONNXModels("average_pooling_dynamic_axes");
+    testONNXModels("maxpooling_sigmoid_dynamic_axes");
+}
+
 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());

 class Test_ONNX_nets : public Test_ONNX_layers