Merge pull request #20192 from alalek:update_libjpeg-turbo

2025-06-07 17:44:04 +08:00 · 2021-06-02 16:30:24 +00:00 · 2021-06-02 16:30:24 +00:00 · 81afeda537
commit 81afeda537
parent e10de25e86 dcb4cabb26
37 changed files with 878 additions and 685 deletions
--- a/3rdparty/libjpeg-turbo/CMakeLists.txt
+++ b/3rdparty/libjpeg-turbo/CMakeLists.txt
@ -3,10 +3,10 @@ project(${JPEG_LIBRARY} C)
 ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wsign-compare -Wshorten-64-to-32 -Wimplicit-fallthrough)
 set(VERSION_MAJOR 2)
-set(VERSION_MINOR 0)
+set(VERSION_MINOR 1)
-set(VERSION_REVISION 6)
+set(VERSION_REVISION 0)
 set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_REVISION})
-set(LIBJPEG_TURBO_VERSION_NUMBER 2000006)
+set(LIBJPEG_TURBO_VERSION_NUMBER 2001000)
 string(TIMESTAMP BUILD "opencv-${OPENCV_VERSION}-libjpeg-turbo")
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
@ -46,7 +46,6 @@ if(UNIX)
  ocv_update(HAVE_UNSIGNED_SHORT 1)
  # undef INCOMPLETE_TYPES_BROKEN
  ocv_update(RIGHT_SHIFT_IS_UNSIGNED 0)
  ocv_update(__CHAR_UNSIGNED__ 0)
 endif()
--- a/3rdparty/libjpeg-turbo/LICENSE.md
+++ b/3rdparty/libjpeg-turbo/LICENSE.md
@ -91,7 +91,7 @@ best of our understanding.
 The Modified (3-clause) BSD License
 ===================================
-Copyright (C)2009-2020 D. R. Commander.  All Rights Reserved.
+Copyright (C)2009-2021 D. R. Commander.  All Rights Reserved.<br>
 Copyright (C)2015 Viktor Szathmáry.  All Rights Reserved.
 Redistribution and use in source and binary forms, with or without
--- a/3rdparty/libjpeg-turbo/README.ijg
+++ b/3rdparty/libjpeg-turbo/README.ijg
@ -128,7 +128,7 @@ with respect to this software, its quality, accuracy, merchantability, or
 fitness for a particular purpose.  This software is provided "AS IS", and you,
 its user, assume the entire risk as to its quality and accuracy.
-This software is copyright (C) 1991-2016, Thomas G. Lane, Guido Vollbeding.
+This software is copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
 All Rights Reserved except as specified below.
 Permission is hereby granted to use, copy, modify, and distribute this
@ -159,19 +159,6 @@ commercial products, provided that all warranty or liability claims are
 assumed by the product vendor.
 The IJG distribution formerly included code to read and write GIF files.
 To avoid entanglement with the Unisys LZW patent (now expired), GIF reading
 support has been removed altogether, and the GIF writer has been simplified
 to produce "uncompressed GIFs".  This technique does not use the LZW
 algorithm; the resulting GIF files are larger than usual, but are readable
 by all standard GIF decoders.
 We are required to state that
    "The Graphics Interchange Format(c) is the Copyright property of
    CompuServe Incorporated.  GIF(sm) is a Service Mark property of
    CompuServe Incorporated."
 REFERENCES
 ==========
--- a/3rdparty/libjpeg-turbo/README.md
+++ b/3rdparty/libjpeg-turbo/README.md
@ -3,7 +3,7 @@ Background
 libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
 baseline JPEG compression and decompression on x86, x86-64, Arm, PowerPC, and
-MIPS systems, as well as progressive JPEG compression on x86 and x86-64
+MIPS systems, as well as progressive JPEG compression on x86, x86-64, and Arm
 systems.  On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
 all else being equal.  On other types of systems, libjpeg-turbo can still
 outperform libjpeg by a significant amount, by virtue of its highly-optimized
--- a/3rdparty/libjpeg-turbo/jconfig.h.in
+++ b/3rdparty/libjpeg-turbo/jconfig.h.in
@ -61,11 +61,6 @@
   unsigned. */
 #cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1
 /* Define to 1 if type `char' is unsigned and you are not using gcc.  */
 #ifndef __CHAR_UNSIGNED__
  #cmakedefine __CHAR_UNSIGNED__ 1
 #endif
 /* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */
--- a/3rdparty/libjpeg-turbo/jconfig.h.win.in
+++ b/3rdparty/libjpeg-turbo/jconfig.h.win.in
@ -18,7 +18,6 @@
 #define HAVE_UNSIGNED_SHORT
 #undef INCOMPLETE_TYPES_BROKEN
 #undef RIGHT_SHIFT_IS_UNSIGNED
 #undef __CHAR_UNSIGNED__
 /* Define "boolean" as unsigned char, not int, per Windows custom */
 #ifndef __RPCNDR_H__            /* don't conflict if rpcndr.h already read */
--- a/3rdparty/libjpeg-turbo/src/jccolext.c
+++ b/3rdparty/libjpeg-turbo/src/jccolext.c
@ -48,9 +48,9 @@ rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
    outptr2 = output_buf[2][output_row];
    output_row++;
    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
+      r = inptr[RGB_RED];
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      g = inptr[RGB_GREEN];
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      b = inptr[RGB_BLUE];
      inptr += RGB_PIXELSIZE;
      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
       * must be too; we do not need an explicit range-limiting operation.
@ -100,9 +100,9 @@ rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
    outptr = output_buf[0][output_row];
    output_row++;
    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
+      r = inptr[RGB_RED];
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      g = inptr[RGB_GREEN];
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      b = inptr[RGB_BLUE];
      inptr += RGB_PIXELSIZE;
      /* Y */
      outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
@ -135,9 +135,9 @@ rgb_rgb_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
    outptr2 = output_buf[2][output_row];
    output_row++;
    for (col = 0; col < num_cols; col++) {
-      outptr0[col] = GETJSAMPLE(inptr[RGB_RED]);
+      outptr0[col] = inptr[RGB_RED];
-      outptr1[col] = GETJSAMPLE(inptr[RGB_GREEN]);
+      outptr1[col] = inptr[RGB_GREEN];
-      outptr2[col] = GETJSAMPLE(inptr[RGB_BLUE]);
+      outptr2[col] = inptr[RGB_BLUE];
      inptr += RGB_PIXELSIZE;
    }
  }
--- a/3rdparty/libjpeg-turbo/src/jccolor.c
+++ b/3rdparty/libjpeg-turbo/src/jccolor.c
@ -392,11 +392,11 @@ cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
    outptr3 = output_buf[3][output_row];
    output_row++;
    for (col = 0; col < num_cols; col++) {
-      r = MAXJSAMPLE - GETJSAMPLE(inptr[0]);
+      r = MAXJSAMPLE - inptr[0];
-      g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
+      g = MAXJSAMPLE - inptr[1];
-      b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
+      b = MAXJSAMPLE - inptr[2];
      /* K passes through as-is */
-      outptr3[col] = inptr[3];  /* don't need GETJSAMPLE here */
+      outptr3[col] = inptr[3];
      inptr += 4;
      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
       * must be too; we do not need an explicit range-limiting operation.
@ -438,7 +438,7 @@ grayscale_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
    outptr = output_buf[0][output_row];
    output_row++;
    for (col = 0; col < num_cols; col++) {
-      outptr[col] = inptr[0];   /* don't need GETJSAMPLE() here */
+      outptr[col] = inptr[0];
      inptr += instride;
    }
  }
@ -497,7 +497,7 @@ null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
        inptr = *input_buf;
        outptr = output_buf[ci][output_row];
        for (col = 0; col < num_cols; col++) {
-          outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
+          outptr[col] = inptr[ci];
          inptr += nc;
        }
      }
--- a/3rdparty/libjpeg-turbo/src/jcdctmgr.c
+++ b/3rdparty/libjpeg-turbo/src/jcdctmgr.c
@ -381,19 +381,19 @@ convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
    elemptr = sample_data[elemr] + start_col;
 #if DCTSIZE == 8                /* unroll the inner loop */
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
 #else
    {
      register int elemc;
      for (elemc = DCTSIZE; elemc > 0; elemc--)
-        *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+        *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
    }
 #endif
  }
@ -533,20 +533,19 @@ convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
    elemptr = sample_data[elemr] + start_col;
 #if DCTSIZE == 8                /* unroll the inner loop */
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
 #else
    {
      register int elemc;
      for (elemc = DCTSIZE; elemc > 0; elemc--)
-        *workspaceptr++ = (FAST_FLOAT)
+        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
                          (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
    }
 #endif
  }
--- a/3rdparty/libjpeg-turbo/src/jchuff.c
+++ b/3rdparty/libjpeg-turbo/src/jchuff.c
@ -4,8 +4,10 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2014-2016, 2018-2019, D. R. Commander.
+ * Copyright (C) 2009-2011, 2014-2016, 2018-2021, D. R. Commander.
 * Copyright (C) 2015, Matthieu Darbois.
 * Copyright (C) 2018, Matthias Räncker.
 * Copyright (C) 2020, Arm Limited.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -42,15 +44,19 @@
 * flags (this defines __thumb__).
 */
-/* NOTE: Both GCC and Clang define __GNUC__ */
+#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
-#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+    defined(_M_ARM64)
 #if !defined(__thumb__) || defined(__thumb2__)
 #define USE_CLZ_INTRINSIC
 #endif
 #endif
 #ifdef USE_CLZ_INTRINSIC
 #if defined(_MSC_VER) && !defined(__clang__)
 #define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
 #else
 #define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
 #endif
 #define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
 #else
 #include "jpeg_nbits_table.h"
@ -65,32 +71,43 @@
 * but must not be updated permanently until we complete the MCU.
 */
 #if defined(__x86_64__) && defined(__ILP32__)
 typedef unsigned long long bit_buf_type;
 #else
 typedef size_t bit_buf_type;
 #endif
 /* NOTE: The more optimal Huffman encoding algorithm is only used by the
 * intrinsics implementation of the Arm Neon SIMD extensions, which is why we
 * retain the old Huffman encoder behavior when using the GAS implementation.
 */
 #if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \
                            defined(_M_ARM) || defined(_M_ARM64))
 typedef unsigned long long simd_bit_buf_type;
 #else
 typedef bit_buf_type simd_bit_buf_type;
 #endif
 #if (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 8) || defined(_WIN64) || \
    (defined(__x86_64__) && defined(__ILP32__))
 #define BIT_BUF_SIZE  64
 #elif (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 4) || defined(_WIN32)
 #define BIT_BUF_SIZE  32
 #else
 #error Cannot determine word size
 #endif
 #define SIMD_BIT_BUF_SIZE  (sizeof(simd_bit_buf_type) * 8)
 typedef struct {
-  size_t put_buffer;                    /* current bit-accumulation buffer */
+  union {
-  int put_bits;                         /* # of bits now in it */
+    bit_buf_type c;
    simd_bit_buf_type simd;
  } put_buffer;                         /* current bit accumulation buffer */
  int free_bits;                        /* # of bits available in it */
                                        /* (Neon GAS: # of bits now in it) */
  int last_dc_val[MAX_COMPS_IN_SCAN];   /* last DC coef for each component */
 } savable_state;
 /* This macro is to work around compilers with missing or broken
 * structure assignment.  You'll need to fix this code if you have
 * such a compiler and you change MAX_COMPS_IN_SCAN.
 */
 #ifndef NO_STRUCT_ASSIGN
 #define ASSIGN_STATE(dest, src)  ((dest) = (src))
 #else
 #if MAX_COMPS_IN_SCAN == 4
 #define ASSIGN_STATE(dest, src) \
  ((dest).put_buffer = (src).put_buffer, \
   (dest).put_bits = (src).put_bits, \
   (dest).last_dc_val[0] = (src).last_dc_val[0], \
   (dest).last_dc_val[1] = (src).last_dc_val[1], \
   (dest).last_dc_val[2] = (src).last_dc_val[2], \
   (dest).last_dc_val[3] = (src).last_dc_val[3])
 #endif
 #endif
 typedef struct {
  struct jpeg_entropy_encoder pub; /* public fields */
@ -123,6 +140,7 @@ typedef struct {
  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
  savable_state cur;            /* Current bit buffer & DC state */
  j_compress_ptr cinfo;         /* dump_buffer needs access to this */
  int simd;
 } working_state;
@ -201,8 +219,17 @@ start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)
  }
  /* Initialize bit buffer to empty */
-  entropy->saved.put_buffer = 0;
+  if (entropy->simd) {
-  entropy->saved.put_bits = 0;
+    entropy->saved.put_buffer.simd = 0;
 #if defined(__aarch64__) && !defined(NEON_INTRINSICS)
    entropy->saved.free_bits = 0;
 #else
    entropy->saved.free_bits = SIMD_BIT_BUF_SIZE;
 #endif
  } else {
    entropy->saved.put_buffer.c = 0;
    entropy->saved.free_bits = BIT_BUF_SIZE;
  }
  /* Initialize restart stuff */
  entropy->restarts_to_go = cinfo->restart_interval;
@ -287,6 +314,7 @@ jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno,
   * this lets us detect duplicate VAL entries here, and later
   * allows emit_bits to detect any attempt to emit such symbols.
   */
  MEMZERO(dtbl->ehufco, sizeof(dtbl->ehufco));
  MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi));
  /* This is also a convenient place to check for out-of-range
@ -334,94 +362,94 @@ dump_buffer(working_state *state)
 /* Outputting bits to the file */
-/* These macros perform the same task as the emit_bits() function in the
+/* Output byte b and, speculatively, an additional 0 byte.  0xFF must be
- * original libjpeg code.  In addition to reducing overhead by explicitly
+ * encoded as 0xFF 0x00, so the output buffer pointer is advanced by 2 if the
- * inlining the code, additional performance is achieved by taking into
+ * byte is 0xFF.  Otherwise, the output buffer pointer is advanced by 1, and
- * account the size of the bit buffer and waiting until it is almost full
+ * the speculative 0 byte will be overwritten by the next byte.
 * before emptying it.  This mostly benefits 64-bit platforms, since 6
 * bytes can be stored in a 64-bit bit buffer before it has to be emptied.
 */
-
+#define EMIT_BYTE(b) { \
-#define EMIT_BYTE() { \
+  buffer[0] = (JOCTET)(b); \
-  JOCTET c; \
+  buffer[1] = 0; \
-  put_bits -= 8; \
+  buffer -= -2 + ((JOCTET)(b) < 0xFF); \
  c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \
  *buffer++ = c; \
  if (c == 0xFF)  /* need to stuff a zero byte? */ \
    *buffer++ = 0; \
 }
-#define PUT_BITS(code, size) { \
+/* Output the entire bit buffer.  If there are no 0xFF bytes in it, then write
-  put_bits += size; \
+ * directly to the output buffer.  Otherwise, use the EMIT_BYTE() macro to
-  put_buffer = (put_buffer << size) | code; \
+ * encode 0xFF as 0xFF 0x00.
-}
+ */
 #if BIT_BUF_SIZE == 64
-#if SIZEOF_SIZE_T != 8 && !defined(_WIN64)
+#define FLUSH() { \
-
+  if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \
-#define CHECKBUF15() { \
+    EMIT_BYTE(put_buffer >> 56) \
-  if (put_bits > 15) { \
+    EMIT_BYTE(put_buffer >> 48) \
-    EMIT_BYTE() \
+    EMIT_BYTE(put_buffer >> 40) \
-    EMIT_BYTE() \
+    EMIT_BYTE(put_buffer >> 32) \
    EMIT_BYTE(put_buffer >> 24) \
    EMIT_BYTE(put_buffer >> 16) \
    EMIT_BYTE(put_buffer >>  8) \
    EMIT_BYTE(put_buffer      ) \
  } else { \
    buffer[0] = (JOCTET)(put_buffer >> 56); \
    buffer[1] = (JOCTET)(put_buffer >> 48); \
    buffer[2] = (JOCTET)(put_buffer >> 40); \
    buffer[3] = (JOCTET)(put_buffer >> 32); \
    buffer[4] = (JOCTET)(put_buffer >> 24); \
    buffer[5] = (JOCTET)(put_buffer >> 16); \
    buffer[6] = (JOCTET)(put_buffer >> 8); \
    buffer[7] = (JOCTET)(put_buffer); \
    buffer += 8; \
  } \
 }
 #endif
 #define CHECKBUF31() { \
  if (put_bits > 31) { \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
  } \
 }
 #define CHECKBUF47() { \
  if (put_bits > 47) { \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
  } \
 }
 #if !defined(_WIN32) && !defined(SIZEOF_SIZE_T)
 #error Cannot determine word size
 #endif
 #if SIZEOF_SIZE_T == 8 || defined(_WIN64)
 #define EMIT_BITS(code, size) { \
  CHECKBUF47() \
  PUT_BITS(code, size) \
 }
 #define EMIT_CODE(code, size) { \
  temp2 &= (((JLONG)1) << nbits) - 1; \
  CHECKBUF31() \
  PUT_BITS(code, size) \
  PUT_BITS(temp2, nbits) \
 }
 #else
-#define EMIT_BITS(code, size) { \
+#define FLUSH() { \
-  PUT_BITS(code, size) \
+  if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \
-  CHECKBUF15() \
+    EMIT_BYTE(put_buffer >> 24) \
-}
+    EMIT_BYTE(put_buffer >> 16) \
-
+    EMIT_BYTE(put_buffer >>  8) \
-#define EMIT_CODE(code, size) { \
+    EMIT_BYTE(put_buffer      ) \
-  temp2 &= (((JLONG)1) << nbits) - 1; \
+  } else { \
-  PUT_BITS(code, size) \
+    buffer[0] = (JOCTET)(put_buffer >> 24); \
-  CHECKBUF15() \
+    buffer[1] = (JOCTET)(put_buffer >> 16); \
-  PUT_BITS(temp2, nbits) \
+    buffer[2] = (JOCTET)(put_buffer >> 8); \
-  CHECKBUF15() \
+    buffer[3] = (JOCTET)(put_buffer); \
    buffer += 4; \
  } \
 }
 #endif
 /* Fill the bit buffer to capacity with the leading bits from code, then output
 * the bit buffer and put the remaining bits from code into the bit buffer.
 */
 #define PUT_AND_FLUSH(code, size) { \
  put_buffer = (put_buffer << (size + free_bits)) | (code >> -free_bits); \
  FLUSH() \
  free_bits += BIT_BUF_SIZE; \
  put_buffer = code; \
 }
 /* Insert code into the bit buffer and output the bit buffer if needed.
 * NOTE: We can't flush with free_bits == 0, since the left shift in
 * PUT_AND_FLUSH() would have undefined behavior.
 */
 #define PUT_BITS(code, size) { \
  free_bits -= size; \
  if (free_bits < 0) \
    PUT_AND_FLUSH(code, size) \
  else \
    put_buffer = (put_buffer << size) | code; \
 }
 #define PUT_CODE(code, size) { \
  temp &= (((JLONG)1) << nbits) - 1; \
  temp |= code << nbits; \
  nbits += size; \
  PUT_BITS(temp, nbits) \
 }
 /* Although it is exceedingly rare, it is possible for a Huffman-encoded
 * coefficient block to be larger than the 128-byte unencoded block.  For each
@ -444,6 +472,7 @@ dump_buffer(working_state *state)
 #define STORE_BUFFER() { \
  if (localbuf) { \
    size_t bytes, bytestocopy; \
    bytes = buffer - _buffer; \
    buffer = _buffer; \
    while (bytes > 0) { \
@ -466,20 +495,46 @@ dump_buffer(working_state *state)
 LOCAL(boolean)
 flush_bits(working_state *state)
 {
-  JOCTET _buffer[BUFSIZE], *buffer;
+  JOCTET _buffer[BUFSIZE], *buffer, temp;
-  size_t put_buffer;  int put_bits;
+  simd_bit_buf_type put_buffer;  int put_bits;
-  size_t bytes, bytestocopy;  int localbuf = 0;
+  int localbuf = 0;
  if (state->simd) {
 #if defined(__aarch64__) && !defined(NEON_INTRINSICS)
    put_bits = state->cur.free_bits;
 #else
    put_bits = SIMD_BIT_BUF_SIZE - state->cur.free_bits;
 #endif
    put_buffer = state->cur.put_buffer.simd;
  } else {
    put_bits = BIT_BUF_SIZE - state->cur.free_bits;
    put_buffer = state->cur.put_buffer.c;
  }
  put_buffer = state->cur.put_buffer;
  put_bits = state->cur.put_bits;
  LOAD_BUFFER()
-  /* fill any partial byte with ones */
+  while (put_bits >= 8) {
-  PUT_BITS(0x7F, 7)
+    put_bits -= 8;
-  while (put_bits >= 8) EMIT_BYTE()
+    temp = (JOCTET)(put_buffer >> put_bits);
    EMIT_BYTE(temp)
  }
  if (put_bits) {
    /* fill partial byte with ones */
    temp = (JOCTET)((put_buffer << (8 - put_bits)) | (0xFF >> put_bits));
    EMIT_BYTE(temp)
  }
-  state->cur.put_buffer = 0;    /* and reset bit-buffer to empty */
+  if (state->simd) {                    /* and reset bit buffer to empty */
-  state->cur.put_bits = 0;
+    state->cur.put_buffer.simd = 0;
 #if defined(__aarch64__) && !defined(NEON_INTRINSICS)
    state->cur.free_bits = 0;
 #else
    state->cur.free_bits = SIMD_BIT_BUF_SIZE;
 #endif
  } else {
    state->cur.put_buffer.c = 0;
    state->cur.free_bits = BIT_BUF_SIZE;
  }
  STORE_BUFFER()
  return TRUE;
@ -493,7 +548,7 @@ encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val,
                      c_derived_tbl *dctbl, c_derived_tbl *actbl)
 {
  JOCTET _buffer[BUFSIZE], *buffer;
-  size_t bytes, bytestocopy;  int localbuf = 0;
+  int localbuf = 0;
  LOAD_BUFFER()
@ -509,53 +564,41 @@ LOCAL(boolean)
 encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
                 c_derived_tbl *dctbl, c_derived_tbl *actbl)
 {
-  int temp, temp2, temp3;
+  int temp, nbits, free_bits;
-  int nbits;
+  bit_buf_type put_buffer;
  int r, code, size;
  JOCTET _buffer[BUFSIZE], *buffer;
-  size_t put_buffer;  int put_bits;
+  int localbuf = 0;
  int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
  size_t bytes, bytestocopy;  int localbuf = 0;
-  put_buffer = state->cur.put_buffer;
+  free_bits = state->cur.free_bits;
-  put_bits = state->cur.put_bits;
+  put_buffer = state->cur.put_buffer.c;
  LOAD_BUFFER()
  /* Encode the DC coefficient difference per section F.1.2.1 */
-  temp = temp2 = block[0] - last_dc_val;
+  temp = block[0] - last_dc_val;
  /* This is a well-known technique for obtaining the absolute value without a
   * branch.  It is derived from an assembly language technique presented in
   * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by
-   * Agner Fog.
+   * Agner Fog.  This code assumes we are on a two's complement machine.
   */
-  temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
+  nbits = temp >> (CHAR_BIT * sizeof(int) - 1);
-  temp ^= temp3;
+  temp += nbits;
-  temp -= temp3;
+  nbits ^= temp;
  /* For a negative input, want temp2 = bitwise complement of abs(input) */
  /* This code assumes we are on a two's complement machine */
  temp2 += temp3;
  /* Find the number of bits needed for the magnitude of the coefficient */
-  nbits = JPEG_NBITS(temp);
+  nbits = JPEG_NBITS(nbits);
-  /* Emit the Huffman-coded symbol for the number of bits */
+  /* Emit the Huffman-coded symbol for the number of bits.
-  code = dctbl->ehufco[nbits];
+   * Emit that number of bits of the value, if positive,
-  size = dctbl->ehufsi[nbits];
+   * or the complement of its magnitude, if negative.
-  EMIT_BITS(code, size)
+   */
-
+  PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits])
  /* Mask off any extra bits in code */
  temp2 &= (((JLONG)1) << nbits) - 1;
  /* Emit that number of bits of the value, if positive, */
  /* or the complement of its magnitude, if negative. */
  EMIT_BITS(temp2, nbits)
  /* Encode the AC coefficients per section F.1.2.2 */
-  r = 0;                        /* r = run length of zeros */
+  {
    int r = 0;                  /* r = run length of zeros */
 /* Manually unroll the k loop to eliminate the counter variable.  This
 * improves performance greatly on systems with a limited number of
@ -563,51 +606,46 @@ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
 */
 #define kloop(jpeg_natural_order_of_k) { \
  if ((temp = block[jpeg_natural_order_of_k]) == 0) { \
-    r++; \
+    r += 16; \
  } else { \
    temp2 = temp; \
    /* Branch-less absolute value, bitwise complement, etc., same as above */ \
-    temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \
+    nbits = temp >> (CHAR_BIT * sizeof(int) - 1); \
-    temp ^= temp3; \
+    temp += nbits; \
-    temp -= temp3; \
+    nbits ^= temp; \
-    temp2 += temp3; \
+    nbits = JPEG_NBITS_NONZERO(nbits); \
    nbits = JPEG_NBITS_NONZERO(temp); \
    /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
-    while (r > 15) { \
+    while (r >= 16 * 16) { \
-      EMIT_BITS(code_0xf0, size_0xf0) \
+      r -= 16 * 16; \
-      r -= 16; \
+      PUT_BITS(actbl->ehufco[0xf0], actbl->ehufsi[0xf0]) \
    } \
    /* Emit Huffman symbol for run length / number of bits */ \
-    temp3 = (r << 4) + nbits; \
+    r += nbits; \
-    code = actbl->ehufco[temp3]; \
+    PUT_CODE(actbl->ehufco[r], actbl->ehufsi[r]) \
    size = actbl->ehufsi[temp3]; \
    EMIT_CODE(code, size) \
    r = 0; \
  } \
 }
-  /* One iteration for each value in jpeg_natural_order[] */
+    /* One iteration for each value in jpeg_natural_order[] */
-  kloop(1);   kloop(8);   kloop(16);  kloop(9);   kloop(2);   kloop(3);
+    kloop(1);   kloop(8);   kloop(16);  kloop(9);   kloop(2);   kloop(3);
-  kloop(10);  kloop(17);  kloop(24);  kloop(32);  kloop(25);  kloop(18);
+    kloop(10);  kloop(17);  kloop(24);  kloop(32);  kloop(25);  kloop(18);
-  kloop(11);  kloop(4);   kloop(5);   kloop(12);  kloop(19);  kloop(26);
+    kloop(11);  kloop(4);   kloop(5);   kloop(12);  kloop(19);  kloop(26);
-  kloop(33);  kloop(40);  kloop(48);  kloop(41);  kloop(34);  kloop(27);
+    kloop(33);  kloop(40);  kloop(48);  kloop(41);  kloop(34);  kloop(27);
-  kloop(20);  kloop(13);  kloop(6);   kloop(7);   kloop(14);  kloop(21);
+    kloop(20);  kloop(13);  kloop(6);   kloop(7);   kloop(14);  kloop(21);
-  kloop(28);  kloop(35);  kloop(42);  kloop(49);  kloop(56);  kloop(57);
+    kloop(28);  kloop(35);  kloop(42);  kloop(49);  kloop(56);  kloop(57);
-  kloop(50);  kloop(43);  kloop(36);  kloop(29);  kloop(22);  kloop(15);
+    kloop(50);  kloop(43);  kloop(36);  kloop(29);  kloop(22);  kloop(15);
-  kloop(23);  kloop(30);  kloop(37);  kloop(44);  kloop(51);  kloop(58);
+    kloop(23);  kloop(30);  kloop(37);  kloop(44);  kloop(51);  kloop(58);
-  kloop(59);  kloop(52);  kloop(45);  kloop(38);  kloop(31);  kloop(39);
+    kloop(59);  kloop(52);  kloop(45);  kloop(38);  kloop(31);  kloop(39);
-  kloop(46);  kloop(53);  kloop(60);  kloop(61);  kloop(54);  kloop(47);
+    kloop(46);  kloop(53);  kloop(60);  kloop(61);  kloop(54);  kloop(47);
-  kloop(55);  kloop(62);  kloop(63);
+    kloop(55);  kloop(62);  kloop(63);
-  /* If the last coef(s) were zero, emit an end-of-block code */
+    /* If the last coef(s) were zero, emit an end-of-block code */
-  if (r > 0) {
+    if (r > 0) {
-    code = actbl->ehufco[0];
+      PUT_BITS(actbl->ehufco[0], actbl->ehufsi[0])
-    size = actbl->ehufsi[0];
+    }
    EMIT_BITS(code, size)
  }
-  state->cur.put_buffer = put_buffer;
+  state->cur.put_buffer.c = put_buffer;
-  state->cur.put_bits = put_bits;
+  state->cur.free_bits = free_bits;
  STORE_BUFFER()
  return TRUE;
@ -654,8 +692,9 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
  /* Load up working state */
  state.next_output_byte = cinfo->dest->next_output_byte;
  state.free_in_buffer = cinfo->dest->free_in_buffer;
-  ASSIGN_STATE(state.cur, entropy->saved);
+  state.cur = entropy->saved;
  state.cinfo = cinfo;
  state.simd = entropy->simd;
  /* Emit restart marker if needed */
  if (cinfo->restart_interval) {
@ -694,7 +733,7 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
  /* Completed MCU, so update state */
  cinfo->dest->next_output_byte = state.next_output_byte;
  cinfo->dest->free_in_buffer = state.free_in_buffer;
-  ASSIGN_STATE(entropy->saved, state.cur);
+  entropy->saved = state.cur;
  /* Update restart-interval state too */
  if (cinfo->restart_interval) {
@ -723,8 +762,9 @@ finish_pass_huff(j_compress_ptr cinfo)
  /* Load up working state ... flush_bits needs it */
  state.next_output_byte = cinfo->dest->next_output_byte;
  state.free_in_buffer = cinfo->dest->free_in_buffer;
-  ASSIGN_STATE(state.cur, entropy->saved);
+  state.cur = entropy->saved;
  state.cinfo = cinfo;
  state.simd = entropy->simd;
  /* Flush out the last data */
  if (!flush_bits(&state))
@ -733,7 +773,7 @@ finish_pass_huff(j_compress_ptr cinfo)
  /* Update state */
  cinfo->dest->next_output_byte = state.next_output_byte;
  cinfo->dest->free_in_buffer = state.free_in_buffer;
-  ASSIGN_STATE(entropy->saved, state.cur);
+  entropy->saved = state.cur;
 }
--- a/3rdparty/libjpeg-turbo/src/jcphuff.c
+++ b/3rdparty/libjpeg-turbo/src/jcphuff.c
@ -4,8 +4,9 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1995-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2011, 2015, 2018, D. R. Commander.
+ * Copyright (C) 2011, 2015, 2018, 2021, D. R. Commander.
 * Copyright (C) 2016, 2018, Matthieu Darbois.
 * Copyright (C) 2020, Arm Limited.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -51,15 +52,19 @@
 * flags (this defines __thumb__).
 */
-/* NOTE: Both GCC and Clang define __GNUC__ */
+#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
-#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+    defined(_M_ARM64)
 #if !defined(__thumb__) || defined(__thumb2__)
 #define USE_CLZ_INTRINSIC
 #endif
 #endif
 #ifdef USE_CLZ_INTRINSIC
 #if defined(_MSC_VER) && !defined(__clang__)
 #define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
 #else
 #define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
 #endif
 #define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
 #else
 #include "jpeg_nbits_table.h"
@ -169,24 +174,26 @@ INLINE
 METHODDEF(int)
 count_zeroes(size_t *x)
 {
  int result;
 #if defined(HAVE_BUILTIN_CTZL)
  int result;
  result = __builtin_ctzl(*x);
  *x >>= result;
 #elif defined(HAVE_BITSCANFORWARD64)
  unsigned long result;
  _BitScanForward64(&result, *x);
  *x >>= result;
 #elif defined(HAVE_BITSCANFORWARD)
  unsigned long result;
  _BitScanForward(&result, *x);
  *x >>= result;
 #else
-  result = 0;
+  int result = 0;
  while ((*x & 1) == 0) {
    ++result;
    *x >>= 1;
  }
 #endif
-  return result;
+  return (int)result;
 }
@ -860,7 +867,7 @@ encode_mcu_AC_refine_prepare(const JCOEF *block,
 #define ENCODE_COEFS_AC_REFINE(label) { \
  while (zerobits) { \
-    int idx = count_zeroes(&zerobits); \
+    idx = count_zeroes(&zerobits); \
    r += idx; \
    cabsvalue += idx; \
    signbits >>= idx; \
@ -917,7 +924,7 @@ METHODDEF(boolean)
 encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
-  register int temp, r;
+  register int temp, r, idx;
  char *BR_buffer;
  unsigned int BR;
  int Sl = cinfo->Se - cinfo->Ss + 1;
@ -968,7 +975,7 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
  if (zerobits) {
    int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
-    int idx = count_zeroes(&zerobits);
+    idx = count_zeroes(&zerobits);
    signbits >>= idx;
    idx += diff;
    r += idx;
--- a/3rdparty/libjpeg-turbo/src/jcsample.c
+++ b/3rdparty/libjpeg-turbo/src/jcsample.c
@ -6,7 +6,7 @@
 * libjpeg-turbo Modifications:
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright (C) 2014, MIPS Technologies, Inc., California.
- * Copyright (C) 2015, D. R. Commander.
+ * Copyright (C) 2015, 2019, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -103,7 +103,7 @@ expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols,
  if (numcols > 0) {
    for (row = 0; row < num_rows; row++) {
      ptr = image_data[row] + input_cols;
-      pixval = ptr[-1];         /* don't need GETJSAMPLE() here */
+      pixval = ptr[-1];
      for (count = numcols; count > 0; count--)
        *ptr++ = pixval;
    }
@ -174,7 +174,7 @@ int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
      for (v = 0; v < v_expand; v++) {
        inptr = input_data[inrow + v] + outcol_h;
        for (h = 0; h < h_expand; h++) {
-          outvalue += (JLONG)GETJSAMPLE(*inptr++);
+          outvalue += (JLONG)(*inptr++);
        }
      }
      *outptr++ = (JSAMPLE)((outvalue + numpix2) / numpix);
@ -237,8 +237,7 @@ h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    inptr = input_data[outrow];
    bias = 0;                   /* bias = 0,1,0,1,... for successive samples */
    for (outcol = 0; outcol < output_cols; outcol++) {
-      *outptr++ =
+      *outptr++ = (JSAMPLE)((inptr[0] + inptr[1] + bias) >> 1);
        (JSAMPLE)((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) + bias) >> 1);
      bias ^= 1;                /* 0=>1, 1=>0 */
      inptr += 2;
    }
@ -277,8 +276,7 @@ h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    bias = 1;                   /* bias = 1,2,1,2,... for successive samples */
    for (outcol = 0; outcol < output_cols; outcol++) {
      *outptr++ =
-        (JSAMPLE)((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+        (JSAMPLE)((inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1] + bias) >> 2);
                   GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) + bias) >> 2);
      bias ^= 3;                /* 1=>2, 2=>1 */
      inptr0 += 2;  inptr1 += 2;
    }
@ -337,33 +335,25 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    below_ptr = input_data[inrow + 2];
    /* Special case for first column: pretend column -1 is same as column 0 */
-    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+    membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
-                GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
-    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+               inptr0[0] + inptr0[2] + inptr1[0] + inptr1[2];
               GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
               GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
               GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
    neighsum += neighsum;
-    neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
+    neighsum += above_ptr[0] + above_ptr[2] + below_ptr[0] + below_ptr[2];
                GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
    membersum = membersum * memberscale + neighsum * neighscale;
    *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
    inptr0 += 2;  inptr1 += 2;  above_ptr += 2;  below_ptr += 2;
    for (colctr = output_cols - 2; colctr > 0; colctr--) {
      /* sum of pixels directly mapped to this output element */
-      membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+      membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
                  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
      /* sum of edge-neighbor pixels */
-      neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+      neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
-                 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+                 inptr0[-1] + inptr0[2] + inptr1[-1] + inptr1[2];
                 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
                 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
      /* The edge-neighbors count twice as much as corner-neighbors */
      neighsum += neighsum;
      /* Add in the corner-neighbors */
-      neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
+      neighsum += above_ptr[-1] + above_ptr[2] + below_ptr[-1] + below_ptr[2];
                  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
      /* form final output scaled up by 2^16 */
      membersum = membersum * memberscale + neighsum * neighscale;
      /* round, descale and output it */
@ -372,15 +362,11 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    }
    /* Special case for last column */
-    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+    membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
-                GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
-    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+               inptr0[-1] + inptr0[1] + inptr1[-1] + inptr1[1];
               GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
               GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
               GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
    neighsum += neighsum;
-    neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
+    neighsum += above_ptr[-1] + above_ptr[1] + below_ptr[-1] + below_ptr[1];
                GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
    membersum = membersum * memberscale + neighsum * neighscale;
    *outptr = (JSAMPLE)((membersum + 32768) >> 16);
@ -429,21 +415,18 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    below_ptr = input_data[outrow + 1];
    /* Special case for first column */
-    colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
+    colsum = (*above_ptr++) + (*below_ptr++) + inptr[0];
-             GETJSAMPLE(*inptr);
+    membersum = *inptr++;
-    membersum = GETJSAMPLE(*inptr++);
+    nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
    nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
                 GETJSAMPLE(*inptr);
    neighsum = colsum + (colsum - membersum) + nextcolsum;
    membersum = membersum * memberscale + neighsum * neighscale;
    *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
    lastcolsum = colsum;  colsum = nextcolsum;
    for (colctr = output_cols - 2; colctr > 0; colctr--) {
-      membersum = GETJSAMPLE(*inptr++);
+      membersum = *inptr++;
      above_ptr++;  below_ptr++;
-      nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
+      nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
                   GETJSAMPLE(*inptr);
      neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
      membersum = membersum * memberscale + neighsum * neighscale;
      *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
@ -451,7 +434,7 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    }
    /* Special case for last column */
-    membersum = GETJSAMPLE(*inptr);
+    membersum = *inptr;
    neighsum = lastcolsum + (colsum - membersum) + colsum;
    membersum = membersum * memberscale + neighsum * neighscale;
    *outptr = (JSAMPLE)((membersum + 32768) >> 16);
--- a/3rdparty/libjpeg-turbo/src/jdapistd.c
+++ b/3rdparty/libjpeg-turbo/src/jdapistd.c
@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2015-2018, 2020, D. R. Commander.
+ * Copyright (C) 2010, 2015-2020, D. R. Commander.
 * Copyright (C) 2015, Google, Inc.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
@ -319,6 +319,8 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
 {
  JDIMENSION n;
  my_master_ptr master = (my_master_ptr)cinfo->master;
  JSAMPLE dummy_sample[1] = { 0 };
  JSAMPROW dummy_row = dummy_sample;
  JSAMPARRAY scanlines = NULL;
  void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                         JDIMENSION input_row, JSAMPARRAY output_buf,
@ -329,6 +331,10 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
  if (cinfo->cconvert && cinfo->cconvert->color_convert) {
    color_convert = cinfo->cconvert->color_convert;
    cinfo->cconvert->color_convert = noop_convert;
    /* This just prevents UBSan from complaining about adding 0 to a NULL
     * pointer.  The pointer isn't actually used.
     */
    scanlines = &dummy_row;
  }
  if (cinfo->cquantize && cinfo->cquantize->color_quantize) {
@ -532,6 +538,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
         * decoded coefficients.  This is ~5% faster for large subsets, but
         * it's tough to tell a difference for smaller images.
         */
        if (!cinfo->entropy->insufficient_data)
          cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
        (*cinfo->entropy->decode_mcu) (cinfo, NULL);
      }
    }
--- a/3rdparty/libjpeg-turbo/src/jdarith.c
+++ b/3rdparty/libjpeg-turbo/src/jdarith.c
@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Developed 1997-2015 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2018, D. R. Commander.
+ * Copyright (C) 2015-2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -80,7 +80,7 @@ get_byte(j_decompress_ptr cinfo)
    if (!(*src->fill_input_buffer) (cinfo))
      ERREXIT(cinfo, JERR_CANT_SUSPEND);
  src->bytes_in_buffer--;
-  return GETJOCTET(*src->next_input_byte++);
+  return *src->next_input_byte++;
 }
@ -665,8 +665,16 @@ bad:
    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
      int *coef_bit_ptr = &cinfo->coef_bits[cindex][0];
      int *prev_coef_bit_ptr =
        &cinfo->coef_bits[cindex + cinfo->num_components][0];
      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
        WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
      for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) {
        if (cinfo->input_scan_number > 1)
          prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi];
        else
          prev_coef_bit_ptr[coefi] = 0;
      }
      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
        int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
        if (cinfo->Ah != expected)
@ -727,6 +735,7 @@ bad:
  entropy->c = 0;
  entropy->a = 0;
  entropy->ct = -16;    /* force reading 2 initial bytes to fill C */
  entropy->pub.insufficient_data = FALSE;
  /* Initialize restart counter */
  entropy->restarts_to_go = cinfo->restart_interval;
@ -763,7 +772,7 @@ jinit_arith_decoder(j_decompress_ptr cinfo)
    int *coef_bit_ptr, ci;
    cinfo->coef_bits = (int (*)[DCTSIZE2])
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                  cinfo->num_components * DCTSIZE2 *
+                                  cinfo->num_components * 2 * DCTSIZE2 *
                                  sizeof(int));
    coef_bit_ptr = &cinfo->coef_bits[0][0];
    for (ci = 0; ci < cinfo->num_components; ci++)
--- a/3rdparty/libjpeg-turbo/src/jdcoefct.c
+++ b/3rdparty/libjpeg-turbo/src/jdcoefct.c
@ -5,7 +5,7 @@
 * Copyright (C) 1994-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2010, 2015-2016, D. R. Commander.
+ * Copyright (C) 2010, 2015-2016, 2019-2020, D. R. Commander.
 * Copyright (C) 2015, 2020, Google, Inc.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
@ -102,6 +102,8 @@ decompress_onepass(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
      /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
      jzero_far((void *)coef->MCU_buffer[0],
                (size_t)(cinfo->blocks_in_MCU * sizeof(JBLOCK)));
      if (!cinfo->entropy->insufficient_data)
        cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
      if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
        /* Suspension forced; update state counters and exit */
        coef->MCU_vert_offset = yoffset;
@ -227,6 +229,8 @@ consume_data(j_decompress_ptr cinfo)
          }
        }
      }
      if (!cinfo->entropy->insufficient_data)
        cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
      /* Try to fetch the MCU. */
      if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
        /* Suspension forced; update state counters and exit */
@ -326,19 +330,22 @@ decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
 #ifdef BLOCK_SMOOTHING_SUPPORTED
 /*
- * This code applies interblock smoothing as described by section K.8
+ * This code applies interblock smoothing; the first 9 AC coefficients are
- * of the JPEG standard: the first 5 AC coefficients are estimated from
+ * estimated from the DC values of a DCT block and its 24 neighboring blocks.
 * the DC values of a DCT block and its 8 neighboring blocks.
 * We apply smoothing only for progressive JPEG decoding, and only if
 * the coefficients it can estimate are not yet known to full precision.
 */
-/* Natural-order array positions of the first 5 zigzag-order coefficients */
+/* Natural-order array positions of the first 9 zigzag-order coefficients */
 #define Q01_POS  1
 #define Q10_POS  8
 #define Q20_POS  16
 #define Q11_POS  9
 #define Q02_POS  2
 #define Q03_POS  3
 #define Q12_POS  10
 #define Q21_POS  17
 #define Q30_POS  24
 /*
 * Determine whether block smoothing is applicable and safe.
@ -356,8 +363,8 @@ smoothing_ok(j_decompress_ptr cinfo)
  int ci, coefi;
  jpeg_component_info *compptr;
  JQUANT_TBL *qtable;
-  int *coef_bits;
+  int *coef_bits, *prev_coef_bits;
-  int *coef_bits_latch;
+  int *coef_bits_latch, *prev_coef_bits_latch;
  if (!cinfo->progressive_mode || cinfo->coef_bits == NULL)
    return FALSE;
@ -366,34 +373,47 @@ smoothing_ok(j_decompress_ptr cinfo)
  if (coef->coef_bits_latch == NULL)
    coef->coef_bits_latch = (int *)
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                  cinfo->num_components *
+                                  cinfo->num_components * 2 *
                                  (SAVED_COEFS * sizeof(int)));
  coef_bits_latch = coef->coef_bits_latch;
  prev_coef_bits_latch =
    &coef->coef_bits_latch[cinfo->num_components * SAVED_COEFS];
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
       ci++, compptr++) {
    /* All components' quantization values must already be latched. */
    if ((qtable = compptr->quant_table) == NULL)
      return FALSE;
-    /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
+    /* Verify DC & first 9 AC quantizers are nonzero to avoid zero-divide. */
    if (qtable->quantval[0] == 0 ||
        qtable->quantval[Q01_POS] == 0 ||
        qtable->quantval[Q10_POS] == 0 ||
        qtable->quantval[Q20_POS] == 0 ||
        qtable->quantval[Q11_POS] == 0 ||
-        qtable->quantval[Q02_POS] == 0)
+        qtable->quantval[Q02_POS] == 0 ||
        qtable->quantval[Q03_POS] == 0 ||
        qtable->quantval[Q12_POS] == 0 ||
        qtable->quantval[Q21_POS] == 0 ||
        qtable->quantval[Q30_POS] == 0)
      return FALSE;
    /* DC values must be at least partly known for all components. */
    coef_bits = cinfo->coef_bits[ci];
    prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components];
    if (coef_bits[0] < 0)
      return FALSE;
    coef_bits_latch[0] = coef_bits[0];
    /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
-    for (coefi = 1; coefi <= 5; coefi++) {
+    for (coefi = 1; coefi < SAVED_COEFS; coefi++) {
      if (cinfo->input_scan_number > 1)
        prev_coef_bits_latch[coefi] = prev_coef_bits[coefi];
      else
        prev_coef_bits_latch[coefi] = -1;
      coef_bits_latch[coefi] = coef_bits[coefi];
      if (coef_bits[coefi] != 0)
        smoothing_useful = TRUE;
    }
    coef_bits_latch += SAVED_COEFS;
    prev_coef_bits_latch += SAVED_COEFS;
  }
  return smoothing_useful;
@ -412,17 +432,20 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
  JDIMENSION block_num, last_block_column;
  int ci, block_row, block_rows, access_rows;
  JBLOCKARRAY buffer;
-  JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
+  JBLOCKROW buffer_ptr, prev_prev_block_row, prev_block_row;
  JBLOCKROW next_block_row, next_next_block_row;
  JSAMPARRAY output_ptr;
  JDIMENSION output_col;
  jpeg_component_info *compptr;
  inverse_DCT_method_ptr inverse_DCT;
-  boolean first_row, last_row;
+  boolean change_dc;
  JCOEF *workspace;
  int *coef_bits;
  JQUANT_TBL *quanttbl;
-  JLONG Q00, Q01, Q02, Q10, Q11, Q20, num;
+  JLONG Q00, Q01, Q02, Q03 = 0, Q10, Q11, Q12 = 0, Q20, Q21 = 0, Q30 = 0, num;
-  int DC1, DC2, DC3, DC4, DC5, DC6, DC7, DC8, DC9;
+  int DC01, DC02, DC03, DC04, DC05, DC06, DC07, DC08, DC09, DC10, DC11, DC12,
      DC13, DC14, DC15, DC16, DC17, DC18, DC19, DC20, DC21, DC22, DC23, DC24,
      DC25;
  int Al, pred;
  /* Keep a local variable to avoid looking it up more than once */
@ -434,10 +457,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
    if (cinfo->input_scan_number == cinfo->output_scan_number) {
      /* If input is working on current scan, we ordinarily want it to
       * have completed the current row.  But if input scan is DC,
-       * we want it to keep one row ahead so that next block row's DC
+       * we want it to keep two rows ahead so that next two block rows' DC
       * values are up to date.
       */
-      JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
+      JDIMENSION delta = (cinfo->Ss == 0) ? 2 : 0;
      if (cinfo->input_iMCU_row > cinfo->output_iMCU_row + delta)
        break;
    }
@ -452,34 +475,53 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
    if (!compptr->component_needed)
      continue;
    /* Count non-dummy DCT block rows in this iMCU row. */
-    if (cinfo->output_iMCU_row < last_iMCU_row) {
+    if (cinfo->output_iMCU_row < last_iMCU_row - 1) {
      block_rows = compptr->v_samp_factor;
      access_rows = block_rows * 3; /* this and next two iMCU rows */
    } else if (cinfo->output_iMCU_row < last_iMCU_row) {
      block_rows = compptr->v_samp_factor;
      access_rows = block_rows * 2; /* this and next iMCU row */
      last_row = FALSE;
    } else {
      /* NB: can't use last_row_height here; it is input-side-dependent! */
      block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
      if (block_rows == 0) block_rows = compptr->v_samp_factor;
      access_rows = block_rows; /* this iMCU row only */
      last_row = TRUE;
    }
    /* Align the virtual buffer for this component. */
-    if (cinfo->output_iMCU_row > 0) {
+    if (cinfo->output_iMCU_row > 1) {
-      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
+      access_rows += 2 * compptr->v_samp_factor; /* prior two iMCU rows too */
      buffer = (*cinfo->mem->access_virt_barray)
        ((j_common_ptr)cinfo, coef->whole_image[ci],
         (cinfo->output_iMCU_row - 2) * compptr->v_samp_factor,
         (JDIMENSION)access_rows, FALSE);
      buffer += 2 * compptr->v_samp_factor; /* point to current iMCU row */
    } else if (cinfo->output_iMCU_row > 0) {
      buffer = (*cinfo->mem->access_virt_barray)
        ((j_common_ptr)cinfo, coef->whole_image[ci],
         (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
         (JDIMENSION)access_rows, FALSE);
      buffer += compptr->v_samp_factor; /* point to current iMCU row */
      first_row = FALSE;
    } else {
      buffer = (*cinfo->mem->access_virt_barray)
        ((j_common_ptr)cinfo, coef->whole_image[ci],
         (JDIMENSION)0, (JDIMENSION)access_rows, FALSE);
      first_row = TRUE;
    }
-    /* Fetch component-dependent info */
+    /* Fetch component-dependent info.
-    coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
+     * If the current scan is incomplete, then we use the component-dependent
     * info from the previous scan.
     */
    if (cinfo->output_iMCU_row > cinfo->master->last_good_iMCU_row)
      coef_bits =
        coef->coef_bits_latch + ((ci + cinfo->num_components) * SAVED_COEFS);
    else
      coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
    /* We only do DC interpolation if no AC coefficient data is available. */
    change_dc =
      coef_bits[1] == -1 && coef_bits[2] == -1 && coef_bits[3] == -1 &&
      coef_bits[4] == -1 && coef_bits[5] == -1 && coef_bits[6] == -1 &&
      coef_bits[7] == -1 && coef_bits[8] == -1 && coef_bits[9] == -1;
    quanttbl = compptr->quant_table;
    Q00 = quanttbl->quantval[0];
    Q01 = quanttbl->quantval[Q01_POS];
@ -487,27 +529,51 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
    Q20 = quanttbl->quantval[Q20_POS];
    Q11 = quanttbl->quantval[Q11_POS];
    Q02 = quanttbl->quantval[Q02_POS];
    if (change_dc) {
      Q03 = quanttbl->quantval[Q03_POS];
      Q12 = quanttbl->quantval[Q12_POS];
      Q21 = quanttbl->quantval[Q21_POS];
      Q30 = quanttbl->quantval[Q30_POS];
    }
    inverse_DCT = cinfo->idct->inverse_DCT[ci];
    output_ptr = output_buf[ci];
    /* Loop over all DCT blocks to be processed. */
    for (block_row = 0; block_row < block_rows; block_row++) {
      buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci];
-      if (first_row && block_row == 0)
+
      if (block_row > 0 || cinfo->output_iMCU_row > 0)
        prev_block_row =
          buffer[block_row - 1] + cinfo->master->first_MCU_col[ci];
      else
        prev_block_row = buffer_ptr;
      if (block_row > 1 || cinfo->output_iMCU_row > 1)
        prev_prev_block_row =
          buffer[block_row - 2] + cinfo->master->first_MCU_col[ci];
      else
        prev_prev_block_row = prev_block_row;
      if (block_row < block_rows - 1 || cinfo->output_iMCU_row < last_iMCU_row)
        next_block_row =
          buffer[block_row + 1] + cinfo->master->first_MCU_col[ci];
      else
        prev_block_row = buffer[block_row - 1] +
                         cinfo->master->first_MCU_col[ci];
      if (last_row && block_row == block_rows - 1)
        next_block_row = buffer_ptr;
      if (block_row < block_rows - 2 ||
          cinfo->output_iMCU_row < last_iMCU_row - 1)
        next_next_block_row =
          buffer[block_row + 2] + cinfo->master->first_MCU_col[ci];
      else
-        next_block_row = buffer[block_row + 1] +
+        next_next_block_row = next_block_row;
-                         cinfo->master->first_MCU_col[ci];
+
      /* We fetch the surrounding DC values using a sliding-register approach.
-       * Initialize all nine here so as to do the right thing on narrow pics.
+       * Initialize all 25 here so as to do the right thing on narrow pics.
       */
-      DC1 = DC2 = DC3 = (int)prev_block_row[0][0];
+      DC01 = DC02 = DC03 = DC04 = DC05 = (int)prev_prev_block_row[0][0];
-      DC4 = DC5 = DC6 = (int)buffer_ptr[0][0];
+      DC06 = DC07 = DC08 = DC09 = DC10 = (int)prev_block_row[0][0];
-      DC7 = DC8 = DC9 = (int)next_block_row[0][0];
+      DC11 = DC12 = DC13 = DC14 = DC15 = (int)buffer_ptr[0][0];
      DC16 = DC17 = DC18 = DC19 = DC20 = (int)next_block_row[0][0];
      DC21 = DC22 = DC23 = DC24 = DC25 = (int)next_next_block_row[0][0];
      output_col = 0;
      last_block_column = compptr->width_in_blocks - 1;
      for (block_num = cinfo->master->first_MCU_col[ci];
@ -515,18 +581,39 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
        /* Fetch current DCT block into workspace so we can modify it. */
        jcopy_block_row(buffer_ptr, (JBLOCKROW)workspace, (JDIMENSION)1);
        /* Update DC values */
-        if (block_num < last_block_column) {
+        if (block_num == cinfo->master->first_MCU_col[ci] &&
-          DC3 = (int)prev_block_row[1][0];
+            block_num < last_block_column) {
-          DC6 = (int)buffer_ptr[1][0];
+          DC04 = (int)prev_prev_block_row[1][0];
-          DC9 = (int)next_block_row[1][0];
+          DC09 = (int)prev_block_row[1][0];
          DC14 = (int)buffer_ptr[1][0];
          DC19 = (int)next_block_row[1][0];
          DC24 = (int)next_next_block_row[1][0];
        }
-        /* Compute coefficient estimates per K.8.
+        if (block_num + 1 < last_block_column) {
-         * An estimate is applied only if coefficient is still zero,
+          DC05 = (int)prev_prev_block_row[2][0];
-         * and is not known to be fully accurate.
+          DC10 = (int)prev_block_row[2][0];
          DC15 = (int)buffer_ptr[2][0];
          DC20 = (int)next_block_row[2][0];
          DC25 = (int)next_next_block_row[2][0];
        }
        /* If DC interpolation is enabled, compute coefficient estimates using
         * a Gaussian-like kernel, keeping the averages of the DC values.
         *
         * If DC interpolation is disabled, compute coefficient estimates using
         * an algorithm similar to the one described in Section K.8 of the JPEG
         * standard, except applied to a 5x5 window rather than a 3x3 window.
         *
         * An estimate is applied only if the coefficient is still zero and is
         * not known to be fully accurate.
         */
        /* AC01 */
        if ((Al = coef_bits[1]) != 0 && workspace[1] == 0) {
-          num = 36 * Q00 * (DC4 - DC6);
+          num = Q00 * (change_dc ?
                (-DC01 - DC02 + DC04 + DC05 - 3 * DC06 + 13 * DC07 -
                 13 * DC09 + 3 * DC10 - 3 * DC11 + 38 * DC12 - 38 * DC14 +
                 3 * DC15 - 3 * DC16 + 13 * DC17 - 13 * DC19 + 3 * DC20 -
                 DC21 - DC22 + DC24 + DC25) :
                (-7 * DC11 + 50 * DC12 - 50 * DC14 + 7 * DC15));
          if (num >= 0) {
            pred = (int)(((Q01 << 7) + num) / (Q01 << 8));
            if (Al > 0 && pred >= (1 << Al))
@ -541,7 +628,12 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
        }
        /* AC10 */
        if ((Al = coef_bits[2]) != 0 && workspace[8] == 0) {
-          num = 36 * Q00 * (DC2 - DC8);
+          num = Q00 * (change_dc ?
                (-DC01 - 3 * DC02 - 3 * DC03 - 3 * DC04 - DC05 - DC06 +
                 13 * DC07 + 38 * DC08 + 13 * DC09 - DC10 + DC16 -
                 13 * DC17 - 38 * DC18 - 13 * DC19 + DC20 + DC21 +
                 3 * DC22 + 3 * DC23 + 3 * DC24 + DC25) :
                (-7 * DC03 + 50 * DC08 - 50 * DC18 + 7 * DC23));
          if (num >= 0) {
            pred = (int)(((Q10 << 7) + num) / (Q10 << 8));
            if (Al > 0 && pred >= (1 << Al))
@ -556,7 +648,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
        }
        /* AC20 */
        if ((Al = coef_bits[3]) != 0 && workspace[16] == 0) {
-          num = 9 * Q00 * (DC2 + DC8 - 2 * DC5);
+          num = Q00 * (change_dc ?
                (DC03 + 2 * DC07 + 7 * DC08 + 2 * DC09 - 5 * DC12 - 14 * DC13 -
                 5 * DC14 + 2 * DC17 + 7 * DC18 + 2 * DC19 + DC23) :
                (-DC03 + 13 * DC08 - 24 * DC13 + 13 * DC18 - DC23));
          if (num >= 0) {
            pred = (int)(((Q20 << 7) + num) / (Q20 << 8));
            if (Al > 0 && pred >= (1 << Al))
@ -571,7 +666,11 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
        }
        /* AC11 */
        if ((Al = coef_bits[4]) != 0 && workspace[9] == 0) {
-          num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
+          num = Q00 * (change_dc ?
                (-DC01 + DC05 + 9 * DC07 - 9 * DC09 - 9 * DC17 +
                 9 * DC19 + DC21 - DC25) :
                (DC10 + DC16 - 10 * DC17 + 10 * DC19 - DC02 - DC20 + DC22 -
                 DC24 + DC04 - DC06 + 10 * DC07 - 10 * DC09));
          if (num >= 0) {
            pred = (int)(((Q11 << 7) + num) / (Q11 << 8));
            if (Al > 0 && pred >= (1 << Al))
@ -586,7 +685,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
        }
        /* AC02 */
        if ((Al = coef_bits[5]) != 0 && workspace[2] == 0) {
-          num = 9 * Q00 * (DC4 + DC6 - 2 * DC5);
+          num = Q00 * (change_dc ?
                (2 * DC07 - 5 * DC08 + 2 * DC09 + DC11 + 7 * DC12 - 14 * DC13 +
                 7 * DC14 + DC15 + 2 * DC17 - 5 * DC18 + 2 * DC19) :
                (-DC11 + 13 * DC12 - 24 * DC13 + 13 * DC14 - DC15));
          if (num >= 0) {
            pred = (int)(((Q02 << 7) + num) / (Q02 << 8));
            if (Al > 0 && pred >= (1 << Al))
@ -599,14 +701,96 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
          }
          workspace[2] = (JCOEF)pred;
        }
        if (change_dc) {
          /* AC03 */
          if ((Al = coef_bits[6]) != 0 && workspace[3] == 0) {
            num = Q00 * (DC07 - DC09 + 2 * DC12 - 2 * DC14 + DC17 - DC19);
            if (num >= 0) {
              pred = (int)(((Q03 << 7) + num) / (Q03 << 8));
              if (Al > 0 && pred >= (1 << Al))
                pred = (1 << Al) - 1;
            } else {
              pred = (int)(((Q03 << 7) - num) / (Q03 << 8));
              if (Al > 0 && pred >= (1 << Al))
                pred = (1 << Al) - 1;
              pred = -pred;
            }
            workspace[3] = (JCOEF)pred;
          }
          /* AC12 */
          if ((Al = coef_bits[7]) != 0 && workspace[10] == 0) {
            num = Q00 * (DC07 - 3 * DC08 + DC09 - DC17 + 3 * DC18 - DC19);
            if (num >= 0) {
              pred = (int)(((Q12 << 7) + num) / (Q12 << 8));
              if (Al > 0 && pred >= (1 << Al))
                pred = (1 << Al) - 1;
            } else {
              pred = (int)(((Q12 << 7) - num) / (Q12 << 8));
              if (Al > 0 && pred >= (1 << Al))
                pred = (1 << Al) - 1;
              pred = -pred;
            }
            workspace[10] = (JCOEF)pred;
          }
          /* AC21 */
          if ((Al = coef_bits[8]) != 0 && workspace[17] == 0) {
            num = Q00 * (DC07 - DC09 - 3 * DC12 + 3 * DC14 + DC17 - DC19);
            if (num >= 0) {
              pred = (int)(((Q21 << 7) + num) / (Q21 << 8));
              if (Al > 0 && pred >= (1 << Al))
                pred = (1 << Al) - 1;
            } else {
              pred = (int)(((Q21 << 7) - num) / (Q21 << 8));
              if (Al > 0 && pred >= (1 << Al))
                pred = (1 << Al) - 1;
              pred = -pred;
            }
            workspace[17] = (JCOEF)pred;
          }
          /* AC30 */
          if ((Al = coef_bits[9]) != 0 && workspace[24] == 0) {
            num = Q00 * (DC07 + 2 * DC08 + DC09 - DC17 - 2 * DC18 - DC19);
            if (num >= 0) {
              pred = (int)(((Q30 << 7) + num) / (Q30 << 8));
              if (Al > 0 && pred >= (1 << Al))
                pred = (1 << Al) - 1;
            } else {
              pred = (int)(((Q30 << 7) - num) / (Q30 << 8));
              if (Al > 0 && pred >= (1 << Al))
                pred = (1 << Al) - 1;
              pred = -pred;
            }
            workspace[24] = (JCOEF)pred;
          }
          /* coef_bits[0] is non-negative.  Otherwise this function would not
           * be called.
           */
          num = Q00 *
                (-2 * DC01 - 6 * DC02 - 8 * DC03 - 6 * DC04 - 2 * DC05 -
                 6 * DC06 + 6 * DC07 + 42 * DC08 + 6 * DC09 - 6 * DC10 -
                 8 * DC11 + 42 * DC12 + 152 * DC13 + 42 * DC14 - 8 * DC15 -
                 6 * DC16 + 6 * DC17 + 42 * DC18 + 6 * DC19 - 6 * DC20 -
                 2 * DC21 - 6 * DC22 - 8 * DC23 - 6 * DC24 - 2 * DC25);
          if (num >= 0) {
            pred = (int)(((Q00 << 7) + num) / (Q00 << 8));
          } else {
            pred = (int)(((Q00 << 7) - num) / (Q00 << 8));
            pred = -pred;
          }
          workspace[0] = (JCOEF)pred;
        }  /* change_dc */
        /* OK, do the IDCT */
        (*inverse_DCT) (cinfo, compptr, (JCOEFPTR)workspace, output_ptr,
                        output_col);
        /* Advance for next column */
-        DC1 = DC2;  DC2 = DC3;
+        DC01 = DC02;  DC02 = DC03;  DC03 = DC04;  DC04 = DC05;
-        DC4 = DC5;  DC5 = DC6;
+        DC06 = DC07;  DC07 = DC08;  DC08 = DC09;  DC09 = DC10;
-        DC7 = DC8;  DC8 = DC9;
+        DC11 = DC12;  DC12 = DC13;  DC13 = DC14;  DC14 = DC15;
-        buffer_ptr++, prev_block_row++, next_block_row++;
+        DC16 = DC17;  DC17 = DC18;  DC18 = DC19;  DC19 = DC20;
        DC21 = DC22;  DC22 = DC23;  DC23 = DC24;  DC24 = DC25;
        buffer_ptr++, prev_block_row++, next_block_row++,
          prev_prev_block_row++, next_next_block_row++;
        output_col += compptr->_DCT_scaled_size;
      }
      output_ptr += compptr->_DCT_scaled_size;
@ -655,7 +839,7 @@ jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
 #ifdef BLOCK_SMOOTHING_SUPPORTED
      /* If block smoothing could be used, need a bigger window */
      if (cinfo->progressive_mode)
-        access_rows *= 3;
+        access_rows *= 5;
 #endif
      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
        ((j_common_ptr)cinfo, JPOOL_IMAGE, TRUE,
--- a/3rdparty/libjpeg-turbo/src/jdcoefct.h
+++ b/3rdparty/libjpeg-turbo/src/jdcoefct.h
@ -5,6 +5,7 @@
 * Copyright (C) 1994-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright (C) 2020, Google, Inc.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 */
@ -51,7 +52,7 @@ typedef struct {
 #ifdef BLOCK_SMOOTHING_SUPPORTED
  /* When doing block smoothing, we latch coefficient Al values here */
  int *coef_bits_latch;
-#define SAVED_COEFS  6          /* we save coef_bits[0..5] */
+#define SAVED_COEFS  10         /* we save coef_bits[0..9] */
 #endif
 } my_coef_controller;
--- a/3rdparty/libjpeg-turbo/src/jdcol565.c
+++ b/3rdparty/libjpeg-turbo/src/jdcol565.c
@ -45,9 +45,9 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    outptr = *output_buf++;
    if (PACK_NEED_ALIGNMENT(outptr)) {
-      y  = GETJSAMPLE(*inptr0++);
+      y  = *inptr0++;
-      cb = GETJSAMPLE(*inptr1++);
+      cb = *inptr1++;
-      cr = GETJSAMPLE(*inptr2++);
+      cr = *inptr2++;
      r = range_limit[y + Crrtab[cr]];
      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                            SCALEBITS))];
@ -58,18 +58,18 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
      num_cols--;
    }
    for (col = 0; col < (num_cols >> 1); col++) {
-      y  = GETJSAMPLE(*inptr0++);
+      y  = *inptr0++;
-      cb = GETJSAMPLE(*inptr1++);
+      cb = *inptr1++;
-      cr = GETJSAMPLE(*inptr2++);
+      cr = *inptr2++;
      r = range_limit[y + Crrtab[cr]];
      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                            SCALEBITS))];
      b = range_limit[y + Cbbtab[cb]];
      rgb = PACK_SHORT_565(r, g, b);
-      y  = GETJSAMPLE(*inptr0++);
+      y  = *inptr0++;
-      cb = GETJSAMPLE(*inptr1++);
+      cb = *inptr1++;
-      cr = GETJSAMPLE(*inptr2++);
+      cr = *inptr2++;
      r = range_limit[y + Crrtab[cr]];
      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                            SCALEBITS))];
@ -80,9 +80,9 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
      outptr += 4;
    }
    if (num_cols & 1) {
-      y  = GETJSAMPLE(*inptr0);
+      y  = *inptr0;
-      cb = GETJSAMPLE(*inptr1);
+      cb = *inptr1;
-      cr = GETJSAMPLE(*inptr2);
+      cr = *inptr2;
      r = range_limit[y + Crrtab[cr]];
      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                            SCALEBITS))];
@ -125,9 +125,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    input_row++;
    outptr = *output_buf++;
    if (PACK_NEED_ALIGNMENT(outptr)) {
-      y  = GETJSAMPLE(*inptr0++);
+      y  = *inptr0++;
-      cb = GETJSAMPLE(*inptr1++);
+      cb = *inptr1++;
-      cr = GETJSAMPLE(*inptr2++);
+      cr = *inptr2++;
      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
      g = range_limit[DITHER_565_G(y +
                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@ -139,9 +139,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
      num_cols--;
    }
    for (col = 0; col < (num_cols >> 1); col++) {
-      y  = GETJSAMPLE(*inptr0++);
+      y  = *inptr0++;
-      cb = GETJSAMPLE(*inptr1++);
+      cb = *inptr1++;
-      cr = GETJSAMPLE(*inptr2++);
+      cr = *inptr2++;
      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
      g = range_limit[DITHER_565_G(y +
                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@ -150,9 +150,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
      d0 = DITHER_ROTATE(d0);
      rgb = PACK_SHORT_565(r, g, b);
-      y  = GETJSAMPLE(*inptr0++);
+      y  = *inptr0++;
-      cb = GETJSAMPLE(*inptr1++);
+      cb = *inptr1++;
-      cr = GETJSAMPLE(*inptr2++);
+      cr = *inptr2++;
      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
      g = range_limit[DITHER_565_G(y +
                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@ -165,9 +165,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
      outptr += 4;
    }
    if (num_cols & 1) {
-      y  = GETJSAMPLE(*inptr0);
+      y  = *inptr0;
-      cb = GETJSAMPLE(*inptr1);
+      cb = *inptr1;
-      cr = GETJSAMPLE(*inptr2);
+      cr = *inptr2;
      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
      g = range_limit[DITHER_565_G(y +
                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@ -202,32 +202,32 @@ rgb_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    input_row++;
    outptr = *output_buf++;
    if (PACK_NEED_ALIGNMENT(outptr)) {
-      r = GETJSAMPLE(*inptr0++);
+      r = *inptr0++;
-      g = GETJSAMPLE(*inptr1++);
+      g = *inptr1++;
-      b = GETJSAMPLE(*inptr2++);
+      b = *inptr2++;
      rgb = PACK_SHORT_565(r, g, b);
      *(INT16 *)outptr = (INT16)rgb;
      outptr += 2;
      num_cols--;
    }
    for (col = 0; col < (num_cols >> 1); col++) {
-      r = GETJSAMPLE(*inptr0++);
+      r = *inptr0++;
-      g = GETJSAMPLE(*inptr1++);
+      g = *inptr1++;
-      b = GETJSAMPLE(*inptr2++);
+      b = *inptr2++;
      rgb = PACK_SHORT_565(r, g, b);
-      r = GETJSAMPLE(*inptr0++);
+      r = *inptr0++;
-      g = GETJSAMPLE(*inptr1++);
+      g = *inptr1++;
-      b = GETJSAMPLE(*inptr2++);
+      b = *inptr2++;
      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
      outptr += 4;
    }
    if (num_cols & 1) {
-      r = GETJSAMPLE(*inptr0);
+      r = *inptr0;
-      g = GETJSAMPLE(*inptr1);
+      g = *inptr1;
-      b = GETJSAMPLE(*inptr2);
+      b = *inptr2;
      rgb = PACK_SHORT_565(r, g, b);
      *(INT16 *)outptr = (INT16)rgb;
    }
@ -259,24 +259,24 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    input_row++;
    outptr = *output_buf++;
    if (PACK_NEED_ALIGNMENT(outptr)) {
-      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
+      r = range_limit[DITHER_565_R(*inptr0++, d0)];
-      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
+      g = range_limit[DITHER_565_G(*inptr1++, d0)];
-      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+      b = range_limit[DITHER_565_B(*inptr2++, d0)];
      rgb = PACK_SHORT_565(r, g, b);
      *(INT16 *)outptr = (INT16)rgb;
      outptr += 2;
      num_cols--;
    }
    for (col = 0; col < (num_cols >> 1); col++) {
-      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
+      r = range_limit[DITHER_565_R(*inptr0++, d0)];
-      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
+      g = range_limit[DITHER_565_G(*inptr1++, d0)];
-      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+      b = range_limit[DITHER_565_B(*inptr2++, d0)];
      d0 = DITHER_ROTATE(d0);
      rgb = PACK_SHORT_565(r, g, b);
-      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
+      r = range_limit[DITHER_565_R(*inptr0++, d0)];
-      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
+      g = range_limit[DITHER_565_G(*inptr1++, d0)];
-      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+      b = range_limit[DITHER_565_B(*inptr2++, d0)];
      d0 = DITHER_ROTATE(d0);
      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
@ -284,9 +284,9 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
      outptr += 4;
    }
    if (num_cols & 1) {
-      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0), d0)];
+      r = range_limit[DITHER_565_R(*inptr0, d0)];
-      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1), d0)];
+      g = range_limit[DITHER_565_G(*inptr1, d0)];
-      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2), d0)];
+      b = range_limit[DITHER_565_B(*inptr2, d0)];
      rgb = PACK_SHORT_565(r, g, b);
      *(INT16 *)outptr = (INT16)rgb;
    }
--- a/3rdparty/libjpeg-turbo/src/jdcolext.c
+++ b/3rdparty/libjpeg-turbo/src/jdcolext.c
@ -53,9 +53,9 @@ ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    input_row++;
    outptr = *output_buf++;
    for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
+      y  = inptr0[col];
-      cb = GETJSAMPLE(inptr1[col]);
+      cb = inptr1[col];
-      cr = GETJSAMPLE(inptr2[col]);
+      cr = inptr2[col];
      /* Range-limiting is essential due to noise introduced by DCT losses. */
      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
      outptr[RGB_GREEN] = range_limit[y +
@ -93,7 +93,6 @@ gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    inptr = input_buf[0][input_row++];
    outptr = *output_buf++;
    for (col = 0; col < num_cols; col++) {
      /* We can dispense with GETJSAMPLE() here */
      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
      /* Set unused byte to 0xFF so it can be interpreted as an opaque */
      /* alpha channel value */
@ -128,7 +127,6 @@ rgb_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    input_row++;
    outptr = *output_buf++;
    for (col = 0; col < num_cols; col++) {
      /* We can dispense with GETJSAMPLE() here */
      outptr[RGB_RED] = inptr0[col];
      outptr[RGB_GREEN] = inptr1[col];
      outptr[RGB_BLUE] = inptr2[col];
--- a/3rdparty/libjpeg-turbo/src/jdcolor.c
+++ b/3rdparty/libjpeg-turbo/src/jdcolor.c
@ -341,9 +341,9 @@ rgb_gray_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    input_row++;
    outptr = *output_buf++;
    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr0[col]);
+      r = inptr0[col];
-      g = GETJSAMPLE(inptr1[col]);
+      g = inptr1[col];
-      b = GETJSAMPLE(inptr2[col]);
+      b = inptr2[col];
      /* Y */
      outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
                               ctab[b + B_Y_OFF]) >> SCALEBITS);
@ -550,9 +550,9 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    input_row++;
    outptr = *output_buf++;
    for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
+      y  = inptr0[col];
-      cb = GETJSAMPLE(inptr1[col]);
+      cb = inptr1[col];
-      cr = GETJSAMPLE(inptr2[col]);
+      cr = inptr2[col];
      /* Range-limiting is essential due to noise introduced by DCT losses. */
      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];   /* red */
      outptr[1] = range_limit[MAXJSAMPLE - (y +                 /* green */
@ -560,7 +560,7 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                                                 SCALEBITS)))];
      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];   /* blue */
      /* K passes through unchanged */
-      outptr[3] = inptr3[col];  /* don't need GETJSAMPLE here */
+      outptr[3] = inptr3[col];
      outptr += 4;
    }
  }
--- a/3rdparty/libjpeg-turbo/src/jdhuff.c
+++ b/3rdparty/libjpeg-turbo/src/jdhuff.c
@ -5,6 +5,7 @@
 * Copyright (C) 1991-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
 * Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander.
 * Copyright (C) 2018, Matthias Räncker.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -39,24 +40,6 @@ typedef struct {
  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
 } savable_state;
 /* This macro is to work around compilers with missing or broken
 * structure assignment.  You'll need to fix this code if you have
 * such a compiler and you change MAX_COMPS_IN_SCAN.
 */
 #ifndef NO_STRUCT_ASSIGN
 #define ASSIGN_STATE(dest, src)  ((dest) = (src))
 #else
 #if MAX_COMPS_IN_SCAN == 4
 #define ASSIGN_STATE(dest, src) \
  ((dest).last_dc_val[0] = (src).last_dc_val[0], \
   (dest).last_dc_val[1] = (src).last_dc_val[1], \
   (dest).last_dc_val[2] = (src).last_dc_val[2], \
   (dest).last_dc_val[3] = (src).last_dc_val[3])
 #endif
 #endif
 typedef struct {
  struct jpeg_entropy_decoder pub; /* public fields */
@ -325,7 +308,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
        bytes_in_buffer = cinfo->src->bytes_in_buffer;
      }
      bytes_in_buffer--;
-      c = GETJOCTET(*next_input_byte++);
+      c = *next_input_byte++;
      /* If it's 0xFF, check and discard stuffed zero byte */
      if (c == 0xFF) {
@ -342,7 +325,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
            bytes_in_buffer = cinfo->src->bytes_in_buffer;
          }
          bytes_in_buffer--;
-          c = GETJOCTET(*next_input_byte++);
+          c = *next_input_byte++;
        } while (c == 0xFF);
        if (c == 0) {
@ -405,8 +388,8 @@ no_more_bytes:
 #define GET_BYTE { \
  register int c0, c1; \
-  c0 = GETJOCTET(*buffer++); \
+  c0 = *buffer++; \
-  c1 = GETJOCTET(*buffer); \
+  c1 = *buffer; \
  /* Pre-execute most common case */ \
  get_buffer = (get_buffer << 8) | c0; \
  bits_left += 8; \
@ -423,7 +406,7 @@ no_more_bytes:
  } \
 }
-#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
+#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
 /* Pre-fetch 48 bytes, because the holding register is 64-bit */
 #define FILL_BIT_BUFFER_FAST \
@ -557,6 +540,12 @@ process_restart(j_decompress_ptr cinfo)
 }
 #if defined(__has_feature)
 #if __has_feature(undefined_behavior_sanitizer)
 __attribute__((no_sanitize("signed-integer-overflow"),
               no_sanitize("unsigned-integer-overflow")))
 #endif
 #endif
 LOCAL(boolean)
 decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 {
@ -568,7 +557,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
  /* Load up working state */
  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-  ASSIGN_STATE(state, entropy->saved);
+  state = entropy->saved;
  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
    JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
@ -589,11 +578,15 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
    if (entropy->dc_needed[blkn]) {
      /* Convert DC difference to actual value, update last_dc_val */
      int ci = cinfo->MCU_membership[blkn];
-      /* This is really just
+      /* Certain malformed JPEG images produce repeated DC coefficient
-       *   s += state.last_dc_val[ci];
+       * differences of 2047 or -2047, which causes state.last_dc_val[ci] to
-       * It is written this way in order to shut up UBSan.
+       * grow until it overflows or underflows a 32-bit signed integer.  This
       * behavior is, to the best of our understanding, innocuous, and it is
       * unclear how to work around it without potentially affecting
       * performance.  Thus, we (hopefully temporarily) suppress UBSan integer
       * overflow errors for this function.
       */
-      s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
+      s += state.last_dc_val[ci];
      state.last_dc_val[ci] = s;
      if (block) {
        /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
@ -653,7 +646,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
  /* Completed MCU, so update state */
  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-  ASSIGN_STATE(entropy->saved, state);
+  entropy->saved = state;
  return TRUE;
 }
@ -671,7 +664,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
  /* Load up working state */
  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
  buffer = (JOCTET *)br_state.next_input_byte;
-  ASSIGN_STATE(state, entropy->saved);
+  state = entropy->saved;
  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
    JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
@ -688,7 +681,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
    if (entropy->dc_needed[blkn]) {
      int ci = cinfo->MCU_membership[blkn];
-      s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
+      s += state.last_dc_val[ci];
      state.last_dc_val[ci] = s;
      if (block)
        (*block)[0] = (JCOEF)s;
@ -740,7 +733,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
  br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
  br_state.next_input_byte = buffer;
  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-  ASSIGN_STATE(entropy->saved, state);
+  entropy->saved = state;
  return TRUE;
 }
@ -795,7 +788,8 @@ use_slow:
  }
  /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
+  if (cinfo->restart_interval)
    entropy->restarts_to_go--;
  return TRUE;
 }
--- a/3rdparty/libjpeg-turbo/src/jdhuff.h
+++ b/3rdparty/libjpeg-turbo/src/jdhuff.h
@ -4,7 +4,8 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2010-2011, 2015-2016, D. R. Commander.
+ * Copyright (C) 2010-2011, 2015-2016, 2021, D. R. Commander.
 * Copyright (C) 2018, Matthias Räncker.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -78,6 +79,11 @@ EXTERN(void) jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC,
 typedef size_t bit_buf_type;            /* type of bit-extraction buffer */
 #define BIT_BUF_SIZE  64                /* size of buffer in bits */
 #elif defined(__x86_64__) && defined(__ILP32__)
 typedef unsigned long long bit_buf_type; /* type of bit-extraction buffer */
 #define BIT_BUF_SIZE  64                 /* size of buffer in bits */
 #else
 typedef unsigned long bit_buf_type;     /* type of bit-extraction buffer */
@ -228,7 +234,10 @@ slowlabel: \
      s |= GET_BITS(1); \
      nb++; \
    } \
-    s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \
+    if (nb > 16) \
      s = 0; \
    else \
      s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \
  }
 /* Out-of-line case for Huffman code fetching */
--- a/3rdparty/libjpeg-turbo/src/jdicc.c
+++ b/3rdparty/libjpeg-turbo/src/jdicc.c
@ -38,18 +38,18 @@ marker_is_icc(jpeg_saved_marker_ptr marker)
    marker->marker == ICC_MARKER &&
    marker->data_length >= ICC_OVERHEAD_LEN &&
    /* verify the identifying string */
-    GETJOCTET(marker->data[0]) == 0x49 &&
+    marker->data[0] == 0x49 &&
-    GETJOCTET(marker->data[1]) == 0x43 &&
+    marker->data[1] == 0x43 &&
-    GETJOCTET(marker->data[2]) == 0x43 &&
+    marker->data[2] == 0x43 &&
-    GETJOCTET(marker->data[3]) == 0x5F &&
+    marker->data[3] == 0x5F &&
-    GETJOCTET(marker->data[4]) == 0x50 &&
+    marker->data[4] == 0x50 &&
-    GETJOCTET(marker->data[5]) == 0x52 &&
+    marker->data[5] == 0x52 &&
-    GETJOCTET(marker->data[6]) == 0x4F &&
+    marker->data[6] == 0x4F &&
-    GETJOCTET(marker->data[7]) == 0x46 &&
+    marker->data[7] == 0x46 &&
-    GETJOCTET(marker->data[8]) == 0x49 &&
+    marker->data[8] == 0x49 &&
-    GETJOCTET(marker->data[9]) == 0x4C &&
+    marker->data[9] == 0x4C &&
-    GETJOCTET(marker->data[10]) == 0x45 &&
+    marker->data[10] == 0x45 &&
-    GETJOCTET(marker->data[11]) == 0x0;
+    marker->data[11] == 0x0;
 }
@ -102,12 +102,12 @@ jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
  for (marker = cinfo->marker_list; marker != NULL; marker = marker->next) {
    if (marker_is_icc(marker)) {
      if (num_markers == 0)
-        num_markers = GETJOCTET(marker->data[13]);
+        num_markers = marker->data[13];
-      else if (num_markers != GETJOCTET(marker->data[13])) {
+      else if (num_markers != marker->data[13]) {
        WARNMS(cinfo, JWRN_BOGUS_ICC);  /* inconsistent num_markers fields */
        return FALSE;
      }
-      seq_no = GETJOCTET(marker->data[12]);
+      seq_no = marker->data[12];
      if (seq_no <= 0 || seq_no > num_markers) {
        WARNMS(cinfo, JWRN_BOGUS_ICC);  /* bogus sequence number */
        return FALSE;
@ -154,7 +154,7 @@ jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
      JOCTET FAR *src_ptr;
      JOCTET *dst_ptr;
      unsigned int length;
-      seq_no = GETJOCTET(marker->data[12]);
+      seq_no = marker->data[12];
      dst_ptr = icc_data + data_offset[seq_no];
      src_ptr = marker->data + ICC_OVERHEAD_LEN;
      length = data_length[seq_no];
--- a/3rdparty/libjpeg-turbo/src/jdmarker.c
+++ b/3rdparty/libjpeg-turbo/src/jdmarker.c
@ -151,7 +151,7 @@ typedef my_marker_reader *my_marker_ptr;
 #define INPUT_BYTE(cinfo, V, action) \
  MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \
            bytes_in_buffer--; \
-            V = GETJOCTET(*next_input_byte++); )
+            V = *next_input_byte++; )
 /* As above, but read two bytes interpreted as an unsigned 16-bit integer.
 * V should be declared unsigned int or perhaps JLONG.
@ -159,10 +159,10 @@ typedef my_marker_reader *my_marker_ptr;
 #define INPUT_2BYTES(cinfo, V, action) \
  MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \
            bytes_in_buffer--; \
-            V = ((unsigned int)GETJOCTET(*next_input_byte++)) << 8; \
+            V = ((unsigned int)(*next_input_byte++)) << 8; \
            MAKE_BYTE_AVAIL(cinfo, action); \
            bytes_in_buffer--; \
-            V += GETJOCTET(*next_input_byte++); )
+            V += *next_input_byte++; )
 /*
@ -608,18 +608,18 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
  JLONG totallen = (JLONG)datalen + remaining;
  if (datalen >= APP0_DATA_LEN &&
-      GETJOCTET(data[0]) == 0x4A &&
+      data[0] == 0x4A &&
-      GETJOCTET(data[1]) == 0x46 &&
+      data[1] == 0x46 &&
-      GETJOCTET(data[2]) == 0x49 &&
+      data[2] == 0x49 &&
-      GETJOCTET(data[3]) == 0x46 &&
+      data[3] == 0x46 &&
-      GETJOCTET(data[4]) == 0) {
+      data[4] == 0) {
    /* Found JFIF APP0 marker: save info */
    cinfo->saw_JFIF_marker = TRUE;
-    cinfo->JFIF_major_version = GETJOCTET(data[5]);
+    cinfo->JFIF_major_version = data[5];
-    cinfo->JFIF_minor_version = GETJOCTET(data[6]);
+    cinfo->JFIF_minor_version = data[6];
-    cinfo->density_unit = GETJOCTET(data[7]);
+    cinfo->density_unit = data[7];
-    cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
+    cinfo->X_density = (data[8] << 8) + data[9];
-    cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
+    cinfo->Y_density = (data[10] << 8) + data[11];
    /* Check version.
     * Major version must be 1, anything else signals an incompatible change.
     * (We used to treat this as an error, but now it's a nonfatal warning,
@ -634,24 +634,22 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
             cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
             cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
    /* Validate thumbnail dimensions and issue appropriate messages */
-    if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
+    if (data[12] | data[13])
-      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
+      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, data[12], data[13]);
               GETJOCTET(data[12]), GETJOCTET(data[13]));
    totallen -= APP0_DATA_LEN;
-    if (totallen !=
+    if (totallen != ((JLONG)data[12] * (JLONG)data[13] * (JLONG)3))
        ((JLONG)GETJOCTET(data[12]) * (JLONG)GETJOCTET(data[13]) * (JLONG)3))
      TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int)totallen);
  } else if (datalen >= 6 &&
-             GETJOCTET(data[0]) == 0x4A &&
+             data[0] == 0x4A &&
-             GETJOCTET(data[1]) == 0x46 &&
+             data[1] == 0x46 &&
-             GETJOCTET(data[2]) == 0x58 &&
+             data[2] == 0x58 &&
-             GETJOCTET(data[3]) == 0x58 &&
+             data[3] == 0x58 &&
-             GETJOCTET(data[4]) == 0) {
+             data[4] == 0) {
    /* Found JFIF "JFXX" extension APP0 marker */
    /* The library doesn't actually do anything with these,
     * but we try to produce a helpful trace message.
     */
-    switch (GETJOCTET(data[5])) {
+    switch (data[5]) {
    case 0x10:
      TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int)totallen);
      break;
@ -662,8 +660,7 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
      TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int)totallen);
      break;
    default:
-      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
+      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, data[5], (int)totallen);
               GETJOCTET(data[5]), (int)totallen);
      break;
    }
  } else {
@ -684,16 +681,16 @@ examine_app14(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
  unsigned int version, flags0, flags1, transform;
  if (datalen >= APP14_DATA_LEN &&
-      GETJOCTET(data[0]) == 0x41 &&
+      data[0] == 0x41 &&
-      GETJOCTET(data[1]) == 0x64 &&
+      data[1] == 0x64 &&
-      GETJOCTET(data[2]) == 0x6F &&
+      data[2] == 0x6F &&
-      GETJOCTET(data[3]) == 0x62 &&
+      data[3] == 0x62 &&
-      GETJOCTET(data[4]) == 0x65) {
+      data[4] == 0x65) {
    /* Found Adobe APP14 marker */
-    version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
+    version = (data[5] << 8) + data[6];
-    flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
+    flags0 = (data[7] << 8) + data[8];
-    flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
+    flags1 = (data[9] << 8) + data[10];
-    transform = GETJOCTET(data[11]);
+    transform = data[11];
    TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
    cinfo->saw_Adobe_marker = TRUE;
    cinfo->Adobe_transform = (UINT8)transform;
--- a/3rdparty/libjpeg-turbo/src/jdmaster.c
+++ b/3rdparty/libjpeg-turbo/src/jdmaster.c
@ -5,7 +5,7 @@
 * Copyright (C) 1991-1997, Thomas G. Lane.
 * Modified 2002-2009 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2016, D. R. Commander.
+ * Copyright (C) 2009-2011, 2016, 2019, D. R. Commander.
 * Copyright (C) 2013, Linaro Limited.
 * Copyright (C) 2015, Google, Inc.
 * For conditions of distribution and use, see the accompanying README.ijg
@ -22,7 +22,6 @@
 #include "jpeglib.h"
 #include "jpegcomp.h"
 #include "jdmaster.h"
 #include "jsimd.h"
 /*
@ -70,17 +69,6 @@ use_merged_upsample(j_decompress_ptr cinfo)
      cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
      cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size)
    return FALSE;
 #ifdef WITH_SIMD
  /* If YCbCr-to-RGB color conversion is SIMD-accelerated but merged upsampling
     isn't, then disabling merged upsampling is likely to be faster when
     decompressing YCbCr JPEG images. */
  if (!jsimd_can_h2v2_merged_upsample() && !jsimd_can_h2v1_merged_upsample() &&
      jsimd_can_ycc_rgb() && cinfo->jpeg_color_space == JCS_YCbCr &&
      (cinfo->out_color_space == JCS_RGB ||
       (cinfo->out_color_space >= JCS_EXT_RGB &&
        cinfo->out_color_space <= JCS_EXT_ARGB)))
    return FALSE;
 #endif
  /* ??? also need to test for upsample-time rescaling, when & if supported */
  return TRUE;                  /* by golly, it'll work... */
 #else
@ -580,6 +568,7 @@ master_selection(j_decompress_ptr cinfo)
   */
  cinfo->master->first_iMCU_col = 0;
  cinfo->master->last_iMCU_col = cinfo->MCUs_per_row - 1;
  cinfo->master->last_good_iMCU_row = 0;
 #ifdef D_MULTISCAN_FILES_SUPPORTED
  /* If jpeg_start_decompress will read the whole file, initialize
--- a/3rdparty/libjpeg-turbo/src/jdmrg565.c
+++ b/3rdparty/libjpeg-turbo/src/jdmrg565.c
@ -43,20 +43,20 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  /* Loop for each pair of output pixels */
  for (col = cinfo->output_width >> 1; col > 0; col--) {
    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
+    cb = *inptr1++;
-    cr = GETJSAMPLE(*inptr2++);
+    cr = *inptr2++;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
    /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
    r = range_limit[y + cred];
    g = range_limit[y + cgreen];
    b = range_limit[y + cblue];
    rgb = PACK_SHORT_565(r, g, b);
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
    r = range_limit[y + cred];
    g = range_limit[y + cgreen];
    b = range_limit[y + cblue];
@ -68,12 +68,12 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  /* If image width is odd, do the last output column separately */
  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
+    cb = *inptr1;
-    cr = GETJSAMPLE(*inptr2);
+    cr = *inptr2;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr0);
+    y  = *inptr0;
    r = range_limit[y + cred];
    g = range_limit[y + cgreen];
    b = range_limit[y + cblue];
@ -115,21 +115,21 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
  /* Loop for each pair of output pixels */
  for (col = cinfo->output_width >> 1; col > 0; col--) {
    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
+    cb = *inptr1++;
-    cr = GETJSAMPLE(*inptr2++);
+    cr = *inptr2++;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
    /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
    r = range_limit[DITHER_565_R(y + cred, d0)];
    g = range_limit[DITHER_565_G(y + cgreen, d0)];
    b = range_limit[DITHER_565_B(y + cblue, d0)];
    d0 = DITHER_ROTATE(d0);
    rgb = PACK_SHORT_565(r, g, b);
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
    r = range_limit[DITHER_565_R(y + cred, d0)];
    g = range_limit[DITHER_565_G(y + cgreen, d0)];
    b = range_limit[DITHER_565_B(y + cblue, d0)];
@ -142,12 +142,12 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
  /* If image width is odd, do the last output column separately */
  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
+    cb = *inptr1;
-    cr = GETJSAMPLE(*inptr2);
+    cr = *inptr2;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr0);
+    y  = *inptr0;
    r = range_limit[DITHER_565_R(y + cred, d0)];
    g = range_limit[DITHER_565_G(y + cgreen, d0)];
    b = range_limit[DITHER_565_B(y + cblue, d0)];
@ -189,20 +189,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  /* Loop for each group of output pixels */
  for (col = cinfo->output_width >> 1; col > 0; col--) {
    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
+    cb = *inptr1++;
-    cr = GETJSAMPLE(*inptr2++);
+    cr = *inptr2++;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
    /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
    r = range_limit[y + cred];
    g = range_limit[y + cgreen];
    b = range_limit[y + cblue];
    rgb = PACK_SHORT_565(r, g, b);
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
    r = range_limit[y + cred];
    g = range_limit[y + cgreen];
    b = range_limit[y + cblue];
@ -211,13 +211,13 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    WRITE_TWO_PIXELS(outptr0, rgb);
    outptr0 += 4;
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
    r = range_limit[y + cred];
    g = range_limit[y + cgreen];
    b = range_limit[y + cblue];
    rgb = PACK_SHORT_565(r, g, b);
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
    r = range_limit[y + cred];
    g = range_limit[y + cgreen];
    b = range_limit[y + cblue];
@ -229,20 +229,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  /* If image width is odd, do the last output column separately */
  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
+    cb = *inptr1;
-    cr = GETJSAMPLE(*inptr2);
+    cr = *inptr2;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr00);
+    y  = *inptr00;
    r = range_limit[y + cred];
    g = range_limit[y + cgreen];
    b = range_limit[y + cblue];
    rgb = PACK_SHORT_565(r, g, b);
    *(INT16 *)outptr0 = (INT16)rgb;
-    y  = GETJSAMPLE(*inptr01);
+    y  = *inptr01;
    r = range_limit[y + cred];
    g = range_limit[y + cgreen];
    b = range_limit[y + cblue];
@ -287,21 +287,21 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
  /* Loop for each group of output pixels */
  for (col = cinfo->output_width >> 1; col > 0; col--) {
    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
+    cb = *inptr1++;
-    cr = GETJSAMPLE(*inptr2++);
+    cr = *inptr2++;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
    /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
    r = range_limit[DITHER_565_R(y + cred, d0)];
    g = range_limit[DITHER_565_G(y + cgreen, d0)];
    b = range_limit[DITHER_565_B(y + cblue, d0)];
    d0 = DITHER_ROTATE(d0);
    rgb = PACK_SHORT_565(r, g, b);
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
    r = range_limit[DITHER_565_R(y + cred, d0)];
    g = range_limit[DITHER_565_G(y + cgreen, d0)];
    b = range_limit[DITHER_565_B(y + cblue, d0)];
@ -311,14 +311,14 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
    WRITE_TWO_PIXELS(outptr0, rgb);
    outptr0 += 4;
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
    r = range_limit[DITHER_565_R(y + cred, d1)];
    g = range_limit[DITHER_565_G(y + cgreen, d1)];
    b = range_limit[DITHER_565_B(y + cblue, d1)];
    d1 = DITHER_ROTATE(d1);
    rgb = PACK_SHORT_565(r, g, b);
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
    r = range_limit[DITHER_565_R(y + cred, d1)];
    g = range_limit[DITHER_565_G(y + cgreen, d1)];
    b = range_limit[DITHER_565_B(y + cblue, d1)];
@ -331,20 +331,20 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
  /* If image width is odd, do the last output column separately */
  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
+    cb = *inptr1;
-    cr = GETJSAMPLE(*inptr2);
+    cr = *inptr2;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr00);
+    y  = *inptr00;
    r = range_limit[DITHER_565_R(y + cred, d0)];
    g = range_limit[DITHER_565_G(y + cgreen, d0)];
    b = range_limit[DITHER_565_B(y + cblue, d0)];
    rgb = PACK_SHORT_565(r, g, b);
    *(INT16 *)outptr0 = (INT16)rgb;
-    y  = GETJSAMPLE(*inptr01);
+    y  = *inptr01;
    r = range_limit[DITHER_565_R(y + cred, d1)];
    g = range_limit[DITHER_565_G(y + cgreen, d1)];
    b = range_limit[DITHER_565_B(y + cblue, d1)];
--- a/3rdparty/libjpeg-turbo/src/jdmrgext.c
+++ b/3rdparty/libjpeg-turbo/src/jdmrgext.c
@ -46,13 +46,13 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  /* Loop for each pair of output pixels */
  for (col = cinfo->output_width >> 1; col > 0; col--) {
    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
+    cb = *inptr1++;
-    cr = GETJSAMPLE(*inptr2++);
+    cr = *inptr2++;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
    /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
    outptr[RGB_RED] =   range_limit[y + cred];
    outptr[RGB_GREEN] = range_limit[y + cgreen];
    outptr[RGB_BLUE] =  range_limit[y + cblue];
@ -60,7 +60,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    outptr[RGB_ALPHA] = 0xFF;
 #endif
    outptr += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
    outptr[RGB_RED] =   range_limit[y + cred];
    outptr[RGB_GREEN] = range_limit[y + cgreen];
    outptr[RGB_BLUE] =  range_limit[y + cblue];
@ -71,12 +71,12 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  }
  /* If image width is odd, do the last output column separately */
  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
+    cb = *inptr1;
-    cr = GETJSAMPLE(*inptr2);
+    cr = *inptr2;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr0);
+    y  = *inptr0;
    outptr[RGB_RED] =   range_limit[y + cred];
    outptr[RGB_GREEN] = range_limit[y + cgreen];
    outptr[RGB_BLUE] =  range_limit[y + cblue];
@ -120,13 +120,13 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  /* Loop for each group of output pixels */
  for (col = cinfo->output_width >> 1; col > 0; col--) {
    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
+    cb = *inptr1++;
-    cr = GETJSAMPLE(*inptr2++);
+    cr = *inptr2++;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
    /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
    outptr0[RGB_RED] =   range_limit[y + cred];
    outptr0[RGB_GREEN] = range_limit[y + cgreen];
    outptr0[RGB_BLUE] =  range_limit[y + cblue];
@ -134,7 +134,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    outptr0[RGB_ALPHA] = 0xFF;
 #endif
    outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
    outptr0[RGB_RED] =   range_limit[y + cred];
    outptr0[RGB_GREEN] = range_limit[y + cgreen];
    outptr0[RGB_BLUE] =  range_limit[y + cblue];
@ -142,7 +142,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    outptr0[RGB_ALPHA] = 0xFF;
 #endif
    outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
    outptr1[RGB_RED] =   range_limit[y + cred];
    outptr1[RGB_GREEN] = range_limit[y + cgreen];
    outptr1[RGB_BLUE] =  range_limit[y + cblue];
@ -150,7 +150,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    outptr1[RGB_ALPHA] = 0xFF;
 #endif
    outptr1 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
    outptr1[RGB_RED] =   range_limit[y + cred];
    outptr1[RGB_GREEN] = range_limit[y + cgreen];
    outptr1[RGB_BLUE] =  range_limit[y + cblue];
@ -161,19 +161,19 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  }
  /* If image width is odd, do the last output column separately */
  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
+    cb = *inptr1;
-    cr = GETJSAMPLE(*inptr2);
+    cr = *inptr2;
    cred = Crrtab[cr];
    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr00);
+    y  = *inptr00;
    outptr0[RGB_RED] =   range_limit[y + cred];
    outptr0[RGB_GREEN] = range_limit[y + cgreen];
    outptr0[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
    outptr0[RGB_ALPHA] = 0xFF;
 #endif
-    y  = GETJSAMPLE(*inptr01);
+    y  = *inptr01;
    outptr1[RGB_RED] =   range_limit[y + cred];
    outptr1[RGB_GREEN] = range_limit[y + cgreen];
    outptr1[RGB_BLUE] =  range_limit[y + cblue];
--- a/3rdparty/libjpeg-turbo/src/jdphuff.c
+++ b/3rdparty/libjpeg-turbo/src/jdphuff.c
@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1995-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2016, 2018, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018-2021, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -41,25 +41,6 @@ typedef struct {
  int last_dc_val[MAX_COMPS_IN_SCAN];   /* last DC coef for each component */
 } savable_state;
 /* This macro is to work around compilers with missing or broken
 * structure assignment.  You'll need to fix this code if you have
 * such a compiler and you change MAX_COMPS_IN_SCAN.
 */
 #ifndef NO_STRUCT_ASSIGN
 #define ASSIGN_STATE(dest, src)  ((dest) = (src))
 #else
 #if MAX_COMPS_IN_SCAN == 4
 #define ASSIGN_STATE(dest, src) \
  ((dest).EOBRUN = (src).EOBRUN, \
   (dest).last_dc_val[0] = (src).last_dc_val[0], \
   (dest).last_dc_val[1] = (src).last_dc_val[1], \
   (dest).last_dc_val[2] = (src).last_dc_val[2], \
   (dest).last_dc_val[3] = (src).last_dc_val[3])
 #endif
 #endif
 typedef struct {
  struct jpeg_entropy_decoder pub; /* public fields */
@ -102,7 +83,7 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo)
  boolean is_DC_band, bad;
  int ci, coefi, tbl;
  d_derived_tbl **pdtbl;
-  int *coef_bit_ptr;
+  int *coef_bit_ptr, *prev_coef_bit_ptr;
  jpeg_component_info *compptr;
  is_DC_band = (cinfo->Ss == 0);
@ -143,8 +124,15 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo)
  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
    int cindex = cinfo->cur_comp_info[ci]->component_index;
    coef_bit_ptr = &cinfo->coef_bits[cindex][0];
    prev_coef_bit_ptr = &cinfo->coef_bits[cindex + cinfo->num_components][0];
    if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
      WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
    for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) {
      if (cinfo->input_scan_number > 1)
        prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi];
      else
        prev_coef_bit_ptr[coefi] = 0;
    }
    for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
      int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
      if (cinfo->Ah != expected)
@ -323,7 +311,7 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
    /* Load up working state */
    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
+    state = entropy->saved;
    /* Outer loop handles each block in the MCU */
@ -356,11 +344,12 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
    /* Completed MCU, so update state */
    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
+    entropy->saved = state;
  }
  /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
+  if (cinfo->restart_interval)
    entropy->restarts_to_go--;
  return TRUE;
 }
@ -444,7 +433,8 @@ decode_mcu_AC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
  }
  /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
+  if (cinfo->restart_interval)
    entropy->restarts_to_go--;
  return TRUE;
 }
@ -495,7 +485,8 @@ decode_mcu_DC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
  /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
+  if (cinfo->restart_interval)
    entropy->restarts_to_go--;
  return TRUE;
 }
@ -638,7 +629,8 @@ decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
  }
  /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
+  if (cinfo->restart_interval)
    entropy->restarts_to_go--;
  return TRUE;
@ -676,7 +668,7 @@ jinit_phuff_decoder(j_decompress_ptr cinfo)
  /* Create progression status table */
  cinfo->coef_bits = (int (*)[DCTSIZE2])
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                cinfo->num_components * DCTSIZE2 *
+                                cinfo->num_components * 2 * DCTSIZE2 *
                                sizeof(int));
  coef_bit_ptr = &cinfo->coef_bits[0][0];
  for (ci = 0; ci < cinfo->num_components; ci++)
--- a/3rdparty/libjpeg-turbo/src/jdsample.c
+++ b/3rdparty/libjpeg-turbo/src/jdsample.c
@ -8,7 +8,7 @@
 * Copyright (C) 2010, 2015-2016, D. R. Commander.
 * Copyright (C) 2014, MIPS Technologies, Inc., California.
 * Copyright (C) 2015, Google, Inc.
- * Copyright (C) 2019, Arm Limited.
+ * Copyright (C) 2019-2020, Arm Limited.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -177,7 +177,7 @@ int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    outptr = output_data[outrow];
    outend = outptr + cinfo->output_width;
    while (outptr < outend) {
-      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
+      invalue = *inptr++;
      for (h = h_expand; h > 0; h--) {
        *outptr++ = invalue;
      }
@ -213,7 +213,7 @@ h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    outptr = output_data[inrow];
    outend = outptr + cinfo->output_width;
    while (outptr < outend) {
-      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
+      invalue = *inptr++;
      *outptr++ = invalue;
      *outptr++ = invalue;
    }
@ -242,7 +242,7 @@ h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    outptr = output_data[outrow];
    outend = outptr + cinfo->output_width;
    while (outptr < outend) {
-      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
+      invalue = *inptr++;
      *outptr++ = invalue;
      *outptr++ = invalue;
    }
@ -283,20 +283,20 @@ h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    inptr = input_data[inrow];
    outptr = output_data[inrow];
    /* Special case for first column */
-    invalue = GETJSAMPLE(*inptr++);
+    invalue = *inptr++;
    *outptr++ = (JSAMPLE)invalue;
-    *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2);
+    *outptr++ = (JSAMPLE)((invalue * 3 + inptr[0] + 2) >> 2);
    for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
      /* General case: 3/4 * nearer pixel + 1/4 * further pixel */
-      invalue = GETJSAMPLE(*inptr++) * 3;
+      invalue = (*inptr++) * 3;
-      *outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2);
+      *outptr++ = (JSAMPLE)((invalue + inptr[-2] + 1) >> 2);
-      *outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(*inptr) + 2) >> 2);
+      *outptr++ = (JSAMPLE)((invalue + inptr[0] + 2) >> 2);
    }
    /* Special case for last column */
-    invalue = GETJSAMPLE(*inptr);
+    invalue = *inptr;
-    *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2);
+    *outptr++ = (JSAMPLE)((invalue * 3 + inptr[-1] + 1) >> 2);
    *outptr++ = (JSAMPLE)invalue;
  }
 }
@ -338,7 +338,7 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
      outptr = output_data[outrow++];
      for (colctr = 0; colctr < compptr->downsampled_width; colctr++) {
-        thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+        thiscolsum = (*inptr0++) * 3 + (*inptr1++);
        *outptr++ = (JSAMPLE)((thiscolsum + bias) >> 2);
      }
    }
@ -381,8 +381,8 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
      outptr = output_data[outrow++];
      /* Special case for first column */
-      thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+      thiscolsum = (*inptr0++) * 3 + (*inptr1++);
-      nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+      nextcolsum = (*inptr0++) * 3 + (*inptr1++);
      *outptr++ = (JSAMPLE)((thiscolsum * 4 + 8) >> 4);
      *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
      lastcolsum = thiscolsum;  thiscolsum = nextcolsum;
@ -390,7 +390,7 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
      for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
        /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
        /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
-        nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+        nextcolsum = (*inptr0++) * 3 + (*inptr1++);
        *outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
        *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
        lastcolsum = thiscolsum;  thiscolsum = nextcolsum;
@ -477,7 +477,13 @@ jinit_upsampler(j_decompress_ptr cinfo)
    } else if (h_in_group == h_out_group &&
               v_in_group * 2 == v_out_group && do_fancy) {
      /* Non-fancy upsampling is handled by the generic method */
-      upsample->methods[ci] = h1v2_fancy_upsample;
+#if defined(__arm__) || defined(__aarch64__) || \
    defined(_M_ARM) || defined(_M_ARM64)
      if (jsimd_can_h1v2_fancy_upsample())
        upsample->methods[ci] = jsimd_h1v2_fancy_upsample;
      else
 #endif
        upsample->methods[ci] = h1v2_fancy_upsample;
      upsample->pub.need_context_rows = TRUE;
    } else if (h_in_group * 2 == h_out_group &&
               v_in_group * 2 == v_out_group) {
--- a/3rdparty/libjpeg-turbo/src/jerror.h
+++ b/3rdparty/libjpeg-turbo/src/jerror.h
@ -207,6 +207,10 @@ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
 #endif
 #endif
 JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker")
 #if JPEG_LIB_VERSION < 70
 JMESSAGE(JERR_BAD_DROP_SAMPLING,
         "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
 #endif
 #ifdef JMAKE_ENUM_LIST
@ -252,6 +256,15 @@ JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker")
   (cinfo)->err->msg_parm.i[2] = (p3), \
   (cinfo)->err->msg_parm.i[3] = (p4), \
   (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
 #define ERREXIT6(cinfo, code, p1, p2, p3, p4, p5, p6) \
  ((cinfo)->err->msg_code = (code), \
   (cinfo)->err->msg_parm.i[0] = (p1), \
   (cinfo)->err->msg_parm.i[1] = (p2), \
   (cinfo)->err->msg_parm.i[2] = (p3), \
   (cinfo)->err->msg_parm.i[3] = (p4), \
   (cinfo)->err->msg_parm.i[4] = (p5), \
   (cinfo)->err->msg_parm.i[5] = (p6), \
   (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
 #define ERREXITS(cinfo, code, str) \
  ((cinfo)->err->msg_code = (code), \
   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
--- a/3rdparty/libjpeg-turbo/src/jidctint.c
+++ b/3rdparty/libjpeg-turbo/src/jidctint.c
@ -3,7 +3,7 @@
 *
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modification developed 2002-2009 by Guido Vollbeding.
+ * Modification developed 2002-2018 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
 * Copyright (C) 2015, 2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
@ -417,7 +417,7 @@ jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 /*
 * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 7x7 output block.
+ * producing a reduced-size 7x7 output block.
 *
 * Optimized algorithm with 12 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/14).
@ -1258,7 +1258,7 @@ jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 /*
 * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 11x11 output block.
+ * producing an 11x11 output block.
 *
 * Optimized algorithm with 24 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/22).
@ -2398,7 +2398,7 @@ jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
    tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
    /* Add fudge factor here for final descale. */
-    tmp0 += 1 << (CONST_BITS - PASS1_BITS - 1);
+    tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1);
    z1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
--- a/3rdparty/libjpeg-turbo/src/jmorecfg.h
+++ b/3rdparty/libjpeg-turbo/src/jmorecfg.h
@ -43,25 +43,11 @@
 #if BITS_IN_JSAMPLE == 8
 /* JSAMPLE should be the smallest type that will hold the values 0..255.
 * You can use a signed char by having GETJSAMPLE mask it with 0xFF.
 */
 #ifdef HAVE_UNSIGNED_CHAR
 typedef unsigned char JSAMPLE;
 #define GETJSAMPLE(value)  ((int)(value))
 #else /* not HAVE_UNSIGNED_CHAR */
 typedef char JSAMPLE;
 #ifdef __CHAR_UNSIGNED__
 #define GETJSAMPLE(value)  ((int)(value))
 #else
 #define GETJSAMPLE(value)  ((int)(value) & 0xFF)
 #endif /* __CHAR_UNSIGNED__ */
 #endif /* HAVE_UNSIGNED_CHAR */
 #define MAXJSAMPLE      255
 #define CENTERJSAMPLE   128
@ -97,22 +83,9 @@ typedef short JCOEF;
 * managers, this is also the data type passed to fread/fwrite.
 */
 #ifdef HAVE_UNSIGNED_CHAR
 typedef unsigned char JOCTET;
 #define GETJOCTET(value)  (value)
 #else /* not HAVE_UNSIGNED_CHAR */
 typedef char JOCTET;
 #ifdef __CHAR_UNSIGNED__
 #define GETJOCTET(value)  (value)
 #else
 #define GETJOCTET(value)  ((value) & 0xFF)
 #endif /* __CHAR_UNSIGNED__ */
 #endif /* HAVE_UNSIGNED_CHAR */
 /* These typedefs are used for various table entries and so forth.
 * They must be at least as wide as specified; but making them too big
@ -123,15 +96,7 @@ typedef char JOCTET;
 /* UINT8 must hold at least the values 0..255. */
 #ifdef HAVE_UNSIGNED_CHAR
 typedef unsigned char UINT8;
 #else /* not HAVE_UNSIGNED_CHAR */
 #ifdef __CHAR_UNSIGNED__
 typedef char UINT8;
 #else /* not __CHAR_UNSIGNED__ */
 typedef short UINT8;
 #endif /* __CHAR_UNSIGNED__ */
 #endif /* HAVE_UNSIGNED_CHAR */
 /* UINT16 must hold at least the values 0..65535. */
--- a/3rdparty/libjpeg-turbo/src/jpegint.h
+++ b/3rdparty/libjpeg-turbo/src/jpegint.h
@ -5,7 +5,7 @@
 * Copyright (C) 1991-1997, Thomas G. Lane.
 * Modified 1997-2009 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2016, D. R. Commander.
+ * Copyright (C) 2015-2016, 2019, D. R. Commander.
 * Copyright (C) 2015, Google, Inc.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
@ -158,6 +158,9 @@ struct jpeg_decomp_master {
  JDIMENSION first_MCU_col[MAX_COMPONENTS];
  JDIMENSION last_MCU_col[MAX_COMPONENTS];
  boolean jinit_upsampler_no_alloc;
  /* Last iMCU row that was successfully decoded */
  JDIMENSION last_good_iMCU_row;
 };
 /* Input control module */
--- a/3rdparty/libjpeg-turbo/src/jquant1.c
+++ b/3rdparty/libjpeg-turbo/src/jquant1.c
@ -479,7 +479,7 @@ color_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
    for (col = width; col > 0; col--) {
      pixcode = 0;
      for (ci = 0; ci < nc; ci++) {
-        pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
+        pixcode += colorindex[ci][*ptrin++];
      }
      *ptrout++ = (JSAMPLE)pixcode;
    }
@ -506,9 +506,9 @@ color_quantize3(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
    ptrin = input_buf[row];
    ptrout = output_buf[row];
    for (col = width; col > 0; col--) {
-      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]);
+      pixcode  = colorindex0[*ptrin++];
-      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]);
+      pixcode += colorindex1[*ptrin++];
-      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]);
+      pixcode += colorindex2[*ptrin++];
      *ptrout++ = (JSAMPLE)pixcode;
    }
  }
@ -552,7 +552,7 @@ quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
         * required amount of padding.
         */
        *output_ptr +=
-          colorindex_ci[GETJSAMPLE(*input_ptr) + dither[col_index]];
+          colorindex_ci[*input_ptr + dither[col_index]];
        input_ptr += nc;
        output_ptr++;
        col_index = (col_index + 1) & ODITHER_MASK;
@ -595,12 +595,9 @@ quantize3_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
    col_index = 0;
    for (col = width; col > 0; col--) {
-      pixcode  =
+      pixcode  = colorindex0[(*input_ptr++) + dither0[col_index]];
-        GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) + dither0[col_index]]);
+      pixcode += colorindex1[(*input_ptr++) + dither1[col_index]];
-      pixcode +=
+      pixcode += colorindex2[(*input_ptr++) + dither2[col_index]];
        GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) + dither1[col_index]]);
      pixcode +=
        GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) + dither2[col_index]]);
      *output_ptr++ = (JSAMPLE)pixcode;
      col_index = (col_index + 1) & ODITHER_MASK;
    }
@ -677,15 +674,15 @@ quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
         * The maximum error is +- MAXJSAMPLE; this sets the required size
         * of the range_limit array.
         */
-        cur += GETJSAMPLE(*input_ptr);
+        cur += *input_ptr;
-        cur = GETJSAMPLE(range_limit[cur]);
+        cur = range_limit[cur];
        /* Select output value, accumulate into output code for this pixel */
-        pixcode = GETJSAMPLE(colorindex_ci[cur]);
+        pixcode = colorindex_ci[cur];
        *output_ptr += (JSAMPLE)pixcode;
        /* Compute actual representation error at this pixel */
        /* Note: we can do this even though we don't have the final */
        /* pixel code, because the colormap is orthogonal. */
-        cur -= GETJSAMPLE(colormap_ci[pixcode]);
+        cur -= colormap_ci[pixcode];
        /* Compute error fractions to be propagated to adjacent pixels.
         * Add these into the running sums, and simultaneously shift the
         * next-line error sums left by 1 column.
--- a/3rdparty/libjpeg-turbo/src/jquant2.c
+++ b/3rdparty/libjpeg-turbo/src/jquant2.c
@ -215,9 +215,9 @@ prescan_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
    ptr = input_buf[row];
    for (col = width; col > 0; col--) {
      /* get pixel value and index into the histogram */
-      histp = &histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
+      histp = &histogram[ptr[0] >> C0_SHIFT]
-                        [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
+                        [ptr[1] >> C1_SHIFT]
-                        [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
+                        [ptr[2] >> C2_SHIFT];
      /* increment, check for overflow and undo increment if so. */
      if (++(*histp) <= 0)
        (*histp)--;
@ -665,7 +665,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
  for (i = 0; i < numcolors; i++) {
    /* We compute the squared-c0-distance term, then add in the other two. */
-    x = GETJSAMPLE(cinfo->colormap[0][i]);
+    x = cinfo->colormap[0][i];
    if (x < minc0) {
      tdist = (x - minc0) * C0_SCALE;
      min_dist = tdist * tdist;
@ -688,7 +688,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
      }
    }
-    x = GETJSAMPLE(cinfo->colormap[1][i]);
+    x = cinfo->colormap[1][i];
    if (x < minc1) {
      tdist = (x - minc1) * C1_SCALE;
      min_dist += tdist * tdist;
@ -710,7 +710,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
      }
    }
-    x = GETJSAMPLE(cinfo->colormap[2][i]);
+    x = cinfo->colormap[2][i];
    if (x < minc2) {
      tdist = (x - minc2) * C2_SCALE;
      min_dist += tdist * tdist;
@ -788,13 +788,13 @@ find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
 #define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
  for (i = 0; i < numcolors; i++) {
-    icolor = GETJSAMPLE(colorlist[i]);
+    icolor = colorlist[i];
    /* Compute (square of) distance from minc0/c1/c2 to this color */
-    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
+    inc0 = (minc0 - cinfo->colormap[0][icolor]) * C0_SCALE;
    dist0 = inc0 * inc0;
-    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
+    inc1 = (minc1 - cinfo->colormap[1][icolor]) * C1_SCALE;
    dist0 += inc1 * inc1;
-    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
+    inc2 = (minc2 - cinfo->colormap[2][icolor]) * C2_SCALE;
    dist0 += inc2 * inc2;
    /* Form the initial difference increments */
    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
@ -879,7 +879,7 @@ fill_inverse_cmap(j_decompress_ptr cinfo, int c0, int c1, int c2)
    for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
      cachep = &histogram[c0 + ic0][c1 + ic1][c2];
      for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
-        *cachep++ = (histcell)(GETJSAMPLE(*cptr++) + 1);
+        *cachep++ = (histcell)((*cptr++) + 1);
      }
    }
  }
@ -909,9 +909,9 @@ pass2_no_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
    outptr = output_buf[row];
    for (col = width; col > 0; col--) {
      /* get pixel value and index into the cache */
-      c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT;
+      c0 = (*inptr++) >> C0_SHIFT;
-      c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT;
+      c1 = (*inptr++) >> C1_SHIFT;
-      c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT;
+      c2 = (*inptr++) >> C2_SHIFT;
      cachep = &histogram[c0][c1][c2];
      /* If we have not seen this color before, find nearest colormap entry */
      /* and update the cache */
@ -996,12 +996,12 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
       * this sets the required size of the range_limit array.
       */
-      cur0 += GETJSAMPLE(inptr[0]);
+      cur0 += inptr[0];
-      cur1 += GETJSAMPLE(inptr[1]);
+      cur1 += inptr[1];
-      cur2 += GETJSAMPLE(inptr[2]);
+      cur2 += inptr[2];
-      cur0 = GETJSAMPLE(range_limit[cur0]);
+      cur0 = range_limit[cur0];
-      cur1 = GETJSAMPLE(range_limit[cur1]);
+      cur1 = range_limit[cur1];
-      cur2 = GETJSAMPLE(range_limit[cur2]);
+      cur2 = range_limit[cur2];
      /* Index into the cache with adjusted pixel value */
      cachep =
        &histogram[cur0 >> C0_SHIFT][cur1 >> C1_SHIFT][cur2 >> C2_SHIFT];
@ -1015,9 +1015,9 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
        register int pixcode = *cachep - 1;
        *outptr = (JSAMPLE)pixcode;
        /* Compute representation error for this pixel */
-        cur0 -= GETJSAMPLE(colormap0[pixcode]);
+        cur0 -= colormap0[pixcode];
-        cur1 -= GETJSAMPLE(colormap1[pixcode]);
+        cur1 -= colormap1[pixcode];
-        cur2 -= GETJSAMPLE(colormap2[pixcode]);
+        cur2 -= colormap2[pixcode];
      }
      /* Compute error fractions to be propagated to adjacent pixels.
       * Add these into the running sums, and simultaneously shift the
--- a/3rdparty/libjpeg-turbo/src/jsimd.h
+++ b/3rdparty/libjpeg-turbo/src/jsimd.h
@ -4,6 +4,7 @@
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright (C) 2011, 2014, D. R. Commander.
 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
 * Copyright (C) 2020, Arm Limited.
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -75,6 +76,7 @@ EXTERN(void) jsimd_int_upsample(j_decompress_ptr cinfo,
 EXTERN(int) jsimd_can_h2v2_fancy_upsample(void);
 EXTERN(int) jsimd_can_h2v1_fancy_upsample(void);
 EXTERN(int) jsimd_can_h1v2_fancy_upsample(void);
 EXTERN(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,
                                       jpeg_component_info *compptr,
@ -84,6 +86,10 @@ EXTERN(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,
                                       jpeg_component_info *compptr,
                                       JSAMPARRAY input_data,
                                       JSAMPARRAY *output_data_ptr);
 EXTERN(void) jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,
                                       jpeg_component_info *compptr,
                                       JSAMPARRAY input_data,
                                       JSAMPARRAY *output_data_ptr);
 EXTERN(int) jsimd_can_h2v2_merged_upsample(void);
 EXTERN(int) jsimd_can_h2v1_merged_upsample(void);
--- a/3rdparty/libjpeg-turbo/src/jsimd_none.c
+++ b/3rdparty/libjpeg-turbo/src/jsimd_none.c
@ -4,6 +4,7 @@
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright (C) 2009-2011, 2014, D. R. Commander.
 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
 * Copyright (C) 2020, Arm Limited.
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -169,6 +170,12 @@ jsimd_can_h2v1_fancy_upsample(void)
  return 0;
 }
 GLOBAL(int)
 jsimd_can_h1v2_fancy_upsample(void)
 {
  return 0;
 }
 GLOBAL(void)
 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
@ -181,6 +188,12 @@ jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 {
 }
 GLOBAL(void)
 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
 }
 GLOBAL(int)
 jsimd_can_h2v2_merged_upsample(void)
 {
--- a/3rdparty/libjpeg-turbo/src/jversion.h
+++ b/3rdparty/libjpeg-turbo/src/jversion.h
@ -2,9 +2,9 @@
 * jversion.h
 *
 * This file was part of the Independent JPEG Group's software:
- * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2012-2020, D. R. Commander.
+ * Copyright (C) 2010, 2012-2021, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@ -37,9 +37,9 @@
 */
 #define JCOPYRIGHT \
-  "Copyright (C) 2009-2020 D. R. Commander\n" \
+  "Copyright (C) 2009-2021 D. R. Commander\n" \
  "Copyright (C) 2015, 2020 Google, Inc.\n" \
-  "Copyright (C) 2019 Arm Limited\n" \
+  "Copyright (C) 2019-2020 Arm Limited\n" \
  "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
  "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
  "Copyright (C) 2015 Intel Corporation\n" \
@ -48,7 +48,7 @@
  "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
  "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
  "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
-  "Copyright (C) 1991-2017 Thomas G. Lane, Guido Vollbeding"
+  "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding"
 #define JCOPYRIGHT_SHORT \
-  "Copyright (C) 1991-2020 The libjpeg-turbo Project and many others"
+  "Copyright (C) 1991-2021 The libjpeg-turbo Project and many others"