Merge branch 4.x

2025-06-11 03:33:28 +08:00 · 2025-02-19 09:31:51 +03:00 · 2025-02-19 09:31:51 +03:00 · 1483504702
commit 1483504702
parent 4d15b2a33f 6092499907
130 changed files with 6998 additions and 3110 deletions
--- a/3rdparty/carotene/hal/tegra_hal.hpp
+++ b/3rdparty/carotene/hal/tegra_hal.hpp
@ -119,7 +119,7 @@ private: \
 #define TEGRA_BINARYOP(type, op, src1, sz1, src2, sz2, dst, sz, w, h) \
 ( \
    CAROTENE_NS::isSupportedConfiguration() ? \
-    parallel_for_(Range(0, h), \
+    parallel_for_(cv::Range(0, h), \
    TegraGenOp_##op##_Invoker<const type, type>(src1, sz1, src2, sz2, dst, sz, w, h), \
    (w * h) / static_cast<double>(1<<16)), \
    CV_HAL_ERROR_OK \
@ -154,7 +154,7 @@ TegraUnaryOp_Invoker(bitwiseNot, bitwiseNot)
 #define TEGRA_UNARYOP(type, op, src1, sz1, dst, sz, w, h) \
 ( \
    CAROTENE_NS::isSupportedConfiguration() ? \
-    parallel_for_(Range(0, h), \
+    parallel_for_(cv::Range(0, h), \
    TegraGenOp_##op##_Invoker<const type, type>(src1, sz1, dst, sz, w, h), \
    (w * h) / static_cast<double>(1<<16)), \
    CV_HAL_ERROR_OK \
@ -254,32 +254,32 @@ TegraGenOp_Invoker(cmpLE, cmpGE, 2, 1, 0, RANGE_DATA(ST, src2_data, src2_step),
 ( \
    CAROTENE_NS::isSupportedConfiguration() ? \
        ((op) == cv::CMP_EQ) ? \
-        parallel_for_(Range(0, h), \
+        parallel_for_(cv::Range(0, h), \
        TegraGenOp_cmpEQ_Invoker<const type, CAROTENE_NS::u8>(src1, sz1, src2, sz2, dst, sz, w, h), \
        (w * h) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
        ((op) == cv::CMP_NE) ? \
-        parallel_for_(Range(0, h), \
+        parallel_for_(cv::Range(0, h), \
        TegraGenOp_cmpNE_Invoker<const type, CAROTENE_NS::u8>(src1, sz1, src2, sz2, dst, sz, w, h), \
        (w * h) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
        ((op) == cv::CMP_GT) ? \
-        parallel_for_(Range(0, h), \
+        parallel_for_(cv::Range(0, h), \
        TegraGenOp_cmpGT_Invoker<const type, CAROTENE_NS::u8>(src1, sz1, src2, sz2, dst, sz, w, h), \
        (w * h) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
        ((op) == cv::CMP_GE) ? \
-        parallel_for_(Range(0, h), \
+        parallel_for_(cv::Range(0, h), \
        TegraGenOp_cmpGE_Invoker<const type, CAROTENE_NS::u8>(src1, sz1, src2, sz2, dst, sz, w, h), \
        (w * h) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
        ((op) == cv::CMP_LT) ? \
-        parallel_for_(Range(0, h), \
+        parallel_for_(cv::Range(0, h), \
        TegraGenOp_cmpLT_Invoker<const type, CAROTENE_NS::u8>(src1, sz1, src2, sz2, dst, sz, w, h), \
        (w * h) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
        ((op) == cv::CMP_LE) ? \
-        parallel_for_(Range(0, h), \
+        parallel_for_(cv::Range(0, h), \
        TegraGenOp_cmpLE_Invoker<const type, CAROTENE_NS::u8>(src1, sz1, src2, sz2, dst, sz, w, h), \
        (w * h) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
@ -310,7 +310,7 @@ TegraGenOp_Invoker(cmpLE, cmpGE, 2, 1, 0, RANGE_DATA(ST, src2_data, src2_step),
 #define TEGRA_BINARYOPSCALE(type, op, src1, sz1, src2, sz2, dst, sz, w, h, scales) \
 ( \
    CAROTENE_NS::isSupportedConfiguration() ? \
-    parallel_for_(Range(0, h), \
+    parallel_for_(cv::Range(0, h), \
    TegraGenOp_##op##_Invoker<const type, type>(src1, sz1, src2, sz2, dst, sz, w, h, scales), \
    (w * h) / static_cast<double>(1<<16)), \
    CV_HAL_ERROR_OK \
@ -332,7 +332,7 @@ TegraBinaryOpScale_Invoker(divf, div, 1, scale)
 #define TEGRA_UNARYOPSCALE(type, op, src1, sz1, dst, sz, w, h, scales) \
 ( \
    CAROTENE_NS::isSupportedConfiguration() ? \
-    parallel_for_(Range(0, h), \
+    parallel_for_(cv::Range(0, h), \
    TegraGenOp_##op##_Invoker<const type, type>(src1, sz1, dst, sz, w, h, scales), \
    (w * h) / static_cast<double>(1<<16)), \
    CV_HAL_ERROR_OK \
@ -928,17 +928,17 @@ TegraRowOp_Invoker(split4, split4, 1, 4, 0, RANGE_DATA(ST, src1_data, 4*sizeof(S
 ( \
    CAROTENE_NS::isSupportedConfiguration() ? \
        cn == 2 ? \
-        parallel_for_(Range(0, len), \
+        parallel_for_(cv::Range(0, len), \
        TegraRowOp_split2_Invoker<const type, type>(src, dst[0], dst[1]), \
        (len) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
        cn == 3 ? \
-        parallel_for_(Range(0, len), \
+        parallel_for_(cv::Range(0, len), \
        TegraRowOp_split3_Invoker<const type, type>(src, dst[0], dst[1], dst[2]), \
        (len) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
        cn == 4 ? \
-        parallel_for_(Range(0, len), \
+        parallel_for_(cv::Range(0, len), \
        TegraRowOp_split4_Invoker<const type, type>(src, dst[0], dst[1], dst[2], dst[3]), \
        (len) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
@ -990,17 +990,17 @@ TegraRowOp_Invoker(combine4, combine4, 4, 1, 0, RANGE_DATA(ST, src1_data, sizeof
 ( \
    CAROTENE_NS::isSupportedConfiguration() ? \
        cn == 2 ? \
-        parallel_for_(Range(0, len), \
+        parallel_for_(cv::Range(0, len), \
        TegraRowOp_combine2_Invoker<const type, type>(src[0], src[1], dst), \
        (len) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
        cn == 3 ? \
-        parallel_for_(Range(0, len), \
+        parallel_for_(cv::Range(0, len), \
        TegraRowOp_combine3_Invoker<const type, type>(src[0], src[1], src[2], dst), \
        (len) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
        cn == 4 ? \
-        parallel_for_(Range(0, len), \
+        parallel_for_(cv::Range(0, len), \
        TegraRowOp_combine4_Invoker<const type, type>(src[0], src[1], src[2], src[3], dst), \
        (len) / static_cast<double>(1<<16)), \
        CV_HAL_ERROR_OK : \
@ -1033,7 +1033,7 @@ TegraRowOp_Invoker(phase, phase, 2, 1, 1, RANGE_DATA(ST, src1_data, sizeof(CAROT
 #define TEGRA_FASTATAN(y, x, dst, len, angleInDegrees) \
 ( \
    CAROTENE_NS::isSupportedConfiguration() ? \
-    parallel_for_(Range(0, len), \
+    parallel_for_(cv::Range(0, len), \
    TegraRowOp_phase_Invoker<const CAROTENE_NS::f32, CAROTENE_NS::f32>(x, y, dst, angleInDegrees ? 1.0f : M_PI/180), \
    (len) / static_cast<double>(1<<16)), \
    CV_HAL_ERROR_OK \
@ -1049,7 +1049,7 @@ TegraRowOp_Invoker(magnitude, magnitude, 2, 1, 0, RANGE_DATA(ST, src1_data, size
 #define TEGRA_MAGNITUDE(x, y, dst, len) \
 ( \
    CAROTENE_NS::isSupportedConfiguration() ? \
-    parallel_for_(Range(0, len), \
+    parallel_for_(cv::Range(0, len), \
    TegraRowOp_magnitude_Invoker<const CAROTENE_NS::f32, CAROTENE_NS::f32>(x, y, dst), \
    (len) / static_cast<double>(1<<16)), \
    CV_HAL_ERROR_OK \
@ -1563,17 +1563,17 @@ TegraCvtColor_Invoker(rgbx2bgrx, rgbx2bgrx, src_data + static_cast<size_t>(range
        scn == 3 ? \
            dcn == 3 ? \
                swapBlue ? \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_rgb2bgr_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)), \
                    CV_HAL_ERROR_OK : \
                    CV_HAL_ERROR_NOT_IMPLEMENTED : \
            dcn == 4 ? \
                (swapBlue ? \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_rgb2bgrx_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) : \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_rgb2rgbx_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) ), \
                CV_HAL_ERROR_OK : \
@ -1581,16 +1581,16 @@ TegraCvtColor_Invoker(rgbx2bgrx, rgbx2bgrx, src_data + static_cast<size_t>(range
        scn == 4 ? \
            dcn == 3 ? \
                (swapBlue ? \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_rgbx2bgr_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) : \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_rgbx2rgb_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) ), \
                CV_HAL_ERROR_OK : \
            dcn == 4 ? \
                swapBlue ? \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_rgbx2bgrx_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)), \
                    CV_HAL_ERROR_OK : \
@ -1613,19 +1613,19 @@ TegraCvtColor_Invoker(rgbx2rgb565, rgbx2rgb565, src_data + static_cast<size_t>(r
    greenBits == 6 && CAROTENE_NS::isSupportedConfiguration() ? \
        scn == 3 ? \
            (swapBlue ? \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_rgb2bgr565_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) : \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_rgb2rgb565_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) ), \
            CV_HAL_ERROR_OK : \
        scn == 4 ? \
            (swapBlue ? \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_rgbx2bgr565_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) : \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_rgbx2rgb565_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) ), \
            CV_HAL_ERROR_OK : \
@ -1646,19 +1646,19 @@ TegraCvtColor_Invoker(bgrx2gray, bgrx2gray, CAROTENE_NS::COLOR_SPACE_BT601, src_
    depth == CV_8U && CAROTENE_NS::isSupportedConfiguration() ? \
        scn == 3 ? \
            (swapBlue ? \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_rgb2gray_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) : \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_bgr2gray_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) ), \
            CV_HAL_ERROR_OK : \
        scn == 4 ? \
            (swapBlue ? \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_rgbx2gray_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) : \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_bgrx2gray_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) ), \
            CV_HAL_ERROR_OK : \
@ -1674,12 +1674,12 @@ TegraCvtColor_Invoker(gray2rgbx, gray2rgbx, src_data + static_cast<size_t>(range
 ( \
    depth == CV_8U && CAROTENE_NS::isSupportedConfiguration() ? \
        dcn == 3 ? \
-            parallel_for_(Range(0, height), \
+            parallel_for_(cv::Range(0, height), \
            TegraCvtColor_gray2rgb_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
            (width * height) / static_cast<double>(1<<16)), \
            CV_HAL_ERROR_OK : \
        dcn == 4 ? \
-            parallel_for_(Range(0, height), \
+            parallel_for_(cv::Range(0, height), \
            TegraCvtColor_gray2rgbx_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
            (width * height) / static_cast<double>(1<<16)), \
            CV_HAL_ERROR_OK : \
@ -1700,19 +1700,19 @@ TegraCvtColor_Invoker(bgrx2ycrcb, bgrx2ycrcb, src_data + static_cast<size_t>(ran
    isCbCr && depth == CV_8U && CAROTENE_NS::isSupportedConfiguration() ? \
        scn == 3 ? \
            (swapBlue ? \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_rgb2ycrcb_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) : \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_bgr2ycrcb_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) ), \
            CV_HAL_ERROR_OK : \
        scn == 4 ? \
            (swapBlue ? \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_rgbx2ycrcb_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) : \
-                parallel_for_(Range(0, height), \
+                parallel_for_(cv::Range(0, height), \
                TegraCvtColor_bgrx2ycrcb_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                (width * height) / static_cast<double>(1<<16)) ), \
            CV_HAL_ERROR_OK : \
@ -1742,34 +1742,34 @@ TegraCvtColor_Invoker(bgrx2hsvf, bgrx2hsv, src_data + static_cast<size_t>(range.
        scn == 3 ? \
            (swapBlue ? \
                isFullRange ? \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_rgb2hsvf_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) : \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_rgb2hsv_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) : \
                isFullRange ? \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_bgr2hsvf_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) : \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_bgr2hsv_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) ), \
            CV_HAL_ERROR_OK : \
        scn == 4 ? \
            (swapBlue ? \
                isFullRange ? \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_rgbx2hsvf_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) : \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_rgbx2hsv_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) : \
                isFullRange ? \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_bgrx2hsvf_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) : \
-                    parallel_for_(Range(0, height), \
+                    parallel_for_(cv::Range(0, height), \
                    TegraCvtColor_bgrx2hsv_Invoker(src_data, src_step, dst_data, dst_step, width, height), \
                    (width * height) / static_cast<double>(1<<16)) ), \
            CV_HAL_ERROR_OK : \
--- a/3rdparty/fastcv/fastcv.cmake
+++ b/3rdparty/fastcv/fastcv.cmake
@ -1,7 +1,7 @@
 function(download_fastcv root_dir)

  # Commit SHA in the opencv_3rdparty repo
-  set(FASTCV_COMMIT "dc5d58018f3af915a8d209386d2c58c0501c0f2c")
+  set(FASTCV_COMMIT "f4413cc2ab7233fdfc383a4cded402c072677fb0")

  # Define actual FastCV versions
  if(ANDROID)
@ -16,14 +16,15 @@ function(download_fastcv root_dir)
    endif()
  elseif(UNIX AND NOT APPLE AND NOT IOS AND NOT XROS)
    if(AARCH64)
-      set(FCV_PACKAGE_NAME  "fastcv_linux_aarch64_2024_12_11.tgz")
-      set(FCV_PACKAGE_HASH  "7b33ad833e6f15ab6d4ec64fa3c17acd")
+      set(FCV_PACKAGE_NAME  "fastcv_linux_aarch64_2025_02_12.tgz")
+      set(FCV_PACKAGE_HASH  "33ac2a59cf3e7d6402eee2e010de1202")
    else()
      message("FastCV: fastcv lib for 32-bit Linux is not supported for now!")
    endif()
  endif(ANDROID)

  # Download Package
+
  set(OPENCV_FASTCV_URL "https://raw.githubusercontent.com/opencv/opencv_3rdparty/${FASTCV_COMMIT}/fastcv/")

  ocv_download( FILENAME        ${FCV_PACKAGE_NAME}
--- a/3rdparty/hal_rvv/hal_rvv.hpp
+++ b/3rdparty/hal_rvv/hal_rvv.hpp
@ -22,6 +22,12 @@
 #if defined(__riscv_v) && __riscv_v == 1000000
 #include "hal_rvv_1p0/merge.hpp" // core
 #include "hal_rvv_1p0/mean.hpp" // core
+#include "hal_rvv_1p0/norm.hpp" // core
+#include "hal_rvv_1p0/norm_diff.hpp" // core
+#include "hal_rvv_1p0/convert_scale.hpp" // core
+#include "hal_rvv_1p0/minmax.hpp" // core
+#include "hal_rvv_1p0/atan.hpp" // core
+#include "hal_rvv_1p0/split.hpp" // core
 #endif

 #endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/atan.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/atan.hpp
@ -0,0 +1,128 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level
+// directory of this distribution and at http://opencv.org/license.html.
+#pragma once
+
+#undef cv_hal_fastAtan32f
+#define cv_hal_fastAtan32f cv::cv_hal_rvv::fast_atan_32
+
+#undef cv_hal_fastAtan64f
+#define cv_hal_fastAtan64f cv::cv_hal_rvv::fast_atan_64
+
+#include <riscv_vector.h>
+
+#include <cfloat>
+
+namespace cv::cv_hal_rvv {
+
+namespace detail {
+// Odd-polynomial coefficients of the atan approximation, each pre-multiplied by 180/pi (ref: mathfuncs_core.simd.hpp).
+static constexpr float pi = CV_PI;
+static constexpr float atan2_p1 = 0.9997878412794807F * (180 / pi);
+static constexpr float atan2_p3 = -0.3258083974640975F * (180 / pi);
+static constexpr float atan2_p5 = 0.1555786518463281F * (180 / pi);
+static constexpr float atan2_p7 = -0.04432655554792128F * (180 / pi);
+// Approximate angle of (vx, vy) for vl lanes; the passed coefficients and angle_90_deg determine the output unit.
+__attribute__((always_inline)) inline vfloat32m4_t
+rvv_atan_f32(vfloat32m4_t vy, vfloat32m4_t vx, size_t vl, float p7,
+             vfloat32m4_t vp5, vfloat32m4_t vp3, vfloat32m4_t vp1,
+             float angle_90_deg) {
+    const auto ax = __riscv_vfabs(vx, vl); // |x|
+    const auto ay = __riscv_vfabs(vy, vl); // |y|
+    const auto c = __riscv_vfdiv( // c = min(ax, ay) / (max(ax, ay) + FLT_EPSILON)
+        __riscv_vfmin(ax, ay, vl),
+        __riscv_vfadd(__riscv_vfmax(ax, ay, vl), FLT_EPSILON, vl), vl); // the epsilon keeps the divisor non-zero for x == y == 0
+    const auto c2 = __riscv_vfmul(c, c, vl);
+    // Evaluate (((p7*c2 + p5)*c2 + p3)*c2 + p1)*c as a chain of fused multiply-adds.
+    auto a = __riscv_vfmadd(c2, p7, vp5, vl);
+    a = __riscv_vfmadd(a, c2, vp3, vl);
+    a = __riscv_vfmadd(a, c2, vp1, vl);
+    a = __riscv_vfmul(a, c, vl);
+    // Lanes with |x| < |y|: a = angle_90_deg - a (masked reverse-subtract; the remaining lanes keep a).
+    const auto mask = __riscv_vmflt(ax, ay, vl);
+    a = __riscv_vfrsub_mu(mask, a, a, angle_90_deg, vl);
+    // Quadrant fix-ups: x < 0 gives a = 2*angle_90_deg - a, then y < 0 gives a = 4*angle_90_deg - a.
+    a = __riscv_vfrsub_mu(__riscv_vmflt(vx, 0.F, vl), a, a, angle_90_deg * 2,
+                          vl);
+    a = __riscv_vfrsub_mu(__riscv_vmflt(vy, 0.F, vl), a, a, angle_90_deg * 4,
+                          vl);
+
+    return a;
+}
+
+} // namespace detail
+
+inline int fast_atan_32(const float *y, const float *x, float *dst, size_t n, // dst[i] = approximate angle of (x[i], y[i]) for i < n
+                        bool angle_in_deg) { // true: result in degrees, false: radians; always returns CV_HAL_ERROR_OK
+    const float scale = angle_in_deg ? 1.f : CV_PI / 180.f; // the detail:: coefficients are in degrees; scale converts them if radians are requested
+    const float p1 = detail::atan2_p1 * scale;
+    const float p3 = detail::atan2_p3 * scale;
+    const float p5 = detail::atan2_p5 * scale;
+    const float p7 = detail::atan2_p7 * scale;
+    const float angle_90_deg = 90.F * scale;
+    // Broadcast the vector-operand coefficients once, over the maximum e32/m4 vector length.
+    static size_t vlmax = __riscv_vsetvlmax_e32m4(); // function-local static: queried on the first call only
+    auto vp1 = __riscv_vfmv_v_f_f32m4(p1, vlmax);
+    auto vp3 = __riscv_vfmv_v_f_f32m4(p3, vlmax);
+    auto vp5 = __riscv_vfmv_v_f_f32m4(p5, vlmax);
+    // Process the input in chunks of vl elements until none remain.
+    for (size_t vl{}; n > 0; n -= vl) {
+        vl = __riscv_vsetvl_e32m4(n); // chunk size chosen by vsetvl for the n elements still pending
+
+        auto vy = __riscv_vle32_v_f32m4(y, vl);
+        auto vx = __riscv_vle32_v_f32m4(x, vl);
+
+        auto a =
+            detail::rvv_atan_f32(vy, vx, vl, p7, vp5, vp3, vp1, angle_90_deg);
+
+        __riscv_vse32(dst, a, vl);
+        // Advance all three pointers past the chunk just handled.
+        x += vl;
+        y += vl;
+        dst += vl;
+    }
+
+    return CV_HAL_ERROR_OK;
+}
+
+inline int fast_atan_64(const double *y, const double *x, double *dst, size_t n, // dst[i] = approximate angle of (x[i], y[i]) for i < n
+                        bool angle_in_deg) { // true: result in degrees, false: radians; always returns CV_HAL_ERROR_OK
+    // Reuses the float32 kernel: inputs are narrowed to f32 and the result is widened back (ref: mathfuncs_core.simd.hpp).
+    // The detail:: coefficients are in degrees; scale converts them if radians are requested.
+    const float scale = angle_in_deg ? 1.f : CV_PI / 180.f;
+    const float p1 = detail::atan2_p1 * scale;
+    const float p3 = detail::atan2_p3 * scale;
+    const float p5 = detail::atan2_p5 * scale;
+    const float p7 = detail::atan2_p7 * scale;
+    const float angle_90_deg = 90.F * scale;
+    // Broadcast the vector-operand coefficients once, over the maximum e32/m4 vector length.
+    static size_t vlmax = __riscv_vsetvlmax_e32m4(); // function-local static: queried on the first call only
+    auto vp1 = __riscv_vfmv_v_f_f32m4(p1, vlmax);
+    auto vp3 = __riscv_vfmv_v_f_f32m4(p3, vlmax);
+    auto vp5 = __riscv_vfmv_v_f_f32m4(p5, vlmax);
+    // Process the input in chunks of vl elements until none remain.
+    for (size_t vl{}; n > 0; n -= vl) {
+        vl = __riscv_vsetvl_e64m8(n); // e64/m8 has the same SEW/LMUL ratio as e32/m4, so this vl is reused for the f32 operations
+
+        auto wy = __riscv_vle64_v_f64m8(y, vl);
+        auto wx = __riscv_vle64_v_f64m8(x, vl);
+        // Narrow the f64 inputs to f32 for the shared kernel.
+        auto vy = __riscv_vfncvt_f_f_w_f32m4(wy, vl);
+        auto vx = __riscv_vfncvt_f_f_w_f32m4(wx, vl);
+
+        auto a =
+            detail::rvv_atan_f32(vy, vx, vl, p7, vp5, vp3, vp1, angle_90_deg);
+        // Widen the f32 result back to f64 before storing.
+        auto wa = __riscv_vfwcvt_f_f_v_f64m8(a, vl);
+
+        __riscv_vse64(dst, wa, vl);
+        // Advance all three pointers past the chunk just handled.
+        x += vl;
+        y += vl;
+        dst += vl;
+    }
+
+    return CV_HAL_ERROR_OK;
+}
+
+} // namespace cv::cv_hal_rvv
--- a/3rdparty/hal_rvv/hal_rvv_1p0/convert_scale.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/convert_scale.hpp
@ -0,0 +1,120 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef OPENCV_HAL_RVV_CONVERT_SCALE_HPP_INCLUDED
+#define OPENCV_HAL_RVV_CONVERT_SCALE_HPP_INCLUDED
+
+#include <riscv_vector.h>
+
+namespace cv { namespace cv_hal_rvv {
+
+#undef cv_hal_convertScale
+#define cv_hal_convertScale cv::cv_hal_rvv::convertScale
+
+inline int convertScale_8U8U(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height, double alpha, double beta)
+{
+    int vlmax = __riscv_vsetvlmax_e32m8();
+    auto vec_b = __riscv_vfmv_v_f_f32m8(beta, vlmax); // beta broadcast once as the float32 addend
+    float a = alpha; // double -> float: the multiply-add below runs in single precision
+    // For each of the height rows: dst[j] = src[j] * a + beta, evaluated in float32 and narrowed back to 8-bit unsigned.
+    for (int i = 0; i < height; i++)
+    {
+        const uchar* src_row = src + i * src_step; // steps are byte offsets between consecutive rows
+        uchar* dst_row = dst + i * dst_step;
+        int vl;
+        for (int j = 0; j < width; j += vl)
+        {
+            vl = __riscv_vsetvl_e8m2(width - j); // chunk size chosen by vsetvl for the elements left in this row
+            auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
+            auto vec_src_u16 = __riscv_vzext_vf2(vec_src, vl); // u8 -> u16 (zero-extend)
+            auto vec_src_f32 = __riscv_vfwcvt_f(vec_src_u16, vl); // u16 -> f32 (widening convert)
+            auto vec_fma = __riscv_vfmadd(vec_src_f32, a, vec_b, vl); // src * a + beta
+            auto vec_dst_u16 = __riscv_vfncvt_xu(vec_fma, vl); // f32 -> u16 (narrowing convert to unsigned)
+            auto vec_dst = __riscv_vnclipu(vec_dst_u16, 0, __RISCV_VXRM_RNU, vl); // u16 -> u8 narrowing clip with shift 0
+            __riscv_vse8_v_u8m2(dst_row + j, vec_dst, vl);
+        }
+    }
+
+    return CV_HAL_ERROR_OK;
+}
+
+inline int convertScale_8U32F(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height, double alpha, double beta)
+{
+    int vlmax = __riscv_vsetvlmax_e32m8();
+    auto vec_b = __riscv_vfmv_v_f_f32m8(beta, vlmax); // beta broadcast once as the float32 addend
+    float a = alpha; // double -> float: the multiply-add below runs in single precision
+    // For each of the height rows: dst[j] = src[j] * a + beta, reading 8-bit unsigned input and writing float32 output.
+    for (int i = 0; i < height; i++)
+    {
+        const uchar* src_row = src + i * src_step; // steps are byte offsets between consecutive rows
+        float* dst_row = reinterpret_cast<float*>(dst + i * dst_step); // dst is passed as bytes but holds float rows
+        int vl;
+        for (int j = 0; j < width; j += vl)
+        {
+            vl = __riscv_vsetvl_e8m2(width - j); // chunk size chosen by vsetvl for the elements left in this row
+            auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
+            auto vec_src_u16 = __riscv_vzext_vf2(vec_src, vl); // u8 -> u16 (zero-extend)
+            auto vec_src_f32 = __riscv_vfwcvt_f(vec_src_u16, vl); // u16 -> f32 (widening convert)
+            auto vec_fma = __riscv_vfmadd(vec_src_f32, a, vec_b, vl); // src * a + beta
+            __riscv_vse32_v_f32m8(dst_row + j, vec_fma, vl);
+        }
+    }
+
+    return CV_HAL_ERROR_OK;
+}
+
+inline int convertScale_32F32F(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height, double alpha, double beta)
+{
+    int vlmax = __riscv_vsetvlmax_e32m8();
+    auto vec_b = __riscv_vfmv_v_f_f32m8(beta, vlmax); // beta broadcast once as the float32 addend
+    float a = alpha; // double -> float: the multiply-add below runs in single precision
+    // For each of the height rows: dst[j] = src[j] * a + beta, with float32 input and output.
+    for (int i = 0; i < height; i++)
+    {
+        const float* src_row = reinterpret_cast<const float*>(src + i * src_step); // byte pointers and byte steps, float elements
+        float* dst_row = reinterpret_cast<float*>(dst + i * dst_step);
+        int vl;
+        for (int j = 0; j < width; j += vl)
+        {
+            vl = __riscv_vsetvl_e32m8(width - j); // chunk size chosen by vsetvl for the elements left in this row
+            auto vec_src = __riscv_vle32_v_f32m8(src_row + j, vl);
+            auto vec_fma = __riscv_vfmadd(vec_src, a, vec_b, vl); // src * a + beta
+            __riscv_vse32_v_f32m8(dst_row + j, vec_fma, vl);
+        }
+    }
+
+    return CV_HAL_ERROR_OK;
+}
+
+inline int convertScale(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height,
+                        int sdepth, int ddepth, double alpha, double beta)
+{ // Entry point bound to cv_hal_convertScale: picks the kernel matching the source/destination depth pair.
+    if (!dst) // null destination: report success without doing any work
+        return CV_HAL_ERROR_OK;
+    // Only 8U->8U, 8U->32F and 32F->32F are handled; every other pair returns CV_HAL_ERROR_NOT_IMPLEMENTED.
+    switch (sdepth)
+    {
+    case CV_8U:
+        switch (ddepth)
+        {
+        case CV_8U:
+            return convertScale_8U8U(src, src_step, dst, dst_step, width, height, alpha, beta);
+        case CV_32F:
+            return convertScale_8U32F(src, src_step, dst, dst_step, width, height, alpha, beta);
+        }
+        return CV_HAL_ERROR_NOT_IMPLEMENTED; // 8U source with an unhandled destination depth
+    case CV_32F:
+        switch (ddepth)
+        {
+        case CV_32F:
+            return convertScale_32F32F(src, src_step, dst, dst_step, width, height, alpha, beta);
+        }
+        return CV_HAL_ERROR_NOT_IMPLEMENTED; // 32F source with an unhandled destination depth
+    }
+    // Unhandled source depth (presumably the caller then falls back to a generic path - confirm against the HAL caller).
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+}
+
+}}
+
+#endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp
@ -0,0 +1,337 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED
+#define OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED
+
+#include <riscv_vector.h>
+
+namespace cv { namespace cv_hal_rvv {
+
+#undef cv_hal_minMaxIdx
+#define cv_hal_minMaxIdx cv::cv_hal_rvv::minMaxIdx
+
+// 1d support issue https://github.com/opencv/opencv/issues/26947
+//#undef cv_hal_minMaxIdxMaskStep
+//#define cv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minMaxIdx
+
+namespace // file-local trait helpers that give the templated min/max kernels one spelling per RVV intrinsic
+{
+    template<typename T> struct rvv; // only declared here; each supported element type gets a macro-generated specialization below
+
+    #define HAL_RVV_GENERATOR(T, EEW, TYPE, IS_U, EMUL, M_EMUL, B_LEN) \
+    template<> struct rvv<T> \
+    { \
+        using vec_t = v##IS_U##int##EEW##EMUL##_t; \
+        using bool_t = vbool##B_LEN##_t; \
+        static inline size_t vsetvlmax() { return __riscv_vsetvlmax_e##EEW##EMUL(); } \
+        static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e##EEW##EMUL(a); } \
+        static inline vec_t vmv_v_x(T a, size_t b) { return __riscv_vmv_v_x_##TYPE##EMUL(a, b); } \
+        static inline vec_t vle(const T* a, size_t b) { return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
+        static inline vuint8##M_EMUL##_t vle_mask(const uchar* a, size_t b) { return __riscv_vle8_v_u8##M_EMUL(a, b); } \
+        static inline vec_t vmin_tu(vec_t a, vec_t b, vec_t c, size_t d) { return __riscv_vmin##IS_U##_tu(a, b, c, d); } \
+        static inline vec_t vmax_tu(vec_t a, vec_t b, vec_t c, size_t d) { return __riscv_vmax##IS_U##_tu(a, b, c, d); } \
+        static inline vec_t vmin_tumu(bool_t a, vec_t b, vec_t c, vec_t d, size_t e) { return __riscv_vmin##IS_U##_tumu(a, b, c, d, e); } \
+        static inline vec_t vmax_tumu(bool_t a, vec_t b, vec_t c, vec_t d, size_t e) { return __riscv_vmax##IS_U##_tumu(a, b, c, d, e); } \
+        static inline vec_t vredmin(vec_t a, vec_t b, size_t c) { return __riscv_vredmin##IS_U(a, b, c); } \
+        static inline vec_t vredmax(vec_t a, vec_t b, size_t c) { return __riscv_vredmax##IS_U(a, b, c); } \
+    };
+    HAL_RVV_GENERATOR(uchar , 8 , u8 , u, m1, m1 , 8 ) // 8/16-bit integers at LMUL=1: min/max and reduction wrappers; M_EMUL is the LMUL of the u8 mask vector loaded alongside
+    HAL_RVV_GENERATOR(schar , 8 , i8 ,  , m1, m1 , 8 ) // empty IS_U selects the signed vector type and the signed min/max intrinsics
+    HAL_RVV_GENERATOR(ushort, 16, u16, u, m1, mf2, 16) // 16-bit data at m1 pairs with a u8 mask at mf2 and vbool16_t
+    HAL_RVV_GENERATOR(short , 16, i16,  , m1, mf2, 16)
+    #undef HAL_RVV_GENERATOR // the macro name is redefined with a different parameter list right below
+
+    #define HAL_RVV_GENERATOR(T, NAME, EEW, TYPE, IS_F, F_OR_S, F_OR_X, EMUL, M_EMUL, P_EMUL, B_LEN) \
+    template<> struct rvv<T> \
+    { \
+        using vec_t = v##NAME##EEW##EMUL##_t; \
+        using bool_t = vbool##B_LEN##_t; \
+        static inline size_t vsetvlmax() { return __riscv_vsetvlmax_e##EEW##EMUL(); } \
+        static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e##EEW##EMUL(a); } \
+        static inline vec_t vmv_v_x(T a, size_t b) { return __riscv_v##IS_F##mv_v_##F_OR_X##_##TYPE##EMUL(a, b); } \
+        static inline vuint32##P_EMUL##_t vid(size_t a) { return __riscv_vid_v_u32##P_EMUL(a); } \
+        static inline vuint32##P_EMUL##_t vundefined() { return __riscv_vundefined_u32##P_EMUL(); } \
+        static inline vec_t vle(const T* a, size_t b) { return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
+        static inline vuint8##M_EMUL##_t vle_mask(const uchar* a, size_t b) { return __riscv_vle8_v_u8##M_EMUL(a, b); } \
+        static inline bool_t vmlt(vec_t a, vec_t b, size_t c) { return __riscv_vm##F_OR_S##lt(a, b, c); } \
+        static inline bool_t vmgt(vec_t a, vec_t b, size_t c) { return __riscv_vm##F_OR_S##gt(a, b, c); } \
+        static inline bool_t vmlt_mu(bool_t a, bool_t b, vec_t c, vec_t d, size_t e) { return __riscv_vm##F_OR_S##lt##_mu(a, b, c, d, e); } \
+        static inline bool_t vmgt_mu(bool_t a, bool_t b, vec_t c, vec_t d, size_t e) { return __riscv_vm##F_OR_S##gt##_mu(a, b, c, d, e); } \
+        static inline T vmv_x_s(vec_t a) { return __riscv_v##IS_F##mv_##F_OR_X(a); } \
+    };
+    HAL_RVV_GENERATOR(int   , int  , 32, i32,  , s, x, m4, m1 , m4, 8 ) // 32/64-bit types at LMUL=4: compare wrappers plus a u32 index vector (P_EMUL) used for position tracking
+    HAL_RVV_GENERATOR(float , float, 32, f32, f, f, f, m4, m1 , m4, 8 ) // IS_F/F_OR_S/F_OR_X switch to the floating-point move and compare intrinsics
+    HAL_RVV_GENERATOR(double, float, 64, f64, f, f, f, m4, mf2, m2, 16) // 64-bit data at m4 pairs with u32 indices at m2 and a u8 mask at mf2
+    #undef HAL_RVV_GENERATOR
+}
+
+template<typename T>
+inline int minMaxIdxReadTwice(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal,
+                              int* minIdx, int* maxIdx, uchar* mask, size_t mask_step)
+{ // Two-pass search: pass 1 reduces the values to val_min/val_max, pass 2 rescans row by row for the first element equal to each. With a mask, only pixels whose mask byte is non-zero take part.
+    int vlmax = rvv<T>::vsetvlmax();
+    auto vec_min = rvv<T>::vmv_v_x(std::numeric_limits<T>::max(), vlmax); // per-lane running minimum, seeded with the largest T
+    auto vec_max = rvv<T>::vmv_v_x(std::numeric_limits<T>::lowest(), vlmax); // per-lane running maximum, seeded with the smallest T
+    T val_min, val_max;
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++) // pass 1 (masked): accumulate per-lane extremes
+        {
+            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = rvv<T>::vsetvl(width - j);
+                auto vec_src = rvv<T>::vle(src_row + j, vl);
+                auto vec_mask = rvv<T>::vle_mask(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl); // lane is active when its mask byte is non-zero
+                vec_min = rvv<T>::vmin_tumu(bool_mask, vec_min, vec_min, vec_src, vl); // inactive and tail lanes keep their previous value
+                vec_max = rvv<T>::vmax_tumu(bool_mask, vec_max, vec_max, vec_src, vl);
+            }
+        }
+
+        auto sc_minval = rvv<T>::vmv_v_x(std::numeric_limits<T>::max(), vlmax);
+        auto sc_maxval = rvv<T>::vmv_v_x(std::numeric_limits<T>::lowest(), vlmax);
+        sc_minval = rvv<T>::vredmin(vec_min, sc_minval, vlmax); // fold all lanes into element 0
+        sc_maxval = rvv<T>::vredmax(vec_max, sc_maxval, vlmax);
+        val_min = __riscv_vmv_x(sc_minval);
+        val_max = __riscv_vmv_x(sc_maxval);
+
+        bool found_min = !minIdx, found_max = !maxIdx; // an index the caller did not ask for counts as already found
+        for (int i = 0; i < height && (!found_min || !found_max); i++) // pass 2 (masked): stops once both positions are known
+        {
+            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width && (!found_min || !found_max); j += vl)
+            {
+                vl = rvv<T>::vsetvl(width - j);
+                auto vec_src = rvv<T>::vle(src_row + j, vl);
+                auto vec_mask = rvv<T>::vle_mask(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                auto bool_zero = __riscv_vmxor(bool_mask, bool_mask, vl); // all-false mask, used as the value for inactive lanes
+                if (!found_min)
+                {
+                    auto bool_minpos = __riscv_vmseq_mu(bool_mask, bool_zero, vec_src, val_min, vl); // equality test on active lanes only
+                    int index = __riscv_vfirst(bool_minpos, vl); // -1 when no lane matched
+                    if (index != -1)
+                    {
+                        found_min = true;
+                        minIdx[0] = i;
+                        minIdx[1] = j + index;
+                    }
+                }
+                if (!found_max)
+                {
+                    auto bool_maxpos = __riscv_vmseq_mu(bool_mask, bool_zero, vec_src, val_max, vl);
+                    int index = __riscv_vfirst(bool_maxpos, vl);
+                    if (index != -1)
+                    {
+                        found_max = true;
+                        maxIdx[0] = i;
+                        maxIdx[1] = j + index;
+                    }
+                }
+            }
+        } // if no active element matched (e.g. an all-zero mask), minIdx/maxIdx are left unmodified
+    }
+    else
+    {
+        for (int i = 0; i < height; i++) // pass 1 (unmasked)
+        {
+            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = rvv<T>::vsetvl(width - j);
+                auto vec_src = rvv<T>::vle(src_row + j, vl);
+                vec_min = rvv<T>::vmin_tu(vec_min, vec_min, vec_src, vl); // tail lanes beyond vl keep their previous value
+                vec_max = rvv<T>::vmax_tu(vec_max, vec_max, vec_src, vl);
+            }
+        }
+
+        auto sc_minval = rvv<T>::vmv_v_x(std::numeric_limits<T>::max(), vlmax);
+        auto sc_maxval = rvv<T>::vmv_v_x(std::numeric_limits<T>::lowest(), vlmax);
+        sc_minval = rvv<T>::vredmin(vec_min, sc_minval, vlmax);
+        sc_maxval = rvv<T>::vredmax(vec_max, sc_maxval, vlmax);
+        val_min = __riscv_vmv_x(sc_minval);
+        val_max = __riscv_vmv_x(sc_maxval);
+
+        bool found_min = !minIdx, found_max = !maxIdx;
+        for (int i = 0; i < height && (!found_min || !found_max); i++) // pass 2 (unmasked)
+        {
+            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
+            int vl;
+            for (int j = 0; j < width && (!found_min || !found_max); j += vl)
+            {
+                vl = rvv<T>::vsetvl(width - j);
+                auto vec_src = rvv<T>::vle(src_row + j, vl);
+                if (!found_min)
+                {
+                    auto bool_minpos = __riscv_vmseq(vec_src, val_min, vl);
+                    int index = __riscv_vfirst(bool_minpos, vl);
+                    if (index != -1)
+                    {
+                        found_min = true;
+                        minIdx[0] = i; // row
+                        minIdx[1] = j + index; // column within the row
+                    }
+                }
+                if (!found_max)
+                {
+                    auto bool_maxpos = __riscv_vmseq(vec_src, val_max, vl);
+                    int index = __riscv_vfirst(bool_maxpos, vl);
+                    if (index != -1)
+                    {
+                        found_max = true;
+                        maxIdx[0] = i;
+                        maxIdx[1] = j + index;
+                    }
+                }
+            }
+        }
+    }
+    if (minVal)
+    {
+        *minVal = val_min;
+    }
+    if (maxVal)
+    {
+        *maxVal = val_max;
+    }
+
+    return CV_HAL_ERROR_OK;
+}
+
+// Single-pass min/max search with position tracking (instantiated for int, float and double).
+//
+// Every vector lane keeps its own running minimum/maximum together with the linear
+// position (row * width + column, 32-bit) at which that value was first seen in the lane.
+// After the scan the lanes are combined in scalar code.
+//
+// src_data/src_step: source rows, src_step in bytes.  mask/mask_step: optional 8-bit mask,
+// only pixels with a non-zero mask byte take part.  minVal/maxVal/minIdx/maxIdx may be null;
+// minIdx/maxIdx receive {row, column}.  Always returns CV_HAL_ERROR_OK.
+//
+// If no element ever compares below the initial max() (resp. above lowest()), for example
+// with an all-zero mask, the corresponding index output is left unmodified.
+template<typename T>
+inline int minMaxIdxReadOnce(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal,
+                             int* minIdx, int* maxIdx, uchar* mask, size_t mask_step)
+{
+    int vlmax = rvv<T>::vsetvlmax();
+    auto vec_min = rvv<T>::vmv_v_x(std::numeric_limits<T>::max(), vlmax);
+    auto vec_max = rvv<T>::vmv_v_x(std::numeric_limits<T>::lowest(), vlmax);
+    // vec_pos holds the linear position of the element each lane would load next.
+    auto vec_pos = rvv<T>::vid(vlmax);
+    // Position lanes stay undefined until the matching value lane is updated for the first time.
+    auto vec_minpos = rvv<T>::vundefined(), vec_maxpos = rvv<T>::vundefined();
+    T val_min, val_max;
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = rvv<T>::vsetvl(width - j);
+                auto vec_src = rvv<T>::vle(src_row + j, vl);
+                auto vec_mask = rvv<T>::vle_mask(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                auto bool_zero = __riscv_vmxor(bool_mask, bool_mask, vl);
+
+                // Strict comparisons on active lanes only: a lane records the first occurrence of its extreme.
+                auto bool_minpos = rvv<T>::vmlt_mu(bool_mask, bool_zero, vec_src, vec_min, vl);
+                auto bool_maxpos = rvv<T>::vmgt_mu(bool_mask, bool_zero, vec_src, vec_max, vl);
+                vec_minpos = __riscv_vmerge_tu(vec_minpos, vec_minpos, vec_pos, bool_minpos, vl);
+                vec_maxpos = __riscv_vmerge_tu(vec_maxpos, vec_maxpos, vec_pos, bool_maxpos, vl);
+
+                vec_min = __riscv_vmerge_tu(vec_min, vec_min, vec_src, bool_minpos, vl);
+                vec_max = __riscv_vmerge_tu(vec_max, vec_max, vec_src, bool_maxpos, vl);
+                // Advance every lane by the number of elements just consumed.
+                vec_pos = __riscv_vadd(vec_pos, vl, vlmax);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = rvv<T>::vsetvl(width - j);
+                auto vec_src = rvv<T>::vle(src_row + j, vl);
+
+                auto bool_minpos = rvv<T>::vmlt(vec_src, vec_min, vl);
+                auto bool_maxpos = rvv<T>::vmgt(vec_src, vec_max, vl);
+                vec_minpos = __riscv_vmerge_tu(vec_minpos, vec_minpos, vec_pos, bool_minpos, vl);
+                vec_maxpos = __riscv_vmerge_tu(vec_maxpos, vec_maxpos, vec_pos, bool_maxpos, vl);
+
+                vec_min = __riscv_vmerge_tu(vec_min, vec_min, vec_src, bool_minpos, vl);
+                vec_max = __riscv_vmerge_tu(vec_max, vec_max, vec_src, bool_maxpos, vl);
+                vec_pos = __riscv_vadd(vec_pos, vl, vlmax);
+            }
+        }
+    }
+
+    // Combine the lanes.  Several lanes can hold the same extreme value; the one with the
+    // smallest recorded position must win so that the first occurrence in scan order is
+    // reported, matching minMaxIdxReadTwice.  Picking the lowest lane is not enough,
+    // because a low lane may have met the value later than a high lane did.
+    val_min = std::numeric_limits<T>::max();
+    val_max = std::numeric_limits<T>::lowest();
+    bool have_min = false, have_max = false;
+    unsigned pos_min = 0, pos_max = 0;
+    for (int i = 0; i < vlmax; i++)
+    {
+        const T lane_min = rvv<T>::vmv_x_s(vec_min);
+        if (lane_min < val_min)
+        {
+            val_min = lane_min;
+            pos_min = __riscv_vmv_x(vec_minpos);
+            have_min = true;
+        }
+        else if (have_min && lane_min == val_min)
+        {
+            // have_min implies val_min < max(), so this lane was updated and its position is defined.
+            const unsigned lane_pos = __riscv_vmv_x(vec_minpos);
+            if (lane_pos < pos_min)
+                pos_min = lane_pos;
+        }
+
+        const T lane_max = rvv<T>::vmv_x_s(vec_max);
+        if (val_max < lane_max)
+        {
+            val_max = lane_max;
+            pos_max = __riscv_vmv_x(vec_maxpos);
+            have_max = true;
+        }
+        else if (have_max && lane_max == val_max)
+        {
+            const unsigned lane_pos = __riscv_vmv_x(vec_maxpos);
+            if (lane_pos < pos_max)
+                pos_max = lane_pos;
+        }
+
+        // Bring the next lane down to element 0 for the following iteration.
+        vec_min = __riscv_vslidedown(vec_min, 1, vlmax);
+        vec_max = __riscv_vslidedown(vec_max, 1, vlmax);
+        vec_minpos = __riscv_vslidedown(vec_minpos, 1, vlmax);
+        vec_maxpos = __riscv_vslidedown(vec_maxpos, 1, vlmax);
+    }
+    if (minIdx && have_min)
+    {
+        minIdx[0] = pos_min / width;
+        minIdx[1] = pos_min % width;
+    }
+    if (maxIdx && have_max)
+    {
+        maxIdx[0] = pos_max / width;
+        maxIdx[1] = pos_max % width;
+    }
+    if (minVal)
+    {
+        *minVal = val_min;
+    }
+    if (maxVal)
+    {
+        *maxVal = val_max;
+    }
+
+    return CV_HAL_ERROR_OK;
+}
+
+// Entry point bound to cv_hal_minMaxIdx: selects the kernel instantiation for the given depth.
+// 8- and 16-bit integer depths use the two-pass kernel; 32-bit int, float and double use the
+// single-pass kernel.  Any other depth returns CV_HAL_ERROR_NOT_IMPLEMENTED.
+// A mask_step of 0 falls back to src_step as the mask row stride.
+// NOTE(review): that fallback only matches a mask laid out like the source; for depths wider
+// than 8 bits the two strides normally differ - confirm against the callers.
+inline int minMaxIdx(const uchar* src_data, size_t src_step, int width, int height, int depth, double* minVal, double* maxVal,
+                     int* minIdx, int* maxIdx, uchar* mask, size_t mask_step = 0)
+{
+    const size_t row_stride_of_mask = mask_step ? mask_step : src_step;
+
+    if (depth == CV_8UC1)
+        return minMaxIdxReadTwice<uchar>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, row_stride_of_mask);
+    if (depth == CV_8SC1)
+        return minMaxIdxReadTwice<schar>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, row_stride_of_mask);
+    if (depth == CV_16UC1)
+        return minMaxIdxReadTwice<ushort>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, row_stride_of_mask);
+    if (depth == CV_16SC1)
+        return minMaxIdxReadTwice<short>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, row_stride_of_mask);
+    if (depth == CV_32SC1)
+        return minMaxIdxReadOnce<int>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, row_stride_of_mask);
+    if (depth == CV_32FC1)
+        return minMaxIdxReadOnce<float>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, row_stride_of_mask);
+    if (depth == CV_64FC1)
+        return minMaxIdxReadOnce<double>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, row_stride_of_mask);
+
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+}
+
+}}
+
+#endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp
@ -0,0 +1,517 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef OPENCV_HAL_RVV_NORM_HPP_INCLUDED
+#define OPENCV_HAL_RVV_NORM_HPP_INCLUDED
+
+#include <riscv_vector.h>
+
+namespace cv { namespace cv_hal_rvv {
+
+#undef cv_hal_norm
+#define cv_hal_norm cv::cv_hal_rvv::norm
+
+inline int normInf_8UC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{ // Writes the largest byte of a single-channel 8-bit image to *result; with a mask, only pixels whose mask byte is non-zero are considered.
+    int vlmax = __riscv_vsetvlmax_e8m8();
+    auto vec_max = __riscv_vmv_v_x_u8m8(0, vlmax); // per-lane running maximum, starts at 0
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m8(width - j);
+                auto vec_src = __riscv_vle8_v_u8m8(src_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8m8(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                vec_max = __riscv_vmaxu_tumu(bool_mask, vec_max, vec_max, vec_src, vl); // masked-off and tail lanes keep their previous maximum
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m8(width - j);
+                auto vec_src = __riscv_vle8_v_u8m8(src_row + j, vl);
+                vec_max = __riscv_vmaxu_tu(vec_max, vec_max, vec_src, vl); // tail lanes beyond vl are left untouched
+            }
+        }
+    }
+    auto sc_max = __riscv_vmv_s_x_u8m1(0, vlmax);
+    sc_max = __riscv_vredmaxu(vec_max, sc_max, vlmax); // fold all lanes into element 0
+    *result = __riscv_vmv_x(sc_max);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// L1 norm (sum of all byte values) of a single-channel 8-bit image, written to *result.
+// With a mask, only pixels whose mask byte is non-zero are summed.
+// src_step/mask_step are row strides in bytes.  Always returns CV_HAL_ERROR_OK.
+//
+// Bytes are widened to 32-bit lane accumulators.  Those accumulators are flushed into the
+// double result before they can wrap, in the same way the normL2Sqr_* kernels do; a single
+// 32-bit reduction at the end would overflow once the sum reaches 2^32.
+inline int normL1_8UC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{
+    int vlmax = __riscv_vsetvlmax_e8m2();
+    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    int cnt = 0;
+    // Called before each accumulation with the number of elements about to be added.
+    // The accumulators never hold 2^24 or more elements, and (2^24 - 1) * 255 < 2^32,
+    // so no lane can overflow.
+    auto reduce = [&](int vl) {
+        if ((cnt += vl) < (1 << 24))
+            return;
+        cnt = vl;
+        for (int i = 0; i < vlmax; i++)
+        {
+            *result += __riscv_vmv_x(vec_sum);
+            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
+        }
+        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    };
+
+    *result = 0;
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width - j);
+                reduce(vl);
+
+                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                auto vec_zext = __riscv_vzext_vf4_u32m8_m(bool_mask, vec_src, vl);
+                // Masked-off and tail lanes keep their previous sum.
+                vec_sum = __riscv_vadd_tumu(bool_mask, vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width - j);
+                reduce(vl);
+
+                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
+                auto vec_zext = __riscv_vzext_vf4(vec_src, vl);
+                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    // Force the final flush of whatever is still held in the lanes.
+    reduce(1 << 24);
+
+    return CV_HAL_ERROR_OK;
+}
+
+inline int normL2Sqr_8UC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{ // Writes the sum of squared byte values of a single-channel 8-bit image to *result; with a mask, only pixels whose mask byte is non-zero contribute.
+    int vlmax = __riscv_vsetvlmax_e8m2();
+    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax); // per-lane 32-bit partial sums
+    int cnt = 0; // number of elements accumulated since the last flush
+    auto reduce = [&](int vl) { // flushes the lane sums into *result before they can wrap: fewer than 2^16 squares of at most 255*255 fit in 32 bits
+        if ((cnt += vl) < (1 << 16))
+            return;
+        cnt = vl;
+        for (int i = 0; i < vlmax; i++)
+        {
+            *result += __riscv_vmv_x(vec_sum); // add element 0, then shift the next lane down
+            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
+        }
+        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    };
+
+    *result = 0;
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width - j);
+                reduce(vl); // called before accumulating so the upcoming add cannot overflow
+
+                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                auto vec_mul = __riscv_vwmulu_vv_u16m4_m(bool_mask, vec_src, vec_src, vl); // 8-bit x 8-bit -> 16-bit square
+                auto vec_zext = __riscv_vzext_vf2_u32m8_m(bool_mask, vec_mul, vl);
+                vec_sum = __riscv_vadd_tumu(bool_mask, vec_sum, vec_sum, vec_zext, vl); // masked-off and tail lanes keep their previous sum
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width - j);
+                reduce(vl);
+
+                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
+                auto vec_mul = __riscv_vwmulu(vec_src, vec_src, vl);
+                auto vec_zext = __riscv_vzext_vf2(vec_mul, vl);
+                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    reduce(1 << 16); // passing the threshold itself forces the final flush
+
+    return CV_HAL_ERROR_OK;
+}
+
+inline int normInf_8UC4(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{ // Writes the largest byte over all four channels of an 8-bit image to *result; the mask has one byte per pixel and is expanded to cover that pixel's four channel bytes.
+    int vlmax = __riscv_vsetvlmax_e8m8();
+    auto vec_max = __riscv_vmv_v_x_u8m8(0, vlmax);
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            const uchar* mask_row = mask + i * mask_step;
+            int vl, vlm; // vl counts channel bytes, vlm counts pixels (mask bytes)
+            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
+            {
+                vl = __riscv_vsetvl_e8m8(width * 4 - j);
+                vlm = __riscv_vsetvl_e8m2(width - jm); // NOTE(review): the code relies on vl == 4 * vlm for every strip - confirm this holds for the targeted vsetvl behaviour
+                auto vec_src = __riscv_vle8_v_u8m8(src_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + jm, vlm);
+                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm); // clamp each mask byte to 0/1, widen to 32 bits, then replicate into all four bytes
+                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m8(vec_mask_ext), 0, vl); // view as bytes: one mask bit per channel byte
+                vec_max = __riscv_vmaxu_tumu(bool_mask_ext, vec_max, vec_max, vec_src, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            int vl;
+            for (int j = 0; j < width * 4; j += vl) // unmasked: the row is just width * 4 bytes
+            {
+                vl = __riscv_vsetvl_e8m8(width * 4 - j);
+                auto vec_src = __riscv_vle8_v_u8m8(src_row + j, vl);
+                vec_max = __riscv_vmaxu_tu(vec_max, vec_max, vec_src, vl);
+            }
+        }
+    }
+    auto sc_max = __riscv_vmv_s_x_u8m1(0, vlmax);
+    sc_max = __riscv_vredmaxu(vec_max, sc_max, vlmax); // fold all lanes into element 0
+    *result = __riscv_vmv_x(sc_max);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// L1 norm (sum of all channel bytes) of a four-channel 8-bit image, written to *result.
+// The mask has one byte per pixel; a non-zero byte enables all four channels of that pixel.
+// src_step/mask_step are row strides in bytes.  Always returns CV_HAL_ERROR_OK.
+//
+// Bytes are widened to 32-bit lane accumulators.  Those accumulators are flushed into the
+// double result before they can wrap, in the same way the normL2Sqr_* kernels do; a single
+// 32-bit reduction at the end would overflow once the sum reaches 2^32.
+inline int normL1_8UC4(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{
+    int vlmax = __riscv_vsetvlmax_e8m2();
+    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    int cnt = 0;
+    // Called before each accumulation with the number of bytes about to be added.
+    // The accumulators never hold 2^24 or more bytes, and (2^24 - 1) * 255 < 2^32,
+    // so no lane can overflow.
+    auto reduce = [&](int vl) {
+        if ((cnt += vl) < (1 << 24))
+            return;
+        cnt = vl;
+        for (int i = 0; i < vlmax; i++)
+        {
+            *result += __riscv_vmv_x(vec_sum);
+            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
+        }
+        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    };
+
+    *result = 0;
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            const uchar* mask_row = mask + i * mask_step;
+            // vl counts channel bytes, vlm counts pixels (mask bytes).
+            int vl, vlm;
+            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
+            {
+                vl = __riscv_vsetvl_e8m2(width * 4 - j);
+                vlm = __riscv_vsetvl_e8mf2(width - jm);
+                reduce(vl);
+
+                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8mf2(mask_row + jm, vlm);
+                // Clamp each mask byte to 0/1, widen to 32 bits and replicate it into all four
+                // bytes, so that the byte view below yields one mask bit per channel byte.
+                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm);
+                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m2(vec_mask_ext), 0, vl);
+                auto vec_zext = __riscv_vzext_vf4_u32m8_m(bool_mask_ext, vec_src, vl);
+                vec_sum = __riscv_vadd_tumu(bool_mask_ext, vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            int vl;
+            for (int j = 0; j < width * 4; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width * 4 - j);
+                reduce(vl);
+
+                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
+                auto vec_zext = __riscv_vzext_vf4(vec_src, vl);
+                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    // Force the final flush of whatever is still held in the lanes.
+    reduce(1 << 24);
+
+    return CV_HAL_ERROR_OK;
+}
+
+inline int normL2Sqr_8UC4(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{ // Writes the sum of squared channel bytes of a four-channel 8-bit image to *result; the mask has one byte per pixel and enables all four channels of that pixel.
+    int vlmax = __riscv_vsetvlmax_e8m2();
+    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax); // per-lane 32-bit partial sums
+    int cnt = 0; // number of bytes accumulated since the last flush
+    auto reduce = [&](int vl) { // flushes the lane sums into *result before they can wrap: fewer than 2^16 squares of at most 255*255 fit in 32 bits
+        if ((cnt += vl) < (1 << 16))
+            return;
+        cnt = vl;
+        for (int i = 0; i < vlmax; i++)
+        {
+            *result += __riscv_vmv_x(vec_sum); // add element 0, then shift the next lane down
+            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
+        }
+        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    };
+
+    *result = 0;
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            const uchar* mask_row = mask + i * mask_step;
+            int vl, vlm; // vl counts channel bytes, vlm counts pixels (mask bytes)
+            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
+            {
+                vl = __riscv_vsetvl_e8m2(width * 4 - j);
+                vlm = __riscv_vsetvl_e8mf2(width - jm); // NOTE(review): the code relies on vl == 4 * vlm for every strip - confirm this holds for the targeted vsetvl behaviour
+                reduce(vl);
+
+                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8mf2(mask_row + jm, vlm);
+                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm); // clamp to 0/1, widen to 32 bits, replicate into all four bytes
+                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m2(vec_mask_ext), 0, vl); // one mask bit per channel byte
+                auto vec_mul = __riscv_vwmulu_vv_u16m4_m(bool_mask_ext, vec_src, vec_src, vl);
+                auto vec_zext = __riscv_vzext_vf2_u32m8_m(bool_mask_ext, vec_mul, vl);
+                vec_sum = __riscv_vadd_tumu(bool_mask_ext, vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src_row = src + i * src_step;
+            int vl;
+            for (int j = 0; j < width * 4; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width * 4 - j);
+                reduce(vl);
+
+                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
+                auto vec_mul = __riscv_vwmulu(vec_src, vec_src, vl);
+                auto vec_zext = __riscv_vzext_vf2(vec_mul, vl);
+                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    reduce(1 << 16); // passing the threshold itself forces the final flush
+
+    return CV_HAL_ERROR_OK;
+}
+
+inline int normInf_32FC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{ // Writes the largest absolute value of a single-channel float image to *result; with a mask, only pixels whose mask byte is non-zero are considered.
+    int vlmax = __riscv_vsetvlmax_e32m8();
+    auto vec_max = __riscv_vfmv_v_f_f32m8(0, vlmax); // per-lane running maximum of |x|, starts at 0
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m8(width - j);
+                auto vec_src = __riscv_vle32_v_f32m8(src_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl); // u8 at m2 is loaded with the same vl as f32 at m8
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                auto vec_abs = __riscv_vfabs_v_f32m8_m(bool_mask, vec_src, vl);
+                vec_max = __riscv_vfmax_tumu(bool_mask, vec_max, vec_max, vec_abs, vl); // masked-off and tail lanes keep their previous maximum
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m8(width - j);
+                auto vec_src = __riscv_vle32_v_f32m8(src_row + j, vl);
+                auto vec_abs = __riscv_vfabs(vec_src, vl);
+                vec_max = __riscv_vfmax_tu(vec_max, vec_max, vec_abs, vl);
+            }
+        }
+    }
+    auto sc_max = __riscv_vfmv_s_f_f32m1(0, vlmax);
+    sc_max = __riscv_vfredmax(vec_max, sc_max, vlmax); // fold all lanes into element 0
+    *result = __riscv_vfmv_f(sc_max);
+
+    return CV_HAL_ERROR_OK;
+}
+
+inline int normL1_32FC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{ // Writes the sum of absolute values of a single-channel float image to *result, accumulating in double precision; with a mask, only pixels whose mask byte is non-zero are summed.
+    int vlmax = __riscv_vsetvlmax_e32m4();
+    auto vec_sum = __riscv_vfmv_v_f_f64m8(0, vlmax); // per-lane double accumulators (f64 at m8, created with the f32 m4 vlmax)
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m4(width - j);
+                auto vec_src = __riscv_vle32_v_f32m4(src_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8m1(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                auto vec_abs = __riscv_vfabs_v_f32m4_m(bool_mask, vec_src, vl);
+                auto vec_fext = __riscv_vfwcvt_f_f_v_f64m8_m(bool_mask, vec_abs, vl); // widen float -> double before adding
+                vec_sum = __riscv_vfadd_tumu(bool_mask, vec_sum, vec_sum, vec_fext, vl); // masked-off and tail lanes keep their previous sum
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m4(width - j);
+                auto vec_src = __riscv_vle32_v_f32m4(src_row + j, vl);
+                auto vec_abs = __riscv_vfabs(vec_src, vl);
+                auto vec_fext = __riscv_vfwcvt_f_f_v_f64m8(vec_abs, vl);
+                vec_sum = __riscv_vfadd_tu(vec_sum, vec_sum, vec_fext, vl);
+            }
+        }
+    }
+    auto sc_sum = __riscv_vfmv_s_f_f64m1(0, vlmax);
+    sc_sum = __riscv_vfredosum(vec_sum, sc_sum, vlmax); // ordered sum reduction of all lanes into element 0
+    *result = __riscv_vfmv_f(sc_sum);
+
+    return CV_HAL_ERROR_OK;
+}
+
+inline int normL2Sqr_32FC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{ // Writes the sum of squares of a single-channel float image to *result, accumulating in double precision; with a mask, only pixels whose mask byte is non-zero contribute.
+    int vlmax = __riscv_vsetvlmax_e32m4();
+    auto vec_sum = __riscv_vfmv_v_f_f64m8(0, vlmax); // per-lane double accumulators
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m4(width - j);
+                auto vec_src = __riscv_vle32_v_f32m4(src_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8m1(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                auto vec_mul = __riscv_vfwmul_vv_f64m8_m(bool_mask, vec_src, vec_src, vl); // widening multiply: float x float -> double square
+                vec_sum = __riscv_vfadd_tumu(bool_mask, vec_sum, vec_sum, vec_mul, vl); // masked-off and tail lanes keep their previous sum
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m4(width - j);
+                auto vec_src = __riscv_vle32_v_f32m4(src_row + j, vl);
+                auto vec_mul = __riscv_vfwmul(vec_src, vec_src, vl);
+                vec_sum = __riscv_vfadd_tu(vec_sum, vec_sum, vec_mul, vl);
+            }
+        }
+    }
+    auto sc_sum = __riscv_vfmv_s_f_f64m1(0, vlmax);
+    sc_sum = __riscv_vfredosum(vec_sum, sc_sum, vlmax); // ordered sum reduction of all lanes into element 0
+    *result = __riscv_vfmv_f(sc_sum);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// Entry point bound to cv_hal_norm: selects the RVV kernel for the given image type and norm.
+// Supported types are CV_8UC1, CV_8UC4 and CV_32FC1; supported norms are NORM_INF, NORM_L1,
+// NORM_L2SQR and NORM_L2 (computed as the square root of the L2SQR kernel's result).
+// Every other combination returns CV_HAL_ERROR_NOT_IMPLEMENTED.
+// A null result is reported as success without doing any work.
+inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width,
+                int height, int type, int norm_type, double* result)
+{
+    if (!result)
+        return CV_HAL_ERROR_OK;
+
+    // First pick the kernel family for the pixel type...
+    typedef int (*kernel_t)(const uchar*, size_t, const uchar*, size_t, int, int, double*);
+    kernel_t inf_kernel, l1_kernel, l2sqr_kernel;
+    if (type == CV_8UC1)
+    {
+        inf_kernel = normInf_8UC1;
+        l1_kernel = normL1_8UC1;
+        l2sqr_kernel = normL2Sqr_8UC1;
+    }
+    else if (type == CV_8UC4)
+    {
+        inf_kernel = normInf_8UC4;
+        l1_kernel = normL1_8UC4;
+        l2sqr_kernel = normL2Sqr_8UC4;
+    }
+    else if (type == CV_32FC1)
+    {
+        inf_kernel = normInf_32FC1;
+        l1_kernel = normL1_32FC1;
+        l2sqr_kernel = normL2Sqr_32FC1;
+    }
+    else
+    {
+        return CV_HAL_ERROR_NOT_IMPLEMENTED;
+    }
+
+    // ...then run the member of that family matching the requested norm.
+    if (norm_type == NORM_INF)
+        return inf_kernel(src, src_step, mask, mask_step, width, height, result);
+    if (norm_type == NORM_L1)
+        return l1_kernel(src, src_step, mask, mask_step, width, height, result);
+    if (norm_type == NORM_L2SQR)
+        return l2sqr_kernel(src, src_step, mask, mask_step, width, height, result);
+    if (norm_type == NORM_L2)
+    {
+        const int status = l2sqr_kernel(src, src_step, mask, mask_step, width, height, result);
+        *result = std::sqrt(*result);
+        return status;
+    }
+
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+}
+
+}}
+
+#endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/norm_diff.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/norm_diff.hpp
@ -0,0 +1,605 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef OPENCV_HAL_RVV_NORM_DIFF_HPP_INCLUDED
+#define OPENCV_HAL_RVV_NORM_DIFF_HPP_INCLUDED
+
+#include <riscv_vector.h>
+
+namespace cv { namespace cv_hal_rvv {
+
+#undef cv_hal_normDiff
+#define cv_hal_normDiff cv::cv_hal_rvv::normDiff
+
+// Infinity norm of the difference of two single-channel 8-bit images:
+// *result = max |src1 - src2| over all pixels (only pixels whose mask byte is
+// non-zero when mask is not null). Rows are addressed through the byte steps
+// src1_step / src2_step / mask_step. Always returns CV_HAL_ERROR_OK.
+inline int normDiffInf_8UC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{
+    int vlmax = __riscv_vsetvlmax_e8m8();
+    // Per-lane running maximum, reduced to a scalar after all rows are processed.
+    auto vec_max = __riscv_vmv_v_x_u8m8(0, vlmax);
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m8(width - j);
+                auto vec_src1 = __riscv_vle8_v_u8m8(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m8(src2_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8m8(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                // Unsigned absolute difference computed as max(a, b) - min(a, b).
+                auto vec_src = __riscv_vsub_vv_u8m8_m(bool_mask, __riscv_vmaxu_vv_u8m8_m(bool_mask, vec_src1, vec_src2, vl),
+                                                      __riscv_vminu_vv_u8m8_m(bool_mask, vec_src1, vec_src2, vl), vl);
+                // _tumu: lanes past vl and masked-off lanes keep their previous maximum.
+                vec_max = __riscv_vmaxu_tumu(bool_mask, vec_max, vec_max, vec_src, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m8(width - j);
+                auto vec_src1 = __riscv_vle8_v_u8m8(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m8(src2_row + j, vl);
+                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
+                // _tu: lanes past vl keep their previous maximum on a short final chunk.
+                vec_max = __riscv_vmaxu_tu(vec_max, vec_max, vec_src, vl);
+            }
+        }
+    }
+    // Horizontal maximum over all vlmax lanes, seeded with 0.
+    auto sc_max = __riscv_vmv_s_x_u8m1(0, vlmax);
+    sc_max = __riscv_vredmaxu(vec_max, sc_max, vlmax);
+    *result = __riscv_vmv_x(sc_max);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// L1 norm of the difference of two single-channel 8-bit images:
+// *result = sum |src1 - src2| over all pixels (only pixels whose mask byte is
+// non-zero when mask is not null). Rows are addressed through the byte steps
+// src1_step / src2_step / mask_step. Always returns CV_HAL_ERROR_OK.
+//
+// The per-lane sums are 32-bit, so they are periodically flushed into the
+// double *result (same scheme as normDiffL2Sqr_8UC1). Without the flush the
+// total wraps around once the summed difference exceeds 2^32, e.g. for a
+// 4096x4096 image with |src1 - src2| == 255 everywhere.
+inline int normDiffL1_8UC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{
+    int vlmax = __riscv_vsetvlmax_e8m2();
+    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    // Elements accumulated since the last flush. Every chunk adds at most 255
+    // to a lane, and fewer than 2^24 chunks fit between flushes, so a lane
+    // never exceeds (2^24 - 1) * 255 < 2^32.
+    int cnt = 0;
+    auto reduce = [&](int vl) {
+        if ((cnt += vl) < (1 << 24))
+            return;
+        cnt = vl;
+        // Add every lane to the double accumulator individually so the
+        // horizontal sum is never formed in 32 bits.
+        for (int i = 0; i < vlmax; i++)
+        {
+            *result += __riscv_vmv_x(vec_sum);
+            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
+        }
+        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    };
+
+    *result = 0;
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width - j);
+                reduce(vl);
+
+                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                // Unsigned absolute difference computed as max(a, b) - min(a, b).
+                auto vec_src = __riscv_vsub_vv_u8m2_m(bool_mask, __riscv_vmaxu_vv_u8m2_m(bool_mask, vec_src1, vec_src2, vl),
+                                                      __riscv_vminu_vv_u8m2_m(bool_mask, vec_src1, vec_src2, vl), vl);
+                auto vec_zext = __riscv_vzext_vf4_u32m8_m(bool_mask, vec_src, vl);
+                // _tumu: lanes past vl and masked-off lanes keep their previous sum.
+                vec_sum = __riscv_vadd_tumu(bool_mask, vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width - j);
+                reduce(vl);
+
+                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
+                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
+                auto vec_zext = __riscv_vzext_vf4(vec_src, vl);
+                // _tu: lanes past vl keep their previous sum on a short final chunk.
+                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    // Passing the threshold itself forces the final flush of whatever is left.
+    reduce(1 << 24);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// Squared L2 norm of the difference of two single-channel 8-bit images:
+// *result = sum (src1 - src2)^2 over all pixels (only pixels whose mask byte
+// is non-zero when mask is not null). Rows are addressed through the byte
+// steps src1_step / src2_step / mask_step. Always returns CV_HAL_ERROR_OK.
+inline int normDiffL2Sqr_8UC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{
+    int vlmax = __riscv_vsetvlmax_e8m2();
+    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    // Elements accumulated into vec_sum since the last flush.
+    int cnt = 0;
+    // Called before each chunk of vl elements is accumulated. Once the element
+    // count would reach 1 << 16, every lane of vec_sum is added to *result and
+    // vec_sum is zeroed. NOTE(review): presumably this keeps the 32-bit lanes
+    // from overflowing (each add is at most 255 * 255) - confirm.
+    auto reduce = [&](int vl) {
+        if ((cnt += vl) < (1 << 16))
+            return;
+        cnt = vl;
+        for (int i = 0; i < vlmax; i++)
+        {
+            // Read lane 0, then shift the remaining lanes down by one.
+            *result += __riscv_vmv_x(vec_sum);
+            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
+        }
+        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    };
+
+    *result = 0;
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width - j);
+                reduce(vl);
+
+                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                // Unsigned absolute difference computed as max(a, b) - min(a, b).
+                auto vec_src = __riscv_vsub_vv_u8m2_m(bool_mask, __riscv_vmaxu_vv_u8m2_m(bool_mask, vec_src1, vec_src2, vl),
+                                                      __riscv_vminu_vv_u8m2_m(bool_mask, vec_src1, vec_src2, vl), vl);
+                // Widening multiply gives the 16-bit square, then zero-extend to 32 bits.
+                auto vec_mul = __riscv_vwmulu_vv_u16m4_m(bool_mask, vec_src, vec_src, vl);
+                auto vec_zext = __riscv_vzext_vf2_u32m8_m(bool_mask, vec_mul, vl);
+                // _tumu: lanes past vl and masked-off lanes keep their previous sum.
+                vec_sum = __riscv_vadd_tumu(bool_mask, vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width - j);
+                reduce(vl);
+
+                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
+                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
+                auto vec_mul = __riscv_vwmulu(vec_src, vec_src, vl);
+                auto vec_zext = __riscv_vzext_vf2(vec_mul, vl);
+                // _tu: lanes past vl keep their previous sum on a short final chunk.
+                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    // Passing the threshold itself forces the final flush of whatever is left.
+    reduce(1 << 16);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// Infinity norm of the difference of two 4-channel 8-bit images:
+// *result = max |src1 - src2| over all channel values (only pixels whose mask
+// byte is non-zero when mask is not null; the mask has one byte per pixel).
+// Rows are addressed through the byte steps. Always returns CV_HAL_ERROR_OK.
+inline int normDiffInf_8UC4(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{
+    int vlmax = __riscv_vsetvlmax_e8m8();
+    // Per-lane running maximum, reduced to a scalar after all rows are processed.
+    auto vec_max = __riscv_vmv_v_x_u8m8(0, vlmax);
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            const uchar* mask_row = mask + i * mask_step;
+            // vl counts channel bytes, vlm counts pixels (mask bytes).
+            // NOTE(review): the code relies on vl == 4 * vlm for every chunk - confirm
+            // that vsetvl cannot return a different split on the target hardware.
+            int vl, vlm;
+            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
+            {
+                vl = __riscv_vsetvl_e8m8(width * 4 - j);
+                vlm = __riscv_vsetvl_e8m2(width - jm);
+                auto vec_src1 = __riscv_vle8_v_u8m8(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m8(src2_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + jm, vlm);
+                // Clamp each mask byte to 0/1, widen to 32 bits and multiply by 0x01010101 so
+                // the flag is replicated into all four bytes of the pixel; reinterpreting as
+                // bytes then yields one mask bit per channel value.
+                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm);
+                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m8(vec_mask_ext), 0, vl);
+                // Unsigned absolute difference computed as max(a, b) - min(a, b).
+                auto vec_src = __riscv_vsub_vv_u8m8_m(bool_mask_ext, __riscv_vmaxu_vv_u8m8_m(bool_mask_ext, vec_src1, vec_src2, vl),
+                                                      __riscv_vminu_vv_u8m8_m(bool_mask_ext, vec_src1, vec_src2, vl), vl);
+                // _tumu: lanes past vl and masked-off lanes keep their previous maximum.
+                vec_max = __riscv_vmaxu_tumu(bool_mask_ext, vec_max, vec_max, vec_src, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            int vl;
+            // Without a mask the row is processed as width * 4 independent bytes.
+            for (int j = 0; j < width * 4; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m8(width * 4 - j);
+                auto vec_src1 = __riscv_vle8_v_u8m8(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m8(src2_row + j, vl);
+                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
+                vec_max = __riscv_vmaxu_tu(vec_max, vec_max, vec_src, vl);
+            }
+        }
+    }
+    // Horizontal maximum over all vlmax lanes, seeded with 0.
+    auto sc_max = __riscv_vmv_s_x_u8m1(0, vlmax);
+    sc_max = __riscv_vredmaxu(vec_max, sc_max, vlmax);
+    *result = __riscv_vmv_x(sc_max);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// L1 norm of the difference of two 4-channel 8-bit images:
+// *result = sum |src1 - src2| over all channel values (only pixels whose mask
+// byte is non-zero when mask is not null; the mask has one byte per pixel).
+// Rows are addressed through the byte steps. Always returns CV_HAL_ERROR_OK.
+//
+// The per-lane sums are 32-bit, so they are periodically flushed into the
+// double *result (same scheme as normDiffL2Sqr_8UC4). Without the flush the
+// total wraps around once the summed difference exceeds 2^32, which a
+// 4-channel image reaches at roughly 4.2 million fully-different pixels.
+inline int normDiffL1_8UC4(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{
+    int vlmax = __riscv_vsetvlmax_e8m2();
+    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    // Channel values accumulated since the last flush. Every chunk adds at
+    // most 255 to a lane, and fewer than 2^24 chunks fit between flushes, so a
+    // lane never exceeds (2^24 - 1) * 255 < 2^32.
+    int cnt = 0;
+    auto reduce = [&](int vl) {
+        if ((cnt += vl) < (1 << 24))
+            return;
+        cnt = vl;
+        // Add every lane to the double accumulator individually so the
+        // horizontal sum is never formed in 32 bits.
+        for (int i = 0; i < vlmax; i++)
+        {
+            *result += __riscv_vmv_x(vec_sum);
+            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
+        }
+        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    };
+
+    *result = 0;
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            const uchar* mask_row = mask + i * mask_step;
+            // vl counts channel bytes, vlm counts pixels (mask bytes).
+            // NOTE(review): the code relies on vl == 4 * vlm for every chunk - confirm
+            // that vsetvl cannot return a different split on the target hardware.
+            int vl, vlm;
+            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
+            {
+                vl = __riscv_vsetvl_e8m2(width * 4 - j);
+                vlm = __riscv_vsetvl_e8mf2(width - jm);
+                reduce(vl);
+
+                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8mf2(mask_row + jm, vlm);
+                // Clamp each mask byte to 0/1, widen to 32 bits and multiply by 0x01010101 so
+                // the flag is replicated into all four bytes of the pixel; reinterpreting as
+                // bytes then yields one mask bit per channel value.
+                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm);
+                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m2(vec_mask_ext), 0, vl);
+                // Unsigned absolute difference computed as max(a, b) - min(a, b).
+                auto vec_src = __riscv_vsub_vv_u8m2_m(bool_mask_ext, __riscv_vmaxu_vv_u8m2_m(bool_mask_ext, vec_src1, vec_src2, vl),
+                                                      __riscv_vminu_vv_u8m2_m(bool_mask_ext, vec_src1, vec_src2, vl), vl);
+                auto vec_zext = __riscv_vzext_vf4_u32m8_m(bool_mask_ext, vec_src, vl);
+                // _tumu: lanes past vl and masked-off lanes keep their previous sum.
+                vec_sum = __riscv_vadd_tumu(bool_mask_ext, vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            int vl;
+            // Without a mask the row is processed as width * 4 independent bytes.
+            for (int j = 0; j < width * 4; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width * 4 - j);
+                reduce(vl);
+
+                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
+                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
+                auto vec_zext = __riscv_vzext_vf4(vec_src, vl);
+                // _tu: lanes past vl keep their previous sum on a short final chunk.
+                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    // Passing the threshold itself forces the final flush of whatever is left.
+    reduce(1 << 24);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// Squared L2 norm of the difference of two 4-channel 8-bit images:
+// *result = sum (src1 - src2)^2 over all channel values (only pixels whose
+// mask byte is non-zero when mask is not null; the mask has one byte per
+// pixel). Rows are addressed through the byte steps. Always returns
+// CV_HAL_ERROR_OK.
+inline int normDiffL2Sqr_8UC4(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{
+    int vlmax = __riscv_vsetvlmax_e8m2();
+    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    // Channel values accumulated into vec_sum since the last flush.
+    int cnt = 0;
+    // Called before each chunk of vl values is accumulated. Once the count
+    // would reach 1 << 16, every lane of vec_sum is added to *result and
+    // vec_sum is zeroed. NOTE(review): presumably this keeps the 32-bit lanes
+    // from overflowing (each add is at most 255 * 255) - confirm.
+    auto reduce = [&](int vl) {
+        if ((cnt += vl) < (1 << 16))
+            return;
+        cnt = vl;
+        for (int i = 0; i < vlmax; i++)
+        {
+            // Read lane 0, then shift the remaining lanes down by one.
+            *result += __riscv_vmv_x(vec_sum);
+            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
+        }
+        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
+    };
+
+    *result = 0;
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            const uchar* mask_row = mask + i * mask_step;
+            // vl counts channel bytes, vlm counts pixels (mask bytes).
+            // NOTE(review): the code relies on vl == 4 * vlm for every chunk - confirm
+            // that vsetvl cannot return a different split on the target hardware.
+            int vl, vlm;
+            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
+            {
+                vl = __riscv_vsetvl_e8m2(width * 4 - j);
+                vlm = __riscv_vsetvl_e8mf2(width - jm);
+                reduce(vl);
+
+                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
+                auto vec_mask = __riscv_vle8_v_u8mf2(mask_row + jm, vlm);
+                // Clamp each mask byte to 0/1, widen to 32 bits and multiply by 0x01010101 so
+                // the flag is replicated into all four bytes of the pixel; reinterpreting as
+                // bytes then yields one mask bit per channel value.
+                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm);
+                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m2(vec_mask_ext), 0, vl);
+                // Unsigned absolute difference computed as max(a, b) - min(a, b).
+                auto vec_src = __riscv_vsub_vv_u8m2_m(bool_mask_ext, __riscv_vmaxu_vv_u8m2_m(bool_mask_ext, vec_src1, vec_src2, vl),
+                                                      __riscv_vminu_vv_u8m2_m(bool_mask_ext, vec_src1, vec_src2, vl), vl);
+                // Widening multiply gives the 16-bit square, then zero-extend to 32 bits.
+                auto vec_mul = __riscv_vwmulu_vv_u16m4_m(bool_mask_ext, vec_src, vec_src, vl);
+                auto vec_zext = __riscv_vzext_vf2_u32m8_m(bool_mask_ext, vec_mul, vl);
+                // _tumu: lanes past vl and masked-off lanes keep their previous sum.
+                vec_sum = __riscv_vadd_tumu(bool_mask_ext, vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const uchar* src1_row = src1 + i * src1_step;
+            const uchar* src2_row = src2 + i * src2_step;
+            int vl;
+            // Without a mask the row is processed as width * 4 independent bytes.
+            for (int j = 0; j < width * 4; j += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(width * 4 - j);
+                reduce(vl);
+
+                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
+                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
+                auto vec_mul = __riscv_vwmulu(vec_src, vec_src, vl);
+                auto vec_zext = __riscv_vzext_vf2(vec_mul, vl);
+                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
+            }
+        }
+    }
+    // Passing the threshold itself forces the final flush of whatever is left.
+    reduce(1 << 16);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// Infinity norm of the difference of two single-channel float images:
+// *result = max |src1 - src2| over all pixels (only pixels whose mask byte is
+// non-zero when mask is not null). The uchar pointers are reinterpreted as
+// float rows; steps are in bytes. Always returns CV_HAL_ERROR_OK.
+inline int normDiffInf_32FC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{
+    int vlmax = __riscv_vsetvlmax_e32m8();
+    // Per-lane running maximum, reduced to a scalar after all rows are processed.
+    auto vec_max = __riscv_vfmv_v_f_f32m8(0, vlmax);
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
+            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m8(width - j);
+                auto vec_src1 = __riscv_vle32_v_f32m8(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle32_v_f32m8(src2_row + j, vl);
+                // The 8-bit mask is loaded at LMUL 2, a quarter of the float LMUL, so
+                // both register groups hold the same number of elements.
+                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                auto vec_src = __riscv_vfsub_vv_f32m8_m(bool_mask, vec_src1, vec_src2, vl);
+                auto vec_abs = __riscv_vfabs_v_f32m8_m(bool_mask, vec_src, vl);
+                // _tumu: lanes past vl and masked-off lanes keep their previous maximum.
+                vec_max = __riscv_vfmax_tumu(bool_mask, vec_max, vec_max, vec_abs, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
+            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m8(width - j);
+                auto vec_src1 = __riscv_vle32_v_f32m8(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle32_v_f32m8(src2_row + j, vl);
+                auto vec_src = __riscv_vfsub(vec_src1, vec_src2, vl);
+                auto vec_abs = __riscv_vfabs(vec_src, vl);
+                // _tu: lanes past vl keep their previous maximum on a short final chunk.
+                vec_max = __riscv_vfmax_tu(vec_max, vec_max, vec_abs, vl);
+            }
+        }
+    }
+    // Horizontal maximum over all vlmax lanes, seeded with 0.
+    auto sc_max = __riscv_vfmv_s_f_f32m1(0, vlmax);
+    sc_max = __riscv_vfredmax(vec_max, sc_max, vlmax);
+    *result = __riscv_vfmv_f(sc_max);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// L1 norm of the difference of two single-channel float images:
+// *result = sum |src1 - src2| over all pixels (only pixels whose mask byte is
+// non-zero when mask is not null). Differences are widened to double before
+// being accumulated. The uchar pointers are reinterpreted as float rows;
+// steps are in bytes. Always returns CV_HAL_ERROR_OK.
+inline int normDiffL1_32FC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{
+    int vlmax = __riscv_vsetvlmax_e32m4();
+    // Per-lane double-precision partial sums.
+    auto vec_sum = __riscv_vfmv_v_f_f64m8(0, vlmax);
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
+            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m4(width - j);
+                auto vec_src1 = __riscv_vle32_v_f32m4(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle32_v_f32m4(src2_row + j, vl);
+                // The 8-bit mask is loaded at LMUL 1, a quarter of the float LMUL, so
+                // both register groups hold the same number of elements.
+                auto vec_mask = __riscv_vle8_v_u8m1(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                auto vec_src = __riscv_vfsub_vv_f32m4_m(bool_mask, vec_src1, vec_src2, vl);
+                auto vec_abs = __riscv_vfabs_v_f32m4_m(bool_mask, vec_src, vl);
+                // Widen float -> double before adding to the accumulator.
+                auto vec_fext = __riscv_vfwcvt_f_f_v_f64m8_m(bool_mask, vec_abs, vl);
+                // _tumu: lanes past vl and masked-off lanes keep their previous sum.
+                vec_sum = __riscv_vfadd_tumu(bool_mask, vec_sum, vec_sum, vec_fext, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
+            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m4(width - j);
+                auto vec_src1 = __riscv_vle32_v_f32m4(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle32_v_f32m4(src2_row + j, vl);
+                auto vec_src = __riscv_vfsub(vec_src1, vec_src2, vl);
+                auto vec_abs = __riscv_vfabs(vec_src, vl);
+                auto vec_fext = __riscv_vfwcvt_f_f_v_f64m8(vec_abs, vl);
+                // _tu: lanes past vl keep their previous sum on a short final chunk.
+                vec_sum = __riscv_vfadd_tu(vec_sum, vec_sum, vec_fext, vl);
+            }
+        }
+    }
+    // Ordered horizontal sum of all vlmax lanes, seeded with 0.
+    auto sc_sum = __riscv_vfmv_s_f_f64m1(0, vlmax);
+    sc_sum = __riscv_vfredosum(vec_sum, sc_sum, vlmax);
+    *result = __riscv_vfmv_f(sc_sum);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// Squared L2 norm of the difference of two single-channel float images:
+// *result = sum (src1 - src2)^2 over all pixels (only pixels whose mask byte
+// is non-zero when mask is not null). Squares are produced directly in double
+// by a widening multiply. The uchar pointers are reinterpreted as float rows;
+// steps are in bytes. Always returns CV_HAL_ERROR_OK.
+inline int normDiffL2Sqr_32FC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
+{
+    int vlmax = __riscv_vsetvlmax_e32m4();
+    // Per-lane double-precision partial sums.
+    auto vec_sum = __riscv_vfmv_v_f_f64m8(0, vlmax);
+
+    if (mask)
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
+            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
+            const uchar* mask_row = mask + i * mask_step;
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m4(width - j);
+                auto vec_src1 = __riscv_vle32_v_f32m4(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle32_v_f32m4(src2_row + j, vl);
+                // The 8-bit mask is loaded at LMUL 1, a quarter of the float LMUL, so
+                // both register groups hold the same number of elements.
+                auto vec_mask = __riscv_vle8_v_u8m1(mask_row + j, vl);
+                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
+                auto vec_src = __riscv_vfsub_vv_f32m4_m(bool_mask, vec_src1, vec_src2, vl);
+                // Widening multiply: float * float -> double square.
+                auto vec_mul = __riscv_vfwmul_vv_f64m8_m(bool_mask, vec_src, vec_src, vl);
+                // _tumu: lanes past vl and masked-off lanes keep their previous sum.
+                vec_sum = __riscv_vfadd_tumu(bool_mask, vec_sum, vec_sum, vec_mul, vl);
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < height; i++)
+        {
+            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
+            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
+            int vl;
+            for (int j = 0; j < width; j += vl)
+            {
+                vl = __riscv_vsetvl_e32m4(width - j);
+                auto vec_src1 = __riscv_vle32_v_f32m4(src1_row + j, vl);
+                auto vec_src2 = __riscv_vle32_v_f32m4(src2_row + j, vl);
+                auto vec_src = __riscv_vfsub(vec_src1, vec_src2, vl);
+                auto vec_mul = __riscv_vfwmul(vec_src, vec_src, vl);
+                // _tu: lanes past vl keep their previous sum on a short final chunk.
+                vec_sum = __riscv_vfadd_tu(vec_sum, vec_sum, vec_mul, vl);
+            }
+        }
+    }
+    // Ordered horizontal sum of all vlmax lanes, seeded with 0.
+    auto sc_sum = __riscv_vfmv_s_f_f64m1(0, vlmax);
+    sc_sum = __riscv_vfredosum(vec_sum, sc_sum, vlmax);
+    *result = __riscv_vfmv_f(sc_sum);
+
+    return CV_HAL_ERROR_OK;
+}
+
+// HAL entry point for the norm of the difference of two images.
+//
+// Selects a kernel from the element type (CV_8UC1, CV_8UC4 or CV_32FC1) and
+// from norm_type with the NORM_RELATIVE bit removed (NORM_INF, NORM_L1,
+// NORM_L2SQR or NORM_L2; NORM_L2 runs the squared kernel and takes the square
+// root). When NORM_RELATIVE is set, the value is additionally divided by
+// norm(src2) + DBL_EPSILON.
+//
+// Returns CV_HAL_ERROR_OK immediately when result is null,
+// CV_HAL_ERROR_NOT_IMPLEMENTED for unsupported type / norm combinations, and
+// otherwise the status of the last kernel that ran.
+inline int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask,
+                    size_t mask_step, int width, int height, int type, int norm_type, double* result)
+{
+    if (!result)
+        return CV_HAL_ERROR_OK;
+
+    typedef int (*DiffKernel)(const uchar*, size_t, const uchar*, size_t, const uchar*, size_t, int, int, double*);
+
+    // Picks the kernel matching the element type; nullptr means "unsupported type".
+    const auto pick = [type](DiffKernel for_8uc1, DiffKernel for_8uc4, DiffKernel for_32fc1) -> DiffKernel {
+        switch (type)
+        {
+        case CV_8UC1:
+            return for_8uc1;
+        case CV_8UC4:
+            return for_8uc4;
+        case CV_32FC1:
+            return for_32fc1;
+        default:
+            return nullptr;
+        }
+    };
+
+    const int base_norm = norm_type & ~NORM_RELATIVE;
+    DiffKernel kernel = nullptr;
+    bool take_sqrt = false;
+    switch (base_norm)
+    {
+    case NORM_INF:
+        kernel = pick(normDiffInf_8UC1, normDiffInf_8UC4, normDiffInf_32FC1);
+        break;
+    case NORM_L1:
+        kernel = pick(normDiffL1_8UC1, normDiffL1_8UC4, normDiffL1_32FC1);
+        break;
+    case NORM_L2SQR:
+        kernel = pick(normDiffL2Sqr_8UC1, normDiffL2Sqr_8UC4, normDiffL2Sqr_32FC1);
+        break;
+    case NORM_L2:
+        // L2 is the square root of the squared-L2 kernel's output.
+        kernel = pick(normDiffL2Sqr_8UC1, normDiffL2Sqr_8UC4, normDiffL2Sqr_32FC1);
+        take_sqrt = true;
+        break;
+    default:
+        break;
+    }
+
+    if (!kernel)
+        return CV_HAL_ERROR_NOT_IMPLEMENTED;
+
+    int status = kernel(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
+    if (take_sqrt)
+        *result = std::sqrt(*result);
+
+    if (status != CV_HAL_ERROR_OK || !(norm_type & NORM_RELATIVE))
+        return status;
+
+    // Relative norm: normalise by the norm of the second image.
+    double src2_norm;
+    status = cv::cv_hal_rvv::norm(src2, src2_step, mask, mask_step, width, height, type, norm_type & ~NORM_RELATIVE, &src2_norm);
+    if (status == CV_HAL_ERROR_OK)
+        *result /= src2_norm + DBL_EPSILON;
+
+    return status;
+}
+
+}}
+
+#endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/split.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/split.hpp
@ -0,0 +1,93 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef OPENCV_HAL_RVV_SPLIT_HPP_INCLUDED
+#define OPENCV_HAL_RVV_SPLIT_HPP_INCLUDED
+
+#include <riscv_vector.h>
+
+namespace cv { namespace cv_hal_rvv {
+
+#undef cv_hal_split8u
+#define cv_hal_split8u cv::cv_hal_rvv::split8u
+
+// De-interleaves len elements of a cn-channel 8-bit array: channel k of
+// element i (src[i * cn + k]) is written to dst[k][i]. Always returns
+// CV_HAL_ERROR_OK.
+inline int split8u(const uchar* src, uchar** dst, int len, int cn)
+{
+    int vl = 0;
+    if (cn == 1)
+    {
+        // Single channel: plain vector copy.
+        uchar* dst0 = dst[0];
+        for (int i = 0; i < len; i += vl)
+        {
+            vl = __riscv_vsetvl_e8m8(len - i);
+            __riscv_vse8_v_u8m8(dst0 + i, __riscv_vle8_v_u8m8(src + i, vl), vl);
+        }
+    }
+    else if (cn == 2)
+    {
+        // 2..4 channels: a unit-stride segment load splits the channels into
+        // separate registers, which are then stored to their planes.
+        uchar *dst0 = dst[0], *dst1 = dst[1];
+        for (int i = 0; i < len; i += vl)
+        {
+            vl = __riscv_vsetvl_e8m4(len - i);
+            vuint8m4x2_t seg = __riscv_vlseg2e8_v_u8m4x2(src + i * cn, vl);
+            __riscv_vse8_v_u8m4(dst0 + i, __riscv_vget_v_u8m4x2_u8m4(seg, 0), vl);
+            __riscv_vse8_v_u8m4(dst1 + i, __riscv_vget_v_u8m4x2_u8m4(seg, 1), vl);
+        }
+    }
+    else if (cn == 3)
+    {
+        uchar *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2];
+        for (int i = 0; i < len; i += vl)
+        {
+            vl = __riscv_vsetvl_e8m2(len - i);
+            vuint8m2x3_t seg = __riscv_vlseg3e8_v_u8m2x3(src + i * cn, vl);
+            __riscv_vse8_v_u8m2(dst0 + i, __riscv_vget_v_u8m2x3_u8m2(seg, 0), vl);
+            __riscv_vse8_v_u8m2(dst1 + i, __riscv_vget_v_u8m2x3_u8m2(seg, 1), vl);
+            __riscv_vse8_v_u8m2(dst2 + i, __riscv_vget_v_u8m2x3_u8m2(seg, 2), vl);
+        }
+    }
+    else if (cn == 4)
+    {
+        uchar *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2], *dst3 = dst[3];
+        for (int i = 0; i < len; i += vl)
+        {
+            vl = __riscv_vsetvl_e8m2(len - i);
+            vuint8m2x4_t seg = __riscv_vlseg4e8_v_u8m2x4(src + i * cn, vl);
+            __riscv_vse8_v_u8m2(dst0 + i, __riscv_vget_v_u8m2x4_u8m2(seg, 0), vl);
+            __riscv_vse8_v_u8m2(dst1 + i, __riscv_vget_v_u8m2x4_u8m2(seg, 1), vl);
+            __riscv_vse8_v_u8m2(dst2 + i, __riscv_vget_v_u8m2x4_u8m2(seg, 2), vl);
+            __riscv_vse8_v_u8m2(dst3 + i, __riscv_vget_v_u8m2x4_u8m2(seg, 3), vl);
+        }
+    }
+    else
+    {
+        // Any other channel count: handle channels in groups of four with a
+        // strided segment load (byte stride cn between consecutive elements) ...
+        int k = 0;
+        for (; k <= cn - 4; k += 4)
+        {
+            uchar *dst0 = dst[k], *dst1 = dst[k + 1], *dst2 = dst[k + 2], *dst3 = dst[k + 3];
+            for (int i = 0; i < len; i += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(len - i);
+                vuint8m2x4_t seg = __riscv_vlsseg4e8_v_u8m2x4(src + k + i * cn, cn, vl);
+                __riscv_vse8_v_u8m2(dst0 + i, __riscv_vget_v_u8m2x4_u8m2(seg, 0), vl);
+                __riscv_vse8_v_u8m2(dst1 + i, __riscv_vget_v_u8m2x4_u8m2(seg, 1), vl);
+                __riscv_vse8_v_u8m2(dst2 + i, __riscv_vget_v_u8m2x4_u8m2(seg, 2), vl);
+                __riscv_vse8_v_u8m2(dst3 + i, __riscv_vget_v_u8m2x4_u8m2(seg, 3), vl);
+            }
+        }
+        // ... and the remaining (cn % 4) channels one at a time with a strided load.
+        for (; k < cn; ++k)
+        {
+            uchar* dstK = dst[k];
+            for (int i = 0; i < len; i += vl)
+            {
+                vl = __riscv_vsetvl_e8m2(len - i);
+                vuint8m2_t seg = __riscv_vlse8_v_u8m2(src + k + i * cn, cn, vl);
+                __riscv_vse8_v_u8m2(dstK + i, seg, vl);
+            }
+        }
+    }
+    return CV_HAL_ERROR_OK;
+}
+
+}}
+#endif
--- a/3rdparty/ippicv/ippicv.cmake
+++ b/3rdparty/ippicv/ippicv.cmake
@ -2,7 +2,7 @@ function(download_ippicv root_var)
  set(${root_var} "" PARENT_SCOPE)

  # Commit SHA in the opencv_3rdparty repo
-  set(IPPICV_COMMIT "7f55c0c26be418d494615afca15218566775c725")
+  set(IPPICV_COMMIT "d1cbea44d326eb0421fedcdd16de4630fd8c7ed0")
  # Define actual ICV versions
  if(APPLE)
    set(IPPICV_COMMIT "0cc4aa06bf2bef4b05d237c69a5a96b9cd0cb85a")
@ -14,9 +14,10 @@ function(download_ippicv root_var)
    set(OPENCV_ICV_PLATFORM "linux")
    set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_lnx")
    if(X86_64)
-      set(OPENCV_ICV_NAME "ippicv_2021.12.0_lnx_intel64_20240425_general.tgz")
-      set(OPENCV_ICV_HASH "d06e6d44ece88f7f17a6cd9216761186")
+      set(OPENCV_ICV_NAME "ippicv_2022.0.0_lnx_intel64_20240904_general.tgz")
+      set(OPENCV_ICV_HASH "63717ee0f918ad72fb5a737992a206d1")
    else()
+      set(IPPICV_COMMIT "7f55c0c26be418d494615afca15218566775c725")
      set(OPENCV_ICV_NAME "ippicv_2021.12.0_lnx_ia32_20240425_general.tgz")
      set(OPENCV_ICV_HASH "85ffa2b9ed7802b93c23fa27b0097d36")
    endif()
@ -24,9 +25,10 @@ function(download_ippicv root_var)
    set(OPENCV_ICV_PLATFORM "windows")
    set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_win")
    if(X86_64)
-      set(OPENCV_ICV_NAME "ippicv_2021.12.0_win_intel64_20240425_general.zip")
-      set(OPENCV_ICV_HASH "402ff8c6b4986738fed71c44e1ce665d")
+      set(OPENCV_ICV_NAME "ippicv_2022.0.0_win_intel64_20240904_general.zip")
+      set(OPENCV_ICV_HASH "3a6eca7cc3bce7159eb1443c6fca4e31")
    else()
+      set(IPPICV_COMMIT "7f55c0c26be418d494615afca15218566775c725")
      set(OPENCV_ICV_NAME "ippicv_2021.12.0_win_ia32_20240425_general.zip")
      set(OPENCV_ICV_HASH "8b1d2a23957d57624d0de8f2a5cae5f1")
    endif()
--- a/3rdparty/ittnotify/CMakeLists.txt
+++ b/3rdparty/ittnotify/CMakeLists.txt
@ -24,7 +24,6 @@ set(ITT_PUBLIC_HDRS
    include/ittnotify.h
    include/jitprofiling.h
    include/libittnotify.h
-    include/llvm_jit_event_listener.hpp
 )
 set(ITT_PRIVATE_HDRS
    src/ittnotify/disable_warnings.h
@ -39,6 +38,11 @@ set(ITT_SRCS

 add_library(${ITT_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${ITT_SRCS} ${ITT_PUBLIC_HDRS} ${ITT_PRIVATE_HDRS})

+file(STRINGS "src/ittnotify/ittnotify_config.h" API_VERSION_NUM REGEX "#define\[ \t]+API_VERSION_NUM[ \t]+([0-9\.]+)")
+if(API_VERSION_NUM MATCHES "#define\[ \t]+API_VERSION_NUM[ \t]+([0-9\.]*)")
+  set(ITTNOTIFY_VERSION "${CMAKE_MATCH_1}"  CACHE INTERNAL "" FORCE)
+endif()
+
 if(NOT WIN32)
  if(HAVE_DL_LIBRARY)
    target_link_libraries(${ITT_LIBRARY} dl)
@ -64,4 +68,4 @@ if(NOT BUILD_SHARED_LIBS)
  ocv_install_target(${ITT_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL)
 endif()

-ocv_install_3rdparty_licenses(ittnotify src/ittnotify/LICENSE.BSD src/ittnotify/LICENSE.GPL)
+ocv_install_3rdparty_licenses(ittnotify src/ittnotify/BSD-3-Clause.txt src/ittnotify/GPL-2.0-only.txt)
--- a/3rdparty/ittnotify/include/ittnotify.h
+++ b/3rdparty/ittnotify/include/ittnotify.h
@ -1,60 +1,8 @@
-/* <copyright>
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
+/*
+  Copyright (C) 2005-2019 Intel Corporation

-  GPL LICENSE SUMMARY
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  Contact Information:
-  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
-
-  BSD LICENSE
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-</copyright> */
+  SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+*/
 #ifndef _ITTNOTIFY_H_
 #define _ITTNOTIFY_H_

@ -63,7 +11,8 @@
@brief Public User API functions and types
@mainpage

-The ITT API is used to annotate a user's program with additional information
+The Instrumentation and Tracing Technology API (ITT API) is used to
+annotate a user's program with additional information
 that can be used by correctness and performance tools. The user inserts
 calls in their program. Those calls generate information that is collected
 at runtime, and used by Intel(R) Threading Tools.
@ -141,6 +90,10 @@ The same ID may not be reused for different instances, unless a previous
 #  define ITT_OS_FREEBSD   4
 #endif /* ITT_OS_FREEBSD */

+#ifndef ITT_OS_OPENBSD
+#  define ITT_OS_OPENBSD   5
+#endif /* ITT_OS_OPENBSD */
+
 #ifndef ITT_OS
 #  if defined WIN32 || defined _WIN32
 #    define ITT_OS ITT_OS_WIN
@ -148,6 +101,8 @@ The same ID may not be reused for different instances, unless a previous
 #    define ITT_OS ITT_OS_MAC
 #  elif defined( __FreeBSD__ )
 #    define ITT_OS ITT_OS_FREEBSD
+#  elif defined( __OpenBSD__)
+#    define ITT_OS ITT_OS_OPENBSD
 #  else
 #    define ITT_OS ITT_OS_LINUX
 #  endif
@ -169,6 +124,10 @@ The same ID may not be reused for different instances, unless a previous
 #  define ITT_PLATFORM_FREEBSD 4
 #endif /* ITT_PLATFORM_FREEBSD */

+#ifndef ITT_PLATFORM_OPENBSD
+#  define ITT_PLATFORM_OPENBSD 5
+#endif /* ITT_PLATFORM_OPENBSD */
+
 #ifndef ITT_PLATFORM
 #  if ITT_OS==ITT_OS_WIN
 #    define ITT_PLATFORM ITT_PLATFORM_WIN
@ -176,6 +135,8 @@ The same ID may not be reused for different instances, unless a previous
 #    define ITT_PLATFORM ITT_PLATFORM_MAC
 #  elif ITT_OS==ITT_OS_FREEBSD
 #    define ITT_PLATFORM ITT_PLATFORM_FREEBSD
+#  elif ITT_OS==ITT_OS_OPENBSD
+#    define ITT_PLATFORM ITT_PLATFORM_OPENBSD
 #  else
 #    define ITT_PLATFORM ITT_PLATFORM_POSIX
 #  endif
@ -228,7 +189,12 @@ The same ID may not be reused for different instances, unless a previous

 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 /* use __forceinline (VC++ specific) */
-#define ITT_INLINE           __forceinline
+#if defined(__MINGW32__) && !defined(__cplusplus)
+#define ITT_INLINE           static __inline__ __attribute__((__always_inline__,__gnu_inline__))
+#else
+#define ITT_INLINE           static __forceinline
+#endif /* __MINGW32__ */
+
 #define ITT_INLINE_ATTRIBUTE /* nothing */
 #else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 /*
@ -289,20 +255,20 @@ The same ID may not be reused for different instances, unless a previous
 #define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)
 #define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)

-#define ITTNOTIFY_VOID_D0(n,d)       (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
-#define ITTNOTIFY_VOID_D1(n,d,x)     (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
-#define ITTNOTIFY_VOID_D2(n,d,x,y)   (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
-#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
-#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a)     (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
-#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b)   (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
-#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
-#define ITTNOTIFY_DATA_D0(n,d)       (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d)
-#define ITTNOTIFY_DATA_D1(n,d,x)     (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x)
-#define ITTNOTIFY_DATA_D2(n,d,x,y)   (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y)
-#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z)
-#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a)     (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
-#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b)   (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
-#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+#define ITTNOTIFY_VOID_D0(n,d)       ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_VOID_D1(n,d,x)     ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_VOID_D2(n,d,x,y)   ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_VOID_D3(n,d,x,y,z) ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a)     ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b)   ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+#define ITTNOTIFY_DATA_D0(n,d)       ((d) == NULL) ? 0 : (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_DATA_D1(n,d,x)     ((d) == NULL) ? 0 : (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_DATA_D2(n,d,x,y)   ((d) == NULL) ? 0 : (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_DATA_D3(n,d,x,y,z) ((d) == NULL) ? 0 : (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a)     ((d) == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b)   ((d) == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) ((d) == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)

 #ifdef ITT_STUB
 #undef ITT_STUB
@ -340,7 +306,7 @@ extern "C" {
 *     only pauses tracing and analyzing memory access.
 *     It does not pause tracing or analyzing threading APIs.
 *   .
- * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
+ * - Intel(R) VTune(TM) Profiler:
 *   - Does continue to record when new threads are started.
 *   .
 * - Other effects:
@ -355,35 +321,143 @@ void ITTAPI __itt_resume(void);
 /** @brief Detach collection */
 void ITTAPI __itt_detach(void);

+/**
+ * @enum __itt_collection_scope
+ * @brief Enumerator for collection scopes
+ */
+typedef enum {
+    __itt_collection_scope_host    = 1 << 0,
+    __itt_collection_scope_offload = 1 << 1,
+    __itt_collection_scope_all     = 0x7FFFFFFF
+} __itt_collection_scope;
+
+/** @brief Pause scoped collection */
+void ITTAPI __itt_pause_scoped(__itt_collection_scope);
+/** @brief Resume scoped collection */
+void ITTAPI __itt_resume_scoped(__itt_collection_scope);
+
 /** @cond exclude_from_documentation */
 #ifndef INTEL_NO_MACRO_BODY
 #ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, pause,  (void))
-ITT_STUBV(ITTAPI, void, resume, (void))
-ITT_STUBV(ITTAPI, void, detach, (void))
-#define __itt_pause      ITTNOTIFY_VOID(pause)
-#define __itt_pause_ptr  ITTNOTIFY_NAME(pause)
-#define __itt_resume     ITTNOTIFY_VOID(resume)
-#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
-#define __itt_detach     ITTNOTIFY_VOID(detach)
-#define __itt_detach_ptr ITTNOTIFY_NAME(detach)
+ITT_STUBV(ITTAPI, void, pause,         (void))
+ITT_STUBV(ITTAPI, void, pause_scoped,  (__itt_collection_scope))
+ITT_STUBV(ITTAPI, void, resume,        (void))
+ITT_STUBV(ITTAPI, void, resume_scoped, (__itt_collection_scope))
+ITT_STUBV(ITTAPI, void, detach,        (void))
+#define __itt_pause             ITTNOTIFY_VOID(pause)
+#define __itt_pause_ptr         ITTNOTIFY_NAME(pause)
+#define __itt_pause_scoped      ITTNOTIFY_VOID(pause_scoped)
+#define __itt_pause_scoped_ptr  ITTNOTIFY_NAME(pause_scoped)
+#define __itt_resume            ITTNOTIFY_VOID(resume)
+#define __itt_resume_ptr        ITTNOTIFY_NAME(resume)
+#define __itt_resume_scoped     ITTNOTIFY_VOID(resume_scoped)
+#define __itt_resume_scoped_ptr ITTNOTIFY_NAME(resume_scoped)
+#define __itt_detach            ITTNOTIFY_VOID(detach)
+#define __itt_detach_ptr        ITTNOTIFY_NAME(detach)
 #else  /* INTEL_NO_ITTNOTIFY_API */
 #define __itt_pause()
-#define __itt_pause_ptr  0
+#define __itt_pause_ptr           0
+#define __itt_pause_scoped(scope)
+#define __itt_pause_scoped_ptr    0
 #define __itt_resume()
-#define __itt_resume_ptr 0
+#define __itt_resume_ptr          0
+#define __itt_resume_scoped(scope)
+#define __itt_resume_scoped_ptr   0
 #define __itt_detach()
-#define __itt_detach_ptr 0
+#define __itt_detach_ptr          0
 #endif /* INTEL_NO_ITTNOTIFY_API */
 #else  /* INTEL_NO_MACRO_BODY */
-#define __itt_pause_ptr  0
-#define __itt_resume_ptr 0
-#define __itt_detach_ptr 0
+#define __itt_pause_ptr           0
+#define __itt_pause_scoped_ptr    0
+#define __itt_resume_ptr          0
+#define __itt_resume_scoped_ptr   0
+#define __itt_detach_ptr          0
 #endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */
 /** @} control group */
 /** @endcond */

+/**
+ * @defgroup Intel Processor Trace control
+ * API from this group provides control over collection and analysis of Intel Processor Trace (Intel PT) data
+ * Information about Intel Processor Trace technology can be found here (Volume 3 chapter 35):
+ * https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf
+ * Use this API to mark particular code regions for loading detailed performance statistics.
+ * This mode makes your analysis faster and more accurate.
+ * @{
+*/
+typedef unsigned char __itt_pt_region;
+
+/**
+ * @brief function saves a region name marked with Intel PT API and returns a region id.
+ * Only 7 names can be registered. Attempts to register more names will be ignored and a region id with auto names will be returned.
+ * For automatic naming of regions pass NULL as function parameter
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_pt_region ITTAPI __itt_pt_region_createA(const char    *name);
+__itt_pt_region ITTAPI __itt_pt_region_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_pt_region_create __itt_pt_region_createW
+#else /* UNICODE */
+#  define __itt_pt_region_create __itt_pt_region_createA
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_pt_region ITTAPI __itt_pt_region_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char    *name))
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create,  (const char    *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_pt_region_createA     ITTNOTIFY_DATA(pt_region_createA)
+#define __itt_pt_region_createA_ptr ITTNOTIFY_NAME(pt_region_createA)
+#define __itt_pt_region_createW     ITTNOTIFY_DATA(pt_region_createW)
+#define __itt_pt_region_createW_ptr ITTNOTIFY_NAME(pt_region_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_pt_region_create     ITTNOTIFY_DATA(pt_region_create)
+#define __itt_pt_region_create_ptr ITTNOTIFY_NAME(pt_region_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_pt_region_createA(name) (__itt_pt_region)0
+#define __itt_pt_region_createA_ptr 0
+#define __itt_pt_region_createW(name) (__itt_pt_region)0
+#define __itt_pt_region_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_pt_region_create(name)  (__itt_pt_region)0
+#define __itt_pt_region_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_pt_region_createA_ptr 0
+#define __itt_pt_region_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_pt_region_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief function contains a special code pattern identified on the post-processing stage and
+ * marks the beginning of a code region targeted for Intel PT analysis
+ * @param[in] region - region id, 0 <= region < 8
+*/
+void __itt_mark_pt_region_begin(__itt_pt_region region);
+/**
+ * @brief function contains a special code pattern identified on the post-processing stage and
+ * marks the end of a code region targeted for Intel PT analysis
+ * @param[in] region - region id, 0 <= region < 8
+*/
+void __itt_mark_pt_region_end(__itt_pt_region region);
+/** @} Intel PT control group*/
+
 /**
 * @defgroup threads Threads
 * @ingroup public
@ -541,14 +615,26 @@ ITT_STUBV(ITTAPI, void, suppress_pop, (void))
 /** @endcond */

 /**
- * @enum __itt_model_disable
- * @brief Enumerator for the disable methods
+ * @enum __itt_suppress_mode
+ * @brief Enumerator for the suppressing modes
 */
 typedef enum __itt_suppress_mode {
    __itt_unsuppress_range,
    __itt_suppress_range
 } __itt_suppress_mode_t;

+/**
+ * @enum __itt_collection_state
+ * @brief Enumerator for collection state.
+ */
+typedef enum {
+    __itt_collection_uninitialized = 0, /* uninitialized */
+    __itt_collection_init_fail = 1, /* failed to init */
+    __itt_collection_collector_absent = 2, /* non work state collector is absent */
+    __itt_collection_collector_exists = 3, /* work state collector exists */
+    __itt_collection_init_successful = 4 /* success to init */
+} __itt_collection_state;
+
 /**
 * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask
 */
@ -1496,7 +1582,7 @@ ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr,
 /** @endcond */

 /**
- * @brief Record an free begin occurrence.
+ * @brief Record a free begin occurrence.
 */
 void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr);

@ -1516,7 +1602,7 @@ ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr))
 /** @endcond */

 /**
- * @brief Record an free end occurrence.
+ * @brief Record a free end occurrence.
 */
 void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr);

@ -1536,7 +1622,7 @@ ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr))
 /** @endcond */

 /**
- * @brief Record an reallocation begin occurrence.
+ * @brief Record a reallocation begin occurrence.
 */
 void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized);

@ -1556,7 +1642,7 @@ ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* add
 /** @endcond */

 /**
- * @brief Record an reallocation end occurrence.
+ * @brief Record a reallocation end occurrence.
 */
 void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized);

@ -2692,7 +2778,7 @@ ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info

 /**
 * @ingroup clockdomains
- * @brief Recalculate clock domains frequences and clock base timestamps.
+ * @brief Recalculate clock domains frequencies and clock base timestamps.
 */
 void ITTAPI __itt_clock_domain_reset(void);

@ -3597,11 +3683,12 @@ ITT_STUBV(ITTAPI, void, enable_attach, (void))
 /** @endcond */

 /**
- * @brief Module load info
- * This API is used to report necessary information in case of module relocation
- * @param[in] start_addr - relocated module start address
- * @param[in] end_addr - relocated module end address
- * @param[in] path - file system path to the module
+ * @brief Module load notification
+ * This API is used to report necessary information in case of bypassing default system loader.
+ * Notification should be done immidiatelly after this module is loaded to process memory.
+ * @param[in] start_addr - module start address
+ * @param[in] end_addr - module end address
+ * @param[in] path - file system full path to the module
 */
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, const char *path);
@ -3656,7 +3743,462 @@ ITT_STUB(ITTAPI, void, module_load,  (void *start_addr, void *end_addr, const ch
 #endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */

+/**
+ * @brief Report module unload
+ * This API is used to report necessary information in case of bypassing default system loader.
+ * Notification should be done just before the module is unloaded from process memory.
+ * @param[in] addr - base address of loaded module
+ */
+void ITTAPI __itt_module_unload(void *addr);

+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, module_unload, (void *addr))
+#define __itt_module_unload     ITTNOTIFY_VOID(module_unload)
+#define __itt_module_unload_ptr ITTNOTIFY_NAME(module_unload)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_module_unload(addr)
+#define __itt_module_unload_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_module_unload_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef enum
+{
+    __itt_module_type_unknown = 0,
+    __itt_module_type_elf,
+    __itt_module_type_coff
+} __itt_module_type;
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef enum
+{
+    itt_section_type_unknown,
+    itt_section_type_bss,        /* notifies that the section contains uninitialized data. These are the relevant section types and the modules that contain them:
+                                  * ELF module:  SHT_NOBITS section type
+                                  * COFF module: IMAGE_SCN_CNT_UNINITIALIZED_DATA section type
+                                  */
+    itt_section_type_data,       /* notifies that section contains initialized data. These are the relevant section types and the modules that contain them:
+                                  * ELF module:  SHT_PROGBITS section type
+                                  * COFF module: IMAGE_SCN_CNT_INITIALIZED_DATA section type
+                                  */
+    itt_section_type_text        /* notifies that the section contains executable code. These are the relevant section types and the modules that contain them:
+                                  * ELF module:  SHT_PROGBITS section type
+                                  * COFF module: IMAGE_SCN_CNT_CODE section type
+                                  */
+} __itt_section_type;
+/** @endcond */
+
+/**
+ * @hideinitializer
+ * @brief bit-mask, detects a section attribute that indicates whether a section can be executed as code:
+ * These are the relevant section attributes and the modules that contain them:
+ * ELF module:  PF_X section attribute
+ * COFF module: IMAGE_SCN_MEM_EXECUTE attribute
+ */
+#define __itt_section_exec 0x20000000
+
+/**
+ * @hideinitializer
+ * @brief bit-mask, detects a section attribute that indicates whether a section can be read.
+ * These are the relevant section attributes and the modules that contain them:
+ * ELF module:  PF_R attribute
+ * COFF module: IMAGE_SCN_MEM_READ attribute
+ */
+#define __itt_section_read 0x40000000
+
+/**
+ * @hideinitializer
+ * @brief bit-mask, detects a section attribute that indicates whether a section can be written to.
+ * These are the relevant section attributes and the modules that contain them:
+ * ELF module:  PF_W attribute
+ * COFF module: IMAGE_SCN_MEM_WRITE attribute
+ */
+#define __itt_section_write 0x80000000
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_section_info
+{
+    const char* name;                 /*!< Section name in UTF8 */
+    __itt_section_type type;          /*!< Section content and semantics description */
+    size_t flags;                     /*!< Section bit flags that describe attributes using bit mask
+                                       * Zero if disabled, non-zero if enabled
+                                       */
+    void* start_addr;                 /*!< Section load(relocated) start address */
+    size_t size;                      /*!< Section size */
+    size_t file_offset;               /*!< Section file offset */
+} __itt_section_info;
+
+#pragma pack(pop)
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_module_object
+{
+    unsigned int version;                 /*!< API version */
+    __itt_id module_id;                   /*!< Unique identifier. This is unchanged for sections that belong to the same module */
+    __itt_module_type module_type;        /*!< Binary module format */
+    const char* module_name;              /*!< Unique module name or path to module in UTF8
+                                           * Contains module name when module_buffer and module_size exist
+                                           * Contains module path when module_buffer and module_size are absent
+                                           * module_name remains the same for a given module_id
+                                           */
+    void* module_buffer;                  /*!< Module buffer content */
+    size_t module_size;                   /*!< Module buffer size */
+                                          /*!< If module_buffer and module_size exist, the binary module is dumped onto the system.
+                                           * If module_buffer and module_size do not exist,
+                                           * the binary module exists on the system already.
+                                           * The module_name parameter contains the path to the module.
+                                           */
+    __itt_section_info* section_array;    /*!< Reference to section information */
+    size_t section_number;                /*!< Presumably the number of entries in section_array (TODO confirm) */
+} __itt_module_object;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Load module content and its loaded(relocated) sections.
+ * This API is useful to save a module, or specify its location on the system and report information about loaded sections.
+ * The target module is saved on the system if module buffer content and size are available.
+ * If module buffer content and size are unavailable, the module name contains the path to the existing binary module.
+ * @param[in] module_obj - provides module and section information, along with unique module identifiers (name,module ID)
+ * which bind the binary module to particular sections.
+ */
+void ITTAPI __itt_module_load_with_sections(__itt_module_object* module_obj);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, module_load_with_sections,  (__itt_module_object* module_obj))
+#define __itt_module_load_with_sections     ITTNOTIFY_VOID(module_load_with_sections)
+#define __itt_module_load_with_sections_ptr ITTNOTIFY_NAME(module_load_with_sections)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_module_load_with_sections(module_obj)
+#define __itt_module_load_with_sections_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_module_load_with_sections_ptr  0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Unload a module and its loaded(relocated) sections.
+ * This API notifies that the module and its sections were unloaded.
+ * @param[in] module_obj - provides module and sections information, along with unique module identifiers (name,module ID)
+ * which bind the binary module to particular sections.
+ */
+void ITTAPI __itt_module_unload_with_sections(__itt_module_object* module_obj);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, module_unload_with_sections,  (__itt_module_object* module_obj))
+#define __itt_module_unload_with_sections     ITTNOTIFY_VOID(module_unload_with_sections)
+#define __itt_module_unload_with_sections_ptr ITTNOTIFY_NAME(module_unload_with_sections)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_module_unload_with_sections(module_obj)
+#define __itt_module_unload_with_sections_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_module_unload_with_sections_ptr  0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_histogram
+{
+    const __itt_domain* domain;      /*!< Domain of the histogram*/
+    const char* nameA;               /*!< Name of the histogram */
+#if defined(UNICODE) || defined(_UNICODE)
+    const wchar_t* nameW;
+#else  /* UNICODE || _UNICODE */
+    void* nameW;
+#endif /* UNICODE || _UNICODE */
+    __itt_metadata_type x_type;     /*!< Type of the histogram X axis */
+    __itt_metadata_type y_type;     /*!< Type of the histogram Y axis */
+    int   extra1;                   /*!< Reserved to the runtime */
+    void* extra2;                   /*!< Reserved to the runtime */
+    struct ___itt_histogram* next;
+}  __itt_histogram;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Create a typed histogram instance with given name/domain.
+ * @param[in] domain The domain controlling the call.
+ * @param[in] name   The name of the histogram.
+ * @param[in] x_type The type of the X axis in histogram (may be 0 to calculate batch statistics).
+ * @param[in] y_type The type of the Y axis in histogram.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_histogram* ITTAPI __itt_histogram_createA(const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type);
+__itt_histogram* ITTAPI __itt_histogram_createW(const __itt_domain* domain, const wchar_t* name, __itt_metadata_type x_type, __itt_metadata_type y_type);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_histogram_create     __itt_histogram_createW
+#  define __itt_histogram_create_ptr __itt_histogram_createW_ptr
+#else /* UNICODE */
+#  define __itt_histogram_create     __itt_histogram_createA
+#  define __itt_histogram_create_ptr __itt_histogram_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_histogram* ITTAPI __itt_histogram_create(const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_histogram*, histogram_createA, (const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type))
+ITT_STUB(ITTAPI, __itt_histogram*, histogram_createW, (const __itt_domain* domain, const wchar_t* name, __itt_metadata_type x_type, __itt_metadata_type y_type))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_histogram*, histogram_create, (const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_histogram_createA     ITTNOTIFY_DATA(histogram_createA)
+#define __itt_histogram_createA_ptr ITTNOTIFY_NAME(histogram_createA)
+#define __itt_histogram_createW     ITTNOTIFY_DATA(histogram_createW)
+#define __itt_histogram_createW_ptr ITTNOTIFY_NAME(histogram_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_histogram_create     ITTNOTIFY_DATA(histogram_create)
+#define __itt_histogram_create_ptr ITTNOTIFY_NAME(histogram_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_histogram_createA(domain, name, x_type, y_type) (__itt_histogram*)0
+#define __itt_histogram_createA_ptr 0
+#define __itt_histogram_createW(domain, name, x_type, y_type) (__itt_histogram*)0
+#define __itt_histogram_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_histogram_create(domain, name, x_type, y_type) (__itt_histogram*)0
+#define __itt_histogram_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_histogram_createA_ptr 0
+#define __itt_histogram_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_histogram_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Submit statistics for a histogram instance.
+ * @param[in] hist    Pointer to the histogram instance to which the histogram statistic is to be dumped.
+ * @param[in] length  The number of elements in dumped axis data array.
+ * @param[in] x_data  The X axis dumped data itself (may be NULL to calculate batch statistics).
+ * @param[in] y_data  The Y axis dumped data itself.
+*/
+void ITTAPI __itt_histogram_submit(__itt_histogram* hist, size_t length, void* x_data, void* y_data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, histogram_submit, (__itt_histogram* hist, size_t length, void* x_data, void* y_data))
+#define __itt_histogram_submit     ITTNOTIFY_VOID(histogram_submit)
+#define __itt_histogram_submit_ptr ITTNOTIFY_NAME(histogram_submit)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_histogram_submit(hist, length, x_data, y_data)
+#define __itt_histogram_submit_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_histogram_submit_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+
+/**
+* @brief Obtain the collection state as it is at the moment of the call.
+* @return The collection state, as an enum __itt_collection_state value.
+*/
+__itt_collection_state __itt_get_collection_state(void);
+
+/**
+* @brief Release the resources allocated by the static part of the ITT API.
+* This API should be called from the library destructor.
+* @return void
+*/
+void __itt_release_resources(void);
+/** @endcond */
+
+/**
+ * @brief Create a typed counter with given domain pointer, string name and counter type
+ * @param[in] domain The domain pointer.
+ * @param[in] name   The counter name (char string; wchar_t string for the W variant).
+ * @param[in] type   The counter type, given as a __itt_metadata_type value.
+ * @return The counter handle. When INTEL_NO_ITTNOTIFY_API is defined the call is
+ *         replaced by a macro that evaluates to (__itt_counter)0.
+ * @note On Windows, __itt_counter_create_v3 maps to the W variant when UNICODE or
+ *       _UNICODE is defined and to the A variant otherwise.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_createA_v3(const __itt_domain* domain, const char* name, __itt_metadata_type type);
+__itt_counter ITTAPI __itt_counter_createW_v3(const __itt_domain* domain, const wchar_t* name, __itt_metadata_type type);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_counter_create_v3     __itt_counter_createW_v3
+#  define __itt_counter_create_v3_ptr __itt_counter_createW_v3_ptr
+#else /* UNICODE */
+#  define __itt_counter_create_v3     __itt_counter_createA_v3
+#  define __itt_counter_create_v3_ptr __itt_counter_createA_v3_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create_v3(const __itt_domain* domain, const char* name, __itt_metadata_type type);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_createA_v3, (const __itt_domain* domain, const char* name, __itt_metadata_type type))
+ITT_STUB(ITTAPI, __itt_counter, counter_createW_v3, (const __itt_domain* domain, const wchar_t* name, __itt_metadata_type type))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create_v3,  (const __itt_domain* domain, const char* name, __itt_metadata_type type))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA_v3     ITTNOTIFY_DATA(counter_createA_v3)
+#define __itt_counter_createA_v3_ptr ITTNOTIFY_NAME(counter_createA_v3)
+#define __itt_counter_createW_v3     ITTNOTIFY_DATA(counter_createW_v3)
+#define __itt_counter_createW_v3_ptr ITTNOTIFY_NAME(counter_createW_v3)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_v3     ITTNOTIFY_DATA(counter_create_v3)
+#define __itt_counter_create_v3_ptr ITTNOTIFY_NAME(counter_create_v3)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA_v3(domain, name, type) (__itt_counter)0
+#define __itt_counter_createA_v3_ptr 0
+#define __itt_counter_createW_v3(domain, name, type) (__itt_counter)0
+/* Must be the _v3 W pointer: __itt_counter_create_v3_ptr expands to it under UNICODE. */
+#define __itt_counter_createW_v3_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_v3(domain, name, type) (__itt_counter)0
+#define __itt_counter_create_v3_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA_v3_ptr 0
+#define __itt_counter_createW_v3_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_v3_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the value of a counter.
+ * @param[in] counter   The counter handle whose value is to be set.
+ * @param[in] value_ptr Untyped pointer to the new value.
+ *                      NOTE(review): the pointee type presumably has to match the
+ *                      __itt_metadata_type the counter was created with - confirm.
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call is replaced by a macro that
+ *       expands to nothing.
+ */
+void ITTAPI __itt_counter_set_value_v3(__itt_counter counter, void *value_ptr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_set_value_v3, (__itt_counter counter, void *value_ptr))
+#define __itt_counter_set_value_v3     ITTNOTIFY_VOID(counter_set_value_v3)
+#define __itt_counter_set_value_v3_ptr ITTNOTIFY_NAME(counter_set_value_v3)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_set_value_v3(counter, value_ptr)
+#define __itt_counter_set_value_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_set_value_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief describes the type of context metadata
+*/
+typedef enum {
+    __itt_context_unknown = 0,              /*!< Undefined type */
+    __itt_context_nameA,                    /*!< ASCII string char* type */
+    __itt_context_nameW,                    /*!< Unicode string wchar_t* type */
+    __itt_context_deviceA,                  /*!< ASCII string char* type */
+    __itt_context_deviceW,                  /*!< Unicode string wchar_t* type */
+    __itt_context_unitsA,                   /*!< ASCII string char* type */
+    __itt_context_unitsW,                   /*!< Unicode string wchar_t* type */
+    __itt_context_pci_addrA,                /*!< ASCII string char* type */
+    __itt_context_pci_addrW,                /*!< Unicode string wchar_t* type */
+    __itt_context_tid,                      /*!< Unsigned 64-bit integer type */
+    __itt_context_max_val,                  /*!< Unsigned 64-bit integer type */
+    __itt_context_bandwidth_flag,           /*!< Unsigned 64-bit integer type */
+    __itt_context_latency_flag,             /*!< Unsigned 64-bit integer type */
+    __itt_context_occupancy_flag,           /*!< Unsigned 64-bit integer type */
+    __itt_context_on_thread_flag,           /*!< Unsigned 64-bit integer type */
+    __itt_context_is_abs_val_flag,          /*!< Unsigned 64-bit integer type */
+    __itt_context_cpu_instructions_flag,    /*!< Unsigned 64-bit integer type */
+    __itt_context_cpu_cycles_flag           /*!< Unsigned 64-bit integer type */
+} __itt_context_type;
+
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_context_name __itt_context_nameW
+#  define __itt_context_device __itt_context_deviceW
+#  define __itt_context_units __itt_context_unitsW
+#  define __itt_context_pci_addr __itt_context_pci_addrW
+#else  /* UNICODE || _UNICODE */
+#  define __itt_context_name __itt_context_nameA
+#  define __itt_context_device __itt_context_deviceA
+#  define __itt_context_units __itt_context_unitsA
+#  define __itt_context_pci_addr __itt_context_pci_addrA
+#endif /* UNICODE || _UNICODE */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_context_metadata
+{
+    __itt_context_type type;    /*!< Type of the context metadata value */
+    void* value;                /*!< Pointer to context metadata value itself */
+}  __itt_context_metadata;
+
+#pragma pack(pop)
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_counter_metadata
+{
+    __itt_counter counter;              /*!< Associated context metadata counter */
+    __itt_context_type type;            /*!< Type of the context metadata value */
+    const char* str_valueA;             /*!< String context metadata value */
+#if defined(UNICODE) || defined(_UNICODE)
+    const wchar_t* str_valueW;          /*!< Wide-character string context metadata value */
+#else  /* UNICODE || _UNICODE */
+    void* str_valueW;                   /*!< Same member, declared untyped when UNICODE/_UNICODE is not defined */
+#endif /* UNICODE || _UNICODE */
+    unsigned long long value;           /*!< Numeric context metadata value */
+    int   extra1;                       /*!< Reserved to the runtime */
+    void* extra2;                       /*!< Reserved to the runtime */
+    struct ___itt_counter_metadata* next; /*!< Next record of the same type (presumably a list link - confirm) */
+}  __itt_counter_metadata;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Bind context metadata to counter instance
+ * @param[in] counter   Pointer to the counter instance to which the context metadata is to be associated.
+ * @param[in] length    The number of elements in context metadata array.
+ * @param[in] metadata  The context metadata itself.
+*/
+void ITTAPI __itt_bind_context_metadata_to_counter(__itt_counter counter, size_t length, __itt_context_metadata* metadata);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, bind_context_metadata_to_counter, (__itt_counter counter, size_t length, __itt_context_metadata* metadata))
+#define __itt_bind_context_metadata_to_counter     ITTNOTIFY_VOID(bind_context_metadata_to_counter)
+#define __itt_bind_context_metadata_to_counter_ptr ITTNOTIFY_NAME(bind_context_metadata_to_counter)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_bind_context_metadata_to_counter(counter, length, metadata)
+#define __itt_bind_context_metadata_to_counter_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_bind_context_metadata_to_counter_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */

 #ifdef __cplusplus
 }
@ -4005,7 +4547,7 @@ ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void))
 /** @endcond */

 /**
- * @brief Destroy the inforamtion about stitch point identified by the pointer previously returned by __itt_stack_caller_create()
+ * @brief Destroy the information about stitch point identified by the pointer previously returned by __itt_stack_caller_create()
 */
 void ITTAPI __itt_stack_caller_destroy(__itt_caller id);

--- a/3rdparty/ittnotify/include/jitprofiling.h
+++ b/3rdparty/ittnotify/include/jitprofiling.h
@ -1,60 +1,8 @@
-/* <copyright>
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
+/*
+  Copyright (C) 2005-2019 Intel Corporation

-  GPL LICENSE SUMMARY
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  Contact Information:
-  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
-
-  BSD LICENSE
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-</copyright> */
+  SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+*/

 #ifndef __JITPROFILING_H__
 #define __JITPROFILING_H__
@ -66,7 +14,7 @@
 * generated code that can be used by performance tools. The user inserts
 * calls in the code generator to report information before JIT-compiled
 * code goes to execution. This information is collected at runtime and used
- * by tools like Intel(R) VTune(TM) Amplifier to display performance metrics
+ * by tools like Intel(R) VTune(TM) Profiler to display performance metrics
 * associated with JIT-compiled code.
 *
 * These APIs can be used to\n
@ -97,16 +45,16 @@
 *  * Expected behavior:
 *    * If any iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED event overwrites an
 *      already reported method, then such a method becomes invalid and its
- *      memory region is treated as unloaded. VTune Amplifier displays the metrics
+ *      memory region is treated as unloaded. VTune Profiler displays the metrics
 *      collected by the method until it is overwritten.
 *    * If supplied line number information contains multiple source lines for
- *      the same assembly instruction (code location), then VTune Amplifier picks up
+ *      the same assembly instruction (code location), then VTune Profiler picks up
 *      the first line number.
 *    * Dynamically generated code can be associated with a module name.
 *      Use the iJIT_Method_Load_V2 structure.\n
 *      Clarification of some cases:
 *        * If you register a function with the same method ID multiple times,
- *          specifying different module names, then the VTune Amplifier picks up
+ *          specifying different module names, then the VTune Profiler picks up
 *          the module name registered first. If you want to distinguish the same
 *          function between different JIT engines, supply different method IDs for
 *          each function. Other symbolic information (for example, source file)
@ -143,18 +91,18 @@
 *        belonging to the same method. Symbolic information (method name,
 *        source file name) will be taken from the first notification, and all
 *        subsequent notifications with the same method ID will be processed
- *        only for line number table information. So, the VTune Amplifier will map
+ *        only for line number table information. So, the VTune Profiler will map
 *        samples to a source line using the line number table from the current
 *        notification while taking the source file name from the very first one.\n
 *        Clarification of some cases:\n
 *          * If you register a second code region with a different source file
 *          name and the same method ID, then this information will be saved and
 *          will not be considered as an extension of the first code region, but
- *          VTune Amplifier will use the source file of the first code region and map
+ *          VTune Profiler will use the source file of the first code region and map
 *          performance metrics incorrectly.
 *          * If you register a second code region with the same source file as
 *          for the first region and the same method ID, then the source file will be
- *          discarded but VTune Amplifier will map metrics to the source file correctly.
+ *          discarded but VTune Profiler will map metrics to the source file correctly.
 *          * If you register a second code region with a null source file and
 *          the same method ID, then provided line number info will be associated
 *          with the source file of the first code region.
@ -293,7 +241,7 @@ typedef enum _iJIT_IsProfilingActiveFlags
 * @brief Description of a single entry in the line number information of a code region.
 * @details A table of line number entries gives information about how the reported code region
 * is mapped to source file.
- * Intel(R) VTune(TM) Amplifier uses line number information to attribute
+ * Intel(R) VTune(TM) Profiler uses line number information to attribute
 * the samples (virtual address) to a line number. \n
 * It is acceptable to report different code addresses for the same source line:
 * @code
@ -304,7 +252,7 @@ typedef enum _iJIT_IsProfilingActiveFlags
 *      18      1
 *      21      30
 *
- *  VTune Amplifier constructs the following table using the client data
+ *  VTune Profiler constructs the following table using the client data
 *
 *   Code subrange  Line number
 *      0-1             2
@ -428,7 +376,7 @@ typedef struct _iJIT_Method_Load_V2

    char* module_name; /**<\brief Module name. Can be NULL.
                           The module name can be useful for distinguishing among
-                           different JIT engines. VTune Amplifier will display
+                           different JIT engines. VTune Profiler will display
                           reported methods grouped by specific module. */

 } *piJIT_Method_Load_V2, iJIT_Method_Load_V2;
@ -480,7 +428,7 @@ typedef struct _iJIT_Method_Load_V3

    char* module_name; /**<\brief Module name. Can be NULL.
                        *  The module name can be useful for distinguishing among
-                        *  different JIT engines. VTune Amplifier will display
+                        *  different JIT engines. VTune Profiler will display
                        *  reported methods grouped by specific module. */

    iJIT_CodeArchitecture module_arch; /**<\brief Architecture of the method's code region.
@ -490,9 +438,9 @@ typedef struct _iJIT_Method_Load_V3
                                        *  engine generates 64-bit code.
                                        *
                                        *  If JIT engine reports both 32-bit and 64-bit types
-                                        *  of methods then VTune Amplifier splits the methods
+                                        *  of methods then VTune Profiler splits the methods
                                        *  with the same module name but with different
-                                        *  architectures in two different modules. VTune Amplifier
+                                        *  architectures in two different modules. VTune Profiler
                                        *  modifies the original name provided with a 64-bit method
                                        *  version by ending it with '(64)' */

@ -561,9 +509,9 @@ typedef enum _iJIT_SegmentType
    iJIT_CT_CODE,           /**<\brief Executable code. */

    iJIT_CT_DATA,           /**<\brief Data (not executable code).
-                             * VTune Amplifier uses the format string
+                             * VTune Profiler uses the format string
                             * (see iJIT_Method_Update) to represent
-                             * this data in the VTune Amplifier GUI */
+                             * this data in the VTune Profiler GUI */

    iJIT_CT_KEEP,           /**<\brief Use the previous markup for the trace.
                             * Can be used for the following
@ -580,11 +528,11 @@ typedef enum _iJIT_SegmentType
 * structure to describe the update of the content within a JIT-compiled method,
 * use iJVM_EVENT_TYPE_METHOD_UPDATE_V2 as an event type to report it.
 *
- * On the first Update event, VTune Amplifier copies the original code range reported by
+ * On the first Update event, VTune Profiler copies the original code range reported by
 * the iJVM_EVENT_TYPE_METHOD_LOAD event, then modifies it with the supplied bytes and
- * adds the modified range to the original method. For next update events, VTune Amplifier
+ * adds the modified range to the original method. For next update events, VTune Profiler
 * does the same but it uses the latest modified version of a code region for update.
- * Eventually, VTune Amplifier GUI displays multiple code ranges for the method reported by
+ * Eventually, VTune Profiler GUI displays multiple code ranges for the method reported by
 * the iJVM_EVENT_TYPE_METHOD_LOAD event.
 * Notes:
 * - Multiple update events with different types for the same trace are allowed
@ -673,7 +621,7 @@ iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive(void);
 * @brief Reports infomation about JIT-compiled code to the agent.
 *
 * The reported information is used to attribute samples obtained from any
- * Intel(R) VTune(TM) Amplifier collector. This API needs to be called
+ * Intel(R) VTune(TM) Profiler collector. This API needs to be called
 * after JIT compilation and before the first entry into the JIT-compiled
 * code.
 *
--- a/3rdparty/ittnotify/include/legacy/ittnotify.h
+++ b/3rdparty/ittnotify/include/legacy/ittnotify.h
@ -1,60 +1,8 @@
-/* <copyright>
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
+/*
+  Copyright (C) 2005-2019 Intel Corporation

-  GPL LICENSE SUMMARY
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  Contact Information:
-  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
-
-  BSD LICENSE
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-</copyright> */
+  SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+*/
 #ifndef _LEGACY_ITTNOTIFY_H_
 #define _LEGACY_ITTNOTIFY_H_

@ -80,6 +28,10 @@
 #  define ITT_OS_FREEBSD   4
 #endif /* ITT_OS_FREEBSD */

+#ifndef ITT_OS_OPENBSD
+#  define ITT_OS_OPENBSD   5
+#endif /* ITT_OS_OPENBSD */
+
 #ifndef ITT_OS
 #  if defined WIN32 || defined _WIN32
 #    define ITT_OS ITT_OS_WIN
@ -87,6 +39,8 @@
 #    define ITT_OS ITT_OS_MAC
 #  elif defined( __FreeBSD__ )
 #    define ITT_OS ITT_OS_FREEBSD
+#  elif defined( __OpenBSD__ )
+#    define ITT_OS ITT_OS_OPENBSD
 #  else
 #    define ITT_OS ITT_OS_LINUX
 #  endif
@ -108,6 +62,10 @@
 #  define ITT_PLATFORM_FREEBSD 4
 #endif /* ITT_PLATFORM_FREEBSD */

+#ifndef ITT_PLATFORM_OPENBSD
+#  define ITT_PLATFORM_OPENBSD 5
+#endif /* ITT_PLATFORM_OPENBSD */
+
 #ifndef ITT_PLATFORM
 #  if ITT_OS==ITT_OS_WIN
 #    define ITT_PLATFORM ITT_PLATFORM_WIN
@ -115,6 +73,8 @@
 #    define ITT_PLATFORM ITT_PLATFORM_MAC
 #  elif ITT_OS==ITT_OS_FREEBSD
 #    define ITT_PLATFORM ITT_PLATFORM_FREEBSD
+#  elif ITT_OS==ITT_OS_OPENBSD
+#    define ITT_PLATFORM ITT_PLATFORM_OPENBSD
 #  else
 #    define ITT_PLATFORM ITT_PLATFORM_POSIX
 #  endif
@ -167,7 +127,12 @@

 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 /* use __forceinline (VC++ specific) */
-#define ITT_INLINE           __forceinline
+/* MinGW compiling as C (not C++) spells the forced-inline request with GNU
+ * attributes; every other Windows toolchain configuration uses __forceinline.
+ * Both definitions carry `static`. */
+#if defined(__MINGW32__) && !defined(__cplusplus)
+#define ITT_INLINE           static __inline__ __attribute__((__always_inline__,__gnu_inline__))
+#else
+#define ITT_INLINE           static __forceinline
+#endif /* __MINGW32__ && !__cplusplus */
+
 #define ITT_INLINE_ATTRIBUTE /* nothing */
 #else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 /*
@ -219,20 +184,20 @@
 #define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)
 #define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)

-#define ITTNOTIFY_VOID_D0(n,d)       (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
-#define ITTNOTIFY_VOID_D1(n,d,x)     (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
-#define ITTNOTIFY_VOID_D2(n,d,x,y)   (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
-#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
-#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a)     (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
-#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b)   (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
-#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
-#define ITTNOTIFY_DATA_D0(n,d)       (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d)
-#define ITTNOTIFY_DATA_D1(n,d,x)     (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x)
-#define ITTNOTIFY_DATA_D2(n,d,x,y)   (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y)
-#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z)
-#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a)     (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
-#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b)   (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
-#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+/* Domain-guarded call helpers. Each expands to a no-op ((void)0 or 0) when the
+ * domain pointer `d` is NULL, when d->flags is zero, or when the
+ * ITTNOTIFY_NAME(n) function pointer is null; otherwise it calls through that
+ * pointer. `d` is parenthesized at every use so that expression arguments
+ * (e.g. `c ? a : b`) keep their meaning. `d` is evaluated more than once, so it
+ * must be free of side effects. */
+#define ITTNOTIFY_VOID_D0(n,d)       ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_VOID_D1(n,d,x)     ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_VOID_D2(n,d,x,y)   ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_VOID_D3(n,d,x,y,z) ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a)     ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b)   ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) ((d) == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+#define ITTNOTIFY_DATA_D0(n,d)       ((d) == NULL) ? 0 : (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_DATA_D1(n,d,x)     ((d) == NULL) ? 0 : (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_DATA_D2(n,d,x,y)   ((d) == NULL) ? 0 : (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_DATA_D3(n,d,x,y,z) ((d) == NULL) ? 0 : (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a)     ((d) == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b)   ((d) == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) ((d) == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)

 #ifdef ITT_STUB
 #undef ITT_STUB
@ -269,7 +234,7 @@ extern "C" {
 *     only pauses tracing and analyzing memory access.
 *     It does not pause tracing or analyzing threading APIs.
 *   .
- * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
+ * - Intel(R) VTune(TM) Profiler:
 *   - Does continue to record when new threads are started.
 *   .
 * - Other effects:
@ -1005,9 +970,9 @@ ITT_STUB(ITTAPI, __itt_frame, frame_create,  (const char *domain))
 #endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */

-/** @brief Record an frame begin occurrence. */
+/** @brief Record a frame begin occurrence. */
 void ITTAPI __itt_frame_begin(__itt_frame frame);
-/** @brief Record an frame end occurrence. */
+/** @brief Record a frame end occurrence. */
 void ITTAPI __itt_frame_end  (__itt_frame frame);

 /** @cond exclude_from_documentation */
--- a/3rdparty/ittnotify/include/libittnotify.h
+++ b/3rdparty/ittnotify/include/libittnotify.h
@ -1,60 +1,8 @@
-/* <copyright>
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
+/*
+  Copyright (C) 2005-2019 Intel Corporation

-  GPL LICENSE SUMMARY
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  Contact Information:
-  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
-
-  BSD LICENSE
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-</copyright> */
+  SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+*/

 #ifndef _LIBITTNOTIFY_H_
 #define _LIBITTNOTIFY_H_
--- a/3rdparty/ittnotify/include/llvm_jit_event_listener.hpp
+++ b/3rdparty/ittnotify/include/llvm_jit_event_listener.hpp
@ -1,241 +0,0 @@
-/* <copyright>
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  Contact Information:
-  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
-
-  BSD LICENSE
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-</copyright> */
-
-/*
- * This file implements an interface bridge from Low-Level Virtual Machine
- * llvm::JITEventListener to Intel JIT Profiling API.  It passes the function
- * and line information to the appropriate functions in the JIT profiling
- * interface so that any LLVM-based JIT engine can emit the JIT code
- * notifications that the profiler will receive.
- *
- * Usage model:
- *
- * 1. Register the listener implementation instance with the execution engine:
- *
- *    #include <llvm_jit_event_listener.hpp>
- *    ...
- *    ExecutionEngine *TheExecutionEngine;
- *    ...
- *    TheExecutionEngine = EngineBuilder(TheModule).create();
- *    ...
- *    __itt_llvm_jit_event_listener jitListener;
- *    TheExecutionEngine->RegisterJITEventListener(&jitListener);
- *    ...
- *
- * 2. When compiling make sure to add the ITT API include directory to the
- *    compiler include directories, ITT API library directory to the linker
- *    library directories and link with jitprofling static library.
- */
-
-#ifndef __ITT_LLVM_JIT_EVENT_LISTENER_HPP__
-#define __ITT_LLVM_JIT_EVENT_LISTENER_HPP__
-
-#include "jitprofiling.h"
-
-#include <llvm/Function.h>
-#include <llvm/ExecutionEngine/JITEventListener.h>
-#include <llvm/ADT/StringRef.h>
-#include <llvm/Analysis/DebugInfo.h>
-
-#include <map>
-#include <cassert>
-
-// Uncomment the line below to turn on logging to stderr
-#define JITPROFILING_DEBUG_ENABLE
-
-// Some elementary logging support
-#ifdef JITPROFILING_DEBUG_ENABLE
-#include <cstdio>
-#include <cstdarg>
-static void _jit_debug(const char* format, ...)
-{
-    va_list args;
-    va_start(args, format);
-    vfprintf(stderr, format, args);
-    va_end(args);
-}
-// Use the macro as JITDEBUG(("foo: %d", foo_val));
-#define JITDEBUG(x) \
-    do { \
-        _jit_debug("jit-listener: "); \
-        _jit_debug x; \
-    } \
-    while (0)
-#else
-#define JITDEBUG(x)
-#endif
-
-// LLVM JIT event listener, translates the notifications to the JIT profiling
-// API information.
-class __itt_llvm_jit_event_listener : public llvm::JITEventListener
-{
-public:
-    __itt_llvm_jit_event_listener() {}
-
-public:
-    virtual void NotifyFunctionEmitted(const llvm::Function &F,
-        void *Code, size_t Size, const EmittedFunctionDetails &Details)
-    {
-        std::string name = F.getName().str();
-        JITDEBUG(("function jitted:\n"));
-        JITDEBUG(("  addr=0x%08x\n", (int)Code));
-        JITDEBUG(("  name=`%s'\n", name.c_str()));
-        JITDEBUG(("  code-size=%d\n", (int)Size));
-        JITDEBUG(("  line-infos-count=%d\n", Details.LineStarts.size()));
-
-        // The method must not be in the map - the entry must have been cleared
-        // from the map in NotifyFreeingMachineCode in case of rejitting.
-        assert(m_addr2MethodId.find(Code) == m_addr2MethodId.end());
-
-        int mid = iJIT_GetNewMethodID();
-        m_addr2MethodId[Code] = mid;
-
-        iJIT_Method_Load mload;
-        memset(&mload, 0, sizeof mload);
-        mload.method_id = mid;
-
-        // Populate the method size and name information
-        // TODO: The JIT profiling API should have members as const char pointers.
-        mload.method_name = (char*)name.c_str();
-        mload.method_load_address = Code;
-        mload.method_size = (unsigned int)Size;
-
-        // Populate line information now.
-        // From the JIT API documentation it is not quite clear whether the
-        // line information can be given in ranges, so we'll populate it for
-        // every byte of the function, hmm.
-        std::string srcFilePath;
-        std::vector<LineNumberInfo> lineInfos;
-        char *addr = (char*)Code;
-        char *lineAddr = addr;          // Exclusive end point at which current
-                                        // line info changes.
-        const llvm::DebugLoc* loc = 0;  // Current line info
-        int lineIndex = -1;             // Current index into the line info table
-        for (int i = 0; i < Size; ++i, ++addr) {
-            while (addr >= lineAddr) {
-                if (lineIndex >= 0 && lineIndex < Details.LineStarts.size()) {
-                    loc = &Details.LineStarts[lineIndex].Loc;
-                    std::string p = getSrcFilePath(F.getContext(), *loc);
-                    assert(srcFilePath.empty() || p == srcFilePath);
-                    srcFilePath = p;
-                } else {
-                    loc = NULL;
-                }
-                lineIndex++;
-                if (lineIndex >= 0 && lineIndex < Details.LineStarts.size()) {
-                    lineAddr = (char*)Details.LineStarts[lineIndex].Address;
-                } else {
-                    lineAddr = addr + Size;
-                }
-            }
-            if (loc) {
-                int line = loc->getLine();
-                LineNumberInfo info = { i, line };
-                lineInfos.push_back(info);
-                JITDEBUG(("  addr 0x%08x -> line %d\n", addr, line));
-            }
-        }
-        if (!lineInfos.empty()) {
-            mload.line_number_size = lineInfos.size();
-            JITDEBUG(("  translated to %d line infos to JIT", (int)lineInfos.size()));
-            mload.line_number_table = &lineInfos[0];
-            mload.source_file_name = (char*)srcFilePath.c_str();
-        }
-
-        iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &mload);
-    }
-
-    virtual void NotifyFreeingMachineCode(void *OldPtr)
-    {
-        JITDEBUG(("function unjitted\n"));
-        JITDEBUG(("  addr=0x%08x\n", (int)OldPtr));
-        Addr2MethodId::iterator it = m_addr2MethodId.find(OldPtr);
-        assert(it != m_addr2MethodId.end());
-        iJIT_Method_Id mid = { it->second };
-        iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &mid);
-        m_addr2MethodId.erase(it);
-    }
-
-private:
-    std::string getSrcFilePath(const llvm::LLVMContext& ctx, const llvm::DebugLoc& loc)
-    {
-        llvm::MDNode* node = loc.getAsMDNode(ctx);
-        llvm::DILocation srcLoc(node);
-        return srcLoc.getDirectory().str() + "/" + srcLoc.getFilename().str();
-    }
-
-private:
-    /// Don't copy
-    __itt_llvm_jit_event_listener(const __itt_llvm_jit_event_listener&);
-    __itt_llvm_jit_event_listener& operator=(const __itt_llvm_jit_event_listener&);
-
-private:
-    typedef std::vector<LineNumberInfo> LineInfoList;
-
-    // The method unload notification in VTune JIT profiling API takes the
-    // method ID, not method address so have to maintain the mapping.  Is
-    // there a more efficient and simple way to do this like attaching the
-    // method ID information somehow to the LLVM function instance?
-    //
-    // TODO: It would be more convenient for the JIT API to take the method
-    // address, not method ID.
-    typedef std::map<const void*, int> Addr2MethodId;
-    Addr2MethodId m_addr2MethodId;
-};
-
-#endif // Header guard
--- a/3rdparty/ittnotify/src/ittnotify/BSD-3-Clause.txt
+++ b/3rdparty/ittnotify/src/ittnotify/BSD-3-Clause.txt
@ -1,7 +1,8 @@
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
+Copyright (c) 2019 Intel Corporation. All rights reserved.
+
 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
-•	Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
-•	Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
-•	Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/3rdparty/ittnotify/src/ittnotify/GPL-2.0-only.txt
+++ b/3rdparty/ittnotify/src/ittnotify/GPL-2.0-only.txt
@ -1,65 +1,103 @@
-The GNU General Public License (GPL)
+GNU GENERAL PUBLIC LICENSE
 Version 2, June 1991
+
 Copyright (C) 1989, 1991 Free Software Foundation, Inc.
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-Everyone is permitted to copy and distribute verbatim copies
-of this license document, but changing it is not allowed.
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
+Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
 Preamble
-The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too.
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
+
 When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
+
 To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
+
 For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
 We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
+
 Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
+
 Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
+
 The precise terms and conditions for copying, distribution and modification follow.
+
 TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
 Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does.
+
 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program.
 You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
 a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
 b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
 c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
 These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
 Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
+
 In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
 a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
 b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
 c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
 The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
 If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
+
 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License.
 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
 If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
+
 It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
 This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
 Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
+
 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
 NO WARRANTY
+
 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
 END OF TERMS AND CONDITIONS
+
 How to Apply These Terms to Your New Programs
+
 If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.
+
 To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.
-One line to give the program's name and a brief idea of what it does.
-Copyright (C) <year> <name of author>
+
+<one line to give the program's name and an idea of what it does.>
+Copyright (C) < yyyy> <name of author>
+
 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
+
 This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
 Also add information on how to contact you by electronic and paper mail.
+
 If the program is interactive, make it output a short notice like this when it starts in an interactive mode:
+
 Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.
+
 The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program.
+
 You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names:
+
 Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker.
-signature of Ty Coon, 1 April 1989
-Ty Coon, President of Vice
-This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License.
+
+<signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License.
--- a/3rdparty/ittnotify/src/ittnotify/disable_warnings.h
+++ b/3rdparty/ittnotify/src/ittnotify/disable_warnings.h
@ -1,71 +1,23 @@
-/* <copyright>
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
+/*
+  Copyright (C) 2005-2019 Intel Corporation

-  GPL LICENSE SUMMARY
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  Contact Information:
-  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
-
-  BSD LICENSE
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-</copyright> */
+  SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+*/

 #include "ittnotify_config.h"

 #if ITT_PLATFORM==ITT_PLATFORM_WIN

+#if defined _MSC_VER
+
 #pragma warning (disable: 593)   /* parameter "XXXX" was set but never used                 */
 #pragma warning (disable: 344)   /* typedef name has already been declared (with same type) */
 #pragma warning (disable: 174)   /* expression has no effect                                */
 #pragma warning (disable: 4127)  /* conditional expression is constant                      */
 #pragma warning (disable: 4306)  /* conversion from '?' to '?' of greater size              */

+#endif
+
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */

 #if defined __INTEL_COMPILER
--- a/3rdparty/ittnotify/src/ittnotify/ittnotify_config.h
+++ b/3rdparty/ittnotify/src/ittnotify/ittnotify_config.h
@ -1,60 +1,8 @@
-/* <copyright>
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
+/*
+  Copyright (C) 2005-2019 Intel Corporation

-  GPL LICENSE SUMMARY
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  Contact Information:
-  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
-
-  BSD LICENSE
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-</copyright> */
+  SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+*/
 #ifndef _ITTNOTIFY_CONFIG_H_
 #define _ITTNOTIFY_CONFIG_H_

@ -75,6 +23,10 @@
 #  define ITT_OS_FREEBSD   4
 #endif /* ITT_OS_FREEBSD */

+#ifndef ITT_OS_OPENBSD
+#  define ITT_OS_OPENBSD   5
+#endif /* ITT_OS_OPENBSD */
+
 #ifndef ITT_OS
 #  if defined WIN32 || defined _WIN32
 #    define ITT_OS ITT_OS_WIN
@ -82,6 +34,8 @@
 #    define ITT_OS ITT_OS_MAC
 #  elif defined( __FreeBSD__ )
 #    define ITT_OS ITT_OS_FREEBSD
+#  elif defined( __OpenBSD__ )
+#    define ITT_OS ITT_OS_OPENBSD
 #  else
 #    define ITT_OS ITT_OS_LINUX
 #  endif
@ -103,6 +57,10 @@
 #  define ITT_PLATFORM_FREEBSD 4
 #endif /* ITT_PLATFORM_FREEBSD */

+#ifndef ITT_PLATFORM_OPENBSD
+#  define ITT_PLATFORM_OPENBSD 5
+#endif /* ITT_PLATFORM_OPENBSD */
+
 #ifndef ITT_PLATFORM
 #  if ITT_OS==ITT_OS_WIN
 #    define ITT_PLATFORM ITT_PLATFORM_WIN
@ -110,6 +68,8 @@
 #    define ITT_PLATFORM ITT_PLATFORM_MAC
 #  elif ITT_OS==ITT_OS_FREEBSD
 #    define ITT_PLATFORM ITT_PLATFORM_FREEBSD
+#  elif ITT_OS==ITT_OS_OPENBSD
+#    define ITT_PLATFORM ITT_PLATFORM_OPENBSD
 #  else
 #    define ITT_PLATFORM ITT_PLATFORM_POSIX
 #  endif
@ -162,7 +122,12 @@

 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 /* use __forceinline (VC++ specific) */
-#define ITT_INLINE           __forceinline
+#if defined(__MINGW32__) && !defined(__cplusplus)
+#define ITT_INLINE           static __inline__ __attribute__((__always_inline__,__gnu_inline__))
+#else
+#define ITT_INLINE           static __forceinline
+#endif /* __MINGW32__ */
+
 #define ITT_INLINE_ATTRIBUTE /* nothing */
 #else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 /*
@ -188,6 +153,10 @@
 #  define ITT_ARCH_IA32E 2
 #endif /* ITT_ARCH_IA32E */

+#ifndef ITT_ARCH_IA64
+#  define ITT_ARCH_IA64 3
+#endif /* ITT_ARCH_IA64 */
+
 #ifndef ITT_ARCH_ARM
 #  define ITT_ARCH_ARM  4
 #endif /* ITT_ARCH_ARM */
@ -196,9 +165,9 @@
 #  define ITT_ARCH_PPC64  5
 #endif /* ITT_ARCH_PPC64 */

-#ifndef ITT_ARCH_AARCH64  /* 64-bit ARM */
-#  define ITT_ARCH_AARCH64  6
-#endif /* ITT_ARCH_AARCH64 */
+#ifndef ITT_ARCH_ARM64
+#  define ITT_ARCH_ARM64  6
+#endif /* ITT_ARCH_ARM64 */

 #ifndef ITT_ARCH
 #  if defined _M_IX86 || defined __i386__
@ -210,7 +179,7 @@
 #  elif defined _M_ARM || defined __arm__
 #    define ITT_ARCH ITT_ARCH_ARM
 #  elif defined __aarch64__
-#    define ITT_ARCH ITT_ARCH_AARCH64
+#    define ITT_ARCH ITT_ARCH_ARM64
 #  elif defined __powerpc64__
 #    define ITT_ARCH ITT_ARCH_PPC64
 #  endif
@ -239,10 +208,10 @@
 #define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 }

 /* Replace with snapshot date YYYYMMDD for promotion build. */
-#define API_VERSION_BUILD    20151119
+#define API_VERSION_BUILD    20250113

 #ifndef API_VERSION_NUM
-#define API_VERSION_NUM 0.0.0
+#define API_VERSION_NUM 3.25.4
 #endif /* API_VERSION_NUM */

 #define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \
@ -254,7 +223,11 @@
 typedef HMODULE           lib_t;
 typedef DWORD             TIDT;
 typedef CRITICAL_SECTION  mutex_t;
+#ifdef __cplusplus
+#define MUTEX_INITIALIZER {}
+#else
 #define MUTEX_INITIALIZER { 0 }
+#endif
 #define strong_alias(name, aliasname) /* empty for Windows */
 #else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #include <dlfcn.h>
@ -282,13 +255,13 @@ typedef pthread_mutex_t   mutex_t;
 #define __itt_mutex_init(mutex)   InitializeCriticalSection(mutex)
 #define __itt_mutex_lock(mutex)   EnterCriticalSection(mutex)
 #define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex)
+#define __itt_mutex_destroy(mutex) DeleteCriticalSection(mutex)
 #define __itt_load_lib(name)      LoadLibraryA(name)
 #define __itt_unload_lib(handle)  FreeLibrary(handle)
 #define __itt_system_error()      (int)GetLastError()
 #define __itt_fstrcmp(s1, s2)     lstrcmpA(s1, s2)
 #define __itt_fstrnlen(s, l)      strnlen_s(s, l)
 #define __itt_fstrcpyn(s1, b, s2, l) strncpy_s(s1, b, s2, l)
-#define __itt_fstrdup(s)          _strdup(s)
 #define __itt_thread_id()         GetCurrentThreadId()
 #define __itt_thread_yield()      SwitchToThread()
 #ifndef ITT_SIMPLE_INIT
@ -298,6 +271,13 @@ ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
 {
    return InterlockedIncrement(ptr);
 }
+ITT_INLINE long
+__itt_interlocked_compare_exchange(volatile long* ptr, long exchange, long comperand) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long
+__itt_interlocked_compare_exchange(volatile long* ptr, long exchange, long comperand)
+{
+    return InterlockedCompareExchange(ptr, exchange, comperand);
+}
 #endif /* ITT_SIMPLE_INIT */

 #define DL_SYMBOLS (1)
@ -327,6 +307,7 @@ ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
 }
 #define __itt_mutex_lock(mutex)   pthread_mutex_lock(mutex)
 #define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex)
+#define __itt_mutex_destroy(mutex) pthread_mutex_destroy(mutex)
 #define __itt_load_lib(name)      dlopen(name, RTLD_LAZY)
 #define __itt_unload_lib(handle)  dlclose(handle)
 #define __itt_system_error()      errno
@ -341,10 +322,18 @@ ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
 #ifdef SDL_STRNCPY_S
 #define __itt_fstrcpyn(s1, b, s2, l) SDL_STRNCPY_S(s1, b, s2, l)
 #else
-#define __itt_fstrcpyn(s1, b, s2, l) strncpy(s1, s2, b)
+#define __itt_fstrcpyn(s1, b, s2, l) {                                      \
+    if (b > 0) {                                                            \
+        /* 'volatile' is used to suppress the warning that a destination */ \
+        /*  bound depends on the length of the source.                   */ \
+        volatile size_t num_to_copy = (size_t)(b - 1) < (size_t)(l) ?       \
+                (size_t)(b - 1) : (size_t)(l);                              \
+        strncpy(s1, s2, num_to_copy);                                       \
+        s1[num_to_copy] = 0;                                                \
+    }                                                                       \
+}
 #endif /* SDL_STRNCPY_S */

-#define __itt_fstrdup(s)          strdup(s)
 #define __itt_thread_id()         pthread_self()
 #define __itt_thread_yield()      sched_yield()
 #if ITT_ARCH==ITT_ARCH_IA64
@ -360,12 +349,12 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
 {
    long result;
    __asm__ __volatile__("lock\nxadd %0,%1"
-                          : "=r"(result),"=m"(*(int*)ptr)
-                          : "0"(addend), "m"(*(int*)ptr)
+                          : "=r"(result),"=m"(*(volatile int*)ptr)
+                          : "0"(addend), "m"(*(volatile int*)ptr)
                          : "memory");
    return result;
 }
-#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_AARCH64 || ITT_ARCH==ITT_ARCH_PPC64
+#else
 #define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
 #endif /* ITT_ARCH==ITT_ARCH_IA64 */
 #ifndef ITT_SIMPLE_INIT
@ -375,6 +364,13 @@ ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
 {
    return __TBB_machine_fetchadd4(ptr, 1) + 1L;
 }
+ITT_INLINE long /* CAS: stores 'exchange' into *ptr only if *ptr == 'comperand'; returns the prior *ptr */
+__itt_interlocked_compare_exchange(volatile long* ptr, long exchange, long comperand) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long
+__itt_interlocked_compare_exchange(volatile long* ptr, long exchange, long comperand)
+{   /* the GCC builtin takes (ptr, oldval, newval): expected value first, new value second */
+    return __sync_val_compare_and_swap(ptr, comperand, exchange);
+}
 #endif /* ITT_SIMPLE_INIT */

 void* dlopen(const char*, int) __attribute__((weak));
@ -394,10 +390,20 @@ pthread_t pthread_self(void) __attribute__((weak));

 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */

-typedef enum {
-    __itt_collection_normal = 0,
-    __itt_collection_paused = 1
-} __itt_collection_state;
+/* strdup() is not part of C99, so calling it triggers an implicit-declaration
+ * warning; this macro duplicates 's' manually instead. 'new_s' receives the
+ * malloc'ed copy (NULL if malloc fails) and is left untouched when 's' is NULL.
+ * Length is measured by __itt_fstrnlen with ITT_STRDUP_MAX_STRING_SIZE as limit. */
+#define ITT_STRDUP_MAX_STRING_SIZE 4096
+#define __itt_fstrdup(s, new_s) do {                                        \
+    if (s != NULL) {                                                        \
+        size_t s_len = __itt_fstrnlen(s, ITT_STRDUP_MAX_STRING_SIZE);       \
+        new_s = (char *)malloc(s_len + 1);                                  \
+        if (new_s != NULL) {                                                \
+            __itt_fstrcpyn(new_s, s_len + 1, s, s_len);                     \
+        }                                                                   \
+    }                                                                       \
+} while(0)

 typedef enum {
    __itt_thread_normal  = 0,
@ -463,6 +469,10 @@ typedef struct __itt_counter_info

 struct ___itt_domain;
 struct ___itt_string_handle;
+struct ___itt_histogram;
+struct ___itt_counter_metadata;
+
+#include "ittnotify.h"

 typedef struct ___itt_global
 {
@ -484,7 +494,10 @@ typedef struct ___itt_global
    struct ___itt_domain*  domain_list;
    struct ___itt_string_handle* string_list;
    __itt_collection_state state;
-    __itt_counter_info_t* counter_list;
+    __itt_counter_info_t*  counter_list;
+    unsigned int           ipt_collect_events;
+    struct ___itt_histogram* histogram_list;
+    struct ___itt_counter_metadata* counter_metadata_list;
 } __itt_global;

 #pragma pack(pop)
@ -510,7 +523,9 @@ typedef struct ___itt_global
    h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
    if (h != NULL) { \
        h->tid    = t; \
-        h->nameA  = n ? __itt_fstrdup(n) : NULL; \
+        char *n_copy = NULL; \
+        __itt_fstrdup(n, n_copy); \
+        h->nameA  = n_copy; \
        h->nameW  = NULL; \
        h->state  = s; \
        h->extra1 = 0;    /* reserved */ \
@ -543,7 +558,9 @@ typedef struct ___itt_global
    h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
    if (h != NULL) { \
        h->flags  = 1;    /* domain is enabled by default */ \
-        h->nameA  = name ? __itt_fstrdup(name) : NULL; \
+        char *name_copy = NULL; \
+        __itt_fstrdup(name, name_copy); \
+        h->nameA  = name_copy; \
        h->nameW  = NULL; \
        h->extra1 = 0;    /* reserved */ \
        h->extra2 = NULL; /* reserved */ \
@ -573,7 +590,9 @@ typedef struct ___itt_global
 #define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \
    h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
    if (h != NULL) { \
-        h->strA   = name ? __itt_fstrdup(name) : NULL; \
+        char *name_copy = NULL; \
+        __itt_fstrdup(name, name_copy); \
+        h->strA  = name_copy; \
        h->strW   = NULL; \
        h->extra1 = 0;    /* reserved */ \
        h->extra2 = NULL; /* reserved */ \
@ -591,7 +610,7 @@ typedef struct ___itt_global
        h->nameA   = NULL; \
        h->nameW   = name ? _wcsdup(name) : NULL; \
        h->domainA   = NULL; \
-        h->domainW   = name ? _wcsdup(domain) : NULL; \
+        h->domainW   = domain ? _wcsdup(domain) : NULL; \
        h->type = type; \
        h->index = 0; \
        h->next   = NULL; \
@ -605,9 +624,13 @@ typedef struct ___itt_global
 #define NEW_COUNTER_A(gptr,h,h_tail,name,domain,type) { \
    h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \
    if (h != NULL) { \
-        h->nameA   = name ? __itt_fstrdup(name) : NULL; \
+        char *name_copy = NULL; \
+        __itt_fstrdup(name, name_copy); \
+        h->nameA  = name_copy; \
        h->nameW   = NULL; \
-        h->domainA   = domain ? __itt_fstrdup(domain) : NULL; \
+        char *domain_copy = NULL; \
+        __itt_fstrdup(domain, domain_copy); \
+        h->domainA  = domain_copy; \
        h->domainW   = NULL; \
        h->type = type; \
        h->index = 0; \
@ -619,4 +642,98 @@ typedef struct ___itt_global
    } \
 }

+#define NEW_HISTOGRAM_W(gptr,h,h_tail,domain_,name,x_type_,y_type_) { /* allocate a wide-name histogram node and link it */ \
+    h = (__itt_histogram*)malloc(sizeof(__itt_histogram)); \
+    if (h != NULL) { \
+        h->domain = (domain_); /* '_'-suffixed params keep field names from being macro-substituted */ \
+        h->nameA  = NULL; \
+        h->nameW  = (name) ? _wcsdup(name) : NULL; \
+        h->x_type = (x_type_); \
+        h->y_type = (y_type_); \
+        h->extra1 = 0; \
+        h->extra2 = NULL; \
+        h->next   = NULL; \
+        if (h_tail == NULL) /* no tail given: node becomes the list head */ \
+            (gptr)->histogram_list = h; \
+        else \
+            h_tail->next = h; \
+    } \
+}
+
+#define NEW_HISTOGRAM_A(gptr,h,h_tail,domain_,name,x_type_,y_type_) { /* allocate a narrow-name histogram node and link it */ \
+    h = (__itt_histogram*)malloc(sizeof(__itt_histogram)); \
+    if (h != NULL) { \
+        h->domain = (domain_); /* '_'-suffixed params keep field names from being macro-substituted */ \
+        char *name_copy = NULL; \
+        __itt_fstrdup(name, name_copy); /* leaves name_copy NULL when name is NULL */ \
+        h->nameA  = name_copy; \
+        h->nameW  = NULL; \
+        h->x_type = (x_type_); \
+        h->y_type = (y_type_); \
+        h->extra1 = 0; \
+        h->extra2 = NULL; \
+        h->next   = NULL; \
+        if (h_tail == NULL) /* no tail given: node becomes the list head */ \
+            (gptr)->histogram_list = h; \
+        else \
+            h_tail->next = h; \
+    } \
+}
+
+#define NEW_COUNTER_METADATA_NUM(gptr,h,h_tail,counter_,type_,value_) { /* allocate a numeric counter-metadata node and link it */ \
+    h = (__itt_counter_metadata*)malloc(sizeof(__itt_counter_metadata)); \
+    if (h != NULL) { \
+        h->counter = (counter_); /* '_'-suffixed params keep field names from being macro-substituted */ \
+        h->type = (type_); \
+        h->str_valueA = NULL; \
+        h->str_valueW = NULL; \
+        h->value = (value_); \
+        h->extra1 = 0; \
+        h->extra2 = NULL; \
+        h->next   = NULL; \
+        if (h_tail == NULL) /* no tail given: node becomes the list head */ \
+            (gptr)->counter_metadata_list = h; \
+        else \
+            h_tail->next = h; \
+    } \
+}
+
+#define NEW_COUNTER_METADATA_STR_A(gptr,h,h_tail,counter_,type_,str_valueA_) { /* allocate a narrow-string counter-metadata node and link it */ \
+    h = (__itt_counter_metadata*)malloc(sizeof(__itt_counter_metadata)); \
+    if (h != NULL) { \
+        h->counter = (counter_); /* '_'-suffixed params keep field names from being macro-substituted */ \
+        h->type = (type_); \
+        char *str_value_copy = NULL; \
+        __itt_fstrdup(str_valueA_, str_value_copy); /* leaves the copy NULL when the source is NULL */ \
+        h->str_valueA = str_value_copy; \
+        h->str_valueW = NULL; \
+        h->value = 0; \
+        h->extra1 = 0; \
+        h->extra2 = NULL; \
+        h->next   = NULL; \
+        if (h_tail == NULL) /* no tail given: node becomes the list head */ \
+            (gptr)->counter_metadata_list = h; \
+        else \
+            h_tail->next = h; \
+    } \
+}
+
+#define NEW_COUNTER_METADATA_STR_W(gptr,h,h_tail,counter_,type_,str_valueW_) { /* allocate a wide-string counter-metadata node and link it */ \
+    h = (__itt_counter_metadata*)malloc(sizeof(__itt_counter_metadata)); \
+    if (h != NULL) { \
+        h->counter = (counter_); /* '_'-suffixed params keep field names from being macro-substituted */ \
+        h->type = (type_); \
+        h->str_valueA = NULL; \
+        h->str_valueW = (str_valueW_) ? _wcsdup(str_valueW_) : NULL; \
+        h->value = 0; \
+        h->extra1 = 0; \
+        h->extra2 = NULL; \
+        h->next   = NULL; \
+        if (h_tail == NULL) /* no tail given: node becomes the list head */ \
+            (gptr)->counter_metadata_list = h; \
+        else \
+            h_tail->next = h; \
+    } \
+}
+
 #endif /* _ITTNOTIFY_CONFIG_H_ */
--- a/3rdparty/ittnotify/src/ittnotify/ittnotify_static.c
+++ b/3rdparty/ittnotify/src/ittnotify/ittnotify_static.c
--- a/3rdparty/ittnotify/src/ittnotify/ittnotify_static.h
+++ b/3rdparty/ittnotify/src/ittnotify/ittnotify_static.h
@ -1,60 +1,8 @@
-/* <copyright>
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
+/*
+  Copyright (C) 2005-2019 Intel Corporation

-  GPL LICENSE SUMMARY
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  Contact Information:
-  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
-
-  BSD LICENSE
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-</copyright> */
+  SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+*/

 #include "ittnotify_config.h"

@ -81,6 +29,9 @@ ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name), (ITT_FORM
 ITT_STUB(ITTAPI, __itt_domain*, domain_create,  (const char    *name), (ITT_FORMAT name), domain_create,  __itt_group_structure, "\"%s\"")
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */

+ITT_STUBV(ITTAPI, void, module_load_with_sections, (__itt_module_object* module_obj), (ITT_FORMAT module_obj), module_load_with_sections, __itt_group_module, "%p")
+ITT_STUBV(ITTAPI, void, module_unload_with_sections, (__itt_module_object* module_obj), (ITT_FORMAT module_obj), module_unload_with_sections, __itt_group_module, "%p")
+
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char    *name), (ITT_FORMAT name), string_handle_createA, __itt_group_structure, "\"%s\"")
 ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name), (ITT_FORMAT name), string_handle_createW, __itt_group_structure, "\"%S\"")
@ -105,6 +56,8 @@ ITT_STUB(ITTAPI, __itt_counter, counter_create_typed,  (const char    *name, con

 ITT_STUBV(ITTAPI, void, pause,  (void), (ITT_NO_PARAMS), pause,  __itt_group_control | __itt_group_legacy, "no args")
 ITT_STUBV(ITTAPI, void, resume, (void), (ITT_NO_PARAMS), resume, __itt_group_control | __itt_group_legacy, "no args")
+ITT_STUBV(ITTAPI, void, pause_scoped,  (__itt_collection_scope scope), (ITT_FORMAT scope), pause_scoped,  __itt_group_control, "%d")
+ITT_STUBV(ITTAPI, void, resume_scoped, (__itt_collection_scope scope), (ITT_FORMAT scope), resume_scoped, __itt_group_control, "%d")

 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char    *name), (ITT_FORMAT name), thread_set_nameA, __itt_group_thread, "\"%s\"")
@ -121,6 +74,23 @@ ITT_STUB(LIBITTAPI, int,  thr_name_setW, (const wchar_t *name, int namelen), (IT
 ITT_STUB(LIBITTAPI, int,  thr_name_set,  (const char    *name, int namelen), (ITT_FORMAT name, namelen), thr_name_set,  __itt_group_thread | __itt_group_legacy, "\"%s\", %d")
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 ITT_STUBV(LIBITTAPI, void, thr_ignore,   (void),                             (ITT_NO_PARAMS),            thr_ignore,    __itt_group_thread | __itt_group_legacy, "no args")
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN /* Windows: separate char (A) and wchar_t (W) stubs; W names are formatted with %S */
+ITT_STUB(ITTAPI, __itt_histogram*, histogram_createA, (const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type), (ITT_FORMAT domain, name, x_type, y_type), histogram_createA, __itt_group_structure, "%p, \"%s\", %d, %d")
+ITT_STUB(ITTAPI, __itt_histogram*, histogram_createW, (const __itt_domain* domain, const wchar_t* name, __itt_metadata_type x_type, __itt_metadata_type y_type), (ITT_FORMAT domain, name, x_type, y_type), histogram_createW, __itt_group_structure, "%p, \"%S\", %d, %d")
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_histogram*, histogram_create, (const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type), (ITT_FORMAT domain, name, x_type, y_type), histogram_create, __itt_group_structure, "%p, \"%s\", %d, %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN /* Windows: separate char (A) and wchar_t (W) stubs; W names are formatted with %S */
+ITT_STUB(ITTAPI, __itt_counter, counter_createA_v3, (const __itt_domain* domain, const char    *name, __itt_metadata_type type), (ITT_FORMAT domain, name, type), counter_createA_v3, __itt_group_counter, "%p, \"%s\", %d")
+ITT_STUB(ITTAPI, __itt_counter, counter_createW_v3, (const __itt_domain* domain, const wchar_t *name, __itt_metadata_type type), (ITT_FORMAT domain, name, type), counter_createW_v3, __itt_group_counter, "%p, \"%S\", %d")
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create_v3,  (const __itt_domain* domain, const char    *name, __itt_metadata_type type), (ITT_FORMAT domain, name, type), counter_create_v3,  __itt_group_counter, "%p, \"%s\", %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ITT_STUBV(ITTAPI, void, bind_context_metadata_to_counter, (__itt_counter counter, size_t length, __itt_context_metadata* metadata), (ITT_FORMAT counter, length, metadata), bind_context_metadata_to_counter, __itt_group_structure, "%p, %lu, %p")
+
 #endif /* __ITT_INTERNAL_BODY */

 ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args")
@ -296,6 +266,13 @@ ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain), (ITT_FORMA
 #else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 ITT_STUB(ITTAPI, __itt_frame, frame_create,  (const char    *domain), (ITT_FORMAT domain), frame_create,  __itt_group_frame, "\"%s\"")
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char    *name), (ITT_FORMAT name), pt_region_createA, __itt_group_structure, "\"%s\"")
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name), (ITT_FORMAT name), pt_region_createW, __itt_group_structure, "\"%S\"")
+#else  /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create,  (const char    *name), (ITT_FORMAT name), pt_region_create,  __itt_group_structure, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #endif /* __ITT_INTERNAL_BODY */
 ITT_STUBV(ITTAPI, void, frame_begin,         (__itt_frame frame),     (ITT_FORMAT frame),  frame_begin,   __itt_group_frame, "%p")
 ITT_STUBV(ITTAPI, void, frame_end,           (__itt_frame frame),     (ITT_FORMAT frame),  frame_end,     __itt_group_frame, "%p")
@ -376,14 +353,16 @@ ITT_STUB(ITTAPI, int, av_save,  (void *data, int rank, const int *dimensions, in
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #endif /* __ITT_INTERNAL_BODY */

-#ifndef __ITT_INTERNAL_BODY
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, module_loadA, (void *start_addr, void* end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_loadA, __itt_group_none, "%p, %p, %p")
-ITT_STUBV(ITTAPI, void, module_loadW, (void *start_addr, void* end_addr, const wchar_t *path), (ITT_FORMAT start_addr, end_addr, path), module_loadW, __itt_group_none, "%p, %p, %p")
+ITT_STUBV(ITTAPI, void, module_loadA, (void *start_addr, void* end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_loadA, __itt_group_module, "%p, %p, %p")
+ITT_STUBV(ITTAPI, void, module_loadW, (void *start_addr, void* end_addr, const wchar_t *path), (ITT_FORMAT start_addr, end_addr, path), module_loadW, __itt_group_module, "%p, %p, %p")
 #else  /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_load, __itt_group_none, "%p, %p, %p")
+ITT_STUBV(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_load, __itt_group_module, "%p, %p, %p")
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* __ITT_INTERNAL_BODY */
+ITT_STUBV(ITTAPI, void, module_unload, (void *start_addr), (ITT_FORMAT start_addr), module_unload, __itt_group_module, "%p")

+ITT_STUBV(ITTAPI, void, histogram_submit, (__itt_histogram* hist, size_t length, void* x_data, void* y_data), (ITT_FORMAT hist, length, x_data, y_data), histogram_submit, __itt_group_structure, "%p, %lu, %p, %p")
+
+ITT_STUBV(ITTAPI, void, counter_set_value_v3, (__itt_counter counter, void *value_ptr), (ITT_FORMAT counter, value_ptr), counter_set_value_v3, __itt_group_counter, "%p, %p")

 #endif /* __ITT_INTERNAL_INIT */
--- a/3rdparty/ittnotify/src/ittnotify/ittnotify_types.h
+++ b/3rdparty/ittnotify/src/ittnotify/ittnotify_types.h
@ -1,85 +1,34 @@
-/* <copyright>
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
+/*
+  Copyright (C) 2005-2019 Intel Corporation

-  GPL LICENSE SUMMARY
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  Contact Information:
-  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
-
-  BSD LICENSE
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-</copyright> */
+  SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+*/

 #ifndef _ITTNOTIFY_TYPES_H_
 #define _ITTNOTIFY_TYPES_H_

 typedef enum ___itt_group_id
 {
-    __itt_group_none      = 0,
-    __itt_group_legacy    = 1<<0,
-    __itt_group_control   = 1<<1,
-    __itt_group_thread    = 1<<2,
-    __itt_group_mark      = 1<<3,
-    __itt_group_sync      = 1<<4,
-    __itt_group_fsync     = 1<<5,
-    __itt_group_jit       = 1<<6,
-    __itt_group_model     = 1<<7,
-    __itt_group_splitter_min = 1<<7,
-    __itt_group_counter   = 1<<8,
-    __itt_group_frame     = 1<<9,
-    __itt_group_stitch    = 1<<10,
-    __itt_group_heap      = 1<<11,
-    __itt_group_splitter_max = 1<<12,
-    __itt_group_structure = 1<<12,
-    __itt_group_suppress = 1<<13,
-    __itt_group_arrays    = 1<<14,
-    __itt_group_all       = -1
+    __itt_group_none      		= 0,
+    __itt_group_legacy    		= 1<<0,
+    __itt_group_control   		= 1<<1,
+    __itt_group_thread    		= 1<<2,
+    __itt_group_mark      		= 1<<3,
+    __itt_group_sync      		= 1<<4,
+    __itt_group_fsync     		= 1<<5,
+    __itt_group_jit       		= 1<<6,
+    __itt_group_model     		= 1<<7,
+    __itt_group_splitter_min 	= 1<<7,
+    __itt_group_counter   		= 1<<8,
+    __itt_group_frame     		= 1<<9,
+    __itt_group_stitch    		= 1<<10,
+    __itt_group_heap      		= 1<<11,
+    __itt_group_splitter_max 	= 1<<12,
+    __itt_group_structure 		= 1<<12,
+    __itt_group_suppress 		= 1<<13,
+    __itt_group_arrays    		= 1<<14,
+    __itt_group_module    		= 1<<15,
+    __itt_group_all       		= -1
 } __itt_group_id;

 #pragma pack(push, 8)
@ -109,6 +58,7 @@ typedef struct ___itt_group_list
        { __itt_group_structure, "structure" }, \
        { __itt_group_suppress,  "suppress"  }, \
        { __itt_group_arrays,    "arrays"    }, \
+		{ __itt_group_module,    "module"    }, \
        { __itt_group_none,      NULL        }  \
    }

--- a/3rdparty/ittnotify/src/ittnotify/jitprofiling.c
+++ b/3rdparty/ittnotify/src/ittnotify/jitprofiling.c
@ -1,76 +1,24 @@
-/* <copyright>
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
+/*
+  Copyright (C) 2005-2019 Intel Corporation

-  GPL LICENSE SUMMARY
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  Contact Information:
-  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
-
-  BSD LICENSE
-
-  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-</copyright> */
+  SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+*/

 #include "ittnotify_config.h"

 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 #include <windows.h>
+#include <string.h>
+#include <ctype.h>
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM != ITT_PLATFORM_MAC && ITT_PLATFORM != ITT_PLATFORM_FREEBSD
+#if ITT_PLATFORM != ITT_PLATFORM_MAC && ITT_PLATFORM != ITT_PLATFORM_FREEBSD && ITT_PLATFORM != ITT_PLATFORM_OPENBSD
 #include <malloc.h>
 #endif
 #include <stdlib.h>

 #include "jitprofiling.h"

-static const char rcsid[] = "\n@(#) $Revision: 471937 $\n";
-
-#define DLL_ENVIRONMENT_VAR             "VS_PROFILER"
+static const char rcsid[] = "\n@(#) $Revision$\n";

 #ifndef NEW_DLL_ENVIRONMENT_VAR
 #if ITT_ARCH==ITT_ARCH_IA32
@ -81,13 +29,10 @@ static const char rcsid[] = "\n@(#) $Revision: 471937 $\n";
 #endif /* NEW_DLL_ENVIRONMENT_VAR */

 #if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define DEFAULT_DLLNAME                 "JitPI.dll"
 HINSTANCE m_libHandle = NULL;
 #elif ITT_PLATFORM==ITT_PLATFORM_MAC
-#define DEFAULT_DLLNAME                 "libJitPI.dylib"
 void* m_libHandle = NULL;
 #else
-#define DEFAULT_DLLNAME                 "libJitPI.so"
 void* m_libHandle = NULL;
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */

@ -169,6 +114,38 @@ ITT_EXTERN_C iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive()
    return executionMode;
 }

+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+/* Checks whether `path` looks like an absolute Windows path.
+ *
+ * path          - candidate string; may be NULL.
+ * maxPathLength - number of bytes of `path` that may be inspected.
+ *
+ * Returns 1 when the string is NUL-terminated within maxPathLength bytes,
+ * is longer than two characters, and starts either with a drive prefix
+ * (letter, ':', '\') or with two backslashes. Returns 0 otherwise.
+ * Only the prefix is examined; the rest of the string is not validated.
+ */
+static int isValidAbsolutePath(char *path, size_t maxPathLength)
+{
+    size_t pathLength;
+
+    if (path == NULL)
+    {
+        return 0;
+    }
+
+    pathLength = strnlen(path, maxPathLength);
+    if (pathLength == maxPathLength)
+    {
+        /* strnlen() returns maxPathLength when no terminating NUL was found
+         * among the first maxPathLength characters, i.e. the string is not
+         * properly terminated within the buffer.
+         */
+        return 0;
+    }
+
+    if (pathLength > 2)
+    {
+        /* isalpha() is only defined for EOF and values representable as
+         * unsigned char; a plain char with the high bit set may be negative,
+         * so convert it before the call.
+         */
+        if (isalpha((unsigned char)path[0]) && path[1] == ':' && path[2] == '\\')
+        {
+            return 1;
+        }
+        if (path[0] == '\\' && path[1] == '\\')
+        {
+            return 1;
+        }
+    }
+    return 0;
+}
+#endif
+
 /* This function loads the collector dll and the relevant functions.
 * on success: all functions load,     iJIT_DLL_is_missing = 0, return value = 1
 * on failure: all functions are NULL, iJIT_DLL_is_missing = 1, return value = 0
@ -212,7 +189,7 @@ static int loadiJIT_Funcs()
        {
            envret = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR, 
                                             dllName, dNameLength);
-            if (envret)
+            if (envret && isValidAbsolutePath(dllName, dNameLength))
            {
                /* Try to load the dll from the PATH... */
                m_libHandle = LoadLibraryExA(dllName, 
@ -220,30 +197,9 @@ static int loadiJIT_Funcs()
            }
            free(dllName);
        }
-    } else {
-        /* Try to use old VS_PROFILER variable */
-        dNameLength = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR, NULL, 0);
-        if (dNameLength)
-        {
-            DWORD envret = 0;
-            dllName = (char*)malloc(sizeof(char) * (dNameLength + 1));
-            if(dllName != NULL)
-            {
-                envret = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR, 
-                                                 dllName, dNameLength);
-                if (envret)
-                {
-                    /* Try to load the dll from the PATH... */
-                    m_libHandle = LoadLibraryA(dllName);
-                }
-                free(dllName);
-            }
-        }
    }
 #else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
    dllName = getenv(NEW_DLL_ENVIRONMENT_VAR);
-    if (!dllName)
-        dllName = getenv(DLL_ENVIRONMENT_VAR);
 #if defined(__ANDROID__) || defined(ANDROID)
    if (!dllName)
        dllName = ANDROID_JIT_AGENT_PATH;
@ -251,19 +207,13 @@ static int loadiJIT_Funcs()
    if (dllName)
    {
        /* Try to load the dll from the PATH... */
-        m_libHandle = dlopen(dllName, RTLD_LAZY);
+        if (DL_SYMBOLS)
+        {
+            m_libHandle = dlopen(dllName, RTLD_LAZY);
+        }
    }
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */

-    if (!m_libHandle)
-    {
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-        m_libHandle = LoadLibraryA(DEFAULT_DLLNAME);
-#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-        m_libHandle = dlopen(DEFAULT_DLLNAME, RTLD_LAZY);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-    }
-
    /* if the dll wasn't loaded - exit. */
    if (!m_libHandle)
    {
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -692,6 +692,9 @@ if(UNIX OR MINGW)
    # TODO:
    # - std::aligned_alloc() C++17 / C11
  endif()
+
+  CHECK_SYMBOL_EXISTS(getauxval sys/auxv.h HAVE_GETAUXVAL)
+  CHECK_SYMBOL_EXISTS(elf_aux_info sys/auxv.h HAVE_ELF_AUX_INFO)
 elseif(WIN32)
  include(CheckIncludeFile)
  include(CheckSymbolExists)
@ -1647,7 +1650,7 @@ endif()

 if(CV_TRACE OR OPENCV_TRACE)
  ocv_build_features_string(trace_status EXCLUSIVE
-    IF HAVE_ITT THEN "with Intel ITT"
+    IF HAVE_ITT THEN "with Intel ITT(${ITTNOTIFY_VERSION})"
    ELSE "built-in")
  status("")
  status("  Trace: " OPENCV_TRACE THEN "YES (${trace_status})" ELSE NO)
--- a/cmake/OpenCVDetectCUDAUtils.cmake
+++ b/cmake/OpenCVDetectCUDAUtils.cmake
@ -95,7 +95,7 @@ endfunction()

 macro(ocv_initialize_nvidia_device_generations)
  OCV_OPTION(CUDA_ENABLE_DEPRECATED_GENERATION "Enable deprecated generations in the list" OFF)
-  set(_generations "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "Lovelace" "Hopper")
+  set(_generations "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "Lovelace" "Hopper" "Blackwell")
  if(CUDA_ENABLE_DEPRECATED_GENERATION)
    set(_generations "Fermi" "${_generations}")
    set(_generations "Kepler" "${_generations}")
@ -109,6 +109,7 @@ macro(ocv_initialize_nvidia_device_generations)
  set(_arch_ampere   "8.0;8.6")
  set(_arch_lovelace "8.9")
  set(_arch_hopper   "9.0")
+  set(_arch_blackwell "10.0;12.0")
  if(NOT CMAKE_CROSSCOMPILING)
    list(APPEND _generations "Auto")
  endif()
@ -244,6 +245,8 @@ macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable)
    set(__cuda_arch_bin ${_arch_lovelace})
  elseif(CUDA_GENERATION STREQUAL "Hopper")
    set(__cuda_arch_bin ${_arch_hopper})
+  elseif(CUDA_GENERATION STREQUAL "Blackwell")
+    set(__cuda_arch_bin ${_arch_blackwell})
  elseif(CUDA_GENERATION STREQUAL "Auto")
    ocv_detect_native_cuda_arch(${nvcc_executable} _nvcc_res _nvcc_out)
    if(NOT _nvcc_res EQUAL 0)
@ -268,13 +271,14 @@ macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable)
      endif()
      if(NOT _nvcc_res EQUAL 0)
        message(STATUS "CUDA: Automatic detection of CUDA generation failed. Going to build for all known architectures")
-        # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7)
+        # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7) Thor (10.1)
        ocv_filter_available_architecture(${nvcc_executable} __cuda_arch_bin
            5.3
            6.2
            7.2
            7.0
            8.7
+            10.1
        )
      else()
        set(__cuda_arch_bin "${_nvcc_out}")
@ -291,6 +295,7 @@ macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable)
          ${_arch_ampere}
          ${_arch_lovelace}
          ${_arch_hopper}
+          ${_arch_blackwell}
      )
      list(GET __cuda_arch_bin -1 __cuda_arch_ptx)
    endif()
@ -303,12 +308,12 @@ macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable)

  # Check if user specified 1.0/2.1 compute capability: we don't support it
  macro(ocv_wipeout_deprecated_cc target_cc)
-    if(" ${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}" MATCHES " ${target_cc}")
+    if(${target_cc} IN_LIST ARCH_BIN_NO_POINTS OR ${target_cc} IN_LIST ARCH_PTX_NO_POINTS)
      message(SEND_ERROR "CUDA: ${target_cc} compute capability is not supported - exclude it from ARCH/PTX list and re-run CMake")
    endif()
  endmacro()
-  ocv_wipeout_deprecated_cc("1.0")
-  ocv_wipeout_deprecated_cc("2.1")
+  ocv_wipeout_deprecated_cc("10")
+  ocv_wipeout_deprecated_cc("21")
 endmacro()

 macro(ocv_set_nvcc_threads_for_vs)
--- a/cmake/OpenCVFindAVIF.cmake
+++ b/cmake/OpenCVFindAVIF.cmake
@ -19,7 +19,7 @@ if(TARGET avif)
    MARK_AS_ADVANCED(AVIF_LIBRARY)

    SET(AVIF_FOUND TRUE)
-    GET_TARGET_PROPERTY(AVIF_LIBRARY avif LOCATION)
+    SET(AVIF_LIBRARY avif)
    GET_TARGET_PROPERTY(AVIF_INCLUDE_DIR1 avif INCLUDE_DIRECTORIES)
    GET_TARGET_PROPERTY(AVIF_INCLUDE_DIR2 avif INTERFACE_INCLUDE_DIRECTORIES)
    set(AVIF_INCLUDE_DIR)
--- a/cmake/OpenCVFindLibsPerf.cmake
+++ b/cmake/OpenCVFindLibsPerf.cmake
@ -179,7 +179,14 @@ endif(WITH_KLEIDICV)
 if(WITH_FASTCV)
  if((EXISTS ${FastCV_INCLUDE_PATH}) AND (EXISTS ${FastCV_LIB_PATH}))
    message(STATUS "Use external FastCV ${FastCV_INCLUDE_PATH}, ${FastCV_LIB_PATH}")
-    set(HAVE_FASTCV TRUE CACHE BOOL "FastCV status")
+    find_library(FASTCV_LIBRARY NAMES "fastcv"
+                 PATHS "${FastCV_LIB_PATH}" NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH)
+    mark_as_advanced(FASTCV_LIBRARY)
+    if (FASTCV_LIBRARY)
+      set(HAVE_FASTCV TRUE CACHE BOOL "FastCV status")
+    else()
+      set(HAVE_FASTCV FALSE CACHE BOOL "FastCV status")
+    endif()
  else()
    include("${OpenCV_SOURCE_DIR}/3rdparty/fastcv/fastcv.cmake")
    set(FCV_ROOT_DIR "${OpenCV_BINARY_DIR}/3rdparty/fastcv")
@ -188,13 +195,15 @@ if(WITH_FASTCV)
      set(FastCV_INCLUDE_PATH "${FCV_ROOT_DIR}/inc" CACHE PATH "FastCV includes directory")
      set(FastCV_LIB_PATH "${FCV_ROOT_DIR}/libs" CACHE PATH "FastCV library directory")
      ocv_install_3rdparty_licenses(FastCV "${OpenCV_BINARY_DIR}/3rdparty/fastcv/LICENSE")
-      install(FILES "${FastCV_LIB_PATH}/libfastcvopt.so"
-              DESTINATION "${OPENCV_LIB_INSTALL_PATH}" COMPONENT "bin")
+      if(ANDROID)
+        set(FASTCV_LIBRARY "${FastCV_LIB_PATH}/libfastcvopt.so" CACHE PATH "FastCV library")
+        install(FILES "${FASTCV_LIBRARY}" DESTINATION "${OPENCV_LIB_INSTALL_PATH}" COMPONENT "bin")
+      else()
+        set(FASTCV_LIBRARY "${FastCV_LIB_PATH}/libfastcv.a" CACHE PATH "FastCV library")
+        install(FILES "${FASTCV_LIBRARY}" DESTINATION "${OPENCV_LIB_INSTALL_PATH}" COMPONENT "dev")
+      endif()
    else()
      set(HAVE_FASTCV FALSE CACHE BOOL "FastCV status")
    endif()
  endif()
-  if(HAVE_FASTCV)
-    set(FASTCV_LIBRARY "${FastCV_LIB_PATH}/libfastcvopt.so" CACHE PATH "FastCV library")
-  endif()
 endif(WITH_FASTCV)
--- a/cmake/OpenCVFindOpenEXR.cmake
+++ b/cmake/OpenCVFindOpenEXR.cmake
@ -94,17 +94,20 @@ FOREACH(SEARCH_PATH ${SEARCH_PATHS})
            IF (${contents} MATCHES "#define OPENEXR_VERSION_MINOR ([0-9]+)")
                SET(OPENEXR_VERSION_MINOR "${CMAKE_MATCH_1}")
            ENDIF ()
+            FILE (STRINGS ${OPENEXR_VERSION_FILE} contents REGEX "#define OPENEXR_VERSION_PATCH ")
+            IF (${contents} MATCHES "#define OPENEXR_VERSION_PATCH ([0-9]+)")
+                SET(OPENEXR_VERSION_PATCH "${CMAKE_MATCH_1}")
+            ENDIF ()
        ENDIF ()
    ENDIF ()

-    IF (OPENEXR_VERSION_MAJOR AND OPENEXR_VERSION_MINOR)
-        set(OPENEXR_VERSION "${OPENEXR_VERSION_MAJOR}_${OPENEXR_VERSION_MINOR}")
-    ENDIF ()
+    set(OPENEXR_VERSION_MM "${OPENEXR_VERSION_MAJOR}_${OPENEXR_VERSION_MINOR}")
+    set(OPENEXR_VERSION "${OPENEXR_VERSION_MAJOR}.${OPENEXR_VERSION_MINOR}.${OPENEXR_VERSION_PATCH}")

-    ocv_find_openexr("-${OPENEXR_VERSION}")
-    ocv_find_openexr("-${OPENEXR_VERSION}_s")
-    ocv_find_openexr("-${OPENEXR_VERSION}_d")
-    ocv_find_openexr("-${OPENEXR_VERSION}_s_d")
+    ocv_find_openexr("-${OPENEXR_VERSION_MM}")
+    ocv_find_openexr("-${OPENEXR_VERSION_MM}_s")
+    ocv_find_openexr("-${OPENEXR_VERSION_MM}_d")
+    ocv_find_openexr("-${OPENEXR_VERSION_MM}_s_d")
    ocv_find_openexr("")
    ocv_find_openexr("_s")
    ocv_find_openexr("_d")
@ -118,6 +121,7 @@ FOREACH(SEARCH_PATH ${SEARCH_PATHS})
    UNSET(OPENEXR_VERSION_FILE)
    UNSET(OPENEXR_VERSION_MAJOR)
    UNSET(OPENEXR_VERSION_MINOR)
+    UNSET(OPENEXR_VERSION_MM)
    UNSET(OPENEXR_VERSION)
 ENDFOREACH()

@ -144,6 +148,11 @@ IF(OPENEXR_FOUND)
  endif()
  if(NOT OPENEXR_VERSION)
    SET(OPENEXR_VERSION "Unknown")
+  else()
+    if(HAVE_CXX17 AND OPENEXR_VERSION VERSION_LESS "2.3.0")
+      message(STATUS "  OpenEXR(ver ${OPENEXR_VERSION}) doesn't support C++17 and higher. Updating OpenEXR 2.3.0+ is required.")
+      SET(OPENEXR_FOUND FALSE)
+    endif()
  endif()
 ELSE()
  IF(OPENEXR_FIND_REQUIRED)
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
--- a/doc/py_tutorials/py_calib3d/py_calibration/py_calibration.markdown
+++ b/doc/py_tutorials/py_calib3d/py_calibration/py_calibration.markdown
@ -24,6 +24,8 @@ red line. All the expected straight lines are bulged out. Visit [Distortion

 ![image](images/calib_radial.jpg)

+In the following sections several new parameters are introduced. Visit [Camera Calibration and 3D Reconstruction](#tutorial_table_of_content_calib3d) for more details.
+
 Radial distortion can be represented as follows:

 \f[x_{distorted} = x( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6) \\
--- a/doc/tutorials/calib3d/camera_calibration/camera_calibration.markdown
+++ b/doc/tutorials/calib3d/camera_calibration/camera_calibration.markdown
@ -131,7 +131,7 @@ Explanation
    The formation of the equations I mentioned above aims
    to finding major patterns in the input: in case of the chessboard this are corners of the
    squares and for the circles, well, the circles themselves. ChArUco board is equivalent to
-    chessboard, but corners are mached by ArUco markers. The position of these will form the
+    chessboard, but corners are matched by ArUco markers. The position of these will form the
    result which will be written into the *pointBuf* vector.
    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp find_pattern
    Depending on the type of the input pattern you use either the @ref cv::findChessboardCorners or
@ -144,9 +144,9 @@ Explanation

    @note Board size and amount of matched points is different for chessboard, circles grid and ChArUco.
    All chessboard related algorithm expects amount of inner corners as board width and height.
-    Board size of circles grid is just amount of circles by both grid dimentions. ChArUco board size
+    Board size of circles grid is just amount of circles by both grid dimensions. ChArUco board size
    is defined in squares, but detection result is list of inner corners and that's why is smaller
-    by 1 in both dimentions.
+    by 1 in both dimensions.

    Then again in case of cameras we only take camera images when an input delay time is passed.
    This is done in order to allow user moving the chessboard around and getting different images.
--- a/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown
+++ b/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown
@ -12,7 +12,7 @@ Interactive camera calibration application {#tutorial_interactive_calibration}
 | Compatibility | OpenCV >= 3.1 |


-According to classical calibration technique user must collect all data first and when run @ref cv::calibrateCamera function
+According to classical calibration technique user must collect all data first and then run @ref cv::calibrateCamera function
 to obtain camera parameters. If average re-projection error is huge or if estimated parameters seems to be wrong, process of
 selection or collecting data and starting of @ref cv::calibrateCamera repeats.

@ -96,9 +96,9 @@ By default values of advanced parameters are stored in defaultConfig.xml
 -  *charuco_square_length*: size of square on chAruco board (in pixels)
 -  *charuco_marker_size*: size of Aruco markers on chAruco board (in pixels)
 -  *calibration_step*: interval in frames between launches of @ref cv::calibrateCamera
-  *max_frames_num*: if number of frames for calibration is greater then this value frames filter starts working.
+-  *max_frames_num*: if number of frames for calibration is greater than this value frames filter starts working.
 After filtration size of calibration dataset is equals to *max_frames_num*
-  *min_frames_num*: if number of frames is greater then this value turns on auto flags tuning, undistorted view and quality evaluation
+-  *min_frames_num*: if number of frames is greater than this value turns on auto flags tuning, undistorted view and quality evaluation
 -  *solver_eps*: precision of Levenberg-Marquardt solver in @ref cv::calibrateCamera
 -  *solver_max_iters*: iterations limit of solver
 -  *fast_solver*: if this value is nonzero and Lapack is found QR decomposition is used instead of SVD in solver.
@ -129,7 +129,7 @@ This pattern is very sensitive to quality of production and measurements.

 Data filtration
 ------
-When size of calibration dataset is greater then *max_frames_num* starts working
+When size of calibration dataset is greater than *max_frames_num* starts working
 data filter. It tries to remove "bad" frames from dataset. Filter removes the frame
 on which \f$loss\_function\f$ takes maximum.

--- a/doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown
+++ b/doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown
@ -14,8 +14,7 @@ Real Time pose estimation of a textured object {#tutorial_real_time_pose}

 Nowadays, augmented reality is one of the top research topic in computer vision and robotics fields.
 The most elemental problem in augmented reality is the estimation of the camera pose respect of an
-object in the case of computer vision area to do later some 3D rendering or in the case of robotics
-obtain an object pose in order to grasp it and do some manipulation. However, this is not a trivial
+object, either to perform subsequent 3D rendering in computer vision or, in robotics, to obtain an object pose for grasping and manipulation. However, this is not a trivial
 problem to solve due to the fact that the most common issue in image processing is the computational
 cost of applying a lot of algorithms or mathematical operations for solving a problem which is basic
 and immediately for humans.
@ -23,7 +22,7 @@ and immediately for humans.
 Goal
 ----

-In this tutorial is explained how to build a real time application to estimate the camera pose in
+This tutorial explains how to build a real-time application to estimate the camera pose in
 order to track a textured object with six degrees of freedom given a 2D image and its 3D textured
 model.

@ -74,7 +73,7 @@ The tutorial consists of two main programs:

 -#  **Model registration**

-    This application is exclusive to whom don't have a 3D textured model of the object to be detected.
+    This application is intended for users who do not have a 3D textured model of the object to be detected.
    You can use this program to create your own textured 3D model. This program only works for planar
    objects, then if you want to model an object with complex shape you should use a sophisticated
    software to create it.
@ -82,7 +81,7 @@ The tutorial consists of two main programs:
    The application needs an input image of the object to be registered and its 3D mesh. We have also
    to provide the intrinsic parameters of the camera with which the input image was taken. All the
    files need to be specified using the absolute path or the relative one from your application’s
-    working directory. If none files are specified the program will try to open the provided default
+    working directory. If no files are specified the program will try to open the provided default
    parameters.

    The application starts up extracting the ORB features and descriptors from the input image and
@ -97,7 +96,7 @@ The tutorial consists of two main programs:

 -#  **Model detection**

-    The aim of this application is estimate in real time the object pose given its 3D textured model.
+    The aim of this application is to estimate in real time the object pose given its 3D textured model.

    The application starts up loading the 3D textured model in YAML file format with the same
    structure explained in the model registration program. From the scene, the ORB features and
@ -106,7 +105,7 @@ The tutorial consists of two main programs:
    Using the found matches along with @ref cv::solvePnPRansac function the `R` and `t` of
    the camera are computed. Finally, a KalmanFilter is applied in order to reject bad poses.

-    In the case that you compiled OpenCV with the samples, you can find it in opencv/build/bin/cpp-tutorial-pnp_detection\`.
+    In the case that you compiled OpenCV with the samples, you can find it in `opencv/build/bin/cpp-tutorial-pnp_detection`.
    Then you can run the application and change some parameters:
    @code{.cpp}
    This program shows how to detect an object given its 3D textured model. You can choose to use a recorded video or the webcam.
@ -326,7 +325,7 @@ Here is explained in detail the code for the real time application:
    descriptors, match using *two Nearest Neighbour* the extracted descriptors with the given model
    descriptors and vice versa. Then, a ratio test is applied to the two direction matches in order to
    remove these matches which its distance ratio between the first and second best match is larger
-    than a given threshold. Finally, a symmetry test is applied in order the remove non symmetrical
+    than a given threshold. Finally, a symmetry test is applied in order to remove non symmetrical
    matches.
    @code{.cpp}
    void RobustMatcher::robustMatch( const cv::Mat& frame, std::vector<cv::DMatch>& good_matches,
@ -489,7 +488,7 @@ Here is explained in detail the code for the real time application:

    }
    @endcode
-    In the following code are the 3th and 4th steps of the main algorithm. The first, calling the
+    In the following code are the 3rd and 4th steps of the main algorithm. The first, calling the
    above function and the second taking the output inliers vector from RANSAC to get the 2D scene
    points for drawing purpose. As seen in the code we must be sure to apply RANSAC if we have
    matches, in the other case, the function @ref cv::solvePnPRansac crashes due to any OpenCV *bug*.
--- a/doc/tutorials/calib3d/usac.markdown
+++ b/doc/tutorials/calib3d/usac.markdown
@ -168,8 +168,8 @@ components:
        plays significant role as it requires less iterations,
        furthermore in average P3P solver has around 1.39
        estimated models. Also, in new version of `solvePnPRansac(...)`
-        with `UsacParams` there is an options to pass empty intrinsic
-        matrix `InputOutputArray cameraMatrix`. If matrix is empty than
+        with `UsacParams` there is an option to pass empty intrinsic
+        matrix `InputOutputArray cameraMatrix`. If matrix is empty then
        using Direct Linear Transformation algorithm (PnP with 6 points)
        framework outputs not only rotation and translation vector but
        also calibration matrix.
@ -201,7 +201,7 @@ a neighborhood graph. In framework there are 3 options to do it:
    cells using hash-table. The method is described in @cite barath2019progressive. Less
    accurate than `NEIGH_FLANN_RADIUS`, although significantly faster.

-Note, `NEIGH_FLANN_RADIUS` and `NEIGH_FLANN_RADIUS` are not able to PnP
+Note, `NEIGH_FLANN_RADIUS` and `NEIGH_GRID` are not applicable to the PnP
 solver, since there are 3D object points.

 New flags:
@ -236,7 +236,7 @@ A few other important parameters:

 2.  `loIterations` – number of iterations for Local Optimization method.
    *The default value is 10*. By increasing `loIterations` the output
-    model could be more accurate, however, the computationial time may
+    model could be more accurate, however, the computational time may
    also increase.

 3.  `loSampleSize` – maximum sample number for Local Optimization. *The
@ -253,7 +253,7 @@ There are three new sample files in opencv/samples directory.
 1.  `epipolar_lines.cpp` – input arguments of `main` function are two
    paths to images. Then correspondences are found using
    SIFT detector. Fundamental matrix is found using RANSAC from
-    tentative correspondences and epipolar lines are plot.
+    tentative correspondences and epipolar lines are plotted.

 2.  `essential_mat_reconstr.cpp` – input arguments are path to data file
    containing image names and single intrinsic matrix and directory
@ -266,4 +266,4 @@ There are three new sample files in opencv/samples directory.

 3.  `essential_mat_reconstr.py` – the same functionality as in .cpp
    file, however instead of clustering points to plane the 3D map of
-    object points is plot.
+    object points is plotted.
--- a/doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown
+++ b/doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown
@ -24,7 +24,7 @@ In this tutorial you will learn:
 Theory
 ------

-@note The explanation is based on the books @cite gonzalez and @cite gruzman. Also, you can refer to Matlab's tutorial [Image Deblurring in Matlab] and the article [SmartDeblur].
+@note The explanation is based on the books @cite Gonzalez1987 and @cite gruzman. Also, you can refer to Matlab's tutorial [Image Deblurring in Matlab] and the article [SmartDeblur].
@note The out-of-focus image on this page is a real world  image. The out-of-focus was achieved manually by camera optics.

 ### What is a degradation image model?
--- a/doc/tutorials/imgproc/periodic_noise_removing_filter/periodic_noise_removing_filter.markdown
+++ b/doc/tutorials/imgproc/periodic_noise_removing_filter/periodic_noise_removing_filter.markdown
@ -20,7 +20,7 @@ In this tutorial you will learn:
 Theory
 ------

-@note The explanation is based on the book @cite gonzalez. The image on this page is a real world image.
+@note The explanation is based on the book @cite Gonzalez1987. The image on this page is a real world image.

 Periodic noise produces spikes in the Fourier domain that can often be detected by visual analysis.

--- a/doc/tutorials/introduction/config_reference/config_reference.markdown
+++ b/doc/tutorials/introduction/config_reference/config_reference.markdown
@ -293,31 +293,39 @@ TODO: other options: `WITH_OPENCL_SVM`, `WITH_OPENCLAMDFFT`, `WITH_OPENCLAMDBLAS

 Following formats can be read by OpenCV without help of any third-party library:

- [BMP](https://en.wikipedia.org/wiki/BMP_file_format)
- [HDR](https://en.wikipedia.org/wiki/RGBE_image_format) (`WITH_IMGCODEC_HDR`)
- [Sun Raster](https://en.wikipedia.org/wiki/Sun_Raster) (`WITH_IMGCODEC_SUNRASTER`)
- [PPM, PGM, PBM, PFM](https://en.wikipedia.org/wiki/Netpbm#File_formats) (`WITH_IMGCODEC_PXM`, `WITH_IMGCODEC_PFM`)
+| Formats | Option | Default |
+| --------| ------ | ------- |
+| [BMP](https://en.wikipedia.org/wiki/BMP_file_format) | (Always) | _ON_ |
+| [HDR](https://en.wikipedia.org/wiki/RGBE_image_format) | `WITH_IMGCODEC_HDR` | _ON_ |
+| [Sun Raster](https://en.wikipedia.org/wiki/Sun_Raster) | `WITH_IMGCODEC_SUNRASTER` | _ON_ |
+| [PPM, PGM, PBM, PAM](https://en.wikipedia.org/wiki/Netpbm#File_formats) | `WITH_IMGCODEC_PXM` | _ON_ |
+| [PFM](https://en.wikipedia.org/wiki/Netpbm#File_formats) | `WITH_IMGCODEC_PFM` | _ON_ |
+| [GIF](https://en.wikipedia.org/wiki/GIF) | `WITH_IMGCODEC_GIF` | _OFF_ |

+### PNG, JPEG, TIFF, WEBP, JPEG 2000, EXR, JPEG XL support

-### PNG, JPEG, TIFF, WEBP support
-
-| Formats | Option | Default | Force build own |
-| --------| ------ | ------- | --------------- |
-| [PNG](https://en.wikipedia.org/wiki/Portable_Network_Graphics) | `WITH_PNG` | _ON_ | `BUILD_PNG` |
-| [JPEG](https://en.wikipedia.org/wiki/JPEG) | `WITH_JPEG` | _ON_ | `BUILD_JPEG` |
-| [TIFF](https://en.wikipedia.org/wiki/TIFF) | `WITH_TIFF` | _ON_ | `BUILD_TIFF` |
-| [WEBP](https://en.wikipedia.org/wiki/WebP) | `WITH_WEBP` | _ON_ | `BUILD_WEBP` |
-| [JPEG2000 with OpenJPEG](https://en.wikipedia.org/wiki/OpenJPEG) | `WITH_OPENJPEG` | _ON_ | `BUILD_OPENJPEG` |
-| [JPEG2000 with JasPer](https://en.wikipedia.org/wiki/JasPer) | `WITH_JASPER` | _ON_ (see note) | `BUILD_JASPER` |
-| [EXR](https://en.wikipedia.org/wiki/OpenEXR) | `WITH_OPENEXR` | _ON_ | Not Supported (see note) |
-| [JPEG XL](https://en.wikipedia.org/wiki/JPEG_XL) | `WITH_JPEGXL` | _ON_ | Not supported. (see note) |
+| Formats | Library | Option | Default | Force build own |
+| --------| ------- | ------ | ------- | --------------- |
+| [PNG](https://en.wikipedia.org/wiki/Portable_Network_Graphics) | [libpng](https://en.wikipedia.org/wiki/Libpng)| `WITH_PNG` | _ON_ | `BUILD_PNG` |
+|^| [libspng(simple png)](https://libspng.org/) | `WITH_SPNG` | _OFF_ | `BUILD_SPNG` |
+| [JPEG](https://en.wikipedia.org/wiki/JPEG) | [libjpeg-turbo](https://en.wikipedia.org/wiki/Libjpeg) | `WITH_JPEG` | _ON_ | `BUILD_JPEG` |
+|^| [libjpeg](https://en.wikipedia.org/wiki/Libjpeg) | `WITH_JPEG` | _OFF_ | `BUILD_JPEG` with `BUILD_JPEG_TURBO_DISABLE` |
+| [TIFF](https://en.wikipedia.org/wiki/TIFF) | [LibTIFF](https://en.wikipedia.org/wiki/LibTIFF) | `WITH_TIFF` | _ON_ | `BUILD_TIFF` |
+| [WebP](https://en.wikipedia.org/wiki/WebP) || `WITH_WEBP` | _ON_ | `BUILD_WEBP` |
+| [JPEG 2000](https://en.wikipedia.org/wiki/JPEG_2000) | [OpenJPEG](https://en.wikipedia.org/wiki/OpenJPEG) | `WITH_OPENJPEG` | _ON_ | `BUILD_OPENJPEG` |
+|^| [JasPer](https://en.wikipedia.org/wiki/JasPer) | `WITH_JASPER` | _ON_ (see note) | `BUILD_JASPER` |
+| [OpenEXR](https://en.wikipedia.org/wiki/OpenEXR) || `WITH_OPENEXR` | _ON_ | Not supported. (see note) |
+| [JPEG XL](https://en.wikipedia.org/wiki/JPEG_XL) || `WITH_JPEGXL` | _ON_ | Not supported. (see note) |

 All libraries required to read images in these formats are included into OpenCV and will be built automatically if not found at the configuration stage. Corresponding `BUILD_*` options will force building and using own libraries, they are enabled by default on some platforms, e.g. Windows.

-@note OpenJPEG have higher priority than JasPer which is deprecated. In order to use JasPer, OpenJPEG must be disabled.
+@note (All) Only one library for each image format can be enabled (e.g. in order to use JasPer for JPEG 2000 format, OpenJPEG must be disabled).
+@note (JPEG 2000) OpenJPEG has higher priority than JasPer, which is deprecated.
@note (JPEG XL) OpenCV doesn't contain libjxl source code, so `BUILD_JPEGXL` is not supported.
@note (EXR) OpenCV 5 doesn't contain OpenEXR source code, so `BUILD_OPENEXR` is not supported.

+@warning OpenEXR ver 2.2 or earlier cannot be used in combination with C++17 or later. In this case, updating to OpenEXR ver 2.3.0 or later is required.
+
 ### GDAL integration

 `WITH_GDAL` (default: _OFF_)
--- a/modules/3d/include/opencv2/3d.hpp
+++ b/modules/3d/include/opencv2/3d.hpp
@ -2650,6 +2650,42 @@ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
                            TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 1e-8)
                          );

+/**
+@brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme for fisheye camera model.
+
+@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
+1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
+@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
+where N is the number of points. vector\<Point2d\> can be also passed here.
+@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
+@param distCoeffs Input vector of distortion coefficients (4x1/1x4).
+@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
+the model coordinate system to the camera coordinate system.
+@param tvec Output translation vector.
+@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
+the provided rvec and tvec values as initial approximations of the rotation and translation
+vectors, respectively, and further optimizes them.
+@param iterationsCount Number of iterations.
+@param reprojectionError Inlier threshold value used by the RANSAC procedure. The parameter value
+is the maximum allowed distance between the observed and computed point projections to consider it
+an inlier.
+@param confidence The probability that the algorithm produces a useful result.
+@param inliers Output vector that contains indices of inliers in objectPoints and imagePoints .
+@param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags
+@param criteria Termination criteria for internal undistortPoints call.
+
+The function internally undistorts points with @ref undistortPoints and calls @ref cv::solvePnPRansac,
+thus the inputs are very similar. More information about Perspective-n-Point is given in @ref calib3d_solvePnP.
+*/
+CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoints,
+                                  InputArray cameraMatrix, InputArray distCoeffs,
+                                  OutputArray rvec, OutputArray tvec,
+                                  bool useExtrinsicGuess = false, int iterationsCount = 100,
+                                  float reprojectionError = 8.0, double confidence = 0.99,
+                                  OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE,
+                                  TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 1e-8)
+                                );
+
 } // namespace fisheye

 /** @brief Octree for 3D vision.
--- a/modules/3d/src/fisheye.cpp
+++ b/modules/3d/src/fisheye.cpp
@ -646,4 +646,17 @@ bool cv::fisheye::solvePnP( InputArray opoints, InputArray ipoints,
    return cv::solvePnP(opoints, imagePointsNormalized, cameraMatrix, noArray(), rvec, tvec, useExtrinsicGuess, flags);
 }

+bool cv::fisheye::solvePnPRansac( InputArray opoints, InputArray ipoints,
+                                  InputArray cameraMatrix, InputArray distCoeffs,
+                                  OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess,
+                                  int iterationsCount, float reprojectionError,
+                                  double confidence, OutputArray inliers,
+                                  int flags, TermCriteria criteria)
+{
+    Mat imagePointsNormalized; // undistorted image points; cameraMatrix is passed again as the last matrix argument below
+    cv::fisheye::undistortPoints(ipoints, imagePointsNormalized, cameraMatrix, distCoeffs, noArray(), cameraMatrix, criteria);
+    return cv::solvePnPRansac(opoints, imagePointsNormalized, cameraMatrix, noArray(), rvec, tvec, // no distortion coefficients: already removed above
+                              useExtrinsicGuess, iterationsCount, reprojectionError, confidence, inliers, flags);
+}
+
 } // namespace cv
--- a/modules/3d/src/fundam.cpp
+++ b/modules/3d/src/fundam.cpp
@ -1297,7 +1297,7 @@ double sampsonDistance(InputArray _pt1, InputArray _pt2, InputArray _F)
 {
    CV_INSTRUMENT_REGION();

-    CV_Assert(_pt1.type() == CV_64F && _pt2.type() == CV_64F && _F.type() == CV_64F);
+    CV_Assert(_pt1.depth() == CV_64F && _pt2.depth() == CV_64F && _F.depth() == CV_64F);
    CV_DbgAssert(_pt1.rows() == 3 && _F.size() == Size(3, 3) && _pt1.rows() == _pt2.rows());

    Mat pt1(_pt1.getMat());
--- a/modules/3d/src/solvepnp.cpp
+++ b/modules/3d/src/solvepnp.cpp
@ -56,7 +56,7 @@ namespace cv {

 using namespace std;

-#if defined _DEBUG || defined CV_STATIC_ANALYSIS
+#if !defined(NDEBUG) || defined(CV_STATIC_ANALYSIS)
 static bool isPlanarObjectPoints(InputArray _objectPoints, double threshold)
 {
    CV_CheckType(_objectPoints.type(), _objectPoints.type() == CV_32FC3 || _objectPoints.type() == CV_64FC3,
@ -924,7 +924,7 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints,
    {
        CV_Assert(npoints == 4);

-#if defined _DEBUG || defined CV_STATIC_ANALYSIS
+#if !defined(NDEBUG) || defined(CV_STATIC_ANALYSIS)
        double Xs[4][3];
        if (opoints.depth() == CV_32F)
        {
--- a/modules/3d/test/test_fisheye.cpp
+++ b/modules/3d/test/test_fisheye.cpp
@ -245,6 +245,58 @@ TEST_F(fisheyeTest, solvePnP)
    ASSERT_TRUE(converged);
 }

+TEST_F(fisheyeTest, solvePnPRansac)
+{
+    const int inliers_n = 16;
+    const int outliers_n = 4;
+    const bool use_extrinsic_guess = false;
+    const int iterations_count = 100;
+    const float reprojection_error = 1.0;
+    const double confidence = 0.99;
+
+    const cv::Matx33d R_mat ( 9.9756700084424932e-01, 6.9698277640183867e-02, 1.4929569991321144e-03,
+                              -6.9711825162322980e-02, 9.9748249845531767e-01, 1.2997180766418455e-02,
+                              -5.8331736398316541e-04,-1.3069635393884985e-02, 9.9991441852366736e-01);
+
+    const cv::Vec3d T(-9.9217369356044638e-02, 3.1741831972356663e-03, 1.8551007952921010e-04);
+
+    cv::Mat rvec;
+    cv::Rodrigues(R_mat, rvec);
+
+    // inliers: random object points projected with the ground-truth pose (rvec, T)
+    cv::Mat inlier_obj_points(1, inliers_n, CV_64FC3);
+    theRNG().fill(inlier_obj_points, cv::RNG::NORMAL, 2, 1);
+    inlier_obj_points = cv::abs(inlier_obj_points) * 10;
+    cv::Mat inlier_img_points;
+    cv::fisheye::projectPoints(inlier_obj_points, inlier_img_points, rvec, T, this->K, this->D);
+
+    // outliers: random object points projected with a scaled translation (T * 10), i.e. not with the ground-truth pose
+    cv::Mat outlier_obj_points(1, outliers_n, CV_64FC3);
+    theRNG().fill(outlier_obj_points, cv::RNG::NORMAL, 2, 1);
+    outlier_obj_points = cv::abs(outlier_obj_points) * 10;
+    cv::Mat outlier_img_points;
+    cv::fisheye::projectPoints(outlier_obj_points, outlier_img_points, rvec, (T * 10), this->K, this->D);
+
+    cv::Mat obj_points;
+    cv::hconcat(outlier_obj_points, inlier_obj_points, obj_points); // outliers first, inliers last
+
+    cv::Mat img_points;
+    cv::hconcat(outlier_img_points, inlier_img_points, img_points); // same ordering as obj_points
+
+    cv::Mat rvec_pred;
+    cv::Mat tvec_pred;
+    cv::Mat inliers_pred;
+
+    bool converged = cv::fisheye::solvePnPRansac(obj_points, img_points, this->K, this->D,
+                                                 rvec_pred, tvec_pred, use_extrinsic_guess,
+                                                 iterations_count, reprojection_error, confidence, inliers_pred);
+
+    EXPECT_MAT_NEAR(rvec, rvec_pred, 1e-5); // recovered pose must match the pose used for the inliers
+    EXPECT_MAT_NEAR(T, tvec_pred, 1e-5);
+    EXPECT_EQ(inliers_pred.size[0], inliers_n); // the number of reported inliers must equal inliers_n
+    ASSERT_TRUE(converged);
+}
+
 TEST_F(fisheyeTest, undistortImage)
 {
    // we use it to reduce patch size for images in testdata
--- a/modules/calib/misc/python/test/test_calibration.py
+++ b/modules/calib/misc/python/test/test_calibration.py
@ -84,5 +84,13 @@ class calibration_test(NewOpenCVTests):
        self.assertTrue(imagePoints is not None)
        self.assertTrue(jacobian is not None)

+    def test_sampsonDistance_valid2D(self):  # 2-D float64 arrays must be accepted and give a non-negative scalar
+        pt1 = (np.random.rand(3, 10) * 256).astype(np.float64)
+        pt2 = (np.random.rand(3, 10) * 256).astype(np.float64)
+        F = (np.random.rand(3, 3) * 256).astype(np.float64)
+        dist = cv.sampsonDistance(pt1, pt2, F)
+        self.assertTrue(isinstance(dist, (float, np.floating)))  # result is a floating-point scalar
+        self.assertGreaterEqual(dist, 0.0)
+
 if __name__ == '__main__':
    NewOpenCVTests.bootstrap()
--- a/modules/calib/src/chessboard.cpp
+++ b/modules/calib/src/chessboard.cpp
@ -3720,10 +3720,11 @@ Chessboard::Board Chessboard::detectImpl(const Mat& gray,std::vector<cv::Mat> &f
                    continue;
                }

+                iter_boards->normalizeOrientation(false);
+
                if(iter_boards->getSize() == parameters.chessboard_size ||
                        iter_boards->getSize() == chessboard_size2)
                {
-                    iter_boards->normalizeOrientation(false);
                    if(iter_boards->getSize() != parameters.chessboard_size)
                    {
                        if(iter_boards->isCellBlack(0,0) == iter_boards->isCellBlack(0,int(iter_boards->colCount())-1))
--- a/modules/calib/test/test_chesscorners.cpp
+++ b/modules/calib/test/test_chesscorners.cpp
@ -849,5 +849,18 @@ TEST(Calib3d_RotatedCirclesPatternDetector, issue_24964)
    EXPECT_LE(error, precise_success_error_level);
 }

+TEST(Calib3d_CornerOrdering, issue_26830) { // corner ordering must not depend on the requested pattern size
+    const cv::String dataDir = string(TS::ptr()->get_data_path()) + "cv/cameracalibration/";
+    const cv::Mat image = cv::imread(dataDir + "checkerboard_marker_white.png");
+    ASSERT_FALSE(image.empty()) << "Can't read test image: " << dataDir << "checkerboard_marker_white.png";
+    std::vector<Point2f> cornersMinimumSizeMatchesPatternSize;
+    ASSERT_TRUE(cv::findChessboardCornersSB(image, Size(14, 9), cornersMinimumSizeMatchesPatternSize, CALIB_CB_MARKER | CALIB_CB_LARGER));
+
+    std::vector<Point2f> cornersMinimumSizeSmallerThanPatternSize;
+    ASSERT_TRUE(cv::findChessboardCornersSB(image, Size(4, 4), cornersMinimumSizeSmallerThanPatternSize, CALIB_CB_MARKER | CALIB_CB_LARGER));
+    // Both searches must return identical corners in identical order.
+    ASSERT_EQ(cornersMinimumSizeMatchesPatternSize, cornersMinimumSizeSmallerThanPatternSize);
+}
+
 }} // namespace
 /* End of file. */
--- a/modules/core/CMakeLists.txt
+++ b/modules/core/CMakeLists.txt
@ -117,6 +117,12 @@ endif()
 if(HAVE_WIN32_ALIGNED_MALLOC)
  ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_WIN32_ALIGNED_MALLOC=1")
 endif()
+if(HAVE_GETAUXVAL)
+  ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/system.cpp "HAVE_GETAUXVAL=1")
+endif()
+if(HAVE_ELF_AUX_INFO)
+  ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/system.cpp "HAVE_ELF_AUX_INFO=1")
+endif()
 if(HAVE_VA_INTEL_OLD_HEADER)
  ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp" "HAVE_VA_INTEL_OLD_HEADER")
 endif()
--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@ -197,8 +197,72 @@ enum DftFlags {
    DCT_ROWS           = DFT_ROWS
 };

-//! Various border types, image boundaries are denoted with `|`
-//! @see borderInterpolate, copyMakeBorder
+/*! Various border types, image boundaries are denoted with the `|` character in the table below, when describing each method.
+
+The following examples show the result of the @ref copyMakeBorder call according to different methods.
+Input image is `6x4` (width x height) size and the @ref copyMakeBorder function is used with a border size of 2 pixels
+in each direction, giving a resulting image of `10x8` resolution.
+
+@code
+Input image:
+[[ 0  1  2  3  4  5]
+ [ 6  7  8  9 10 11]
+ [12 13 14 15 16 17]
+ [18 19 20 21 22 23]]
+
+Border type: BORDER_CONSTANT (a constant value of 255 is used)
+[[255 255 255 255 255 255 255 255 255 255]
+ [255 255 255 255 255 255 255 255 255 255]
+ [255 255   0   1   2   3   4   5 255 255]
+ [255 255   6   7   8   9  10  11 255 255]
+ [255 255  12  13  14  15  16  17 255 255]
+ [255 255  18  19  20  21  22  23 255 255]
+ [255 255 255 255 255 255 255 255 255 255]
+ [255 255 255 255 255 255 255 255 255 255]]
+
+Border type: BORDER_REPLICATE
+[[ 0  0  0  1  2  3  4  5  5  5]
+ [ 0  0  0  1  2  3  4  5  5  5]
+ [ 0  0  0  1  2  3  4  5  5  5]
+ [ 6  6  6  7  8  9 10 11 11 11]
+ [12 12 12 13 14 15 16 17 17 17]
+ [18 18 18 19 20 21 22 23 23 23]
+ [18 18 18 19 20 21 22 23 23 23]
+ [18 18 18 19 20 21 22 23 23 23]]
+
+Border type: BORDER_REFLECT
+[[ 7  6  6  7  8  9 10 11 11 10]
+ [ 1  0  0  1  2  3  4  5  5  4]
+ [ 1  0  0  1  2  3  4  5  5  4]
+ [ 7  6  6  7  8  9 10 11 11 10]
+ [13 12 12 13 14 15 16 17 17 16]
+ [19 18 18 19 20 21 22 23 23 22]
+ [19 18 18 19 20 21 22 23 23 22]
+ [13 12 12 13 14 15 16 17 17 16]]
+
+Border type: BORDER_WRAP
+[[16 17 12 13 14 15 16 17 12 13]
+ [22 23 18 19 20 21 22 23 18 19]
+ [ 4  5  0  1  2  3  4  5  0  1]
+ [10 11  6  7  8  9 10 11  6  7]
+ [16 17 12 13 14 15 16 17 12 13]
+ [22 23 18 19 20 21 22 23 18 19]
+ [ 4  5  0  1  2  3  4  5  0  1]
+ [10 11  6  7  8  9 10 11  6  7]]
+
+Border type: BORDER_REFLECT_101
+[[14 13 12 13 14 15 16 17 16 15]
+ [ 8  7  6  7  8  9 10 11 10  9]
+ [ 2  1  0  1  2  3  4  5  4  3]
+ [ 8  7  6  7  8  9 10 11 10  9]
+ [14 13 12 13 14 15 16 17 16 15]
+ [20 19 18 19 20 21 22 23 22 21]
+ [14 13 12 13 14 15 16 17 16 15]
+ [ 8  7  6  7  8  9 10 11 10  9]]
+@endcode
+
+@see borderInterpolate, copyMakeBorder
+ */
 enum BorderTypes {
    BORDER_CONSTANT    = 0, //!< `iiiiii|abcdefgh|iiiiiii`  with some specified `i`
    BORDER_REPLICATE   = 1, //!< `aaaaaa|abcdefgh|hhhhhhh`
@ -214,7 +278,6 @@ enum BorderTypes {

 //! @} core_array

-
 //! @addtogroup core_utils
 //! @{

--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@ -202,6 +202,7 @@ T* allocSingletonNew() { return new(allocSingletonNewBuffer(sizeof(T))) T(); }
 #define IPP_DISABLE_XYZ_RGB             1 // big accuracy difference
 #define IPP_DISABLE_HOUGH               1 // improper integration/results
 #define IPP_DISABLE_FILTER2D_BIG_MASK   1 // different results on masks > 7x7
+#define IPP_DISABLE_NORM_8U             1 // accuracy difference in perf test sanity check

 // Temporary disabled named IPP region. Performance
 #define IPP_DISABLE_PERF_COPYMAKE       1 // performance variations
--- a/modules/core/include/opencv2/core/vsx_utils.hpp
+++ b/modules/core/include/opencv2/core/vsx_utils.hpp
@ -257,8 +257,8 @@ VSX_IMPL_1VRG(vec_udword2, vec_udword2, vpopcntd, vec_popcntu)
 VSX_IMPL_1VRG(vec_udword2, vec_dword2,  vpopcntd, vec_popcntu)

 // converts between single and double-precision
-VSX_REDIRECT_1RG(vec_float4,  vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp)
-VSX_REDIRECT_1RG(vec_double2, vec_float4,  vec_cvfo, __builtin_vsx_xvcvspdp)
+VSX_REDIRECT_1RG(vec_float4,  vec_double2, vec_cvfo, vec_floate)
+VSX_REDIRECT_1RG(vec_double2, vec_float4,  vec_cvfo, vec_doubleo)

 // converts word and doubleword to double-precision
 #undef vec_ctd
@ -399,10 +399,6 @@ VSX_REDIRECT_1RG(vec_ushort8, vec_ushort8, vec_popcntu, vec_popcnt)
 VSX_REDIRECT_1RG(vec_uint4,   vec_uint4,   vec_popcntu, vec_popcnt)
 VSX_REDIRECT_1RG(vec_udword2, vec_udword2, vec_popcntu, vec_popcnt)

-// converts between single and double precision
-VSX_REDIRECT_1RG(vec_float4,  vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp)
-VSX_REDIRECT_1RG(vec_double2, vec_float4,  vec_cvfo, __builtin_vsx_xvcvspdp)
-
 // converts word and doubleword to double-precision
 #ifdef vec_ctd
 #   undef vec_ctd
--- a/modules/core/src/convert.dispatch.cpp
+++ b/modules/core/src/convert.dispatch.cpp
@ -167,6 +167,15 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const
    dst.create( dims, size, dtype, -1, allowTransposed );
    Mat dstMat = dst.getMat();

+    if( dims <= 2 )
+    {
+        CALL_HAL(convertScale, cv_hal_convertScale, src.data, src.step, dstMat.data, dstMat.step, src.cols * cn, src.rows, sdepth, ddepth, alpha, beta);
+    }
+    else if( src.isContinuous() && dstMat.isContinuous() )
+    {
+        CALL_HAL(convertScale, cv_hal_convertScale, src.data, 0, dstMat.data, 0, (int)src.total() * cn, 1, sdepth, ddepth, alpha, beta);
+    }
+
    BinaryFunc func = noScale ? getConvertFunc(sdepth, ddepth) : getConvertScaleFunc(sdepth, ddepth);
    double scale[] = {alpha, beta};
    CV_Assert( func != 0 );
--- a/modules/core/src/hal_replacement.hpp
+++ b/modules/core/src/hal_replacement.hpp
@ -214,7 +214,7 @@ inline int hal_ni_absDiffScalar8u8u  (const uchar* src_data, size_t src_step, uc
 Bitwise AND: _dst[i] = src1[i] & src2[i]_ @n
 Bitwise OR: _dst[i] = src1[i] | src2[i]_ @n
 Bitwise XOR: _dst[i] = src1[i] ^ src2[i]_ @n
-Bitwise NOT: _dst[i] = !src[i]_
+Bitwise NOT: _dst[i] = ~src[i]_
@param src1_data first source image data
@param src1_step first source image step
@param src2_data second source image data
@ -313,7 +313,7 @@ For 8s input type 128 is added to LUT index
 Destination should have the same element type and number of channels as lookup table elements
@param src_data Source image data
@param src_step Source image step
-@param src_type Sorce image type
+@param src_type Source image type
@param lut_data Pointer to lookup table
@param lut_channel_size Size of each channel in bytes
@param lut_channels Number of channels in lookup table
@ -357,9 +357,69 @@ Hamming distance between two vectors
 inline int hal_ni_normHammingDiff8u(const uchar* a, const uchar* b, int n, int cellSize, int* result) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
 //! @}

+/**
+@brief Generic norm of an array.
+@param src Source image data
+@param src_step Source image step
+@param mask Specified array region.
+@param mask_step Mask array step.
+@param width Source image width
+@param height Source image height
+@param type Element type of source image
+@param norm_type Type of the norm
+@param result Pointer to result output
+*/
+//! @addtogroup core_hal_interface_norm Absolute norm
+//! @{
+inline int hal_ni_norm(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width,
+                       int height, int type, int norm_type, double* result) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+//! @}
+
+/**
+@brief Generic norm between two arrays.
+@param src1 First source image data
+@param src1_step First source image step
+@param src2 Second source image data
+@param src2_step Second source image step
+@param mask Specified array region.
+@param mask_step Mask array step.
+@param width Source image width
+@param height Source image height
+@param type Element type of source image
+@param norm_type Type of the norm
+@param result Pointer to result output
+*/
+//! @addtogroup core_hal_interface_norm Absolute norm
+//! @{
+inline int hal_ni_normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask,
+                           size_t mask_step, int width, int height, int type, int norm_type, double* result) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+//! @}
+
+/**
+@brief Convert array to another with specified type.
+@param src Source image data
+@param src_step Source image step
+@param dst Destination image data
+@param dst_step Destination image step
+@param width Row width in elements (Mat::convertTo passes the column count multiplied by the channel count)
+@param height Number of rows
+@param sdepth Depth of source image
+@param ddepth Depth of destination image
+@param alpha Scale value
+@param beta Shift value
+*/
+//! @addtogroup core_hal_interface_convert Array convert
+//! @{
+inline int hal_ni_convertScale(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height,
+                               int sdepth, int ddepth, double alpha, double beta) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+//! @}
+
 //! @cond IGNORED
 #define cv_hal_normHamming8u hal_ni_normHamming8u
 #define cv_hal_normHammingDiff8u hal_ni_normHammingDiff8u
+#define cv_hal_norm hal_ni_norm
+#define cv_hal_normDiff hal_ni_normDiff
+#define cv_hal_convertScale hal_ni_convertScale
 //! @endcond

 /**
@ -464,7 +524,7 @@ inline int hal_ni_div16bf(const cv_hal_bf16 *src1_data, size_t src1_step, const
 //! @}

 /**
-Computes reciprocial: _dst[i] = scale / src[i]_
+Computes reciprocal: _dst[i] = scale / src[i]_
@param src_data source image data
@param src_step source image step
@param dst_data destination image data
@ -473,7 +533,7 @@ Computes reciprocial: _dst[i] = scale / src[i]_
@param height height of the images
@param scale additional multiplier
 */
-//! @addtogroup core_hal_interface_reciprocial Element-wise reciprocial
+//! @addtogroup core_hal_interface_reciprocal Element-wise reciprocal
 //! @{
 inline int hal_ni_recip8u(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
 inline int hal_ni_recip8s(const schar *src_data, size_t src_step, schar *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
@ -864,14 +924,14 @@ inline int hal_ni_dctFree2D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEME
 Performs \f$LU\f$ decomposition of square matrix \f$A=P*L*U\f$ (where \f$P\f$ is permutation matrix) and solves matrix equation \f$A*X=B\f$.
 Function returns the \f$sign\f$ of permutation \f$P\f$ via parameter info.
@param src1 pointer to input matrix \f$A\f$ stored in row major order. After finish of work src1 contains at least \f$U\f$ part of \f$LU\f$
-decomposition which is appropriate for determainant calculation: \f$det(A)=sign*\prod_{j=1}^{M}a_{jj}\f$.
+decomposition which is appropriate for determinant calculation: \f$det(A)=sign*\prod_{j=1}^{M}a_{jj}\f$.
@param src1_step number of bytes between two consequent rows of matrix \f$A\f$.
@param m size of square matrix \f$A\f$.
@param src2 pointer to \f$M\times N\f$ matrix \f$B\f$ which is the right-hand side of system \f$A*X=B\f$. \f$B\f$ stored in row major order.
 If src2 is null pointer only \f$LU\f$ decomposition will be performed. After finish of work src2 contains solution \f$X\f$ of system \f$A*X=B\f$.
@param src2_step number of bytes between two consequent rows of matrix \f$B\f$.
@param n number of right-hand vectors in \f$M\times N\f$ matrix \f$B\f$.
-@param info indicates success of decomposition. If *info is equals to zero decomposition failed, othervise *info is equals to \f$sign\f$.
+@param info indicates success of decomposition. If *info is equals to zero decomposition failed, otherwise *info is equals to \f$sign\f$.
 */
 //! @addtogroup core_hal_interface_decomp_lu LU matrix decomposition
 //! @{
@ -1011,8 +1071,26 @@ inline int hal_ni_gemm64fc(const double* src1, size_t src1_step, const double* s
 inline int hal_ni_minMaxIdx(const uchar* src_data, size_t src_step, int width, int height, int depth, double* minVal, double* maxVal,
                            int* minIdx, int* maxIdx, uchar* mask) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }

+/**
+   @brief Finds the global minimum and maximum in an array.
+   @param src_data Source image data
+   @param src_step Source image step
+   @param width Source image width
+   @param height Source image height
+   @param depth Depth of source image
+   @param minVal Pointer to the returned global minimum.
+   @param maxVal Pointer to the returned global maximum.
+   @param minIdx Pointer to the returned minimum location.
+   @param maxIdx Pointer to the returned maximum location.
+   @param mask Specified array region.
+   @param mask_step Mask array step.
+*/
+inline int hal_ni_minMaxIdxMaskStep(const uchar* src_data, size_t src_step, int width, int height, int depth, double* minVal, double* maxVal,
+                                    int* minIdx, int* maxIdx, uchar* mask, size_t mask_step) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+
 //! @cond IGNORED
 #define cv_hal_minMaxIdx hal_ni_minMaxIdx
+#define cv_hal_minMaxIdxMaskStep hal_ni_minMaxIdxMaskStep
 //! @endcond

 /**
@ -1126,26 +1204,23 @@ inline int hal_ni_transpose2d(const uchar* src_data, size_t src_step, uchar* dst
 #include "custom_hal.hpp"

 //! @cond IGNORED
-#define CALL_HAL_RET(name, fun, retval, ...) \
+
+#define CALL_HAL_RET2(name, fun, retval, ...) \
 { \
-    int res = __CV_EXPAND(fun(__VA_ARGS__, &retval)); \
+    int res = __CV_EXPAND(fun(__VA_ARGS__)); \
    if (res == CV_HAL_ERROR_OK) \
        return retval; \
    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
        CV_Error_(cv::Error::StsInternal, \
-            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res)); \
+        ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res)); \
 }

+#define CALL_HAL_RET(name, fun, retval, ...) \
+CALL_HAL_RET2(name, fun, retval, __VA_ARGS__, &retval)

 #define CALL_HAL(name, fun, ...) \
-{ \
-    int res = __CV_EXPAND(fun(__VA_ARGS__)); \
-    if (res == CV_HAL_ERROR_OK) \
-        return; \
-    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
-        CV_Error_(cv::Error::StsInternal, \
-            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res)); \
-}
+CALL_HAL_RET2(name, fun, ,__VA_ARGS__)
+
 //! @endcond

 #endif
--- a/modules/core/src/lut.cpp
+++ b/modules/core/src/lut.cpp
@ -363,7 +363,6 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
    _dst.createSameSize(_src, CV_MAKETYPE(_lut.depth(), cn));
    Mat dst = _dst.getMat();

-
    CALL_HAL(LUT, cv_hal_lut, src.data, src.step, src.type(), lut.data,
             lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows);

--- a/modules/core/src/matrix_operations.cpp
+++ b/modules/core/src/matrix_operations.cpp
@ -92,7 +92,7 @@ void cv::hconcat(InputArray _src, OutputArray dst)

    std::vector<Mat> src;
    _src.getMatVector(src);
-    hconcat(!src.empty() ? &src[0] : 0, src.size(), dst);
+    hconcat(!src.empty() ? &src[0] : nullptr, src.size(), dst);
 }

 void cv::vconcat(const Mat* src, size_t nsrc, OutputArray _dst)
@ -137,7 +137,7 @@ void cv::vconcat(InputArray _src, OutputArray dst)

    std::vector<Mat> src;
    _src.getMatVector(src);
-    vconcat(!src.empty() ? &src[0] : 0, src.size(), dst);
+    vconcat(!src.empty() ? &src[0] : nullptr, src.size(), dst);
 }

 //////////////////////////////////////// set identity ////////////////////////////////////////////
@ -175,7 +175,7 @@ static bool ocl_setIdentity( InputOutputArray _m, const Scalar& s )
           ocl::KernelArg::Constant(Mat(1, 1, sctype, s)));

    size_t globalsize[2] = { (size_t)m.cols * cn / kercn, ((size_t)m.rows + rowsPerWI - 1) / rowsPerWI };
-    return k.run(2, globalsize, NULL, false);
+    return k.run(2, globalsize, nullptr, false);
 }

 }
@ -216,8 +216,9 @@ void cv::setIdentity( InputOutputArray _m, const Scalar& s )

        for( int i = 0; i < rows; i++, data += step )
        {
-            for( int j = 0; j < cols; j++ )
-                data[j] = j == i ? val : 0;
+            std::fill(data, data + cols, 0.0);
+            if (i < cols)
+                data[i] = val;
        }
    }
    else
--- a/modules/core/src/mean.dispatch.cpp
+++ b/modules/core/src/mean.dispatch.cpp
@ -129,13 +129,29 @@ Scalar mean(InputArray _src, InputArray _mask)
    CV_Assert( mask.empty() || mask.type() == CV_8U || mask.type() == CV_8S || mask.type() == CV_Bool);

    int k, cn = src.channels(), depth = src.depth();
-    Scalar s;
+    Scalar s = Scalar::all(0.0);
+
+    CV_Assert( cn <= 4 );

    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_mean(src, mask, s), s)

+    if (src.isContinuous() && mask.isContinuous())
+    {
+        CALL_HAL_RET2(meanStdDev, cv_hal_meanStdDev, s, src.data, 0, (int)src.total(), 1, src.type(),
+                      &s[0], nullptr /*stddev*/, mask.data, 0);
+    }
+    else
+    {
+        if (src.dims <= 2)
+        {
+            CALL_HAL_RET2(meanStdDev, cv_hal_meanStdDev, s, src.data, src.step, src.cols, src.rows, src.type(),
+                          &s[0], nullptr, mask.data, mask.step);
+        }
+    }
+
    SumFunc func = getSumFunc(depth);

-    CV_Assert( cn <= 4 && func != 0 );
+    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src, &mask, 0};
    uchar* ptrs[2] = {};
@ -318,7 +334,6 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
 }
 #endif

-
 #ifdef HAVE_IPP
 static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
 {
@ -532,7 +547,8 @@ void meanStdDev(InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray
    uchar* ptrs[2] = {};
    NAryMatIterator it(arrays, ptrs);
    int total = (int)it.size, blockSize = total, partialBlockSize = 0;
-    int j, count = 0, nz0 = 0;
+    int j;
+    int64_t count = 0, nz0 = 0;
    double _buf[CV_CN_MAX*4];
    double *s = _buf, *sq = s + cn;
    int *sbuf = (int*)s, *sqbuf = (int*)sq;
--- a/modules/core/src/minmax.dispatch.cpp
+++ b/modules/core/src/minmax.dispatch.cpp
@ -314,10 +314,18 @@ void cv::minMaxIdx(InputArray _src, double* minVal,

    if (src.dims <= 2)
    {
-        CALL_HAL(minMaxIdx, cv_hal_minMaxIdx, src.data, src.step, src.cols*cn, src.rows,
-                 src.depth(), minVal, maxVal, minIdx, maxIdx, mask.data);
+        if ((size_t)src.step == (size_t)mask.step)
+        {
+            CALL_HAL(minMaxIdx, cv_hal_minMaxIdx, src.data, src.step, src.cols*cn, src.rows,
+                     src.depth(), minVal, maxVal, minIdx, maxIdx, mask.data);
+        }
+        else
+        {
+            CALL_HAL(minMaxIdxMaskStep, cv_hal_minMaxIdxMaskStep, src.data, src.step, src.cols*cn, src.rows,
+                     src.depth(), minVal, maxVal, minIdx, maxIdx, mask.data, mask.step);
+        }
    }
-    else if (src.isContinuous())
+    else if (src.isContinuous() && mask.isContinuous())
    {
        int res = cv_hal_minMaxIdx(src.data, 0, (int)src.total()*cn, 1, src.depth(),
                                   minVal, maxVal, minIdx, maxIdx, mask.data);
--- a/modules/core/src/norm.cpp
+++ b/modules/core/src/norm.cpp
@ -756,7 +756,10 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
                type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
-                (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
+                (
+                #if !IPP_DISABLE_NORM_8U
+                type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
+                #endif
                type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R :
                type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R :
                0) : 0;
@ -800,9 +803,20 @@ double norm( InputArray _src, int normType, InputArray _mask )
 #endif

    Mat src = _src.getMat(), mask = _mask.getMat();
+    int depth = src.depth(), cn = src.channels();
+    if( src.dims <= 2 )
+    {
+        double result;
+        CALL_HAL_RET(norm, cv_hal_norm, result, src.data, src.step, mask.data, mask.step, src.cols, src.rows, src.type(), normType);
+    }
+    else if( src.isContinuous() && mask.isContinuous() )
+    {
+        double result;
+        CALL_HAL_RET(norm, cv_hal_norm, result, src.data, 0, mask.data, 0, (int)src.total(), 1, src.type(), normType);
+    }
+
    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(src, normType, mask, _result), _result);

-    int depth = src.depth(), cn = src.channels();
    if( src.isContinuous() && mask.empty() )
    {
        size_t len = src.total()*cn;
@ -1083,18 +1097,27 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
                    0) : 0;
                ippiNormRelFuncNoHint ippiNormRel =
                    normType == NORM_INF ?
-                    (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
+                    (
+                    #if !IPP_DISABLE_NORM_8U
+                    type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
+                    #endif
                    type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
                    type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
                    type == CV_32F ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
                    0) :
                    normType == NORM_L1 ?
-                    (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
+                    (
+                    #if !IPP_DISABLE_NORM_8U
+                    type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
+                    #endif
                    type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
                    type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
                    0) :
                    normType == NORM_L2 || normType == NORM_L2SQR ?
-                    (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
+                    (
+                    #if !IPP_DISABLE_NORM_8U
+                    type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
+                    #endif
                    type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
                    type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
                    0) : 0;
@ -1202,18 +1225,27 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
                0) : 0;
            ippiNormDiffFuncNoHint ippiNormDiff =
                normType == NORM_INF ?
-                (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
+                (
+                #if !IPP_DISABLE_NORM_8U
+                type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
+                #endif
                type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
                type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
                type == CV_32F ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
                0) :
                normType == NORM_L1 ?
-                (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
+                (
+                #if !IPP_DISABLE_NORM_8U
+                type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
+                #endif
                type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
                type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
-                (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
+                (
+                #if !IPP_DISABLE_NORM_8U
+                type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
+                #endif
                type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
                type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
                0) : 0;
@ -1255,6 +1287,19 @@ double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask
                _result)
 #endif

+    Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
+    int depth = src1.depth(), cn = src1.channels();
+    if( src1.dims <= 2 )
+    {
+        double result;
+        CALL_HAL_RET(normDiff, cv_hal_normDiff, result, src1.data, src1.step, src2.data, src2.step, mask.data, mask.step, src1.cols, src1.rows, src1.type(), normType);
+    }
+    else if( src1.isContinuous() && src2.isContinuous() && mask.isContinuous() )
+    {
+        double result;
+        CALL_HAL_RET(normDiff, cv_hal_normDiff, result, src1.data, 0, src2.data, 0, mask.data, 0, (int)src1.total(), 1, src1.type(), normType);
+    }
+
    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(_src1, _src2, normType, _mask, _result), _result);

    if( normType & NORM_RELATIVE )
@ -1262,9 +1307,6 @@ double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask
        return norm(_src1, _src2, normType & ~NORM_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
    }

-    Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
-    int depth = src1.depth(), cn = src1.channels();
-
    normType &= 7;
    CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
               normType == NORM_L2 || normType == NORM_L2SQR ||
--- a/modules/core/src/opencl/runtime/opencl_core.cpp
+++ b/modules/core/src/opencl/runtime/opencl_core.cpp
@ -152,7 +152,7 @@ static void* WinGetProcAddress(const char* name)
 #define CV_CL_GET_PROC_ADDRESS(name) WinGetProcAddress(name)
 #endif // _WIN32

-#if defined(__linux__) || defined(__FreeBSD__)
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__)
 #include <dlfcn.h>
 #include <stdio.h>

--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@ -152,7 +152,7 @@ const uint64_t AT_HWCAP = NT_GNU_HWCAP;
 #endif


-#if (defined __ppc64__ || defined __PPC64__) && defined __unix__
+#if ((defined __ppc64__ || defined __PPC64__) && (defined HAVE_GETAUXVAL || defined HAVE_ELF_AUX_INFO))
 # include "sys/auxv.h"
 # ifndef AT_HWCAP2
 #   define AT_HWCAP2 26
@ -738,7 +738,7 @@ struct HWFeatures
        have[CV_CPU_MSA] = true;
    #endif

-    #if (defined __ppc64__ || defined __PPC64__) && defined __linux__
+    #if (defined __ppc64__ || defined __PPC64__) && defined HAVE_GETAUXVAL
        unsigned int hwcap = getauxval(AT_HWCAP);
        if (hwcap & PPC_FEATURE_HAS_VSX) {
            hwcap = getauxval(AT_HWCAP2);
@ -748,7 +748,7 @@ struct HWFeatures
                have[CV_CPU_VSX] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0;
            }
        }
-    #elif (defined __ppc64__ || defined __PPC64__) && defined __FreeBSD__
+    #elif (defined __ppc64__ || defined __PPC64__) && defined HAVE_ELF_AUX_INFO
        unsigned long hwcap = 0;
        elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
        if (hwcap & PPC_FEATURE_HAS_VSX) {
@ -760,7 +760,7 @@ struct HWFeatures
            }
        }
    #else
-        // TODO: AIX, OpenBSD
+        // TODO: AIX
        #if CV_VSX || defined _ARCH_PWR8 || defined __POWER9_VECTOR__
            have[CV_CPU_VSX] = true;
        #endif
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@ -3006,6 +3006,19 @@ TEST(Core_MeanStdDev, regression_multichannel)
    }
 }

+// Regression test for https://github.com/opencv/opencv/issues/26861
+TEST(Core_MeanStdDevTest, LargeImage)
+{
+    applyTestTag(CV_TEST_TAG_VERYLONG);
+    applyTestTag(CV_TEST_TAG_MEMORY_14GB);
+
+    // 65536 rows x 32778 columns = 2,148,139,008 elements, i.e. just above INT_MAX,
+    // so an `int` element counter cannot hold the total.
+    const int rows = 1 << 16;
+    const int cols = (1 << 15) + 10;
+    const cv::Mat allOnes = cv::Mat::ones(rows, cols, CV_8U);
+
+    cv::Scalar meanValue, stddevValue;
+    cv::meanStdDev(allOnes, meanValue, stddevValue);
+
+    // Every pixel equals 1, so the mean must be 1 and the deviation 0.
+    EXPECT_NEAR(meanValue[0], 1.0, 1e-5);
+    EXPECT_NEAR(stddevValue[0], 0.0, 1e-5);
+}
+
 template <typename T> static inline
 void testDivideInitData(Mat& src1, Mat& src2)
 {
--- a/modules/core/test/test_math.cpp
+++ b/modules/core/test/test_math.cpp
@ -505,7 +505,7 @@ double Core_DotProductTest::get_success_error_level( int test_case_idx, int i, i
 #ifdef __riscv
    const int depth = test_mat[i][j].depth();
    if (depth == CV_64F)
-        return 1.7e-5;
+        return 2.5e-5;
 #endif
    return Core_MatrixTest::get_success_error_level( test_case_idx, i, j );
 }
--- a/modules/dnn/src/vkcom/src/context.cpp
+++ b/modules/dnn/src/vkcom/src/context.cpp
@ -187,13 +187,6 @@ bool checkExtensionAvailability(const char *extension_name,

 static int init_instance_extension(VkInstance& kInstance)
 {
-#if defined(__ANDROID_API__) && __ANDROID_API__ >= 26
-    if (support_VK_KHR_android_surface)
-    {
-        vkCreateAndroidSurfaceKHR = (PFN_vkCreateAndroidSurfaceKHR)vkGetInstanceProcAddr(kInstance, "vkCreateAndroidSurfaceKHR");
-    }
-#endif // __ANDROID_API__ >= 26
-
    return 0;
 }

@ -648,7 +641,7 @@ GPUInfo Context::parseGPUInfo(VkPhysicalDevice& kPhysicalDevice)
    info.support_VK_EXT_memory_budget = 0;
    info.support_VK_EXT_queue_family_foreign = 0;
 #if defined(__ANDROID_API__) && __ANDROID_API__ >= 26
-    gpu_info.support_VK_ANDROID_external_memory_android_hardware_buffer = 0;
+    info.support_VK_ANDROID_external_memory_android_hardware_buffer = 0;
 #endif // __ANDROID_API__ >= 26
    info.support_VK_NV_cooperative_matrix = 0;
    for (uint32_t j = 0; j < deviceExtensionPropertyCount; j++)
--- a/modules/features/src/fast.cpp
+++ b/modules/features/src/fast.cpp
@ -368,8 +368,6 @@ static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints,
 }
 #endif

-
-
 static inline int hal_FAST(cv::Mat& src, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression, FastFeatureDetector::DetectorType type)
 {
    if (threshold > 20)
@ -437,7 +435,9 @@ void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool
    cv::Mat img = _img.getMat();
    CALL_HAL(fast_dense, hal_FAST, img, keypoints, threshold, nonmax_suppression, type);

-    size_t keypoints_count;
+    size_t keypoints_count = 10000;
+    keypoints.clear();
+    keypoints.resize(keypoints_count);
    CALL_HAL(fast, cv_hal_FAST, img.data, img.step, img.cols, img.rows,
             (uchar*)(keypoints.data()), &keypoints_count, threshold, nonmax_suppression, type);

--- a/modules/features/test/test_fast.cpp
+++ b/modules/features/test/test_fast.cpp
@ -118,8 +118,8 @@ void CV_FastTest::run( int )
    read( fs["exp_kps2"], exp_kps2, Mat() );
    fs.release();

-     if ( exp_kps1.size != kps1.size || 0 != cvtest::norm(exp_kps1, kps1, NORM_L2) ||
-          exp_kps2.size != kps2.size || 0 != cvtest::norm(exp_kps2, kps2, NORM_L2))
+    if ( exp_kps1.size != kps1.size || 0 != cvtest::norm(exp_kps1, kps1, NORM_L2) ||
+         exp_kps2.size != kps2.size || 0 != cvtest::norm(exp_kps2, kps2, NORM_L2))
    {
        ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
        return;
@ -135,4 +135,34 @@ void CV_FastTest::run( int )

 TEST(Features2d_FAST, regression) { CV_FastTest test; test.safe_run(); }

+// #define DUMP_TEST_DATA
+
+// Checks FAST (TYPE_9_16, threshold 100) with non-maximum suppression disabled against
+// reference keypoints stored in fast/result_no_nonmax.xml. The keypoints are compared as a
+// raw byte buffer, so any difference in any KeyPoint field is detected.
+TEST(Features2d_FAST, noNMS)
+{
+    Mat img = imread(string(cvtest::TS::ptr()->get_data_path()) + "inpaint/orig.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty());
+    string xml = string(cvtest::TS::ptr()->get_data_path()) + "fast/result_no_nonmax.xml";
+
+    vector<KeyPoint> keypoints;
+    FAST(img, keypoints, 100, false, FastFeatureDetector::DetectorType::TYPE_9_16);
+    // Taking the address of the first element of an empty vector is undefined behaviour,
+    // so stop here if the detector produced nothing.
+    ASSERT_FALSE(keypoints.empty());
+    // Non-owning 1 x N byte view over the keypoint array.
+    Mat kps(1, (int)(keypoints.size() * sizeof(KeyPoint)), CV_8U, keypoints.data());
+
+    Mat gt_kps;
+    FileStorage fs(xml, FileStorage::READ);
+#ifdef DUMP_TEST_DATA
+    // When the reference file is missing, create it from the current output and reopen it.
+    if (!fs.isOpened())
+    {
+        fs.open(xml, FileStorage::WRITE);
+        fs << "exp_kps" << kps;
+        fs.release();
+        fs.open(xml, FileStorage::READ);
+    }
+#endif
+    ASSERT_TRUE(fs.isOpened());
+    fs["exp_kps"] >> gt_kps;
+    fs.release();
+    ASSERT_GT(gt_kps.total(), size_t(0));
+    // Report a keypoint-count mismatch explicitly instead of relying on norm() to reject it.
+    ASSERT_EQ(gt_kps.total(), kps.total());
+
+    ASSERT_EQ( 0, cvtest::norm(gt_kps, kps, NORM_L2));
+}
+
 }} // namespace
--- a/modules/highgui/src/window_cocoa.mm
+++ b/modules/highgui/src/window_cocoa.mm
--- a/modules/highgui/src/window_w32.cpp
+++ b/modules/highgui/src/window_w32.cpp
@ -2123,9 +2123,15 @@ static void showSaveDialog(CvWindow& window)
 #ifdef HAVE_WEBP
                      "WebP files (*.webp)\0*.webp\0"
 #endif
-                      "Portable image format (*.pbm;*.pgm;*.ppm;*.pxm;*.pnm)\0*.pbm;*.pgm;*.ppm;*.pxm;*.pnm\0"
+                      "Portable image format (*.pbm;*.pgm;*.ppm;*.pnm;*.pam)\0*.pbm;*.pgm;*.ppm;*.pnm;*.pam\0"
 #ifdef HAVE_OPENEXR
                      "OpenEXR Image files (*.exr)\0*.exr\0"
+#endif
+#ifdef HAVE_AVIF
+                      "AVIF files (*.avif)\0*.avif\0"
+#endif
+#ifdef HAVE_IMGCODEC_GIF
+                      "Graphics Interchange Format 89a(*.gif)\0*.gif\0"
 #endif
                      "Radiance HDR (*.hdr;*.pic)\0*.hdr;*.pic\0"
                      "Sun raster files (*.sr;*.ras)\0*.sr;*.ras\0"
@ -2147,7 +2153,7 @@ static void showSaveDialog(CvWindow& window)
    }
 #else
    CV_UNUSED(window);
-    CV_LOG_WARNING("Save dialog requires enabled 'imgcodecs' module.");
+    CV_LOG_WARNING(NULL, "Save dialog requires enabled 'imgcodecs' module.");
    return;
 #endif
 }
--- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp
+++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
@ -65,6 +65,7 @@ namespace cv
 //! @{

 //! Imread flags
+//! @note IMREAD_COLOR_BGR (IMREAD_COLOR) and IMREAD_COLOR_RGB cannot be set at the same time.
 enum ImreadModes {
       IMREAD_UNCHANGED            = -1, //!< If set, return the loaded image as is (with alpha channel, otherwise it gets cropped). Ignore EXIF orientation.
       IMREAD_GRAYSCALE            = 0,  //!< If set, always convert image to the single channel grayscale image (codec internal conversion).
@ -263,11 +264,11 @@ struct CV_EXPORTS_W_SIMPLE Animation
    - If a negative value or a value beyond the maximum of `0xffff` (65535) is provided, it is reset to `0`
    (infinite looping) to maintain valid bounds.

-    @param bgColor A `Scalar` object representing the background color in BGRA format:
+    @param bgColor A `Scalar` object representing the background color in BGR format:
    - Defaults to `Scalar()`, indicating an empty color (usually transparent if supported).
    - This background color provides a solid fill behind frames that have transparency, ensuring a consistent display appearance.
    */
-    Animation(int loopCount = 0, Scalar bgColor = Scalar());
+    CV_WRAP Animation(int loopCount = 0, Scalar bgColor = Scalar());
 };

 /** @brief Loads an image from a file.
@ -413,13 +414,13 @@ can be saved using this function, with these exceptions:
 - With JPEG 2000 encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) images can be saved.
 - With JPEG XL encoder, 8-bit unsigned (CV_8U), 16-bit unsigned (CV_16U) and 32-bit float(CV_32F) images can be saved.
  - JPEG XL images with an alpha channel can be saved using this function.
-    To do this, create 8-bit (or 16-bit, 32-bit float) 4-channel image BGRA, where the alpha channel goes last.
-    Fully transparent pixels should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535/1.0.
+    To achieve this, create an 8-bit 4-channel (CV_8UC4) / 16-bit 4-channel (CV_16UC4) / 32-bit float 4-channel (CV_32FC4) BGRA image, ensuring the alpha channel is the last component.
+    Fully transparent pixels should have an alpha value of 0, while fully opaque pixels should have an alpha value of 255/65535/1.0.
 - With PAM encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) images can be saved.
 - With PNG encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) images can be saved.
-  - PNG images with an alpha channel can be saved using this function. To do this, create
-    8-bit (or 16-bit) 4-channel image BGRA, where the alpha channel goes last. Fully transparent pixels
-    should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535 (see the code sample below).
+  - PNG images with an alpha channel can be saved using this function.
+    To achieve this, create an 8-bit 4-channel (CV_8UC4) / 16-bit 4-channel (CV_16UC4) BGRA image, ensuring the alpha channel is the last component.
+    Fully transparent pixels should have an alpha value of 0, while fully opaque pixels should have an alpha value of 255/65535 (see the code sample below).
 - With PGM/PPM encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) images can be saved.
 - With TIFF encoder, 8-bit unsigned (CV_8U), 8-bit signed (CV_8S),
                     16-bit unsigned (CV_16U), 16-bit signed (CV_16S),
@ -429,6 +430,11 @@ can be saved using this function, with these exceptions:
  - Multiple images (vector of Mat) can be saved in TIFF format (see the code sample below).
  - 32-bit float 3-channel (CV_32FC3) TIFF images will be saved
    using the LogLuv high dynamic range encoding (4 bytes per pixel)
+- With GIF encoder, 8-bit unsigned (CV_8U) images can be saved.
+  - GIF images with an alpha channel can be saved using this function.
+    To achieve this, create an 8-bit 4-channel (CV_8UC4) BGRA image, ensuring the alpha channel is the last component.
+    Fully transparent pixels should have an alpha value of 0, while fully opaque pixels should have an alpha value of 255.
+  - 8-bit single-channel images (CV_8UC1) are not supported due to GIF's limitation to indexed color formats.

 If the image format is not supported, the image will be converted to 8-bit unsigned (CV_8U) and saved that way.

--- a/modules/imgcodecs/misc/java/test/ImgcodecsTest.java
+++ b/modules/imgcodecs/misc/java/test/ImgcodecsTest.java
@ -1,13 +1,46 @@
 package org.opencv.test.imgcodecs;

+import org.opencv.core.Mat;
 import org.opencv.core.MatOfByte;
 import org.opencv.core.MatOfInt;
+import org.opencv.imgproc.Imgproc;
 import org.opencv.imgcodecs.Imgcodecs;
+import org.opencv.imgcodecs.Animation;
 import org.opencv.test.OpenCVTestCase;
 import org.opencv.test.OpenCVTestRunner;

+import java.util.ArrayList;
+import java.util.List;
+
 public class ImgcodecsTest extends OpenCVTestCase {

+    /**
+     * Writes a two-frame animation to a temporary PNG file, reads it back and checks
+     * that both frames are returned.
+     */
+    public void testAnimation() {
+        Mat src = Imgcodecs.imread(OpenCVTestRunner.LENA_PATH, Imgcodecs.IMREAD_REDUCED_COLOR_4);
+        assertFalse(src.empty());
+
+        // Second frame: the same picture converted with COLOR_BGR2RGB, so the frames differ.
+        Mat rgb = new Mat();
+        Imgproc.cvtColor(src, rgb, Imgproc.COLOR_BGR2RGB);
+
+        Animation animation = new Animation();
+        List<Mat> frames = new ArrayList<>();
+        MatOfInt durations = new MatOfInt(100, 100);
+
+        frames.add(src);
+        frames.add(rgb);
+
+        animation.set_frames(frames);
+        animation.set_durations(durations);
+
+        String filename = OpenCVTestRunner.getTempFileName("png");
+        assertTrue(Imgcodecs.imwriteanimation(filename, animation));
+
+        Animation readAnimation = new Animation();
+        assertTrue(Imgcodecs.imreadanimation(filename, readAnimation));
+
+        // assertEquals reports the actual frame count when the check fails.
+        List<Mat> readFrames = readAnimation.get_frames();
+        assertEquals(2, readFrames.size());
+    }
+
    public void testImdecode() {
        fail("Not yet implemented");
    }
--- a/modules/imgcodecs/perf/perf_decode_encode.cpp
+++ b/modules/imgcodecs/perf/perf_decode_encode.cpp
@ -0,0 +1,131 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test
+{
+
+#ifdef HAVE_PNG
+
+using namespace perf;
+
+// Both fixtures take a single string parameter: the file extension (e.g. ".png")
+// that the tests below pass to imencode().
+typedef perf::TestBaseWithParam<std::string> Decode;
+typedef perf::TestBaseWithParam<std::string> Encode;
+
+// File extensions exercised by the single-image Decode/Encode tests.
+// ".bmp" and ".png" are always present; every other entry is compiled in only
+// when the matching HAVE_* macro is defined.
+const string exts[] = {
+#ifdef HAVE_AVIF
+    ".avif",
+#endif
+    ".bmp",
+#ifdef HAVE_IMGCODEC_GIF
+    ".gif",
+#endif
+#if (defined(HAVE_JASPER) && defined(OPENCV_IMGCODECS_ENABLE_JASPER_TESTS)) \
+    || defined(HAVE_OPENJPEG)
+    ".jp2",
+#endif
+#ifdef HAVE_JPEG
+    ".jpg",
+#endif
+#ifdef HAVE_JPEGXL
+    ".jxl",
+#endif
+    ".png",
+#ifdef HAVE_IMGCODEC_PXM
+    ".ppm",
+#endif
+#ifdef HAVE_IMGCODEC_SUNRASTER
+    ".ras",
+#endif
+#ifdef HAVE_TIFF
+    ".tiff",
+#endif
+#ifdef HAVE_WEBP
+    ".webp",
+#endif
+};
+
+// File extensions used by the Encode.multi test, which encodes a vector of images.
+// ".png" is always present; the other entries depend on the matching HAVE_* macro.
+const string exts_multi[] = {
+#ifdef HAVE_AVIF
+    ".avif",
+#endif
+#ifdef HAVE_IMGCODEC_GIF
+    ".gif",
+#endif
+    ".png",
+#ifdef HAVE_TIFF
+    ".tiff",
+#endif
+#ifdef HAVE_WEBP
+    ".webp",
+#endif
+};
+
+// Measures imdecode() with IMREAD_UNCHANGED on a buffer produced by encoding the
+// reference image to the format given by the test parameter.
+PERF_TEST_P(Decode, bgr, testing::ValuesIn(exts))
+{
+    String filename = getDataPath("perf/1920x1080.png");
+
+    Mat src = imread(filename);
+    // Fatal assertions: without a source image or an encoded buffer there is nothing to measure.
+    ASSERT_FALSE(src.empty()) << "Cannot open test image perf/1920x1080.png";
+    vector<uchar> buf;
+    ASSERT_TRUE(imencode(GetParam(), src, buf));
+
+    TEST_CYCLE() imdecode(buf, IMREAD_UNCHANGED);
+
+    SANITY_CHECK_NOTHING();
+}
+
+// Measures imdecode() with IMREAD_COLOR_RGB on a buffer produced by encoding the
+// reference image to the format given by the test parameter.
+PERF_TEST_P(Decode, rgb, testing::ValuesIn(exts))
+{
+    String filename = getDataPath("perf/1920x1080.png");
+
+    Mat src = imread(filename);
+    // Fatal assertions: without a source image or an encoded buffer there is nothing to measure.
+    ASSERT_FALSE(src.empty()) << "Cannot open test image perf/1920x1080.png";
+    vector<uchar> buf;
+    ASSERT_TRUE(imencode(GetParam(), src, buf));
+
+    TEST_CYCLE() imdecode(buf, IMREAD_COLOR_RGB);
+
+    SANITY_CHECK_NOTHING();
+}
+
+// Measures imencode() of the reference image to the format given by the test parameter
+// and prints the encoded size relative to the raw pixel data.
+PERF_TEST_P(Encode, bgr, testing::ValuesIn(exts))
+{
+    String filename = getDataPath("perf/1920x1080.png");
+
+    Mat src = imread(filename);
+    // Fatal: an empty image would be passed to imencode() and would make the ratio below divide by zero.
+    ASSERT_FALSE(src.empty()) << "Cannot open test image perf/1920x1080.png";
+    vector<uchar> buf;
+
+    TEST_CYCLE() imencode(GetParam(), src, buf);
+
+    std::cout << "Encoded buffer size: " << buf.size()
+        << " bytes, Compression ratio: " << std::fixed << std::setprecision(2)
+        << (static_cast<double>(buf.size()) / (src.total() * src.channels())) * 100.0 << "%" << std::endl;
+
+    SANITY_CHECK_NOTHING();
+}
+
+// Measures imencode() of a multi-image sequence (the reference image plus a modified copy)
+// to the format given by the test parameter.
+PERF_TEST_P(Encode, multi, testing::ValuesIn(exts_multi))
+{
+    String filename = getDataPath("perf/1920x1080.png");
+    vector<Mat> vec;
+    // Fatal assertions: vec.back() below is undefined behaviour on an empty vector.
+    ASSERT_TRUE(imreadmulti(filename, vec));
+    ASSERT_FALSE(vec.empty());
+    // Append a second frame that differs from the first one by a drawn circle.
+    vec.push_back(vec.back().clone());
+    circle(vec.back(), Point(100, 100), 45, Scalar(0, 0, 255, 0), 2, LINE_AA);
+    vector<uchar> buf;
+    // NOTE(review): this writes "test<ext>" into the current directory and never removes it.
+    EXPECT_TRUE(imwrite("test" + GetParam(), vec));
+
+    TEST_CYCLE() imencode(GetParam(), vec, buf);
+
+    std::cout << "Encoded buffer size: " << buf.size()
+        << " bytes, Compression ratio: " << std::fixed << std::setprecision(2)
+        << (static_cast<double>(buf.size()) / (vec[0].total() * vec[0].channels())) * 100.0 << "%" << std::endl;
+
+    SANITY_CHECK_NOTHING();
+}
+#endif // HAVE_PNG
+
+} // namespace
--- a/modules/imgcodecs/src/grfmt_avif.cpp
+++ b/modules/imgcodecs/src/grfmt_avif.cpp
@ -298,11 +298,6 @@ bool AvifEncoder::isFormatSupported(int depth) const {
  return (depth == CV_8U || depth == CV_16U);
 }

-bool AvifEncoder::write(const Mat &img, const std::vector<int> &params) {
-  std::vector<Mat> img_vec(1, img);
-  return writemulti(img_vec, params);
-}
-
 bool AvifEncoder::writeanimation(const Animation& animation,
                                 const std::vector<int> &params) {
  int bit_depth = 8;
--- a/modules/imgcodecs/src/grfmt_avif.hpp
+++ b/modules/imgcodecs/src/grfmt_avif.hpp
@ -41,7 +41,6 @@ class AvifEncoder CV_FINAL : public BaseImageEncoder {
  ~AvifEncoder() CV_OVERRIDE;

  bool isFormatSupported(int depth) const CV_OVERRIDE;
-  bool write(const Mat& img, const std::vector<int>& params) CV_OVERRIDE;
  bool writeanimation(const Animation& animation, const std::vector<int>& params) CV_OVERRIDE;

  ImageEncoder newEncoder() const CV_OVERRIDE;
--- a/modules/imgcodecs/src/grfmt_base.cpp
+++ b/modules/imgcodecs/src/grfmt_base.cpp
@ -140,6 +140,11 @@ bool BaseImageEncoder::setDestination( std::vector<uchar>& buf )
    return true;
 }

+bool BaseImageEncoder::write(const Mat &img, const std::vector<int> &params) {
+    // Default single-image path: wrap the image in a one-element sequence and
+    // delegate to writemulti(), returning its result unchanged.
+    const std::vector<Mat> single_frame(1, img);
+    return writemulti(single_frame, params);
+}
+
 bool BaseImageEncoder::writemulti(const std::vector<Mat>& img_vec, const std::vector<int>& params)
 {
    if(img_vec.size() > 1)
@ -157,6 +162,7 @@ bool BaseImageEncoder::writemulti(const std::vector<Mat>& img_vec, const std::ve

 bool BaseImageEncoder::writeanimation(const Animation&, const std::vector<int>& )
 {
+    CV_LOG_WARNING(NULL, "No Animation encoder for specified file extension");
    return false;
 }

@ -165,7 +171,7 @@ ImageEncoder BaseImageEncoder::newEncoder() const
    return ImageEncoder();
 }

-void BaseImageEncoder::throwOnEror() const
+void BaseImageEncoder::throwOnError() const
 {
    if(!m_last_error.empty())
    {
--- a/modules/imgcodecs/src/grfmt_base.hpp
+++ b/modules/imgcodecs/src/grfmt_base.hpp
@ -202,12 +202,11 @@ public:

    /**
     * @brief Encode and write the image data.
-     * This is a pure virtual function that must be implemented by derived classes.
     * @param img The Mat object containing the image data to be encoded.
     * @param params A vector of parameters controlling the encoding process (e.g., compression level).
     * @return true if the image was successfully written, false otherwise.
     */
-    virtual bool write(const Mat& img, const std::vector<int>& params) = 0;
+    virtual bool write(const Mat& img, const std::vector<int>& params);

    /**
     * @brief Encode and write multiple images (e.g., for animated formats).
@ -236,7 +235,7 @@ public:
     * @brief Throw an exception based on the last error encountered during encoding.
     * This method can be used to propagate error conditions back to the caller.
     */
-    virtual void throwOnEror() const;
+    virtual void throwOnError() const;

 protected:
    String m_description;    ///< Description of the encoder (e.g., format name, capabilities).
--- a/modules/imgcodecs/src/grfmt_gif.cpp
+++ b/modules/imgcodecs/src/grfmt_gif.cpp
@ -14,7 +14,7 @@ namespace cv
 //////////////////////////////////////////////////////////////////////
 GifDecoder::GifDecoder() {
    m_signature = R"(GIF)";
-    m_type = CV_8UC4;
+    m_type = CV_8UC3;
    bgColor = -1;
    m_buf_supported = true;
    globalColorTableSize = 0;
@ -172,12 +172,17 @@ bool GifDecoder::readData(Mat &img) {
            } else {
                cvtColor(img_, img, COLOR_BGRA2BGR);
            }
-        } else {
+        } else if (img.channels() == 4){
            if (m_use_rgb) {
                cvtColor(img_, img, COLOR_BGRA2RGBA);
            } else {
                img_.copyTo(img);
            }
+        } else if (img.channels() == 1){
+            cvtColor(img_, img, COLOR_BGRA2GRAY);
+        } else {
+            CV_LOG_WARNING(NULL, cv::format("Unsupported channels: %d", img.channels()));
+            hasRead = false;
        }
    }

@ -414,6 +419,7 @@ bool GifDecoder::getFrameCount_() {
            if (extension == 0xFF) {
                int len = m_strm.getByte();
                while (len) {
+                    // TODO: Strictly speaking, the Application Identifier and Authentication Code should be checked.
                    if (len == 3) {
                        if (m_strm.getByte() == 0x01) {
                            m_animation.loop_count = m_strm.getWord();
@ -427,9 +433,28 @@ bool GifDecoder::getFrameCount_() {
                    }
                    len = m_strm.getByte();
                }
+            } else if (extension == 0xF9) {
+                int len = m_strm.getByte();
+                while (len) {
+                    if (len == 4) {
+                        int packedFields = m_strm.getByte();
+                        //  3 bit : Reserved
+                        //  3 bit : Disposal Method
+                        //  1 bit : User Input Flag
+                        //  1 bit : Transparent Color Flag
+                        if ( (packedFields & 0x01)== 0x01) {
+                            m_type = CV_8UC4; // Transparent Index is given.
+                        }
+                        m_strm.skip(2); // Delay Time
+                        m_strm.skip(1); // Transparent Color Index
+                    } else {
+                        m_strm.skip(len);
+                    }
+                    len = m_strm.getByte();
+                }
            } else {
                // if it does not belong to any of the extension type mentioned in the GIF Specification
-                if (extension != 0xF9 && extension != 0xFE && extension != 0x01) {
+                if (extension != 0xFE && extension != 0x01) {
                    CV_LOG_WARNING(NULL, "found Unknown Extension Type: " + std::to_string(extension));
                }
                int len = m_strm.getByte();
@ -514,19 +539,11 @@ GifEncoder::~GifEncoder() {
    close();
 }

-bool GifEncoder::isFormatSupported(int depth) const {
-    return depth == CV_8U;
-}
-
-bool GifEncoder::write(const Mat &img, const std::vector<int> &params) {
-    std::vector<Mat> img_vec(1, img);
-    return writemulti(img_vec, params);
-}
-
 bool GifEncoder::writeanimation(const Animation& animation, const std::vector<int>& params) {
    if (animation.frames.empty()) {
        return false;
    }
+    CV_CheckDepthEQ(animation.frames[0].depth(), CV_8U, "GIF encoder supports only 8-bit unsigned images");

    if (m_buf) {
        if (!strm.open(*m_buf)) {
--- a/modules/imgcodecs/src/grfmt_gif.hpp
+++ b/modules/imgcodecs/src/grfmt_gif.hpp
@ -83,9 +83,6 @@ public:
    GifEncoder();
    ~GifEncoder() CV_OVERRIDE;

-    bool isFormatSupported(int depth) const CV_OVERRIDE;
-
-    bool write(const Mat& img, const std::vector<int>& params) CV_OVERRIDE;
    bool writeanimation(const Animation& animation, const std::vector<int>& params) CV_OVERRIDE;

    ImageEncoder newEncoder() const CV_OVERRIDE;
--- a/modules/imgcodecs/src/grfmt_jpegxl.cpp
+++ b/modules/imgcodecs/src/grfmt_jpegxl.cpp
@ -12,16 +12,31 @@

 namespace cv
 {
+// Callback functions for JpegXLDecoder
+static void cbRGBtoBGR_8U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);
+static void cbRGBAtoBGRA_8U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);
+static void cbRGBtoBGR_16U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);
+static void cbRGBAtoBGRA_16U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);
+static void cbRGBtoBGR_32F(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);
+static void cbRGBAtoBGRA_32F(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);
+static void cbRGBtoGRAY_8U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);
+static void cbRGBAtoGRAY_8U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);
+static void cbRGBtoGRAY_16U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);
+static void cbRGBAtoGRAY_16U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);
+static void cbRGBtoGRAY_32F(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);
+static void cbRGBAtoGRAY_32F(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels);

 /////////////////////// JpegXLDecoder ///////////////////

-JpegXLDecoder::JpegXLDecoder() : m_f(nullptr, &fclose)
+JpegXLDecoder::JpegXLDecoder() : m_f(nullptr, &fclose),
+                                 m_read_buffer(16384,0) // 16KB chunks
 {
    m_signature = "\xFF\x0A";
    m_decoder = nullptr;
-    m_buf_supported = false;
-    m_type = m_convert = -1;
+    m_buf_supported = true;
+    m_type = -1;
    m_status = JXL_DEC_NEED_MORE_INPUT;
+    m_is_mbuf_set = false;
 }

 JpegXLDecoder::~JpegXLDecoder()
@ -32,13 +47,14 @@ JpegXLDecoder::~JpegXLDecoder()
 void JpegXLDecoder::close()
 {
    if (m_decoder)
-        m_decoder.release();
+        m_decoder.reset();
    if (m_f)
-        m_f.release();
+        m_f.reset();
    m_read_buffer = {};
    m_width = m_height = 0;
-    m_type = m_convert = -1;
+    m_type = -1;
    m_status = JXL_DEC_NEED_MORE_INPUT;
+    m_is_mbuf_set = false;
 }

 // see https://github.com/libjxl/libjxl/blob/v0.10.0/doc/format_overview.md
@ -76,13 +92,16 @@ ImageDecoder JpegXLDecoder::newDecoder() const
    return makePtr<JpegXLDecoder>();
 }

-bool JpegXLDecoder::read(Mat* pimg)
+bool JpegXLDecoder::readHeader()
 {
-    // Open file
-    if (!m_f) {
-        m_f.reset(fopen(m_filename.c_str(), "rb"));
-        if (!m_f)
-            return false;
+    if (m_buf.empty()) {
+        // Open file
+        if (!m_f) {
+            m_f.reset(fopen(m_filename.c_str(), "rb"));
+            if (!m_f) {
+                return false;
+            }
+        }
    }

    // Initialize decoder
@ -106,51 +125,132 @@ bool JpegXLDecoder::read(Mat* pimg)
        }
    }

-    // Create buffer for reading
-    const size_t read_buffer_size = 16384;  // 16KB chunks
-    if (m_read_buffer.capacity() < read_buffer_size)
-        m_read_buffer.resize(read_buffer_size);
+    // Reset to read header data stream
+    m_is_mbuf_set = false;

-    // Create image if needed
-    if (m_type != -1 && pimg) {
-        pimg->create(m_height, m_width, m_type);
-        if (!pimg->isContinuous())
+    return read();
+}
+
+bool JpegXLDecoder::readData(Mat& img)
+{
+    if (!m_decoder || m_width == 0 || m_height == 0 || m_type == -1)
+        return false;
+
+    // Prepare to decode image
+    const uint32_t scn = CV_MAT_CN(m_type);        // from image
+    const uint32_t dcn = (uint32_t)img.channels(); // to OpenCV
+    const int depth = CV_MAT_DEPTH(img.type());
+    JxlImageOutCallback cbFunc = nullptr;
+
+    CV_CheckChannels(scn, (scn == 1 || scn == 3 || scn == 4), "Unsupported src channels");
+    CV_CheckChannels(dcn, (dcn == 1 || dcn == 3 || dcn == 4), "Unsupported dst channels");
+    CV_CheckDepth(depth, (depth == CV_8U || depth == CV_16U || depth == CV_32F), "Unsupported depth");
+
+    m_format = {
+        dcn,
+        JXL_TYPE_UINT8, // (temporary)
+        JXL_NATIVE_ENDIAN, // endianness
+        0 // align stride to bytes
+    };
+    switch (depth) {
+        case CV_8U:  m_format.data_type = JXL_TYPE_UINT8; break;
+        case CV_16U: m_format.data_type = JXL_TYPE_UINT16; break;
+        case CV_32F: m_format.data_type = JXL_TYPE_FLOAT; break;
+        default: break;
+    }
+    // libjxl cannot read to BGR pixel order directly.
+    // So we have to use callback function to convert from RGB(A) to BGR(A).
+    if (!m_use_rgb) {
+        switch (dcn) {
+            case 1:  break;
+            case 3:  cbFunc = (depth == CV_32F)? cbRGBtoBGR_32F:   (depth == CV_16U)? cbRGBtoBGR_16U:   cbRGBtoBGR_8U; break;
+            case 4:  cbFunc = (depth == CV_32F)? cbRGBAtoBGRA_32F: (depth == CV_16U)? cbRGBAtoBGRA_16U: cbRGBAtoBGRA_8U; break;
+            default: break;
+        }
+    }
+    // libjxl cannot convert from color image to gray image directly.
+    // So we have to use callback function to convert from RGB(A) to GRAY.
+    if( (scn >= 3) && (dcn == 1) )
+    {
+        m_format.num_channels = scn;
+        switch (scn) {
+            case 3:  cbFunc = (depth == CV_32F)? cbRGBtoGRAY_32F:  (depth == CV_16U)? cbRGBtoGRAY_16U:  cbRGBtoGRAY_8U; break;
+            case 4:  cbFunc = (depth == CV_32F)? cbRGBAtoGRAY_32F: (depth == CV_16U)? cbRGBAtoGRAY_16U: cbRGBAtoGRAY_8U; break;
+            default: break;
+        }
+    }
+    if(cbFunc != nullptr)
+    {
+        if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutCallback(m_decoder.get(),
+                                                             &m_format,
+                                                             cbFunc,
+                                                             static_cast<void*>(&img)))
+        {
            return false;
+        }
+    }else{
        if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(m_decoder.get(),
                                                           &m_format,
-                                                           pimg->ptr<uint8_t>(),
-                                                           pimg->total() * pimg->elemSize())) {
+                                                           img.ptr<uint8_t>(),
+                                                           img.total() * img.elemSize()))
+        {
            return false;
        }
    }

+    return read();
+}
+
+// Common reading routine for readHeader() and readData()
+bool JpegXLDecoder::read()
+{
    // Start decoding loop
    do {
        // Check if we need more input
        if (m_status == JXL_DEC_NEED_MORE_INPUT) {
-            size_t remaining = JxlDecoderReleaseInput(m_decoder.get());
-            // Move any remaining bytes to the beginning
-            if (remaining > 0)
-                memmove(m_read_buffer.data(), m_read_buffer.data() + m_read_buffer.size() - remaining, remaining);
-            // Read more data from file
-            size_t bytes_read = fread(m_read_buffer.data() + remaining,
-                                    1, m_read_buffer.size() - remaining, m_f.get());
-            if (bytes_read == 0) {
-                if (ferror(m_f.get())) {
-                    CV_LOG_WARNING(NULL, "Error reading input file");
+            uint8_t* data_ptr = nullptr;
+            size_t   data_len = 0;
+
+            if( !m_buf.empty() ) {
+                // When the data source is in memory
+                if (m_is_mbuf_set) {
+                    // We expect m_buf contains whole JpegXL data stream.
+                    // If it had been truncated, m_status will be JXL_DEC_NEED_MORE_INPUT again.
+                    CV_LOG_WARNING(NULL, "Truncated JXL data in memory");
                    return false;
                }
-                // If we reached EOF but decoder needs more input, file is truncated
-                if (m_status == JXL_DEC_NEED_MORE_INPUT) {
-                    CV_LOG_WARNING(NULL, "Truncated JXL file");
-                    return false;
+                data_ptr = m_buf.ptr();
+                data_len = m_buf.total();
+                m_is_mbuf_set = true;
+            }
+            else {
+                // When data source is on file
+                // Release input buffer if it had been set already. If not, there are no errors.
+                size_t remaining = JxlDecoderReleaseInput(m_decoder.get());
+                // Move any remaining bytes to the beginning
+                if (remaining > 0)
+                    memmove(m_read_buffer.data(), m_read_buffer.data() + m_read_buffer.size() - remaining, remaining);
+                // Read more data from file
+                size_t bytes_read = fread(m_read_buffer.data() + remaining,
+                                          1, m_read_buffer.size() - remaining, m_f.get());
+                if (bytes_read == 0) {
+                    if (ferror(m_f.get())) {
+                        CV_LOG_WARNING(NULL, "Error reading input file");
+                        return false;
+                    }
+                    // If we reached EOF but decoder needs more input, file is truncated
+                    if (m_status == JXL_DEC_NEED_MORE_INPUT) {
+                        CV_LOG_WARNING(NULL, "Truncated JXL file");
+                        return false;
+                    }
                }
+                data_ptr = m_read_buffer.data();
+                data_len = bytes_read + remaining;
            }

            // Set input buffer
-            if (JXL_DEC_SUCCESS != JxlDecoderSetInput(m_decoder.get(),
-                                                      m_read_buffer.data(),
-                                                      bytes_read + remaining)) {
+            // It must be kept until calling JxlDecoderReleaseInput() or m_decoder.reset().
+            if (JXL_DEC_SUCCESS != JxlDecoderSetInput(m_decoder.get(), data_ptr, data_len)) {
                return false;
            }
        }
@ -163,6 +263,7 @@ bool JpegXLDecoder::read(Mat* pimg)
            case JXL_DEC_BASIC_INFO: {
                if (m_type != -1)
                    return false;
+
                JxlBasicInfo info;
                if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(m_decoder.get(), &info))
                    return false;
@ -172,49 +273,18 @@ bool JpegXLDecoder::read(Mat* pimg)

                m_width = info.xsize;
                m_height = info.ysize;
-                m_format = {
-                    ncn,
-                    JXL_TYPE_UINT8, // (temporary)
-                    JXL_LITTLE_ENDIAN, // endianness
-                    0 // align stride to bytes
-                };
-                if (!m_use_rgb) {
-                    switch (ncn) {
-                    case 3:
-                        m_convert = cv::COLOR_RGB2BGR;
-                        break;
-                    case 4:
-                        m_convert = cv::COLOR_RGBA2BGRA;
-                        break;
-                    default:
-                        m_convert = -1;
-                    }
+                int depth = (info.exponent_bits_per_sample > 0)?CV_32F:
+                            (info.bits_per_sample == 16)?CV_16U:
+                            (info.bits_per_sample == 8)?CV_8U: -1;
+                if(depth == -1)
+                {
+                    return false; // Return to readHeader()
                }
-                if (info.exponent_bits_per_sample > 0) {
-                    m_format.data_type = JXL_TYPE_FLOAT;
-                    m_type = CV_MAKETYPE( CV_32F, ncn );
-                } else {
-                    switch (info.bits_per_sample) {
-                        case 8:
-                            m_format.data_type = JXL_TYPE_UINT8;
-                            m_type = CV_MAKETYPE( CV_8U, ncn );
-                            break;
-                        case 16:
-                            m_format.data_type = JXL_TYPE_UINT16;
-                            m_type = CV_MAKETYPE( CV_16U, ncn );
-                            break;
-                        default:
-                            return false;
-                    }
-                }
-                if (!pimg)
-                    return true;
-                break;
+                m_type = CV_MAKETYPE( depth, ncn );
+                return true;
            }
            case JXL_DEC_FULL_IMAGE: {
                // Image is ready
-                if (m_convert != -1)
-                    cv::cvtColor(*pimg, *pimg, m_convert);
                break;
            }
            case JXL_DEC_ERROR: {
@ -229,17 +299,172 @@ bool JpegXLDecoder::read(Mat* pimg)
    return true;
 }

-bool JpegXLDecoder::readHeader()
+// Callback functions
+static void cbRGBtoBGR_8U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
 {
-    close();
-    return read(nullptr);
+    const uint8_t* src = static_cast<const uint8_t*>(pixels);
+
+    constexpr int dstStep = 3;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    uint8_t* dstBase = const_cast<uint8_t*>(pDst->ptr(y));
+    uint8_t* dst = dstBase + x * dstStep;
+
+    icvCvt_RGB2BGR_8u_C3R( src, 0, dst, 0, Size(num_pixels , 1) );
+}
+static void cbRGBAtoBGRA_8U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
+{
+    const uint8_t* src = static_cast<const uint8_t*>(pixels);
+
+    constexpr int dstStep = 4;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    uint8_t* dstBase = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(pDst->ptr(y)));
+    uint8_t* dst = dstBase + x * dstStep;
+
+    icvCvt_RGBA2BGRA_8u_C4R( src, 0, dst, 0, Size(num_pixels, 1) );
+}
+static void cbRGBtoBGR_16U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
+{
+    const uint16_t* src = static_cast<const uint16_t*>(pixels);
+
+    constexpr int dstStep = 3;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    uint16_t* dstBase = const_cast<uint16_t*>(reinterpret_cast<const uint16_t*>(pDst->ptr(y)));
+    uint16_t* dst = dstBase + x * dstStep;
+
+    icvCvt_BGR2RGB_16u_C3R( src, 0, dst, 0, Size(num_pixels, 1));
+}
+static void cbRGBAtoBGRA_16U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
+{
+    const uint16_t* src = static_cast<const uint16_t*>(pixels);
+
+    constexpr int dstStep = 4;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    uint16_t* dstBase = const_cast<uint16_t*>(reinterpret_cast<const uint16_t*>(pDst->ptr(y)));
+    uint16_t* dst = dstBase + x * dstStep;
+
+    icvCvt_BGRA2RGBA_16u_C4R( src, 0, dst, 0, Size(num_pixels, 1));
+}
+static void cbRGBtoBGR_32F(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
+{
+    constexpr int srcStep = 3;
+    const uint32_t* src = static_cast<const uint32_t*>(pixels);
+
+    constexpr int dstStep = 3;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    uint32_t* dstBase = const_cast<uint32_t*>(reinterpret_cast<const uint32_t*>(pDst->ptr(y)));
+    uint32_t* dst = dstBase + x * dstStep;
+
+    for(size_t i = 0 ; i < num_pixels; i++)
+    {
+        dst[ i * dstStep + 0 ] = src[ i * srcStep + 2];
+        dst[ i * dstStep + 1 ] = src[ i * srcStep + 1];
+        dst[ i * dstStep + 2 ] = src[ i * srcStep + 0];
+    }
+}
+static void cbRGBAtoBGRA_32F(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
+{
+    constexpr int srcStep = 4;
+    const uint32_t* src = static_cast<const uint32_t*>(pixels);
+
+    constexpr int dstStep = 4;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    uint32_t* dstBase = const_cast<uint32_t*>(reinterpret_cast<const uint32_t*>(pDst->ptr(y)));
+    uint32_t* dst = dstBase + x * dstStep;
+
+    for(size_t i = 0 ; i < num_pixels; i++)
+    {
+        dst[ i * dstStep + 0 ] = src[ i * srcStep + 2];
+        dst[ i * dstStep + 1 ] = src[ i * srcStep + 1];
+        dst[ i * dstStep + 2 ] = src[ i * srcStep + 0];
+        dst[ i * dstStep + 3 ] = src[ i * srcStep + 3];
+    }
 }

-bool JpegXLDecoder::readData(Mat& img)
+static void cbRGBtoGRAY_8U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
 {
-    if (!m_decoder || m_width == 0 || m_height == 0)
-        return false;
-    return read(&img);
+    const uint8_t* src = static_cast<const uint8_t*>(pixels);
+
+    constexpr int dstStep = 1;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    uint8_t* dstBase = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(pDst->ptr(y)));
+    uint8_t* dst = dstBase + x * dstStep;
+
+    icvCvt_BGR2Gray_8u_C3C1R(src, 0, dst, 0, Size(num_pixels, 1) );
+}
+static void cbRGBAtoGRAY_8U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
+{
+    const uint8_t* src = static_cast<const uint8_t*>(pixels);
+
+    constexpr int dstStep = 1;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    uint8_t* dstBase = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(pDst->ptr(y)));
+    uint8_t* dst = dstBase + x * dstStep;
+
+    icvCvt_BGRA2Gray_8u_C4C1R(src, 0, dst, 0, Size(num_pixels, 1) );
+}
+static void cbRGBtoGRAY_16U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
+{
+    const uint16_t* src = static_cast<const uint16_t*>(pixels);
+
+    constexpr int dstStep = 1;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    uint16_t* dstBase = const_cast<uint16_t*>(reinterpret_cast<const uint16_t*>(pDst->ptr(y)));
+    uint16_t* dst = dstBase + x * dstStep;
+
+    icvCvt_BGRA2Gray_16u_CnC1R(src, 0, dst, 0, Size(num_pixels, 1), /* ncn= */ 3 );
+}
+static void cbRGBAtoGRAY_16U(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
+{
+    const uint16_t* src = static_cast<const uint16_t*>(pixels);
+
+    constexpr int dstStep = 1;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    uint16_t* dstBase = const_cast<uint16_t*>(reinterpret_cast<const uint16_t*>(pDst->ptr(y)));
+    uint16_t* dst = dstBase + x * dstStep;
+
+    icvCvt_BGRA2Gray_16u_CnC1R(src, 0, dst, 0, Size(num_pixels, 1), /* ncn= */ 4 );
+}
+static void cbRGBtoGRAY_32F(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
+{
+    constexpr float cR = 0.299f;
+    constexpr float cG = 0.587f;
+    constexpr float cB = 1.000f - cR - cG;
+
+    constexpr int srcStep = 3;
+    const float* src = static_cast<const float*>(pixels);
+
+    constexpr int dstStep = 1;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    float* dstBase = const_cast<float*>(reinterpret_cast<const float*>(pDst->ptr(y)));
+    float* dst = dstBase + x * dstStep;
+
+    for(size_t i = 0 ; i < num_pixels; i++)
+    {
+        dst[ i * dstStep ] = src[ i * srcStep + 0] * cR +
+                             src[ i * srcStep + 1] * cG +
+                             src[ i * srcStep + 2] * cB;
+    }
+}
+static void cbRGBAtoGRAY_32F(void *opaque, size_t x, size_t y, size_t num_pixels, const void *pixels)
+{
+    constexpr float cR = 0.299f;
+    constexpr float cG = 0.587f;
+    constexpr float cB = 1.000f - cR - cG;
+
+    constexpr int srcStep = 4;
+    const float* src = static_cast<const float*>(pixels);
+
+    constexpr int dstStep = 1;
+    const cv::Mat *pDst = static_cast<cv::Mat*>(opaque);
+    float* dstBase = const_cast<float*>(reinterpret_cast<const float*>(pDst->ptr(y)));
+    float* dst = dstBase + x * dstStep;
+
+    for(size_t i = 0 ; i < num_pixels; i++)
+    {
+        dst[ i * dstStep ] = src[ i * srcStep + 0] * cR +
+                             src[ i * srcStep + 1] * cG +
+                             src[ i * srcStep + 2] * cB;
+    }
 }

 /////////////////////// JpegXLEncoder ///////////////////
--- a/modules/imgcodecs/src/grfmt_jpegxl.hpp
+++ b/modules/imgcodecs/src/grfmt_jpegxl.hpp
@ -41,12 +41,12 @@ protected:
    JxlDecoderPtr m_decoder;
    JxlThreadParallelRunnerPtr m_parallel_runner;
    JxlPixelFormat m_format;
-    int m_convert;
    std::vector<uint8_t> m_read_buffer;
    JxlDecoderStatus m_status;
+    bool m_is_mbuf_set;

 private:
-    bool read(Mat* pimg);
+    bool read();
 };


--- a/modules/imgcodecs/src/grfmt_png.cpp
+++ b/modules/imgcodecs/src/grfmt_png.cpp
@ -121,15 +121,16 @@
 namespace cv
 {

-const uint32_t id_IHDR = 0x52444849; // PNG header
-const uint32_t id_acTL = 0x4C546361; // Animation control chunk
-const uint32_t id_fcTL = 0x4C546366; // Frame control chunk
-const uint32_t id_IDAT = 0x54414449; // first frame and/or default image
-const uint32_t id_fdAT = 0x54416466; // Frame data chunk
-const uint32_t id_PLTE = 0x45544C50;
-const uint32_t id_bKGD = 0x44474B62;
-const uint32_t id_tRNS = 0x534E5274;
-const uint32_t id_IEND = 0x444E4549; // end/footer chunk
+const uint32_t id_IHDR = 0x49484452; // PNG header
+const uint32_t id_acTL = 0x6163544C; // Animation control chunk
+const uint32_t id_fcTL = 0x6663544C; // Frame control chunk
+const uint32_t id_IDAT = 0x49444154; // first frame and/or default image
+const uint32_t id_fdAT = 0x66644154; // Frame data chunk
+const uint32_t id_PLTE = 0x504C5445; // The PLTE chunk contains a color palette for indexed-color images
+const uint32_t id_bKGD = 0x624B4744; // The bKGD chunk specifies a default background color for the image
+const uint32_t id_tRNS = 0x74524E53; // The tRNS chunk provides transparency information
+const uint32_t id_tEXt = 0x74455874; // The tEXt chunk stores metadata as text in key-value pairs
+const uint32_t id_IEND = 0x49454E44; // end/footer chunk

 APNGFrame::APNGFrame()
 {
@ -198,6 +199,7 @@ PngDecoder::PngDecoder()

 PngDecoder::~PngDecoder()
 {
+    ClearPngPtr();
    if( m_f )
    {
        fclose( m_f );
@ -205,6 +207,26 @@ PngDecoder::~PngDecoder()
    }
 }

+bool PngDecoder::InitPngPtr() {
+    ClearPngPtr();
+
+    m_png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, 0, 0, 0);
+    if (!m_png_ptr)
+        return false;
+
+    m_info_ptr = png_create_info_struct(m_png_ptr);
+    m_end_info = png_create_info_struct(m_png_ptr);
+    return (m_info_ptr && m_end_info);
+}
+
+void PngDecoder::ClearPngPtr() {
+    if (m_png_ptr)
+        png_destroy_read_struct(&m_png_ptr, &m_info_ptr, &m_end_info);
+    m_png_ptr = nullptr;
+    m_info_ptr = nullptr;
+    m_end_info = nullptr;
+}
+
 ImageDecoder PngDecoder::newDecoder() const
 {
    return makePtr<PngDecoder>();
@ -227,167 +249,144 @@ void  PngDecoder::readDataFromBuf( void* _png_ptr, unsigned char* dst, size_t si

 bool  PngDecoder::readHeader()
 {
-    volatile bool result = false;
+    // Declare dynamic variables before a potential longjmp.
+    Chunk chunk;

-    PngPtrs png_ptrs;
-    png_structp png_ptr = png_ptrs.getPng();
-    png_infop info_ptr = png_ptrs.getInfo();
-    png_infop end_info = png_ptrs.getEndInfo();
+    if (!InitPngPtr())
+        return false;

-    if( png_ptr && info_ptr && end_info )
+    if (setjmp(png_jmpbuf(m_png_ptr)))
+        return false;
+
+    m_buf_pos = 0;
+    unsigned char sig[8];
+    uint32_t id = 0;
+
+    if( !m_buf.empty() )
+        png_set_read_fn(m_png_ptr, this, (png_rw_ptr)readDataFromBuf );
+    else
    {
-        m_buf_pos = 0;
-        if( setjmp( png_jmpbuf( png_ptr ) ) == 0 )
+        m_f = fopen(m_filename.c_str(), "rb");
+        if (!m_f)
        {
-            unsigned char sig[8];
-            uint32_t id = 0;
-            Chunk chunk;
-
-            if( !m_buf.empty() )
-                png_set_read_fn(png_ptr, this, (png_rw_ptr)readDataFromBuf );
-            else
-            {
-                m_f = fopen(m_filename.c_str(), "rb");
-                if (!m_f)
-                {
-                    return false;
-                }
-                png_init_io(png_ptr, m_f);
-            }
-
-            // Read PNG header: 137 80 78 71 13 10 26 10
-            if (!read_from_io(&sig, 8))
-                return false;
-
-            id = read_chunk(m_chunkIHDR);
-            // 8=HDR+size, 13=size of IHDR chunk, 4=CRC
-            // http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html#C.IHDR
-            if (!(id == id_IHDR && m_chunkIHDR.p.size() == 8 + 13 + 4))
-            {
-                return false;
-            }
-
-            while (true)
-            {
-                m_is_fcTL_loaded = false;
-                id = read_chunk(chunk);
-
-                if (!id || (m_f && feof(m_f)) || (!m_buf.empty() && m_buf_pos > m_buf.total()))
-                {
-                    return false;
-                }
-
-                if (id == id_IDAT)
-                {
-                    if (m_f)
-                        fseek(m_f, 0, SEEK_SET);
-                    else
-                        m_buf_pos = 0;
-                    break;
-                }
-
-                if (id == id_acTL)
-                {
-                    // 8=HDR+size, 8=size of acTL chunk, 4=CRC
-                    // https://wiki.mozilla.org/APNG_Specification#%60acTL%60:_The_Animation_Control_Chunk
-                    if (chunk.p.size() != 8 + 8 + 4)
-                        return false;
-                    m_animation.loop_count = png_get_uint_32(&chunk.p[12]);
-
-                    m_frame_count = png_get_uint_32(&chunk.p[8]);
-                    if (m_frame_count == 0)
-                        return false;
-                }
-
-                if (id == id_fcTL)
-                {
-                    // 8=HDR+size, 26=size of fcTL chunk, 4=CRC
-                    // https://wiki.mozilla.org/APNG_Specification#%60fcTL%60:_The_Frame_Control_Chunk
-                    if (chunk.p.size() != 8 + 26 + 4)
-                        return false;
-                    m_is_fcTL_loaded = true;
-                    w0 = png_get_uint_32(&chunk.p[12]);
-                    h0 = png_get_uint_32(&chunk.p[16]);
-                    x0 = png_get_uint_32(&chunk.p[20]);
-                    y0 = png_get_uint_32(&chunk.p[24]);
-                    delay_num = png_get_uint_16(&chunk.p[28]);
-                    delay_den = png_get_uint_16(&chunk.p[30]);
-                    dop = chunk.p[32];
-                    bop = chunk.p[33];
-                }
-
-                if (id == id_bKGD)
-                {
-                    // 8=HDR+size, ??=size of bKGD chunk, 4=CRC
-                    // The spec is actually more complex: http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html#C.bKGD
-                    // TODO: we only check that 4 bytes can be read from &chunk.p[8]. Fix.
-                    if (chunk.p.size() < 8 + 4)
-                        return false;
-                    int bgcolor = png_get_uint_32(&chunk.p[8]);
-                    m_animation.bgcolor[3] = (bgcolor >> 24) & 0xFF;
-                    m_animation.bgcolor[2] = (bgcolor >> 16) & 0xFF;
-                    m_animation.bgcolor[1] = (bgcolor >> 8) & 0xFF;
-                    m_animation.bgcolor[0] = bgcolor & 0xFF;
-                }
-
-                if (id == id_PLTE || id == id_tRNS)
-                    m_chunksInfo.push_back(chunk);
-            }
-
-            png_uint_32 wdth, hght;
-            int bit_depth, color_type, num_trans=0;
-            png_bytep trans;
-            png_color_16p trans_values;
-
-            // Free chunk in case png_read_info uses longjmp.
-            chunk.p.clear();
-            chunk.p.shrink_to_fit();
-
-            png_read_info( png_ptr, info_ptr );
-            png_get_IHDR(png_ptr, info_ptr, &wdth, &hght,
-                &bit_depth, &color_type, 0, 0, 0);
-
-            m_width = (int)wdth;
-            m_height = (int)hght;
-            m_color_type = color_type;
-            m_bit_depth = bit_depth;
-
-            if (bit_depth <= 8 || bit_depth == 16)
-            {
-                switch (color_type)
-                {
-                case PNG_COLOR_TYPE_RGB:
-                case PNG_COLOR_TYPE_PALETTE:
-                    png_get_tRNS(png_ptr, info_ptr, &trans, &num_trans, &trans_values);
-                    if (num_trans > 0)
-                        m_type = CV_8UC4;
-                    else
-                        m_type = CV_8UC3;
-                    break;
-                case PNG_COLOR_TYPE_GRAY_ALPHA:
-                case PNG_COLOR_TYPE_RGB_ALPHA:
-                    m_type = CV_8UC4;
-                    break;
-                default:
-                    m_type = CV_8UC1;
-                }
-                if (bit_depth == 16)
-                    m_type = CV_MAKETYPE(CV_16U, CV_MAT_CN(m_type));
-                result = true;
-            }
+            return false;
        }
+        png_init_io(m_png_ptr, m_f);
    }

-    if(result)
+    // Read PNG header: 137 80 78 71 13 10 26 10
+    if (!readFromStreamOrBuffer(&sig, 8))
+        return false;
+
+    id = read_chunk(m_chunkIHDR);
+    if (id != id_IHDR)
+        return false;
+
+    m_is_fcTL_loaded = false;
+    while (true)
    {
-        m_png_ptrs = std::move(png_ptrs);
+        id = read_chunk(chunk);
+
+        if (!id || (m_f && feof(m_f)) || (!m_buf.empty() && m_buf_pos > m_buf.total()))
+        {
+            return false;
+        }
+
+        if (id == id_IDAT)
+        {
+            if (m_f)
+                fseek(m_f, 0, SEEK_SET);
+            else
+                m_buf_pos = 0;
+            break;
+        }
+
+        if (id == id_acTL)
+        {
+            // https://wiki.mozilla.org/APNG_Specification#%60acTL%60:_The_Animation_Control_Chunk
+            m_animation.loop_count = png_get_uint_32(&chunk.p[12]);
+
+            m_frame_count = png_get_uint_32(&chunk.p[8]);
+            if (m_frame_count == 0)
+                return false;
+        }
+
+        if (id == id_fcTL)
+        {
+            // https://wiki.mozilla.org/APNG_Specification#%60fcTL%60:_The_Frame_Control_Chunk
+            m_is_fcTL_loaded = true;
+            w0 = png_get_uint_32(&chunk.p[12]);
+            h0 = png_get_uint_32(&chunk.p[16]);
+            x0 = png_get_uint_32(&chunk.p[20]);
+            y0 = png_get_uint_32(&chunk.p[24]);
+            delay_num = png_get_uint_16(&chunk.p[28]);
+            delay_den = png_get_uint_16(&chunk.p[30]);
+            dop = chunk.p[32];
+            bop = chunk.p[33];
+        }
+
+        if (id == id_PLTE || id == id_tRNS)
+            m_chunksInfo.push_back(chunk);
    }

-    return result;
+    png_uint_32 wdth, hght;
+    int bit_depth, color_type, num_trans=0;
+    png_bytep trans;
+    png_color_16p trans_values;
+
+    // Free chunk in case png_read_info uses longjmp.
+    chunk.p.clear();
+    chunk.p.shrink_to_fit();
+
+    png_read_info( m_png_ptr, m_info_ptr );
+    png_get_IHDR(m_png_ptr, m_info_ptr, &wdth, &hght,
+        &bit_depth, &color_type, 0, 0, 0);
+
+    m_width = (int)wdth;
+    m_height = (int)hght;
+    m_color_type = color_type;
+    m_bit_depth = bit_depth;
+
+    if (m_is_fcTL_loaded && ((long long int)x0 + w0 > m_width || (long long int)y0 + h0 > m_height || dop > 2 || bop > 1))
+        return false;
+
+    png_color_16p background_color;
+    if (png_get_bKGD(m_png_ptr, m_info_ptr, &background_color))
+        m_animation.bgcolor = Scalar(background_color->blue, background_color->green, background_color->red);
+
+    if (bit_depth <= 8 || bit_depth == 16)
+    {
+        switch (color_type)
+        {
+        case PNG_COLOR_TYPE_RGB:
+        case PNG_COLOR_TYPE_PALETTE:
+            png_get_tRNS(m_png_ptr, m_info_ptr, &trans, &num_trans, &trans_values);
+            if (num_trans > 0)
+                m_type = CV_8UC4;
+            else
+                m_type = CV_8UC3;
+            break;
+        case PNG_COLOR_TYPE_GRAY_ALPHA:
+        case PNG_COLOR_TYPE_RGB_ALPHA:
+            m_type = CV_8UC4;
+            break;
+        default:
+            m_type = CV_8UC1;
+        }
+        if (bit_depth == 16)
+            m_type = CV_MAKETYPE(CV_16U, CV_MAT_CN(m_type));
+    }
+
+    return true;
 }

 bool  PngDecoder::readData( Mat& img )
 {
+    // Declare dynamic variables before a potential longjmp.
+    AutoBuffer<unsigned char*> _buffer(m_height);
+    unsigned char** buffer = _buffer.data();
+    Chunk chunk;
+
    if (m_frame_count > 1)
    {
        Mat mat_cur = Mat::zeros(img.rows, img.cols, m_type);
@ -412,13 +411,14 @@ bool  PngDecoder::readData( Mat& img )

        frameCur.setMat(mat_cur);

-        processing_start((void*)&frameRaw, mat_cur);
-        png_structp png_ptr = m_png_ptrs.getPng();
-        png_infop info_ptr = m_png_ptrs.getInfo();
+        if (!processing_start((void*)&frameRaw, mat_cur))
+            return false;
+
+        if(setjmp(png_jmpbuf(m_png_ptr)))
+            return false;

        while (true)
        {
-            Chunk chunk;
            id = read_chunk(chunk);
            if (!id)
                return false;
@ -482,14 +482,14 @@ bool  PngDecoder::readData( Mat& img )
            else if (id == id_IDAT)
            {
                m_is_IDAT_loaded = true;
-                png_process_data(png_ptr, info_ptr, chunk.p.data(), chunk.p.size());
+                png_process_data(m_png_ptr, m_info_ptr, chunk.p.data(), chunk.p.size());
            }
            else if (id == id_fdAT && m_is_fcTL_loaded)
            {
                m_is_IDAT_loaded = true;
                png_save_uint_32(&chunk.p[4], static_cast<uint32_t>(chunk.p.size() - 16));
                memcpy(&chunk.p[8], "IDAT", 4);
-                png_process_data(png_ptr, info_ptr, &chunk.p[4], chunk.p.size() - 4);
+                png_process_data(m_png_ptr, m_info_ptr, &chunk.p[4], chunk.p.size() - 4);
            }
            else if (id == id_IEND)
            {
@ -513,30 +513,24 @@ bool  PngDecoder::readData( Mat& img )
                return true;
            }
            else
-                png_process_data(png_ptr, info_ptr, chunk.p.data(), chunk.p.size());
+                png_process_data(m_png_ptr, m_info_ptr, chunk.p.data(), chunk.p.size());
        }
        return false;
    }

    volatile bool result = false;
-    AutoBuffer<unsigned char*> _buffer(m_height);
-    unsigned char** buffer = _buffer.data();
    bool color = img.channels() > 1;

-    png_structp png_ptr = m_png_ptrs.getPng();
-    png_infop info_ptr = m_png_ptrs.getInfo();
-    png_infop end_info = m_png_ptrs.getEndInfo();
-
-    if( png_ptr && info_ptr && end_info && m_width && m_height )
+    if( m_png_ptr && m_info_ptr && m_end_info && m_width && m_height )
    {
-        if( setjmp( png_jmpbuf ( png_ptr ) ) == 0 )
+        if( setjmp( png_jmpbuf ( m_png_ptr ) ) == 0 )
        {
            int y;

            if( img.depth() == CV_8U && m_bit_depth == 16 )
-                png_set_strip_16( png_ptr );
+                png_set_strip_16( m_png_ptr );
            else if( !isBigEndian() )
-                png_set_swap( png_ptr );
+                png_set_swap( m_png_ptr );

            if(img.channels() < 4)
            {
@ -548,46 +542,46 @@ bool  PngDecoder::readData( Mat& img )
                 * indicate that it is a good idea to always ask for
                 * stripping alpha..  18.11.2004 Axel Walthelm
                 */
-                 png_set_strip_alpha( png_ptr );
+                 png_set_strip_alpha( m_png_ptr );
            } else
-                png_set_tRNS_to_alpha( png_ptr );
+                png_set_tRNS_to_alpha( m_png_ptr );

            if( m_color_type == PNG_COLOR_TYPE_PALETTE )
-                png_set_palette_to_rgb( png_ptr );
+                png_set_palette_to_rgb( m_png_ptr );

            if( (m_color_type & PNG_COLOR_MASK_COLOR) == 0 && m_bit_depth < 8 )
 #if (PNG_LIBPNG_VER_MAJOR*10000 + PNG_LIBPNG_VER_MINOR*100 + PNG_LIBPNG_VER_RELEASE >= 10209) || \
    (PNG_LIBPNG_VER_MAJOR == 1 && PNG_LIBPNG_VER_MINOR == 0 && PNG_LIBPNG_VER_RELEASE >= 18)
-                png_set_expand_gray_1_2_4_to_8( png_ptr );
+                png_set_expand_gray_1_2_4_to_8( m_png_ptr );
 #else
                png_set_gray_1_2_4_to_8( png_ptr );
 #endif

            if( (m_color_type & PNG_COLOR_MASK_COLOR) && color && !m_use_rgb)
-                png_set_bgr( png_ptr ); // convert RGB to BGR
+                png_set_bgr( m_png_ptr ); // convert RGB to BGR
            else if( color )
-                png_set_gray_to_rgb( png_ptr ); // Gray->RGB
+                png_set_gray_to_rgb( m_png_ptr ); // Gray->RGB
            else
-                png_set_rgb_to_gray( png_ptr, 1, 0.299, 0.587 ); // RGB->Gray
+                png_set_rgb_to_gray( m_png_ptr, 1, 0.299, 0.587 ); // RGB->Gray

-            png_set_interlace_handling( png_ptr );
-            png_read_update_info( png_ptr, info_ptr );
+            png_set_interlace_handling( m_png_ptr );
+            png_read_update_info( m_png_ptr, m_info_ptr );

            for( y = 0; y < m_height; y++ )
                buffer[y] = img.data + y*img.step;

-            png_read_image( png_ptr, buffer );
-            png_read_end( png_ptr, end_info );
+            png_read_image( m_png_ptr, buffer );
+            png_read_end( m_png_ptr, m_end_info );

 #ifdef PNG_eXIf_SUPPORTED
            png_uint_32 num_exif = 0;
            png_bytep exif = 0;

            // Exif info could be in info_ptr (intro_info) or end_info per specification
-            if( png_get_valid(png_ptr, info_ptr, PNG_INFO_eXIf) )
-                png_get_eXIf_1(png_ptr, info_ptr, &num_exif, &exif);
-            else if( png_get_valid(png_ptr, end_info, PNG_INFO_eXIf) )
-                png_get_eXIf_1(png_ptr, end_info, &num_exif, &exif);
+            if( png_get_valid(m_png_ptr, m_info_ptr, PNG_INFO_eXIf) )
+                png_get_eXIf_1(m_png_ptr, m_info_ptr, &num_exif, &exif);
+            else if( png_get_valid(m_png_ptr, m_end_info, PNG_INFO_eXIf) )
+                png_get_eXIf_1(m_png_ptr, m_end_info, &num_exif, &exif);

            if( exif && num_exif > 0 )
            {
@ -683,7 +677,7 @@ void PngDecoder::compose_frame(std::vector<png_bytep>& rows_dst, const std::vect
            });
 }

-bool PngDecoder::read_from_io(void* buffer, size_t num_bytes)
+bool PngDecoder::readFromStreamOrBuffer(void* buffer, size_t num_bytes)
 {
    if (m_f)
        return fread(buffer, 1, num_bytes, m_f) == num_bytes;
@ -700,61 +694,78 @@ bool PngDecoder::read_from_io(void* buffer, size_t num_bytes)

+// Reads the next PNG chunk from the input into chunk.p.
+//
+// The 4-byte data length and the 4-byte chunk type are read first; the total
+// chunk size is the data length plus 12 bytes (length + type + CRC).
+// Returns the chunk type id on success, or 0 when the input cannot be read
+// or the declared size is rejected by the checks below.
 uint32_t PngDecoder::read_chunk(Chunk& chunk)
 {
-    unsigned char len[4];
-    if (read_from_io(&len, 4))
-    {
-        const size_t size = static_cast<size_t>(png_get_uint_32(len)) + 12;
+    // size_id holds the 4-byte data length followed by the 4-byte chunk type.
+    unsigned char size_id[8];
+    if (!readFromStreamOrBuffer(&size_id, 8))
+        return 0;
+    const size_t size = static_cast<size_t>(png_get_uint_32(size_id)) + 12;
+
+    const uint32_t id = png_get_uint_32(size_id + 4);
+    if (id == id_IHDR) {
+        // 8=HDR+size, 13=size of IHDR chunk, 4=CRC
+        // http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html#C.IHDR
+        if (size != 8 + 13 + 4)
+            return 0;
+    } else if (id == id_acTL) {
+        // 8=HDR+size, 8=size of acTL chunk, 4=CRC
+        // https://wiki.mozilla.org/APNG_Specification#%60acTL%60:_The_Animation_Control_Chunk
+        if (size != 8 + 8 + 4)
+            return 0;
+    } else if (id == id_fcTL) {
+        // 8=HDR+size, 26=size of fcTL chunk, 4=CRC
+        // https://wiki.mozilla.org/APNG_Specification#%60fcTL%60:_The_Frame_Control_Chunk
+        if (size != 8 + 26 + 4)
+            return 0;
+    } else if (id == id_bKGD) {
+        // 8=HDR+size, (1, 2 or 6)=size of bKGD chunk, 4=CRC
+        // The spec is actually more complex:
+        // http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html#C.bKGD
+        if (size != 8 + 1 + 4 && size != 8 + 2 + 4 && size != 8 + 6 + 4)
+            return 0;
+    } else if (id != id_fdAT && id != id_IDAT && id != id_IEND && id != id_PLTE && id != id_tEXt && id != id_tRNS) {
+        // Any chunk type not named in the condition above is capped at PNG_USER_CHUNK_MALLOC_MAX.
         if (size > PNG_USER_CHUNK_MALLOC_MAX)
         {
-            CV_LOG_WARNING(NULL, "chunk data is too large");
+            CV_LOG_WARNING(NULL, "user chunk data is too large");
             return 0;
         }
-        chunk.p.resize(size);
-        memcpy(chunk.p.data(), len, 4);
-        if (read_from_io(&chunk.p[4], chunk.p.size() - 4))
-            return *(uint32_t*)(&chunk.p[4]);
     }
+
+    // Keep the 8 header bytes already read, then read the remaining data and CRC after them.
+    chunk.p.resize(size);
+    memcpy(chunk.p.data(), size_id, 8);
+    if (readFromStreamOrBuffer(&chunk.p[8], chunk.p.size() - 8))
+        return id;
     return 0;
 }

+// Prepares the decoder's libpng state for progressive decoding of one frame.
+//
+// frame_ptr is handed to png_set_progressive_read_fn as the user pointer for
+// the info_fn/row_fn callbacks; img is only inspected for its channel count.
+// Returns false when InitPngPtr() fails or when control comes back through
+// the setjmp point below; returns true otherwise.
 bool PngDecoder::processing_start(void* frame_ptr, const Mat& img)
 {
+    if (!InitPngPtr())
+        return false;
+
+    // A non-zero setjmp result means a later libpng call jumped back here.
+    if (setjmp(png_jmpbuf(m_png_ptr)))
+        return false;
+
     static uint8_t header[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };

-    PngPtrs png_ptrs;
-    png_structp png_ptr = png_ptrs.getPng();
-    png_infop info_ptr = png_ptrs.getInfo();
-
-    if (!png_ptr || !info_ptr) {
-        return false;
-    }
-
-    if (setjmp(png_jmpbuf(png_ptr)))
-    {
-        return false;
-    }
-
-    m_png_ptrs = std::move(png_ptrs);
-    png_set_crc_action(png_ptr, PNG_CRC_QUIET_USE, PNG_CRC_QUIET_USE);
-    png_set_progressive_read_fn(png_ptr, frame_ptr, (png_progressive_info_ptr)info_fn, row_fn, NULL);
+    png_set_crc_action(m_png_ptr, PNG_CRC_QUIET_USE, PNG_CRC_QUIET_USE);
+    png_set_progressive_read_fn(m_png_ptr, frame_ptr, (png_progressive_info_ptr)info_fn, row_fn, NULL);

     if (img.channels() < 4)
-        png_set_strip_alpha(png_ptr);
+        png_set_strip_alpha(m_png_ptr);
     else
-        png_set_tRNS_to_alpha(png_ptr);
+        png_set_tRNS_to_alpha(m_png_ptr);

-    png_process_data(png_ptr, info_ptr, header, 8);
-    png_process_data(png_ptr, info_ptr, m_chunkIHDR.p.data(), m_chunkIHDR.p.size());
+    // Feed the 8-byte file signature and the stored IHDR chunk to libpng first.
+    png_process_data(m_png_ptr, m_info_ptr, header, 8);
+    png_process_data(m_png_ptr, m_info_ptr, m_chunkIHDR.p.data(), m_chunkIHDR.p.size());

     if ((m_color_type & PNG_COLOR_MASK_COLOR) && img.channels() > 1 && !m_use_rgb)
-        png_set_bgr(png_ptr); // convert RGB to BGR
+        png_set_bgr(m_png_ptr); // convert RGB to BGR
     else if (img.channels() > 1)
-        png_set_gray_to_rgb(png_ptr); // Gray->RGB
+        png_set_gray_to_rgb(m_png_ptr); // Gray->RGB
     else
-        png_set_rgb_to_gray(png_ptr, 1, 0.299, 0.587); // RGB->Gray
+        png_set_rgb_to_gray(m_png_ptr, 1, 0.299, 0.587); // RGB->Gray

+    // Replay the chunks collected in m_chunksInfo before any frame data is processed.
     for (size_t i = 0; i < m_chunksInfo.size(); i++)
-        png_process_data(png_ptr, info_ptr, m_chunksInfo[i].p.data(), m_chunksInfo[i].p.size());
+        png_process_data(m_png_ptr, m_info_ptr, m_chunksInfo[i].p.data(), m_chunksInfo[i].p.size());

     return true;
 }
@ -763,22 +774,17 @@ bool PngDecoder::processing_finish()
 {
    static uint8_t footer[12] = { 0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 130 };

-    png_structp png_ptr = m_png_ptrs.getPng();
-    png_infop info_ptr = m_png_ptrs.getInfo();
-
-    if (!png_ptr) {
-        m_png_ptrs.clear();
+    if (!m_png_ptr) {
        return false;
    }

-    if (setjmp(png_jmpbuf(png_ptr)))
+    if (setjmp(png_jmpbuf(m_png_ptr)))
    {
-        m_png_ptrs.clear();
        return false;
    }

-    png_process_data(png_ptr, info_ptr, footer, 12);
-    m_png_ptrs.clear();
+    png_process_data(m_png_ptr, m_info_ptr, footer, 12);
+    ClearPngPtr();

    return true;
 }
@ -948,15 +954,24 @@ bool  PngEncoder::write( const Mat& img, const std::vector<int>& params )
    return result;
 }

-size_t PngEncoder::write_to_io(void const* _Buffer, size_t  _ElementSize, size_t _ElementCount, FILE * _Stream)
+/**
+ * Writes num_bytes bytes from buffer either to stream or, when stream is null,
+ * to the end of the in-memory buffer m_buf.
+ *
+ * @param buffer    Data to write; a null pointer results in no write.
+ * @param num_bytes Number of bytes to write; 0 results in no write.
+ * @param stream    Destination file, or null to append to m_buf.
+ * @return Number of bytes written: the fwrite() result for a stream (which may
+ *         be smaller than num_bytes), num_bytes for the in-memory buffer, or 0
+ *         when there was nothing to write.
+ * @throws std::runtime_error if appending would exceed m_buf->max_size().
+ */
+size_t PngEncoder::writeToStreamOrBuffer(void const* buffer, size_t num_bytes, FILE* stream)
 {
-    if (_Stream)
-        return fwrite(_Buffer, _ElementSize, _ElementCount, _Stream);
+    if (!buffer || !num_bytes)
+        return 0; // Handle null buffer or empty writes
+
+    if (stream)
+        return fwrite(buffer, 1, num_bytes, stream); // element size 1: the result is a byte count

     size_t cursz = m_buf->size();
-    m_buf->resize(cursz + _ElementCount);
-    memcpy( &(*m_buf)[cursz], _Buffer, _ElementCount );
-    return _ElementCount;
+    // Compare against the remaining room instead of computing cursz + num_bytes,
+    // which could wrap around in size_t and slip past the check.
+    if (num_bytes > m_buf->max_size() - cursz)
+        throw std::runtime_error("Buffer size exceeds maximum capacity");
+
+    m_buf->resize(cursz + num_bytes);
+    memcpy(&(*m_buf)[cursz], buffer, num_bytes);
+    return num_bytes;
 }

 void PngEncoder::writeChunk(FILE* f, const char* name, unsigned char* data, uint32_t length)
@ -965,26 +980,26 @@ void PngEncoder::writeChunk(FILE* f, const char* name, unsigned char* data, uint
    uint32_t crc = crc32(0, Z_NULL, 0);

    png_save_uint_32(buf, length);
-    write_to_io(buf, 1, 4, f);
-    write_to_io(name, 1, 4, f);
+    writeToStreamOrBuffer(buf, 4, f);
+    writeToStreamOrBuffer(name, 4, f);
    crc = crc32(crc, (const Bytef*)name, 4);

    if (memcmp(name, "fdAT", 4) == 0)
    {
        png_save_uint_32(buf, next_seq_num++);
-        write_to_io(buf, 1, 4, f);
+        writeToStreamOrBuffer(buf, 4, f);
        crc = crc32(crc, buf, 4);
        length -= 4;
    }

    if (data != NULL && length > 0)
    {
-        write_to_io(data, 1, length, f);
+        writeToStreamOrBuffer(data, length, f);
        crc = crc32(crc, data, length);
    }

    png_save_uint_32(buf, crc);
-    write_to_io(buf, 1, 4, f);
+    writeToStreamOrBuffer(buf, 4, f);
 }

 void PngEncoder::writeIDATs(FILE* f, int frame, unsigned char* data, uint32_t length, uint32_t idat_size)
@ -1397,6 +1412,9 @@ void PngEncoder::deflateRectFin(unsigned char* zbuf, uint32_t* zsize, int bpp, i

 bool PngEncoder::writeanimation(const Animation& animation, const std::vector<int>& params)
 {
+    int frame_type = animation.frames[0].type();
+    int frame_depth = animation.frames[0].depth();
+    CV_CheckType(frame_type, frame_depth == CV_8U || frame_depth == CV_16U, "APNG decoder supports only 8 or 16 bit unsigned images");
    int compression_level = 6;
    int compression_strategy = IMWRITE_PNG_STRATEGY_RLE; // Default strategy
    bool isBilevel = false;
@ -1420,7 +1438,8 @@ bool PngEncoder::writeanimation(const Animation& animation, const std::vector<in
        }
    }

-    CV_UNUSED(isBilevel);
+    if (isBilevel)
+        CV_LOG_WARNING(NULL, "IMWRITE_PNG_BILEVEL parameter is not supported yet.");
    uint32_t first =0;
    uint32_t loops= animation.loop_count;
    uint32_t coltype= animation.frames[0].channels() == 1 ? PNG_COLOR_TYPE_GRAY : animation.frames[0].channels() == 3 ? PNG_COLOR_TYPE_RGB : PNG_COLOR_TYPE_RGB_ALPHA;
@ -1509,7 +1528,7 @@ bool PngEncoder::writeanimation(const Animation& animation, const std::vector<in
        png_save_uint_32(buf_acTL, num_frames - first);
        png_save_uint_32(buf_acTL + 4, loops);

-        write_to_io(header, 1, 8, m_f);
+        writeToStreamOrBuffer(header, 8, m_f);

        writeChunk(m_f, "IHDR", buf_IHDR, 13);

@ -1521,13 +1540,13 @@ bool PngEncoder::writeanimation(const Animation& animation, const std::vector<in
        if (palsize > 0)
            writeChunk(m_f, "PLTE", (unsigned char*)(&palette), palsize * 3);

-        if ((animation.bgcolor != Scalar()) && (animation.frames.size() > 1))
+        if ((animation.bgcolor != Scalar()) && coltype)
        {
-            uint64_t bgvalue = (static_cast<int>(animation.bgcolor[0]) & 0xFF) << 24 |
-                (static_cast<int>(animation.bgcolor[1]) & 0xFF) << 16 |
-                (static_cast<int>(animation.bgcolor[2]) & 0xFF) << 8 |
-                (static_cast<int>(animation.bgcolor[3]) & 0xFF);
-            writeChunk(m_f, "bKGD", (unsigned char*)(&bgvalue), 6); //the bKGD chunk must precede the first IDAT chunk, and must follow the PLTE chunk.
+            unsigned char bgvalue[6] = {};
+            bgvalue[1] = animation.bgcolor[2];
+            bgvalue[3] = animation.bgcolor[1];
+            bgvalue[5] = animation.bgcolor[0];
+            writeChunk(m_f, "bKGD", bgvalue, 6); //the bKGD chunk must precede the first IDAT chunk, and must follow the PLTE chunk.
        }

        if (trnssize > 0)
--- a/modules/imgcodecs/src/grfmt_png.hpp
+++ b/modules/imgcodecs/src/grfmt_png.hpp
@ -130,56 +130,27 @@ public:

    ImageDecoder newDecoder() const CV_OVERRIDE;

-protected:
+private:
    static void readDataFromBuf(void* png_ptr, uchar* dst, size_t size);
    static void info_fn(png_structp png_ptr, png_infop info_ptr);
    static void row_fn(png_structp png_ptr, png_bytep new_row, png_uint_32 row_num, int pass);
-    bool processing_start(void* frame_ptr, const Mat& img);
-    bool processing_finish();
+    CV_NODISCARD_STD bool processing_start(void* frame_ptr, const Mat& img);
+    CV_NODISCARD_STD bool processing_finish();
    void compose_frame(std::vector<png_bytep>& rows_dst, const std::vector<png_bytep>& rows_src, unsigned char bop, uint32_t x, uint32_t y, uint32_t w, uint32_t h, Mat& img);
-    bool read_from_io(void* buffer, size_t num_bytes);
+    /**
+     * @brief Reads data from an I/O source into the provided buffer.
+     * @param buffer Pointer to the buffer where the data will be stored.
+     * @param num_bytes Number of bytes to read into the buffer.
+     * @return true if the operation is successful, false otherwise.
+     */
+    CV_NODISCARD_STD bool readFromStreamOrBuffer(void* buffer, size_t num_bytes);
    uint32_t  read_chunk(Chunk& chunk);
+    CV_NODISCARD_STD bool InitPngPtr();
+    void ClearPngPtr();

-    struct PngPtrs {
-        public:
-            PngPtrs() {
-                png_ptr = png_create_read_struct( PNG_LIBPNG_VER_STRING, 0, 0, 0 );
-                if (png_ptr) {
-                    info_ptr = png_create_info_struct( png_ptr );
-                    end_info = png_create_info_struct( png_ptr );
-                } else {
-                    info_ptr = end_info = nullptr;
-                }
-            }
-            ~PngPtrs() {
-                clear();
-            }
-            PngPtrs& operator=(PngPtrs&& other) {
-                clear();
-                png_ptr = other.png_ptr;
-                info_ptr = other.info_ptr;
-                end_info = other.end_info;
-                other.png_ptr = nullptr;
-                other.info_ptr = other.end_info = nullptr;
-                return *this;
-            }
-            void clear() {
-                if (png_ptr) {
-                    png_destroy_read_struct(&png_ptr, &info_ptr, &end_info);
-                    png_ptr = nullptr;
-                    info_ptr = end_info = nullptr;
-                }
-            }
-            png_structp getPng() const { return png_ptr; }
-            png_infop getInfo() const { return info_ptr; }
-            png_infop getEndInfo() const { return end_info; }
-        private:
-            png_structp png_ptr; // pointer to decompression structure
-            png_infop info_ptr; // pointer to image information structure
-            png_infop end_info; // pointer to one more image information structure
-    };
-
-    PngPtrs m_png_ptrs;
+    png_structp m_png_ptr = nullptr; // pointer to decompression structure
+    png_infop m_info_ptr = nullptr; // pointer to image information structure
+    png_infop m_end_info = nullptr; // pointer to one more image information structure
    int   m_bit_depth;
    FILE* m_f;
    int   m_color_type;
@ -220,7 +191,24 @@ public:
 protected:
    static void writeDataToBuf(void* png_ptr, unsigned char* src, size_t size);
    static void flushBuf(void* png_ptr);
-    size_t write_to_io(void const* _Buffer, size_t  _ElementSize, size_t _ElementCount, FILE* _Stream);
+    /**
+    * @brief Writes data to an output destination, either a file stream or an in-memory buffer.
+    *
+    * This function handles two output scenarios:
+    * 1. If a file stream is provided, the data is written to the stream using `fwrite`.
+    * 2. If `stream` is null, the data is written to an in-memory buffer (`m_buf`), which is resized as needed.
+    *
+    * @param buffer Pointer to the data to be written.
+    * @param num_bytes The number of bytes to be written.
+    * @param stream Pointer to the file stream for writing. If null, the data is written to the in-memory buffer.
+    * @return The number of bytes successfully written.
+    *         - For file-based writes, this is the number of bytes written to the stream.
+    *         - For buffer-based writes, this is the total number of bytes added to the buffer.
+    *
+    * @throws std::runtime_error If the in-memory buffer (`m_buf`) exceeds its maximum capacity.
+    * @note If `num_bytes` is 0 or `buffer` is null, the function returns 0.
+    */
+    size_t writeToStreamOrBuffer(void const* buffer, size_t  num_bytes, FILE* stream);

 private:
    void writeChunk(FILE* f, const char* name, unsigned char* data, uint32_t length);
--- a/modules/imgcodecs/src/loadsave.cpp
+++ b/modules/imgcodecs/src/loadsave.cpp
@ -83,6 +83,13 @@ static Size validateInputImageSize(const Size& size)

 static inline int calcType(int type, int flags)
 {
+    if(flags != IMREAD_UNCHANGED)
+    {
+        CV_CheckNE(flags & (IMREAD_COLOR_BGR | IMREAD_COLOR_RGB),
+                   IMREAD_COLOR_BGR | IMREAD_COLOR_RGB,
+                   "IMREAD_COLOR_BGR (IMREAD_COLOR) and IMREAD_COLOR_RGB can not be set at the same time.");
+    }
+
    if ( (flags & (IMREAD_COLOR | IMREAD_ANYCOLOR | IMREAD_ANYDEPTH)) == (IMREAD_COLOR | IMREAD_ANYCOLOR | IMREAD_ANYDEPTH))
        return type;

@ -1335,7 +1342,7 @@ bool imencode( const String& ext, InputArray _img,
        else
            code = encoder->writemulti(write_vec, params);

-        encoder->throwOnEror();
+        encoder->throwOnError();
        CV_Assert( code );
    }
    catch (const cv::Exception& e)
--- a/modules/imgcodecs/test/test_animation.cpp
+++ b/modules/imgcodecs/test/test_animation.cpp
@ -425,6 +425,39 @@ TEST(Imgcodecs_APNG, imwriteanimation_rgb)
    EXPECT_EQ(0, remove(output.c_str()));
 }

+// Round-trips a grayscale animation through an APNG file and checks the
+// decoded background color, frame count and frame contents.
+TEST(Imgcodecs_APNG, imwriteanimation_gray)
+{
+    Animation s_animation, l_animation;
+    EXPECT_TRUE(fillFrames(s_animation, false));
+
+    // Turn every generated frame into a single-channel grayscale image in place.
+    for (auto& frame : s_animation.frames)
+        cvtColor(frame, frame, COLOR_BGR2GRAY);
+
+    s_animation.bgcolor = Scalar(50, 100, 150);
+
+    // Save to a temporary .png file, then load the result back.
+    const string output = cv::tempfile(".png");
+    EXPECT_TRUE(imwriteanimation(output, s_animation));
+    EXPECT_TRUE(imreadanimation(output, l_animation));
+
+    // The loaded background color is expected to be the default Scalar().
+    EXPECT_EQ(Scalar(), l_animation.bgcolor);
+
+    // imcount() and the loaded animation must both report the expected frame count.
+    const size_t expected_frame_count = s_animation.frames.size() - 2;
+    EXPECT_EQ(expected_frame_count, imcount(output));
+    EXPECT_EQ(expected_frame_count, l_animation.frames.size());
+
+    EXPECT_EQ(0, remove(output.c_str()));
+
+    // Each loaded frame must be identical to the source frame at the same index.
+    const size_t loaded_count = l_animation.frames.size();
+    for (size_t idx = 0; idx < loaded_count; ++idx)
+        EXPECT_EQ(0, cvtest::norm(s_animation.frames[idx], l_animation.frames[idx], NORM_INF));
+}
+
+
 TEST(Imgcodecs_APNG, imwritemulti_rgba)
 {
    Animation s_animation;
@ -492,7 +525,7 @@ TEST(Imgcodecs_APNG, imwriteanimation_bgcolor)
 {
    Animation s_animation, l_animation;
    EXPECT_TRUE(fillFrames(s_animation, true, 2));
-    s_animation.bgcolor = Scalar(50, 100, 150, 128); // different values for test purpose.
+    s_animation.bgcolor = Scalar(50, 100, 150); // will be written in bKGD chunk as RGB.

    // Create a temporary output filename for saving the animation.
    string output = cv::tempfile(".png");
--- a/modules/imgcodecs/test/test_gif.cpp
+++ b/modules/imgcodecs/test/test_gif.cpp
@ -241,17 +241,17 @@ TEST(Imgcodecs_Gif, read_gif_special){
    const string gif_filename2 = root + "gifsuite/special2.gif";
    const string png_filename2 = root + "gifsuite/special2.png";
    cv::Mat gif_img1;
-    ASSERT_NO_THROW(gif_img1 = cv::imread(gif_filename1,IMREAD_UNCHANGED));
+    ASSERT_NO_THROW(gif_img1 = cv::imread(gif_filename1,IMREAD_COLOR));
    ASSERT_FALSE(gif_img1.empty());
    cv::Mat png_img1;
-    ASSERT_NO_THROW(png_img1 = cv::imread(png_filename1,IMREAD_UNCHANGED));
+    ASSERT_NO_THROW(png_img1 = cv::imread(png_filename1,IMREAD_COLOR));
    ASSERT_FALSE(png_img1.empty());
    EXPECT_PRED_FORMAT2(cvtest::MatComparator(0, 0), gif_img1, png_img1);
    cv::Mat gif_img2;
-    ASSERT_NO_THROW(gif_img2 = cv::imread(gif_filename2,IMREAD_UNCHANGED));
+    ASSERT_NO_THROW(gif_img2 = cv::imread(gif_filename2,IMREAD_COLOR));
    ASSERT_FALSE(gif_img2.empty());
    cv::Mat png_img2;
-    ASSERT_NO_THROW(png_img2 = cv::imread(png_filename2,IMREAD_UNCHANGED));
+    ASSERT_NO_THROW(png_img2 = cv::imread(png_filename2,IMREAD_COLOR));
    ASSERT_FALSE(png_img2.empty());
    EXPECT_PRED_FORMAT2(cvtest::MatComparator(0, 0), gif_img2, png_img2);
 }
@ -351,6 +351,21 @@ TEST(Imgcodecs_Gif, write_gif_multi) {
    EXPECT_EQ(0, remove(gif_filename.c_str()));
 }

+// Encodes a 3-channel image as GIF in memory and checks that decoding it with
+// IMREAD_GRAYSCALE yields a non-empty single-channel image.
+TEST(Imgcodecs_Gif, encode_IMREAD_GRAYSCALE) {
+    const cv::Mat src(240, 240, CV_8UC3, cv::Scalar(128, 64, 32));
+    vector<uint8_t> encoded;
+    const vector<int> params;
+
+    bool ok = false;
+    EXPECT_NO_THROW(ok = imencode(".gif", src, encoded, params));
+    EXPECT_TRUE(ok);
+
+    cv::Mat decoded;
+    EXPECT_NO_THROW(decoded = imdecode(encoded, cv::IMREAD_GRAYSCALE));
+    EXPECT_FALSE(decoded.empty());
+    EXPECT_EQ(decoded.channels(), 1);
+}
+
 }//opencv_test
 }//namespace

--- a/modules/imgcodecs/test/test_grfmt.cpp
+++ b/modules/imgcodecs/test/test_grfmt.cpp
@ -164,6 +164,8 @@ TEST_P(Imgcodecs_ExtSize, write_imageseq)
            continue;
        if (cn != 3 && ext == ".ppm")
            continue;
+        if (cn == 1 && ext == ".gif")
+            continue;
        string filename = cv::tempfile(format("%d%s", cn, ext.c_str()).c_str());

        Mat img_gt(size, CV_MAKETYPE(CV_8U, cn), Scalar::all(0));
@ -179,8 +181,14 @@ TEST_P(Imgcodecs_ExtSize, write_imageseq)
        ASSERT_TRUE(imwrite(filename, img_gt, parameters));
        Mat img = imread(filename, IMREAD_UNCHANGED);
        ASSERT_FALSE(img.empty());
-        EXPECT_EQ(img.size(), img.size());
-        EXPECT_EQ(img.type(), img.type());
+        EXPECT_EQ(img_gt.size(), img.size());
+        EXPECT_EQ(img_gt.channels(), img.channels());
+        if (ext == ".pfm") {
+            EXPECT_EQ(img_gt.depth(), CV_8U);
+            EXPECT_EQ(img.depth(),    CV_32F);
+        } else {
+            EXPECT_EQ(img_gt.depth(), img.depth());
+        }
        EXPECT_EQ(cn, img.channels());


@ -200,6 +208,14 @@ TEST_P(Imgcodecs_ExtSize, write_imageseq)
            EXPECT_LT(n, 1.);
            EXPECT_PRED_FORMAT2(cvtest::MatComparator(0, 0), img, img_gt);
        }
+        else if (ext == ".gif")
+        {
+            // The GIF encoder reduces the number of colors to 256,
+            // so an exact pixel-by-pixel comparison is not possible.
+            double n = cvtest::norm(img, img_gt, NORM_L1);
+            double expected = 0.03 * img.size().area();
+            EXPECT_LT(n, expected);
+        }
        else
        {
            double n = cvtest::norm(img, img_gt, NORM_L2);
@ -238,6 +254,9 @@ const string all_exts[] =
 #ifdef HAVE_IMGCODEC_PFM
    ".pfm",
 #endif
+#ifdef HAVE_IMGCODEC_GIF
+    ".gif",
+#endif
 };

 vector<Size> all_sizes()
--- a/modules/imgcodecs/test/test_jpegxl.cpp
+++ b/modules/imgcodecs/test/test_jpegxl.cpp
@ -180,6 +180,132 @@ TEST(Imgcodecs_JpegXL, encode_from_uncontinued_image)
    EXPECT_TRUE(ret);
 }

+// See https://github.com/opencv/opencv/issues/26767
+
+// Test parameter: a matrix type paired with an imread flag.
+typedef tuple<perf::MatType, ImreadModes> MatType_and_ImreadFlag;
+// Value-parameterized fixture taking the pair above.
+typedef testing::TestWithParam<MatType_and_ImreadFlag> Imgcodecs_JpegXL_MatType_ImreadFlag;
+
+// Writes a random image of the parameterized type as lossless JPEG XL, reads
+// it back with the parameterized imread flag, and checks the decoded depth
+// and channel count expected for that flag.
+TEST_P(Imgcodecs_JpegXL_MatType_ImreadFlag, all_imreadFlags)
+{
+    const string tmp_fname = cv::tempfile(".jxl");
+    const int matType = get<0>(GetParam());
+    const int imreadFlag = get<1>(GetParam());
+
+    Mat img(240, 320, matType);
+    randu(img, Scalar(0, 0, 0, 255), Scalar(255, 255, 255, 255));
+
+    // Distance 0 selects lossless encoding.
+    vector<int> param;
+    param.push_back(IMWRITE_JPEGXL_DISTANCE);
+    param.push_back(0 /* Lossless */);
+    EXPECT_NO_THROW(imwrite(tmp_fname, img, param));
+
+    Mat img_decoded;
+    EXPECT_NO_THROW(img_decoded = imread(tmp_fname, imreadFlag));
+    EXPECT_FALSE(img_decoded.empty());
+
+    if (imreadFlag == IMREAD_UNCHANGED)
+    {
+        EXPECT_EQ( img.type(), img_decoded.type() );
+    }
+    else if (imreadFlag == IMREAD_GRAYSCALE)
+    {
+        EXPECT_EQ( img_decoded.depth(), CV_8U );
+        EXPECT_EQ( img_decoded.channels(), 1 );
+    }
+    else if (imreadFlag == IMREAD_COLOR || imreadFlag == IMREAD_COLOR_RGB)
+    {
+        EXPECT_EQ( img_decoded.depth(), CV_8U );
+        EXPECT_EQ( img_decoded.channels(), 3 );
+    }
+    else if (imreadFlag == IMREAD_ANYDEPTH)
+    {
+        EXPECT_EQ( img_decoded.depth(), img.depth() );
+        EXPECT_EQ( img_decoded.channels(), 1 );
+    }
+    else if (imreadFlag == IMREAD_ANYCOLOR)
+    {
+        EXPECT_EQ( img_decoded.depth(), CV_8U ) ;
+        EXPECT_EQ( img_decoded.channels(), img.channels() == 1 ? 1 : 3 ); // Alpha channel will be dropped.
+    }
+    remove(tmp_fname.c_str());
+}
+
+// Instantiates the fixture for every combination of the matrix types and
+// imread flags listed below (testing::Combine of the two value lists).
+INSTANTIATE_TEST_CASE_P(
+    /**/,
+    Imgcodecs_JpegXL_MatType_ImreadFlag,
+    testing::Combine(
+        testing::Values(
+            CV_8UC1,  CV_8UC3,  CV_8UC4,
+            CV_16UC1, CV_16UC3, CV_16UC4,
+            CV_32FC1, CV_32FC3, CV_32FC4
+        ),
+        testing::Values(
+            IMREAD_UNCHANGED,
+            IMREAD_GRAYSCALE,
+            IMREAD_COLOR,
+            IMREAD_COLOR_RGB,
+            IMREAD_ANYDEPTH,
+            IMREAD_ANYCOLOR
+        )
+) );
+
+// Checks that imdecode() succeeds on a complete in-memory JPEG XL stream and
+// returns an empty image, without throwing, once the last byte is removed.
+TEST(Imgcodecs_JpegXL, imdecode_truncated_stream)
+{
+    cv::Mat src(100, 100, CV_8UC1, Scalar(40,50,10));
+    vector<uint8_t> stream;
+    vector<int> params;
+
+    bool encoded = false;
+    EXPECT_NO_THROW(encoded = cv::imencode(".jxl", src, stream, params));
+    EXPECT_TRUE(encoded);
+
+    // The complete stream must decode to a non-empty image.
+    cv::Mat decoded;
+    EXPECT_NO_THROW(decoded = cv::imdecode(stream, cv::IMREAD_COLOR));
+    EXPECT_FALSE(decoded.empty());
+
+    // Drop the final byte; decoding must now yield an empty image.
+    const size_t truncated_size = stream.size() - 1;
+    stream.resize(truncated_size);
+    EXPECT_NO_THROW(decoded = cv::imdecode(stream, cv::IMREAD_COLOR));
+    EXPECT_TRUE(decoded.empty());
+}
+
+// Checks that imread() succeeds on a complete JPEG XL file and returns an
+// empty image, without throwing, when the file is missing its last byte.
+TEST(Imgcodecs_JpegXL, imread_truncated_stream)
+{
+    string tmp_fname = cv::tempfile(".jxl");
+    cv::Mat src(100, 100, CV_8UC1, Scalar(40,50,10));
+    vector<uint8_t> buff;
+    vector<int> param;
+
+    bool ret = false;
+    EXPECT_NO_THROW(ret = cv::imencode(".jxl", src, buff, param));
+    // The stream is indexed (&buff[0]) and shortened (size() - 1) below,
+    // so stop here if encoding did not produce any data.
+    ASSERT_TRUE(ret);
+    ASSERT_FALSE(buff.empty());
+
+    // Write the complete stream to disk and make sure it decodes.
+    FILE *fp = fopen(tmp_fname.c_str(), "wb");
+    ASSERT_TRUE(fp != nullptr); // fwrite/fclose on a null FILE* would crash
+    EXPECT_EQ(buff.size(), fwrite(&buff[0], sizeof(uint8_t), buff.size(), fp));
+    fclose(fp);
+
+    cv::Mat decoded;
+    EXPECT_NO_THROW(decoded = cv::imread(tmp_fname, cv::IMREAD_COLOR));
+    EXPECT_FALSE(decoded.empty());
+
+    // Overwrite the file with the stream minus its last byte and try to decode it.
+    fp = fopen(tmp_fname.c_str(), "wb");
+    ASSERT_TRUE(fp != nullptr);
+    EXPECT_EQ(buff.size() - 1, fwrite(&buff[0], sizeof(uint8_t), buff.size() - 1, fp));
+    fclose(fp);
+
+    EXPECT_NO_THROW(decoded = cv::imread(tmp_fname, cv::IMREAD_COLOR));
+    EXPECT_TRUE(decoded.empty());
+
+    // Delete temporary file
+    remove(tmp_fname.c_str());
+}
+
+
 #endif  // HAVE_JPEGXL

 }  // namespace
--- a/modules/imgcodecs/test/test_precomp.hpp
+++ b/modules/imgcodecs/test/test_precomp.hpp
@ -25,6 +25,16 @@ void PrintTo(const ImreadModes& val, std::ostream* os)
        v &= ~IMREAD_COLOR;
        *os << "IMREAD_COLOR" << (v == 0 ? "" : " | ");
    }
+    else if ((v & IMREAD_COLOR_RGB) != 0)
+    {
+        CV_Assert(IMREAD_COLOR_RGB == 256);
+        v &= ~IMREAD_COLOR_RGB;
+        *os << "IMREAD_COLOR_RGB" << (v == 0 ? "" : " | ");
+    }
+    else if ((v & IMREAD_ANYCOLOR) != 0)
+    {
+        // Do nothing
+    }
    else
    {
        CV_Assert(IMREAD_GRAYSCALE == 0);
@ -50,11 +60,6 @@ void PrintTo(const ImreadModes& val, std::ostream* os)
        v &= ~IMREAD_IGNORE_ORIENTATION;
        *os << "IMREAD_IGNORE_ORIENTATION" << (v == 0 ? "" : " | ");
    }
-    if ((v & IMREAD_COLOR_RGB) != 0)
-    {
-        v &= ~IMREAD_COLOR_RGB;
-        *os << "IMREAD_COLOR_RGB" << (v == 0 ? "" : " | ");
-    }
    switch (v)
    {
        case IMREAD_UNCHANGED: return;
--- a/modules/imgcodecs/test/test_tiff.cpp
+++ b/modules/imgcodecs/test/test_tiff.cpp
@ -46,41 +46,18 @@ TEST(Imgcodecs_Tiff, decode_tile16384x16384)
 //==================================================================================================
 // See https://github.com/opencv/opencv/issues/22388

-/**
- * Dummy enum to show combination of IMREAD_*.
- */
-enum ImreadMixModes
-{
-    IMREAD_MIX_UNCHANGED                   = IMREAD_UNCHANGED                                     ,
-    IMREAD_MIX_GRAYSCALE                   = IMREAD_GRAYSCALE                                     ,
-    IMREAD_MIX_COLOR                       = IMREAD_COLOR     | IMREAD_COLOR_RGB                  ,
-    IMREAD_MIX_GRAYSCALE_ANYDEPTH          = IMREAD_GRAYSCALE | IMREAD_ANYDEPTH                   ,
-    IMREAD_MIX_GRAYSCALE_ANYCOLOR          = IMREAD_GRAYSCALE                    | IMREAD_ANYCOLOR,
-    IMREAD_MIX_GRAYSCALE_ANYDEPTH_ANYCOLOR = IMREAD_GRAYSCALE | IMREAD_ANYDEPTH  | IMREAD_ANYCOLOR,
-    IMREAD_MIX_COLOR_ANYDEPTH              = IMREAD_COLOR     | IMREAD_ANYDEPTH                   ,
-    IMREAD_MIX_COLOR_ANYCOLOR              = IMREAD_COLOR                        | IMREAD_ANYCOLOR,
-    IMREAD_MIX_COLOR_ANYDEPTH_ANYCOLOR     = IMREAD_COLOR     | IMREAD_ANYDEPTH  | IMREAD_ANYCOLOR
-};
-
-typedef tuple< uint64_t, tuple<string, int>, ImreadMixModes > Bufsize_and_Type;
+typedef tuple< uint64_t, perf::MatType, ImreadModes > Bufsize_and_Type;
 typedef testing::TestWithParam<Bufsize_and_Type> Imgcodecs_Tiff_decode_Huge;

-static inline
-void PrintTo(const ImreadMixModes& val, std::ostream* os)
-{
-    PrintTo( static_cast<ImreadModes>(val), os );
-}
-
 TEST_P(Imgcodecs_Tiff_decode_Huge, regression)
 {
    // Get test parameters
    const uint64_t buffer_size   = get<0>(GetParam());
-    const string mat_type_string =   get<0>(get<1>(GetParam()));
-    const int mat_type           =   get<1>(get<1>(GetParam()));
+    const perf::MatType mat_type = get<1>(GetParam());
    const int imread_mode        = get<2>(GetParam());

    // Detect data file
-    const string req_filename = cv::format("readwrite/huge-tiff/%s_%zu.tif", mat_type_string.c_str(), (size_t)buffer_size);
+    const string req_filename = cv::format("readwrite/huge-tiff/%s_%zu.tif", typeToString(mat_type).c_str(), (size_t)buffer_size);
    const string filename = findDataFile( req_filename );

    // Preparation process for test
@ -125,7 +102,7 @@ TEST_P(Imgcodecs_Tiff_decode_Huge, regression)
            case IMREAD_GRAYSCALE | IMREAD_ANYCOLOR | IMREAD_ANYDEPTH:
                ncn = (ncn == 1)?1:3;
                break;
-            case IMREAD_COLOR | IMREAD_COLOR_RGB:
+            case IMREAD_COLOR:
                ncn = 3;
                depth = 1;
                break;
@ -402,9 +379,9 @@ TEST_P(Imgcodecs_Tiff_decode_Huge, regression)
    case MAKE_FLAG(CV_16UC1, CV_16UC4):
    case MAKE_FLAG(CV_16UC3, CV_16UC4):
    default:
-        FAIL() << cv::format("Unknown test pattern: from = %d ( %d, %d) to = %d ( %d, %d )",
-                              mat_type,   (int)CV_MAT_CN(mat_type   ), ( CV_MAT_DEPTH(mat_type   )==CV_16U)?16:8,
-                              img.type(), (int)CV_MAT_CN(img.type() ), ( CV_MAT_DEPTH(img.type() )==CV_16U)?16:8);
+        FAIL() << cv::format("Unknown test pattern: from = ( %d, %d) to = ( %d, %d )",
+                              (int)CV_MAT_CN(mat_type   ), ( CV_MAT_DEPTH(mat_type   )==CV_16U)?16:8,
+                              (int)CV_MAT_CN(img.type() ), ( CV_MAT_DEPTH(img.type() )==CV_16U)?16:8);
        break;
    }

@ -414,8 +391,8 @@ TEST_P(Imgcodecs_Tiff_decode_Huge, regression)
 // Basic Test
 const Bufsize_and_Type Imgcodecs_Tiff_decode_Huge_list_basic[] =
 {
-    make_tuple<uint64_t, tuple<string,int>,ImreadMixModes>( 1073479680ull, make_tuple<string,int>("CV_8UC1",  CV_8UC1),  IMREAD_MIX_COLOR ),
-    make_tuple<uint64_t, tuple<string,int>,ImreadMixModes>( 2147483648ull, make_tuple<string,int>("CV_16UC4", CV_16UC4), IMREAD_MIX_COLOR ),
+    make_tuple<uint64_t, perf::MatType, ImreadModes>( 1073479680ull, CV_8UC1,  IMREAD_COLOR ), // tuple fields: buffer size, source matrix type, imread mode
+    make_tuple<uint64_t, perf::MatType, ImreadModes>( 2147483648ull, CV_16UC4, IMREAD_COLOR ), // largest case: 2147483648 == 2048 * 1024 * 1024
 };

 INSTANTIATE_TEST_CASE_P(Imgcodecs_Tiff, Imgcodecs_Tiff_decode_Huge,
@ -423,21 +400,25 @@ INSTANTIATE_TEST_CASE_P(Imgcodecs_Tiff, Imgcodecs_Tiff_decode_Huge,
 );

 // Full Test
+//  This full test is disabled by default; the following steps are required to run it.
+//  (1) Replace "DISABLED_Imgcodecs_Tiff_Full" with "Imgcodecs_Tiff_Full" and rebuild opencv_test_imgcodecs.
+//  (2) Set the environment variable "OPENCV_IO_MAX_IMAGE_PIXELS=2147483648".
+//  (3) Run "./bin/opencv_test_imgcodecs --test_tag_enable=mem_6gb,verylong,debug_verylong".

 /**
 * Test lists for combination of IMREAD_*.
 */
-const ImreadMixModes all_modes_Huge_Full[] =
+const ImreadModes all_modes_Huge_Full[] = // every entry is cast back to ImreadModes, presumably because OR-ing the flags yields an int - TODO confirm
 {
-    IMREAD_MIX_UNCHANGED,
-    IMREAD_MIX_GRAYSCALE,
-    IMREAD_MIX_GRAYSCALE_ANYDEPTH,
-    IMREAD_MIX_GRAYSCALE_ANYCOLOR,
-    IMREAD_MIX_GRAYSCALE_ANYDEPTH_ANYCOLOR,
-    IMREAD_MIX_COLOR,
-    IMREAD_MIX_COLOR_ANYDEPTH,
-    IMREAD_MIX_COLOR_ANYCOLOR,
-    IMREAD_MIX_COLOR_ANYDEPTH_ANYCOLOR,
+    static_cast<ImreadModes>(IMREAD_UNCHANGED                                     ) ,
+    static_cast<ImreadModes>(IMREAD_GRAYSCALE                                     ) ,
+    static_cast<ImreadModes>(IMREAD_COLOR                                         ) ,
+    static_cast<ImreadModes>(IMREAD_GRAYSCALE | IMREAD_ANYDEPTH                   ) ,
+    static_cast<ImreadModes>(IMREAD_GRAYSCALE                    | IMREAD_ANYCOLOR) ,
+    static_cast<ImreadModes>(IMREAD_GRAYSCALE | IMREAD_ANYDEPTH  | IMREAD_ANYCOLOR) ,
+    static_cast<ImreadModes>(IMREAD_COLOR     | IMREAD_ANYDEPTH                   ) ,
+    static_cast<ImreadModes>(IMREAD_COLOR                        | IMREAD_ANYCOLOR) ,
+    static_cast<ImreadModes>(IMREAD_COLOR     | IMREAD_ANYDEPTH  | IMREAD_ANYCOLOR)
 };

 const uint64_t huge_buffer_sizes_decode_Full[] =
@ -448,16 +429,17 @@ const uint64_t huge_buffer_sizes_decode_Full[] =
    2147483648ull, // 2048 * 1024 * 1024
 };

-const tuple<string, int> mat_types_Full[] =
+const perf::MatType mat_types_Full[] = // matrix types for the full test: 8-bit and 16-bit unsigned with 1, 3 and 4 channels
 {
-    make_tuple<string, int>("CV_8UC1",  CV_8UC1),  // 8bit  GRAY
-    make_tuple<string, int>("CV_8UC3",  CV_8UC3),  // 24bit RGB
-    make_tuple<string, int>("CV_8UC4",  CV_8UC4),  // 32bit RGBA
-    make_tuple<string, int>("CV_16UC1", CV_16UC1), // 16bit GRAY
-    make_tuple<string, int>("CV_16UC3", CV_16UC3), // 48bit RGB
-    make_tuple<string, int>("CV_16UC4", CV_16UC4), // 64bit RGBA
+    CV_8UC1,  // 8bit  GRAY
+    CV_8UC3,  // 24bit RGB
+    CV_8UC4,  // 32bit RGBA
+    CV_16UC1, // 16bit GRAY
+    CV_16UC3, // 48bit RGB
+    CV_16UC4, // 64bit RGBA
 };

+// INSTANTIATE_TEST_CASE_P(Imgcodecs_Tiff_Full, Imgcodecs_Tiff_decode_Huge,
 INSTANTIATE_TEST_CASE_P(DISABLED_Imgcodecs_Tiff_Full, Imgcodecs_Tiff_decode_Huge,
        testing::Combine(
            testing::ValuesIn(huge_buffer_sizes_decode_Full),
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@ -324,7 +324,8 @@ enum ThresholdTypes {
    THRESH_TOZERO_INV = 4, //!< \f[\texttt{dst} (x,y) =  \fork{0}{if \(\texttt{src}(x,y) > \texttt{thresh}\)}{\texttt{src}(x,y)}{otherwise}\f]
    THRESH_MASK       = 7,
    THRESH_OTSU       = 8, //!< flag, use Otsu algorithm to choose the optimal threshold value
-    THRESH_TRIANGLE   = 16 //!< flag, use Triangle algorithm to choose the optimal threshold value
+    THRESH_TRIANGLE   = 16, //!< flag, use Triangle algorithm to choose the optimal threshold value
+    THRESH_DRYRUN     = 128 //!< flag, compute the threshold only (useful for OTSU/TRIANGLE) without actually running thresholding
 };

 //! adaptive threshold algorithm
@ -3870,7 +3871,7 @@ is \f$W \times H\f$ and templ is \f$w \times h\f$ , then result is \f$(W-w+1) \t
            of channels as template or only one channel, which is then used for all template and
            image channels. If the data type is #CV_8U, the mask is interpreted as a binary mask,
            meaning only elements where mask is nonzero are used and are kept unchanged independent
-            of the actual mask value (weight equals 1). For data tpye #CV_32F, the mask values are
+            of the actual mask value (weight equals 1). For data type #CV_32F, the mask values are
            used as weights. The exact formulas are documented in #TemplateMatchModes.
 */
 CV_EXPORTS_W void matchTemplate( InputArray image, InputArray templ,
@ -4813,6 +4814,11 @@ The function cv::putText renders the specified text string in the image. Symbols
 using the specified font are replaced by question marks. See #getTextSize for a text rendering code
 example.

+The `fontScale` parameter is a scale factor that is multiplied by the base font size:
+- When `fontScale` > 1, the text is magnified.
+- When 0 < `fontScale` < 1, the text is reduced in size.
+- When `fontScale` < 0, the text is mirrored or reversed.
+
@param img Image.
@param text Text string to be drawn.
@param org Bottom-left corner of the text string in the image.
--- a/modules/imgproc/src/box_filter.dispatch.cpp
+++ b/modules/imgproc/src/box_filter.dispatch.cpp
@ -313,7 +313,6 @@ Ptr<FilterEngine> createBoxFilter(int srcType, int dstType, Size ksize,
        CV_CPU_DISPATCH_MODES_ALL);
 }

-
 #if 0 //defined(HAVE_IPP)
 static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType)
 {
--- a/modules/imgproc/src/deriv.cpp
+++ b/modules/imgproc/src/deriv.cpp
@ -378,7 +378,6 @@ void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
    CALL_HAL(sobel, cv_hal_sobel, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, ddepth, cn,
             ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, dx, dy, ksize, scale, delta, borderType&~BORDER_ISOLATED);

-
    //CV_IPP_RUN_FAST(ipp_Deriv(src, dst, dx, dy, ksize, scale, delta, borderType));

    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
--- a/modules/imgproc/src/filter.dispatch.cpp
+++ b/modules/imgproc/src/filter.dispatch.cpp
@ -592,7 +592,6 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,

    // For smaller filter kernels, there is a special kernel that is more
    // efficient than the general one.
-    UMat kernalDataUMat;
    if (device.isIntel() && (device.type() & ocl::Device::TYPE_GPU) &&
        ((ksize.width < 5 && ksize.height < 5) ||
        (ksize.width == 5 && ksize.height == 5 && cn == 1)))
--- a/modules/imgproc/src/filter.hpp
+++ b/modules/imgproc/src/filter.hpp
@ -43,6 +43,9 @@
 #ifndef OPENCV_IMGPROC_FILTER_HPP
 #define OPENCV_IMGPROC_FILTER_HPP

+#include <opencv2/core.hpp>
+#include <vector>
+
 namespace cv
 {
 #ifdef HAVE_OPENCL
--- a/modules/imgproc/src/floodfill.cpp
+++ b/modules/imgproc/src/floodfill.cpp
@ -283,7 +283,7 @@ floodFillGrad_CnIR( Mat& image, Mat& msk,
                   Diff diff, ConnectedComp* region, int flags,
                   std::vector<FFillSegment>* buffer )
 {
-    size_t step = image.step, maskStep = msk.step;
+    auto step = static_cast<std::ptrdiff_t>(image.step), maskStep = static_cast<std::ptrdiff_t>(msk.step);
    uchar* pImage = image.ptr();
    _Tp* img = (_Tp*)(pImage + step*seed.y);
    uchar* pMask = msk.ptr() + maskStep + sizeof(_MTp);
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@ -957,7 +957,7 @@ static void remapBicubic( const Mat& _src, Mat& _dst, const Mat& _xy,
                    sum += S[0]*w[8] + S[cn]*w[9] + S[cn*2]*w[10] + S[cn*3]*w[11];
                    S += sstep;
                    sum += S[0]*w[12] + S[cn]*w[13] + S[cn*2]*w[14] + S[cn*3]*w[15];
-                    S += 1 - sstep*3;
+                    S -= sstep * 3 - 1;
                    D[k] = castOp(sum);
                }
            }
@ -990,9 +990,9 @@ static void remapBicubic( const Mat& _src, Mat& _dst, const Mat& _xy,
                    for(int i = 0; i < 4; i++, w += 4 )
                    {
                        int yi = y[i];
-                        const T* S = S0 + yi*sstep;
                        if( yi < 0 )
                            continue;
+                        const T* S = S0 + yi*sstep;
                        if( x[0] >= 0 )
                            sum += (S[x[0]] - cv)*w[0];
                        if( x[1] >= 0 )
@ -1050,9 +1050,9 @@ static void remapLanczos4( const Mat& _src, Mat& _dst, const Mat& _xy,
            const int off_x = isRelative ? (_offset.x+dx) : 0;
            int sx = XY[dx*2]-3+off_x, sy = XY[dx*2+1]-3+off_y;
            const AT* w = wtab + FXY[dx]*64;
-            const T* S = S0 + sy*sstep + sx*cn;
            if( (unsigned)sx < width1 && (unsigned)sy < height1 )
            {
+                const T* S = S0 + sy*sstep + sx*cn;
                for(int k = 0; k < cn; k++ )
                {
                    WT sum = 0;
@ -1093,9 +1093,9 @@ static void remapLanczos4( const Mat& _src, Mat& _dst, const Mat& _xy,
                    for(int i = 0; i < 8; i++, w += 8 )
                    {
                        int yi = y[i];
-                        const T* S1 = S0 + yi*sstep;
                        if( yi < 0 )
                            continue;
+                        const T* S1 = S0 + yi*sstep;
                        if( x[0] >= 0 )
                            sum += (S1[x[0]] - cv)*w[0];
                        if( x[1] >= 0 )
--- a/modules/imgproc/src/pyramids.cpp
+++ b/modules/imgproc/src/pyramids.cpp
@ -45,7 +45,6 @@
 #include "opencl_kernels_imgproc.hpp"
 #include "opencv2/core/hal/intrin.hpp"

-
 namespace cv
 {

--- a/modules/imgproc/src/shapedescr.cpp
+++ b/modules/imgproc/src/shapedescr.cpp
@ -340,9 +340,10 @@ double cv::contourArea( InputArray _contour, bool oriented )
 namespace cv
 {

-static inline Point2f getOfs(int i, float eps)
+static inline Point2f getOfs(float eps) // random 2D offset that callers add to a point; each coordinate is drawn with rng.uniform(-eps, eps)
 {
-    return Point2f(((i & 1)*2 - 1)*eps, ((i & 2) - 1)*eps);
+    RNG& rng = theRNG(); // uses the generator returned by theRNG(); NOTE(review): fitEllipseAMS calls this with eps == 0 on its first pass, which presumably still consumes random numbers - confirm that is intended
+    return Point2f(rng.uniform(-eps, eps), rng.uniform(-eps, eps));
 }

 static RotatedRect fitEllipseNoDirect( InputArray _points )
@ -419,7 +420,7 @@ static RotatedRect fitEllipseNoDirect( InputArray _points )
        float eps = (float)(s/(n*2)*1e-3);
        for( i = 0; i < n; i++ )
        {
-            Point2f p = ptsf_copy[i] + getOfs(i, eps);
+            const Point2f p = ptsf_copy[i] + getOfs(eps);
            ptsf_copy[i] = p;
        }

@ -515,6 +516,7 @@ cv::RotatedRect cv::fitEllipseAMS( InputArray _points )
    Mat points = _points.getMat();
    int i, n = points.checkVector(2);
    int depth = points.depth();
+    float eps = 0;
    CV_Assert( n >= 0 && (depth == CV_32F || depth == CV_32S));

    RotatedRect box;
@ -552,57 +554,70 @@ cv::RotatedRect cv::fitEllipseAMS( InputArray _points )
    }
    double scale = 100./(s > FLT_EPSILON ? s : (double)FLT_EPSILON);

-    for( i = 0; i < n; i++ )
+    // first, try the original pointset.
+    // if it's singular, try to shift the points a bit
+    int iter = 0;
+    for( iter = 0; iter < 2; iter++ )
    {
-        Point2f p = is_float ? ptsf[i] : Point2f((float)ptsi[i].x, (float)ptsi[i].y);
-        double px = (p.x - c.x)*scale, py = (p.y - c.y)*scale;
+        for( i = 0; i < n; i++ )
+        {
+            Point2f p = is_float ? ptsf[i] : Point2f((float)ptsi[i].x, (float)ptsi[i].y);
+            const Point2f delta = getOfs(eps);
+            const double px = (p.x + delta.x - c.x)*scale, py = (p.y + delta.y - c.y)*scale;

-        A.at<double>(i,0) = px*px;
-        A.at<double>(i,1) = px*py;
-        A.at<double>(i,2) = py*py;
-        A.at<double>(i,3) = px;
-        A.at<double>(i,4) = py;
-        A.at<double>(i,5) = 1.0;
+            A.at<double>(i,0) = px*px;
+            A.at<double>(i,1) = px*py;
+            A.at<double>(i,2) = py*py;
+            A.at<double>(i,3) = px;
+            A.at<double>(i,4) = py;
+            A.at<double>(i,5) = 1.0;
+        }
+        cv::mulTransposed( A, DM, true, noArray(), 1.0, -1 );
+        DM *= (1.0/n);
+        double dnm = ( DM(2,5)*(DM(0,5) + DM(2,5)) - (DM(1,5)*DM(1,5)) );
+        double ddm =  (4.*(DM(0,5) + DM(2,5))*( (DM(0,5)*DM(2,5)) - (DM(1,5)*DM(1,5))));
+        double ddmm = (2.*(DM(0,5) + DM(2,5))*( (DM(0,5)*DM(2,5)) - (DM(1,5)*DM(1,5))));
+
+        M(0,0)=((-DM(0,0) + DM(0,2) + DM(0,5)*DM(0,5))*(DM(1,5)*DM(1,5)) + (-2*DM(0,1)*DM(1,5) + DM(0,5)*(DM(0,0) \
+                - (DM(0,5)*DM(0,5)) + (DM(1,5)*DM(1,5))))*DM(2,5) + (DM(0,0) - (DM(0,5)*DM(0,5)))*(DM(2,5)*DM(2,5))) / ddm;
+        M(0,1)=((DM(1,5)*DM(1,5))*(-DM(0,1) + DM(1,2) + DM(0,5)*DM(1,5)) + (DM(0,1)*DM(0,5) - ((DM(0,5)*DM(0,5)) + 2*DM(1,1))*DM(1,5) + \
+                (DM(1,5)*DM(1,5)*DM(1,5)))*DM(2,5) + (DM(0,1) - DM(0,5)*DM(1,5))*(DM(2,5)*DM(2,5))) / ddm;
+        M(0,2)=(-2*DM(1,2)*DM(1,5)*DM(2,5) - DM(0,5)*(DM(2,5)*DM(2,5))*(DM(0,5) + DM(2,5)) + DM(0,2)*dnm + \
+                (DM(1,5)*DM(1,5))*(DM(2,2) + DM(2,5)*(DM(0,5) + DM(2,5))))/ddm;
+        M(0,3)=(DM(1,5)*(DM(1,5)*DM(2,3) - 2*DM(1,3)*DM(2,5)) + DM(0,3)*dnm) / ddm;
+        M(0,4)=(DM(1,5)*(DM(1,5)*DM(2,4) - 2*DM(1,4)*DM(2,5)) + DM(0,4)*dnm) / ddm;
+        M(1,0)=(-(DM(0,2)*DM(0,5)*DM(1,5)) + (2*DM(0,1)*DM(0,5) - DM(0,0)*DM(1,5))*DM(2,5))/ddmm;
+        M(1,1)=(-(DM(0,1)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,2)*DM(1,5)) + 2*DM(1,1)*DM(2,5)))/ddmm;
+        M(1,2)=(-(DM(0,2)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,5)*DM(2,2)) + 2*DM(1,2)*DM(2,5)))/ddmm;
+        M(1,3)=(-(DM(0,3)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,5)*DM(2,3)) + 2*DM(1,3)*DM(2,5)))/ddmm;
+        M(1,4)=(-(DM(0,4)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,5)*DM(2,4)) + 2*DM(1,4)*DM(2,5)))/ddmm;
+        M(2,0)=(-2*DM(0,1)*DM(0,5)*DM(1,5) + (DM(0,0) + (DM(0,5)*DM(0,5)))*(DM(1,5)*DM(1,5)) + DM(0,5)*(-(DM(0,5)*DM(0,5)) \
+                + (DM(1,5)*DM(1,5)))*DM(2,5) - (DM(0,5)*DM(0,5))*(DM(2,5)*DM(2,5)) + DM(0,2)*(-(DM(1,5)*DM(1,5)) + DM(0,5)*(DM(0,5) + DM(2,5)))) / ddm;
+        M(2,1)=((DM(0,5)*DM(0,5))*(DM(1,2) - DM(1,5)*DM(2,5)) + (DM(1,5)*DM(1,5))*(DM(0,1) - DM(1,2) + DM(1,5)*DM(2,5)) \
+                + DM(0,5)*(DM(1,2)*DM(2,5) + DM(1,5)*(-2*DM(1,1) + (DM(1,5)*DM(1,5)) - (DM(2,5)*DM(2,5))))) / ddm;
+        M(2,2)=((DM(0,5)*DM(0,5))*(DM(2,2) - (DM(2,5)*DM(2,5))) + (DM(1,5)*DM(1,5))*(DM(0,2) - DM(2,2) + (DM(2,5)*DM(2,5))) + \
+                DM(0,5)*(-2*DM(1,2)*DM(1,5) + DM(2,5)*((DM(1,5)*DM(1,5)) + DM(2,2) - (DM(2,5)*DM(2,5))))) / ddm;
+        M(2,3)=((DM(1,5)*DM(1,5))*(DM(0,3) - DM(2,3)) + (DM(0,5)*DM(0,5))*DM(2,3) + DM(0,5)*(-2*DM(1,3)*DM(1,5) + DM(2,3)*DM(2,5))) / ddm;
+        M(2,4)=((DM(1,5)*DM(1,5))*(DM(0,4) - DM(2,4)) + (DM(0,5)*DM(0,5))*DM(2,4) + DM(0,5)*(-2*DM(1,4)*DM(1,5) + DM(2,4)*DM(2,5))) / ddm;
+        M(3,0)=DM(0,3);
+        M(3,1)=DM(1,3);
+        M(3,2)=DM(2,3);
+        M(3,3)=DM(3,3);
+        M(3,4)=DM(3,4);
+        M(4,0)=DM(0,4);
+        M(4,1)=DM(1,4);
+        M(4,2)=DM(2,4);
+        M(4,3)=DM(3,4);
+        M(4,4)=DM(4,4);
+
+        if (fabs(cv::determinant(M)) > 1.0e-10) {
+            break;
+        }
+
+        eps = (float)(s/(n*2)*1e-2);
    }
-    cv::mulTransposed( A, DM, true, noArray(), 1.0, -1 );
-    DM *= (1.0/n);
-    double dnm = ( DM(2,5)*(DM(0,5) + DM(2,5)) - (DM(1,5)*DM(1,5)) );
-    double ddm =  (4.*(DM(0,5) + DM(2,5))*( (DM(0,5)*DM(2,5)) - (DM(1,5)*DM(1,5))));
-    double ddmm = (2.*(DM(0,5) + DM(2,5))*( (DM(0,5)*DM(2,5)) - (DM(1,5)*DM(1,5))));

-    M(0,0)=((-DM(0,0) + DM(0,2) + DM(0,5)*DM(0,5))*(DM(1,5)*DM(1,5)) + (-2*DM(0,1)*DM(1,5) + DM(0,5)*(DM(0,0) \
-            - (DM(0,5)*DM(0,5)) + (DM(1,5)*DM(1,5))))*DM(2,5) + (DM(0,0) - (DM(0,5)*DM(0,5)))*(DM(2,5)*DM(2,5))) / ddm;
-    M(0,1)=((DM(1,5)*DM(1,5))*(-DM(0,1) + DM(1,2) + DM(0,5)*DM(1,5)) + (DM(0,1)*DM(0,5) - ((DM(0,5)*DM(0,5)) + 2*DM(1,1))*DM(1,5) + \
-            (DM(1,5)*DM(1,5)*DM(1,5)))*DM(2,5) + (DM(0,1) - DM(0,5)*DM(1,5))*(DM(2,5)*DM(2,5))) / ddm;
-    M(0,2)=(-2*DM(1,2)*DM(1,5)*DM(2,5) - DM(0,5)*(DM(2,5)*DM(2,5))*(DM(0,5) + DM(2,5)) + DM(0,2)*dnm + \
-            (DM(1,5)*DM(1,5))*(DM(2,2) + DM(2,5)*(DM(0,5) + DM(2,5))))/ddm;
-    M(0,3)=(DM(1,5)*(DM(1,5)*DM(2,3) - 2*DM(1,3)*DM(2,5)) + DM(0,3)*dnm) / ddm;
-    M(0,4)=(DM(1,5)*(DM(1,5)*DM(2,4) - 2*DM(1,4)*DM(2,5)) + DM(0,4)*dnm) / ddm;
-    M(1,0)=(-(DM(0,2)*DM(0,5)*DM(1,5)) + (2*DM(0,1)*DM(0,5) - DM(0,0)*DM(1,5))*DM(2,5))/ddmm;
-    M(1,1)=(-(DM(0,1)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,2)*DM(1,5)) + 2*DM(1,1)*DM(2,5)))/ddmm;
-    M(1,2)=(-(DM(0,2)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,5)*DM(2,2)) + 2*DM(1,2)*DM(2,5)))/ddmm;
-    M(1,3)=(-(DM(0,3)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,5)*DM(2,3)) + 2*DM(1,3)*DM(2,5)))/ddmm;
-    M(1,4)=(-(DM(0,4)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,5)*DM(2,4)) + 2*DM(1,4)*DM(2,5)))/ddmm;
-    M(2,0)=(-2*DM(0,1)*DM(0,5)*DM(1,5) + (DM(0,0) + (DM(0,5)*DM(0,5)))*(DM(1,5)*DM(1,5)) + DM(0,5)*(-(DM(0,5)*DM(0,5)) \
-            + (DM(1,5)*DM(1,5)))*DM(2,5) - (DM(0,5)*DM(0,5))*(DM(2,5)*DM(2,5)) + DM(0,2)*(-(DM(1,5)*DM(1,5)) + DM(0,5)*(DM(0,5) + DM(2,5)))) / ddm;
-    M(2,1)=((DM(0,5)*DM(0,5))*(DM(1,2) - DM(1,5)*DM(2,5)) + (DM(1,5)*DM(1,5))*(DM(0,1) - DM(1,2) + DM(1,5)*DM(2,5)) \
-            + DM(0,5)*(DM(1,2)*DM(2,5) + DM(1,5)*(-2*DM(1,1) + (DM(1,5)*DM(1,5)) - (DM(2,5)*DM(2,5))))) / ddm;
-    M(2,2)=((DM(0,5)*DM(0,5))*(DM(2,2) - (DM(2,5)*DM(2,5))) + (DM(1,5)*DM(1,5))*(DM(0,2) - DM(2,2) + (DM(2,5)*DM(2,5))) + \
-             DM(0,5)*(-2*DM(1,2)*DM(1,5) + DM(2,5)*((DM(1,5)*DM(1,5)) + DM(2,2) - (DM(2,5)*DM(2,5))))) / ddm;
-    M(2,3)=((DM(1,5)*DM(1,5))*(DM(0,3) - DM(2,3)) + (DM(0,5)*DM(0,5))*DM(2,3) + DM(0,5)*(-2*DM(1,3)*DM(1,5) + DM(2,3)*DM(2,5))) / ddm;
-    M(2,4)=((DM(1,5)*DM(1,5))*(DM(0,4) - DM(2,4)) + (DM(0,5)*DM(0,5))*DM(2,4) + DM(0,5)*(-2*DM(1,4)*DM(1,5) + DM(2,4)*DM(2,5))) / ddm;
-    M(3,0)=DM(0,3);
-    M(3,1)=DM(1,3);
-    M(3,2)=DM(2,3);
-    M(3,3)=DM(3,3);
-    M(3,4)=DM(3,4);
-    M(4,0)=DM(0,4);
-    M(4,1)=DM(1,4);
-    M(4,2)=DM(2,4);
-    M(4,3)=DM(3,4);
-    M(4,4)=DM(4,4);
-
-    if (fabs(cv::determinant(M)) > 1.0e-10) {
+    if (iter < 2) {
            Mat eVal, eVec;
            eigenNonSymmetric(M, eVal, eVec);

@ -744,7 +759,7 @@ cv::RotatedRect cv::fitEllipseDirect( InputArray _points )
        for( i = 0; i < n; i++ )
        {
            Point2f p = is_float ? ptsf[i] : Point2f((float)ptsi[i].x, (float)ptsi[i].y);
-            Point2f delta = getOfs(i, eps);
+            const Point2f delta = getOfs(eps);
            double px = (p.x + delta.x - c.x)*scale, py = (p.y + delta.y - c.y)*scale;

            A.at<double>(i,0) = px*px;
--- a/modules/imgproc/src/sumpixels.avx512_skx.hpp
+++ b/modules/imgproc/src/sumpixels.avx512_skx.hpp
@ -385,7 +385,7 @@ __m512d IntegralCalculator < 3 > ::calculate_integral(const __m512i src_longs, c
    //    shifts data left by 3 and 6 qwords(lanes) and gets rolling sum in all lanes
    //   Vertical LANES:     76543210
    //   src_longs       :   HGFEDCBA
-    //   shit3lanes      : + EDCBA
+    //   shift3lanes     : + EDCBA
    //   shift6lanes     : + BA
    //   carry_over_idxs : + 65765765  (index position of result from previous iteration)
    //                     = integral
@ -418,7 +418,7 @@ __m512d IntegralCalculator < 4 > ::calculate_integral(const __m512i src_longs, c
    //    shifts data left by 3 and 6 qwords(lanes) and gets rolling sum in all lanes
    //   Vertical LANES:     76543210
    //   src_longs       :   HGFEDCBA
-    //   shit4lanes      : + DCBA
+    //   shift4lanes     : + DCBA
    //   carry_over_idxs : + 76547654  (index position of result from previous iteration)
    //                     = integral
    __m512i shifted4lanes = _mm512_maskz_expand_epi64(0xF0, src_longs);
@ -464,6 +464,7 @@ void calculate_integral_avx512(const uchar *src,   size_t _srcstep,
        case 4: {
            IntegralCalculator< 4 > calculator;
            calculator.calculate_integral_avx512(src, _srcstep, sum, _sumstep, sqsum, _sqsumstep, width, height);
+            break;
        }
    }
 }
--- a/modules/imgproc/src/thresh.cpp
+++ b/modules/imgproc/src/thresh.cpp
@ -1402,10 +1402,13 @@ static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, d
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
        kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+    const bool isDisabled = ((thresh_type & THRESH_DRYRUN) != 0);
+    thresh_type &= ~THRESH_DRYRUN;

-    if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC ||
-           thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) ||
-         (!doubleSupport && depth == CV_64F))
+    if ( isDisabled ||
+        !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC ||
+          thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) ||
+        (!doubleSupport && depth == CV_64F))
        return false;

    const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
@ -1450,10 +1453,14 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
    CV_OCL_RUN_(_src.dims() <= 2 && _dst.isUMat(),
                ocl_threshold(_src, _dst, thresh, maxval, type), thresh)

+    const bool isDisabled = ((type & THRESH_DRYRUN) != 0);
+    type &= ~THRESH_DRYRUN;
+
    Mat src = _src.getMat();

-    _dst.create( src.size(), src.type() );
-    Mat dst = _dst.getMat();
+    if (!isDisabled)
+        _dst.create( src.size(), src.type() );
+    Mat dst = isDisabled ? cv::Mat() : _dst.getMat();

    int automatic_thresh = (type & ~cv::THRESH_MASK);
    type &= THRESH_MASK;
@ -1480,6 +1487,9 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
    {
        int ithresh = cvFloor(thresh);
        thresh = ithresh;
+        if (isDisabled)
+            return thresh;
+
        int imaxval = cvRound(maxval);
        if( type == THRESH_TRUNC )
            imaxval = ithresh;
@ -1501,7 +1511,6 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
            return thresh;
        }

-
        thresh = ithresh;
        maxval = imaxval;
    }
@ -1509,6 +1518,9 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
    {
        int ithresh = cvFloor(thresh);
        thresh = ithresh;
+        if (isDisabled)
+            return thresh;
+
        int imaxval = cvRound(maxval);
        if( type == THRESH_TRUNC )
            imaxval = ithresh;
@ -1536,6 +1548,9 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
    {
        int ithresh = cvFloor(thresh);
        thresh = ithresh;
+        if (isDisabled)
+            return thresh;
+
        int imaxval = cvRound(maxval);
        if (type == THRESH_TRUNC)
            imaxval = ithresh;
@ -1567,6 +1582,9 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
    else
        CV_Error( cv::Error::StsUnsupportedFormat, "" );

+    if (isDisabled)
+        return thresh;
+
    parallel_for_(Range(0, dst.rows),
                  ThresholdRunner(src, dst, thresh, maxval, type),
                  dst.total()/(double)(1<<16));
--- a/modules/imgproc/test/ocl/test_imgproc.cpp
+++ b/modules/imgproc/test/ocl/test_imgproc.cpp
@ -386,6 +386,40 @@ OCL_TEST_P(Threshold, Mat)
    }
 }

+struct Threshold_Dryrun : // fixture for the Threshold_Dryrun OpenCL test (cv::threshold with THRESH_DRYRUN)
+        public ImgprocTestBase
+{
+    int thresholdType; // threshold operation under test, taken from test parameter 2
+
+    virtual void SetUp() // copies the test parameters into the fixture fields
+    {
+        type = GET_PARAM(0);
+        thresholdType = GET_PARAM(2); // parameter 1 is not read by this fixture
+        useRoi = GET_PARAM(3);
+    }
+};
+
+OCL_TEST_P(Threshold_Dryrun, Mat) // calls cv::threshold with THRESH_DRYRUN on the Mat and UMat ROIs and compares the destinations
+{
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        random_roi();
+
+        double maxVal = randomDouble(20.0, 127.0);
+        double thresh = randomDouble(0.0, maxVal);
+
+        const int _thresholdType = thresholdType | THRESH_DRYRUN; // add the dry-run flag to the parametrized operation
+
+        src_roi.copyTo(dst_roi); // pre-fill both destinations with the source data before the calls
+        usrc_roi.copyTo(udst_roi);
+
+        OCL_OFF(cv::threshold(src_roi, dst_roi, thresh, maxVal, _thresholdType));
+        OCL_ON(cv::threshold(usrc_roi, udst_roi, thresh, maxVal, _thresholdType));
+
+        OCL_EXPECT_MATS_NEAR(dst, 0); // NOTE(review): presumably compares dst against udst only; neither is checked against src, so this would also pass if both paths modified the destination identically - confirm
+    }
+}
+
 /////////////////////////////////////////// CLAHE //////////////////////////////////////////////////

 PARAM_TEST_CASE(CLAHETest, Size, double, bool)
@ -483,6 +517,16 @@ OCL_INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine(
                                   ThreshOp(THRESH_TOZERO), ThreshOp(THRESH_TOZERO_INV)),
                            Bool()));

+OCL_INSTANTIATE_TEST_CASE_P(Imgproc, Threshold_Dryrun, Combine( // parameter space for the Threshold_Dryrun test
+                            Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4, // matrix types: 8U, 16S and 32F with 1-4 channels
+                                   CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
+                                   CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4),
+                            Values(0), // fixed value; Threshold_Dryrun::SetUp does not read this parameter
+                            Values(ThreshOp(THRESH_BINARY), // the five threshold operations combined with THRESH_DRYRUN by the test
+                                   ThreshOp(THRESH_BINARY_INV), ThreshOp(THRESH_TRUNC),
+                                   ThreshOp(THRESH_TOZERO), ThreshOp(THRESH_TOZERO_INV)),
+                            Bool())); // ROI flag, read as useRoi in SetUp
+
 OCL_INSTANTIATE_TEST_CASE_P(Imgproc, CLAHETest, Combine(
                            Values(Size(4, 4), Size(32, 8), Size(8, 64)),
                            Values(0.0, 10.0, 62.0, 300.0),
--- a/modules/imgproc/test/test_color.cpp
+++ b/modules/imgproc/test/test_color.cpp
@ -1485,9 +1485,6 @@ double CV_ColorRGBTest::get_success_error_level( int /*test_case_idx*/, int /*i*
 void CV_ColorRGBTest::convert_forward( const Mat& src, Mat& dst )
 {
    int depth = src.depth(), cn = src.channels();
-/*#if defined _DEBUG || defined DEBUG
-    int dst_cn = CV_MAT_CN(dst->type);
-#endif*/
    int i, j, cols = src.cols;
    int g_rshift = dst_bits == 16 ? 2 : 3;
    int r_lshift = dst_bits == 16 ? 11 : 10;
@ -1571,9 +1568,6 @@ void CV_ColorRGBTest::convert_forward( const Mat& src, Mat& dst )
 void CV_ColorRGBTest::convert_backward( const Mat& /*src*/, const Mat& src, Mat& dst )
 {
    int depth = src.depth(), cn = dst.channels();
-/*#if defined _DEBUG || defined DEBUG
-    int src_cn = CV_MAT_CN(src->type);
-#endif*/
    int i, j, cols = src.cols;
    int g_lshift = dst_bits == 16 ? 2 : 3;
    int r_rshift = dst_bits == 16 ? 11 : 10;
--- a/modules/imgproc/test/test_fitellipse.cpp
+++ b/modules/imgproc/test/test_fitellipse.cpp
@ -102,4 +102,15 @@ TEST(Imgproc_FitEllipse_JavaCase, accuracy) {
    EXPECT_NEAR(e.size.height, sqrt(2.)*2, 0.4);
 }

+TEST(Imgproc_FitEllipse_HorizontalLine, accuracy) { // fitEllipse on seven collinear points lying on the line y = 100
+    vector<Point2f> pts({{-300, 100}, {-200, 100}, {-100, 100}, {0, 100}, {100, 100}, {200, 100}, {300, 100}});
+    const RotatedRect el = fitEllipse(pts);
+
+    EXPECT_NEAR(el.center.x, -100, 100); // NOTE(review): accepts center.x in [-200, 0] although the points are symmetric about x = 0 - confirm the expected value
+    EXPECT_NEAR(el.center.y, 100, 1);
+    EXPECT_NEAR(el.size.width, 1, 1); // width must lie within [0, 2]
+    EXPECT_GE(el.size.height, 150);
+    EXPECT_NEAR(el.angle, 90, 0.1);
+}
+
 }} // namespace
--- a/Show More
+++ b/Show More