Merge branch 4.x

2025-08-06 06:26:29 +08:00 · 2024-09-10 10:15:22 +03:00 · 2024-09-10 10:15:22 +03:00 · b574db2cff
commit b574db2cff
parent 99bc88c259 79faf857d9
26 changed files with 237 additions and 204 deletions
--- a/3rdparty/carotene/CMakeLists.txt
+++ b/3rdparty/carotene/CMakeLists.txt
@ -42,17 +42,9 @@ endif()

 if(WITH_NEON)
    target_compile_definitions(carotene_objs PRIVATE "-DWITH_NEON")
-    if(NOT DEFINED CAROTENE_NEON_ARCH )
-    elseif(CAROTENE_NEON_ARCH EQUAL 8)
-	    target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=8")
-    elseif(CAROTENE_NEON_ARCH EQUAL 7)
-	    target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=7")
-    else()
-	    target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=0")
-    endif()
 endif()

- if(MINGW) 
+ if(MINGW)
    target_compile_definitions(carotene_objs PRIVATE "-D_USE_MATH_DEFINES=1")
 endif()

--- a/3rdparty/carotene/hal/tegra_hal.hpp
+++ b/3rdparty/carotene/hal/tegra_hal.hpp
@ -1857,7 +1857,7 @@ TegraCvtColor_Invoker(bgrx2hsvf, bgrx2hsv, src_data + static_cast<size_t>(range.
 #endif

 // The optimized branch was developed for old armv7 processors and leads to perf degradation on armv8
-#if defined(DCAROTENE_NEON_ARCH) && (DCAROTENE_NEON_ARCH == 7)
+#if defined(__ARM_ARCH) && (__ARM_ARCH == 7)
 inline CAROTENE_NS::BORDER_MODE borderCV2Carotene(int borderType)
 {
    switch(borderType)
@ -1928,7 +1928,7 @@ inline int TEGRA_GaussianBlurBinomial(const uchar* src_data, size_t src_step, uc
 #undef cv_hal_gaussianBlurBinomial
 #define cv_hal_gaussianBlurBinomial TEGRA_GaussianBlurBinomial

-#endif // DCAROTENE_NEON_ARCH=7
+#endif // __ARM_ARCH=7

 #endif // OPENCV_IMGPROC_HAL_INTERFACE_H

--- a/3rdparty/carotene/src/common.hpp
+++ b/3rdparty/carotene/src/common.hpp
@ -58,17 +58,6 @@

 namespace CAROTENE_NS { namespace internal {

-#ifndef CAROTENE_NEON_ARCH
-#    if defined(__aarch64__) || defined(__aarch32__)
-#        define CAROTENE_NEON_ARCH 8
-#    else
-#        define CAROTENE_NEON_ARCH 7
-#    endif
-#endif
-#if ( !defined(__aarch64__) && !defined(__aarch32__) ) && (CAROTENE_NEON_ARCH == 8 )
-#    error("ARMv7 doen't support A32/A64 Neon instructions")
-#endif
-
 inline void prefetch(const void *ptr, size_t offset = 32*10)
 {
 #if defined __GNUC__
--- a/3rdparty/carotene/src/vround_helper.hpp
+++ b/3rdparty/carotene/src/vround_helper.hpp
@ -57,7 +57,7 @@ namespace CAROTENE_NS { namespace internal {

 inline uint32x4_t vroundq_u32_f32(const float32x4_t val)
 {
-#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
    return vcvtnq_u32_f32(val);
 #else
    const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA);
@ -67,7 +67,7 @@ inline uint32x4_t vroundq_u32_f32(const float32x4_t val)

 inline uint32x2_t vround_u32_f32(const float32x2_t val)
 {
-#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
    return vcvtn_u32_f32(val);
 #else
    const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA);
@ -77,7 +77,7 @@ inline uint32x2_t vround_u32_f32(const float32x2_t val)

 inline int32x4_t vroundq_s32_f32(const float32x4_t val)
 {
-#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
    return vcvtnq_s32_f32(val);
 #else
    const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA);
@ -87,7 +87,7 @@ inline int32x4_t vroundq_s32_f32(const float32x4_t val)

 inline int32x2_t vround_s32_f32(const float32x2_t val)
 {
-#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
    return vcvtn_s32_f32(val);
 #else
    const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA);
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -121,14 +121,20 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ${ENABLE_PIC})
 ocv_cmake_hook(PRE_CMAKE_BOOTSTRAP)

 # Bootstrap CMake system: setup CMAKE_SYSTEM_NAME and other vars
+
+# workaround: https://gitlab.kitware.com/cmake/cmake/-/issues/20989
 if(OPENCV_WORKAROUND_CMAKE_20989)
  set(CMAKE_SYSTEM_PROCESSOR_BACKUP ${CMAKE_SYSTEM_PROCESSOR})
 endif()
-enable_language(CXX C)
+
+project(OpenCV CXX C)
+
 if(OPENCV_WORKAROUND_CMAKE_20989)
  set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR_BACKUP})
 endif()

+enable_testing()
+
 ocv_cmake_hook(POST_CMAKE_BOOTSTRAP)

 if(NOT OPENCV_SKIP_CMAKE_SYSTEM_FILE)
@ -151,10 +157,6 @@ if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)  # https://cmake.org/cmake/help/
  endif()
 endif()

-enable_testing()
-
-project(OpenCV CXX C)
-
 if(MSVC)
  set(CMAKE_USE_RELATIVE_PATHS ON CACHE INTERNAL "" FORCE)
 endif()
@ -163,70 +165,30 @@ ocv_cmake_eval(DEBUG_PRE ONCE)

 ocv_clear_vars(OpenCVModules_TARGETS)

-include(cmake/OpenCVDownload.cmake)
-
-set(BUILD_LIST "" CACHE STRING "Build only listed modules (comma-separated, e.g. 'videoio,dnn,ts')")
-
 # ----------------------------------------------------------------------------
-# Break in case of popular CMake configuration mistakes
+#  Autodetect if we are in a GIT repository
 # ----------------------------------------------------------------------------
-if(NOT CMAKE_SIZEOF_VOID_P GREATER 0)
-  message(FATAL_ERROR "CMake fails to determine the bitness of the target platform.
-  Please check your CMake and compiler installation. If you are cross-compiling then ensure that your CMake toolchain file correctly sets the compiler details.")
+find_host_package(Git QUIET)
+
+if(NOT DEFINED OPENCV_VCSVERSION AND GIT_FOUND)
+  ocv_git_describe(OPENCV_VCSVERSION "${OpenCV_SOURCE_DIR}")
+elseif(NOT DEFINED OPENCV_VCSVERSION)
+  # We don't have git:
+  set(OPENCV_VCSVERSION "unknown")
 endif()

+include(cmake/OpenCVDownload.cmake)
+
 # ----------------------------------------------------------------------------
 # Detect compiler and target platform architecture
 # ----------------------------------------------------------------------------
 include(cmake/OpenCVDetectCXXCompiler.cmake)
 ocv_cmake_hook(POST_DETECT_COMPILER)

-# Add these standard paths to the search paths for FIND_LIBRARY
-# to find libraries from these locations first
-if(UNIX AND NOT ANDROID)
-  if(X86_64 OR CMAKE_SIZEOF_VOID_P EQUAL 8)
-    if(EXISTS /lib64)
-      list(APPEND CMAKE_LIBRARY_PATH /lib64)
-    else()
-      list(APPEND CMAKE_LIBRARY_PATH /lib)
-    endif()
-    if(EXISTS /usr/lib64)
-      list(APPEND CMAKE_LIBRARY_PATH /usr/lib64)
-    else()
-      list(APPEND CMAKE_LIBRARY_PATH /usr/lib)
-    endif()
-  elseif(X86 OR CMAKE_SIZEOF_VOID_P EQUAL 4)
-    if(EXISTS /lib32)
-      list(APPEND CMAKE_LIBRARY_PATH /lib32)
-    else()
-      list(APPEND CMAKE_LIBRARY_PATH /lib)
-    endif()
-    if(EXISTS /usr/lib32)
-      list(APPEND CMAKE_LIBRARY_PATH /usr/lib32)
-    else()
-      list(APPEND CMAKE_LIBRARY_PATH /usr/lib)
-    endif()
-  endif()
-endif()
-
-# Add these standard paths to the search paths for FIND_PATH
-# to find include files from these locations first
-if(MINGW)
-  if(EXISTS /mingw)
-      list(APPEND CMAKE_INCLUDE_PATH /mingw)
-  endif()
-  if(EXISTS /mingw32)
-      list(APPEND CMAKE_INCLUDE_PATH /mingw32)
-  endif()
-  if(EXISTS /mingw64)
-      list(APPEND CMAKE_INCLUDE_PATH /mingw64)
-  endif()
-endif()
-
 # ----------------------------------------------------------------------------
 # OpenCV cmake options
 # ----------------------------------------------------------------------------
-
+set(BUILD_LIST "" CACHE STRING "Build only listed modules (comma-separated, e.g. 'videoio,dnn,ts')")
 OCV_OPTION(OPENCV_ENABLE_NONFREE "Enable non-free algorithms" OFF)

 # 3rd party libs
@ -658,19 +620,6 @@ ocv_include_directories(${OPENCV_CONFIG_FILE_INCLUDE_DIR})
 # ----------------------------------------------------------------------------
 set(OPENCV_EXTRA_MODULES_PATH "" CACHE PATH "Where to look for additional OpenCV modules (can be ;-separated list of paths)")

-# ----------------------------------------------------------------------------
-#  Autodetect if we are in a GIT repository
-# ----------------------------------------------------------------------------
-find_host_package(Git QUIET)
-
-if(NOT DEFINED OPENCV_VCSVERSION AND GIT_FOUND)
-  ocv_git_describe(OPENCV_VCSVERSION "${OpenCV_SOURCE_DIR}")
-elseif(NOT DEFINED OPENCV_VCSVERSION)
-  # We don't have git:
-  set(OPENCV_VCSVERSION "unknown")
-endif()
-
-
 # ----------------------------------------------------------------------------
 # OpenCV compiler and linker options
 # ----------------------------------------------------------------------------
@ -1004,15 +953,7 @@ foreach(hal ${OpenCV_HAL})
    if(";${CPU_BASELINE_FINAL};" MATCHES ";NEON;")
      add_subdirectory(3rdparty/carotene/hal)
      ocv_hal_register(CAROTENE_HAL_LIBRARIES CAROTENE_HAL_HEADERS CAROTENE_HAL_INCLUDE_DIRS)
-
-      if( NOT DEFINED CAROTENE_NEON_ARCH)
-          set(CAROTENE_NEON_MSG "Auto detected")
-      elseif( CAROTENE_NEON_ARCH GREATER 7)
-          set(CAROTENE_NEON_MSG "Force ARMv8+")
-      else()
-          set(CAROTENE_NEON_MSG "Force ARMv7")
-      endif()
-      list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION}, ${CAROTENE_NEON_MSG})")
+      list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION})")
    else()
      message(STATUS "Carotene: NEON is not available, disabling carotene...")
    endif()
--- a/cmake/OpenCVDetectCXXCompiler.cmake
+++ b/cmake/OpenCVDetectCXXCompiler.cmake
@ -83,6 +83,10 @@ if(NOT DEFINED CMAKE_SIZEOF_VOID_P
    AND NOT OPENCV_SUPPRESS_MESSAGE_MISSING_CMAKE_SIZEOF_VOID_P)
  message(WARNING "OpenCV: CMAKE_SIZEOF_VOID_P is not defined. Perhaps CMake toolchain is broken")
 endif()
+if(NOT CMAKE_SIZEOF_VOID_P GREATER 0)
+  message(FATAL_ERROR "CMake fails to determine the bitness of the target platform.
+  Please check your CMake and compiler installation. If you are cross-compiling then ensure that your CMake toolchain file correctly sets the compiler details.")
+endif()

 message(STATUS "Detected processor: ${CMAKE_SYSTEM_PROCESSOR}")
 if(OPENCV_SKIP_SYSTEM_PROCESSOR_DETECTION)
@ -156,8 +160,10 @@ elseif(MSVC)
    set(OpenCV_ARCH "ARM")
  elseif("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
    set(OpenCV_ARCH "x64")
+  elseif("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4")
+    set(OpenCV_ARCH "x86")
  else()
-    set(OpenCV_ARCH x86)
+    message(FATAL_ERROR "Failed to determine system architecture")
  endif()

  if(MSVC_VERSION EQUAL 1400)
--- a/cmake/OpenCVDownload.cmake
+++ b/cmake/OpenCVDownload.cmake
@ -40,11 +40,14 @@ file(REMOVE "${OPENCV_DOWNLOAD_WITH_WGET}")
 ocv_check_environment_variables(OPENCV_DOWNLOAD_MIRROR_ID)

 function(ocv_init_download_mirror)
+  if(NOT GIT_FOUND)
+    return()
+  endif()
  if(NOT DEFINED OPENCV_DOWNLOAD_MIRROR_ID)
    # Run `git remote get-url origin` to get remote source
    execute_process(
      COMMAND
-        git remote get-url origin
+        ${GIT_EXECUTABLE} remote get-url origin
      WORKING_DIRECTORY
        ${CMAKE_SOURCE_DIR}
      RESULT_VARIABLE
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
@ -1220,7 +1220,7 @@
  title={RANSAC for Dummies With examples using the RANSAC toolbox for Matlab \& Octave and more...},
  author={Marco Zuliani},
  year={2014},
-  url = {https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.475.1243&rep=rep1&type=pdf}
+  url = {http://www.marcozuliani.com/docs/RANSAC4Dummies.pdf}
 }
@inproceedings{forstner1987fast,
  title={A fast operator for detection and precise location of distinct points, corners and center of circular features},
--- a/doc/tutorials/imgproc/imgtrans/remap/remap.markdown
+++ b/doc/tutorials/imgproc/imgtrans/remap/remap.markdown
@ -45,7 +45,7 @@ Theory

    ![](images/Remap_Tutorial_Theory_0.jpg)

-    observe how the red circle changes positions with respect to x (considering \f$x\f$ the horizontal
+    observe how the red circle changes positions with respect to \f$x\f$ (considering \f$x\f$ the horizontal
    direction):

    ![](images/Remap_Tutorial_Theory_1.jpg)
@ -62,19 +62,19 @@ Code
    -   Wait for the user to exit the program

@add_toggle_cpp
-   The tutorial code's is shown lines below. You can also download it from
+-   The tutorial code is shown in the lines below. You can also download it from
    [here](https://github.com/opencv/opencv/tree/5.x/samples/cpp/tutorial_code/ImgTrans/Remap_Demo.cpp)
    @include samples/cpp/tutorial_code/ImgTrans/Remap_Demo.cpp
@end_toggle

@add_toggle_java
-   The tutorial code's is shown lines below. You can also download it from
+-   The tutorial code is shown in the lines below. You can also download it from
    [here](https://github.com/opencv/opencv/tree/5.x/samples/java/tutorial_code/ImgTrans/remap/RemapDemo.java)
    @include samples/java/tutorial_code/ImgTrans/remap/RemapDemo.java
@end_toggle

@add_toggle_python
-   The tutorial code's is shown lines below. You can also download it from
+-   The tutorial code is shown in the lines below. You can also download it from
    [here](https://github.com/opencv/opencv/tree/5.x/samples/python/tutorial_code/ImgTrans/remap/Remap_Demo.py)
    @include samples/python/tutorial_code/ImgTrans/remap/Remap_Demo.py
@end_toggle
--- a/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.markdown
+++ b/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.markdown
@ -72,7 +72,7 @@ Theory

 -#  We mentioned that an Affine Transformation is basically a **relation**
    between two images. The information about this relation can come, roughly, in two ways:
-    -#  We know both \f$X\f$ and T and we also know that they are related. Then our task is to find \f$M\f$
+    -#  We know both \f$X\f$ and \f$T\f$ and we also know that they are related. Then our task is to find \f$M\f$
    -#  We know \f$M\f$ and \f$X\f$. To obtain \f$T\f$ we only need to apply \f$T = M \cdot X\f$. Our information
        for \f$M\f$ may be explicit (i.e. have the 2-by-3 matrix) or it can come as a geometric relation
        between points.
--- a/doc/tutorials/introduction/config_reference/config_reference.markdown
+++ b/doc/tutorials/introduction/config_reference/config_reference.markdown
@ -586,7 +586,6 @@ Following options can be used to change installation layout for common scenarios
 | `BUILD_JAVA` | _ON_ | Enable Java wrappers build. Java SDK and Ant must be installed. |
 | `BUILD_FAT_JAVA_LIB` | _ON_ (for static Android builds) | Build single _opencv_java_ dynamic library containing all library functionality bundled with Java bindings. |
 | `BUILD_opencv_python3` | _ON_ | Build python3 bindings. Python with development files and numpy must be installed. |
-| `CAROTENE_NEON_ARCH` | '(auto)' | Switch NEON Arch for Carotene. If it sets nothing, it will be auto-detected. If it sets 8, ARMv8(and later) is used. Otherwise, ARMv7 is used. |

 TODO: need separate tutorials covering bindings builds

--- a/modules/core/include/opencv2/core/cuda_types.hpp
+++ b/modules/core/include/opencv2/core/cuda_types.hpp
@ -66,6 +66,9 @@
    #define __CV_CUDA_HOST_DEVICE__
 #endif

+#include "opencv2/core/cvdef.h"
+#include "opencv2/core.hpp"
+
 namespace cv
 {
    namespace cuda
@ -124,6 +127,11 @@ namespace cv

            int cols;
            int rows;
+
+            CV_NODISCARD_STD __CV_CUDA_HOST_DEVICE__ Size size() const { return {cols, rows}; }
+            CV_NODISCARD_STD __CV_CUDA_HOST_DEVICE__ T& operator ()(const Point &pos)       { return (*this)(pos.y, pos.x); }
+            CV_NODISCARD_STD __CV_CUDA_HOST_DEVICE__ const T& operator ()(const Point &pos) const { return (*this)(pos.y, pos.x); }
+            using PtrStep<T>::operator();
        };

        typedef PtrStepSz<unsigned char> PtrStepSzb;
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@ -758,7 +758,11 @@ __CV_ENUM_FLAGS_BITWISE_XOR_EQ   (EnumType, EnumType)
 #    define __has_cpp_attribute(__x) 0
 #  endif
 #  if __has_cpp_attribute(nodiscard)
-#    define CV_NODISCARD_STD [[nodiscard]]
+#    if defined(__NVCC__) && __CUDACC_VER_MAJOR__ < 12
+#       define CV_NODISCARD_STD
+#    else
+#       define CV_NODISCARD_STD [[nodiscard]]
+#    endif
 #  elif __cplusplus >= 201703L
 //   available when compiler is C++17 compliant
 #    define CV_NODISCARD_STD [[nodiscard]]
--- a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp
@ -650,16 +650,18 @@ inline v_float32x8 v256_shuffle(const v_float32x8 &a)
 template<int m>
 inline v_float64x4 v256_shuffle(const v_float64x4 &a)
 {
-    int imm8 = m & 0b0001;  //0 or 1
-    if (m & 0x0b0010) imm8 |= 0b0100;
-    //else imm8 |= 0b0000;
-    if (m & 0x0b0100) imm8 |= 0b110000;  //2 or 3
-    else imm8 |= 0b100000;
-    if (m & 0x0b1000) imm8 |= 0b11000000;
-    else imm8 |= 0b10000000;
+    const int m1 = m & 0b1;
+    const int m2 = m & 0b10;
+    const int m3 = m & 0b100;
+    const int m4 = m & 0b1000;
+    const int m5 = m2 << 1;
+    const int m6 = m3 << 2;
+    const int m7 = m4 << 3;
+    const int m8 = m1 & m5 & m6 & m7;

-    return v_float64x4(__lasx_xvpermi_d(*((__m256i*)&a.val), imm8));
+    return v_float64x4(__lasx_xvshuf4i_d(*((__m256i*)&a.val), *((__m256i*)&a.val), m8));
 }
+
 template<typename _Tpvec>
 inline void v256_zip(const _Tpvec& a, const _Tpvec& b, _Tpvec& ab0, _Tpvec& ab1)
 {
@ -1100,7 +1102,7 @@ inline v_uint8x32 v_rotate_right(const v_uint8x32& a, const v_uint8x32& b)
 template<int imm>
 inline v_uint8x32 v_rotate_left(const v_uint8x32& a)
 {
-    enum {IMM_L = (imm - 16) & 0xFF};
+    enum {IMM_L = ((imm - 16) & 0xFF) > 31 ? 31 : ((imm - 16) & 0xFF)};
    enum {IMM_R = (16 - imm) & 0xFF};

    if (imm == 0) return a;
@ -1117,7 +1119,7 @@ inline v_uint8x32 v_rotate_left(const v_uint8x32& a)
 template<int imm>
 inline v_uint8x32 v_rotate_right(const v_uint8x32& a)
 {
-    enum {IMM_L = (imm - 16) & 0xFF};
+    enum {IMM_L = ((imm - 16) & 0xFF) > 31 ? 31 : ((imm - 16) & 0xFF)};

    if (imm == 0) return a;
    if (imm > 32) return v_uint8x32();
--- a/modules/core/perf/opencl/perf_arithm.cpp
+++ b/modules/core/perf/opencl/perf_arithm.cpp
@ -358,7 +358,8 @@ typedef TestBaseWithParam<FlipParams> FlipFixture;

 OCL_PERF_TEST_P(FlipFixture, Flip,
            ::testing::Combine(OCL_TEST_SIZES,
-                               OCL_TEST_TYPES, FlipType::all()))
+                               ::testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_32FC1, CV_32FC4),
+                               FlipType::all()))
 {
    const FlipParams params = GetParam();
    const Size srcSize = get<0>(params);
@ -388,7 +389,9 @@ typedef tuple<Size, MatType, RotateType> RotateParams;
 typedef TestBaseWithParam<RotateParams> RotateFixture;

 OCL_PERF_TEST_P(RotateFixture, rotate,
-                ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES, RotateType::all()))
+                ::testing::Combine(OCL_TEST_SIZES,
+                                   ::testing::Values(CV_8UC1, CV_8UC2, CV_8UC4, CV_32FC1, CV_32FC4),
+                                   RotateType::all()))
 {
    const RotateParams params = GetParam();
    const Size srcSize   = get<0>(params);
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@ -1049,6 +1049,13 @@ void cv::add( InputArray src1, InputArray src2, OutputArray dst,
 {
    CV_INSTRUMENT_REGION();

+    CV_Assert(src1.empty() == src2.empty());
+    if (src1.empty() && src2.empty())
+    {
+        dst.release();
+        return;
+    }
+
    arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD );
 }

@ -1057,6 +1064,13 @@ void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst,
 {
    CV_INSTRUMENT_REGION();

+    CV_Assert(_src1.empty() == _src2.empty());
+    if (_src1.empty() && _src2.empty())
+    {
+        _dst.release();
+        return;
+    }
+
    ExtendedTypeFunc subExtFunc = getSubExtFunc(_src1.depth(), _src2.depth(), dtype < 0 ? _dst.depth() : dtype);
    arithm_op(_src1, _src2, _dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB,
              /* extendedFunc */ subExtFunc);
@ -1066,6 +1080,13 @@ void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
 {
    CV_INSTRUMENT_REGION();

+    CV_Assert(src1.empty() == src2.empty());
+    if (src1.empty() && src2.empty())
+    {
+        dst.release();
+        return;
+    }
+
    arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab(), false, 0, OCL_OP_ABSDIFF);
 }

@ -1186,6 +1207,13 @@ void divide(InputArray src1, InputArray src2,
 {
    CV_INSTRUMENT_REGION();

+    CV_Assert(src1.empty() == src2.empty());
+    if (src1.empty() && src2.empty())
+    {
+        dst.release();
+        return;
+    }
+
    arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE);
 }

@ -1194,6 +1222,12 @@ void divide(double scale, InputArray src2,
 {
    CV_INSTRUMENT_REGION();

+    if (src2.empty())
+    {
+        dst.release();
+        return;
+    }
+
    arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE);
 }

@ -1236,6 +1270,13 @@ void cv::addWeighted( InputArray src1, double alpha, InputArray src2,
 {
    CV_INSTRUMENT_REGION();

+    CV_Assert(src1.empty() == src2.empty());
+    if (src1.empty() && src2.empty())
+    {
+        dst.release();
+        return;
+    }
+
    double scalars[] = {alpha, beta, gamma};
    arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars, OCL_OP_ADDW);
 }
--- a/modules/core/src/minmax.dispatch.cpp
+++ b/modules/core/src/minmax.dispatch.cpp
@ -21,6 +21,8 @@ static MinMaxIdxFunc getMinMaxIdxFunc(int depth)
                    CV_CPU_DISPATCH_MODES_ALL);
 }

+// The function expects 1-based indexing for ofs
+// Zero is treated as invalid offset (not found)
 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
 {
    int i, d = a.dims;
@ -324,9 +326,9 @@ void cv::minMaxIdx(InputArray _src, double* minVal,
        {
            // minIdx[0] and maxIdx[0] are always 0 for "flatten" version
            if (minIdx)
-                ofs2idx(src, minIdx[1], minIdx);
+                ofs2idx(src, minIdx[1]+1, minIdx);
            if (maxIdx)
-                ofs2idx(src, maxIdx[1], maxIdx);
+                ofs2idx(src, maxIdx[1]+1, maxIdx);
            return;
        }
        else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED)
--- a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
+++ b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
@ -460,7 +460,7 @@ PERF_TEST_P_(DivPerfTest, TestPerformance)

    //This condition is needed to work around issue #21044 in OpenCV.
    //It reinitializes the divider matrix without zero values when the DST type differs from the source type.
-    if (dtype == CV_16S && dtype != type)
+    if (dtype != type)
        cv::randu(in_mat2, cv::Scalar::all(1), cv::Scalar::all(255));

    // OpenCV code ///////////////////////////////////////////////////////////
@ -552,8 +552,7 @@ PERF_TEST_P_(DivRCPerfTest, TestPerformance)
    initMatsRandU(type, sz, dtype, false);
    //This is needed to work around issue #21044 in OpenCV.
    //It reinitializes the divider matrix without zero values.
-    if (dtype == CV_16S || (type == CV_16S && dtype == -1))
-        cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(255));
+    cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(255));

    // OpenCV code ///////////////////////////////////////////////////////////
    cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype);
--- a/modules/imgcodecs/src/grfmt_avif.cpp
+++ b/modules/imgcodecs/src/grfmt_avif.cpp
@ -143,6 +143,7 @@ AvifDecoder::AvifDecoder() {
  m_buf_supported = true;
  channels_ = 0;
  decoder_ = avifDecoderCreate();
+  decoder_->strictFlags = AVIF_STRICT_DISABLED;
 }

 AvifDecoder::~AvifDecoder() {
@ -166,6 +167,7 @@ bool AvifDecoder::checkSignature(const String &signature) const {
  std::unique_ptr<avifDecoder, decltype(&avifDecoderDestroy)> decoder(
      avifDecoderCreate(), avifDecoderDestroy);
  if (!decoder) return false;
+  decoder->strictFlags = AVIF_STRICT_DISABLED;
  OPENCV_AVIF_CHECK_STATUS(
      avifDecoderSetIOMemory(
          decoder.get(), reinterpret_cast<const uint8_t *>(signature.c_str()),
--- a/modules/imgcodecs/test/test_tiff.cpp
+++ b/modules/imgcodecs/test/test_tiff.cpp
@ -1096,7 +1096,6 @@ INSTANTIATE_TEST_CASE_P(AllModes, Imgcodecs_Tiff_Modes, testing::ValuesIn(all_mo
 TEST(Imgcodecs_Tiff_Modes, write_multipage)
 {
    const string root = cvtest::TS::ptr()->get_data_path();
-    const string filename = root + "readwrite/multipage.tif";
    const string page_files[] = {
        "readwrite/multipage_p1.tif",
        "readwrite/multipage_p2.tif",
@ -1109,7 +1108,7 @@ TEST(Imgcodecs_Tiff_Modes, write_multipage)
    vector<Mat> pages;
    for (size_t i = 0; i < page_count; i++)
    {
-        const Mat page = imread(root + page_files[i]);
+        const Mat page = imread(root + page_files[i], IMREAD_REDUCED_GRAYSCALE_8 + (int)i);
        pages.push_back(page);
    }

--- a/modules/imgproc/perf/perf_warp.cpp
+++ b/modules/imgproc/perf/perf_warp.cpp
@ -12,7 +12,7 @@ CV_ENUM(InterType, INTER_NEAREST, INTER_LINEAR)
 CV_ENUM(InterTypeExtended, INTER_NEAREST, INTER_LINEAR, WARP_RELATIVE_MAP)
 CV_ENUM(RemapMode, HALF_SIZE, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH)

-typedef TestBaseWithParam< tuple<Size, InterType, BorderMode> > TestWarpAffine;
+typedef TestBaseWithParam< tuple<MatType, Size, InterType, BorderMode> > TestWarpAffine;
 typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, int> > TestWarpPerspective;
 typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, MatType> > TestWarpPerspectiveNear_t;
 typedef TestBaseWithParam< tuple<MatType, Size, InterTypeExtended, BorderMode, RemapMode> > TestRemap;
@ -21,6 +21,7 @@ void update_map(const Mat& src, Mat& map_x, Mat& map_y, const int remapMode, boo

 PERF_TEST_P( TestWarpAffine, WarpAffine,
             Combine(
+                Values(CV_8UC1, CV_8UC4),
                Values( szVGA, sz720p, sz1080p ),
                InterType::all(),
                BorderMode::all()
@ -28,13 +29,14 @@ PERF_TEST_P( TestWarpAffine, WarpAffine,
 )
 {
    Size sz, szSrc(512, 512);
-    int borderMode, interType;
-    sz         = get<0>(GetParam());
-    interType  = get<1>(GetParam());
-    borderMode = get<2>(GetParam());
+    int borderMode, interType, dataType;
+    dataType   = get<0>(GetParam());
+    sz         = get<1>(GetParam());
+    interType  = get<2>(GetParam());
+    borderMode = get<3>(GetParam());
    Scalar borderColor = Scalar::all(150);

-    Mat src(szSrc,CV_8UC4), dst(sz, CV_8UC4);
+    Mat src(szSrc, dataType), dst(sz, dataType);
    cvtest::fillGradient(src);
    if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1);
    Mat warpMat = getRotationMatrix2D(Point2f(src.cols/2.f, src.rows/2.f), 30., 2.2);
@ -47,6 +49,7 @@ PERF_TEST_P( TestWarpAffine, WarpAffine,

 PERF_TEST_P(TestWarpAffine, DISABLED_WarpAffine_ovx,
    Combine(
+        Values(CV_8UC1, CV_8UC4),
        Values(szVGA, sz720p, sz1080p),
        InterType::all(),
        BorderMode::all()
@ -54,13 +57,16 @@ PERF_TEST_P(TestWarpAffine, DISABLED_WarpAffine_ovx,
 )
 {
    Size sz, szSrc(512, 512);
-    int borderMode, interType;
-    sz = get<0>(GetParam());
-    interType = get<1>(GetParam());
-    borderMode = get<2>(GetParam());
+    int borderMode, interType, dataType;
+
+    dataType   = get<0>(GetParam());
+    sz         = get<1>(GetParam());
+    interType  = get<2>(GetParam());
+    borderMode = get<3>(GetParam());
+
    Scalar borderColor = Scalar::all(150);

-    Mat src(szSrc, CV_8UC1), dst(sz, CV_8UC1);
+    Mat src(szSrc, dataType), dst(sz, dataType);
    cvtest::fillGradient(src);
    if (borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1);
    Mat warpMat = getRotationMatrix2D(Point2f(src.cols / 2.f, src.rows / 2.f), 30., 2.2);
--- a/modules/imgproc/src/color_rgb.simd.hpp
+++ b/modules/imgproc/src/color_rgb.simd.hpp
@ -1088,11 +1088,6 @@ struct mRGBA2RGBA<uchar>

            uchar v3_half = v3 / 2;

-            dst[0] = (v3==0)? 0 : (v0 * max_val + v3_half) / v3;
-            dst[1] = (v3==0)? 0 : (v1 * max_val + v3_half) / v3;
-            dst[2] = (v3==0)? 0 : (v2 * max_val + v3_half) / v3;
-            dst[3] = v3;
-
            dst[0] = (v3==0)? 0 : saturate_cast<uchar>((v0 * max_val + v3_half) / v3);
            dst[1] = (v3==0)? 0 : saturate_cast<uchar>((v1 * max_val + v3_half) / v3);
            dst[2] = (v3==0)? 0 : saturate_cast<uchar>((v2 * max_val + v3_half) / v3);
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@ -1983,65 +1983,46 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
        }
        else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
        {
-            if( nninterpolate )
+            #if CV_TRY_SSE4_1
+            if( useSSE4_1 )
+                opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
+            else
+            #endif
            {
                #if CV_SIMD128
-                int span = VTraits<v_float32x4>::vlanes();
                {
-                    for( ; x <= (size.width << 1) - span * 2; x += span * 2 )
-                        v_store(dst1 + x, v_pack(v_round(v_load(src1f + x)),
-                                                 v_round(v_load(src1f + x + span))));
+                    v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
+                    v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
+                    v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
+                    int span = VTraits<v_uint16x8>::vlanes();
+                    for (; x <= size.width - span; x += span )
+                    {
+                        v_float32x4 v_src0[2], v_src1[2];
+                        v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
+                        v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
+                        v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale));
+                        v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale));
+                        v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale));
+                        v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale));
+
+                        v_int16x8 v_dst[2];
+                        v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
+                        v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
+                        v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
+
+                        v_store(dst2 + x, v_pack_u(
+                            v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))),
+                            v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)))));
+                    }
                }
                #endif
                for( ; x < size.width; x++ )
                {
-                    dst1[x*2] = saturate_cast<short>(src1f[x*2]);
-                    dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
-                }
-            }
-            else
-            {
-                #if CV_TRY_SSE4_1
-                if( useSSE4_1 )
-                    opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
-                else
-                #endif
-                {
-                    #if CV_SIMD128
-                    {
-                        v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
-                        v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
-                        v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
-                        int span = VTraits<v_uint16x8>::vlanes();
-                        for (; x <= size.width - span; x += span )
-                        {
-                            v_float32x4 v_src0[2], v_src1[2];
-                            v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
-                            v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
-                            v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale));
-                            v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale));
-                            v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale));
-                            v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale));
-
-                            v_int16x8 v_dst[2];
-                            v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
-                            v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
-                            v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
-
-                            v_store(dst2 + x, v_pack_u(
-                                v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))),
-                                v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)))));
-                        }
-                    }
-                    #endif
-                    for( ; x < size.width; x++ )
-                    {
-                        int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
-                        int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
-                        dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
-                        dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
-                        dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
-                    }
+                    int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
+                    int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
+                    dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
+                    dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
+                    dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
                }
            }
        }
--- a/modules/imgproc/src/resize.cpp
+++ b/modules/imgproc/src/resize.cpp
@ -94,6 +94,10 @@ static void hlineResize(ET* src, int cn, int *ofst, FT* m, FT* dst, int dst_min,
            }
        }
    }
+    // Avoid reading a potentially unset ofst, leading to a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
    ET* src_last = src + cn*ofst[dst_width - 1];
    for (; i < dst_width; i++) // Points that fall right from src image so became equal to rightmost src point
    {
@ -125,6 +129,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 2, true, 1>
            ET* px = src + ofst[i];
            *(dst++) = m[0] * px[0] + m[1] * px[1];
        }
+        // Avoid reading a potentially unset ofst, leading to a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
        src0 = (src + ofst[dst_width - 1])[0];
        for (; i < dst_width; i++) // Points that fall right from src image so became equal to rightmost src point
        {
@ -149,6 +157,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 2, true, 2>
            *(dst++) = m[0] * px[0] + m[1] * px[2];
            *(dst++) = m[0] * px[1] + m[1] * px[3];
        }
+        // Avoid reading a potentially unset ofst, leading to a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
        src0 = (src + 2*ofst[dst_width - 1])[0];
        src1 = (src + 2*ofst[dst_width - 1])[1];
        for (; i < dst_width; i++) // Points that fall right from src image so became equal to rightmost src point
@ -177,6 +189,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 2, true, 3>
            *(dst++) = m[0] * px[1] + m[1] * px[4];
            *(dst++) = m[0] * px[2] + m[1] * px[5];
        }
+        // Avoid reading a potentially unset ofst, leading to a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
        src0 = (src + 3*ofst[dst_width - 1])[0];
        src1 = (src + 3*ofst[dst_width - 1])[1];
        src2 = (src + 3*ofst[dst_width - 1])[2];
@ -209,6 +225,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 2, true, 4>
            *(dst++) = m[0] * px[2] + m[1] * px[6];
            *(dst++) = m[0] * px[3] + m[1] * px[7];
        }
+        // Avoid reading a potentially unset ofst, leading to a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
        src0 = (src + 4*ofst[dst_width - 1])[0];
        src1 = (src + 4*ofst[dst_width - 1])[1];
        src2 = (src + 4*ofst[dst_width - 1])[2];
@ -237,6 +257,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 4, true, 1>
            ET* px = src + ofst[i];
            *(dst++) = m[0] * src[0] + m[1] * src[1] + m[2] * src[2] + m[3] * src[3];
        }
+        // Avoid reading a potentially unset ofst, leading to a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
        src0 = (src + ofst[dst_width - 1])[0];
        for (; i < dst_width; i++) // Points that fall right from src image so became equal to rightmost src point
        {
@ -261,6 +285,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 4, true, 2>
            *(dst++) = m[0] * src[0] + m[1] * src[2] + m[2] * src[4] + m[3] * src[6];
            *(dst++) = m[0] * src[1] + m[1] * src[3] + m[2] * src[5] + m[3] * src[7];
        }
+        // Avoid reading a potentially unset ofst, leading to a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
        src0 = (src + 2*ofst[dst_width - 1])[0];
        src1 = (src + 2*ofst[dst_width - 1])[1];
        for (; i < dst_width; i++) // Points that fall right from src image so became equal to rightmost src point
@ -289,6 +317,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 4, true, 3>
            *(dst++) = m[0] * src[1] + m[1] * src[4] + m[2] * src[7] + m[3] * src[10];
            *(dst++) = m[0] * src[2] + m[1] * src[5] + m[2] * src[8] + m[3] * src[11];
        }
+        // Avoid reading a potentially unset ofst, leading to a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
        src0 = (src + 3*ofst[dst_width - 1])[0];
        src1 = (src + 3*ofst[dst_width - 1])[1];
        src2 = (src + 3*ofst[dst_width - 1])[2];
@ -321,6 +353,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 4, true, 4>
            *(dst++) = m[0] * src[2] + m[1] * src[6] + m[2] * src[10] + m[3] * src[14];
            *(dst++) = m[0] * src[3] + m[1] * src[7] + m[2] * src[11] + m[3] * src[15];
        }
+        // Avoid reading a potentially unset ofst, leading to a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
        src0 = (src + 4*ofst[dst_width - 1])[0];
        src1 = (src + 4*ofst[dst_width - 1])[1];
        src2 = (src + 4*ofst[dst_width - 1])[2];
@ -382,6 +418,10 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 1>(uint8_t* src, int, int *o
        uint8_t* px = src + ofst[i];
        *(dst++) = m[0] * px[0] + m[1] * px[1];
    }
+    // Avoid reading a potentially unset ofst, leading to a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
    src_0 = (src + ofst[dst_width - 1])[0];
 #if (CV_SIMD || CV_SIMD_SCALABLE)
    v_src_0 = vx_setall_u16(*((uint16_t*)&src_0));
@ -438,6 +478,10 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 2>(uint8_t* src, int, int *o
        *(dst++) = m[0] * px[0] + m[1] * px[2];
        *(dst++) = m[0] * px[1] + m[1] * px[3];
    }
+    // Avoid reading a potentially unset ofst, leading to a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
    ((ufixedpoint16*)(srccn.w))[0] = (src + 2 * ofst[dst_width - 1])[0]; ((ufixedpoint16*)(srccn.w))[1] = (src + 2 * ofst[dst_width - 1])[1];
 #if (CV_SIMD || CV_SIMD_SCALABLE)
    v_srccn = v_reinterpret_as_u16(vx_setall_u32(srccn.d));
@ -510,6 +554,10 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 3>(uint8_t* src, int, int *o
        *(dst++) = m[0] * px[1] + m[1] * px[4];
        *(dst++) = m[0] * px[2] + m[1] * px[5];
    }
+    // Avoid reading a potentially unset ofst, leading to a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
    ((ufixedpoint16*)(srccn.w))[0] = (src + 3*ofst[dst_width - 1])[0];
    ((ufixedpoint16*)(srccn.w))[1] = (src + 3*ofst[dst_width - 1])[1];
    ((ufixedpoint16*)(srccn.w))[2] = (src + 3*ofst[dst_width - 1])[2];
@ -583,6 +631,10 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 4>(uint8_t* src, int, int *o
        *(dst++) = m[0] * px[2] + m[1] * px[6];
        *(dst++) = m[0] * px[3] + m[1] * px[7];
    }
+    // Avoid reading a potentially unset ofst, leading to a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
    ((ufixedpoint16*)(srccn.w))[0] = (src + 4 * ofst[dst_width - 1])[0]; ((ufixedpoint16*)(srccn.w))[1] = (src + 4 * ofst[dst_width - 1])[1];
    ((ufixedpoint16*)(srccn.w))[2] = (src + 4 * ofst[dst_width - 1])[2]; ((ufixedpoint16*)(srccn.w))[3] = (src + 4 * ofst[dst_width - 1])[3];
 #if (CV_SIMD || CV_SIMD_SCALABLE)
@ -634,6 +686,10 @@ void hlineResizeCn<uint16_t, ufixedpoint32, 2, true, 1>(uint16_t* src, int, int
        uint16_t* px = src + ofst[i];
        *(dst++) = m[0] * px[0] + m[1] * px[1];
    }
+    // Avoid reading a potentially unset ofst, leading to a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
    src_0 = (src + ofst[dst_width - 1])[0];
 #if (CV_SIMD || CV_SIMD_SCALABLE)
    v_src_0 = vx_setall_u32(*((uint32_t*)&src_0));
--- a/modules/imgproc/test/test_color.cpp
+++ b/modules/imgproc/test/test_color.cpp
@ -455,7 +455,7 @@ void CV_ColorGrayTest::get_test_array_types_and_sizes( int test_case_idx, vector
 double CV_ColorGrayTest::get_success_error_level( int /*test_case_idx*/, int i, int j )
 {
    int depth = test_mat[i][j].depth();
-    return depth == CV_8U ? 2 : depth == CV_16U ? 16 : 1e-5;
+    return depth == CV_8U ? 1 : depth == CV_16U ? 2 : 1e-5; // error level picked by depth of test_mat[i][j]: 1 for CV_8U, 2 for CV_16U (previously 2 and 16), 1e-5 for any other depth
 }


@ -2844,6 +2844,11 @@ void runCvtColorBitExactCheck(ColorConversionCodes code, int inputType, uint32_t
    }
 }

+TEST(Imgproc_cvtColor_BE, COLOR_RGB2GRAY)  { runCvtColorBitExactCheck(COLOR_RGB2GRAY,  CV_8UC3, 0x416bd44a); }
+TEST(Imgproc_cvtColor_BE, COLOR_RGBA2GRAY) { runCvtColorBitExactCheck(COLOR_RGBA2GRAY, CV_8UC3, 0x416bd44a); } // NOTE(review): same CV_8UC3 input type and same third argument as the COLOR_RGB2GRAY case above - confirm a 4-channel input type was not intended
+TEST(Imgproc_cvtColor_BE, COLOR_BGR2GRAY)  { runCvtColorBitExactCheck(COLOR_BGR2GRAY,  CV_8UC3, 0x3008c6b8); }
+TEST(Imgproc_cvtColor_BE, COLOR_BGRA2GRAY) { runCvtColorBitExactCheck(COLOR_BGRA2GRAY, CV_8UC3, 0x3008c6b8); } // NOTE(review): same CV_8UC3 input type and same third argument as the COLOR_BGR2GRAY case above - confirm a 4-channel input type was not intended
+
 TEST(Imgproc_cvtColor_BE, COLOR_BGR2YUV) { runCvtColorBitExactCheck(COLOR_BGR2YUV, CV_8UC3, 0xc2cbcfda); }
 TEST(Imgproc_cvtColor_BE, COLOR_RGB2YUV) { runCvtColorBitExactCheck(COLOR_RGB2YUV, CV_8UC3, 0x4e98e757); }
 TEST(Imgproc_cvtColor_BE, COLOR_YUV2BGR) { runCvtColorBitExactCheck(COLOR_YUV2BGR, CV_8UC3, 0xb2c62a3f); }
--- a/modules/videoio/src/cap_v4l.cpp
+++ b/modules/videoio/src/cap_v4l.cpp
@ -1449,11 +1449,11 @@ void CvCaptureCAM_V4L::convertToRgb(const Buffer &currentBuffer)
        return;
    case V4L2_PIX_FMT_NV12:
        cv::cvtColor(cv::Mat(imageSize.height * 3 / 2, imageSize.width, CV_8U, start), frame,
-                     COLOR_YUV2RGB_NV12);
+                     COLOR_YUV2BGR_NV12);
        return;
    case V4L2_PIX_FMT_NV21:
        cv::cvtColor(cv::Mat(imageSize.height * 3 / 2, imageSize.width, CV_8U, start), frame,
-                     COLOR_YUV2RGB_NV21);
+                     COLOR_YUV2BGR_NV21);
        return;
 #ifdef HAVE_JPEG
    case V4L2_PIX_FMT_MJPEG: