mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge branch 4.x
This commit is contained in:
commit
6c69e2cc90
14
3rdparty/fastcv/fastcv.cmake
vendored
14
3rdparty/fastcv/fastcv.cmake
vendored
@ -1,23 +1,23 @@
|
|||||||
function(download_fastcv root_dir)
|
function(download_fastcv root_dir)
|
||||||
|
|
||||||
# Commit SHA in the opencv_3rdparty repo
|
# Commit SHA in the opencv_3rdparty repo
|
||||||
set(FASTCV_COMMIT "8d86e68dad8b80b8575a8d3cf401d3ee96c24148")
|
set(FASTCV_COMMIT "abe340d0fb7f19fa9315080e3c8616642e98a296")
|
||||||
|
|
||||||
# Define actual FastCV versions
|
# Define actual FastCV versions
|
||||||
if(ANDROID)
|
if(ANDROID)
|
||||||
if(AARCH64)
|
if(AARCH64)
|
||||||
message(STATUS "Download FastCV for Android aarch64")
|
message(STATUS "Download FastCV for Android aarch64")
|
||||||
set(FCV_PACKAGE_NAME "fastcv_android_aarch64_2025_04_08.tgz")
|
set(FCV_PACKAGE_NAME "fastcv_android_aarch64_2025_04_29.tgz")
|
||||||
set(FCV_PACKAGE_HASH "e028966a1d1b2f3f0bc5967d316e8b64")
|
set(FCV_PACKAGE_HASH "d9172a9a3e5d92d080a4192cc5691001")
|
||||||
else()
|
else()
|
||||||
message(STATUS "Download FastCV for Android armv7")
|
message(STATUS "Download FastCV for Android armv7")
|
||||||
set(FCV_PACKAGE_NAME "fastcv_android_arm32_2025_04_08.tgz")
|
set(FCV_PACKAGE_NAME "fastcv_android_arm32_2025_04_29.tgz")
|
||||||
set(FCV_PACKAGE_HASH "6fc1e812a4b3ef392469d2283e037ffe")
|
set(FCV_PACKAGE_HASH "246b5253233391cd2c74d01d49aee9c3")
|
||||||
endif()
|
endif()
|
||||||
elseif(UNIX AND NOT APPLE AND NOT IOS AND NOT XROS)
|
elseif(UNIX AND NOT APPLE AND NOT IOS AND NOT XROS)
|
||||||
if(AARCH64)
|
if(AARCH64)
|
||||||
set(FCV_PACKAGE_NAME "fastcv_linux_aarch64_2025_04_08.tgz")
|
set(FCV_PACKAGE_NAME "fastcv_linux_aarch64_2025_04_29.tgz")
|
||||||
set(FCV_PACKAGE_HASH "062a26639cd2788beee2e0dd8743d680")
|
set(FCV_PACKAGE_HASH "e2ce60e25c8e4113a7af2bd243118f4c")
|
||||||
else()
|
else()
|
||||||
message("FastCV: fastcv lib for 32-bit Linux is not supported for now!")
|
message("FastCV: fastcv lib for 32-bit Linux is not supported for now!")
|
||||||
endif()
|
endif()
|
||||||
|
4
3rdparty/libtiff/tif_hash_set.c
vendored
4
3rdparty/libtiff/tif_hash_set.c
vendored
@ -146,7 +146,7 @@ TIFFHashSet *TIFFHashSetNew(TIFFHashSetHashFunc fnHashFunc,
|
|||||||
set->fnEqualFunc = fnEqualFunc ? fnEqualFunc : TIFFHashSetEqualPointer;
|
set->fnEqualFunc = fnEqualFunc ? fnEqualFunc : TIFFHashSetEqualPointer;
|
||||||
set->fnFreeEltFunc = fnFreeEltFunc;
|
set->fnFreeEltFunc = fnFreeEltFunc;
|
||||||
set->nSize = 0;
|
set->nSize = 0;
|
||||||
set->tabList = (TIFFList **)(calloc(sizeof(TIFFList *), 53));
|
set->tabList = (TIFFList **)(calloc(53, sizeof(TIFFList *)));
|
||||||
if (set->tabList == NULL)
|
if (set->tabList == NULL)
|
||||||
{
|
{
|
||||||
free(set);
|
free(set);
|
||||||
@ -367,7 +367,7 @@ static bool TIFFHashSetRehash(TIFFHashSet *set)
|
|||||||
{
|
{
|
||||||
int nNewAllocatedSize = anPrimes[set->nIndiceAllocatedSize];
|
int nNewAllocatedSize = anPrimes[set->nIndiceAllocatedSize];
|
||||||
TIFFList **newTabList =
|
TIFFList **newTabList =
|
||||||
(TIFFList **)(calloc(sizeof(TIFFList *), nNewAllocatedSize));
|
(TIFFList **)(calloc(nNewAllocatedSize, sizeof(TIFFList *)));
|
||||||
if (newTabList == NULL)
|
if (newTabList == NULL)
|
||||||
return false;
|
return false;
|
||||||
#ifdef HASH_DEBUG
|
#ifdef HASH_DEBUG
|
||||||
|
2
3rdparty/openjpeg/openjp2/jp2.c
vendored
2
3rdparty/openjpeg/openjp2/jp2.c
vendored
@ -2873,7 +2873,7 @@ OPJ_BOOL opj_jp2_read_header(opj_stream_private_t *p_stream,
|
|||||||
p_image,
|
p_image,
|
||||||
p_manager);
|
p_manager);
|
||||||
|
|
||||||
if (p_image && *p_image) {
|
if (ret && p_image && *p_image) {
|
||||||
/* Set Image Color Space */
|
/* Set Image Color Space */
|
||||||
if (jp2->enumcs == 16) {
|
if (jp2->enumcs == 16) {
|
||||||
(*p_image)->color_space = OPJ_CLRSPC_SRGB;
|
(*p_image)->color_space = OPJ_CLRSPC_SRGB;
|
||||||
|
@ -920,9 +920,9 @@ if(WITH_NDSRVP)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_HAL_RVV)
|
if(WITH_HAL_RVV)
|
||||||
ocv_debug_message(STATUS "Enable HAL RVV acceleration")
|
ocv_debug_message(STATUS "Enable RVV HAL acceleration")
|
||||||
if(NOT ";${OpenCV_HAL};" MATCHES ";halrvv;")
|
if(NOT ";${OpenCV_HAL};" MATCHES ";rvvhal;")
|
||||||
set(OpenCV_HAL "halrvv;${OpenCV_HAL}")
|
set(OpenCV_HAL "rvvhal;${OpenCV_HAL}")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@ -955,13 +955,13 @@ foreach(hal ${OpenCV_HAL})
|
|||||||
else()
|
else()
|
||||||
message(STATUS "NDSRVP: Andes GNU Toolchain DSP extension is not enabled, disabling ndsrvp...")
|
message(STATUS "NDSRVP: Andes GNU Toolchain DSP extension is not enabled, disabling ndsrvp...")
|
||||||
endif()
|
endif()
|
||||||
elseif(hal STREQUAL "halrvv")
|
elseif(hal STREQUAL "rvvhal")
|
||||||
if(";${CPU_BASELINE_FINAL};" MATCHES ";RVV;")
|
if(";${CPU_BASELINE_FINAL};" MATCHES ";RVV;")
|
||||||
add_subdirectory(hal/riscv-rvv)
|
add_subdirectory(hal/riscv-rvv)
|
||||||
ocv_hal_register(RVV_HAL_LIBRARIES RVV_HAL_HEADERS RVV_HAL_INCLUDE_DIRS)
|
ocv_hal_register(RVV_HAL_LIBRARIES RVV_HAL_HEADERS RVV_HAL_INCLUDE_DIRS)
|
||||||
list(APPEND OpenCV_USED_HAL "HAL RVV (ver ${RVV_HAL_VERSION})")
|
list(APPEND OpenCV_USED_HAL "RVV HAL (ver ${RVV_HAL_VERSION})")
|
||||||
else()
|
else()
|
||||||
message(STATUS "HAL RVV: RVV is not available, disabling halrvv...")
|
message(STATUS "RVV HAL: RVV is not available, disabling RVV HAL...")
|
||||||
endif()
|
endif()
|
||||||
elseif(hal STREQUAL "ipp")
|
elseif(hal STREQUAL "ipp")
|
||||||
add_subdirectory(hal/ipp)
|
add_subdirectory(hal/ipp)
|
||||||
|
63
cmake/OpenCVBindingsPreprocessorDefinitions.cmake
Normal file
63
cmake/OpenCVBindingsPreprocessorDefinitions.cmake
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
# Assembles the table of preprocessor definitions for the bindings generators.
#
# Arguments:
#   opencv_modules  - NAME of a list variable with module names. Every module whose
#                     HAVE_<module> variable is truthy contributes `"HAVE_<MODULE>": 1`
#                     (module name upper-cased).
#   output_variable - name of the variable that receives the result in the caller's scope.
#
# Result: a single string of `"NAME": value` entries separated by ",\n".
# The first entry is always CV_VERSION_MAJOR; the remaining ones are appended in the
# order they are registered below.
function(ocv_bindings_generator_populate_preprocessor_definitions
         opencv_modules
         output_variable)
  # Seed the table with its first entry so every later entry can be prefixed by ",\n".
  set(defs "\"CV_VERSION_MAJOR\": ${OPENCV_VERSION_MAJOR}")

  # Appends one `"name": value` entry. Declared as a macro so that `defs` is modified
  # directly in the scope of the enclosing function.
  macro(ocv_add_definition name value)
    string(APPEND defs ",\n\"${name}\": ${value}")
  endmacro()

  # Version information.
  ocv_add_definition(CV_VERSION_MINOR ${OPENCV_VERSION_MINOR})
  ocv_add_definition(CV_VERSION_PATCH ${OPENCV_VERSION_PATCH})
  ocv_add_definition(OPENCV_ABI_COMPATIBILITY "${OPENCV_VERSION_MAJOR}00")

  # One HAVE_<MODULE> flag per enabled module.
  foreach(module IN LISTS ${opencv_modules})
    if(HAVE_${module})
      string(TOUPPER "${module}" module_upper)
      ocv_add_definition("HAVE_${module_upper}" 1)
    endif()
  endforeach()

  # Eigen: real version numbers when available, zeros otherwise.
  if(HAVE_EIGEN)
    ocv_add_definition(HAVE_EIGEN 1)
    ocv_add_definition(EIGEN_WORLD_VERSION ${EIGEN_WORLD_VERSION})
    ocv_add_definition(EIGEN_MAJOR_VERSION ${EIGEN_MAJOR_VERSION})
    ocv_add_definition(EIGEN_MINOR_VERSION ${EIGEN_MINOR_VERSION})
  else()
    # Parsed headers may test the Eigen version without a HAVE_EIGEN guard,
    # so the version macros must still be defined.
    foreach(eigen_component IN ITEMS WORLD MAJOR MINOR)
      ocv_add_definition(EIGEN_${eigen_component}_VERSION 0)
    endforeach()
  endif()

  if(HAVE_LAPACK)
    ocv_add_definition(HAVE_LAPACK 1)
  endif()

  # OPENCV_HAVE_FILESYSTEM_SUPPORT is the inverse of OPENCV_DISABLE_FILESYSTEM_SUPPORT.
  if(OPENCV_DISABLE_FILESYSTEM_SUPPORT)
    set(filesystem_support 0)
  else()
    set(filesystem_support 1)
  endif()
  ocv_add_definition(OPENCV_HAVE_FILESYSTEM_SUPPORT ${filesystem_support})

  ocv_add_definition(OPENCV_BINDINGS_PARSER 1)

  # Implementation-detail macros: they do not influence how bindings are generated,
  # so their actual values are irrelevant and fixed placeholders are used.
  foreach(placeholder_macro IN ITEMS
          CV_ENABLE_UNROLLED
          CV__EXCEPTION_PTR
          CV_NEON
          TBB_INTERFACE_VERSION
          CV_SSE2
          CV_VSX
          OPENCV_SUPPORTS_FP_DENORMALS_HINT
          CV_LOG_STRIP_LEVEL)
    ocv_add_definition(${placeholder_macro} 0)
  endforeach()

  # Log levels are numbered consecutively, starting at 0 for SILENT.
  set(log_level_value 0)
  foreach(log_level IN ITEMS SILENT FATAL ERROR WARN INFO DEBUG VERBOSE)
    ocv_add_definition(CV_LOG_LEVEL_${log_level} ${log_level_value})
    math(EXPR log_level_value "${log_level_value} + 1")
  endforeach()

  ocv_add_definition(CERES_FOUND 0)

  # Hand the assembled table back to the caller.
  set(${output_variable} ${defs} PARENT_SCOPE)
endfunction()
|
@ -297,6 +297,9 @@ if(WITH_SPNG)
|
|||||||
else()
|
else()
|
||||||
if(PkgConfig_FOUND)
|
if(PkgConfig_FOUND)
|
||||||
pkg_check_modules(SPNG QUIET spng)
|
pkg_check_modules(SPNG QUIET spng)
|
||||||
|
if(SPNG_FOUND)
|
||||||
|
set(SPNG_LIBRARY ${SPNG_LIBRARIES} CACHE INTERNAL "")
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
if(SPNG_FOUND)
|
if(SPNG_FOUND)
|
||||||
|
@ -197,11 +197,13 @@ if(WITH_FASTCV)
|
|||||||
ocv_install_3rdparty_licenses(FastCV "${OpenCV_BINARY_DIR}/3rdparty/fastcv/LICENSE")
|
ocv_install_3rdparty_licenses(FastCV "${OpenCV_BINARY_DIR}/3rdparty/fastcv/LICENSE")
|
||||||
add_library(fastcv STATIC IMPORTED)
|
add_library(fastcv STATIC IMPORTED)
|
||||||
set_target_properties(fastcv PROPERTIES
|
set_target_properties(fastcv PROPERTIES
|
||||||
IMPORTED_LINK_INTERFACE_LIBRARIES ""
|
IMPORTED_LINK_INTERFACE_LIBRARIES "dl"
|
||||||
IMPORTED_LOCATION "${FastCV_LIB_PATH}/libfastcv.a"
|
IMPORTED_LOCATION "${FastCV_LIB_PATH}/libfastcv.a"
|
||||||
)
|
)
|
||||||
if (NOT BUILD_SHARED_LIBS)
|
if (NOT BUILD_SHARED_LIBS)
|
||||||
install(FILES "${FastCV_LIB_PATH}/libfastcv.a" DESTINATION "${OPENCV_3P_LIB_INSTALL_PATH}" COMPONENT "dev")
|
install(FILES "${FastCV_LIB_PATH}/libfastcv.a" DESTINATION "${OPENCV_3P_LIB_INSTALL_PATH}" COMPONENT "dev")
|
||||||
|
set(FASTCV_LOCATION_PATH "${FastCV_LIB_PATH}/libfastcv.a" CACHE INTERNAL "" FORCE)
|
||||||
|
set(FASTCV_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}/${OPENCV_3P_LIB_INSTALL_PATH}/libfastcv.a" CACHE INTERNAL "" FORCE)
|
||||||
endif()
|
endif()
|
||||||
set(FASTCV_LIBRARY "fastcv" CACHE PATH "FastCV library")
|
set(FASTCV_LIBRARY "fastcv" CACHE PATH "FastCV library")
|
||||||
list(APPEND OPENCV_LINKER_LIBS ${FASTCV_LIBRARY})
|
list(APPEND OPENCV_LINKER_LIBS ${FASTCV_LIBRARY})
|
||||||
|
@ -68,6 +68,14 @@ else()
|
|||||||
set(USE_IPPIW FALSE)
|
set(USE_IPPIW FALSE)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(TARGET fastcv AND NOT BUILD_SHARED_LIBS)
|
||||||
|
file(RELATIVE_PATH FASTCV_INSTALL_PATH_RELATIVE_CONFIGCMAKE "${CMAKE_BINARY_DIR}" "${FASTCV_LOCATION_PATH}")
|
||||||
|
ocv_cmake_configure("${CMAKE_CURRENT_LIST_DIR}/templates/OpenCVConfig-FastCV.cmake.in" FASTCV_CONFIGCMAKE @ONLY)
|
||||||
|
set(USE_FASTCV TRUE)
|
||||||
|
else()
|
||||||
|
set(USE_FASTCV FALSE)
|
||||||
|
endif()
|
||||||
|
|
||||||
ocv_cmake_hook(PRE_CMAKE_CONFIG_BUILD)
|
ocv_cmake_hook(PRE_CMAKE_CONFIG_BUILD)
|
||||||
configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/OpenCVConfig.cmake.in" "${CMAKE_BINARY_DIR}/OpenCVConfig.cmake" @ONLY)
|
configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/OpenCVConfig.cmake.in" "${CMAKE_BINARY_DIR}/OpenCVConfig.cmake" @ONLY)
|
||||||
#support for version checking when finding opencv. find_package(OpenCV 2.3.1 EXACT) should now work.
|
#support for version checking when finding opencv. find_package(OpenCV 2.3.1 EXACT) should now work.
|
||||||
@ -92,6 +100,11 @@ if(USE_IPPIW)
|
|||||||
ocv_cmake_configure("${CMAKE_CURRENT_LIST_DIR}/templates/OpenCVConfig-IPPIW.cmake.in" IPPIW_CONFIGCMAKE @ONLY)
|
ocv_cmake_configure("${CMAKE_CURRENT_LIST_DIR}/templates/OpenCVConfig-IPPIW.cmake.in" IPPIW_CONFIGCMAKE @ONLY)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(USE_FASTCV)
|
||||||
|
file(RELATIVE_PATH FASTCV_INSTALL_PATH_RELATIVE_CONFIGCMAKE "${CMAKE_INSTALL_PREFIX}" "${FASTCV_INSTALL_PATH}")
|
||||||
|
ocv_cmake_configure("${CMAKE_CURRENT_LIST_DIR}/templates/OpenCVConfig-FastCV.cmake.in" FASTCV_CONFIGCMAKE @ONLY)
|
||||||
|
endif()
|
||||||
|
|
||||||
function(ocv_gen_config TMP_DIR NESTED_PATH ROOT_NAME)
|
function(ocv_gen_config TMP_DIR NESTED_PATH ROOT_NAME)
|
||||||
ocv_path_join(__install_nested "${OPENCV_CONFIG_INSTALL_PATH}" "${NESTED_PATH}")
|
ocv_path_join(__install_nested "${OPENCV_CONFIG_INSTALL_PATH}" "${NESTED_PATH}")
|
||||||
ocv_path_join(__tmp_nested "${TMP_DIR}" "${NESTED_PATH}")
|
ocv_path_join(__tmp_nested "${TMP_DIR}" "${NESTED_PATH}")
|
||||||
|
@ -12,7 +12,7 @@ if(NOT OPENCV_CUSTOM_PACKAGE_INFO)
|
|||||||
"OpenCV (Open Source Computer Vision Library) is an open source computer vision
|
"OpenCV (Open Source Computer Vision Library) is an open source computer vision
|
||||||
and machine learning software library. OpenCV was built to provide a common
|
and machine learning software library. OpenCV was built to provide a common
|
||||||
infrastructure for computer vision applications and to accelerate the use of
|
infrastructure for computer vision applications and to accelerate the use of
|
||||||
machine perception in the commercial products. Being a BSD-licensed product,
|
machine perception in the commercial products. Being an Apache 2.0-licensed product,
|
||||||
OpenCV makes it easy for businesses to utilize and modify the code.")
|
OpenCV makes it easy for businesses to utilize and modify the code.")
|
||||||
set(CPACK_PACKAGE_VENDOR "OpenCV Foundation")
|
set(CPACK_PACKAGE_VENDOR "OpenCV Foundation")
|
||||||
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
|
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
|
||||||
|
7
cmake/templates/OpenCVConfig-FastCV.cmake.in
Normal file
7
cmake/templates/OpenCVConfig-FastCV.cmake.in
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
# Declares the imported static `fastcv` target for projects that load the generated
# OpenCV config, unless a target with that name already exists.
# The location is the relative FastCV archive path substituted into this template,
# resolved against OpenCV_INSTALL_PATH when the config is loaded.
# NOTE(review): the in-tree `fastcv` target lists "dl" as its link interface while this
# one keeps it empty - confirm whether consumers of the installed package need "dl" too.
if(NOT TARGET fastcv)
  add_library(fastcv STATIC IMPORTED)
  set_property(TARGET fastcv PROPERTY IMPORTED_LINK_INTERFACE_LIBRARIES "")
  set_property(TARGET fastcv PROPERTY
    IMPORTED_LOCATION "${OpenCV_INSTALL_PATH}/@FASTCV_INSTALL_PATH_RELATIVE_CONFIGCMAKE@")
endif()
|
@ -99,6 +99,8 @@ endif()
|
|||||||
@IPPICV_CONFIGCMAKE@
|
@IPPICV_CONFIGCMAKE@
|
||||||
@IPPIW_CONFIGCMAKE@
|
@IPPIW_CONFIGCMAKE@
|
||||||
|
|
||||||
|
@FASTCV_CONFIGCMAKE@
|
||||||
|
|
||||||
# Some additional settings are required if OpenCV is built as static libs
|
# Some additional settings are required if OpenCV is built as static libs
|
||||||
set(OpenCV_SHARED @BUILD_SHARED_LIBS@)
|
set(OpenCV_SHARED @BUILD_SHARED_LIBS@)
|
||||||
|
|
||||||
|
@ -26,3 +26,14 @@ There are 2 approaches how to get OpenCV:
|
|||||||
- Build OpenCV from source code against specific version of OpenVINO. This approach solves the limitations mentioned above.
|
- Build OpenCV from source code against specific version of OpenVINO. This approach solves the limitations mentioned above.
|
||||||
|
|
||||||
The instruction how to follow both approaches is provided in [OpenCV wiki](https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO).
|
The instruction how to follow both approaches is provided in [OpenCV wiki](https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO).
|
||||||
|
|
||||||
|
## Supported targets
|
||||||
|
|
||||||
|
OpenVINO backend (DNN_BACKEND_INFERENCE_ENGINE) supports the following [targets](https://docs.opencv.org/4.x/d6/d0f/group__dnn.html#ga709af7692ba29788182cf573531b0ff5):
|
||||||
|
|
||||||
|
- **DNN_TARGET_CPU:** Runs on the CPU, no additional dependencies required.
|
||||||
|
- **DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16:** Runs on the iGPU, requires OpenCL drivers. Install [intel-opencl-icd](https://launchpad.net/ubuntu/jammy/+package/intel-opencl-icd) on Ubuntu.
|
||||||
|
- **DNN_TARGET_MYRIAD:** Runs on Intel® VPUs such as the [Neural Compute Stick](https://www.intel.com/content/www/us/en/products/sku/140109/intel-neural-compute-stick-2/specifications.html); for setup instructions, [see](https://www.intel.com/content/www/us/en/developer/archive/tools/neural-compute-stick.html).
|
||||||
|
- **DNN_TARGET_HDDL:** Runs on the Intel® Movidius™ Myriad™ X High Density Deep Learning VPU, for details [see](https://intelsmartedge.github.io/ido-specs/doc/building-blocks/enhanced-platform-awareness/smartedge-open_hddl/).
|
||||||
|
- **DNN_TARGET_FPGA:** Runs on Intel® Altera® series FPGAs; for details, [see](https://www.intel.com/content/www/us/en/docs/programmable/768970/2025-1/getting-started-guide.html).
|
||||||
|
- **DNN_TARGET_NPU:** Runs on the integrated Intel® AI Boost processor, requires [Linux drivers](https://github.com/intel/linux-npu-driver/releases/tag/v1.17.0) OR [Windows drivers](https://www.intel.com/content/www/us/en/download/794734/intel-npu-driver-windows.html).
|
@ -41,7 +41,7 @@ Assuming that we have successfully trained YOLOX model, the subsequent step invo
|
|||||||
running this model with OpenCV. There are several critical considerations to address before
|
running this model with OpenCV. There are several critical considerations to address before
|
||||||
proceeding with this process. Let's delve into these aspects.
|
proceeding with this process. Let's delve into these aspects.
|
||||||
|
|
||||||
### YOLO's Pre-proccessing & Output
|
### YOLO's Pre-processing & Output
|
||||||
|
|
||||||
Understanding the nature of inputs and outputs associated with YOLO family detectors is pivotal.
|
Understanding the nature of inputs and outputs associated with YOLO family detectors is pivotal.
|
||||||
These detectors, akin to most Deep Neural Networks (DNN), typically exhibit variation in input
|
These detectors, akin to most Deep Neural Networks (DNN), typically exhibit variation in input
|
||||||
|
@ -144,9 +144,9 @@ HAL and Extension list of APIs
|
|||||||
| |pyrUp & pyrDown |fcvPyramidCreateu8_v4 |
|
| |pyrUp & pyrDown |fcvPyramidCreateu8_v4 |
|
||||||
| |cvtColor |fcvColorRGB888toYCrCbu8_v3 |
|
| |cvtColor |fcvColorRGB888toYCrCbu8_v3 |
|
||||||
| | |fcvColorRGB888ToHSV888u8 |
|
| | |fcvColorRGB888ToHSV888u8 |
|
||||||
| |GaussianBlur |fcvFilterGaussian5x5u8_v3 |
|
| |gaussianBlur |fcvFilterGaussian5x5u8_v3 |
|
||||||
| | |fcvFilterGaussian3x3u8_v4 |
|
| | |fcvFilterGaussian3x3u8_v4 |
|
||||||
| |cvWarpPerspective |fcvWarpPerspectiveu8_v5 |
|
| |warpPerspective |fcvWarpPerspectiveu8_v5 |
|
||||||
| |Canny |fcvFilterCannyu8 |
|
| |Canny |fcvFilterCannyu8 |
|
||||||
| | | |
|
| | | |
|
||||||
|CORE |lut | fcvTableLookupu8 |
|
|CORE |lut | fcvTableLookupu8 |
|
||||||
@ -166,6 +166,7 @@ HAL and Extension list of APIs
|
|||||||
| | |fcvElementMultiplyf32 |
|
| | |fcvElementMultiplyf32 |
|
||||||
| |addWeighted |fcvAddWeightedu8_v2 |
|
| |addWeighted |fcvAddWeightedu8_v2 |
|
||||||
| |subtract |fcvImageDiffu8f32_v2 |
|
| |subtract |fcvImageDiffu8f32_v2 |
|
||||||
|
| |SVD & solve |fcvSVDf32_v2 |
|
||||||
|
|
||||||
|
|
||||||
**FastCV based OpenCV Extensions APIs list :**
|
**FastCV based OpenCV Extensions APIs list :**
|
||||||
@ -221,10 +222,10 @@ HAL and Extension list of APIs
|
|||||||
| |fcvFilterCorrSep17x17s16_v2 |
|
| |fcvFilterCorrSep17x17s16_v2 |
|
||||||
| |fcvFilterCorrSepNxNs16 |
|
| |fcvFilterCorrSepNxNs16 |
|
||||||
|sobel3x3u8 |fcvImageGradientSobelPlanars8_v2 |
|
|sobel3x3u8 |fcvImageGradientSobelPlanars8_v2 |
|
||||||
|sobel3x3u9 |fcvImageGradientSobelPlanars16_v2 |
|
|sobel3x3u8 |fcvImageGradientSobelPlanars16_v2 |
|
||||||
|sobel3x3u10 |fcvImageGradientSobelPlanars16_v3 |
|
|sobel3x3u8 |fcvImageGradientSobelPlanars16_v3 |
|
||||||
|sobel3x3u11 |fcvImageGradientSobelPlanarf32_v2 |
|
|sobel3x3u8 |fcvImageGradientSobelPlanarf32_v2 |
|
||||||
|sobel3x3u12 |fcvImageGradientSobelPlanarf32_v3 |
|
|sobel3x3u8 |fcvImageGradientSobelPlanarf32_v3 |
|
||||||
|sobel |fcvFilterSobel3x3u8_v2 |
|
|sobel |fcvFilterSobel3x3u8_v2 |
|
||||||
| |fcvFilterSobel3x3u8s16 |
|
| |fcvFilterSobel3x3u8s16 |
|
||||||
| |fcvFilterSobel5x5u8s16 |
|
| |fcvFilterSobel5x5u8s16 |
|
||||||
@ -244,3 +245,4 @@ HAL and Extension list of APIs
|
|||||||
|trackOpticalFlowLK |fcvTrackLKOpticalFlowu8_v3 |
|
|trackOpticalFlowLK |fcvTrackLKOpticalFlowu8_v3 |
|
||||||
| |fcvTrackLKOpticalFlowu8 |
|
| |fcvTrackLKOpticalFlowu8 |
|
||||||
|warpPerspective2Plane |fcv2PlaneWarpPerspectiveu8 |
|
|warpPerspective2Plane |fcv2PlaneWarpPerspectiveu8 |
|
||||||
|
|warpPerspective |fcvWarpPerspectiveu8_v5 |
|
||||||
|
@ -1040,7 +1040,7 @@ namespace CAROTENE_NS {
|
|||||||
s32 maxVal, size_t * maxLocPtr, s32 & maxLocCount, s32 maxLocCapacity);
|
s32 maxVal, size_t * maxLocPtr, s32 & maxLocCount, s32 maxLocCapacity);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Among each pixel `p` within `src` find min and max values and its first occurences
|
Among each pixel `p` within `src` find min and max values and its first occurrences
|
||||||
*/
|
*/
|
||||||
void minMaxLoc(const Size2D &size,
|
void minMaxLoc(const Size2D &size,
|
||||||
const s8 * srcBase, ptrdiff_t srcStride,
|
const s8 * srcBase, ptrdiff_t srcStride,
|
||||||
|
@ -13,6 +13,7 @@ add_library(ipphal STATIC
|
|||||||
"${CMAKE_CURRENT_SOURCE_DIR}/src/norm_ipp.cpp"
|
"${CMAKE_CURRENT_SOURCE_DIR}/src/norm_ipp.cpp"
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/src/cart_polar_ipp.cpp"
|
"${CMAKE_CURRENT_SOURCE_DIR}/src/cart_polar_ipp.cpp"
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/src/transforms_ipp.cpp"
|
"${CMAKE_CURRENT_SOURCE_DIR}/src/transforms_ipp.cpp"
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/src/sum_ipp.cpp"
|
||||||
)
|
)
|
||||||
|
|
||||||
#TODO: HAVE_IPP_ICV and HAVE_IPP_IW added as private macro till OpenCV itself is
|
#TODO: HAVE_IPP_ICV and HAVE_IPP_IW added as private macro till OpenCV itself is
|
||||||
|
@ -1,3 +1,7 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
#ifndef __IPP_HAL_CORE_HPP__
|
#ifndef __IPP_HAL_CORE_HPP__
|
||||||
#define __IPP_HAL_CORE_HPP__
|
#define __IPP_HAL_CORE_HPP__
|
||||||
|
|
||||||
@ -32,6 +36,11 @@ int ipp_hal_normDiff(const uchar* src1, size_t src1_step, const uchar* src2, siz
|
|||||||
#undef cv_hal_normDiff
|
#undef cv_hal_normDiff
|
||||||
#define cv_hal_normDiff ipp_hal_normDiff
|
#define cv_hal_normDiff ipp_hal_normDiff
|
||||||
|
|
||||||
|
int ipp_hal_sum(const uchar *src_data, size_t src_step, int src_type, int width, int height, double *result);
|
||||||
|
|
||||||
|
#undef cv_hal_sum
|
||||||
|
#define cv_hal_sum ipp_hal_sum
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int ipp_hal_polarToCart32f(const float* mag, const float* angle, float* x, float* y, int len, bool angleInDegrees);
|
int ipp_hal_polarToCart32f(const float* mag, const float* angle, float* x, float* y, int len, bool angleInDegrees);
|
||||||
@ -56,4 +65,6 @@ int ipp_hal_transpose2d(const uchar* src_data, size_t src_step, uchar* dst_data,
|
|||||||
#undef cv_hal_transpose2d
|
#undef cv_hal_transpose2d
|
||||||
#define cv_hal_transpose2d ipp_hal_transpose2d
|
#define cv_hal_transpose2d ipp_hal_transpose2d
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,3 +1,7 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
#ifndef __IPP_HAL_UTILS_HPP__
|
#ifndef __IPP_HAL_UTILS_HPP__
|
||||||
#define __IPP_HAL_UTILS_HPP__
|
#define __IPP_HAL_UTILS_HPP__
|
||||||
|
|
||||||
|
@ -1,3 +1,7 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
#include "ipp_hal_core.hpp"
|
#include "ipp_hal_core.hpp"
|
||||||
|
|
||||||
#include <opencv2/core/core.hpp>
|
#include <opencv2/core/core.hpp>
|
||||||
|
@ -1,3 +1,7 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
#include "ipp_hal_core.hpp"
|
#include "ipp_hal_core.hpp"
|
||||||
|
|
||||||
#include <opencv2/core.hpp>
|
#include <opencv2/core.hpp>
|
||||||
|
@ -1,3 +1,7 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
#include "ipp_hal_core.hpp"
|
#include "ipp_hal_core.hpp"
|
||||||
|
|
||||||
#include <opencv2/core.hpp>
|
#include <opencv2/core.hpp>
|
||||||
|
@ -1,3 +1,7 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
#include "ipp_hal_core.hpp"
|
#include "ipp_hal_core.hpp"
|
||||||
|
|
||||||
#include <opencv2/core.hpp>
|
#include <opencv2/core.hpp>
|
||||||
|
59
hal/ipp/src/sum_ipp.cpp
Normal file
59
hal/ipp/src/sum_ipp.cpp
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
|
#include "ipp_hal_core.hpp"
|
||||||
|
|
||||||
|
#include <opencv2/core.hpp>
|
||||||
|
#include <opencv2/core/base.hpp>
|
||||||
|
|
||||||
|
#if IPP_VERSION_X100 >= 700
|
||||||
|
|
||||||
|
int ipp_hal_sum(const uchar *src_data, size_t src_step, int src_type, int width, int height, double *result)
|
||||||
|
{
|
||||||
|
int cn = CV_MAT_CN(src_type);
|
||||||
|
if (cn > 4)
|
||||||
|
{
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
IppiSize sz = { width, height };
|
||||||
|
|
||||||
|
typedef IppStatus (CV_STDCALL* ippiSumFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
|
||||||
|
typedef IppStatus (CV_STDCALL* ippiSumFuncNoHint)(const void*, int, IppiSize, double *);
|
||||||
|
ippiSumFuncHint ippiSumHint =
|
||||||
|
src_type == CV_32FC1 ? (ippiSumFuncHint)ippiSum_32f_C1R :
|
||||||
|
src_type == CV_32FC3 ? (ippiSumFuncHint)ippiSum_32f_C3R :
|
||||||
|
src_type == CV_32FC4 ? (ippiSumFuncHint)ippiSum_32f_C4R :
|
||||||
|
0;
|
||||||
|
ippiSumFuncNoHint ippiSum =
|
||||||
|
src_type == CV_8UC1 ? (ippiSumFuncNoHint)ippiSum_8u_C1R :
|
||||||
|
src_type == CV_8UC3 ? (ippiSumFuncNoHint)ippiSum_8u_C3R :
|
||||||
|
src_type == CV_8UC4 ? (ippiSumFuncNoHint)ippiSum_8u_C4R :
|
||||||
|
src_type == CV_16UC1 ? (ippiSumFuncNoHint)ippiSum_16u_C1R :
|
||||||
|
src_type == CV_16UC3 ? (ippiSumFuncNoHint)ippiSum_16u_C3R :
|
||||||
|
src_type == CV_16UC4 ? (ippiSumFuncNoHint)ippiSum_16u_C4R :
|
||||||
|
src_type == CV_16SC1 ? (ippiSumFuncNoHint)ippiSum_16s_C1R :
|
||||||
|
src_type == CV_16SC3 ? (ippiSumFuncNoHint)ippiSum_16s_C3R :
|
||||||
|
src_type == CV_16SC4 ? (ippiSumFuncNoHint)ippiSum_16s_C4R :
|
||||||
|
0;
|
||||||
|
|
||||||
|
if( ippiSumHint || ippiSum )
|
||||||
|
{
|
||||||
|
IppStatus ret = ippiSumHint ?
|
||||||
|
CV_INSTRUMENT_FUN_IPP(ippiSumHint, src_data, (int)src_step, sz, result, ippAlgHintAccurate) :
|
||||||
|
CV_INSTRUMENT_FUN_IPP(ippiSum, src_data, (int)src_step, sz, result);
|
||||||
|
if( ret >= 0 )
|
||||||
|
{
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -1,3 +1,7 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
#include "ipp_hal_core.hpp"
|
#include "ipp_hal_core.hpp"
|
||||||
|
|
||||||
#include <opencv2/core.hpp>
|
#include <opencv2/core.hpp>
|
||||||
|
@ -2,6 +2,7 @@ project(kleidicv_hal)
|
|||||||
|
|
||||||
if(HAVE_KLEIDICV)
|
if(HAVE_KLEIDICV)
|
||||||
option(KLEIDICV_ENABLE_SME2 "" OFF) # not compatible with some CLang versions in NDK
|
option(KLEIDICV_ENABLE_SME2 "" OFF) # not compatible with some CLang versions in NDK
|
||||||
|
option(KLEIDICV_USE_CV_NAMESPACE_IN_OPENCV_HAL "" OFF)
|
||||||
include("${KLEIDICV_SOURCE_PATH}/adapters/opencv/CMakeLists.txt")
|
include("${KLEIDICV_SOURCE_PATH}/adapters/opencv/CMakeLists.txt")
|
||||||
# HACK to suppress adapters/opencv/kleidicv_hal.cpp:343:12: warning: unused function 'from_opencv' [-Wunused-function]
|
# HACK to suppress adapters/opencv/kleidicv_hal.cpp:343:12: warning: unused function 'from_opencv' [-Wunused-function]
|
||||||
target_compile_options( kleidicv_hal PRIVATE
|
target_compile_options( kleidicv_hal PRIVATE
|
||||||
|
@ -156,10 +156,12 @@ int bilateralFilter(const uchar* src_data, size_t src_step,
|
|||||||
|
|
||||||
int i, j, maxk, radius;
|
int i, j, maxk, radius;
|
||||||
|
|
||||||
if( sigma_color <= 0 )
|
constexpr double eps = 1e-6;
|
||||||
sigma_color = 1;
|
if( sigma_color <= eps || sigma_space <= eps )
|
||||||
if( sigma_space <= 0 )
|
{
|
||||||
sigma_space = 1;
|
src.copyTo(dst);
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
double gauss_color_coeff = -0.5/(sigma_color * sigma_color);
|
double gauss_color_coeff = -0.5/(sigma_color * sigma_color);
|
||||||
double gauss_space_coeff = -0.5/(sigma_space * sigma_space);
|
double gauss_space_coeff = -0.5/(sigma_space * sigma_space);
|
||||||
|
@ -1,9 +1,26 @@
|
|||||||
cmake_minimum_required(VERSION ${MIN_VER_CMAKE} FATAL_ERROR)
|
cmake_minimum_required(VERSION ${MIN_VER_CMAKE} FATAL_ERROR)
|
||||||
|
|
||||||
set(HAL_LIB_NAME "")
|
set(RVV_HAL_INCLUDE_DIR include)
|
||||||
|
set(RVV_HAL_SOURCE_DIR src)
|
||||||
|
|
||||||
|
file(GLOB rvv_hal_headers RELATIVE "${CMAKE_CURRENT_LIST_DIR}" "${RVV_HAL_INCLUDE_DIR}/*.hpp")
|
||||||
|
file(GLOB rvv_hal_sources RELATIVE "${CMAKE_CURRENT_LIST_DIR}" "${RVV_HAL_SOURCE_DIR}/**/*.cpp")
|
||||||
|
|
||||||
|
set(HAL_LIB_NAME "rvv_hal")
|
||||||
|
add_library(${HAL_LIB_NAME} STATIC)
|
||||||
|
target_sources(${HAL_LIB_NAME} PRIVATE ${rvv_hal_headers} ${rvv_hal_sources})
|
||||||
|
|
||||||
|
set_target_properties(${HAL_LIB_NAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${3P_LIBRARY_OUTPUT_PATH})
|
||||||
|
if(NOT BUILD_SHARED_LIBS)
|
||||||
|
ocv_install_target(${HAL_LIB_NAME} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)
|
||||||
|
endif()
|
||||||
|
target_include_directories(${HAL_LIB_NAME} PRIVATE
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
|
${CMAKE_SOURCE_DIR}/modules/core/include
|
||||||
|
${CMAKE_SOURCE_DIR}/modules/imgproc/include) # ${CMAKE_SOURCE_DIR}/modules/features2d/include
|
||||||
|
|
||||||
set(RVV_HAL_FOUND TRUE CACHE INTERNAL "")
|
set(RVV_HAL_FOUND TRUE CACHE INTERNAL "")
|
||||||
set(RVV_HAL_VERSION "0.0.1" CACHE INTERNAL "")
|
set(RVV_HAL_VERSION "0.0.1" CACHE INTERNAL "")
|
||||||
set(RVV_HAL_LIBRARIES ${HAL_LIB_NAME} CACHE INTERNAL "")
|
set(RVV_HAL_LIBRARIES ${HAL_LIB_NAME} CACHE INTERNAL "")
|
||||||
set(RVV_HAL_HEADERS "hal_rvv.hpp" CACHE INTERNAL "")
|
set(RVV_HAL_HEADERS "rvv_hal.hpp" CACHE INTERNAL "")
|
||||||
set(RVV_HAL_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_SOURCE_DIR}/modules/imgproc/include" CACHE INTERNAL "")
|
set(RVV_HAL_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}" CACHE INTERNAL "")
|
||||||
|
@ -1,65 +0,0 @@
|
|||||||
// This file is part of OpenCV project.
|
|
||||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
||||||
// of this distribution and at http://opencv.org/license.html.
|
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_HPP_INCLUDED
|
|
||||||
#define OPENCV_HAL_RVV_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include "opencv2/core/base.hpp"
|
|
||||||
#include "opencv2/core/hal/interface.h"
|
|
||||||
#include "opencv2/imgproc/hal/interface.h"
|
|
||||||
|
|
||||||
#ifndef CV_HAL_RVV_071_ENABLED
|
|
||||||
# if defined(__GNUC__) && __GNUC__ == 10 && __GNUC_MINOR__ == 4 && defined(__THEAD_VERSION__) && defined(__riscv_v) && __riscv_v == 7000
|
|
||||||
# define CV_HAL_RVV_071_ENABLED 1
|
|
||||||
# else
|
|
||||||
# define CV_HAL_RVV_071_ENABLED 0
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if CV_HAL_RVV_071_ENABLED
|
|
||||||
#include "version/hal_rvv_071.hpp"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__riscv_v) && __riscv_v == 1000000
|
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
#include "hal_rvv_1p0/merge.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/mean.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/dxt.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/norm.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/norm_diff.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/norm_hamming.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/convert_scale.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/minmax.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/atan.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/split.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/magnitude.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/cart_to_polar.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/polar_to_cart.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/flip.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/lut.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/exp.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/log.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/lu.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/cholesky.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/qr.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/svd.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/sqrt.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/copy_mask.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/div.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/dotprod.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/compare.hpp" // core
|
|
||||||
#include "hal_rvv_1p0/transpose.hpp" // core
|
|
||||||
|
|
||||||
#include "hal_rvv_1p0/moments.hpp" // imgproc
|
|
||||||
#include "hal_rvv_1p0/filter.hpp" // imgproc
|
|
||||||
#include "hal_rvv_1p0/pyramids.hpp" // imgproc
|
|
||||||
#include "hal_rvv_1p0/color.hpp" // imgproc
|
|
||||||
#include "hal_rvv_1p0/warp.hpp" // imgproc
|
|
||||||
#include "hal_rvv_1p0/thresh.hpp" // imgproc
|
|
||||||
#include "hal_rvv_1p0/histogram.hpp" // imgproc
|
|
||||||
#include "hal_rvv_1p0/resize.hpp" // imgproc
|
|
||||||
#include "hal_rvv_1p0/integral.hpp" // imgproc
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,128 +0,0 @@
|
|||||||
// This file is part of OpenCV project.
|
|
||||||
// It is subject to the license terms in the LICENSE file found in the top-level
|
|
||||||
// directory of this distribution and at http://opencv.org/license.html.
|
|
||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_ATAN_HPP_INCLUDED
|
|
||||||
#define OPENCV_HAL_RVV_ATAN_HPP_INCLUDED
|
|
||||||
|
|
||||||
#undef cv_hal_fastAtan32f
|
|
||||||
#define cv_hal_fastAtan32f cv::cv_hal_rvv::fast_atan_32
|
|
||||||
|
|
||||||
#undef cv_hal_fastAtan64f
|
|
||||||
#define cv_hal_fastAtan64f cv::cv_hal_rvv::fast_atan_64
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
|
|
||||||
#include <cfloat>
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
|
||||||
|
|
||||||
namespace detail {
|
|
||||||
// ref: mathfuncs_core.simd.hpp
|
|
||||||
static constexpr float pi = CV_PI;
|
|
||||||
|
|
||||||
struct AtanParams
|
|
||||||
{
|
|
||||||
float p1, p3, p5, p7, angle_90;
|
|
||||||
};
|
|
||||||
|
|
||||||
static constexpr AtanParams atan_params_rad {
|
|
||||||
0.9997878412794807F,
|
|
||||||
-0.3258083974640975F,
|
|
||||||
0.1555786518463281F,
|
|
||||||
-0.04432655554792128F,
|
|
||||||
90.F * (pi / 180.F)};
|
|
||||||
static constexpr AtanParams atan_params_deg {
|
|
||||||
atan_params_rad.p1 * (180 / pi),
|
|
||||||
atan_params_rad.p3 * (180 / pi),
|
|
||||||
atan_params_rad.p5 * (180 / pi),
|
|
||||||
atan_params_rad.p7 * (180 / pi),
|
|
||||||
90.F};
|
|
||||||
|
|
||||||
template <typename VEC_T>
|
|
||||||
__attribute__((always_inline)) inline VEC_T
|
|
||||||
rvv_atan(VEC_T vy, VEC_T vx, size_t vl, const AtanParams& params)
|
|
||||||
{
|
|
||||||
const auto ax = __riscv_vfabs(vx, vl);
|
|
||||||
const auto ay = __riscv_vfabs(vy, vl);
|
|
||||||
// Reciprocal Estimate (vfrec7) is not accurate enough to pass the test of cartToPolar.
|
|
||||||
const auto c = __riscv_vfdiv(__riscv_vfmin(ax, ay, vl),
|
|
||||||
__riscv_vfadd(__riscv_vfmax(ax, ay, vl), FLT_EPSILON, vl),
|
|
||||||
vl);
|
|
||||||
const auto c2 = __riscv_vfmul(c, c, vl);
|
|
||||||
|
|
||||||
// Using vfmadd only results in about a 2% performance improvement, but it occupies 3 additional
|
|
||||||
// M4 registers. (Performance test on phase32f::VectorLength::1048576: time decreased
|
|
||||||
// from 5.952ms to 5.805ms on Muse Pi)
|
|
||||||
// Additionally, when registers are nearly fully utilized (though not yet exhausted), the
|
|
||||||
// compiler is likely to fail to optimize and may introduce slower memory access (e.g., in
|
|
||||||
// cv::cv_hal_rvv::fast_atan_64).
|
|
||||||
// Saving registers can also make this function more reusable in other contexts.
|
|
||||||
// Therefore, vfmadd is not used here.
|
|
||||||
auto a = __riscv_vfadd(__riscv_vfmul(c2, params.p7, vl), params.p5, vl);
|
|
||||||
a = __riscv_vfadd(__riscv_vfmul(c2, a, vl), params.p3, vl);
|
|
||||||
a = __riscv_vfadd(__riscv_vfmul(c2, a, vl), params.p1, vl);
|
|
||||||
a = __riscv_vfmul(a, c, vl);
|
|
||||||
|
|
||||||
a = __riscv_vfrsub_mu(__riscv_vmflt(ax, ay, vl), a, a, params.angle_90, vl);
|
|
||||||
a = __riscv_vfrsub_mu(__riscv_vmflt(vx, 0.F, vl), a, a, params.angle_90 * 2, vl);
|
|
||||||
a = __riscv_vfrsub_mu(__riscv_vmflt(vy, 0.F, vl), a, a, params.angle_90 * 4, vl);
|
|
||||||
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace detail
|
|
||||||
|
|
||||||
inline int fast_atan_32(const float* y, const float* x, float* dst, size_t n, bool angle_in_deg)
|
|
||||||
{
|
|
||||||
auto atan_params = angle_in_deg ? detail::atan_params_deg : detail::atan_params_rad;
|
|
||||||
|
|
||||||
for (size_t vl = 0; n > 0; n -= vl)
|
|
||||||
{
|
|
||||||
vl = __riscv_vsetvl_e32m4(n);
|
|
||||||
|
|
||||||
auto vy = __riscv_vle32_v_f32m4(y, vl);
|
|
||||||
auto vx = __riscv_vle32_v_f32m4(x, vl);
|
|
||||||
|
|
||||||
auto a = detail::rvv_atan(vy, vx, vl, atan_params);
|
|
||||||
|
|
||||||
__riscv_vse32(dst, a, vl);
|
|
||||||
|
|
||||||
x += vl;
|
|
||||||
y += vl;
|
|
||||||
dst += vl;
|
|
||||||
}
|
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int fast_atan_64(const double* y, const double* x, double* dst, size_t n, bool angle_in_deg)
|
|
||||||
{
|
|
||||||
// this also uses float32 version, ref: mathfuncs_core.simd.hpp
|
|
||||||
|
|
||||||
auto atan_params = angle_in_deg ? detail::atan_params_deg : detail::atan_params_rad;
|
|
||||||
|
|
||||||
for (size_t vl = 0; n > 0; n -= vl)
|
|
||||||
{
|
|
||||||
vl = __riscv_vsetvl_e64m8(n);
|
|
||||||
|
|
||||||
auto vy = __riscv_vfncvt_f(__riscv_vle64_v_f64m8(y, vl), vl);
|
|
||||||
auto vx = __riscv_vfncvt_f(__riscv_vle64_v_f64m8(x, vl), vl);
|
|
||||||
|
|
||||||
auto a = detail::rvv_atan(vy, vx, vl, atan_params);
|
|
||||||
|
|
||||||
__riscv_vse64(dst, __riscv_vfwcvt_f(a, vl), vl);
|
|
||||||
|
|
||||||
x += vl;
|
|
||||||
y += vl;
|
|
||||||
dst += vl;
|
|
||||||
}
|
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
}} // namespace cv::cv_hal_rvv
|
|
||||||
|
|
||||||
#endif //OPENCV_HAL_RVV_ATAN_HPP_INCLUDED
|
|
@ -1,52 +0,0 @@
|
|||||||
// This file is part of OpenCV project.
|
|
||||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
||||||
// of this distribution and at http://opencv.org/license.html.
|
|
||||||
//
|
|
||||||
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
|
||||||
// Third party copyrights are property of their respective owners.
|
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_COMMON_HPP_INCLUDED
|
|
||||||
#define OPENCV_HAL_RVV_COMMON_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace custom_intrin {
|
|
||||||
|
|
||||||
#define CV_HAL_RVV_NOOP(a) (a)
|
|
||||||
|
|
||||||
// Defines an overload `_Tpvd __riscv_vabs(const _Tpvs& v, int vl)` returning the
// absolute value of a signed vector, reinterpreted as the unsigned type `_Tpvd`
// (via __riscv_vreinterpret_<suffix>).
// Branch-free form: sign = v >> shift (arithmetic shift by `shift` bits),
// then |v| = (v ^ sign) - sign.
#define CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(_Tpvs, _Tpvd, shift, suffix) \
    inline _Tpvd __riscv_vabs(const _Tpvs& v, const int vl) { \
        const _Tpvs sign = __riscv_vsra(v, shift, vl); \
        const _Tpvs flipped = __riscv_vxor(v, sign, vl); \
        const _Tpvs magnitude = __riscv_vsub(flipped, sign, vl); \
        return __riscv_vreinterpret_##suffix(magnitude); \
    }
|
|
||||||
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint8m2_t, vuint8m2_t, 7, u8m2)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint8m8_t, vuint8m8_t, 7, u8m8)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint16m4_t, vuint16m4_t, 15, u16m4)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint16m8_t, vuint16m8_t, 15, u16m8)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint32m4_t, vuint32m4_t, 31, u32m4)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint32m8_t, vuint32m8_t, 31, u32m8)
|
|
||||||
|
|
||||||
// Defines an overload `_Tpvd __riscv_vabd(const _Tpvs& v1, const _Tpvs& v2, int vl)`
// computing the absolute difference as max(v1, v2) - min(v1, v2).
// `sub`, `max` and `min` name the intrinsics to use (without the `__riscv_`
// prefix); `cast` converts the difference to the result type `_Tpvd`.
#define CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(_Tpvs, _Tpvd, cast, sub, max, min) \
    inline _Tpvd __riscv_vabd(const _Tpvs& v1, const _Tpvs& v2, const int vl) { \
        const _Tpvs upper = __riscv_##max(v1, v2, vl); \
        const _Tpvs lower = __riscv_##min(v1, v2, vl); \
        return cast(__riscv_##sub(upper, lower, vl)); \
    }
|
|
||||||
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vuint8m4_t, vuint8m4_t, CV_HAL_RVV_NOOP, vsub, vmaxu, vminu)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vuint8m8_t, vuint8m8_t, CV_HAL_RVV_NOOP, vsub, vmaxu, vminu)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vuint16m2_t, vuint16m2_t, CV_HAL_RVV_NOOP, vsub, vmaxu, vminu)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vuint16m8_t, vuint16m8_t, CV_HAL_RVV_NOOP, vsub, vmaxu, vminu)
|
|
||||||
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint8m4_t, vuint8m4_t, __riscv_vreinterpret_u8m4, vsub, vmax, vmin)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint8m8_t, vuint8m8_t, __riscv_vreinterpret_u8m8, vsub, vmax, vmin)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint16m2_t, vuint16m2_t, __riscv_vreinterpret_u16m2, vsub, vmax, vmin)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint16m8_t, vuint16m8_t, __riscv_vreinterpret_u16m8, vsub, vmax, vmin)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint32m4_t, vuint32m4_t, __riscv_vreinterpret_u32m4, vsub, vmax, vmin)
|
|
||||||
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint32m8_t, vuint32m8_t, __riscv_vreinterpret_u32m8, vsub, vmax, vmin)
|
|
||||||
|
|
||||||
}}} // cv::cv_hal_rvv::custom_intrin
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,268 +0,0 @@
|
|||||||
// This file is part of OpenCV project.
|
|
||||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
||||||
// of this distribution and at http://opencv.org/license.html.
|
|
||||||
//
|
|
||||||
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
|
||||||
// Third party copyrights are property of their respective owners.
|
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_DIV_HPP_INCLUDED
|
|
||||||
#define OPENCV_HAL_RVV_DIV_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
#include <limits>
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace div {
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
inline size_t setvl(int l) { return __riscv_vsetvl_e8m2(l); }
|
|
||||||
|
|
||||||
inline vuint8m2_t vle(const uint8_t *p, int vl) { return __riscv_vle8_v_u8m2(p, vl); }
|
|
||||||
inline vint8m2_t vle(const int8_t *p, int vl) { return __riscv_vle8_v_i8m2(p, vl); }
|
|
||||||
inline vuint16m4_t vle(const uint16_t *p, int vl) { return __riscv_vle16_v_u16m4(p, vl); }
|
|
||||||
inline vint16m4_t vle(const int16_t *p, int vl) { return __riscv_vle16_v_i16m4(p, vl); }
|
|
||||||
inline vint32m8_t vle(const int *p, int vl) { return __riscv_vle32_v_i32m8(p, vl); }
|
|
||||||
inline vfloat32m8_t vle(const float *p, int vl) { return __riscv_vle32_v_f32m8(p, vl); }
|
|
||||||
|
|
||||||
inline void vse(uint8_t *p, const vuint8m2_t &v, int vl) { __riscv_vse8(p, v, vl); }
|
|
||||||
inline void vse(int8_t *p, const vint8m2_t &v, int vl) { __riscv_vse8(p, v, vl); }
|
|
||||||
inline void vse(uint16_t *p, const vuint16m4_t &v, int vl) { __riscv_vse16(p, v, vl); }
|
|
||||||
inline void vse(int16_t *p, const vint16m4_t &v, int vl) { __riscv_vse16(p, v, vl); }
|
|
||||||
inline void vse(int *p, const vint32m8_t &v, int vl) { __riscv_vse32(p, v, vl); }
|
|
||||||
inline void vse(float *p, const vfloat32m8_t &v, int vl) { __riscv_vse32(p, v, vl); }
|
|
||||||
|
|
||||||
inline vuint16m4_t ext(const vuint8m2_t &v, const int vl) { return __riscv_vzext_vf2(v, vl); }
|
|
||||||
inline vint16m4_t ext(const vint8m2_t &v, const int vl) { return __riscv_vsext_vf2(v, vl); }
|
|
||||||
inline vuint32m8_t ext(const vuint16m4_t &v, const int vl) { return __riscv_vzext_vf2(v, vl); }
|
|
||||||
inline vint32m8_t ext(const vint16m4_t &v, const int vl) { return __riscv_vsext_vf2(v, vl); }
|
|
||||||
|
|
||||||
inline vuint8m2_t nclip(const vuint16m4_t &v, const int vl) { return __riscv_vnclipu(v, 0, __RISCV_VXRM_RNU, vl); }
|
|
||||||
inline vint8m2_t nclip(const vint16m4_t &v, const int vl) { return __riscv_vnclip(v, 0, __RISCV_VXRM_RNU, vl); }
|
|
||||||
inline vuint16m4_t nclip(const vuint32m8_t &v, const int vl) { return __riscv_vnclipu(v, 0, __RISCV_VXRM_RNU, vl); }
|
|
||||||
inline vint16m4_t nclip(const vint32m8_t &v, const int vl) { return __riscv_vnclip(v, 0, __RISCV_VXRM_RNU, vl); }
|
|
||||||
|
|
||||||
template <typename VT> inline
|
|
||||||
VT div_sat(const VT &v1, const VT &v2, const float scale, const int vl) {
|
|
||||||
return nclip(div_sat(ext(v1, vl), ext(v2, vl), scale, vl), vl);
|
|
||||||
}
|
|
||||||
template <> inline
|
|
||||||
vint32m8_t div_sat(const vint32m8_t &v1, const vint32m8_t &v2, const float scale, const int vl) {
|
|
||||||
auto f1 = __riscv_vfcvt_f(v1, vl);
|
|
||||||
auto f2 = __riscv_vfcvt_f(v2, vl);
|
|
||||||
auto res = __riscv_vfmul(f1, __riscv_vfrdiv(f2, scale, vl), vl);
|
|
||||||
return __riscv_vfcvt_x(res, vl);
|
|
||||||
}
|
|
||||||
template <> inline
|
|
||||||
vuint32m8_t div_sat(const vuint32m8_t &v1, const vuint32m8_t &v2, const float scale, const int vl) {
|
|
||||||
auto f1 = __riscv_vfcvt_f(v1, vl);
|
|
||||||
auto f2 = __riscv_vfcvt_f(v2, vl);
|
|
||||||
auto res = __riscv_vfmul(f1, __riscv_vfrdiv(f2, scale, vl), vl);
|
|
||||||
return __riscv_vfcvt_xu(res, vl);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename VT> inline
|
|
||||||
VT recip_sat(const VT &v, const float scale, const int vl) {
|
|
||||||
return nclip(recip_sat(ext(v, vl), scale, vl), vl);
|
|
||||||
}
|
|
||||||
template <> inline
|
|
||||||
vint32m8_t recip_sat(const vint32m8_t &v, const float scale, const int vl) {
|
|
||||||
auto f = __riscv_vfcvt_f(v, vl);
|
|
||||||
auto res = __riscv_vfrdiv(f, scale, vl);
|
|
||||||
return __riscv_vfcvt_x(res, vl);
|
|
||||||
}
|
|
||||||
template <> inline
|
|
||||||
vuint32m8_t recip_sat(const vuint32m8_t &v, const float scale, const int vl) {
|
|
||||||
auto f = __riscv_vfcvt_f(v, vl);
|
|
||||||
auto res = __riscv_vfrdiv(f, scale, vl);
|
|
||||||
return __riscv_vfcvt_xu(res, vl);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // anonymous
|
|
||||||
|
|
||||||
#undef cv_hal_div8u
|
|
||||||
#define cv_hal_div8u cv::cv_hal_rvv::div::div<uint8_t>
|
|
||||||
#undef cv_hal_div8s
|
|
||||||
#define cv_hal_div8s cv::cv_hal_rvv::div::div<int8_t>
|
|
||||||
#undef cv_hal_div16u
|
|
||||||
#define cv_hal_div16u cv::cv_hal_rvv::div::div<uint16_t>
|
|
||||||
#undef cv_hal_div16s
|
|
||||||
#define cv_hal_div16s cv::cv_hal_rvv::div::div<int16_t>
|
|
||||||
#undef cv_hal_div32s
|
|
||||||
#define cv_hal_div32s cv::cv_hal_rvv::div::div<int>
|
|
||||||
#undef cv_hal_div32f
|
|
||||||
#define cv_hal_div32f cv::cv_hal_rvv::div::div<float>
|
|
||||||
// #undef cv_hal_div64f
|
|
||||||
// #define cv_hal_div64f cv::cv_hal_rvv::div::div<double>
|
|
||||||
|
|
||||||
template <typename ST> inline
|
|
||||||
int div(const ST *src1, size_t step1, const ST *src2, size_t step2,
|
|
||||||
ST *dst, size_t step, int width, int height, float scale) {
|
|
||||||
if (scale == 0.f ||
|
|
||||||
(scale * static_cast<float>(std::numeric_limits<ST>::max())) < 1.f &&
|
|
||||||
(scale * static_cast<float>(std::numeric_limits<ST>::max())) > -1.f) {
|
|
||||||
for (int h = 0; h < height; h++) {
|
|
||||||
ST *dst_h = reinterpret_cast<ST*>((uchar*)dst + h * step);
|
|
||||||
std::memset(dst_h, 0, sizeof(ST) * width);
|
|
||||||
}
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int h = 0; h < height; h++) {
|
|
||||||
const ST *src1_h = reinterpret_cast<const ST*>((const uchar*)src1 + h * step1);
|
|
||||||
const ST *src2_h = reinterpret_cast<const ST*>((const uchar*)src2 + h * step2);
|
|
||||||
ST *dst_h = reinterpret_cast<ST*>((uchar*)dst + h * step);
|
|
||||||
|
|
||||||
int vl;
|
|
||||||
for (int w = 0; w < width; w += vl) {
|
|
||||||
vl = setvl(width - w);
|
|
||||||
|
|
||||||
auto v1 = vle(src1_h + w, vl);
|
|
||||||
auto v2 = vle(src2_h + w, vl);
|
|
||||||
|
|
||||||
auto mask = __riscv_vmseq(v2, 0, vl);
|
|
||||||
vse(dst_h + w, __riscv_vmerge(div_sat(v1, v2, scale, vl), 0, mask, vl), vl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline
|
|
||||||
int div(const float *src1, size_t step1, const float *src2, size_t step2,
|
|
||||||
float *dst, size_t step, int width, int height, float scale) {
|
|
||||||
if (scale == 0.f) {
|
|
||||||
for (int h = 0; h < height; h++) {
|
|
||||||
float *dst_h = reinterpret_cast<float*>((uchar*)dst + h * step);
|
|
||||||
std::memset(dst_h, 0, sizeof(float) * width);
|
|
||||||
}
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (std::fabs(scale - 1.f) < FLT_EPSILON) {
|
|
||||||
for (int h = 0; h < height; h++) {
|
|
||||||
const float *src1_h = reinterpret_cast<const float*>((const uchar*)src1 + h * step1);
|
|
||||||
const float *src2_h = reinterpret_cast<const float*>((const uchar*)src2 + h * step2);
|
|
||||||
float *dst_h = reinterpret_cast<float*>((uchar*)dst + h * step);
|
|
||||||
|
|
||||||
int vl;
|
|
||||||
for (int w = 0; w < width; w += vl) {
|
|
||||||
vl = setvl(width - w);
|
|
||||||
|
|
||||||
auto v1 = vle(src1_h + w, vl);
|
|
||||||
auto v2 = vle(src2_h + w, vl);
|
|
||||||
|
|
||||||
vse(dst_h + w, __riscv_vfmul(v1, __riscv_vfrdiv(v2, 1.f, vl), vl), vl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int h = 0; h < height; h++) {
|
|
||||||
const float *src1_h = reinterpret_cast<const float*>((const uchar*)src1 + h * step1);
|
|
||||||
const float *src2_h = reinterpret_cast<const float*>((const uchar*)src2 + h * step2);
|
|
||||||
float *dst_h = reinterpret_cast<float*>((uchar*)dst + h * step);
|
|
||||||
|
|
||||||
int vl;
|
|
||||||
for (int w = 0; w < width; w += vl) {
|
|
||||||
vl = setvl(width - w);
|
|
||||||
|
|
||||||
auto v1 = vle(src1_h + w, vl);
|
|
||||||
auto v2 = vle(src2_h + w, vl);
|
|
||||||
|
|
||||||
vse(dst_h + w, __riscv_vfmul(v1, __riscv_vfrdiv(v2, scale, vl), vl), vl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef cv_hal_recip8u
|
|
||||||
#define cv_hal_recip8u cv::cv_hal_rvv::div::recip<uint8_t>
|
|
||||||
#undef cv_hal_recip8s
|
|
||||||
#define cv_hal_recip8s cv::cv_hal_rvv::div::recip<int8_t>
|
|
||||||
#undef cv_hal_recip16u
|
|
||||||
#define cv_hal_recip16u cv::cv_hal_rvv::div::recip<uint16_t>
|
|
||||||
#undef cv_hal_recip16s
|
|
||||||
#define cv_hal_recip16s cv::cv_hal_rvv::div::recip<int16_t>
|
|
||||||
#undef cv_hal_recip32s
|
|
||||||
#define cv_hal_recip32s cv::cv_hal_rvv::div::recip<int>
|
|
||||||
#undef cv_hal_recip32f
|
|
||||||
#define cv_hal_recip32f cv::cv_hal_rvv::div::recip<float>
|
|
||||||
// #undef cv_hal_recip64f
|
|
||||||
// #define cv_hal_recip64f cv::cv_hal_rvv::div::recip<double>
|
|
||||||
|
|
||||||
template <typename ST> inline
|
|
||||||
int recip(const ST *src_data, size_t src_step, ST *dst_data, size_t dst_step,
|
|
||||||
int width, int height, float scale) {
|
|
||||||
if (scale == 0.f || scale < 1.f && scale > -1.f) {
|
|
||||||
for (int h = 0; h < height; h++) {
|
|
||||||
ST *dst_h = reinterpret_cast<ST*>((uchar*)dst_data + h * dst_step);
|
|
||||||
std::memset(dst_h, 0, sizeof(ST) * width);
|
|
||||||
}
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int h = 0; h < height; h++) {
|
|
||||||
const ST *src_h = reinterpret_cast<const ST*>((const uchar*)src_data + h * src_step);
|
|
||||||
ST *dst_h = reinterpret_cast<ST*>((uchar*)dst_data + h * dst_step);
|
|
||||||
|
|
||||||
int vl;
|
|
||||||
for (int w = 0; w < width; w += vl) {
|
|
||||||
vl = setvl(width - w);
|
|
||||||
|
|
||||||
auto v = vle(src_h + w, vl);
|
|
||||||
|
|
||||||
auto mask = __riscv_vmseq(v, 0, vl);
|
|
||||||
vse(dst_h + w, __riscv_vmerge(recip_sat(v, scale, vl), 0, mask, vl), vl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline
|
|
||||||
int recip(const float *src_data, size_t src_step, float *dst_data, size_t dst_step,
|
|
||||||
int width, int height, float scale) {
|
|
||||||
if (scale == 0.f) {
|
|
||||||
for (int h = 0; h < height; h++) {
|
|
||||||
float *dst_h = reinterpret_cast<float*>((uchar*)dst_data + h * dst_step);
|
|
||||||
std::memset(dst_h, 0, sizeof(float) * width);
|
|
||||||
}
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (std::fabs(scale - 1.f) < FLT_EPSILON) {
|
|
||||||
for (int h = 0; h < height; h++) {
|
|
||||||
const float *src_h = reinterpret_cast<const float*>((const uchar*)src_data + h * src_step);
|
|
||||||
float *dst_h = reinterpret_cast<float*>((uchar*)dst_data + h * dst_step);
|
|
||||||
|
|
||||||
int vl;
|
|
||||||
for (int w = 0; w < width; w += vl) {
|
|
||||||
vl = setvl(width - w);
|
|
||||||
|
|
||||||
auto v = vle(src_h + w, vl);
|
|
||||||
|
|
||||||
vse(dst_h + w, __riscv_vfrdiv(v, 1.f, vl), vl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int h = 0; h < height; h++) {
|
|
||||||
const float *src_h = reinterpret_cast<const float*>((const uchar*)src_data + h * src_step);
|
|
||||||
float *dst_h = reinterpret_cast<float*>((uchar*)dst_data + h * dst_step);
|
|
||||||
|
|
||||||
int vl;
|
|
||||||
for (int w = 0; w < width; w += vl) {
|
|
||||||
vl = setvl(width - w);
|
|
||||||
|
|
||||||
auto v = vle(src_h + w, vl);
|
|
||||||
|
|
||||||
vse(dst_h + w, __riscv_vfrdiv(v, scale, vl), vl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
}}} // cv::cv_hal_rvv::div
|
|
||||||
|
|
||||||
#endif // OPENCV_HAL_RVV_DIV_HPP_INCLUDED
|
|
File diff suppressed because it is too large
Load Diff
@ -1,108 +0,0 @@
|
|||||||
// This file is part of OpenCV project.
|
|
||||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
||||||
// of this distribution and at http://opencv.org/license.html.
|
|
||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_HISTOGRAM_HPP_INCLUDED
|
|
||||||
#define OPENCV_HAL_RVV_HISTOGRAM_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
|
||||||
|
|
||||||
namespace equalize_hist {
|
|
||||||
#undef cv_hal_equalize_hist
|
|
||||||
#define cv_hal_equalize_hist cv::cv_hal_rvv::equalize_hist::equalize_hist
|
|
||||||
|
|
||||||
class HistogramInvoker : public ParallelLoopBody
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
template<typename... Args>
|
|
||||||
HistogramInvoker(std::function<void(int, int, Args...)> _func, Args&&... args)
|
|
||||||
{
|
|
||||||
func = std::bind(_func, std::placeholders::_1, std::placeholders::_2, std::forward<Args>(args)...);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void operator()(const Range& range) const override
|
|
||||||
{
|
|
||||||
func(range.start, range.end);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::function<void(int, int)> func;
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr int HIST_SZ = std::numeric_limits<uchar>::max() + 1;
|
|
||||||
|
|
||||||
static inline void hist_invoke(int start, int end, const uchar* src_data, size_t src_step, int width, int* hist, std::mutex* m)
|
|
||||||
{
|
|
||||||
int h[HIST_SZ] = {0};
|
|
||||||
for (int i = start; i < end; i++)
|
|
||||||
{
|
|
||||||
const uchar* src = src_data + i * src_step;
|
|
||||||
int j;
|
|
||||||
for (j = 0; j + 3 < width; j += 4)
|
|
||||||
{
|
|
||||||
int t0 = src[j], t1 = src[j+1];
|
|
||||||
h[t0]++; h[t1]++;
|
|
||||||
t0 = src[j+2]; t1 = src[j+3];
|
|
||||||
h[t0]++; h[t1]++;
|
|
||||||
}
|
|
||||||
for (; j < width; j++)
|
|
||||||
{
|
|
||||||
h[src[j]]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::lock_guard<std::mutex> lk(*m);
|
|
||||||
for (int i = 0; i < HIST_SZ; i++)
|
|
||||||
{
|
|
||||||
hist[i] += h[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void lut_invoke(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, const uchar* lut)
|
|
||||||
{
|
|
||||||
for (int i = start; i < end; i++)
|
|
||||||
{
|
|
||||||
int vl;
|
|
||||||
for (int j = 0; j < width; j += vl)
|
|
||||||
{
|
|
||||||
vl = __riscv_vsetvl_e8m8(width - j);
|
|
||||||
auto src = __riscv_vle8_v_u8m8(src_data + i * src_step + j, vl);
|
|
||||||
auto dst = __riscv_vloxei8_v_u8m8(lut, src, vl);
|
|
||||||
__riscv_vse8(dst_data + i * dst_step + j, dst, vl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// the algorithm is copied from imgproc/src/histogram.cpp,
|
|
||||||
// in the function void cv::equalizeHist
|
|
||||||
inline int equalize_hist(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height)
|
|
||||||
{
|
|
||||||
int hist[HIST_SZ] = {0};
|
|
||||||
uchar lut[HIST_SZ];
|
|
||||||
|
|
||||||
std::mutex m;
|
|
||||||
cv::parallel_for_(Range(0, height), HistogramInvoker({hist_invoke}, src_data, src_step, width, reinterpret_cast<int *>(hist), &m), static_cast<double>(width * height) / (1 << 15));
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
while (!hist[i]) ++i;
|
|
||||||
|
|
||||||
float scale = (HIST_SZ - 1.f)/(width * height - hist[i]);
|
|
||||||
int sum = 0;
|
|
||||||
for (lut[i++] = 0; i < HIST_SZ; i++)
|
|
||||||
{
|
|
||||||
sum += hist[i];
|
|
||||||
lut[i] = std::min(std::max(static_cast<int>(std::round(sum * scale)), 0), HIST_SZ - 1);
|
|
||||||
}
|
|
||||||
cv::parallel_for_(Range(0, height), HistogramInvoker({lut_invoke}, src_data, src_step, dst_data, dst_step, width, reinterpret_cast<const uchar*>(lut)), static_cast<double>(width * height) / (1 << 15));
|
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
} // cv::cv_hal_rvv::equalize_hist
|
|
||||||
|
|
||||||
}}
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,53 +0,0 @@
|
|||||||
// This file is part of OpenCV project.
|
|
||||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
||||||
// of this distribution and at http://opencv.org/license.html.
|
|
||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_POLAR_TO_CART_HPP_INCLUDED
|
|
||||||
#define OPENCV_HAL_RVV_POLAR_TO_CART_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
#include "hal_rvv_1p0/sincos.hpp"
|
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
|
||||||
|
|
||||||
#undef cv_hal_polarToCart32f
|
|
||||||
#define cv_hal_polarToCart32f cv::cv_hal_rvv::polarToCart<cv::cv_hal_rvv::RVV_F32M4>
|
|
||||||
#undef cv_hal_polarToCart64f
|
|
||||||
#define cv_hal_polarToCart64f cv::cv_hal_rvv::polarToCart<cv::cv_hal_rvv::RVV_F64M8>
|
|
||||||
|
|
||||||
template <typename RVV_T, typename Elem = typename RVV_T::ElemType>
|
|
||||||
inline int
|
|
||||||
polarToCart(const Elem* mag, const Elem* angle, Elem* x, Elem* y, int len, bool angleInDegrees)
|
|
||||||
{
|
|
||||||
using T = RVV_F32M4;
|
|
||||||
const auto sincos_scale = angleInDegrees ? detail::sincos_deg_scale : detail::sincos_rad_scale;
|
|
||||||
|
|
||||||
size_t vl;
|
|
||||||
auto cos_p2 = T::vmv(detail::sincos_cos_p2, T::setvlmax());
|
|
||||||
auto cos_p0 = T::vmv(detail::sincos_cos_p0, T::setvlmax());
|
|
||||||
for (; len > 0; len -= (int)vl, angle += vl, x += vl, y += vl)
|
|
||||||
{
|
|
||||||
vl = RVV_T::setvl(len);
|
|
||||||
auto vangle = T::cast(RVV_T::vload(angle, vl), vl);
|
|
||||||
T::VecType vsin, vcos;
|
|
||||||
detail::SinCos32f<T>(vangle, vsin, vcos, sincos_scale, cos_p2, cos_p0, vl);
|
|
||||||
if (mag)
|
|
||||||
{
|
|
||||||
auto vmag = T::cast(RVV_T::vload(mag, vl), vl);
|
|
||||||
vsin = __riscv_vfmul(vsin, vmag, vl);
|
|
||||||
vcos = __riscv_vfmul(vcos, vmag, vl);
|
|
||||||
mag += vl;
|
|
||||||
}
|
|
||||||
RVV_T::vstore(x, RVV_T::cast(vcos, vl), vl);
|
|
||||||
RVV_T::vstore(y, RVV_T::cast(vsin, vl), vl);
|
|
||||||
}
|
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
}} // namespace cv::cv_hal_rvv
|
|
||||||
|
|
||||||
#endif // OPENCV_HAL_RVV_POLAR_TO_CART_HPP_INCLUDED
|
|
@ -1,131 +0,0 @@
|
|||||||
// This file is part of OpenCV project.
|
|
||||||
// It is subject to the license terms in the LICENSE file found in the top-level
|
|
||||||
// directory of this distribution and at http://opencv.org/license.html.
|
|
||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_SQRT_HPP_INCLUDED
|
|
||||||
#define OPENCV_HAL_RVV_SQRT_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
#include <cmath>
|
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
|
||||||
|
|
||||||
#undef cv_hal_sqrt32f
|
|
||||||
#undef cv_hal_sqrt64f
|
|
||||||
#undef cv_hal_invSqrt32f
|
|
||||||
#undef cv_hal_invSqrt64f
|
|
||||||
|
|
||||||
#define cv_hal_sqrt32f cv::cv_hal_rvv::sqrt<cv::cv_hal_rvv::Sqrt32f<cv::cv_hal_rvv::RVV_F32M8>>
|
|
||||||
#define cv_hal_sqrt64f cv::cv_hal_rvv::sqrt<cv::cv_hal_rvv::Sqrt64f<cv::cv_hal_rvv::RVV_F64M8>>
|
|
||||||
|
|
||||||
#ifdef __clang__
|
|
||||||
// Strange bug in clang: invSqrt uses 2 LMUL registers to store the mask, which causes memory accesses.
|
|
||||||
// So a smaller LMUL is used here.
|
|
||||||
# define cv_hal_invSqrt32f cv::cv_hal_rvv::invSqrt<cv::cv_hal_rvv::Sqrt32f<cv::cv_hal_rvv::RVV_F32M4>>
|
|
||||||
# define cv_hal_invSqrt64f cv::cv_hal_rvv::invSqrt<cv::cv_hal_rvv::Sqrt64f<cv::cv_hal_rvv::RVV_F64M4>>
|
|
||||||
#else
|
|
||||||
# define cv_hal_invSqrt32f cv::cv_hal_rvv::invSqrt<cv::cv_hal_rvv::Sqrt32f<cv::cv_hal_rvv::RVV_F32M8>>
|
|
||||||
# define cv_hal_invSqrt64f cv::cv_hal_rvv::invSqrt<cv::cv_hal_rvv::Sqrt64f<cv::cv_hal_rvv::RVV_F64M8>>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace detail {
|
|
||||||
|
|
||||||
// Newton-Raphson method
|
|
||||||
// Use 4 LMUL registers
|
|
||||||
// Element-wise square root of the first `vl` lanes of x, computed as
// x * (1 / sqrt(x)). The reciprocal square root starts from the vfrsqrt7
// estimate and is refined by `iter_times` Newton-Raphson steps:
//   y <- y * (1.5 - (x / 2) * y * y)
// Lanes classified as -0, +0, positive subnormal or +inf skip the final
// multiply and are returned unchanged (the result for them is x itself).
template <size_t iter_times, typename VEC_T>
|
|
||||||
inline VEC_T sqrt(VEC_T x, size_t vl)
|
|
||||||
{
|
|
||||||
// x2 = x / 2, the constant factor of the Newton-Raphson update.
auto x2 = __riscv_vfmul(x, 0.5, vl);
|
|
||||||
// Initial estimate of 1 / sqrt(x).
auto y = __riscv_vfrsqrt7(x, vl);
|
|
||||||
#ifdef __clang__
|
|
||||||
#pragma unroll
|
|
||||||
#endif
|
|
||||||
for (size_t i = 0; i < iter_times; i++)
|
|
||||||
{
|
|
||||||
// t = 1.5 - x2 * y * y; y = t * y
auto t = __riscv_vfmul(y, y, vl);
|
|
||||||
t = __riscv_vfmul(t, x2, vl);
|
|
||||||
t = __riscv_vfrsub(t, 1.5, vl);
|
|
||||||
y = __riscv_vfmul(t, y, vl);
|
|
||||||
}
|
|
||||||
// just to prevent the compiler from calculating mask before the iteration, which will run out
|
|
||||||
// of registers and cause memory access.
|
|
||||||
asm volatile("" ::: "memory");
|
|
||||||
auto classified = __riscv_vfclass(x, vl);
|
|
||||||
// block -0, +0, positive subnormal number, +inf
|
|
||||||
// mask is set for every lane whose class is NOT one of the four above.
auto mask = __riscv_vmseq(__riscv_vand(classified, 0b10111000, vl), 0, vl);
|
|
||||||
// Masked lanes get x * y; the remaining lanes keep x.
return __riscv_vfmul_mu(mask, x, x, y, vl);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Vector reciprocal square root.
// Starts from the __riscv_vfrsqrt7 estimate and refines it iter_times times with the
// Newton-Raphson method. Lanes classified as -0, +0, positive subnormal or +inf are excluded
// from the refinement and keep the initial estimate.
// Use 3 LMUL registers and 1 mask register
template <size_t iter_times, typename VEC_T>
inline VEC_T invSqrt(VEC_T x, size_t vl)
{
    // Class bits 0b10111000 select -0, +0, positive subnormal number and +inf; a lane is refined
    // only when none of those bits is set.
    auto refine = __riscv_vmseq(__riscv_vand(__riscv_vfclass(x, vl), 0b10111000, vl), 0, vl);
    auto half_x = __riscv_vfmul(x, 0.5, vl);
    auto rsqrt = __riscv_vfrsqrt7(x, vl);
#ifdef __clang__
#pragma unroll
#endif
    for (size_t step = 0; step < iter_times; ++step)
    {
        // One refinement step: rsqrt <- rsqrt * (1.5 - (rsqrt * rsqrt) * half_x)
        auto factor = __riscv_vfmul(rsqrt, rsqrt, vl);
        factor = __riscv_vfmul(factor, half_x, vl);
        factor = __riscv_vfrsub(factor, 1.5, vl);
        // Masked-off lanes keep their previous value (mask-undisturbed).
        rsqrt = __riscv_vfmul_mu(refine, rsqrt, factor, rsqrt, vl);
    }
    return rsqrt;
}
|
|
||||||
|
|
||||||
} // namespace detail
|
|
||||||
|
|
||||||
// Traits for the 32-bit float kernels: the RVV type wrapper to operate on and the number of
// Newton-Raphson refinement steps to run.
template <typename RVV_T>
struct Sqrt32f
{
    static constexpr size_t iter_times = 2;
    typedef RVV_T T;
};
|
|
||||||
|
|
||||||
// Traits for the 64-bit float kernels: the RVV type wrapper to operate on and the number of
// Newton-Raphson refinement steps to run (one more than the 32-bit variant).
template <typename RVV_T>
struct Sqrt64f
{
    static constexpr size_t iter_times = 3;
    typedef RVV_T T;
};
|
|
||||||
|
|
||||||
template <typename SQRT_T, typename Elem = typename SQRT_T::T::ElemType>
|
|
||||||
inline int sqrt(const Elem* src, Elem* dst, int _len)
|
|
||||||
{
|
|
||||||
size_t vl;
|
|
||||||
for (size_t len = _len; len > 0; len -= vl, src += vl, dst += vl)
|
|
||||||
{
|
|
||||||
vl = SQRT_T::T::setvl(len);
|
|
||||||
auto x = SQRT_T::T::vload(src, vl);
|
|
||||||
SQRT_T::T::vstore(dst, detail::sqrt<SQRT_T::iter_times>(x, vl), vl);
|
|
||||||
}
|
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename SQRT_T, typename Elem = typename SQRT_T::T::ElemType>
|
|
||||||
inline int invSqrt(const Elem* src, Elem* dst, int _len)
|
|
||||||
{
|
|
||||||
size_t vl;
|
|
||||||
for (size_t len = _len; len > 0; len -= vl, src += vl, dst += vl)
|
|
||||||
{
|
|
||||||
vl = SQRT_T::T::setvl(len);
|
|
||||||
auto x = SQRT_T::T::vload(src, vl);
|
|
||||||
SQRT_T::T::vstore(dst, detail::invSqrt<SQRT_T::iter_times>(x, vl), vl);
|
|
||||||
}
|
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
}} // namespace cv::cv_hal_rvv
|
|
||||||
|
|
||||||
#endif // OPENCV_HAL_RVV_SQRT_HPP_INCLUDED
|
|
332
hal/riscv-rvv/include/core.hpp
Normal file
332
hal/riscv-rvv/include/core.hpp
Normal file
@ -0,0 +1,332 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_RVV_HAL_CORE_HPP
|
||||||
|
#define OPENCV_RVV_HAL_CORE_HPP
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
/* ############ merge ############ */
|
||||||
|
|
||||||
|
int merge8u(const uchar** src, uchar* dst, int len, int cn);
|
||||||
|
int merge16u(const ushort** src, ushort* dst, int len, int cn);
|
||||||
|
int merge32s(const int** src, int* dst, int len, int cn);
|
||||||
|
int merge64s(const int64** src, int64* dst, int len, int cn);
|
||||||
|
|
||||||
|
#undef cv_hal_merge8u
|
||||||
|
#define cv_hal_merge8u cv::rvv_hal::core::merge8u
|
||||||
|
#undef cv_hal_merge16u
|
||||||
|
#define cv_hal_merge16u cv::rvv_hal::core::merge16u
|
||||||
|
#undef cv_hal_merge32s
|
||||||
|
#define cv_hal_merge32s cv::rvv_hal::core::merge32s
|
||||||
|
#undef cv_hal_merge64s
|
||||||
|
#define cv_hal_merge64s cv::rvv_hal::core::merge64s
|
||||||
|
|
||||||
|
/* ############ meanStdDev ############ */
|
||||||
|
|
||||||
|
int meanStdDev(const uchar* src_data, size_t src_step, int width, int height, int src_type,
|
||||||
|
double* mean_val, double* stddev_val, uchar* mask, size_t mask_step);
|
||||||
|
|
||||||
|
#undef cv_hal_meanStdDev
|
||||||
|
#define cv_hal_meanStdDev cv::rvv_hal::core::meanStdDev
|
||||||
|
|
||||||
|
/* ############ dft ############ */
|
||||||
|
|
||||||
|
int dft(const uchar* src, uchar* dst, int depth, int nf, int *factors, double scale,
|
||||||
|
int* itab, void* wave, int tab_size, int n, bool isInverse, bool noPermute);
|
||||||
|
|
||||||
|
#undef cv_hal_dft
|
||||||
|
#define cv_hal_dft cv::rvv_hal::core::dft
|
||||||
|
|
||||||
|
/* ############ norm ############ */
|
||||||
|
|
||||||
|
int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step,
|
||||||
|
int width, int height, int type, int norm_type, double* result);
|
||||||
|
|
||||||
|
#undef cv_hal_norm
|
||||||
|
#define cv_hal_norm cv::rvv_hal::core::norm
|
||||||
|
|
||||||
|
/* ############ normDiff ############ */
|
||||||
|
|
||||||
|
int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step,
|
||||||
|
const uchar* mask, size_t mask_step, int width, int height, int type,
|
||||||
|
int norm_type, double* result);
|
||||||
|
|
||||||
|
#undef cv_hal_normDiff
|
||||||
|
#define cv_hal_normDiff cv::rvv_hal::core::normDiff
|
||||||
|
|
||||||
|
/* ############ normHamming ############ */
|
||||||
|
|
||||||
|
int normHamming8u(const uchar* a, int n, int cellSize, int* result);
|
||||||
|
int normHammingDiff8u(const uchar* a, const uchar* b, int n, int cellSize, int* result);
|
||||||
|
|
||||||
|
#undef cv_hal_normHamming8u
|
||||||
|
#define cv_hal_normHamming8u cv::rvv_hal::core::normHamming8u
|
||||||
|
#undef cv_hal_normHammingDiff8u
|
||||||
|
#define cv_hal_normHammingDiff8u cv::rvv_hal::core::normHammingDiff8u
|
||||||
|
|
||||||
|
/* ############ convertScale ############ */
|
||||||
|
|
||||||
|
int convertScale(const uchar* src, size_t src_step, uchar* dst, size_t dst_step,
|
||||||
|
int width, int height, int sdepth, int ddepth, double alpha, double beta);
|
||||||
|
|
||||||
|
#undef cv_hal_convertScale
|
||||||
|
#define cv_hal_convertScale cv::rvv_hal::core::convertScale
|
||||||
|
|
||||||
|
/* ############ minMaxIdx ############ */
|
||||||
|
|
||||||
|
int minMaxIdx(const uchar* src_data, size_t src_step, int width, int height, int depth,
|
||||||
|
double* minVal, double* maxVal, int* minIdx, int* maxIdx, uchar* mask, size_t mask_step = 0);
|
||||||
|
|
||||||
|
#undef cv_hal_minMaxIdx
|
||||||
|
#define cv_hal_minMaxIdx cv::rvv_hal::core::minMaxIdx
|
||||||
|
#undef cv_hal_minMaxIdxMaskStep
|
||||||
|
#define cv_hal_minMaxIdxMaskStep cv::rvv_hal::core::minMaxIdx
|
||||||
|
|
||||||
|
/* ############ fastAtan ############ */
|
||||||
|
|
||||||
|
int fast_atan_32(const float* y, const float* x, float* dst, size_t n, bool angle_in_deg);
|
||||||
|
int fast_atan_64(const double* y, const double* x, double* dst, size_t n, bool angle_in_deg);
|
||||||
|
|
||||||
|
#undef cv_hal_fastAtan32f
|
||||||
|
#define cv_hal_fastAtan32f cv::rvv_hal::core::fast_atan_32
|
||||||
|
#undef cv_hal_fastAtan64f
|
||||||
|
#define cv_hal_fastAtan64f cv::rvv_hal::core::fast_atan_64
|
||||||
|
|
||||||
|
/* ############ split ############ */
|
||||||
|
|
||||||
|
int split8u(const uchar* src, uchar** dst, int len, int cn);
|
||||||
|
|
||||||
|
#undef cv_hal_split8u
|
||||||
|
#define cv_hal_split8u cv::rvv_hal::core::split8u
|
||||||
|
|
||||||
|
/* ############ sqrt ############ */
|
||||||
|
|
||||||
|
int sqrt32f(const float* src, float* dst, int _len);
|
||||||
|
int sqrt64f(const double* src, double* dst, int _len);
|
||||||
|
|
||||||
|
#undef cv_hal_sqrt32f
|
||||||
|
#define cv_hal_sqrt32f cv::rvv_hal::core::sqrt32f
|
||||||
|
#undef cv_hal_sqrt64f
|
||||||
|
#define cv_hal_sqrt64f cv::rvv_hal::core::sqrt64f
|
||||||
|
|
||||||
|
int invSqrt32f(const float* src, float* dst, int _len);
|
||||||
|
int invSqrt64f(const double* src, double* dst, int _len);
|
||||||
|
|
||||||
|
#undef cv_hal_invSqrt32f
|
||||||
|
#define cv_hal_invSqrt32f cv::rvv_hal::core::invSqrt32f
|
||||||
|
#undef cv_hal_invSqrt64f
|
||||||
|
#define cv_hal_invSqrt64f cv::rvv_hal::core::invSqrt64f
|
||||||
|
|
||||||
|
/* ############ magnitude ############ */
|
||||||
|
|
||||||
|
int magnitude32f(const float *x, const float *y, float *dst, int len);
|
||||||
|
int magnitude64f(const double *x, const double *y, double *dst, int len);
|
||||||
|
|
||||||
|
#undef cv_hal_magnitude32f
|
||||||
|
#define cv_hal_magnitude32f cv::rvv_hal::core::magnitude32f
|
||||||
|
#undef cv_hal_magnitude64f
|
||||||
|
#define cv_hal_magnitude64f cv::rvv_hal::core::magnitude64f
|
||||||
|
|
||||||
|
/* ############ cartToPolar ############ */
|
||||||
|
|
||||||
|
int cartToPolar32f(const float* x, const float* y, float* mag, float* angle, int len, bool angleInDegrees);
|
||||||
|
int cartToPolar64f(const double* x, const double* y, double* mag, double* angle, int len, bool angleInDegrees);
|
||||||
|
|
||||||
|
#undef cv_hal_cartToPolar32f
|
||||||
|
#define cv_hal_cartToPolar32f cv::rvv_hal::core::cartToPolar32f
|
||||||
|
#undef cv_hal_cartToPolar64f
|
||||||
|
#define cv_hal_cartToPolar64f cv::rvv_hal::core::cartToPolar64f
|
||||||
|
|
||||||
|
/* ############ polarToCart ############ */
|
||||||
|
|
||||||
|
int polarToCart32f(const float* mag, const float* angle, float* x, float* y, int len, bool angleInDegrees);
|
||||||
|
int polarToCart64f(const double* mag, const double* angle, double* x, double* y, int len, bool angleInDegrees);
|
||||||
|
|
||||||
|
#undef cv_hal_polarToCart32f
|
||||||
|
#define cv_hal_polarToCart32f cv::rvv_hal::core::polarToCart32f
|
||||||
|
#undef cv_hal_polarToCart64f
|
||||||
|
#define cv_hal_polarToCart64f cv::rvv_hal::core::polarToCart64f
|
||||||
|
|
||||||
|
/* ############ flip ############ */
|
||||||
|
|
||||||
|
int flip(int src_type, const uchar* src_data, size_t src_step, int src_width, int src_height,
|
||||||
|
uchar* dst_data, size_t dst_step, int flip_mode);
|
||||||
|
|
||||||
|
#undef cv_hal_flip
|
||||||
|
#define cv_hal_flip cv::rvv_hal::core::flip
|
||||||
|
|
||||||
|
/* ############ lut ############ */
|
||||||
|
|
||||||
|
int lut(const uchar* src_data, size_t src_step, size_t src_type,
|
||||||
|
const uchar* lut_data, size_t lut_channel_size, size_t lut_channels,
|
||||||
|
uchar* dst_data, size_t dst_step, int width, int height);
|
||||||
|
|
||||||
|
#undef cv_hal_lut
|
||||||
|
#define cv_hal_lut cv::rvv_hal::core::lut
|
||||||
|
|
||||||
|
/* ############ exp ############ */
|
||||||
|
|
||||||
|
int exp32f(const float* src, float* dst, int _len);
|
||||||
|
int exp64f(const double* src, double* dst, int _len);
|
||||||
|
|
||||||
|
#undef cv_hal_exp32f
|
||||||
|
#define cv_hal_exp32f cv::rvv_hal::core::exp32f
|
||||||
|
#undef cv_hal_exp64f
|
||||||
|
#define cv_hal_exp64f cv::rvv_hal::core::exp64f
|
||||||
|
|
||||||
|
/* ############ log ############ */
|
||||||
|
|
||||||
|
int log32f(const float* src, float* dst, int _len);
|
||||||
|
int log64f(const double* src, double* dst, int _len);
|
||||||
|
|
||||||
|
#undef cv_hal_log32f
|
||||||
|
#define cv_hal_log32f cv::rvv_hal::core::log32f
|
||||||
|
#undef cv_hal_log64f
|
||||||
|
#define cv_hal_log64f cv::rvv_hal::core::log64f
|
||||||
|
|
||||||
|
/* ############ lu ############ */
|
||||||
|
|
||||||
|
int LU32f(float* src1, size_t src1_step, int m, float* src2, size_t src2_step, int n, int* info);
|
||||||
|
int LU64f(double* src1, size_t src1_step, int m, double* src2, size_t src2_step, int n, int* info);
|
||||||
|
|
||||||
|
#undef cv_hal_LU32f
|
||||||
|
#define cv_hal_LU32f cv::rvv_hal::core::LU32f
|
||||||
|
#undef cv_hal_LU64f
|
||||||
|
#define cv_hal_LU64f cv::rvv_hal::core::LU64f
|
||||||
|
|
||||||
|
/* ############ cholesky ############ */
|
||||||
|
|
||||||
|
int Cholesky32f(float* src1, size_t src1_step, int m, float* src2, size_t src2_step, int n, bool* info);
|
||||||
|
int Cholesky64f(double* src1, size_t src1_step, int m, double* src2, size_t src2_step, int n, bool* info);
|
||||||
|
|
||||||
|
#undef cv_hal_Cholesky32f
|
||||||
|
#define cv_hal_Cholesky32f cv::rvv_hal::core::Cholesky32f
|
||||||
|
#undef cv_hal_Cholesky64f
|
||||||
|
#define cv_hal_Cholesky64f cv::rvv_hal::core::Cholesky64f
|
||||||
|
|
||||||
|
/* ############ qr ############ */
|
||||||
|
|
||||||
|
int QR32f(float* src1, size_t src1_step, int m, int n, int k, float* src2, size_t src2_step, float* dst, int* info);
|
||||||
|
int QR64f(double* src1, size_t src1_step, int m, int n, int k, double* src2, size_t src2_step, double* dst, int* info);
|
||||||
|
|
||||||
|
#undef cv_hal_QR32f
|
||||||
|
#define cv_hal_QR32f cv::rvv_hal::core::QR32f
|
||||||
|
#undef cv_hal_QR64f
|
||||||
|
#define cv_hal_QR64f cv::rvv_hal::core::QR64f
|
||||||
|
|
||||||
|
/* ############ SVD ############ */
|
||||||
|
|
||||||
|
int SVD32f(float* src, size_t src_step, float* w, float* u, size_t u_step, float* vt, size_t vt_step, int m, int n, int flags);
|
||||||
|
int SVD64f(double* src, size_t src_step, double* w, double* u, size_t u_step, double* vt, size_t vt_step, int m, int n, int flags);
|
||||||
|
|
||||||
|
#undef cv_hal_SVD32f
|
||||||
|
#define cv_hal_SVD32f cv::rvv_hal::core::SVD32f
|
||||||
|
#undef cv_hal_SVD64f
|
||||||
|
#define cv_hal_SVD64f cv::rvv_hal::core::SVD64f
|
||||||
|
|
||||||
|
/* ############ copyToMasked ############ */
|
||||||
|
|
||||||
|
int copyToMasked(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height,
|
||||||
|
int type, const uchar *mask_data, size_t mask_step, int mask_type);
|
||||||
|
|
||||||
|
#undef cv_hal_copyToMasked
|
||||||
|
#define cv_hal_copyToMasked cv::rvv_hal::core::copyToMasked
|
||||||
|
|
||||||
|
/* ############ div, recip ############ */
|
||||||
|
|
||||||
|
int div8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
int div8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, schar *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
int div16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, ushort *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
int div16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, short *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
int div32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
int div32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
// int div64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, double *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
|
||||||
|
#undef cv_hal_div8u
|
||||||
|
#define cv_hal_div8u cv::rvv_hal::core::div8u
|
||||||
|
#undef cv_hal_div8s
|
||||||
|
#define cv_hal_div8s cv::rvv_hal::core::div8s
|
||||||
|
#undef cv_hal_div16u
|
||||||
|
#define cv_hal_div16u cv::rvv_hal::core::div16u
|
||||||
|
#undef cv_hal_div16s
|
||||||
|
#define cv_hal_div16s cv::rvv_hal::core::div16s
|
||||||
|
#undef cv_hal_div32s
|
||||||
|
#define cv_hal_div32s cv::rvv_hal::core::div32s
|
||||||
|
#undef cv_hal_div32f
|
||||||
|
#define cv_hal_div32f cv::rvv_hal::core::div32f
|
||||||
|
// #undef cv_hal_div64f
|
||||||
|
// #define cv_hal_div64f cv::rvv_hal::core::div64f
|
||||||
|
|
||||||
|
int recip8u(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
int recip8s(const schar *src_data, size_t src_step, schar *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
int recip16u(const ushort *src_data, size_t src_step, ushort *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
int recip16s(const short *src_data, size_t src_step, short *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
int recip32s(const int *src_data, size_t src_step, int *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
int recip32f(const float *src_data, size_t src_step, float *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
// int recip64f(const double *src_data, size_t src_step, double *dst_data, size_t dst_step, int width, int height, double scale);
|
||||||
|
|
||||||
|
#undef cv_hal_recip8u
|
||||||
|
#define cv_hal_recip8u cv::rvv_hal::core::recip8u
|
||||||
|
#undef cv_hal_recip8s
|
||||||
|
#define cv_hal_recip8s cv::rvv_hal::core::recip8s
|
||||||
|
#undef cv_hal_recip16u
|
||||||
|
#define cv_hal_recip16u cv::rvv_hal::core::recip16u
|
||||||
|
#undef cv_hal_recip16s
|
||||||
|
#define cv_hal_recip16s cv::rvv_hal::core::recip16s
|
||||||
|
#undef cv_hal_recip32s
|
||||||
|
#define cv_hal_recip32s cv::rvv_hal::core::recip32s
|
||||||
|
#undef cv_hal_recip32f
|
||||||
|
#define cv_hal_recip32f cv::rvv_hal::core::recip32f
|
||||||
|
// #undef cv_hal_recip64f
|
||||||
|
// #define cv_hal_recip64f cv::rvv_hal::core::recip64f
|
||||||
|
|
||||||
|
/* ############ dotProduct ############ */
|
||||||
|
|
||||||
|
int dotprod(const uchar *a_data, size_t a_step, const uchar *b_data, size_t b_step,
|
||||||
|
int width, int height, int type, double *dot_val);
|
||||||
|
|
||||||
|
#undef cv_hal_dotProduct
|
||||||
|
#define cv_hal_dotProduct cv::rvv_hal::core::dotprod
|
||||||
|
|
||||||
|
/* ############ compare ############ */
|
||||||
|
|
||||||
|
int cmp8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation);
|
||||||
|
int cmp8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation);
|
||||||
|
int cmp16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation);
|
||||||
|
int cmp16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation);
|
||||||
|
int cmp32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation);
|
||||||
|
int cmp32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation);
|
||||||
|
// int cmp64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation);
|
||||||
|
|
||||||
|
#undef cv_hal_cmp8u
|
||||||
|
#define cv_hal_cmp8u cv::rvv_hal::core::cmp8u
|
||||||
|
#undef cv_hal_cmp8s
|
||||||
|
#define cv_hal_cmp8s cv::rvv_hal::core::cmp8s
|
||||||
|
#undef cv_hal_cmp16u
|
||||||
|
#define cv_hal_cmp16u cv::rvv_hal::core::cmp16u
|
||||||
|
#undef cv_hal_cmp16s
|
||||||
|
#define cv_hal_cmp16s cv::rvv_hal::core::cmp16s
|
||||||
|
#undef cv_hal_cmp32s
|
||||||
|
#define cv_hal_cmp32s cv::rvv_hal::core::cmp32s
|
||||||
|
#undef cv_hal_cmp32f
|
||||||
|
#define cv_hal_cmp32f cv::rvv_hal::core::cmp32f
|
||||||
|
// #undef cv_hal_cmp64f
|
||||||
|
// #define cv_hal_cmp64f cv::rvv_hal::core::cmp64f
|
||||||
|
|
||||||
|
/* ############ transpose2d ############ */
|
||||||
|
|
||||||
|
int transpose2d(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step,
|
||||||
|
int src_width, int src_height, int element_size);
|
||||||
|
|
||||||
|
#undef cv_hal_transpose2d
|
||||||
|
#define cv_hal_transpose2d cv::rvv_hal::core::transpose2d
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
||||||
|
|
||||||
|
#endif // OPENCV_RVV_HAL_CORE_HPP
|
256
hal/riscv-rvv/include/imgproc.hpp
Normal file
256
hal/riscv-rvv/include/imgproc.hpp
Normal file
@ -0,0 +1,256 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_RVV_HAL_IMGPROC_HPP
|
||||||
|
#define OPENCV_RVV_HAL_IMGPROC_HPP
|
||||||
|
|
||||||
|
struct cvhalFilter2D;
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
/* ############ imageMoments ############ */
|
||||||
|
|
||||||
|
int imageMoments(const uchar* src_data, size_t src_step, int src_type,
|
||||||
|
int width, int height, bool binary, double m[10]);
|
||||||
|
|
||||||
|
#undef cv_hal_imageMoments
|
||||||
|
#define cv_hal_imageMoments cv::rvv_hal::imgproc::imageMoments
|
||||||
|
|
||||||
|
/* ############ filter ############ */
|
||||||
|
|
||||||
|
int filterInit(cvhalFilter2D** context, uchar* kernel_data, size_t kernel_step, int kernel_type, int kernel_width, int kernel_height, int /*max_width*/, int /*max_height*/, int src_type, int dst_type, int borderType, double delta, int anchor_x, int anchor_y, bool /*allowSubmatrix*/, bool /*allowInplace*/);
|
||||||
|
int filter(cvhalFilter2D* context, uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int full_width, int full_height, int offset_x, int offset_y);
|
||||||
|
int filterFree(cvhalFilter2D* context);
|
||||||
|
|
||||||
|
#undef cv_hal_filterInit
|
||||||
|
#define cv_hal_filterInit cv::rvv_hal::imgproc::filterInit
|
||||||
|
#undef cv_hal_filter
|
||||||
|
#define cv_hal_filter cv::rvv_hal::imgproc::filter
|
||||||
|
#undef cv_hal_filterFree
|
||||||
|
#define cv_hal_filterFree cv::rvv_hal::imgproc::filterFree
|
||||||
|
|
||||||
|
/* ############ sepFilter ############ */
|
||||||
|
|
||||||
|
int sepFilterInit(cvhalFilter2D **context, int src_type, int dst_type, int kernel_type, uchar* kernelx_data, int kernelx_length, uchar* kernely_data, int kernely_length, int anchor_x, int anchor_y, double delta, int borderType);
|
||||||
|
int sepFilter(cvhalFilter2D *context, uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int full_width, int full_height, int offset_x, int offset_y);
|
||||||
|
int sepFilterFree(cvhalFilter2D* context);
|
||||||
|
|
||||||
|
#undef cv_hal_sepFilterInit
|
||||||
|
#define cv_hal_sepFilterInit cv::rvv_hal::imgproc::sepFilterInit
|
||||||
|
#undef cv_hal_sepFilter
|
||||||
|
#define cv_hal_sepFilter cv::rvv_hal::imgproc::sepFilter
|
||||||
|
#undef cv_hal_sepFilterFree
|
||||||
|
#define cv_hal_sepFilterFree cv::rvv_hal::imgproc::sepFilterFree
|
||||||
|
|
||||||
|
/* ############ morph ############ */
|
||||||
|
|
||||||
|
int morphInit(cvhalFilter2D** context, int operation, int src_type, int dst_type, int /*max_width*/, int /*max_height*/, int kernel_type, uchar* kernel_data, size_t kernel_step, int kernel_width, int kernel_height, int anchor_x, int anchor_y, int borderType, const double borderValue[4], int iterations, bool /*allowSubmatrix*/, bool /*allowInplace*/);
|
||||||
|
int morph(cvhalFilter2D* context, uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int src_full_width, int src_full_height, int src_roi_x, int src_roi_y, int /*dst_full_width*/, int /*dst_full_height*/, int /*dst_roi_x*/, int /*dst_roi_y*/);
|
||||||
|
int morphFree(cvhalFilter2D* context);
|
||||||
|
|
||||||
|
#undef cv_hal_morphInit
|
||||||
|
#undef cv_hal_morph
|
||||||
|
#undef cv_hal_morphFree
|
||||||
|
#define cv_hal_morphInit cv::rvv_hal::imgproc::morphInit
|
||||||
|
#define cv_hal_morph cv::rvv_hal::imgproc::morph
|
||||||
|
#define cv_hal_morphFree cv::rvv_hal::imgproc::morphFree
|
||||||
|
|
||||||
|
/* ############ gaussianBlur ############ */
|
||||||
|
|
||||||
|
int gaussianBlurBinomial(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int depth, int cn, size_t margin_left, size_t margin_top, size_t margin_right, size_t margin_bottom, size_t ksize, int border_type);
|
||||||
|
|
||||||
|
#undef cv_hal_gaussianBlurBinomial
|
||||||
|
#define cv_hal_gaussianBlurBinomial cv::rvv_hal::imgproc::gaussianBlurBinomial
|
||||||
|
|
||||||
|
/* ############ medianBlur ############ */
|
||||||
|
|
||||||
|
int medianBlur(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int depth, int cn, int ksize);
|
||||||
|
|
||||||
|
#undef cv_hal_medianBlur
|
||||||
|
#define cv_hal_medianBlur cv::rvv_hal::imgproc::medianBlur
|
||||||
|
|
||||||
|
/* ############ boxFilter ############ */
|
||||||
|
|
||||||
|
int boxFilter(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int src_depth, int dst_depth, int cn, int margin_left, int margin_top, int margin_right, int margin_bottom, size_t ksize_width, size_t ksize_height, int anchor_x, int anchor_y, bool normalize, int border_type);
|
||||||
|
|
||||||
|
#undef cv_hal_boxFilter
|
||||||
|
#define cv_hal_boxFilter cv::rvv_hal::imgproc::boxFilter
|
||||||
|
|
||||||
|
/* ############ bilateralFilter ############ */
|
||||||
|
|
||||||
|
int bilateralFilter(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step,
|
||||||
|
int width, int height, int depth, int cn, int d, double sigma_color,
|
||||||
|
double sigma_space, int border_type);
|
||||||
|
|
||||||
|
#undef cv_hal_bilateralFilter
|
||||||
|
#define cv_hal_bilateralFilter cv::rvv_hal::imgproc::bilateralFilter
|
||||||
|
|
||||||
|
/* ############ pyramid ############ */
|
||||||
|
|
||||||
|
int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int depth, int cn, int border_type);
|
||||||
|
int pyrUp(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int depth, int cn, int border_type);
|
||||||
|
|
||||||
|
#undef cv_hal_pyrdown
|
||||||
|
#define cv_hal_pyrdown cv::rvv_hal::imgproc::pyrDown
|
||||||
|
#undef cv_hal_pyrup
|
||||||
|
#define cv_hal_pyrup cv::rvv_hal::imgproc::pyrUp
|
||||||
|
|
||||||
|
/* ############ cvtColor ############ */
|
||||||
|
|
||||||
|
int cvtBGRtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, int dcn, bool swapBlue);
|
||||||
|
int cvtGraytoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn);
|
||||||
|
int cvtBGRtoGray(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue);
|
||||||
|
int cvtBGR5x5toBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int dcn, bool swapBlue, int greenBits);
|
||||||
|
int cvtBGRtoBGR5x5(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int greenBits);
|
||||||
|
int cvtBGR5x5toGray(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int greenBits);
|
||||||
|
int cvtGraytoBGR5x5(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int greenBits);
|
||||||
|
int cvtYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isCbCr);
|
||||||
|
int cvtBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isCbCr);
|
||||||
|
int cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx, int yIdx);
|
||||||
|
int cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx);
|
||||||
|
int cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx);
|
||||||
|
int cvtOnePlaneBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx, int yIdx);
|
||||||
|
int cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step, uchar * y_data, size_t y_step, uchar * uv_data, size_t uv_step, int width, int height, int scn, bool swapBlue, int uIdx);
|
||||||
|
int cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx);
|
||||||
|
int cvtHSVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV);
|
||||||
|
int cvtBGRtoHSV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV);
|
||||||
|
int cvtXYZtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue);
|
||||||
|
int cvtBGRtoXYZ(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue);
|
||||||
|
int cvtLabtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isLab, bool srgb);
|
||||||
|
int cvtBGRtoLab(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isLab, bool srgb);
|
||||||
|
|
||||||
|
#undef cv_hal_cvtBGRtoBGR
|
||||||
|
#define cv_hal_cvtBGRtoBGR cv::rvv_hal::imgproc::cvtBGRtoBGR
|
||||||
|
#undef cv_hal_cvtGraytoBGR
|
||||||
|
#define cv_hal_cvtGraytoBGR cv::rvv_hal::imgproc::cvtGraytoBGR
|
||||||
|
#undef cv_hal_cvtBGRtoGray
|
||||||
|
#define cv_hal_cvtBGRtoGray cv::rvv_hal::imgproc::cvtBGRtoGray
|
||||||
|
#undef cv_hal_cvtBGR5x5toBGR
|
||||||
|
#define cv_hal_cvtBGR5x5toBGR cv::rvv_hal::imgproc::cvtBGR5x5toBGR
|
||||||
|
#undef cv_hal_cvtBGRtoBGR5x5
|
||||||
|
#define cv_hal_cvtBGRtoBGR5x5 cv::rvv_hal::imgproc::cvtBGRtoBGR5x5
|
||||||
|
#undef cv_hal_cvtBGR5x5toGray
|
||||||
|
#define cv_hal_cvtBGR5x5toGray cv::rvv_hal::imgproc::cvtBGR5x5toGray
|
||||||
|
#undef cv_hal_cvtGraytoBGR5x5
|
||||||
|
#define cv_hal_cvtGraytoBGR5x5 cv::rvv_hal::imgproc::cvtGraytoBGR5x5
|
||||||
|
#undef cv_hal_cvtYUVtoBGR
|
||||||
|
#define cv_hal_cvtYUVtoBGR cv::rvv_hal::imgproc::cvtYUVtoBGR
|
||||||
|
#undef cv_hal_cvtBGRtoYUV
|
||||||
|
#define cv_hal_cvtBGRtoYUV cv::rvv_hal::imgproc::cvtBGRtoYUV
|
||||||
|
#undef cv_hal_cvtOnePlaneYUVtoBGR
|
||||||
|
#define cv_hal_cvtOnePlaneYUVtoBGR cv::rvv_hal::imgproc::cvtOnePlaneYUVtoBGR
|
||||||
|
#undef cv_hal_cvtTwoPlaneYUVtoBGR
|
||||||
|
#define cv_hal_cvtTwoPlaneYUVtoBGR cv::rvv_hal::imgproc::cvtTwoPlaneYUVtoBGR
|
||||||
|
#undef cv_hal_cvtThreePlaneYUVtoBGR
|
||||||
|
#define cv_hal_cvtThreePlaneYUVtoBGR cv::rvv_hal::imgproc::cvtThreePlaneYUVtoBGR
|
||||||
|
#undef cv_hal_cvtOnePlaneBGRtoYUV
|
||||||
|
#define cv_hal_cvtOnePlaneBGRtoYUV cv::rvv_hal::imgproc::cvtOnePlaneBGRtoYUV
|
||||||
|
#undef cv_hal_cvtBGRtoTwoPlaneYUV
|
||||||
|
#define cv_hal_cvtBGRtoTwoPlaneYUV cv::rvv_hal::imgproc::cvtBGRtoTwoPlaneYUV
|
||||||
|
#undef cv_hal_cvtBGRtoThreePlaneYUV
|
||||||
|
#define cv_hal_cvtBGRtoThreePlaneYUV cv::rvv_hal::imgproc::cvtBGRtoThreePlaneYUV
|
||||||
|
#undef cv_hal_cvtHSVtoBGR
|
||||||
|
#define cv_hal_cvtHSVtoBGR cv::rvv_hal::imgproc::cvtHSVtoBGR
|
||||||
|
#undef cv_hal_cvtBGRtoHSV
|
||||||
|
#define cv_hal_cvtBGRtoHSV cv::rvv_hal::imgproc::cvtBGRtoHSV
|
||||||
|
#undef cv_hal_cvtXYZtoBGR
|
||||||
|
#define cv_hal_cvtXYZtoBGR cv::rvv_hal::imgproc::cvtXYZtoBGR
|
||||||
|
#undef cv_hal_cvtBGRtoXYZ
|
||||||
|
#define cv_hal_cvtBGRtoXYZ cv::rvv_hal::imgproc::cvtBGRtoXYZ
|
||||||
|
#undef cv_hal_cvtLabtoBGR
|
||||||
|
#define cv_hal_cvtLabtoBGR cv::rvv_hal::imgproc::cvtLabtoBGR
|
||||||
|
#undef cv_hal_cvtBGRtoLab
|
||||||
|
#define cv_hal_cvtBGRtoLab cv::rvv_hal::imgproc::cvtBGRtoLab
|
||||||
|
|
||||||
|
/* ############ warp ############ */
|
||||||
|
|
||||||
|
int remap32f(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height,
|
||||||
|
uchar *dst_data, size_t dst_step, int dst_width, int dst_height,
|
||||||
|
float* mapx, size_t mapx_step, float* mapy, size_t mapy_step,
|
||||||
|
int interpolation, int border_type, const double border_value[4]);
|
||||||
|
int remap32fc2(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height,
|
||||||
|
uchar *dst_data, size_t dst_step, int dst_width, int dst_height,
|
||||||
|
float* map, size_t map_step, int interpolation, int border_type, const double border_value[4]);
|
||||||
|
int remap16s(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height,
|
||||||
|
uchar *dst_data, size_t dst_step, int dst_width, int dst_height,
|
||||||
|
short* mapx, size_t mapx_step, ushort* mapy, size_t mapy_step,
|
||||||
|
int interpolation, int border_type, const double border_value[4]);
|
||||||
|
|
||||||
|
// BUG: https://github.com/opencv/opencv/issues/27279
|
||||||
|
// #undef cv_hal_remap32f
|
||||||
|
// #define cv_hal_remap32f cv::rvv_hal::imgproc::remap32f
|
||||||
|
// #undef cv_hal_remap32fc2
|
||||||
|
// #define cv_hal_remap32fc2 cv::rvv_hal::imgproc::remap32fc2
|
||||||
|
// #undef cv_hal_remap16s
|
||||||
|
// #define cv_hal_remap16s cv::rvv_hal::imgproc::remap16s
|
||||||
|
|
||||||
|
int warpAffine(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[6], int interpolation, int borderType, const double borderValue[4]);
|
||||||
|
int warpPerspective(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[9], int interpolation, int borderType, const double borderValue[4]);
|
||||||
|
|
||||||
|
// BUG: https://github.com/opencv/opencv/issues/27280
|
||||||
|
//#undef cv_hal_warpAffine
|
||||||
|
//#define cv_hal_warpAffine cv::rvv_hal::imgproc::warpAffine
|
||||||
|
//#undef cv_hal_warpPerspective
|
||||||
|
//#define cv_hal_warpPerspective cv::rvv_hal::imgproc::warpPerspective
|
||||||
|
|
||||||
|
/* ############ threshold ############ */
|
||||||
|
|
||||||
|
int threshold(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int depth, int cn, double thresh, double maxValue, int thresholdType);
|
||||||
|
int threshold_otsu(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int depth, double maxValue, int thresholdType, double* thresh);
|
||||||
|
int adaptiveThreshold(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, double maxValue, int adaptiveMethod, int thresholdType, int blockSize, double C);
|
||||||
|
|
||||||
|
// disabled since the UI (presumably universal intrinsics) path is fast enough; threshold is only called in threshold_otsu
|
||||||
|
// #undef cv_hal_threshold
|
||||||
|
// #define cv_hal_threshold cv::rvv_hal::imgproc::threshold
|
||||||
|
#undef cv_hal_threshold_otsu
|
||||||
|
#define cv_hal_threshold_otsu cv::rvv_hal::imgproc::threshold_otsu
|
||||||
|
#undef cv_hal_adaptiveThreshold
|
||||||
|
#define cv_hal_adaptiveThreshold cv::rvv_hal::imgproc::adaptiveThreshold
|
||||||
|
|
||||||
|
/* ############ histogram ############ */
|
||||||
|
|
||||||
|
int equalize_hist(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height);
|
||||||
|
|
||||||
|
#undef cv_hal_equalize_hist
|
||||||
|
#define cv_hal_equalize_hist cv::rvv_hal::imgproc::equalize_hist
|
||||||
|
|
||||||
|
int calc_hist(const uchar* src_data, size_t src_step, int src_type, int src_width, int src_height, float* hist_data, int hist_size, const float** ranges, bool uniform, bool accumulate);
|
||||||
|
|
||||||
|
#undef cv_hal_calcHist
|
||||||
|
#define cv_hal_calcHist cv::rvv_hal::imgproc::calc_hist
|
||||||
|
|
||||||
|
/* ############ resize ############ */
|
||||||
|
|
||||||
|
int resize(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, double inv_scale_x, double inv_scale_y, int interpolation);
|
||||||
|
|
||||||
|
#undef cv_hal_resize
|
||||||
|
#define cv_hal_resize cv::rvv_hal::imgproc::resize
|
||||||
|
|
||||||
|
/* ############ integral ############ */
|
||||||
|
|
||||||
|
int integral(int depth, int sdepth, int sqdepth,
|
||||||
|
const uchar* src_data, size_t src_step,
|
||||||
|
uchar* sum_data, size_t sum_step,
|
||||||
|
uchar* sqsum_data, size_t sqsum_step,
|
||||||
|
uchar* tilted_data, [[maybe_unused]] size_t tilted_step,
|
||||||
|
int width, int height, int cn);
|
||||||
|
|
||||||
|
#undef cv_hal_integral
|
||||||
|
#define cv_hal_integral cv::rvv_hal::imgproc::integral
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_071_ENABLED
|
||||||
|
|
||||||
|
int cvtBGRtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, int dcn, bool swapBlue);
|
||||||
|
#undef cv_hal_cvtBGRtoBGR
|
||||||
|
#define cv_hal_cvtBGRtoBGR cv::rvv_hal::imgproc::cvtBGRtoBGR
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_071_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::imgproc
|
||||||
|
|
||||||
|
#endif // OPENCV_RVV_HAL_IMGPROC_HPP
|
@ -4,13 +4,15 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_TYPES_HPP_INCLUDED
|
#ifndef OPENCV_RVV_HAL_TYPES_HPP
|
||||||
#define OPENCV_HAL_RVV_TYPES_HPP_INCLUDED
|
#define OPENCV_RVV_HAL_TYPES_HPP
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
#include <riscv_vector.h>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
namespace cv { namespace rvv_hal {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
enum RVV_LMUL
|
enum RVV_LMUL
|
||||||
{
|
{
|
||||||
@ -869,6 +871,8 @@ HAL_RVV_GROUP(RVV_F64M1, RVV_F64M8, f64, m1, m8)
|
|||||||
|
|
||||||
#undef HAL_RVV_GROUP
|
#undef HAL_RVV_GROUP
|
||||||
|
|
||||||
}} // namespace cv::cv_hal_rvv
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif //OPENCV_HAL_RVV_TYPES_HPP_INCLUDED
|
}} // namespace cv::rvv_hal
|
||||||
|
|
||||||
|
#endif //OPENCV_RVV_HAL_TYPES_HPP
|
31
hal/riscv-rvv/rvv_hal.hpp
Normal file
31
hal/riscv-rvv/rvv_hal.hpp
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_HAL_RVV_HPP_INCLUDED
|
||||||
|
#define OPENCV_HAL_RVV_HPP_INCLUDED
|
||||||
|
|
||||||
|
#include "opencv2/core/base.hpp"
|
||||||
|
#include "opencv2/core/utility.hpp"
|
||||||
|
#include "opencv2/core/hal/interface.h"
|
||||||
|
|
||||||
|
#if defined(__riscv_v) && __riscv_v == 1000000
|
||||||
|
#define CV_HAL_RVV_1P0_ENABLED 1
|
||||||
|
#else
|
||||||
|
#define CV_HAL_RVV_1P0_ENABLED 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__riscv_v) && __riscv_v == 7000 && defined(__GNUC__) && __GNUC__ == 10 && __GNUC_MINOR__ == 4 && defined(__THEAD_VERSION__)
|
||||||
|
#define CV_HAL_RVV_071_ENABLED 1
|
||||||
|
#else
|
||||||
|
#define CV_HAL_RVV_071_ENABLED 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED || CV_HAL_RVV_071_ENABLED
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
#endif
|
||||||
|
#include "include/types.hpp"
|
||||||
|
#include "include/core.hpp"
|
||||||
|
#include "include/imgproc.hpp"
|
||||||
|
|
||||||
|
#endif // OPENCV_HAL_RVV_HPP_INCLUDED
|
64
hal/riscv-rvv/src/core/atan.cpp
Normal file
64
hal/riscv-rvv/src/core/atan.cpp
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level
|
||||||
|
// directory of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
int fast_atan_32(const float* y, const float* x, float* dst, size_t n, bool angle_in_deg)
|
||||||
|
{
|
||||||
|
auto atan_params = angle_in_deg ? common::atan_params_deg : common::atan_params_rad;
|
||||||
|
|
||||||
|
for (size_t vl = 0; n > 0; n -= vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e32m4(n);
|
||||||
|
|
||||||
|
auto vy = __riscv_vle32_v_f32m4(y, vl);
|
||||||
|
auto vx = __riscv_vle32_v_f32m4(x, vl);
|
||||||
|
|
||||||
|
auto a = common::rvv_atan(vy, vx, vl, atan_params);
|
||||||
|
|
||||||
|
__riscv_vse32(dst, a, vl);
|
||||||
|
|
||||||
|
x += vl;
|
||||||
|
y += vl;
|
||||||
|
dst += vl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
int fast_atan_64(const double* y, const double* x, double* dst, size_t n, bool angle_in_deg)
|
||||||
|
{
|
||||||
|
// this also uses float32 version, ref: mathfuncs_core.simd.hpp
|
||||||
|
|
||||||
|
auto atan_params = angle_in_deg ? common::atan_params_deg : common::atan_params_rad;
|
||||||
|
|
||||||
|
for (size_t vl = 0; n > 0; n -= vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e64m8(n);
|
||||||
|
|
||||||
|
auto vy = __riscv_vfncvt_f(__riscv_vle64_v_f64m8(y, vl), vl);
|
||||||
|
auto vx = __riscv_vfncvt_f(__riscv_vle64_v_f64m8(x, vl), vl);
|
||||||
|
|
||||||
|
auto a = common::rvv_atan(vy, vx, vl, atan_params);
|
||||||
|
|
||||||
|
__riscv_vse64(dst, __riscv_vfwcvt_f(a, vl), vl);
|
||||||
|
|
||||||
|
x += vl;
|
||||||
|
y += vl;
|
||||||
|
dst += vl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
@ -4,27 +4,20 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_CART_TO_POLAR_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_CART_TO_POLAR_HPP_INCLUDED
|
#include "common.hpp"
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
#include "hal_rvv_1p0/atan.hpp"
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#include "hal_rvv_1p0/sqrt.hpp"
|
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
namespace {
|
||||||
|
|
||||||
#undef cv_hal_cartToPolar32f
|
|
||||||
#define cv_hal_cartToPolar32f cv::cv_hal_rvv::cartToPolar<cv::cv_hal_rvv::RVV_F32M4>
|
|
||||||
#undef cv_hal_cartToPolar64f
|
|
||||||
#define cv_hal_cartToPolar64f cv::cv_hal_rvv::cartToPolar<cv::cv_hal_rvv::RVV_F64M8>
|
|
||||||
|
|
||||||
template <typename RVV_T, typename T = typename RVV_T::ElemType>
|
template <typename RVV_T, typename T = typename RVV_T::ElemType>
|
||||||
inline int cartToPolar(const T* x, const T* y, T* mag, T* angle, int len, bool angleInDegrees)
|
inline int cartToPolar(const T* x, const T* y, T* mag, T* angle, int len, bool angleInDegrees)
|
||||||
{
|
{
|
||||||
using CalType = RVV_SameLen<float, RVV_T>;
|
using CalType = RVV_SameLen<float, RVV_T>;
|
||||||
auto atan_params = angleInDegrees ? detail::atan_params_deg : detail::atan_params_rad;
|
auto atan_params = angleInDegrees ? common::atan_params_deg : common::atan_params_rad;
|
||||||
size_t vl;
|
size_t vl;
|
||||||
for (; len > 0; len -= (int)vl, x += vl, y += vl, mag += vl, angle += vl)
|
for (; len > 0; len -= (int)vl, x += vl, y += vl, mag += vl, angle += vl)
|
||||||
{
|
{
|
||||||
@ -33,16 +26,25 @@ inline int cartToPolar(const T* x, const T* y, T* mag, T* angle, int len, bool a
|
|||||||
auto vx = CalType::cast(RVV_T::vload(x, vl), vl);
|
auto vx = CalType::cast(RVV_T::vload(x, vl), vl);
|
||||||
auto vy = CalType::cast(RVV_T::vload(y, vl), vl);
|
auto vy = CalType::cast(RVV_T::vload(y, vl), vl);
|
||||||
|
|
||||||
auto vmag = detail::sqrt<2>(__riscv_vfmadd(vx, vx, __riscv_vfmul(vy, vy, vl), vl), vl);
|
auto vmag = common::sqrt<2>(__riscv_vfmadd(vx, vx, __riscv_vfmul(vy, vy, vl), vl), vl);
|
||||||
RVV_T::vstore(mag, RVV_T::cast(vmag, vl), vl);
|
RVV_T::vstore(mag, RVV_T::cast(vmag, vl), vl);
|
||||||
|
|
||||||
auto vangle = detail::rvv_atan(vy, vx, vl, atan_params);
|
auto vangle = common::rvv_atan(vy, vx, vl, atan_params);
|
||||||
RVV_T::vstore(angle, RVV_T::cast(vangle, vl), vl);
|
RVV_T::vstore(angle, RVV_T::cast(vangle, vl), vl);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}} // namespace cv::cv_hal_rvv
|
} // anonymous
|
||||||
|
|
||||||
#endif // OPENCV_HAL_RVV_CART_TO_POLAR_HPP_INCLUDED
|
int cartToPolar32f(const float* x, const float* y, float* mag, float* angle, int len, bool angleInDegrees) {
|
||||||
|
return cartToPolar<RVV_F32M4>(x, y, mag, angle, len, angleInDegrees);
|
||||||
|
}
|
||||||
|
int cartToPolar64f(const double* x, const double* y, double* mag, double* angle, int len, bool angleInDegrees) {
|
||||||
|
return cartToPolar<RVV_F64M8>(x, y, mag, angle, len, angleInDegrees);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
@ -4,20 +4,15 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_CHOLESKY_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_CHOLESKY_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <riscv_vector.h>
|
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace cholesky {
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
#undef cv_hal_Cholesky32f
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#define cv_hal_Cholesky32f cv::cv_hal_rvv::cholesky::Cholesky<cv::cv_hal_rvv::RVV_F32M4>
|
|
||||||
#undef cv_hal_Cholesky64f
|
namespace {
|
||||||
#define cv_hal_Cholesky64f cv::cv_hal_rvv::cholesky::Cholesky<cv::cv_hal_rvv::RVV_F64M4>
|
|
||||||
|
|
||||||
// the algorithm is copied from core/src/matrix_decomp.cpp,
|
// the algorithm is copied from core/src/matrix_decomp.cpp,
|
||||||
// in the function template static int cv::CholImpl
|
// in the function template static int cv::CholImpl
|
||||||
@ -119,6 +114,15 @@ inline int Cholesky(T* src1, size_t src1_step, int m, T* src2, size_t src2_step,
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
} // anonymous
|
||||||
|
|
||||||
#endif
|
int Cholesky32f(float* src1, size_t src1_step, int m, float* src2, size_t src2_step, int n, bool* info) {
|
||||||
|
return Cholesky<RVV_F32M4>(src1, src1_step, m, src2, src2_step, n, info);
|
||||||
|
}
|
||||||
|
int Cholesky64f(double* src1, size_t src1_step, int m, double* src2, size_t src2_step, int n, bool* info) {
|
||||||
|
return Cholesky<RVV_F64M4>(src1, src1_step, m, src2, src2_step, n, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
195
hal/riscv-rvv/src/core/common.hpp
Normal file
195
hal/riscv-rvv/src/core/common.hpp
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
|
#ifndef OPENCV_HAL_RVV_CORE_COMMON_HPP_INCLUDED
|
||||||
|
#define OPENCV_HAL_RVV_CORE_COMMON_HPP_INCLUDED
|
||||||
|
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cfloat>
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace core { namespace common {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
#define CV_HAL_RVV_NOOP(a) (a)
|
||||||
|
|
||||||
|
// ############ abs ############
|
||||||
|
|
||||||
|
#define CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(_Tpvs, _Tpvd, shift, suffix) \
|
||||||
|
inline _Tpvd __riscv_vabs(const _Tpvs& v, const int vl) { \
|
||||||
|
_Tpvs mask = __riscv_vsra(v, shift, vl); \
|
||||||
|
_Tpvs v_xor = __riscv_vxor(v, mask, vl); \
|
||||||
|
return __riscv_vreinterpret_##suffix( \
|
||||||
|
__riscv_vsub(v_xor, mask, vl) \
|
||||||
|
); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint8m2_t, vuint8m2_t, 7, u8m2)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint8m8_t, vuint8m8_t, 7, u8m8)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint16m4_t, vuint16m4_t, 15, u16m4)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint16m8_t, vuint16m8_t, 15, u16m8)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint32m4_t, vuint32m4_t, 31, u32m4)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABS(vint32m8_t, vuint32m8_t, 31, u32m8)
|
||||||
|
|
||||||
|
// ############ absdiff ############
|
||||||
|
|
||||||
|
#define CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(_Tpvs, _Tpvd, cast, sub, max, min) \
|
||||||
|
inline _Tpvd __riscv_vabd(const _Tpvs& v1, const _Tpvs& v2, const int vl) { \
|
||||||
|
return cast(__riscv_##sub(__riscv_##max(v1, v2, vl), __riscv_##min(v1, v2, vl), vl)); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vuint8m4_t, vuint8m4_t, CV_HAL_RVV_NOOP, vsub, vmaxu, vminu)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vuint8m8_t, vuint8m8_t, CV_HAL_RVV_NOOP, vsub, vmaxu, vminu)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vuint16m2_t, vuint16m2_t, CV_HAL_RVV_NOOP, vsub, vmaxu, vminu)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vuint16m8_t, vuint16m8_t, CV_HAL_RVV_NOOP, vsub, vmaxu, vminu)
|
||||||
|
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint8m4_t, vuint8m4_t, __riscv_vreinterpret_u8m4, vsub, vmax, vmin)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint8m8_t, vuint8m8_t, __riscv_vreinterpret_u8m8, vsub, vmax, vmin)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint16m2_t, vuint16m2_t, __riscv_vreinterpret_u16m2, vsub, vmax, vmin)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint16m8_t, vuint16m8_t, __riscv_vreinterpret_u16m8, vsub, vmax, vmin)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint32m4_t, vuint32m4_t, __riscv_vreinterpret_u32m4, vsub, vmax, vmin)
|
||||||
|
CV_HAL_RVV_COMMON_CUSTOM_INTRIN_ABSDIFF(vint32m8_t, vuint32m8_t, __riscv_vreinterpret_u32m8, vsub, vmax, vmin)
|
||||||
|
|
||||||
|
// ############ reciprocal ############
|
||||||
|
|
||||||
|
inline vfloat32m4_t __riscv_vfrec(const vfloat32m4_t &x, const int vl) {
|
||||||
|
auto rec = __riscv_vfrec7(x, vl);
|
||||||
|
auto cls = __riscv_vfclass(rec, vl);
|
||||||
|
auto m = __riscv_vmseq(__riscv_vand(cls, 0b10111000, vl), 0, vl);
|
||||||
|
auto two = __riscv_vfmv_v_f_f32m4(2.f, vl);
|
||||||
|
rec = __riscv_vfmul_mu(m, rec, __riscv_vfnmsac(two, x, rec, vl), rec, vl);
|
||||||
|
rec = __riscv_vfmul_mu(m, rec, __riscv_vfnmsac(two, x, rec, vl), rec, vl);
|
||||||
|
return rec;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ############ atan ############
|
||||||
|
|
||||||
|
// ref: mathfuncs_core.simd.hpp
|
||||||
|
static constexpr float pi = CV_PI;
|
||||||
|
|
||||||
|
struct AtanParams
|
||||||
|
{
|
||||||
|
float p1, p3, p5, p7, angle_90;
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr AtanParams atan_params_rad {
|
||||||
|
0.9997878412794807F,
|
||||||
|
-0.3258083974640975F,
|
||||||
|
0.1555786518463281F,
|
||||||
|
-0.04432655554792128F,
|
||||||
|
90.F * (pi / 180.F)};
|
||||||
|
static constexpr AtanParams atan_params_deg {
|
||||||
|
atan_params_rad.p1 * (180 / pi),
|
||||||
|
atan_params_rad.p3 * (180 / pi),
|
||||||
|
atan_params_rad.p5 * (180 / pi),
|
||||||
|
atan_params_rad.p7 * (180 / pi),
|
||||||
|
90.F};
|
||||||
|
|
||||||
|
template <typename VEC_T>
|
||||||
|
__attribute__((always_inline)) inline VEC_T
|
||||||
|
rvv_atan(VEC_T vy, VEC_T vx, size_t vl, const AtanParams& params)
|
||||||
|
{
|
||||||
|
const auto ax = __riscv_vfabs(vx, vl);
|
||||||
|
const auto ay = __riscv_vfabs(vy, vl);
|
||||||
|
// Reciprocal Estimate (vfrec7) is not accurate enough to pass the test of cartToPolar.
|
||||||
|
const auto c = __riscv_vfdiv(__riscv_vfmin(ax, ay, vl),
|
||||||
|
__riscv_vfadd(__riscv_vfmax(ax, ay, vl), FLT_EPSILON, vl),
|
||||||
|
vl);
|
||||||
|
const auto c2 = __riscv_vfmul(c, c, vl);
|
||||||
|
|
||||||
|
// Using vfmadd only results in about a 2% performance improvement, but it occupies 3 additional
|
||||||
|
// M4 registers. (Performance test on phase32f::VectorLength::1048576: time decreased
|
||||||
|
// from 5.952ms to 5.805ms on Muse Pi)
|
||||||
|
// Additionally, when registers are nearly fully utilized (though not yet exhausted), the
|
||||||
|
// compiler is likely to fail to optimize and may introduce slower memory access (e.g., in
|
||||||
|
// cv::rvv_hal::core::fast_atan_64).
|
||||||
|
// Saving registers can also make this function more reusable in other contexts.
|
||||||
|
// Therefore, vfmadd is not used here.
|
||||||
|
auto a = __riscv_vfadd(__riscv_vfmul(c2, params.p7, vl), params.p5, vl);
|
||||||
|
a = __riscv_vfadd(__riscv_vfmul(c2, a, vl), params.p3, vl);
|
||||||
|
a = __riscv_vfadd(__riscv_vfmul(c2, a, vl), params.p1, vl);
|
||||||
|
a = __riscv_vfmul(a, c, vl);
|
||||||
|
|
||||||
|
a = __riscv_vfrsub_mu(__riscv_vmflt(ax, ay, vl), a, a, params.angle_90, vl);
|
||||||
|
a = __riscv_vfrsub_mu(__riscv_vmflt(vx, 0.F, vl), a, a, params.angle_90 * 2, vl);
|
||||||
|
a = __riscv_vfrsub_mu(__riscv_vmflt(vy, 0.F, vl), a, a, params.angle_90 * 4, vl);
|
||||||
|
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ############ sqrt ############
|
||||||
|
|
||||||
|
template <typename RVV_T>
|
||||||
|
struct Sqrt32f
|
||||||
|
{
|
||||||
|
using T = RVV_T;
|
||||||
|
static constexpr size_t iter_times = 2;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename RVV_T>
|
||||||
|
struct Sqrt64f
|
||||||
|
{
|
||||||
|
using T = RVV_T;
|
||||||
|
static constexpr size_t iter_times = 3;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Newton-Raphson method
|
||||||
|
// Use 4 LMUL registers
|
||||||
|
template <size_t iter_times, typename VEC_T>
|
||||||
|
inline VEC_T sqrt(VEC_T x, size_t vl)
|
||||||
|
{
|
||||||
|
auto x2 = __riscv_vfmul(x, 0.5, vl);
|
||||||
|
auto y = __riscv_vfrsqrt7(x, vl);
|
||||||
|
#ifdef __clang__
|
||||||
|
#pragma unroll
|
||||||
|
#endif
|
||||||
|
for (size_t i = 0; i < iter_times; i++)
|
||||||
|
{
|
||||||
|
auto t = __riscv_vfmul(y, y, vl);
|
||||||
|
t = __riscv_vfmul(t, x2, vl);
|
||||||
|
t = __riscv_vfrsub(t, 1.5, vl);
|
||||||
|
y = __riscv_vfmul(t, y, vl);
|
||||||
|
}
|
||||||
|
// just to prevent the compiler from calculating mask before the iteration, which will run out
|
||||||
|
// of registers and cause memory access.
|
||||||
|
asm volatile("" ::: "memory");
|
||||||
|
auto classified = __riscv_vfclass(x, vl);
|
||||||
|
// block -0, +0, positive subnormal number, +inf
|
||||||
|
auto mask = __riscv_vmseq(__riscv_vand(classified, 0b10111000, vl), 0, vl);
|
||||||
|
return __riscv_vfmul_mu(mask, x, x, y, vl);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Newton-Raphson method
|
||||||
|
// Use 3 LMUL registers and 1 mask register
|
||||||
|
template <size_t iter_times, typename VEC_T>
|
||||||
|
inline VEC_T invSqrt(VEC_T x, size_t vl)
|
||||||
|
{
|
||||||
|
auto classified = __riscv_vfclass(x, vl);
|
||||||
|
// block -0, +0, positive subnormal number, +inf
|
||||||
|
auto mask = __riscv_vmseq(__riscv_vand(classified, 0b10111000, vl), 0, vl);
|
||||||
|
auto x2 = __riscv_vfmul(x, 0.5, vl);
|
||||||
|
auto y = __riscv_vfrsqrt7(x, vl);
|
||||||
|
#ifdef __clang__
|
||||||
|
#pragma unroll
|
||||||
|
#endif
|
||||||
|
for (size_t i = 0; i < iter_times; i++)
|
||||||
|
{
|
||||||
|
auto t = __riscv_vfmul(y, y, vl);
|
||||||
|
t = __riscv_vfmul(t, x2, vl);
|
||||||
|
t = __riscv_vfrsub(t, 1.5, vl);
|
||||||
|
y = __riscv_vfmul_mu(mask, y, t, y, vl);
|
||||||
|
}
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}}} // cv::rvv_hal::core::common
|
||||||
|
|
||||||
|
#endif // OPENCV_HAL_RVV_CORE_COMMON_HPP_INCLUDED
|
@ -5,12 +5,11 @@
|
|||||||
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_COMPARE_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_COMPARE_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include "types.hpp"
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace compare {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -90,23 +89,6 @@ int compare_impl(const _Tps *src1_data, size_t src1_step, const _Tps *src2_data,
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // anonymous
|
|
||||||
|
|
||||||
#undef cv_hal_cmp8u
|
|
||||||
#define cv_hal_cmp8u cv::cv_hal_rvv::compare::compare<uchar>
|
|
||||||
#undef cv_hal_cmp8s
|
|
||||||
#define cv_hal_cmp8s cv::cv_hal_rvv::compare::compare<schar>
|
|
||||||
#undef cv_hal_cmp16u
|
|
||||||
#define cv_hal_cmp16u cv::cv_hal_rvv::compare::compare<ushort>
|
|
||||||
#undef cv_hal_cmp16s
|
|
||||||
#define cv_hal_cmp16s cv::cv_hal_rvv::compare::compare<short>
|
|
||||||
#undef cv_hal_cmp32s
|
|
||||||
#define cv_hal_cmp32s cv::cv_hal_rvv::compare::compare<int>
|
|
||||||
#undef cv_hal_cmp32f
|
|
||||||
#define cv_hal_cmp32f cv::cv_hal_rvv::compare::compare<float>
|
|
||||||
// #undef cv_hal_cmp64f
|
|
||||||
// #define cv_hal_cmp64f cv::cv_hal_rvv::compare::compare<double>
|
|
||||||
|
|
||||||
template <typename _Tps> inline
|
template <typename _Tps> inline
|
||||||
int compare(const _Tps *src1_data, size_t src1_step, const _Tps *src2_data, size_t src2_step,
|
int compare(const _Tps *src1_data, size_t src1_step, const _Tps *src2_data, size_t src2_step,
|
||||||
uchar *dst_data, size_t dst_step, int width, int height, int operation) {
|
uchar *dst_data, size_t dst_step, int width, int height, int operation) {
|
||||||
@ -121,6 +103,27 @@ int compare(const _Tps *src1_data, size_t src1_step, const _Tps *src2_data, size
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}}} // cv::cv_hal_rvv::compare
|
} // namespace anonymous
|
||||||
|
|
||||||
#endif // OPENCV_HAL_RVV_COMPARE_HPP_INCLUDED
|
int cmp8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) {
|
||||||
|
return compare<uchar>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, operation);
|
||||||
|
}
|
||||||
|
int cmp8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) {
|
||||||
|
return compare<schar>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, operation);
|
||||||
|
}
|
||||||
|
int cmp16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) {
|
||||||
|
return compare<ushort>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, operation);
|
||||||
|
}
|
||||||
|
int cmp16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) {
|
||||||
|
return compare<short>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, operation);
|
||||||
|
}
|
||||||
|
int cmp32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) {
|
||||||
|
return compare<int>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, operation);
|
||||||
|
}
|
||||||
|
int cmp32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) {
|
||||||
|
return compare<float>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, operation);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
@ -4,15 +4,11 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_CONVERT_SCALE_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_CONVERT_SCALE_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#undef cv_hal_convertScale
|
|
||||||
#define cv_hal_convertScale cv::cv_hal_rvv::convertScale
|
|
||||||
|
|
||||||
inline int convertScale_8U8U(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height, double alpha, double beta)
|
inline int convertScale_8U8U(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height, double alpha, double beta)
|
||||||
{
|
{
|
||||||
@ -89,8 +85,8 @@ inline int convertScale_32F32F(const uchar* src, size_t src_step, uchar* dst, si
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int convertScale(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height,
|
int convertScale(const uchar* src, size_t src_step, uchar* dst, size_t dst_step,
|
||||||
int sdepth, int ddepth, double alpha, double beta)
|
int width, int height, int sdepth, int ddepth, double alpha, double beta)
|
||||||
{
|
{
|
||||||
if (!dst)
|
if (!dst)
|
||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
@ -118,6 +114,6 @@ inline int convertScale(const uchar* src, size_t src_step, uchar* dst, size_t ds
|
|||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::core
|
@ -5,21 +5,17 @@
|
|||||||
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_COPY_MASK_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_COPY_MASK_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#undef cv_hal_copyToMasked
|
|
||||||
#define cv_hal_copyToMasked cv::cv_hal_rvv::copyToMasked
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
#define CV_HAL_RVV_COPY_MASK_eXc1(X, mask_lmul) \
|
#define CV_HAL_RVV_COPY_MASK_eXc1(X, mask_lmul) \
|
||||||
static int copyToMasked_e##X##c1(const uchar *src_data, size_t src_step, const uchar *mask_data, size_t mask_step, \
|
static int copyToMasked_e##X##c1(const uchar *src_data, size_t src_step, const uchar *mask_data, size_t mask_step, \
|
||||||
uchar *dst_data, size_t dst_step, int width, int height) { \
|
uchar *dst_data, size_t dst_step, int width, int height) { \
|
||||||
for (; height--; mask_data += mask_step, src_data += src_step, dst_data += dst_step) { \
|
for (; height--; mask_data += mask_step, src_data += src_step, dst_data += dst_step) { \
|
||||||
const uint##X##_t *src = (const uint##X##_t*)src_data; \
|
const uint##X##_t *src = (const uint##X##_t*)src_data; \
|
||||||
uint##X##_t *dst = (uint##X##_t*)dst_data; \
|
uint##X##_t *dst = (uint##X##_t*)dst_data; \
|
||||||
@ -41,7 +37,7 @@ CV_HAL_RVV_COPY_MASK_eXc1(64, 1)
|
|||||||
|
|
||||||
#define CV_HAL_RVV_COPY_MASK_eXc3(X, mask_lmul) \
|
#define CV_HAL_RVV_COPY_MASK_eXc3(X, mask_lmul) \
|
||||||
static int copyToMasked_e##X##c3(const uchar *src_data, size_t src_step, const uchar *mask_data, size_t mask_step, \
|
static int copyToMasked_e##X##c3(const uchar *src_data, size_t src_step, const uchar *mask_data, size_t mask_step, \
|
||||||
uchar *dst_data, size_t dst_step, int width, int height) { \
|
uchar *dst_data, size_t dst_step, int width, int height) { \
|
||||||
for (; height--; mask_data += mask_step, src_data += src_step, dst_data += dst_step) { \
|
for (; height--; mask_data += mask_step, src_data += src_step, dst_data += dst_step) { \
|
||||||
const uint##X##_t *src = (const uint##X##_t*)src_data; \
|
const uint##X##_t *src = (const uint##X##_t*)src_data; \
|
||||||
uint##X##_t *dst = (uint##X##_t*)dst_data; \
|
uint##X##_t *dst = (uint##X##_t*)dst_data; \
|
||||||
@ -62,9 +58,9 @@ CV_HAL_RVV_COPY_MASK_eXc3(32, f2)
|
|||||||
CV_HAL_RVV_COPY_MASK_eXc3(64, f4)
|
CV_HAL_RVV_COPY_MASK_eXc3(64, f4)
|
||||||
|
|
||||||
static int copyToMasked_e64c2(const uchar *src_data, size_t src_step,
|
static int copyToMasked_e64c2(const uchar *src_data, size_t src_step,
|
||||||
const uchar *mask_data, size_t mask_step,
|
const uchar *mask_data, size_t mask_step,
|
||||||
uchar *dst_data, size_t dst_step, int width,
|
uchar *dst_data, size_t dst_step, int width,
|
||||||
int height) {
|
int height) {
|
||||||
for (; height--; mask_data += mask_step, src_data += src_step, dst_data += dst_step) {
|
for (; height--; mask_data += mask_step, src_data += src_step, dst_data += dst_step) {
|
||||||
const uint64_t *src = (const uint64_t *)src_data;
|
const uint64_t *src = (const uint64_t *)src_data;
|
||||||
uint64_t *dst = (uint64_t *)dst_data;
|
uint64_t *dst = (uint64_t *)dst_data;
|
||||||
@ -80,9 +76,9 @@ static int copyToMasked_e64c2(const uchar *src_data, size_t src_step,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int copyToMasked_e64c4(const uchar *src_data, size_t src_step,
|
static int copyToMasked_e64c4(const uchar *src_data, size_t src_step,
|
||||||
const uchar *mask_data, size_t mask_step,
|
const uchar *mask_data, size_t mask_step,
|
||||||
uchar *dst_data, size_t dst_step, int width,
|
uchar *dst_data, size_t dst_step, int width,
|
||||||
int height) {
|
int height) {
|
||||||
for (; height--; mask_data += mask_step, src_data += src_step, dst_data += dst_step) {
|
for (; height--; mask_data += mask_step, src_data += src_step, dst_data += dst_step) {
|
||||||
const uint64_t *src = (const uint64_t *)src_data;
|
const uint64_t *src = (const uint64_t *)src_data;
|
||||||
uint64_t *dst = (uint64_t *)dst_data;
|
uint64_t *dst = (uint64_t *)dst_data;
|
||||||
@ -100,71 +96,35 @@ static int copyToMasked_e64c4(const uchar *src_data, size_t src_step,
|
|||||||
} // anonymous
|
} // anonymous
|
||||||
|
|
||||||
using CopyToMaskedFunc = int (*)(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int);
|
using CopyToMaskedFunc = int (*)(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int);
|
||||||
inline int copyToMasked(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height,
|
int copyToMasked(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height,
|
||||||
int type, const uchar *mask_data, size_t mask_step, int mask_type) {
|
int type, const uchar *mask_data, size_t mask_step, int mask_type) {
|
||||||
int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
int cn = CV_MAT_CN(type);
|
||||||
int mdepth = CV_MAT_DEPTH(mask_type), mcn = CV_MAT_CN(mask_type);
|
int mdepth = CV_MAT_DEPTH(mask_type), mcn = CV_MAT_CN(mask_type);
|
||||||
|
|
||||||
if (mcn > 1 || mdepth != CV_8U) {
|
if (mcn > 1 || mdepth != CV_8U) {
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
CopyToMaskedFunc func = nullptr;
|
static CopyToMaskedFunc tab[] = {
|
||||||
switch (depth) {
|
0, copyToMasked_e8c1, copyToMasked_e16c1, copyToMasked_e8c3,
|
||||||
case CV_8U: {}
|
copyToMasked_e32c1, 0, copyToMasked_e16c3, 0,
|
||||||
case CV_8S: switch (cn) {
|
copyToMasked_e64c1, 0, 0, 0,
|
||||||
case 1: func = copyToMasked_e8c1; break;
|
copyToMasked_e32c3, 0, 0, 0,
|
||||||
case 2: func = copyToMasked_e16c1; break;
|
copyToMasked_e64c2, 0, 0, 0,
|
||||||
case 3: func = copyToMasked_e8c3; break;
|
0, 0, 0, 0,
|
||||||
case 4: func = copyToMasked_e32c1; break;
|
copyToMasked_e64c3, 0, 0, 0,
|
||||||
case 6: func = copyToMasked_e16c3; break;
|
0, 0, 0, 0,
|
||||||
case 8: func = copyToMasked_e64c1; break;
|
copyToMasked_e64c4
|
||||||
default: func = nullptr;
|
};
|
||||||
}; break;
|
size_t elem_size = CV_ELEM_SIZE(type);
|
||||||
case CV_16U: {}
|
CopyToMaskedFunc func = elem_size <= 32 ? tab[elem_size] : nullptr;
|
||||||
case CV_16S: switch (cn) {
|
|
||||||
case 1: func = copyToMasked_e16c1; break;
|
|
||||||
case 2: func = copyToMasked_e32c1; break;
|
|
||||||
case 3: func = copyToMasked_e16c3; break;
|
|
||||||
case 4: func = copyToMasked_e64c1; break;
|
|
||||||
case 6: func = copyToMasked_e32c3; break;
|
|
||||||
case 8: func = copyToMasked_e64c2; break;
|
|
||||||
default: func = nullptr; break;
|
|
||||||
}; break;
|
|
||||||
case CV_32S: {}
|
|
||||||
case CV_32F: switch (cn) {
|
|
||||||
case 1: func = copyToMasked_e32c1; break;
|
|
||||||
case 2: func = copyToMasked_e64c1; break;
|
|
||||||
case 3: func = copyToMasked_e32c3; break;
|
|
||||||
case 4: func = copyToMasked_e64c2; break;
|
|
||||||
case 6: func = copyToMasked_e64c3; break;
|
|
||||||
case 8: func = copyToMasked_e64c4; break;
|
|
||||||
default: func = nullptr; break;
|
|
||||||
}; break;
|
|
||||||
case CV_64F: switch (cn) {
|
|
||||||
case 1: func = copyToMasked_e64c1; break;
|
|
||||||
case 2: func = copyToMasked_e64c2; break;
|
|
||||||
case 3: func = copyToMasked_e64c3; break;
|
|
||||||
case 4: func = copyToMasked_e64c4; break;
|
|
||||||
default: func = nullptr; break;
|
|
||||||
}; break;
|
|
||||||
default: func = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (func == nullptr) {
|
if (func == nullptr) {
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const size_t elem_size_tab[CV_DEPTH_MAX] = {
|
size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type));
|
||||||
sizeof(uchar), sizeof(schar),
|
bool src_continuous = (src_step == width * elem_size1 * cn || (src_step != width * elem_size1 * cn && height == 1));
|
||||||
sizeof(ushort), sizeof(short),
|
bool dst_continuous = (dst_step == width * elem_size1 * cn || (dst_step != width * elem_size1 * cn && height == 1));
|
||||||
sizeof(int), sizeof(float),
|
|
||||||
sizeof(int64_t), 0,
|
|
||||||
};
|
|
||||||
CV_Assert(elem_size_tab[depth]);
|
|
||||||
|
|
||||||
bool src_continuous = (src_step == width * elem_size_tab[depth] * cn || (src_step != width * elem_size_tab[depth] * cn && height == 1));
|
|
||||||
bool dst_continuous = (dst_step == width * elem_size_tab[depth] * cn || (dst_step != width * elem_size_tab[depth] * cn && height == 1));
|
|
||||||
bool mask_continuous = (mask_step == static_cast<size_t>(width));
|
bool mask_continuous = (mask_step == static_cast<size_t>(width));
|
||||||
size_t nplanes = 1;
|
size_t nplanes = 1;
|
||||||
int _width = width, _height = height;
|
int _width = width, _height = height;
|
||||||
@ -189,6 +149,6 @@ inline int copyToMasked(const uchar *src_data, size_t src_step, uchar *dst_data,
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}} // cv::cv_hal_rvv
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::core
|
299
hal/riscv-rvv/src/core/div.cpp
Normal file
299
hal/riscv-rvv/src/core/div.cpp
Normal file
@ -0,0 +1,299 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
|
#include "common.hpp"
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Requests a vector length for `l` remaining elements using the e8/m1
// configuration and returns the length granted by the hardware.
// The load/store helpers in this file pair 8-bit data with m1, 16-bit with m2
// and 32-bit with m4, so the same element count is used for every width.
inline size_t setvl(int l) { return __riscv_vsetvl_e8m1(l); }
|
||||||
|
|
||||||
|
// Overload set wrapping the typed RVV load intrinsics: loads `vl` elements
// starting at `p`. The pointer type selects the vector type:
// 8-bit -> m1, 16-bit -> m2, 32-bit (int / float) -> m4.
inline vuint8m1_t vle(const uint8_t *p, int vl) { return __riscv_vle8_v_u8m1(p, vl); }
|
||||||
|
inline vint8m1_t vle(const int8_t *p, int vl) { return __riscv_vle8_v_i8m1(p, vl); }
|
||||||
|
inline vuint16m2_t vle(const uint16_t *p, int vl) { return __riscv_vle16_v_u16m2(p, vl); }
|
||||||
|
inline vint16m2_t vle(const int16_t *p, int vl) { return __riscv_vle16_v_i16m2(p, vl); }
|
||||||
|
inline vint32m4_t vle(const int *p, int vl) { return __riscv_vle32_v_i32m4(p, vl); }
|
||||||
|
inline vfloat32m4_t vle(const float *p, int vl) { return __riscv_vle32_v_f32m4(p, vl); }
|
||||||
|
|
||||||
|
// Overload set wrapping the RVV store intrinsics: stores the first `vl`
// elements of `v` to memory starting at `p`. One overload per vector type
// produced by the vle() helpers.
inline void vse(uint8_t *p, const vuint8m1_t &v, int vl) { __riscv_vse8(p, v, vl); }
|
||||||
|
inline void vse(int8_t *p, const vint8m1_t &v, int vl) { __riscv_vse8(p, v, vl); }
|
||||||
|
inline void vse(uint16_t *p, const vuint16m2_t &v, int vl) { __riscv_vse16(p, v, vl); }
|
||||||
|
inline void vse(int16_t *p, const vint16m2_t &v, int vl) { __riscv_vse16(p, v, vl); }
|
||||||
|
inline void vse(int *p, const vint32m4_t &v, int vl) { __riscv_vse32(p, v, vl); }
|
||||||
|
inline void vse(float *p, const vfloat32m4_t &v, int vl) { __riscv_vse32(p, v, vl); }
|
||||||
|
|
||||||
|
// Widens 8-bit lanes to 16-bit lanes (m1 -> m2): zero-extension for unsigned
// input, sign-extension for signed input.
inline vuint16m2_t ext(const vuint8m1_t &v, const int vl) { return __riscv_vzext_vf2(v, vl); }
|
||||||
|
inline vint16m2_t ext(const vint8m1_t &v, const int vl) { return __riscv_vsext_vf2(v, vl); }
|
||||||
|
|
||||||
|
// Narrows 16-bit lanes back to 8-bit lanes (m2 -> m1) with the narrowing-clip
// intrinsics. The shift amount is 0, so the value is not scaled; the RNU
// rounding mode argument is required by the intrinsic signature.
inline vuint8m1_t nclip(const vuint16m2_t &v, const int vl) { return __riscv_vnclipu(v, 0, __RISCV_VXRM_RNU, vl); }
|
||||||
|
inline vint8m1_t nclip(const vint16m2_t &v, const int vl) { return __riscv_vnclip(v, 0, __RISCV_VXRM_RNU, vl); }
|
||||||
|
|
||||||
|
template <typename VT> inline
|
||||||
|
VT div_sat(const VT &v1, const VT &v2, const float scale, const int vl) {
|
||||||
|
return nclip(div_sat(ext(v1, vl), ext(v2, vl), scale, vl), vl);
|
||||||
|
}
|
||||||
|
template <> inline
|
||||||
|
vint16m2_t div_sat(const vint16m2_t &v1, const vint16m2_t &v2, const float scale, const int vl) {
|
||||||
|
auto f1 = __riscv_vfwcvt_f(v1, vl);
|
||||||
|
auto f2 = __riscv_vfwcvt_f(v2, vl);
|
||||||
|
auto res = __riscv_vfmul(f1, __riscv_vfmul(common::__riscv_vfrec(f2, vl), scale, vl), vl);
|
||||||
|
return __riscv_vfncvt_x(res, vl);
|
||||||
|
}
|
||||||
|
template <> inline
|
||||||
|
vuint16m2_t div_sat(const vuint16m2_t &v1, const vuint16m2_t &v2, const float scale, const int vl) {
|
||||||
|
auto f1 = __riscv_vfwcvt_f(v1, vl);
|
||||||
|
auto f2 = __riscv_vfwcvt_f(v2, vl);
|
||||||
|
auto res = __riscv_vfmul(f1, __riscv_vfmul(common::__riscv_vfrec(f2, vl), scale, vl), vl);
|
||||||
|
return __riscv_vfncvt_xu(res, vl);
|
||||||
|
}
|
||||||
|
template <> inline
|
||||||
|
vint32m4_t div_sat(const vint32m4_t &v1, const vint32m4_t &v2, const float scale, const int vl) {
|
||||||
|
auto f1 = __riscv_vfcvt_f(v1, vl);
|
||||||
|
auto f2 = __riscv_vfcvt_f(v2, vl);
|
||||||
|
auto res = __riscv_vfmul(f1, __riscv_vfmul(common::__riscv_vfrec(f2, vl), scale, vl), vl);
|
||||||
|
return __riscv_vfcvt_x(res, vl);
|
||||||
|
}
|
||||||
|
template <> inline
|
||||||
|
vuint32m4_t div_sat(const vuint32m4_t &v1, const vuint32m4_t &v2, const float scale, const int vl) {
|
||||||
|
auto f1 = __riscv_vfcvt_f(v1, vl);
|
||||||
|
auto f2 = __riscv_vfcvt_f(v2, vl);
|
||||||
|
auto res = __riscv_vfmul(f1, __riscv_vfmul(common::__riscv_vfrec(f2, vl), scale, vl), vl);
|
||||||
|
return __riscv_vfcvt_xu(res, vl);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename VT> inline
|
||||||
|
VT recip_sat(const VT &v, const float scale, const int vl) {
|
||||||
|
return nclip(recip_sat(ext(v, vl), scale, vl), vl);
|
||||||
|
}
|
||||||
|
template <> inline
|
||||||
|
vint16m2_t recip_sat(const vint16m2_t &v, const float scale, const int vl) {
|
||||||
|
auto f = __riscv_vfwcvt_f(v, vl);
|
||||||
|
auto res = __riscv_vfmul(common::__riscv_vfrec(f, vl), scale, vl);
|
||||||
|
return __riscv_vfncvt_x(res, vl);
|
||||||
|
}
|
||||||
|
template <> inline
|
||||||
|
vuint16m2_t recip_sat(const vuint16m2_t &v, const float scale, const int vl) {
|
||||||
|
auto f = __riscv_vfwcvt_f(v, vl);
|
||||||
|
auto res = __riscv_vfmul(common::__riscv_vfrec(f, vl), scale, vl);
|
||||||
|
return __riscv_vfncvt_xu(res, vl);
|
||||||
|
}
|
||||||
|
template <> inline
|
||||||
|
vint32m4_t recip_sat(const vint32m4_t &v, const float scale, const int vl) {
|
||||||
|
auto f = __riscv_vfcvt_f(v, vl);
|
||||||
|
auto res = __riscv_vfmul(common::__riscv_vfrec(f, vl), scale, vl);
|
||||||
|
return __riscv_vfcvt_x(res, vl);
|
||||||
|
}
|
||||||
|
template <> inline
|
||||||
|
vuint32m4_t recip_sat(const vuint32m4_t &v, const float scale, const int vl) {
|
||||||
|
auto f = __riscv_vfcvt_f(v, vl);
|
||||||
|
auto res = __riscv_vfmul(common::__riscv_vfrec(f, vl), scale, vl);
|
||||||
|
return __riscv_vfcvt_xu(res, vl);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Implementation
|
||||||
|
|
||||||
|
template <typename ST> inline
|
||||||
|
int div(const ST *src1, size_t step1, const ST *src2, size_t step2,
|
||||||
|
ST *dst, size_t step, int width, int height, float scale) {
|
||||||
|
float max_fval = static_cast<float>(std::numeric_limits<ST>::max());
|
||||||
|
if (scale == 0.f || ((scale * max_fval) < 1.f && (scale * max_fval) > -1.f)) {
|
||||||
|
for (int h = 0; h < height; h++) {
|
||||||
|
ST *dst_h = reinterpret_cast<ST*>((uchar*)dst + h * step);
|
||||||
|
std::memset(dst_h, 0, sizeof(ST) * width);
|
||||||
|
}
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int h = 0; h < height; h++) {
|
||||||
|
const ST *src1_h = reinterpret_cast<const ST*>((const uchar*)src1 + h * step1);
|
||||||
|
const ST *src2_h = reinterpret_cast<const ST*>((const uchar*)src2 + h * step2);
|
||||||
|
ST *dst_h = reinterpret_cast<ST*>((uchar*)dst + h * step);
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int w = 0; w < width; w += vl) {
|
||||||
|
vl = setvl(width - w);
|
||||||
|
|
||||||
|
auto v1 = vle(src1_h + w, vl);
|
||||||
|
auto v2 = vle(src2_h + w, vl);
|
||||||
|
|
||||||
|
auto mask = __riscv_vmseq(v2, 0, vl);
|
||||||
|
vse(dst_h + w, __riscv_vmerge(div_sat(v1, v2, scale, vl), 0, mask, vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
int div(const float *src1, size_t step1, const float *src2, size_t step2,
|
||||||
|
float *dst, size_t step, int width, int height, float scale) {
|
||||||
|
if (scale == 0.f) {
|
||||||
|
for (int h = 0; h < height; h++) {
|
||||||
|
float *dst_h = reinterpret_cast<float*>((uchar*)dst + h * step);
|
||||||
|
std::memset(dst_h, 0, sizeof(float) * width);
|
||||||
|
}
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (std::fabs(scale - 1.f) < FLT_EPSILON) {
|
||||||
|
for (int h = 0; h < height; h++) {
|
||||||
|
const float *src1_h = reinterpret_cast<const float*>((const uchar*)src1 + h * step1);
|
||||||
|
const float *src2_h = reinterpret_cast<const float*>((const uchar*)src2 + h * step2);
|
||||||
|
float *dst_h = reinterpret_cast<float*>((uchar*)dst + h * step);
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int w = 0; w < width; w += vl) {
|
||||||
|
vl = setvl(width - w);
|
||||||
|
|
||||||
|
auto v1 = vle(src1_h + w, vl);
|
||||||
|
auto v2 = vle(src2_h + w, vl);
|
||||||
|
|
||||||
|
vse(dst_h + w, __riscv_vfmul(v1, common::__riscv_vfrec(v2, vl), vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int h = 0; h < height; h++) {
|
||||||
|
const float *src1_h = reinterpret_cast<const float*>((const uchar*)src1 + h * step1);
|
||||||
|
const float *src2_h = reinterpret_cast<const float*>((const uchar*)src2 + h * step2);
|
||||||
|
float *dst_h = reinterpret_cast<float*>((uchar*)dst + h * step);
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int w = 0; w < width; w += vl) {
|
||||||
|
vl = setvl(width - w);
|
||||||
|
|
||||||
|
auto v1 = vle(src1_h + w, vl);
|
||||||
|
auto v2 = vle(src2_h + w, vl);
|
||||||
|
|
||||||
|
vse(dst_h + w, __riscv_vfmul(v1, __riscv_vfmul(common::__riscv_vfrec(v2, vl), scale, vl), vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename ST> inline
|
||||||
|
int recip(const ST *src_data, size_t src_step, ST *dst_data, size_t dst_step,
|
||||||
|
int width, int height, float scale) {
|
||||||
|
if (scale == 0.f || (scale < 1.f && scale > -1.f)) {
|
||||||
|
for (int h = 0; h < height; h++) {
|
||||||
|
ST *dst_h = reinterpret_cast<ST*>((uchar*)dst_data + h * dst_step);
|
||||||
|
std::memset(dst_h, 0, sizeof(ST) * width);
|
||||||
|
}
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int h = 0; h < height; h++) {
|
||||||
|
const ST *src_h = reinterpret_cast<const ST*>((const uchar*)src_data + h * src_step);
|
||||||
|
ST *dst_h = reinterpret_cast<ST*>((uchar*)dst_data + h * dst_step);
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int w = 0; w < width; w += vl) {
|
||||||
|
vl = setvl(width - w);
|
||||||
|
|
||||||
|
auto v = vle(src_h + w, vl);
|
||||||
|
|
||||||
|
auto mask = __riscv_vmseq(v, 0, vl);
|
||||||
|
vse(dst_h + w, __riscv_vmerge(recip_sat(v, scale, vl), 0, mask, vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
int recip(const float *src_data, size_t src_step, float *dst_data, size_t dst_step,
|
||||||
|
int width, int height, float scale) {
|
||||||
|
if (scale == 0.f) {
|
||||||
|
for (int h = 0; h < height; h++) {
|
||||||
|
float *dst_h = reinterpret_cast<float*>((uchar*)dst_data + h * dst_step);
|
||||||
|
std::memset(dst_h, 0, sizeof(float) * width);
|
||||||
|
}
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (std::fabs(scale - 1.f) < FLT_EPSILON) {
|
||||||
|
for (int h = 0; h < height; h++) {
|
||||||
|
const float *src_h = reinterpret_cast<const float*>((const uchar*)src_data + h * src_step);
|
||||||
|
float *dst_h = reinterpret_cast<float*>((uchar*)dst_data + h * dst_step);
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int w = 0; w < width; w += vl) {
|
||||||
|
vl = setvl(width - w);
|
||||||
|
|
||||||
|
auto v = vle(src_h + w, vl);
|
||||||
|
|
||||||
|
vse(dst_h + w, common::__riscv_vfrec(v, vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int h = 0; h < height; h++) {
|
||||||
|
const float *src_h = reinterpret_cast<const float*>((const uchar*)src_data + h * src_step);
|
||||||
|
float *dst_h = reinterpret_cast<float*>((uchar*)dst_data + h * dst_step);
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int w = 0; w < width; w += vl) {
|
||||||
|
vl = setvl(width - w);
|
||||||
|
|
||||||
|
auto v = vle(src_h + w, vl);
|
||||||
|
|
||||||
|
vse(dst_h + w, __riscv_vfmul(common::__riscv_vfrec(v, vl), scale, vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
|
// HAL division entry point for uchar data: forwards to div<uchar>.
// Note: `scale` arrives as double and is implicitly narrowed to the float
// parameter of div<>().
int div8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return div<uchar>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
// HAL division entry point for schar data: forwards to div<schar>.
// Note: `scale` arrives as double and is implicitly narrowed to the float
// parameter of div<>().
int div8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, schar *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return div<schar>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
// HAL division entry point for ushort data: forwards to div<ushort>.
// Note: `scale` arrives as double and is implicitly narrowed to the float
// parameter of div<>().
int div16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, ushort *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return div<ushort>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
// HAL division entry point for short data: forwards to div<short>.
// Note: `scale` arrives as double and is implicitly narrowed to the float
// parameter of div<>().
int div16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, short *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return div<short>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
// HAL division entry point for int data: forwards to div<int>.
// Note: `scale` arrives as double and is implicitly narrowed to the float
// parameter of div<>().
int div32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return div<int>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
// HAL division entry point for float data: forwards to the float
// specialization of div<>().
// Note: `scale` arrives as double and is implicitly narrowed to float.
int div32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return div<float>(src1_data, src1_step, src2_data, src2_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
// HAL reciprocal entry point for uchar data: forwards to recip<uchar>.
// Note: `scale` arrives as double and is implicitly narrowed to the float
// parameter of recip<>().
int recip8u(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return recip<uchar>(src_data, src_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
// HAL reciprocal entry point for schar data: forwards to recip<schar>.
// Note: `scale` arrives as double and is implicitly narrowed to the float
// parameter of recip<>().
int recip8s(const schar *src_data, size_t src_step, schar *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return recip<schar>(src_data, src_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
// HAL reciprocal entry point for ushort data: forwards to recip<ushort>.
// Note: `scale` arrives as double and is implicitly narrowed to the float
// parameter of recip<>().
int recip16u(const ushort *src_data, size_t src_step, ushort *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return recip<ushort>(src_data, src_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
// HAL reciprocal entry point for short data: forwards to recip<short>.
// Note: `scale` arrives as double and is implicitly narrowed to the float
// parameter of recip<>().
int recip16s(const short *src_data, size_t src_step, short *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return recip<short>(src_data, src_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
// HAL reciprocal entry point for int data: forwards to recip<int>.
// Note: `scale` arrives as double and is implicitly narrowed to the float
// parameter of recip<>().
int recip32s(const int *src_data, size_t src_step, int *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return recip<int>(src_data, src_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
// HAL reciprocal entry point for float data: forwards to the float
// specialization of recip<>().
// Note: `scale` arrives as double and is implicitly narrowed to float.
int recip32f(const float *src_data, size_t src_step, float *dst_data, size_t dst_step, int width, int height, double scale) {
|
||||||
|
return recip<float>(src_data, src_step, dst_data, dst_step, width, height, scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
@ -5,21 +5,16 @@
|
|||||||
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
#ifndef OPENCV_HAL_RVV_DOTPROD_HPP_INCLUDED
|
|
||||||
#define OPENCV_HAL_RVV_DOTPROD_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace dotprod {
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
#undef cv_hal_dotProduct
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#define cv_hal_dotProduct cv::cv_hal_rvv::dotprod::dotprod
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
double dotProd_8u(const uchar *a, const uchar *b, int len) {
|
static inline double dotProd_8u(const uchar *a, const uchar *b, int len) {
|
||||||
constexpr int block_size0 = (1 << 15);
|
constexpr int block_size0 = (1 << 15);
|
||||||
|
|
||||||
double r = 0;
|
double r = 0;
|
||||||
@ -47,7 +42,7 @@ double dotProd_8u(const uchar *a, const uchar *b, int len) {
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
double dotProd_8s(const schar *a, const schar *b, int len) {
|
static inline double dotProd_8s(const schar *a, const schar *b, int len) {
|
||||||
constexpr int block_size0 = (1 << 14);
|
constexpr int block_size0 = (1 << 14);
|
||||||
|
|
||||||
double r = 0;
|
double r = 0;
|
||||||
@ -75,7 +70,7 @@ double dotProd_8s(const schar *a, const schar *b, int len) {
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
double dotProd_16u(const ushort *a, const ushort *b, int len) {
|
static inline double dotProd_16u(const ushort *a, const ushort *b, int len) {
|
||||||
constexpr int block_size0 = (1 << 24);
|
constexpr int block_size0 = (1 << 24);
|
||||||
|
|
||||||
double r = 0;
|
double r = 0;
|
||||||
@ -103,7 +98,7 @@ double dotProd_16u(const ushort *a, const ushort *b, int len) {
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
double dotProd_16s(const short *a, const short *b, int len) {
|
static inline double dotProd_16s(const short *a, const short *b, int len) {
|
||||||
constexpr int block_size0 = (1 << 24);
|
constexpr int block_size0 = (1 << 24);
|
||||||
|
|
||||||
double r = 0;
|
double r = 0;
|
||||||
@ -131,7 +126,7 @@ double dotProd_16s(const short *a, const short *b, int len) {
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
double dotProd_32s(const int *a, const int *b, int len) {
|
static inline double dotProd_32s(const int *a, const int *b, int len) {
|
||||||
double r = 0;
|
double r = 0;
|
||||||
|
|
||||||
vfloat64m8_t s = __riscv_vfmv_v_f_f64m8(0.f, __riscv_vsetvlmax_e64m8());
|
vfloat64m8_t s = __riscv_vfmv_v_f_f64m8(0.f, __riscv_vsetvlmax_e64m8());
|
||||||
@ -149,7 +144,7 @@ double dotProd_32s(const int *a, const int *b, int len) {
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
double dotProd_32f(const float *a, const float *b, int len) {
|
static inline double dotProd_32f(const float *a, const float *b, int len) {
|
||||||
constexpr int block_size0 = (1 << 11);
|
constexpr int block_size0 = (1 << 11);
|
||||||
|
|
||||||
double r = 0.f;
|
double r = 0.f;
|
||||||
@ -180,8 +175,8 @@ double dotProd_32f(const float *a, const float *b, int len) {
|
|||||||
} // anonymous
|
} // anonymous
|
||||||
|
|
||||||
using DotProdFunc = double (*)(const uchar *a, const uchar *b, int len);
|
using DotProdFunc = double (*)(const uchar *a, const uchar *b, int len);
|
||||||
inline int dotprod(const uchar *a_data, size_t a_step, const uchar *b_data, size_t b_step,
|
int dotprod(const uchar *a_data, size_t a_step, const uchar *b_data, size_t b_step,
|
||||||
int width, int height, int type, double *dot_val) {
|
int width, int height, int type, double *dot_val) {
|
||||||
int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||||
|
|
||||||
static DotProdFunc dotprod_tab[CV_DEPTH_MAX] = {
|
static DotProdFunc dotprod_tab[CV_DEPTH_MAX] = {
|
||||||
@ -195,16 +190,9 @@ inline int dotprod(const uchar *a_data, size_t a_step, const uchar *b_data, size
|
|||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const size_t elem_size_tab[CV_DEPTH_MAX] = {
|
size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type));
|
||||||
sizeof(uchar), sizeof(schar),
|
bool a_continuous = (a_step == width * elem_size1 * cn);
|
||||||
sizeof(ushort), sizeof(short),
|
bool b_continuous = (b_step == width * elem_size1 * cn);
|
||||||
sizeof(int), sizeof(float),
|
|
||||||
sizeof(int64_t), 0,
|
|
||||||
};
|
|
||||||
CV_Assert(elem_size_tab[depth]);
|
|
||||||
|
|
||||||
bool a_continuous = (a_step == width * elem_size_tab[depth] * cn);
|
|
||||||
bool b_continuous = (b_step == width * elem_size_tab[depth] * cn);
|
|
||||||
size_t nplanes = 1;
|
size_t nplanes = 1;
|
||||||
size_t len = width * height;
|
size_t len = width * height;
|
||||||
if (!a_continuous || !b_continuous) {
|
if (!a_continuous || !b_continuous) {
|
||||||
@ -228,6 +216,6 @@ inline int dotprod(const uchar *a_data, size_t a_step, const uchar *b_data, size
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}} // cv::cv_hal_rvv::dotprod
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif // OPENCV_HAL_RVV_DOTPROD_HPP_INCLUDED
|
}}} // cv::rvv_hal::core
|
@ -4,17 +4,11 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_DXT_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_DXT_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
#include "opencv2/core/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace dxt {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#undef cv_hal_dft
|
|
||||||
#define cv_hal_dft cv::cv_hal_rvv::dxt::dft
|
|
||||||
|
|
||||||
template<typename T> struct rvv;
|
template<typename T> struct rvv;
|
||||||
|
|
||||||
@ -42,7 +36,7 @@ template<> struct rvv<double> : RVV_F64M1
|
|||||||
// in the function template static void cv::DFT and cv::DFT_R2, cv::DFT_R3, cv::DFT_R5
|
// in the function template static void cv::DFT and cv::DFT_R2, cv::DFT_R3, cv::DFT_R5
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline int dft(const Complex<T>* src, Complex<T>* dst, int nf, int *factors, T scale, int* itab,
|
inline int dft(const Complex<T>* src, Complex<T>* dst, int nf, int *factors, T scale, int* itab,
|
||||||
const Complex<T>* wave, int tab_size, int len, bool isInverse, bool noPermute)
|
const Complex<T>* wave, int tab_size, int len, bool isInverse, bool noPermute)
|
||||||
{
|
{
|
||||||
int n = len;
|
int n = len;
|
||||||
int f_idx, nx;
|
int f_idx, nx;
|
||||||
@ -545,8 +539,8 @@ inline int dft(const Complex<T>* src, Complex<T>* dst, int nf, int *factors, T s
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int dft(const uchar* src, uchar* dst, int depth, int nf, int *factors, double scale, int* itab, void* wave,
|
int dft(const uchar* src, uchar* dst, int depth, int nf, int *factors, double scale,
|
||||||
int tab_size, int n, bool isInverse, bool noPermute)
|
int* itab, void* wave, int tab_size, int n, bool isInverse, bool noPermute)
|
||||||
{
|
{
|
||||||
if( n == 0 )
|
if( n == 0 )
|
||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
@ -563,6 +557,6 @@ inline int dft(const uchar* src, uchar* dst, int depth, int nf, int *factors, do
|
|||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::core
|
@ -4,17 +4,11 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_EXP_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_EXP_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#undef cv_hal_exp32f
|
|
||||||
#define cv_hal_exp32f cv::cv_hal_rvv::exp32f
|
|
||||||
#undef cv_hal_exp64f
|
|
||||||
#define cv_hal_exp64f cv::cv_hal_rvv::exp64f
|
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
|
|
||||||
@ -116,7 +110,7 @@ static constexpr double exp_tab_64f[exp_tab_size] = EXP_TAB_VALUE;
|
|||||||
|
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
inline int exp32f(const float* src, float* dst, int _len)
|
int exp32f(const float* src, float* dst, int _len)
|
||||||
{
|
{
|
||||||
size_t vl = __riscv_vsetvlmax_e32m4();
|
size_t vl = __riscv_vsetvlmax_e32m4();
|
||||||
auto exp_a2 = __riscv_vfmv_v_f_f32m4(detail::exp32f_a2, vl);
|
auto exp_a2 = __riscv_vfmv_v_f_f32m4(detail::exp32f_a2, vl);
|
||||||
@ -158,7 +152,7 @@ inline int exp32f(const float* src, float* dst, int _len)
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int exp64f(const double* src, double* dst, int _len)
|
int exp64f(const double* src, double* dst, int _len)
|
||||||
{
|
{
|
||||||
size_t vl = __riscv_vsetvlmax_e64m4();
|
size_t vl = __riscv_vsetvlmax_e64m4();
|
||||||
// all vector registers are used up, so not load more constants
|
// all vector registers are used up, so not load more constants
|
||||||
@ -203,6 +197,6 @@ inline int exp64f(const double* src, double* dst, int _len)
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}} // namespace cv::cv_hal_rvv
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif //OPENCV_HAL_RVV_EXP_HPP_INCLUDED
|
}}} // cv::rvv_hal::core
|
@ -5,13 +5,7 @@
|
|||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_FLIP_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_FLIP_HPP_INCLUDED
|
|
||||||
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
#include <opencv2/core/base.hpp>
|
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
#if defined (__clang__) && __clang_major__ < 18
|
#if defined (__clang__) && __clang_major__ < 18
|
||||||
#define OPENCV_HAL_IMPL_RVV_VCREATE_x3(suffix, width, v0, v1, v2) \
|
#define OPENCV_HAL_IMPL_RVV_VCREATE_x3(suffix, width, v0, v1, v2) \
|
||||||
@ -24,10 +18,9 @@
|
|||||||
#define __riscv_vcreate_v_u64m2x3(v0, v1, v2) OPENCV_HAL_IMPL_RVV_VCREATE_x3(u64, 2, v0, v1, v2)
|
#define __riscv_vcreate_v_u64m2x3(v0, v1, v2) OPENCV_HAL_IMPL_RVV_VCREATE_x3(u64, 2, v0, v1, v2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
#undef cv_hal_flip
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#define cv_hal_flip cv::cv_hal_rvv::flip
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -73,6 +66,13 @@ CV_HAL_RVV_FLIP_INPLACE_C1(16UC1, ushort, RVV_U16M8)
|
|||||||
CV_HAL_RVV_FLIP_INPLACE_C1(32UC1, unsigned, RVV_U32M8)
|
CV_HAL_RVV_FLIP_INPLACE_C1(32UC1, unsigned, RVV_U32M8)
|
||||||
CV_HAL_RVV_FLIP_INPLACE_C1(64UC1, uint64_t, RVV_U64M8)
|
CV_HAL_RVV_FLIP_INPLACE_C1(64UC1, uint64_t, RVV_U64M8)
|
||||||
|
|
||||||
|
// Suppress warnings of "ignoring attributes applied to VecType after definition",
|
||||||
|
// VecType is vuint8m2x3_t, vuint16m2x3_t, vuint32m2x3_t or vuint64m2x3_t
|
||||||
|
#if defined (__GNUC__)
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wattributes"
|
||||||
|
#endif
|
||||||
|
|
||||||
#define CV_HAL_RVV_FLIP_C3_TYPES(width) \
|
#define CV_HAL_RVV_FLIP_C3_TYPES(width) \
|
||||||
struct RVV_C3_U##width##M2 : RVV_U##width##M2 { \
|
struct RVV_C3_U##width##M2 : RVV_U##width##M2 { \
|
||||||
static inline vuint##width##m2x3_t vload3(const uint##width##_t *base, size_t vl) { return __riscv_vlseg3e##width##_v_u##width##m2x3(base, vl); } \
|
static inline vuint##width##m2x3_t vload3(const uint##width##_t *base, size_t vl) { return __riscv_vlseg3e##width##_v_u##width##m2x3(base, vl); } \
|
||||||
@ -90,6 +90,10 @@ CV_HAL_RVV_FLIP_C3_TYPES(16)
|
|||||||
CV_HAL_RVV_FLIP_C3_TYPES(32)
|
CV_HAL_RVV_FLIP_C3_TYPES(32)
|
||||||
CV_HAL_RVV_FLIP_C3_TYPES(64)
|
CV_HAL_RVV_FLIP_C3_TYPES(64)
|
||||||
|
|
||||||
|
#if defined (__GNUC__)
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
#endif
|
||||||
|
|
||||||
#define CV_HAL_RVV_FLIP_C3(name, _Tps, RVV) \
|
#define CV_HAL_RVV_FLIP_C3(name, _Tps, RVV) \
|
||||||
inline void flip_##name(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int src_width, int src_height, int flip_mode) { \
|
inline void flip_##name(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int src_width, int src_height, int flip_mode) { \
|
||||||
for (int h = 0; h < src_height; h++) { \
|
for (int h = 0; h < src_height; h++) { \
|
||||||
@ -302,7 +306,7 @@ inline int flip_inplace(int esz, uchar* data, size_t step, int width, int height
|
|||||||
0, 0, 0, 0,
|
0, 0, 0, 0,
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
FlipInplaceFunc func = flip_inplace_func_tab[esz];
|
FlipInplaceFunc func = esz <= 32 ? flip_inplace_func_tab[esz] : nullptr;
|
||||||
if (!func) {
|
if (!func) {
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
@ -311,7 +315,7 @@ inline int flip_inplace(int esz, uchar* data, size_t step, int width, int height
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int flip(int src_type, const uchar* src_data, size_t src_step, int src_width, int src_height,
|
int flip(int src_type, const uchar* src_data, size_t src_step, int src_width, int src_height,
|
||||||
uchar* dst_data, size_t dst_step, int flip_mode)
|
uchar* dst_data, size_t dst_step, int flip_mode)
|
||||||
{
|
{
|
||||||
int esz = CV_ELEM_SIZE(src_type);
|
int esz = CV_ELEM_SIZE(src_type);
|
||||||
@ -344,7 +348,7 @@ inline int flip(int src_type, const uchar* src_data, size_t src_step, int src_wi
|
|||||||
0, 0, 0, 0,
|
0, 0, 0, 0,
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
FlipFunc func = flip_func_tab[esz];
|
FlipFunc func = esz <= 32 ? flip_func_tab[esz] : nullptr;
|
||||||
if (func) {
|
if (func) {
|
||||||
func(src_data, src_step, dst_data, dst_step, src_width, src_height, flip_mode);
|
func(src_data, src_step, dst_data, dst_step, src_width, src_height, flip_mode);
|
||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
@ -368,6 +372,6 @@ inline int flip(int src_type, const uchar* src_data, size_t src_step, int src_wi
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}} // namespace cv::cv_hal_rvv
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif //OPENCV_HAL_RVV_FLIP_HPP_INCLUDED
|
}}} // cv::rvv_hal::core
|
@ -4,17 +4,11 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_LOG_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_LOG_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#undef cv_hal_log32f
|
|
||||||
#define cv_hal_log32f cv::cv_hal_rvv::log32f
|
|
||||||
#undef cv_hal_log64f
|
|
||||||
#define cv_hal_log64f cv::cv_hal_rvv::log64f
|
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
|
|
||||||
@ -306,7 +300,7 @@ static constexpr double log_tab_64f[log_tab_size] = LOG_TAB_VALUE;
|
|||||||
|
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
inline int log32f(const float* src, float* dst, int _len)
|
int log32f(const float* src, float* dst, int _len)
|
||||||
{
|
{
|
||||||
size_t vl = __riscv_vsetvlmax_e32m4();
|
size_t vl = __riscv_vsetvlmax_e32m4();
|
||||||
auto log_a2 = __riscv_vfmv_v_f_f32m4(detail::log32f_a2, vl);
|
auto log_a2 = __riscv_vfmv_v_f_f32m4(detail::log32f_a2, vl);
|
||||||
@ -340,7 +334,7 @@ inline int log32f(const float* src, float* dst, int _len)
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int log64f(const double* src, double* dst, int _len)
|
int log64f(const double* src, double* dst, int _len)
|
||||||
{
|
{
|
||||||
size_t vl = __riscv_vsetvlmax_e64m4();
|
size_t vl = __riscv_vsetvlmax_e64m4();
|
||||||
// all vector registers are used up, so not load more constants
|
// all vector registers are used up, so not load more constants
|
||||||
@ -382,6 +376,6 @@ inline int log64f(const double* src, double* dst, int _len)
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}} // namespace cv::cv_hal_rvv
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif //OPENCV_HAL_RVV_LOG_HPP_INCLUDED
|
}}} // cv::rvv_hal::core
|
@ -4,21 +4,16 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_LU_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_LU_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <cfloat>
|
#include <cfloat>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
#include <riscv_vector.h>
|
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace lu {
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
#undef cv_hal_LU32f
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#define cv_hal_LU32f cv::cv_hal_rvv::lu::LU<cv::cv_hal_rvv::RVV_F32M4>
|
|
||||||
#undef cv_hal_LU64f
|
namespace {
|
||||||
#define cv_hal_LU64f cv::cv_hal_rvv::lu::LU<cv::cv_hal_rvv::RVV_F64M4>
|
|
||||||
|
|
||||||
// the algorithm is copied from core/src/matrix_decomp.cpp,
|
// the algorithm is copied from core/src/matrix_decomp.cpp,
|
||||||
// in the function template static int cv::LUImpl
|
// in the function template static int cv::LUImpl
|
||||||
@ -167,6 +162,15 @@ inline int LU(T* src1, size_t src1_step, int m, T* src2, size_t src2_step, int n
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
} // anonymous
|
||||||
|
|
||||||
#endif
|
int LU32f(float* src1, size_t src1_step, int m, float* src2, size_t src2_step, int n, int* info) {
|
||||||
|
return LU<RVV_F32M4>(src1, src1_step, m, src2, src2_step, n, info);
|
||||||
|
}
|
||||||
|
int LU64f(double* src1, size_t src1_step, int m, double* src2, size_t src2_step, int n, int* info) {
|
||||||
|
return LU<RVV_F64M4>(src1, src1_step, m, src2, src2_step, n, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
@ -4,19 +4,11 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_LUT_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_LUT_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
#include <opencv2/core/base.hpp>
|
|
||||||
#include <opencv2/core/utility.hpp>
|
|
||||||
|
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
|
||||||
|
|
||||||
#undef cv_hal_lut
|
|
||||||
#define cv_hal_lut cv::cv_hal_rvv::lut
|
|
||||||
|
|
||||||
// need vlen >= 256
|
// need vlen >= 256
|
||||||
struct LUTCacheU8 : RVV_U8M8
|
struct LUTCacheU8 : RVV_U8M8
|
||||||
@ -135,7 +127,7 @@ private:
|
|||||||
LUTParallelBody& operator=(const LUTParallelBody&);
|
LUTParallelBody& operator=(const LUTParallelBody&);
|
||||||
};
|
};
|
||||||
|
|
||||||
inline int lut(const uchar* src_data,
|
int lut(const uchar* src_data,
|
||||||
size_t src_step,
|
size_t src_step,
|
||||||
size_t src_type,
|
size_t src_type,
|
||||||
const uchar* lut_data,
|
const uchar* lut_data,
|
||||||
@ -191,6 +183,6 @@ inline int lut(const uchar* src_data,
|
|||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
}} // namespace cv::cv_hal_rvv
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif //OPENCV_HAL_RVV_LUT_HPP_INCLUDED
|
}}} // cv::rvv_hal::core
|
@ -4,20 +4,14 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_MAGNITUDE_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_MAGNITUDE_HPP_INCLUDED
|
#include "common.hpp"
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
#include "hal_rvv_1p0/sqrt.hpp"
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
namespace {
|
||||||
|
|
||||||
#undef cv_hal_magnitude32f
|
|
||||||
#define cv_hal_magnitude32f cv::cv_hal_rvv::magnitude<cv::cv_hal_rvv::Sqrt32f<cv::cv_hal_rvv::RVV_F32M8>>
|
|
||||||
#undef cv_hal_magnitude64f
|
|
||||||
#define cv_hal_magnitude64f cv::cv_hal_rvv::magnitude<cv::cv_hal_rvv::Sqrt64f<cv::cv_hal_rvv::RVV_F64M8>>
|
|
||||||
|
|
||||||
template <typename SQRT_T, typename T = typename SQRT_T::T::ElemType>
|
template <typename SQRT_T, typename T = typename SQRT_T::T::ElemType>
|
||||||
inline int magnitude(const T* x, const T* y, T* dst, int len)
|
inline int magnitude(const T* x, const T* y, T* dst, int len)
|
||||||
@ -30,13 +24,22 @@ inline int magnitude(const T* x, const T* y, T* dst, int len)
|
|||||||
auto vx = SQRT_T::T::vload(x, vl);
|
auto vx = SQRT_T::T::vload(x, vl);
|
||||||
auto vy = SQRT_T::T::vload(y, vl);
|
auto vy = SQRT_T::T::vload(y, vl);
|
||||||
|
|
||||||
auto vmag = detail::sqrt<SQRT_T::iter_times>(__riscv_vfmadd(vx, vx, __riscv_vfmul(vy, vy, vl), vl), vl);
|
auto vmag = common::sqrt<SQRT_T::iter_times>(__riscv_vfmadd(vx, vx, __riscv_vfmul(vy, vy, vl), vl), vl);
|
||||||
SQRT_T::T::vstore(dst, vmag, vl);
|
SQRT_T::T::vstore(dst, vmag, vl);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}} // namespace cv::cv_hal_rvv
|
} // anonymous
|
||||||
|
|
||||||
#endif // OPENCV_HAL_RVV_MAGNITUDE_HPP_INCLUDED
|
int magnitude32f(const float *x, const float *y, float *dst, int len) {
|
||||||
|
return magnitude<common::Sqrt32f<RVV_F32M8>>(x, y, dst, len);
|
||||||
|
}
|
||||||
|
int magnitude64f(const double *x, const double *y, double *dst, int len) {
|
||||||
|
return magnitude<common::Sqrt64f<RVV_F64M8>>(x, y, dst, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
@ -4,15 +4,11 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_MEANSTDDEV_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_MEANSTDDEV_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#undef cv_hal_meanStdDev
|
|
||||||
#define cv_hal_meanStdDev cv::cv_hal_rvv::meanStdDev
|
|
||||||
|
|
||||||
inline int meanStdDev_8UC1(const uchar* src_data, size_t src_step, int width, int height,
|
inline int meanStdDev_8UC1(const uchar* src_data, size_t src_step, int width, int height,
|
||||||
double* mean_val, double* stddev_val, uchar* mask, size_t mask_step);
|
double* mean_val, double* stddev_val, uchar* mask, size_t mask_step);
|
||||||
@ -21,8 +17,8 @@ inline int meanStdDev_8UC4(const uchar* src_data, size_t src_step, int width, in
|
|||||||
inline int meanStdDev_32FC1(const uchar* src_data, size_t src_step, int width, int height,
|
inline int meanStdDev_32FC1(const uchar* src_data, size_t src_step, int width, int height,
|
||||||
double* mean_val, double* stddev_val, uchar* mask, size_t mask_step);
|
double* mean_val, double* stddev_val, uchar* mask, size_t mask_step);
|
||||||
|
|
||||||
inline int meanStdDev(const uchar* src_data, size_t src_step, int width, int height,
|
int meanStdDev(const uchar* src_data, size_t src_step, int width, int height, int src_type,
|
||||||
int src_type, double* mean_val, double* stddev_val, uchar* mask, size_t mask_step) {
|
double* mean_val, double* stddev_val, uchar* mask, size_t mask_step) {
|
||||||
switch (src_type)
|
switch (src_type)
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
@ -226,6 +222,6 @@ inline int meanStdDev_32FC1(const uchar* src_data, size_t src_step, int width, i
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::core
|
@ -4,21 +4,7 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_MERGE_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_MERGE_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
|
||||||
|
|
||||||
#undef cv_hal_merge8u
|
|
||||||
#define cv_hal_merge8u cv::cv_hal_rvv::merge8u
|
|
||||||
#undef cv_hal_merge16u
|
|
||||||
#define cv_hal_merge16u cv::cv_hal_rvv::merge16u
|
|
||||||
#undef cv_hal_merge32s
|
|
||||||
#define cv_hal_merge32s cv::cv_hal_rvv::merge32s
|
|
||||||
#undef cv_hal_merge64s
|
|
||||||
#define cv_hal_merge64s cv::cv_hal_rvv::merge64s
|
|
||||||
|
|
||||||
#if defined __clang__ && __clang_major__ < 18
|
#if defined __clang__ && __clang_major__ < 18
|
||||||
#define OPENCV_HAL_IMPL_RVV_VCREATE_x2(suffix, width, v0, v1) \
|
#define OPENCV_HAL_IMPL_RVV_VCREATE_x2(suffix, width, v0, v1) \
|
||||||
@ -44,7 +30,11 @@ namespace cv { namespace cv_hal_rvv {
|
|||||||
#define __riscv_vcreate_v_u16m2x4(v0, v1, v2, v3) OPENCV_HAL_IMPL_RVV_VCREATE_x4(u16, 2, v0, v1, v2, v3)
|
#define __riscv_vcreate_v_u16m2x4(v0, v1, v2, v3) OPENCV_HAL_IMPL_RVV_VCREATE_x4(u16, 2, v0, v1, v2, v3)
|
||||||
#endif // clang < 18
|
#endif // clang < 18
|
||||||
|
|
||||||
inline int merge8u(const uchar** src, uchar* dst, int len, int cn ) {
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
int merge8u(const uchar** src, uchar* dst, int len, int cn ) {
|
||||||
int vl = 0;
|
int vl = 0;
|
||||||
if (cn == 1)
|
if (cn == 1)
|
||||||
{
|
{
|
||||||
@ -129,7 +119,7 @@ inline int merge8u(const uchar** src, uchar* dst, int len, int cn ) {
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int merge16u(const ushort** src, ushort* dst, int len, int cn ) {
|
int merge16u(const ushort** src, ushort* dst, int len, int cn ) {
|
||||||
int vl = 0;
|
int vl = 0;
|
||||||
if (cn == 1)
|
if (cn == 1)
|
||||||
{
|
{
|
||||||
@ -217,7 +207,7 @@ inline int merge16u(const ushort** src, ushort* dst, int len, int cn ) {
|
|||||||
#if defined __GNUC__ && !defined(__clang__)
|
#if defined __GNUC__ && !defined(__clang__)
|
||||||
__attribute__((optimize("no-tree-vectorize")))
|
__attribute__((optimize("no-tree-vectorize")))
|
||||||
#endif
|
#endif
|
||||||
inline int merge32s(const int** src, int* dst, int len, int cn ) {
|
int merge32s(const int** src, int* dst, int len, int cn ) {
|
||||||
int k = cn % 4 ? cn % 4 : 4;
|
int k = cn % 4 ? cn % 4 : 4;
|
||||||
int i, j;
|
int i, j;
|
||||||
if( k == 1 )
|
if( k == 1 )
|
||||||
@ -287,7 +277,7 @@ inline int merge32s(const int** src, int* dst, int len, int cn ) {
|
|||||||
#if defined __GNUC__ && !defined(__clang__)
|
#if defined __GNUC__ && !defined(__clang__)
|
||||||
__attribute__((optimize("no-tree-vectorize")))
|
__attribute__((optimize("no-tree-vectorize")))
|
||||||
#endif
|
#endif
|
||||||
inline int merge64s(const int64** src, int64* dst, int len, int cn ) {
|
int merge64s(const int64** src, int64* dst, int len, int cn ) {
|
||||||
int k = cn % 4 ? cn % 4 : 4;
|
int k = cn % 4 ? cn % 4 : 4;
|
||||||
int i, j;
|
int i, j;
|
||||||
if( k == 1 )
|
if( k == 1 )
|
||||||
@ -354,6 +344,6 @@ inline int merge64s(const int64** src, int64* dst, int len, int cn ) {
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::core
|
@ -4,19 +4,11 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_MINMAX_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_MINMAX_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
#include <opencv2/core/base.hpp>
|
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace minmax {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#undef cv_hal_minMaxIdx
|
|
||||||
#define cv_hal_minMaxIdx cv::cv_hal_rvv::minmax::minMaxIdx
|
|
||||||
#undef cv_hal_minMaxIdxMaskStep
|
|
||||||
#define cv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minmax::minMaxIdx
|
|
||||||
|
|
||||||
template<typename VEC_T, typename BOOL_T, typename T = typename VEC_T::ElemType>
|
template<typename VEC_T, typename BOOL_T, typename T = typename VEC_T::ElemType>
|
||||||
inline int minMaxIdxReadTwice(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal,
|
inline int minMaxIdxReadTwice(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal,
|
||||||
@ -269,8 +261,8 @@ inline int minMaxIdxReadOnce(const uchar* src_data, size_t src_step, int width,
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int minMaxIdx(const uchar* src_data, size_t src_step, int width, int height, int depth, double* minVal, double* maxVal,
|
int minMaxIdx(const uchar* src_data, size_t src_step, int width, int height, int depth,
|
||||||
int* minIdx, int* maxIdx, uchar* mask, size_t mask_step = 0)
|
double* minVal, double* maxVal, int* minIdx, int* maxIdx, uchar* mask, size_t mask_step)
|
||||||
{
|
{
|
||||||
if (!mask_step)
|
if (!mask_step)
|
||||||
mask_step = src_step;
|
mask_step = src_step;
|
||||||
@ -296,6 +288,6 @@ inline int minMaxIdx(const uchar* src_data, size_t src_step, int width, int heig
|
|||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::core
|
@ -6,15 +6,12 @@
|
|||||||
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_NORM_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_NORM_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace norm {
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
#undef cv_hal_norm
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#define cv_hal_norm cv::cv_hal_rvv::norm::norm
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -76,7 +73,7 @@ struct NormInf_RVV<schar, int> {
|
|||||||
for (int i = 0; i < n; i += vl) {
|
for (int i = 0; i < n; i += vl) {
|
||||||
vl = __riscv_vsetvl_e8m8(n - i);
|
vl = __riscv_vsetvl_e8m8(n - i);
|
||||||
auto v = __riscv_vle8_v_i8m8(src + i, vl);
|
auto v = __riscv_vle8_v_i8m8(src + i, vl);
|
||||||
s = __riscv_vmaxu_tu(s, s, custom_intrin::__riscv_vabs(v, vl), vl);
|
s = __riscv_vmaxu_tu(s, s, common::__riscv_vabs(v, vl), vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u8m1(0, __riscv_vsetvlmax_e8m1()), vlmax));
|
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u8m1(0, __riscv_vsetvlmax_e8m1()), vlmax));
|
||||||
}
|
}
|
||||||
@ -106,7 +103,7 @@ struct NormInf_RVV<short, int> {
|
|||||||
for (int i = 0; i < n; i += vl) {
|
for (int i = 0; i < n; i += vl) {
|
||||||
vl = __riscv_vsetvl_e16m8(n - i);
|
vl = __riscv_vsetvl_e16m8(n - i);
|
||||||
auto v = __riscv_vle16_v_i16m8(src + i, vl);
|
auto v = __riscv_vle16_v_i16m8(src + i, vl);
|
||||||
s = __riscv_vmaxu_tu(s, s, custom_intrin::__riscv_vabs(v, vl), vl);
|
s = __riscv_vmaxu_tu(s, s, common::__riscv_vabs(v, vl), vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u16m1(0, __riscv_vsetvlmax_e16m1()), vlmax));
|
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u16m1(0, __riscv_vsetvlmax_e16m1()), vlmax));
|
||||||
}
|
}
|
||||||
@ -121,7 +118,7 @@ struct NormInf_RVV<int, int> {
|
|||||||
for (int i = 0; i < n; i += vl) {
|
for (int i = 0; i < n; i += vl) {
|
||||||
vl = __riscv_vsetvl_e32m8(n - i);
|
vl = __riscv_vsetvl_e32m8(n - i);
|
||||||
auto v = __riscv_vle32_v_i32m8(src + i, vl);
|
auto v = __riscv_vle32_v_i32m8(src + i, vl);
|
||||||
s = __riscv_vmaxu_tu(s, s, custom_intrin::__riscv_vabs(v, vl), vl);
|
s = __riscv_vmaxu_tu(s, s, common::__riscv_vabs(v, vl), vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u32m1(0, __riscv_vsetvlmax_e32m1()), vlmax));
|
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u32m1(0, __riscv_vsetvlmax_e32m1()), vlmax));
|
||||||
}
|
}
|
||||||
@ -180,7 +177,7 @@ struct NormL1_RVV<schar, int> {
|
|||||||
int vl;
|
int vl;
|
||||||
for (int i = 0; i < n; i += vl) {
|
for (int i = 0; i < n; i += vl) {
|
||||||
vl = __riscv_vsetvl_e8m8(n - i);
|
vl = __riscv_vsetvl_e8m8(n - i);
|
||||||
auto v = custom_intrin::__riscv_vabs(__riscv_vle8_v_i8m8(src + i, vl), vl);
|
auto v = common::__riscv_vabs(__riscv_vle8_v_i8m8(src + i, vl), vl);
|
||||||
s = __riscv_vwredsumu(__riscv_vwredsumu_tu(zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
s = __riscv_vwredsumu(__riscv_vwredsumu_tu(zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(s);
|
return __riscv_vmv_x(s);
|
||||||
@ -208,7 +205,7 @@ struct NormL1_RVV<short, int> {
|
|||||||
int vl;
|
int vl;
|
||||||
for (int i = 0; i < n; i += vl) {
|
for (int i = 0; i < n; i += vl) {
|
||||||
vl = __riscv_vsetvl_e16m8(n - i);
|
vl = __riscv_vsetvl_e16m8(n - i);
|
||||||
auto v = custom_intrin::__riscv_vabs(__riscv_vle16_v_i16m8(src + i, vl), vl);
|
auto v = common::__riscv_vabs(__riscv_vle16_v_i16m8(src + i, vl), vl);
|
||||||
s = __riscv_vwredsumu(v, s, vl);
|
s = __riscv_vwredsumu(v, s, vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(s);
|
return __riscv_vmv_x(s);
|
||||||
@ -223,7 +220,7 @@ struct NormL1_RVV<int, double> {
|
|||||||
int vl;
|
int vl;
|
||||||
for (int i = 0; i < n; i += vl) {
|
for (int i = 0; i < n; i += vl) {
|
||||||
vl = __riscv_vsetvl_e32m4(n - i);
|
vl = __riscv_vsetvl_e32m4(n - i);
|
||||||
auto v = custom_intrin::__riscv_vabs(__riscv_vle32_v_i32m4(src + i, vl), vl);
|
auto v = common::__riscv_vabs(__riscv_vle32_v_i32m4(src + i, vl), vl);
|
||||||
s = __riscv_vfadd_tu(s, s, __riscv_vfwcvt_f(v, vl), vl);
|
s = __riscv_vfadd_tu(s, s, __riscv_vfwcvt_f(v, vl), vl);
|
||||||
}
|
}
|
||||||
return __riscv_vfmv_f(__riscv_vfredosum(s, __riscv_vfmv_s_f_f64m1(0, __riscv_vsetvlmax_e64m1()), vlmax));
|
return __riscv_vfmv_f(__riscv_vfredosum(s, __riscv_vfmv_s_f_f64m1(0, __riscv_vsetvlmax_e64m1()), vlmax));
|
||||||
@ -544,7 +541,7 @@ struct MaskedNormInf_RVV<schar, int> {
|
|||||||
auto v = __riscv_vlse8_v_i8m8(src + cn * i + cn_index, sizeof(schar) * cn, vl);
|
auto v = __riscv_vlse8_v_i8m8(src + cn * i + cn_index, sizeof(schar) * cn, vl);
|
||||||
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vmaxu_tumu(b, s, s, custom_intrin::__riscv_vabs(v, vl), vl);
|
s = __riscv_vmaxu_tumu(b, s, s, common::__riscv_vabs(v, vl), vl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u8m1(0, __riscv_vsetvlmax_e8m1()), vlmax));
|
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u8m1(0, __riscv_vsetvlmax_e8m1()), vlmax));
|
||||||
@ -560,7 +557,7 @@ struct MaskedNormL1_RVV<schar, int> {
|
|||||||
int vl;
|
int vl;
|
||||||
for (int i = 0; i < len; i += vl) {
|
for (int i = 0; i < len; i += vl) {
|
||||||
vl = __riscv_vsetvl_e8m8(len - i);
|
vl = __riscv_vsetvl_e8m8(len - i);
|
||||||
auto v = custom_intrin::__riscv_vabs(__riscv_vlse8_v_i8m8(src + cn * i + cn_index, sizeof(schar) * cn, vl), vl);
|
auto v = common::__riscv_vabs(__riscv_vlse8_v_i8m8(src + cn * i + cn_index, sizeof(schar) * cn, vl), vl);
|
||||||
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vwredsumu(__riscv_vwredsumu_tum(b, zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
s = __riscv_vwredsumu(__riscv_vwredsumu_tum(b, zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
||||||
@ -657,7 +654,7 @@ struct MaskedNormInf_RVV<short, int> {
|
|||||||
auto v = __riscv_vlse16_v_i16m8(src + cn * i + cn_index, sizeof(short) * cn, vl);
|
auto v = __riscv_vlse16_v_i16m8(src + cn * i + cn_index, sizeof(short) * cn, vl);
|
||||||
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vmaxu_tumu(b, s, s, custom_intrin::__riscv_vabs(v, vl), vl);
|
s = __riscv_vmaxu_tumu(b, s, s, common::__riscv_vabs(v, vl), vl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u16m1(0, __riscv_vsetvlmax_e16m1()), vlmax));
|
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u16m1(0, __riscv_vsetvlmax_e16m1()), vlmax));
|
||||||
@ -672,7 +669,7 @@ struct MaskedNormL1_RVV<short, int> {
|
|||||||
int vl;
|
int vl;
|
||||||
for (int i = 0; i < len; i += vl) {
|
for (int i = 0; i < len; i += vl) {
|
||||||
vl = __riscv_vsetvl_e8m4(len - i);
|
vl = __riscv_vsetvl_e8m4(len - i);
|
||||||
auto v = custom_intrin::__riscv_vabs(__riscv_vlse16_v_i16m8(src + cn * i + cn_index, sizeof(short) * cn, vl), vl);
|
auto v = common::__riscv_vabs(__riscv_vlse16_v_i16m8(src + cn * i + cn_index, sizeof(short) * cn, vl), vl);
|
||||||
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vwredsumu_tum(b, s, v, s, vl);
|
s = __riscv_vwredsumu_tum(b, s, v, s, vl);
|
||||||
@ -714,7 +711,7 @@ struct MaskedNormInf_RVV<int, int> {
|
|||||||
auto v = __riscv_vlse32_v_i32m8(src + cn * i + cn_index, sizeof(int) * cn, vl);
|
auto v = __riscv_vlse32_v_i32m8(src + cn * i + cn_index, sizeof(int) * cn, vl);
|
||||||
auto m = __riscv_vle8_v_u8m2(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m2(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vmaxu_tumu(b, s, s, custom_intrin::__riscv_vabs(v, vl), vl);
|
s = __riscv_vmaxu_tumu(b, s, s, common::__riscv_vabs(v, vl), vl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u32m1(0, __riscv_vsetvlmax_e32m1()), vlmax));
|
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u32m1(0, __riscv_vsetvlmax_e32m1()), vlmax));
|
||||||
@ -733,7 +730,7 @@ struct MaskedNormL1_RVV<int, double> {
|
|||||||
auto v = __riscv_vlse32_v_i32m4(src + cn * i + cn_index, sizeof(int) * cn, vl);
|
auto v = __riscv_vlse32_v_i32m4(src + cn * i + cn_index, sizeof(int) * cn, vl);
|
||||||
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vfadd_tumu(b, s, s, __riscv_vfwcvt_f(b, custom_intrin::__riscv_vabs(v, vl), vl), vl);
|
s = __riscv_vfadd_tumu(b, s, s, __riscv_vfwcvt_f(b, common::__riscv_vabs(v, vl), vl), vl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return __riscv_vfmv_f(__riscv_vfredosum(s, __riscv_vfmv_s_f_f64m1(0, __riscv_vsetvlmax_e64m1()), vlmax));
|
return __riscv_vfmv_f(__riscv_vfredosum(s, __riscv_vfmv_s_f_f64m1(0, __riscv_vsetvlmax_e64m1()), vlmax));
|
||||||
@ -972,8 +969,8 @@ CV_HAL_RVV_DEF_NORM_ALL(64f, double, double, double, double)
|
|||||||
}
|
}
|
||||||
|
|
||||||
using NormFunc = int (*)(const uchar*, const uchar*, uchar*, int, int);
|
using NormFunc = int (*)(const uchar*, const uchar*, uchar*, int, int);
|
||||||
inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width,
|
int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step,
|
||||||
int height, int type, int norm_type, double* result) {
|
int width, int height, int type, int norm_type, double* result) {
|
||||||
int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||||
|
|
||||||
if (result == nullptr || depth == CV_16F || norm_type > NORM_L2SQR) {
|
if (result == nullptr || depth == CV_16F || norm_type > NORM_L2SQR) {
|
||||||
@ -1004,18 +1001,8 @@ inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mas
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
static const size_t elem_size_tab[CV_DEPTH_MAX] = {
|
size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type));
|
||||||
sizeof(uchar), sizeof(schar),
|
bool src_continuous = (src_step == width * elem_size1 * cn || (src_step != width * elem_size1 * cn && height == 1));
|
||||||
sizeof(ushort), sizeof(short),
|
|
||||||
sizeof(int), sizeof(float),
|
|
||||||
sizeof(double), sizeof(cv::hfloat),
|
|
||||||
sizeof(cv::bfloat), sizeof(bool),
|
|
||||||
sizeof(uint64_t), sizeof(int64_t),
|
|
||||||
sizeof(unsigned), 0,
|
|
||||||
};
|
|
||||||
CV_Assert(elem_size_tab[depth]);
|
|
||||||
|
|
||||||
bool src_continuous = (src_step == width * elem_size_tab[depth] * cn || (src_step != width * elem_size_tab[depth] * cn && height == 1));
|
|
||||||
bool mask_continuous = (mask_step == static_cast<size_t>(width));
|
bool mask_continuous = (mask_step == static_cast<size_t>(width));
|
||||||
size_t nplanes = 1;
|
size_t nplanes = 1;
|
||||||
size_t size = width * height;
|
size_t size = width * height;
|
||||||
@ -1038,7 +1025,7 @@ inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mas
|
|||||||
res.d = 0;
|
res.d = 0;
|
||||||
if ((norm_type == NORM_L1 && depth <= CV_16S) ||
|
if ((norm_type == NORM_L1 && depth <= CV_16S) ||
|
||||||
((norm_type == NORM_L2 || norm_type == NORM_L2SQR) && depth <= CV_8S)) {
|
((norm_type == NORM_L2 || norm_type == NORM_L2SQR) && depth <= CV_8S)) {
|
||||||
const size_t esz = elem_size_tab[depth] * cn;
|
const size_t esz = elem_size1 * cn;
|
||||||
const int total = (int)size;
|
const int total = (int)size;
|
||||||
const int intSumBlockSize = (norm_type == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
|
const int intSumBlockSize = (norm_type == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
|
||||||
const int blockSize = std::min(total, intSumBlockSize);
|
const int blockSize = std::min(total, intSumBlockSize);
|
||||||
@ -1095,6 +1082,6 @@ inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mas
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}} // cv::cv_hal_rvv::norm
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::core
|
@ -6,15 +6,12 @@
|
|||||||
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_NORM_DIFF_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_NORM_DIFF_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace norm_diff {
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
#undef cv_hal_normDiff
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#define cv_hal_normDiff cv::cv_hal_rvv::norm_diff::normDiff
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -64,7 +61,7 @@ struct NormDiffInf_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m8(n - i);
|
vl = __riscv_vsetvl_e8m8(n - i);
|
||||||
auto v1 = __riscv_vle8_v_u8m8(src1 + i, vl);
|
auto v1 = __riscv_vle8_v_u8m8(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle8_v_u8m8(src2 + i, vl);
|
auto v2 = __riscv_vle8_v_u8m8(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
s = __riscv_vmaxu_tu(s, s, v, vl);
|
s = __riscv_vmaxu_tu(s, s, v, vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u8m1(0, __riscv_vsetvlmax_e8m1()), vlmax));
|
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u8m1(0, __riscv_vsetvlmax_e8m1()), vlmax));
|
||||||
@ -81,7 +78,7 @@ struct NormDiffInf_RVV<schar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m8(n - i);
|
vl = __riscv_vsetvl_e8m8(n - i);
|
||||||
auto v1 = __riscv_vle8_v_i8m8(src1 + i, vl);
|
auto v1 = __riscv_vle8_v_i8m8(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle8_v_i8m8(src2 + i, vl);
|
auto v2 = __riscv_vle8_v_i8m8(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
s = __riscv_vmaxu_tu(s, s, v, vl);
|
s = __riscv_vmaxu_tu(s, s, v, vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u8m1(0, __riscv_vsetvlmax_e8m1()), vlmax));
|
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u8m1(0, __riscv_vsetvlmax_e8m1()), vlmax));
|
||||||
@ -98,7 +95,7 @@ struct NormDiffInf_RVV<ushort, int> {
|
|||||||
vl = __riscv_vsetvl_e16m8(n - i);
|
vl = __riscv_vsetvl_e16m8(n - i);
|
||||||
auto v1 = __riscv_vle16_v_u16m8(src1 + i, vl);
|
auto v1 = __riscv_vle16_v_u16m8(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle16_v_u16m8(src2 + i, vl);
|
auto v2 = __riscv_vle16_v_u16m8(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
s = __riscv_vmaxu_tu(s, s, v, vl);
|
s = __riscv_vmaxu_tu(s, s, v, vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u16m1(0, __riscv_vsetvlmax_e16m1()), vlmax));
|
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u16m1(0, __riscv_vsetvlmax_e16m1()), vlmax));
|
||||||
@ -115,7 +112,7 @@ struct NormDiffInf_RVV<short, int> {
|
|||||||
vl = __riscv_vsetvl_e16m8(n - i);
|
vl = __riscv_vsetvl_e16m8(n - i);
|
||||||
auto v1 = __riscv_vle16_v_i16m8(src1 + i, vl);
|
auto v1 = __riscv_vle16_v_i16m8(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle16_v_i16m8(src2 + i, vl);
|
auto v2 = __riscv_vle16_v_i16m8(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
s = __riscv_vmaxu_tu(s, s, v, vl);
|
s = __riscv_vmaxu_tu(s, s, v, vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u16m1(0, __riscv_vsetvlmax_e16m1()), vlmax));
|
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u16m1(0, __riscv_vsetvlmax_e16m1()), vlmax));
|
||||||
@ -132,7 +129,8 @@ struct NormDiffInf_RVV<int, unsigned> {
|
|||||||
vl = __riscv_vsetvl_e32m8(n - i);
|
vl = __riscv_vsetvl_e32m8(n - i);
|
||||||
auto v1 = __riscv_vle32_v_i32m8(src1 + i, vl);
|
auto v1 = __riscv_vle32_v_i32m8(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle32_v_i32m8(src2 + i, vl);
|
auto v2 = __riscv_vle32_v_i32m8(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl); // 5.x
|
||||||
|
// auto v = common::__riscv_vabs(__riscv_vsub(v1, v2, vl), vl); // 4.x
|
||||||
s = __riscv_vmaxu_tu(s, s, v, vl);
|
s = __riscv_vmaxu_tu(s, s, v, vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u32m1(0, __riscv_vsetvlmax_e32m1()), vlmax));
|
return __riscv_vmv_x(__riscv_vredmaxu(s, __riscv_vmv_s_x_u32m1(0, __riscv_vsetvlmax_e32m1()), vlmax));
|
||||||
@ -183,7 +181,7 @@ struct NormDiffL1_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m8(n - i);
|
vl = __riscv_vsetvl_e8m8(n - i);
|
||||||
auto v1 = __riscv_vle8_v_u8m8(src1 + i, vl);
|
auto v1 = __riscv_vle8_v_u8m8(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle8_v_u8m8(src2 + i, vl);
|
auto v2 = __riscv_vle8_v_u8m8(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
s = __riscv_vwredsumu(__riscv_vwredsumu_tu(zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
s = __riscv_vwredsumu(__riscv_vwredsumu_tu(zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(s);
|
return __riscv_vmv_x(s);
|
||||||
@ -200,7 +198,7 @@ struct NormDiffL1_RVV<schar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m8(n - i);
|
vl = __riscv_vsetvl_e8m8(n - i);
|
||||||
auto v1 = __riscv_vle8_v_i8m8(src1 + i, vl);
|
auto v1 = __riscv_vle8_v_i8m8(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle8_v_i8m8(src2 + i, vl);
|
auto v2 = __riscv_vle8_v_i8m8(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
s = __riscv_vwredsumu(__riscv_vwredsumu_tu(zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
s = __riscv_vwredsumu(__riscv_vwredsumu_tu(zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(s);
|
return __riscv_vmv_x(s);
|
||||||
@ -216,7 +214,7 @@ struct NormDiffL1_RVV<ushort, int> {
|
|||||||
vl = __riscv_vsetvl_e16m8(n - i);
|
vl = __riscv_vsetvl_e16m8(n - i);
|
||||||
auto v1 = __riscv_vle16_v_u16m8(src1 + i, vl);
|
auto v1 = __riscv_vle16_v_u16m8(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle16_v_u16m8(src2 + i, vl);
|
auto v2 = __riscv_vle16_v_u16m8(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
s = __riscv_vwredsumu(v, s, vl);
|
s = __riscv_vwredsumu(v, s, vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(s);
|
return __riscv_vmv_x(s);
|
||||||
@ -232,7 +230,7 @@ struct NormDiffL1_RVV<short, int> {
|
|||||||
vl = __riscv_vsetvl_e16m8(n - i);
|
vl = __riscv_vsetvl_e16m8(n - i);
|
||||||
auto v1 = __riscv_vle16_v_i16m8(src1 + i, vl);
|
auto v1 = __riscv_vle16_v_i16m8(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle16_v_i16m8(src2 + i, vl);
|
auto v2 = __riscv_vle16_v_i16m8(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
s = __riscv_vwredsumu(v, s, vl);
|
s = __riscv_vwredsumu(v, s, vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(s);
|
return __riscv_vmv_x(s);
|
||||||
@ -249,7 +247,8 @@ struct NormDiffL1_RVV<int, double> {
|
|||||||
vl = __riscv_vsetvl_e32m4(n - i);
|
vl = __riscv_vsetvl_e32m4(n - i);
|
||||||
auto v1 = __riscv_vle32_v_i32m4(src1 + i, vl);
|
auto v1 = __riscv_vle32_v_i32m4(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle32_v_i32m4(src2 + i, vl);
|
auto v2 = __riscv_vle32_v_i32m4(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl); // 5.x
|
||||||
|
// auto v = common::__riscv_vabs(__riscv_vsub(v1, v2, vl), vl); // 4.x
|
||||||
s = __riscv_vfadd_tu(s, s, __riscv_vfwcvt_f(v, vl), vl);
|
s = __riscv_vfadd_tu(s, s, __riscv_vfwcvt_f(v, vl), vl);
|
||||||
}
|
}
|
||||||
return __riscv_vfmv_f(__riscv_vfredosum(s, __riscv_vfmv_s_f_f64m1(0, __riscv_vsetvlmax_e64m1()), vlmax));
|
return __riscv_vfmv_f(__riscv_vfredosum(s, __riscv_vfmv_s_f_f64m1(0, __riscv_vsetvlmax_e64m1()), vlmax));
|
||||||
@ -299,7 +298,7 @@ struct NormDiffL2_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m4(n - i);
|
vl = __riscv_vsetvl_e8m4(n - i);
|
||||||
auto v1 = __riscv_vle8_v_u8m4(src1 + i, vl);
|
auto v1 = __riscv_vle8_v_u8m4(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle8_v_u8m4(src2 + i, vl);
|
auto v2 = __riscv_vle8_v_u8m4(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
s = __riscv_vwredsumu(__riscv_vwmulu(v, v, vl), s, vl);
|
s = __riscv_vwredsumu(__riscv_vwmulu(v, v, vl), s, vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(s);
|
return __riscv_vmv_x(s);
|
||||||
@ -315,7 +314,7 @@ struct NormDiffL2_RVV<schar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m4(n - i);
|
vl = __riscv_vsetvl_e8m4(n - i);
|
||||||
auto v1 = __riscv_vle8_v_i8m4(src1 + i, vl);
|
auto v1 = __riscv_vle8_v_i8m4(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle8_v_i8m4(src2 + i, vl);
|
auto v2 = __riscv_vle8_v_i8m4(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
s = __riscv_vwredsumu(__riscv_vwmulu(v, v, vl), s, vl);
|
s = __riscv_vwredsumu(__riscv_vwmulu(v, v, vl), s, vl);
|
||||||
}
|
}
|
||||||
return __riscv_vmv_x(s);
|
return __riscv_vmv_x(s);
|
||||||
@ -332,7 +331,7 @@ struct NormDiffL2_RVV<ushort, double> {
|
|||||||
vl = __riscv_vsetvl_e16m2(n - i);
|
vl = __riscv_vsetvl_e16m2(n - i);
|
||||||
auto v1 = __riscv_vle16_v_u16m2(src1 + i, vl);
|
auto v1 = __riscv_vle16_v_u16m2(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle16_v_u16m2(src2 + i, vl);
|
auto v2 = __riscv_vle16_v_u16m2(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto v_mul = __riscv_vwmulu(v, v, vl);
|
auto v_mul = __riscv_vwmulu(v, v, vl);
|
||||||
s = __riscv_vfadd_tu(s, s, __riscv_vfwcvt_f(v_mul, vl), vl);
|
s = __riscv_vfadd_tu(s, s, __riscv_vfwcvt_f(v_mul, vl), vl);
|
||||||
}
|
}
|
||||||
@ -350,7 +349,7 @@ struct NormDiffL2_RVV<short, double> {
|
|||||||
vl = __riscv_vsetvl_e16m2(n - i);
|
vl = __riscv_vsetvl_e16m2(n - i);
|
||||||
auto v1 = __riscv_vle16_v_i16m2(src1 + i, vl);
|
auto v1 = __riscv_vle16_v_i16m2(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle16_v_i16m2(src2 + i, vl);
|
auto v2 = __riscv_vle16_v_i16m2(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto v_mul = __riscv_vwmulu(v, v, vl);
|
auto v_mul = __riscv_vwmulu(v, v, vl);
|
||||||
s = __riscv_vfadd_tu(s, s, __riscv_vfwcvt_f(v_mul, vl), vl);
|
s = __riscv_vfadd_tu(s, s, __riscv_vfwcvt_f(v_mul, vl), vl);
|
||||||
}
|
}
|
||||||
@ -368,7 +367,7 @@ struct NormDiffL2_RVV<int, double> {
|
|||||||
vl = __riscv_vsetvl_e32m4(n - i);
|
vl = __riscv_vsetvl_e32m4(n - i);
|
||||||
auto v1 = __riscv_vle32_v_i32m4(src1 + i, vl);
|
auto v1 = __riscv_vle32_v_i32m4(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle32_v_i32m4(src2 + i, vl);
|
auto v2 = __riscv_vle32_v_i32m4(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto v_mul = __riscv_vwmulu(v, v, vl);
|
auto v_mul = __riscv_vwmulu(v, v, vl);
|
||||||
s = __riscv_vfadd_tu(s, s, __riscv_vfcvt_f(v_mul, vl), vl);
|
s = __riscv_vfadd_tu(s, s, __riscv_vfcvt_f(v_mul, vl), vl);
|
||||||
}
|
}
|
||||||
@ -471,7 +470,7 @@ struct MaskedNormDiffInf_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m8(len - i);
|
vl = __riscv_vsetvl_e8m8(len - i);
|
||||||
auto v1 = __riscv_vle8_v_u8m8(src1 + i, vl);
|
auto v1 = __riscv_vle8_v_u8m8(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle8_v_u8m8(src2 + i, vl);
|
auto v2 = __riscv_vle8_v_u8m8(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
||||||
@ -482,7 +481,7 @@ struct MaskedNormDiffInf_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m2(len - i);
|
vl = __riscv_vsetvl_e8m2(len - i);
|
||||||
auto v1 = __riscv_vle8_v_u8m8(src1 + i * 4, vl * 4);
|
auto v1 = __riscv_vle8_v_u8m8(src1 + i * 4, vl * 4);
|
||||||
auto v2 = __riscv_vle8_v_u8m8(src2 + i * 4, vl * 4);
|
auto v2 = __riscv_vle8_v_u8m8(src2 + i * 4, vl * 4);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl * 4);
|
auto v = common::__riscv_vabd(v1, v2, vl * 4);
|
||||||
auto m = __riscv_vle8_v_u8m2(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m2(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(__riscv_vreinterpret_u8m8(__riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(m, 1, vl), vl), 0x01010101, vl)), 0, vl * 4);
|
auto b = __riscv_vmsne(__riscv_vreinterpret_u8m8(__riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(m, 1, vl), vl), 0x01010101, vl)), 0, vl * 4);
|
||||||
s = __riscv_vmaxu_tumu(b, s, s, v, vl * 4);
|
s = __riscv_vmaxu_tumu(b, s, s, v, vl * 4);
|
||||||
@ -494,7 +493,7 @@ struct MaskedNormDiffInf_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m8(len - i);
|
vl = __riscv_vsetvl_e8m8(len - i);
|
||||||
auto v1 = __riscv_vlse8_v_u8m8(src1 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
auto v1 = __riscv_vlse8_v_u8m8(src1 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
||||||
auto v2 = __riscv_vlse8_v_u8m8(src2 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
auto v2 = __riscv_vlse8_v_u8m8(src2 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
||||||
@ -516,7 +515,7 @@ struct MaskedNormDiffInf_RVV<schar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m8(len - i);
|
vl = __riscv_vsetvl_e8m8(len - i);
|
||||||
auto v1 = __riscv_vlse8_v_i8m8(src1 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
auto v1 = __riscv_vlse8_v_i8m8(src1 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
||||||
auto v2 = __riscv_vlse8_v_i8m8(src2 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
auto v2 = __riscv_vlse8_v_i8m8(src2 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
||||||
@ -537,7 +536,7 @@ struct MaskedNormDiffInf_RVV<ushort, int> {
|
|||||||
vl = __riscv_vsetvl_e16m8(len - i);
|
vl = __riscv_vsetvl_e16m8(len - i);
|
||||||
auto v1 = __riscv_vlse16_v_u16m8(src1 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
auto v1 = __riscv_vlse16_v_u16m8(src1 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
||||||
auto v2 = __riscv_vlse16_v_u16m8(src2 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
auto v2 = __riscv_vlse16_v_u16m8(src2 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
||||||
@ -558,7 +557,7 @@ struct MaskedNormDiffInf_RVV<short, int> {
|
|||||||
vl = __riscv_vsetvl_e16m8(len - i);
|
vl = __riscv_vsetvl_e16m8(len - i);
|
||||||
auto v1 = __riscv_vlse16_v_i16m8(src1 + cn * i + cn_index, sizeof(short) * cn, vl);
|
auto v1 = __riscv_vlse16_v_i16m8(src1 + cn * i + cn_index, sizeof(short) * cn, vl);
|
||||||
auto v2 = __riscv_vlse16_v_i16m8(src2 + cn * i + cn_index, sizeof(short) * cn, vl);
|
auto v2 = __riscv_vlse16_v_i16m8(src2 + cn * i + cn_index, sizeof(short) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
||||||
@ -579,7 +578,8 @@ struct MaskedNormDiffInf_RVV<int, unsigned> {
|
|||||||
vl = __riscv_vsetvl_e32m8(len - i);
|
vl = __riscv_vsetvl_e32m8(len - i);
|
||||||
auto v1 = __riscv_vlse32_v_i32m8(src1 + cn * i + cn_index, sizeof(int) * cn, vl);
|
auto v1 = __riscv_vlse32_v_i32m8(src1 + cn * i + cn_index, sizeof(int) * cn, vl);
|
||||||
auto v2 = __riscv_vlse32_v_i32m8(src2 + cn * i + cn_index, sizeof(int) * cn, vl);
|
auto v2 = __riscv_vlse32_v_i32m8(src2 + cn * i + cn_index, sizeof(int) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl); // 5.x
|
||||||
|
// auto v = common::__riscv_vabs(__riscv_vsub(v1, v2, vl), vl); // 4.x
|
||||||
auto m = __riscv_vle8_v_u8m2(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m2(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
s = __riscv_vmaxu_tumu(b, s, s, v, vl);
|
||||||
@ -655,7 +655,7 @@ struct MaskedNormDiffL1_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m8(len - i);
|
vl = __riscv_vsetvl_e8m8(len - i);
|
||||||
auto v1 = __riscv_vle8_v_u8m8(src1 + i, vl);
|
auto v1 = __riscv_vle8_v_u8m8(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle8_v_u8m8(src2 + i, vl);
|
auto v2 = __riscv_vle8_v_u8m8(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vwredsumu(__riscv_vwredsumu_tum(b, zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
s = __riscv_vwredsumu(__riscv_vwredsumu_tum(b, zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
||||||
@ -666,7 +666,7 @@ struct MaskedNormDiffL1_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m2(len - i);
|
vl = __riscv_vsetvl_e8m2(len - i);
|
||||||
auto v1 = __riscv_vle8_v_u8m8(src1 + i * 4, vl * 4);
|
auto v1 = __riscv_vle8_v_u8m8(src1 + i * 4, vl * 4);
|
||||||
auto v2 = __riscv_vle8_v_u8m8(src2 + i * 4, vl * 4);
|
auto v2 = __riscv_vle8_v_u8m8(src2 + i * 4, vl * 4);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl * 4);
|
auto v = common::__riscv_vabd(v1, v2, vl * 4);
|
||||||
auto m = __riscv_vle8_v_u8m2(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m2(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(__riscv_vreinterpret_u8m8(__riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(m, 1, vl), vl), 0x01010101, vl)), 0, vl * 4);
|
auto b = __riscv_vmsne(__riscv_vreinterpret_u8m8(__riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(m, 1, vl), vl), 0x01010101, vl)), 0, vl * 4);
|
||||||
s = __riscv_vwredsumu(__riscv_vwredsumu_tum(b, zero, v, zero, vl * 4), s, __riscv_vsetvlmax_e16m1());
|
s = __riscv_vwredsumu(__riscv_vwredsumu_tum(b, zero, v, zero, vl * 4), s, __riscv_vsetvlmax_e16m1());
|
||||||
@ -678,7 +678,7 @@ struct MaskedNormDiffL1_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m8(len - i);
|
vl = __riscv_vsetvl_e8m8(len - i);
|
||||||
auto v1 = __riscv_vlse8_v_u8m8(src1 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
auto v1 = __riscv_vlse8_v_u8m8(src1 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
||||||
auto v2 = __riscv_vlse8_v_u8m8(src2 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
auto v2 = __riscv_vlse8_v_u8m8(src2 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vwredsumu(__riscv_vwredsumu_tum(b, zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
s = __riscv_vwredsumu(__riscv_vwredsumu_tum(b, zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
||||||
@ -700,7 +700,7 @@ struct MaskedNormDiffL1_RVV<schar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m8(len - i);
|
vl = __riscv_vsetvl_e8m8(len - i);
|
||||||
auto v1 = __riscv_vlse8_v_i8m8(src1 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
auto v1 = __riscv_vlse8_v_i8m8(src1 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
||||||
auto v2 = __riscv_vlse8_v_i8m8(src2 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
auto v2 = __riscv_vlse8_v_i8m8(src2 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m8(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vwredsumu(__riscv_vwredsumu_tum(b, zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
s = __riscv_vwredsumu(__riscv_vwredsumu_tum(b, zero, v, zero, vl), s, __riscv_vsetvlmax_e16m1());
|
||||||
@ -720,7 +720,7 @@ struct MaskedNormDiffL1_RVV<ushort, int> {
|
|||||||
vl = __riscv_vsetvl_e8m4(len - i);
|
vl = __riscv_vsetvl_e8m4(len - i);
|
||||||
auto v1 = __riscv_vlse16_v_u16m8(src1 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
auto v1 = __riscv_vlse16_v_u16m8(src1 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
||||||
auto v2 = __riscv_vlse16_v_u16m8(src2 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
auto v2 = __riscv_vlse16_v_u16m8(src2 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vwredsumu_tum(b, s, v, s, vl);
|
s = __riscv_vwredsumu_tum(b, s, v, s, vl);
|
||||||
@ -740,7 +740,7 @@ struct MaskedNormDiffL1_RVV<short, int> {
|
|||||||
vl = __riscv_vsetvl_e8m4(len - i);
|
vl = __riscv_vsetvl_e8m4(len - i);
|
||||||
auto v1 = __riscv_vlse16_v_i16m8(src1 + cn * i + cn_index, sizeof(short) * cn, vl);
|
auto v1 = __riscv_vlse16_v_i16m8(src1 + cn * i + cn_index, sizeof(short) * cn, vl);
|
||||||
auto v2 = __riscv_vlse16_v_i16m8(src2 + cn * i + cn_index, sizeof(short) * cn, vl);
|
auto v2 = __riscv_vlse16_v_i16m8(src2 + cn * i + cn_index, sizeof(short) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vwredsumu_tum(b, s, v, s, vl);
|
s = __riscv_vwredsumu_tum(b, s, v, s, vl);
|
||||||
@ -761,7 +761,8 @@ struct MaskedNormDiffL1_RVV<int, double> {
|
|||||||
vl = __riscv_vsetvl_e32m4(len - i);
|
vl = __riscv_vsetvl_e32m4(len - i);
|
||||||
auto v1 = __riscv_vlse32_v_i32m4(src1 + cn * i + cn_index, sizeof(int) * cn, vl);
|
auto v1 = __riscv_vlse32_v_i32m4(src1 + cn * i + cn_index, sizeof(int) * cn, vl);
|
||||||
auto v2 = __riscv_vlse32_v_i32m4(src2 + cn * i + cn_index, sizeof(int) * cn, vl);
|
auto v2 = __riscv_vlse32_v_i32m4(src2 + cn * i + cn_index, sizeof(int) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl); // 5.x
|
||||||
|
// auto v = common::__riscv_vabs(__riscv_vsub(v1, v2, vl), vl); // 4.x
|
||||||
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vfadd_tumu(b, s, s, __riscv_vfwcvt_f(b, v, vl), vl);
|
s = __riscv_vfadd_tumu(b, s, s, __riscv_vfwcvt_f(b, v, vl), vl);
|
||||||
@ -836,7 +837,7 @@ struct MaskedNormDiffL2_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m4(len - i);
|
vl = __riscv_vsetvl_e8m4(len - i);
|
||||||
auto v1 = __riscv_vle8_v_u8m4(src1 + i, vl);
|
auto v1 = __riscv_vle8_v_u8m4(src1 + i, vl);
|
||||||
auto v2 = __riscv_vle8_v_u8m4(src2 + i, vl);
|
auto v2 = __riscv_vle8_v_u8m4(src2 + i, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vwredsumu(b, __riscv_vwmulu(b, v, v, vl), s, vl);
|
s = __riscv_vwredsumu(b, __riscv_vwmulu(b, v, v, vl), s, vl);
|
||||||
@ -847,7 +848,7 @@ struct MaskedNormDiffL2_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m1(len - i);
|
vl = __riscv_vsetvl_e8m1(len - i);
|
||||||
auto v1 = __riscv_vle8_v_u8m4(src1 + i * 4, vl * 4);
|
auto v1 = __riscv_vle8_v_u8m4(src1 + i * 4, vl * 4);
|
||||||
auto v2 = __riscv_vle8_v_u8m4(src2 + i * 4, vl * 4);
|
auto v2 = __riscv_vle8_v_u8m4(src2 + i * 4, vl * 4);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl * 4);
|
auto v = common::__riscv_vabd(v1, v2, vl * 4);
|
||||||
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(__riscv_vreinterpret_u8m4(__riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(m, 1, vl), vl), 0x01010101, vl)), 0, vl * 4);
|
auto b = __riscv_vmsne(__riscv_vreinterpret_u8m4(__riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(m, 1, vl), vl), 0x01010101, vl)), 0, vl * 4);
|
||||||
s = __riscv_vwredsumu(b, __riscv_vwmulu(b, v, v, vl * 4), s, vl * 4);
|
s = __riscv_vwredsumu(b, __riscv_vwmulu(b, v, v, vl * 4), s, vl * 4);
|
||||||
@ -859,7 +860,7 @@ struct MaskedNormDiffL2_RVV<uchar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m4(len - i);
|
vl = __riscv_vsetvl_e8m4(len - i);
|
||||||
auto v1 = __riscv_vlse8_v_u8m4(src1 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
auto v1 = __riscv_vlse8_v_u8m4(src1 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
||||||
auto v2 = __riscv_vlse8_v_u8m4(src2 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
auto v2 = __riscv_vlse8_v_u8m4(src2 + cn * i + cn_index, sizeof(uchar) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vwredsumu(b, __riscv_vwmulu(b, v, v, vl), s, vl);
|
s = __riscv_vwredsumu(b, __riscv_vwmulu(b, v, v, vl), s, vl);
|
||||||
@ -880,7 +881,7 @@ struct MaskedNormDiffL2_RVV<schar, int> {
|
|||||||
vl = __riscv_vsetvl_e8m4(len - i);
|
vl = __riscv_vsetvl_e8m4(len - i);
|
||||||
auto v1 = __riscv_vlse8_v_i8m4(src1 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
auto v1 = __riscv_vlse8_v_i8m4(src1 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
||||||
auto v2 = __riscv_vlse8_v_i8m4(src2 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
auto v2 = __riscv_vlse8_v_i8m4(src2 + cn * i + cn_index, sizeof(schar) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m4(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
s = __riscv_vwredsumu(b, __riscv_vwmulu(b, v, v, vl), s, vl);
|
s = __riscv_vwredsumu(b, __riscv_vwmulu(b, v, v, vl), s, vl);
|
||||||
@ -901,7 +902,7 @@ struct MaskedNormDiffL2_RVV<ushort, double> {
|
|||||||
vl = __riscv_vsetvl_e16m2(len - i);
|
vl = __riscv_vsetvl_e16m2(len - i);
|
||||||
auto v1 = __riscv_vlse16_v_u16m2(src1 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
auto v1 = __riscv_vlse16_v_u16m2(src1 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
||||||
auto v2 = __riscv_vlse16_v_u16m2(src2 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
auto v2 = __riscv_vlse16_v_u16m2(src2 + cn * i + cn_index, sizeof(ushort) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
auto v_mul = __riscv_vwmulu(b, v, v, vl);
|
auto v_mul = __riscv_vwmulu(b, v, v, vl);
|
||||||
@ -923,7 +924,7 @@ struct MaskedNormDiffL2_RVV<short, double> {
|
|||||||
vl = __riscv_vsetvl_e16m2(len - i);
|
vl = __riscv_vsetvl_e16m2(len - i);
|
||||||
auto v1 = __riscv_vlse16_v_i16m2(src1 + cn * i + cn_index, sizeof(short) * cn, vl);
|
auto v1 = __riscv_vlse16_v_i16m2(src1 + cn * i + cn_index, sizeof(short) * cn, vl);
|
||||||
auto v2 = __riscv_vlse16_v_i16m2(src2 + cn * i + cn_index, sizeof(short) * cn, vl);
|
auto v2 = __riscv_vlse16_v_i16m2(src2 + cn * i + cn_index, sizeof(short) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
auto v_mul = __riscv_vwmulu(b, v, v, vl);
|
auto v_mul = __riscv_vwmulu(b, v, v, vl);
|
||||||
@ -945,7 +946,7 @@ struct MaskedNormDiffL2_RVV<int, double> {
|
|||||||
vl = __riscv_vsetvl_e16m2(len - i);
|
vl = __riscv_vsetvl_e16m2(len - i);
|
||||||
auto v1 = __riscv_vlse32_v_i32m4(src1 + cn * i + cn_index, sizeof(int) * cn, vl);
|
auto v1 = __riscv_vlse32_v_i32m4(src1 + cn * i + cn_index, sizeof(int) * cn, vl);
|
||||||
auto v2 = __riscv_vlse32_v_i32m4(src2 + cn * i + cn_index, sizeof(int) * cn, vl);
|
auto v2 = __riscv_vlse32_v_i32m4(src2 + cn * i + cn_index, sizeof(int) * cn, vl);
|
||||||
auto v = custom_intrin::__riscv_vabd(v1, v2, vl);
|
auto v = common::__riscv_vabd(v1, v2, vl);
|
||||||
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
auto m = __riscv_vle8_v_u8m1(mask + i, vl);
|
||||||
auto b = __riscv_vmsne(m, 0, vl);
|
auto b = __riscv_vmsne(m, 0, vl);
|
||||||
auto v_mul = __riscv_vwmulu(b, v, v, vl);
|
auto v_mul = __riscv_vwmulu(b, v, v, vl);
|
||||||
@ -1079,9 +1080,8 @@ CV_HAL_RVV_DEF_NORM_DIFF_ALL(64f, double, double, double, double)
|
|||||||
}
|
}
|
||||||
|
|
||||||
using NormDiffFunc = int (*)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
|
using NormDiffFunc = int (*)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
|
||||||
inline int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask,
|
int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step,
|
||||||
size_t mask_step, int width, int height, int type, int norm_type, double* result)
|
int width, int height, int type, int norm_type, double* result) {
|
||||||
{
|
|
||||||
int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||||
|
|
||||||
bool relative = norm_type & NORM_RELATIVE;
|
bool relative = norm_type & NORM_RELATIVE;
|
||||||
@ -1115,19 +1115,9 @@ inline int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
static const size_t elem_size_tab[CV_DEPTH_MAX] = {
|
size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type));
|
||||||
sizeof(uchar), sizeof(schar),
|
bool src_continuous = (src1_step == width * elem_size1 * cn || (src1_step != width * elem_size1 * cn && height == 1));
|
||||||
sizeof(ushort), sizeof(short),
|
src_continuous &= (src2_step == width * elem_size1 * cn || (src2_step != width * elem_size1 * cn && height == 1));
|
||||||
sizeof(int), sizeof(float),
|
|
||||||
sizeof(double), sizeof(cv::hfloat),
|
|
||||||
sizeof(cv::bfloat), sizeof(bool),
|
|
||||||
sizeof(uint64_t), sizeof(int64_t),
|
|
||||||
sizeof(unsigned), 0,
|
|
||||||
};
|
|
||||||
CV_Assert(elem_size_tab[depth]);
|
|
||||||
|
|
||||||
bool src_continuous = (src1_step == width * elem_size_tab[depth] * cn || (src1_step != width * elem_size_tab[depth] * cn && height == 1));
|
|
||||||
src_continuous &= (src2_step == width * elem_size_tab[depth] * cn || (src2_step != width * elem_size_tab[depth] * cn && height == 1));
|
|
||||||
bool mask_continuous = (mask_step == static_cast<size_t>(width));
|
bool mask_continuous = (mask_step == static_cast<size_t>(width));
|
||||||
size_t nplanes = 1;
|
size_t nplanes = 1;
|
||||||
size_t size = width * height;
|
size_t size = width * height;
|
||||||
@ -1150,7 +1140,7 @@ inline int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size
|
|||||||
res.d = 0;
|
res.d = 0;
|
||||||
if ((norm_type == NORM_L1 && depth <= CV_16S) ||
|
if ((norm_type == NORM_L1 && depth <= CV_16S) ||
|
||||||
((norm_type == NORM_L2 || norm_type == NORM_L2SQR) && depth <= CV_8S)) {
|
((norm_type == NORM_L2 || norm_type == NORM_L2SQR) && depth <= CV_8S)) {
|
||||||
const size_t esz = elem_size_tab[depth] * cn;
|
const size_t esz = elem_size1 * cn;
|
||||||
const int total = (int)size;
|
const int total = (int)size;
|
||||||
const int intSumBlockSize = (norm_type == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
|
const int intSumBlockSize = (norm_type == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
|
||||||
const int blockSize = std::min(total, intSumBlockSize);
|
const int blockSize = std::min(total, intSumBlockSize);
|
||||||
@ -1210,7 +1200,7 @@ inline int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size
|
|||||||
if(relative)
|
if(relative)
|
||||||
{
|
{
|
||||||
double result_;
|
double result_;
|
||||||
int ret = cv::cv_hal_rvv::norm::norm(src2, src2_step, mask, mask_step, width, height, type, norm_type, &result_);
|
int ret = cv::rvv_hal::core::norm(src2, src2_step, mask, mask_step, width, height, type, norm_type, &result_);
|
||||||
if(ret == CV_HAL_ERROR_OK)
|
if(ret == CV_HAL_ERROR_OK)
|
||||||
{
|
{
|
||||||
*result /= result_ + DBL_EPSILON;
|
*result /= result_ + DBL_EPSILON;
|
||||||
@ -1220,6 +1210,6 @@ inline int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::core
|
@ -4,18 +4,11 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_NORM_HAMMING_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_NORM_HAMMING_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
#include <opencv2/core/base.hpp>
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#undef cv_hal_normHamming8u
|
|
||||||
#define cv_hal_normHamming8u cv::cv_hal_rvv::normHamming8u
|
|
||||||
#undef cv_hal_normHammingDiff8u
|
|
||||||
#define cv_hal_normHammingDiff8u cv::cv_hal_rvv::normHammingDiff8u
|
|
||||||
|
|
||||||
template <typename CellType>
|
template <typename CellType>
|
||||||
inline void normHammingCnt_m8(vuint8m8_t v, vbool1_t mask, size_t len_bool, size_t& result)
|
inline void normHammingCnt_m8(vuint8m8_t v, vbool1_t mask, size_t len_bool, size_t& result)
|
||||||
@ -153,7 +146,7 @@ inline void normHammingDiff8uLoop(const uchar* a, const uchar* b, size_t n, size
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int normHamming8u(const uchar* a, int n, int cellSize, int* result)
|
int normHamming8u(const uchar* a, int n, int cellSize, int* result)
|
||||||
{
|
{
|
||||||
size_t _result = 0;
|
size_t _result = 0;
|
||||||
|
|
||||||
@ -168,7 +161,7 @@ inline int normHamming8u(const uchar* a, int n, int cellSize, int* result)
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int normHammingDiff8u(const uchar* a, const uchar* b, int n, int cellSize, int* result)
|
int normHammingDiff8u(const uchar* a, const uchar* b, int n, int cellSize, int* result)
|
||||||
{
|
{
|
||||||
size_t _result = 0;
|
size_t _result = 0;
|
||||||
|
|
||||||
@ -183,6 +176,6 @@ inline int normHammingDiff8u(const uchar* a, const uchar* b, int n, int cellSize
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}} // namespace cv::cv_hal_rvv
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif //OPENCV_HAL_RVV_NORM_HAMMING_HPP_INCLUDED
|
}}} // cv::rvv_hal::core
|
@ -1,16 +1,16 @@
|
|||||||
// This file is part of OpenCV project.
|
// This file is part of OpenCV project.
|
||||||
// It is subject to the license terms in the LICENSE file found in the top-level
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
// directory of this distribution and at http://opencv.org/license.html.
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_SINCOS_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_SINCOS_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace detail {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
static constexpr size_t sincos_mask = 0x3;
|
static constexpr size_t sincos_mask = 0x3;
|
||||||
|
|
||||||
@ -67,6 +67,44 @@ static inline void
|
|||||||
cosval = __riscv_vfneg_mu(__riscv_vmor(idx1, idx2, vl), cosval, cosval, vl);
|
cosval = __riscv_vfneg_mu(__riscv_vmor(idx1, idx2, vl), cosval, cosval, vl);
|
||||||
}
|
}
|
||||||
|
|
||||||
}}} // namespace cv::cv_hal_rvv::detail
|
// Converts `len` polar coordinates (mag, angle) to Cartesian (x, y).
// RVV_T is the vector type wrapper used for loads/stores of Elem; the sine/cosine
// evaluation itself is always done through T = RVV_F32M4 (inputs are cast to T and
// results are cast back to RVV_T before being stored).
// `mag` may be null, in which case x receives the cosine and y the sine unscaled.
// Always returns CV_HAL_ERROR_OK.
template <typename RVV_T, typename Elem = typename RVV_T::ElemType>
|
||||||
|
inline int polarToCart(const Elem* mag, const Elem* angle, Elem* x, Elem* y, int len, bool angleInDegrees)
|
||||||
|
{
|
||||||
|
using T = RVV_F32M4; // computation type, independent of RVV_T
|
||||||
|
const auto sincos_scale = angleInDegrees ? sincos_deg_scale : sincos_rad_scale; // scale selected by the angle unit
|
||||||
|
|
||||||
#endif // OPENCV_HAL_RVV_SINCOS_HPP_INCLUDED
|
size_t vl;
|
||||||
|
auto cos_p2 = T::vmv(sincos_cos_p2, T::setvlmax()); // loop-invariant constant vectors passed to SinCos32f
|
||||||
|
auto cos_p0 = T::vmv(sincos_cos_p0, T::setvlmax());
|
||||||
|
for (; len > 0; len -= (int)vl, angle += vl, x += vl, y += vl) // mag is advanced inside the loop, only when non-null
|
||||||
|
{
|
||||||
|
vl = RVV_T::setvl(len);
|
||||||
|
auto vangle = T::cast(RVV_T::vload(angle, vl), vl);
|
||||||
|
T::VecType vsin, vcos;
|
||||||
|
SinCos32f<T>(vangle, vsin, vcos, sincos_scale, cos_p2, cos_p0, vl);
|
||||||
|
if (mag)
|
||||||
|
{
|
||||||
|
auto vmag = T::cast(RVV_T::vload(mag, vl), vl);
|
||||||
|
vsin = __riscv_vfmul(vsin, vmag, vl);
|
||||||
|
vcos = __riscv_vfmul(vcos, vmag, vl);
|
||||||
|
mag += vl;
|
||||||
|
}
|
||||||
|
RVV_T::vstore(x, RVV_T::cast(vcos, vl), vl); // x = (mag *) cos(angle)
|
||||||
|
RVV_T::vstore(y, RVV_T::cast(vsin, vl), vl); // y = (mag *) sin(angle)
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
|
// float entry point: forwards to the polarToCart template instantiated with RVV_F32M4
// and returns its result.
int polarToCart32f(const float* mag, const float* angle, float* x, float* y, int len, bool angleInDegrees) {
|
||||||
|
return polarToCart<RVV_F32M4>(mag, angle, x, y, len, angleInDegrees);
|
||||||
|
}
|
||||||
|
// double entry point: forwards to the polarToCart template instantiated with RVV_F64M8
// and returns its result.
int polarToCart64f(const double* mag, const double* angle, double* x, double* y, int len, bool angleInDegrees) {
|
||||||
|
return polarToCart<RVV_F64M8>(mag, angle, x, y, len, angleInDegrees);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
@ -4,22 +4,17 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_QR_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_QR_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <cfloat>
|
#include <cfloat>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <riscv_vector.h>
|
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace qr {
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
#undef cv_hal_QR32f
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#define cv_hal_QR32f cv::cv_hal_rvv::qr::QR<cv::cv_hal_rvv::RVV_F32M4>
|
|
||||||
#undef cv_hal_QR64f
|
namespace {
|
||||||
#define cv_hal_QR64f cv::cv_hal_rvv::qr::QR<cv::cv_hal_rvv::RVV_F64M4>
|
|
||||||
|
|
||||||
// the algorithm is copied from core/src/matrix_decomp.cpp,
|
// the algorithm is copied from core/src/matrix_decomp.cpp,
|
||||||
// in the function template static int cv::QRImpl
|
// in the function template static int cv::QRImpl
|
||||||
@ -171,6 +166,15 @@ inline int QR(T* src1, size_t src1_step, int m, int n, int k, T* src2, size_t sr
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
} // anonymous
|
||||||
|
|
||||||
#endif
|
// float entry point: forwards every argument unchanged to the file-local QR template
// instantiated with RVV_F32M4 and returns its status code.
int QR32f(float* src1, size_t src1_step, int m, int n, int k, float* src2, size_t src2_step, float* dst, int* info) {
|
||||||
|
return QR<RVV_F32M4>(src1, src1_step, m, n, k, src2, src2_step, dst, info);
|
||||||
|
}
|
||||||
|
// double entry point: forwards every argument unchanged to the file-local QR template
// instantiated with RVV_F64M4 and returns its status code.
int QR64f(double* src1, size_t src1_step, int m, int n, int k, double* src2, size_t src2_step, double* dst, int* info) {
|
||||||
|
return QR<RVV_F64M4>(src1, src1_step, m, n, k, src2, src2_step, dst, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
@ -1,17 +1,14 @@
|
|||||||
// This file is part of OpenCV project.
|
// This file is part of OpenCV project.
|
||||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
// of this distribution and at http://opencv.org/license.html.
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
#ifndef OPENCV_HAL_RVV_SPLIT_HPP_INCLUDED
|
|
||||||
#define OPENCV_HAL_RVV_SPLIT_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
#include "rvv_hal.hpp"
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
#undef cv_hal_split8u
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#define cv_hal_split8u cv::cv_hal_rvv::split8u
|
|
||||||
|
|
||||||
inline int split8u(const uchar* src, uchar** dst, int len, int cn)
|
int split8u(const uchar* src, uchar** dst, int len, int cn)
|
||||||
{
|
{
|
||||||
int vl = 0;
|
int vl = 0;
|
||||||
if (cn == 1)
|
if (cn == 1)
|
||||||
@ -89,5 +86,6 @@ inline int split8u(const uchar* src, uchar** dst, int len, int cn)
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
#endif
|
|
||||||
|
}}} // cv::rvv_hal::core
|
74
hal/riscv-rvv/src/core/sqrt.cpp
Normal file
74
hal/riscv-rvv/src/core/sqrt.cpp
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level
|
||||||
|
// directory of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Element-wise square root of `_len` elements read from `src` and written to `dst`.
// SQRT_T provides the RVV type wrapper (SQRT_T::T) used for setvl/vload/vstore and the
// SQRT_T::iter_times constant forwarded to common::sqrt (presumably the number of
// refinement iterations -- confirm in common.hpp).
// Always returns CV_HAL_ERROR_OK.
// NOTE(review): `_len` is converted to size_t, so a negative value would wrap to a very
// large count; callers presumably always pass _len >= 0 -- confirm.
template <typename SQRT_T, typename Elem = typename SQRT_T::T::ElemType>
|
||||||
|
inline int sqrt(const Elem* src, Elem* dst, int _len)
|
||||||
|
{
|
||||||
|
size_t vl;
|
||||||
|
for (size_t len = _len; len > 0; len -= vl, src += vl, dst += vl) // consume vl elements per iteration
|
||||||
|
{
|
||||||
|
vl = SQRT_T::T::setvl(len);
|
||||||
|
auto x = SQRT_T::T::vload(src, vl);
|
||||||
|
SQRT_T::T::vstore(dst, common::sqrt<SQRT_T::iter_times>(x, vl), vl);
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Element-wise common::invSqrt of `_len` elements read from `src` and written to `dst`.
// SQRT_T provides the RVV type wrapper (SQRT_T::T) used for setvl/vload/vstore and the
// SQRT_T::iter_times constant forwarded to common::invSqrt (presumably the number of
// refinement iterations -- confirm in common.hpp).
// Always returns CV_HAL_ERROR_OK.
// NOTE(review): `_len` is converted to size_t, so a negative value would wrap to a very
// large count; callers presumably always pass _len >= 0 -- confirm.
template <typename SQRT_T, typename Elem = typename SQRT_T::T::ElemType>
|
||||||
|
inline int invSqrt(const Elem* src, Elem* dst, int _len)
|
||||||
|
{
|
||||||
|
size_t vl;
|
||||||
|
for (size_t len = _len; len > 0; len -= vl, src += vl, dst += vl) // consume vl elements per iteration
|
||||||
|
{
|
||||||
|
vl = SQRT_T::T::setvl(len);
|
||||||
|
auto x = SQRT_T::T::vload(src, vl);
|
||||||
|
SQRT_T::T::vstore(dst, common::invSqrt<SQRT_T::iter_times>(x, vl), vl);
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
|
// float entry point: forwards to the file-local sqrt template configured with
// common::Sqrt32f<RVV_F32M8> and returns its status code.
int sqrt32f(const float* src, float* dst, int len) {
|
||||||
|
return sqrt<common::Sqrt32f<RVV_F32M8>>(src, dst, len);
|
||||||
|
}
|
||||||
|
// double entry point: forwards to the file-local sqrt template configured with
// common::Sqrt64f<RVV_F64M8> and returns its status code.
int sqrt64f(const double* src, double* dst, int len) {
|
||||||
|
return sqrt<common::Sqrt64f<RVV_F64M8>>(src, dst, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
// float entry point for the inverse square root: forwards to the file-local invSqrt
// template. The RVV type is RVV_F32M4 when compiled with clang and RVV_F32M8 otherwise.
int invSqrt32f(const float* src, float* dst, int len) {
|
||||||
|
#ifdef __clang__
|
||||||
|
// Strange bug in clang: invSqrt uses 2 LMUL registers to store the mask, which causes memory accesses.
|
||||||
|
// So a smaller LMUL (M4 instead of M8) is used here.
|
||||||
|
return invSqrt<common::Sqrt32f<RVV_F32M4>>(src, dst, len);
|
||||||
|
#else
|
||||||
|
return invSqrt<common::Sqrt32f<RVV_F32M8>>(src, dst, len);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
// double entry point for the inverse square root: forwards to the file-local invSqrt
// template. The RVV type is RVV_F64M4 when compiled with clang and RVV_F64M8 otherwise.
int invSqrt64f(const double* src, double* dst, int len) {
|
||||||
|
#ifdef __clang__
|
||||||
|
// Strange bug in clang: invSqrt uses 2 LMUL registers to store the mask, which causes memory accesses.
|
||||||
|
// So a smaller LMUL (M4 instead of M8) is used here.
|
||||||
|
return invSqrt<common::Sqrt64f<RVV_F64M4>>(src, dst, len);
|
||||||
|
#else
|
||||||
|
return invSqrt<common::Sqrt64f<RVV_F64M8>>(src, dst, len);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
@ -4,22 +4,17 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_SVD_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_SVD_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <cfloat>
|
#include <cfloat>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <riscv_vector.h>
|
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace svd {
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
#undef cv_hal_SVD32f
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#define cv_hal_SVD32f cv::cv_hal_rvv::svd::SVD<cv::cv_hal_rvv::RVV_F32M4>
|
|
||||||
#undef cv_hal_SVD64f
|
namespace {
|
||||||
#define cv_hal_SVD64f cv::cv_hal_rvv::svd::SVD<cv::cv_hal_rvv::RVV_F64M4>
|
|
||||||
|
|
||||||
// the algorithm is copied from core/src/lapack.cpp,
|
// the algorithm is copied from core/src/lapack.cpp,
|
||||||
// in the function template static void cv::JacobiSVDImpl_
|
// in the function template static void cv::JacobiSVDImpl_
|
||||||
@ -268,6 +263,15 @@ inline int SVD(T* src, size_t src_step, T* w, T*, size_t, T* vt, size_t vt_step,
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
} // anonymous
|
||||||
|
|
||||||
#endif
|
// float entry point: forwards every argument unchanged to the file-local SVD template
// instantiated with RVV_F32M4 and returns its status code.
// NOTE(review): the SVD template signature shown in this diff leaves the `u`/`u_step`
// parameters unnamed, so they look unused by the implementation -- confirm.
int SVD32f(float* src, size_t src_step, float* w, float* u, size_t u_step, float* vt, size_t vt_step, int m, int n, int flags) {
|
||||||
|
return SVD<RVV_F32M4>(src, src_step, w, u, u_step, vt, vt_step, m, n, flags);
|
||||||
|
}
|
||||||
|
// double entry point: forwards every argument unchanged to the file-local SVD template
// instantiated with RVV_F64M4 and returns its status code.
// NOTE(review): the SVD template signature shown in this diff leaves the `u`/`u_step`
// parameters unnamed, so they look unused by the implementation -- confirm.
int SVD64f(double* src, size_t src_step, double* w, double* u, size_t u_step, double* vt, size_t vt_step, int m, int n, int flags) {
|
||||||
|
return SVD<RVV_F64M4>(src, src_step, w, u, u_step, vt, vt_step, m, n, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::core
|
@ -5,12 +5,7 @@
|
|||||||
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_TRANSPOSE_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_TRANSPOSE_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace transpose {
|
|
||||||
|
|
||||||
#if defined (__clang__) && __clang_major__ < 18
|
#if defined (__clang__) && __clang_major__ < 18
|
||||||
#define OPENCV_HAL_IMPL_RVV_VCREATE_x4(suffix, width, v0, v1, v2, v3) \
|
#define OPENCV_HAL_IMPL_RVV_VCREATE_x4(suffix, width, v0, v1, v2, v3) \
|
||||||
@ -35,18 +30,22 @@ namespace cv { namespace cv_hal_rvv { namespace transpose {
|
|||||||
#define __riscv_vcreate_v_i64m1x8(v0, v1, v2, v3, v4, v5, v6, v7) OPENCV_HAL_IMPL_RVV_VCREATE_x8(i64, 1, v0, v1, v2, v3, v4, v5, v6, v7)
|
#define __riscv_vcreate_v_i64m1x8(v0, v1, v2, v3, v4, v5, v6, v7) OPENCV_HAL_IMPL_RVV_VCREATE_x8(i64, 1, v0, v1, v2, v3, v4, v5, v6, v7)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace core {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
static void transpose2d_8u(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int src_width, int src_height) {
|
static void transpose2d_8u(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int src_width, int src_height) {
|
||||||
auto transpose_8u_8xVl = [](const uchar *src, size_t src_step, uchar *dst, size_t dst_step, const int vl) {
|
auto transpose_8u_8xVl = [](const uchar *src, size_t sstep, uchar *dst, size_t dstep, const int vl) {
|
||||||
auto v0 = __riscv_vle8_v_u8m1(src, vl);
|
auto v0 = __riscv_vle8_v_u8m1(src, vl);
|
||||||
auto v1 = __riscv_vle8_v_u8m1(src + src_step, vl);
|
auto v1 = __riscv_vle8_v_u8m1(src + sstep, vl);
|
||||||
auto v2 = __riscv_vle8_v_u8m1(src + 2 * src_step, vl);
|
auto v2 = __riscv_vle8_v_u8m1(src + 2 * sstep, vl);
|
||||||
auto v3 = __riscv_vle8_v_u8m1(src + 3 * src_step, vl);
|
auto v3 = __riscv_vle8_v_u8m1(src + 3 * sstep, vl);
|
||||||
auto v4 = __riscv_vle8_v_u8m1(src + 4 * src_step, vl);
|
auto v4 = __riscv_vle8_v_u8m1(src + 4 * sstep, vl);
|
||||||
auto v5 = __riscv_vle8_v_u8m1(src + 5 * src_step, vl);
|
auto v5 = __riscv_vle8_v_u8m1(src + 5 * sstep, vl);
|
||||||
auto v6 = __riscv_vle8_v_u8m1(src + 6 * src_step, vl);
|
auto v6 = __riscv_vle8_v_u8m1(src + 6 * sstep, vl);
|
||||||
auto v7 = __riscv_vle8_v_u8m1(src + 7 * src_step, vl);
|
auto v7 = __riscv_vle8_v_u8m1(src + 7 * sstep, vl);
|
||||||
vuint8m1x8_t v = __riscv_vcreate_v_u8m1x8(v0, v1, v2, v3, v4, v5, v6, v7);
|
vuint8m1x8_t v = __riscv_vcreate_v_u8m1x8(v0, v1, v2, v3, v4, v5, v6, v7);
|
||||||
__riscv_vssseg8e8(dst, dst_step, v, vl);
|
__riscv_vssseg8e8(dst, dstep, v, vl);
|
||||||
};
|
};
|
||||||
|
|
||||||
int h = 0, w = 0;
|
int h = 0, w = 0;
|
||||||
@ -72,17 +71,17 @@ static void transpose2d_8u(const uchar *src_data, size_t src_step, uchar *dst_da
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void transpose2d_16u(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int src_width, int src_height) {
|
static void transpose2d_16u(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int src_width, int src_height) {
|
||||||
auto transpose_16u_8xVl = [](const ushort *src, size_t src_step, ushort *dst, size_t dst_step, const int vl) {
|
auto transpose_16u_8xVl = [](const ushort *src, size_t sstep, ushort *dst, size_t dstep, const int vl) {
|
||||||
auto v0 = __riscv_vle16_v_u16m1(src, vl);
|
auto v0 = __riscv_vle16_v_u16m1(src, vl);
|
||||||
auto v1 = __riscv_vle16_v_u16m1(src + src_step, vl);
|
auto v1 = __riscv_vle16_v_u16m1(src + sstep, vl);
|
||||||
auto v2 = __riscv_vle16_v_u16m1(src + 2 * src_step, vl);
|
auto v2 = __riscv_vle16_v_u16m1(src + 2 * sstep, vl);
|
||||||
auto v3 = __riscv_vle16_v_u16m1(src + 3 * src_step, vl);
|
auto v3 = __riscv_vle16_v_u16m1(src + 3 * sstep, vl);
|
||||||
auto v4 = __riscv_vle16_v_u16m1(src + 4 * src_step, vl);
|
auto v4 = __riscv_vle16_v_u16m1(src + 4 * sstep, vl);
|
||||||
auto v5 = __riscv_vle16_v_u16m1(src + 5 * src_step, vl);
|
auto v5 = __riscv_vle16_v_u16m1(src + 5 * sstep, vl);
|
||||||
auto v6 = __riscv_vle16_v_u16m1(src + 6 * src_step, vl);
|
auto v6 = __riscv_vle16_v_u16m1(src + 6 * sstep, vl);
|
||||||
auto v7 = __riscv_vle16_v_u16m1(src + 7 * src_step, vl);
|
auto v7 = __riscv_vle16_v_u16m1(src + 7 * sstep, vl);
|
||||||
vuint16m1x8_t v = __riscv_vcreate_v_u16m1x8(v0, v1, v2, v3, v4, v5, v6, v7);
|
vuint16m1x8_t v = __riscv_vcreate_v_u16m1x8(v0, v1, v2, v3, v4, v5, v6, v7);
|
||||||
__riscv_vssseg8e16(dst, dst_step, v, vl);
|
__riscv_vssseg8e16(dst, dstep, v, vl);
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t src_step_base = src_step / sizeof(ushort);
|
size_t src_step_base = src_step / sizeof(ushort);
|
||||||
@ -111,13 +110,13 @@ static void transpose2d_16u(const uchar *src_data, size_t src_step, uchar *dst_d
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void transpose2d_32s(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int src_width, int src_height) {
|
static void transpose2d_32s(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int src_width, int src_height) {
|
||||||
auto transpose_32s_4xVl = [](const int *src, size_t src_step, int *dst, size_t dst_step, const int vl) {
|
auto transpose_32s_4xVl = [](const int *src, size_t sstep, int *dst, size_t dstep, const int vl) {
|
||||||
auto v0 = __riscv_vle32_v_i32m1(src, vl);
|
auto v0 = __riscv_vle32_v_i32m1(src, vl);
|
||||||
auto v1 = __riscv_vle32_v_i32m1(src + src_step, vl);
|
auto v1 = __riscv_vle32_v_i32m1(src + sstep, vl);
|
||||||
auto v2 = __riscv_vle32_v_i32m1(src + 2 * src_step, vl);
|
auto v2 = __riscv_vle32_v_i32m1(src + 2 * sstep, vl);
|
||||||
auto v3 = __riscv_vle32_v_i32m1(src + 3 * src_step, vl);
|
auto v3 = __riscv_vle32_v_i32m1(src + 3 * sstep, vl);
|
||||||
vint32m1x4_t v = __riscv_vcreate_v_i32m1x4(v0, v1, v2, v3);
|
vint32m1x4_t v = __riscv_vcreate_v_i32m1x4(v0, v1, v2, v3);
|
||||||
__riscv_vssseg4e32(dst, dst_step, v, vl);
|
__riscv_vssseg4e32(dst, dstep, v, vl);
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t src_step_base = src_step / sizeof(int);
|
size_t src_step_base = src_step / sizeof(int);
|
||||||
@ -146,17 +145,17 @@ static void transpose2d_32s(const uchar *src_data, size_t src_step, uchar *dst_d
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void transpose2d_32sC2(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int src_width, int src_height) {
|
static void transpose2d_32sC2(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int src_width, int src_height) {
|
||||||
auto transpose_64s_8xVl = [](const int64_t *src, size_t src_step, int64_t *dst, size_t dst_step, const int vl) {
|
auto transpose_64s_8xVl = [](const int64_t *src, size_t sstep, int64_t *dst, size_t dstep, const int vl) {
|
||||||
auto v0 = __riscv_vle64_v_i64m1(src, vl);
|
auto v0 = __riscv_vle64_v_i64m1(src, vl);
|
||||||
auto v1 = __riscv_vle64_v_i64m1(src + src_step, vl);
|
auto v1 = __riscv_vle64_v_i64m1(src + sstep, vl);
|
||||||
auto v2 = __riscv_vle64_v_i64m1(src + 2 * src_step, vl);
|
auto v2 = __riscv_vle64_v_i64m1(src + 2 * sstep, vl);
|
||||||
auto v3 = __riscv_vle64_v_i64m1(src + 3 * src_step, vl);
|
auto v3 = __riscv_vle64_v_i64m1(src + 3 * sstep, vl);
|
||||||
auto v4 = __riscv_vle64_v_i64m1(src + 4 * src_step, vl);
|
auto v4 = __riscv_vle64_v_i64m1(src + 4 * sstep, vl);
|
||||||
auto v5 = __riscv_vle64_v_i64m1(src + 5 * src_step, vl);
|
auto v5 = __riscv_vle64_v_i64m1(src + 5 * sstep, vl);
|
||||||
auto v6 = __riscv_vle64_v_i64m1(src + 6 * src_step, vl);
|
auto v6 = __riscv_vle64_v_i64m1(src + 6 * sstep, vl);
|
||||||
auto v7 = __riscv_vle64_v_i64m1(src + 7 * src_step, vl);
|
auto v7 = __riscv_vle64_v_i64m1(src + 7 * sstep, vl);
|
||||||
vint64m1x8_t v = __riscv_vcreate_v_i64m1x8(v0, v1, v2, v3, v4, v5, v6, v7);
|
vint64m1x8_t v = __riscv_vcreate_v_i64m1x8(v0, v1, v2, v3, v4, v5, v6, v7);
|
||||||
__riscv_vssseg8e64(dst, dst_step, v, vl);
|
__riscv_vssseg8e64(dst, dstep, v, vl);
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t src_step_base = src_step / sizeof(int64_t);
|
size_t src_step_base = src_step / sizeof(int64_t);
|
||||||
@ -184,12 +183,9 @@ static void transpose2d_32sC2(const uchar *src_data, size_t src_step, uchar *dst
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef cv_hal_transpose2d
|
|
||||||
#define cv_hal_transpose2d cv::cv_hal_rvv::transpose::transpose2d
|
|
||||||
|
|
||||||
using Transpose2dFunc = void (*)(const uchar*, size_t, uchar*, size_t, int, int);
|
using Transpose2dFunc = void (*)(const uchar*, size_t, uchar*, size_t, int, int);
|
||||||
inline int transpose2d(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step,
|
int transpose2d(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step,
|
||||||
int src_width, int src_height, int element_size) {
|
int src_width, int src_height, int element_size) {
|
||||||
if (src_data == dst_data) {
|
if (src_data == dst_data) {
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
@ -205,7 +201,7 @@ inline int transpose2d(const uchar* src_data, size_t src_step, uchar* dst_data,
|
|||||||
0, 0, 0, 0,
|
0, 0, 0, 0,
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
Transpose2dFunc func = tab[element_size];
|
Transpose2dFunc func = element_size <= 32 ? tab[element_size] : nullptr;
|
||||||
if (!func) {
|
if (!func) {
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
@ -215,6 +211,6 @@ inline int transpose2d(const uchar* src_data, size_t src_step, uchar* dst_data,
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}} // cv::cv_hal_rvv::transpose
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif // OPENCV_HAL_RVV_TRANSPOSE_HPP_INCLUDED
|
}}} // cv::rvv_hal::core
|
361
hal/riscv-rvv/src/imgproc/bilateral_filter.cpp
Normal file
361
hal/riscv-rvv/src/imgproc/bilateral_filter.cpp
Normal file
@ -0,0 +1,361 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// the algorithm is copied from imgproc/src/bilateral_filter.simd.cpp
|
||||||
|
// in the functor BilateralFilter_8u_Invoker
|
||||||
|
static inline int bilateralFilter8UC1(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int radius, int maxk, const int* space_ofs, const float* space_weight, const float* color_weight)
|
||||||
|
{
|
||||||
|
constexpr int align = 31;
|
||||||
|
std::vector<float> _sum(width + align), _wsum(width + align);
|
||||||
|
float* sum = reinterpret_cast<float*>(((size_t)_sum.data() + align) & ~align);
|
||||||
|
float* wsum = reinterpret_cast<float*>(((size_t)_wsum.data() + align) & ~align);
|
||||||
|
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
const uchar* sptr = src_data + (i+radius) * src_step + radius;
|
||||||
|
memset(sum, 0, sizeof(float) * width);
|
||||||
|
memset(wsum, 0, sizeof(float) * width);
|
||||||
|
for(int k = 0; k < maxk; k++)
|
||||||
|
{
|
||||||
|
const uchar* ksptr = sptr + space_ofs[k];
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m2(width - j);
|
||||||
|
auto src = __riscv_vle8_v_u8m2(sptr + j, vl);
|
||||||
|
auto ksrc = __riscv_vle8_v_u8m2(ksptr + j, vl);
|
||||||
|
auto diff = __riscv_vsub(__riscv_vmaxu(src, ksrc, vl), __riscv_vminu(src, ksrc, vl), vl);
|
||||||
|
auto w = __riscv_vloxei16_v_f32m8(color_weight, __riscv_vmul(__riscv_vzext_vf2(diff, vl), sizeof(float), vl), vl);
|
||||||
|
w = __riscv_vfmul(w, space_weight[k], vl);
|
||||||
|
|
||||||
|
__riscv_vse32(wsum + j, __riscv_vfadd(w, __riscv_vle32_v_f32m8(wsum + j, vl), vl), vl);
|
||||||
|
__riscv_vse32(sum + j, __riscv_vfmadd(w, __riscv_vfwcvt_f(__riscv_vzext_vf2(ksrc, vl), vl), __riscv_vle32_v_f32m8(sum + j, vl), vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m2(width - j);
|
||||||
|
auto dst = __riscv_vfncvt_xu(__riscv_vfdiv(__riscv_vle32_v_f32m8(sum + j, vl), __riscv_vle32_v_f32m8(wsum + j, vl), vl), vl);
|
||||||
|
__riscv_vse8(dst_data + i * dst_step + j, __riscv_vncvt_x(dst, vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int bilateralFilter8UC3(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int radius, int maxk, const int* space_ofs, const float* space_weight, const float* color_weight)
|
||||||
|
{
|
||||||
|
constexpr int align = 31;
|
||||||
|
std::vector<float> _sum_b(width + align), _sum_g(width + align), _sum_r(width + align), _wsum(width + align);
|
||||||
|
float* sum_b = reinterpret_cast<float*>(((size_t)_sum_b.data() + align) & ~align);
|
||||||
|
float* sum_g = reinterpret_cast<float*>(((size_t)_sum_g.data() + align) & ~align);
|
||||||
|
float* sum_r = reinterpret_cast<float*>(((size_t)_sum_r.data() + align) & ~align);
|
||||||
|
float* wsum = reinterpret_cast<float*>(((size_t)_wsum.data() + align) & ~align);
|
||||||
|
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
const uchar* sptr = src_data + (i+radius) * src_step + radius*3;
|
||||||
|
memset(sum_b, 0, sizeof(float) * width);
|
||||||
|
memset(sum_g, 0, sizeof(float) * width);
|
||||||
|
memset(sum_r, 0, sizeof(float) * width);
|
||||||
|
memset(wsum, 0, sizeof(float) * width);
|
||||||
|
for(int k = 0; k < maxk; k++)
|
||||||
|
{
|
||||||
|
const uchar* ksptr = sptr + space_ofs[k];
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m2(width - j);
|
||||||
|
auto src = __riscv_vlseg3e8_v_u8m2x3(sptr + j * 3, vl);
|
||||||
|
auto src0 = __riscv_vget_v_u8m2x3_u8m2(src, 0);
|
||||||
|
auto src1 = __riscv_vget_v_u8m2x3_u8m2(src, 1);
|
||||||
|
auto src2 = __riscv_vget_v_u8m2x3_u8m2(src, 2);
|
||||||
|
src = __riscv_vlseg3e8_v_u8m2x3(ksptr + j * 3, vl);
|
||||||
|
auto ksrc0 = __riscv_vget_v_u8m2x3_u8m2(src, 0);
|
||||||
|
auto ksrc1 = __riscv_vget_v_u8m2x3_u8m2(src, 1);
|
||||||
|
auto ksrc2 = __riscv_vget_v_u8m2x3_u8m2(src, 2);
|
||||||
|
|
||||||
|
auto diff0 = __riscv_vsub(__riscv_vmaxu(src0, ksrc0, vl), __riscv_vminu(src0, ksrc0, vl), vl);
|
||||||
|
auto diff1 = __riscv_vsub(__riscv_vmaxu(src1, ksrc1, vl), __riscv_vminu(src1, ksrc1, vl), vl);
|
||||||
|
auto diff2 = __riscv_vsub(__riscv_vmaxu(src2, ksrc2, vl), __riscv_vminu(src2, ksrc2, vl), vl);
|
||||||
|
auto w = __riscv_vloxei16_v_f32m8(color_weight, __riscv_vmul(__riscv_vadd(__riscv_vadd(__riscv_vzext_vf2(diff0, vl), __riscv_vzext_vf2(diff1, vl), vl), __riscv_vzext_vf2(diff2, vl), vl), sizeof(float), vl), vl);
|
||||||
|
w = __riscv_vfmul(w, space_weight[k], vl);
|
||||||
|
|
||||||
|
__riscv_vse32(wsum + j, __riscv_vfadd(w, __riscv_vle32_v_f32m8(wsum + j, vl), vl), vl);
|
||||||
|
__riscv_vse32(sum_b + j, __riscv_vfmadd(w, __riscv_vfwcvt_f(__riscv_vzext_vf2(ksrc0, vl), vl), __riscv_vle32_v_f32m8(sum_b + j, vl), vl), vl);
|
||||||
|
__riscv_vse32(sum_g + j, __riscv_vfmadd(w, __riscv_vfwcvt_f(__riscv_vzext_vf2(ksrc1, vl), vl), __riscv_vle32_v_f32m8(sum_g + j, vl), vl), vl);
|
||||||
|
__riscv_vse32(sum_r + j, __riscv_vfmadd(w, __riscv_vfwcvt_f(__riscv_vzext_vf2(ksrc2, vl), vl), __riscv_vle32_v_f32m8(sum_r + j, vl), vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m2(width - j);
|
||||||
|
auto w = __riscv_vfrdiv(__riscv_vle32_v_f32m8(wsum + j, vl), 1.0f, vl);
|
||||||
|
vuint8m2x3_t dst{};
|
||||||
|
dst = __riscv_vset_v_u8m2_u8m2x3(dst, 0,__riscv_vncvt_x(__riscv_vfncvt_xu(__riscv_vfmul(__riscv_vle32_v_f32m8(sum_b + j, vl), w, vl), vl), vl));
|
||||||
|
dst = __riscv_vset_v_u8m2_u8m2x3(dst, 1,__riscv_vncvt_x(__riscv_vfncvt_xu(__riscv_vfmul(__riscv_vle32_v_f32m8(sum_g + j, vl), w, vl), vl), vl));
|
||||||
|
dst = __riscv_vset_v_u8m2_u8m2x3(dst, 2,__riscv_vncvt_x(__riscv_vfncvt_xu(__riscv_vfmul(__riscv_vle32_v_f32m8(sum_r + j, vl), w, vl), vl), vl));
|
||||||
|
__riscv_vsseg3e8(dst_data + i * dst_step + j * 3, dst, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
// the algorithm is copied from imgproc/src/bilateral_filter.simd.cpp
|
||||||
|
// in the functor BilateralFilter_32f_Invoker
|
||||||
|
static inline int bilateralFilter32FC1(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int radius, int maxk, const int* space_ofs, const float* space_weight, const float* expLUT, float scale_index)
|
||||||
|
{
|
||||||
|
constexpr int align = 31;
|
||||||
|
std::vector<float> _sum(width + align), _wsum(width + align);
|
||||||
|
float* sum = reinterpret_cast<float*>(((size_t)_sum.data() + align) & ~align);
|
||||||
|
float* wsum = reinterpret_cast<float*>(((size_t)_wsum.data() + align) & ~align);
|
||||||
|
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
const float* sptr = reinterpret_cast<const float*>(src_data + (i+radius) * src_step) + radius;
|
||||||
|
memset(sum, 0, sizeof(float) * width);
|
||||||
|
memset(wsum, 0, sizeof(float) * width);
|
||||||
|
for(int k = 0; k < maxk; k++)
|
||||||
|
{
|
||||||
|
const float* ksptr = sptr + space_ofs[k];
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e32m4(width - j);
|
||||||
|
auto src = __riscv_vle32_v_f32m4(sptr + j, vl);
|
||||||
|
auto ksrc = __riscv_vle32_v_f32m4(ksptr + j, vl);
|
||||||
|
auto diff = __riscv_vfmul(__riscv_vfabs(__riscv_vfsub(src, ksrc, vl), vl), scale_index, vl);
|
||||||
|
auto idx = __riscv_vfcvt_rtz_x(diff, vl);
|
||||||
|
auto alpha = __riscv_vfsub(diff, __riscv_vfcvt_f(idx, vl), vl);
|
||||||
|
|
||||||
|
auto exp = __riscv_vloxseg2ei32_v_f32m4x2(expLUT, __riscv_vreinterpret_v_i32m4_u32m4(__riscv_vmul(idx, sizeof(float), vl)), vl);
|
||||||
|
auto w = __riscv_vfmadd(alpha, __riscv_vfsub(__riscv_vget_v_f32m4x2_f32m4(exp, 1), __riscv_vget_v_f32m4x2_f32m4(exp, 0), vl), __riscv_vget_v_f32m4x2_f32m4(exp, 0), vl);
|
||||||
|
w = __riscv_vfmul(w, space_weight[k], vl);
|
||||||
|
|
||||||
|
__riscv_vse32(wsum + j, __riscv_vfadd(w, __riscv_vle32_v_f32m4(wsum + j, vl), vl), vl);
|
||||||
|
__riscv_vse32(sum + j, __riscv_vfmadd(w, ksrc, __riscv_vle32_v_f32m4(sum + j, vl), vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e32m4(width - j);
|
||||||
|
auto src = __riscv_vle32_v_f32m4(sptr + j, vl);
|
||||||
|
auto dst = __riscv_vfdiv(__riscv_vfadd(__riscv_vle32_v_f32m4(sum + j, vl), src, vl), __riscv_vfadd(__riscv_vle32_v_f32m4(wsum + j, vl), 1, vl), vl);
|
||||||
|
__riscv_vse32(reinterpret_cast<float*>(dst_data + i * dst_step) + j, dst, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int bilateralFilter32FC3(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int radius, int maxk, const int* space_ofs, const float* space_weight, const float* expLUT, float scale_index)
|
||||||
|
{
|
||||||
|
constexpr int align = 31;
|
||||||
|
std::vector<float> _sum_b(width + align), _sum_g(width + align), _sum_r(width + align), _wsum(width + align);
|
||||||
|
float* sum_b = reinterpret_cast<float*>(((size_t)_sum_b.data() + align) & ~align);
|
||||||
|
float* sum_g = reinterpret_cast<float*>(((size_t)_sum_g.data() + align) & ~align);
|
||||||
|
float* sum_r = reinterpret_cast<float*>(((size_t)_sum_r.data() + align) & ~align);
|
||||||
|
float* wsum = reinterpret_cast<float*>(((size_t)_wsum.data() + align) & ~align);
|
||||||
|
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
const float* sptr = reinterpret_cast<const float*>(src_data + (i+radius) * src_step) + radius*3;
|
||||||
|
memset(sum_b, 0, sizeof(float) * width);
|
||||||
|
memset(sum_g, 0, sizeof(float) * width);
|
||||||
|
memset(sum_r, 0, sizeof(float) * width);
|
||||||
|
memset(wsum, 0, sizeof(float) * width);
|
||||||
|
for(int k = 0; k < maxk; k++)
|
||||||
|
{
|
||||||
|
const float* ksptr = sptr + space_ofs[k];
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e32m2(width - j);
|
||||||
|
auto src = __riscv_vlseg3e32_v_f32m2x3(sptr + j * 3, vl);
|
||||||
|
auto src0 = __riscv_vget_v_f32m2x3_f32m2(src, 0);
|
||||||
|
auto src1 = __riscv_vget_v_f32m2x3_f32m2(src, 1);
|
||||||
|
auto src2 = __riscv_vget_v_f32m2x3_f32m2(src, 2);
|
||||||
|
src = __riscv_vlseg3e32_v_f32m2x3(ksptr + j * 3, vl);
|
||||||
|
auto ksrc0 = __riscv_vget_v_f32m2x3_f32m2(src, 0);
|
||||||
|
auto ksrc1 = __riscv_vget_v_f32m2x3_f32m2(src, 1);
|
||||||
|
auto ksrc2 = __riscv_vget_v_f32m2x3_f32m2(src, 2);
|
||||||
|
|
||||||
|
auto diff = __riscv_vfmul(__riscv_vfadd(__riscv_vfadd(__riscv_vfabs(__riscv_vfsub(src0, ksrc0, vl), vl), __riscv_vfabs(__riscv_vfsub(src1, ksrc1, vl), vl), vl), __riscv_vfabs(__riscv_vfsub(src2, ksrc2, vl), vl), vl), scale_index, vl);
|
||||||
|
auto idx = __riscv_vfcvt_rtz_x(diff, vl);
|
||||||
|
auto alpha = __riscv_vfsub(diff, __riscv_vfcvt_f(idx, vl), vl);
|
||||||
|
|
||||||
|
auto exp = __riscv_vloxseg2ei32_v_f32m2x2(expLUT, __riscv_vreinterpret_v_i32m2_u32m2(__riscv_vmul(idx, sizeof(float), vl)), vl);
|
||||||
|
auto w = __riscv_vfmadd(alpha, __riscv_vfsub(__riscv_vget_v_f32m2x2_f32m2(exp, 1), __riscv_vget_v_f32m2x2_f32m2(exp, 0), vl), __riscv_vget_v_f32m2x2_f32m2(exp, 0), vl);
|
||||||
|
w = __riscv_vfmul(w, space_weight[k], vl);
|
||||||
|
|
||||||
|
__riscv_vse32(wsum + j, __riscv_vfadd(w, __riscv_vle32_v_f32m2(wsum + j, vl), vl), vl);
|
||||||
|
__riscv_vse32(sum_b + j, __riscv_vfmadd(w, ksrc0, __riscv_vle32_v_f32m2(sum_b + j, vl), vl), vl);
|
||||||
|
__riscv_vse32(sum_g + j, __riscv_vfmadd(w, ksrc1, __riscv_vle32_v_f32m2(sum_g + j, vl), vl), vl);
|
||||||
|
__riscv_vse32(sum_r + j, __riscv_vfmadd(w, ksrc2, __riscv_vle32_v_f32m2(sum_r + j, vl), vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e32m2(width - j);
|
||||||
|
auto w = __riscv_vfrdiv(__riscv_vfadd(__riscv_vle32_v_f32m2(wsum + j, vl), 1, vl), 1, vl);
|
||||||
|
auto src = __riscv_vlseg3e32_v_f32m2x3(sptr + j * 3, vl);
|
||||||
|
auto src0 = __riscv_vget_v_f32m2x3_f32m2(src, 0);
|
||||||
|
auto src1 = __riscv_vget_v_f32m2x3_f32m2(src, 1);
|
||||||
|
auto src2 = __riscv_vget_v_f32m2x3_f32m2(src, 2);
|
||||||
|
|
||||||
|
vfloat32m2x3_t dst{};
|
||||||
|
dst = __riscv_vset_v_f32m2_f32m2x3(dst, 0, __riscv_vfmul(w, __riscv_vfadd(__riscv_vle32_v_f32m2(sum_b + j, vl), src0, vl), vl));
|
||||||
|
dst = __riscv_vset_v_f32m2_f32m2x3(dst, 1, __riscv_vfmul(w, __riscv_vfadd(__riscv_vle32_v_f32m2(sum_g + j, vl), src1, vl), vl));
|
||||||
|
dst = __riscv_vset_v_f32m2_f32m2x3(dst, 2, __riscv_vfmul(w, __riscv_vfadd(__riscv_vle32_v_f32m2(sum_r + j, vl), src2, vl), vl));
|
||||||
|
__riscv_vsseg3e32(reinterpret_cast<float*>(dst_data + i * dst_step) + j * 3, dst, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
|
// the algorithm is copied from imgproc/src/bilateral_filter.dispatch.cpp
|
||||||
|
// in the function static void bilateralFilter_8u and bilateralFilter_32f
|
||||||
|
int bilateralFilter(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step,
|
||||||
|
int width, int height, int depth, int cn, int d, double sigma_color, double sigma_space, int border_type)
|
||||||
|
{
|
||||||
|
const int type = CV_MAKETYPE(depth, cn);
|
||||||
|
if (type != CV_8UC1 && type != CV_8UC3 && type != CV_32FC1 && type != CV_32FC3)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
if (type == CV_32FC1 && width * height > 1 << 20)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
if (src_data == dst_data || border_type & BORDER_ISOLATED)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
|
sigma_color = sigma_color <= 0 ? 1 : sigma_color;
|
||||||
|
sigma_space = sigma_space <= 0 ? 1 : sigma_space;
|
||||||
|
double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
|
||||||
|
double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
|
||||||
|
int radius = d <= 0 ? std::round(sigma_space*1.5) : d/2;
|
||||||
|
radius = std::max(radius, 1);
|
||||||
|
d = radius*2 + 1;
|
||||||
|
|
||||||
|
const int size = depth == CV_32F ? cn * sizeof(float) : cn;
|
||||||
|
const int temp_step = (width + radius * 2) * size;
|
||||||
|
std::vector<uchar> _temp((width + radius * 2) * (height + radius * 2) * size, 0);
|
||||||
|
uchar* temp = _temp.data();
|
||||||
|
std::vector<int> width_interpolate(radius * 2);
|
||||||
|
for (int j = 0; j < radius; j++)
|
||||||
|
{
|
||||||
|
width_interpolate[j] = common::borderInterpolate(j - radius, width, border_type);
|
||||||
|
width_interpolate[j + radius] = common::borderInterpolate(width + j, width, border_type);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < height + radius * 2; i++)
|
||||||
|
{
|
||||||
|
int x = common::borderInterpolate(i - radius, height, border_type);
|
||||||
|
if (x != -1)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < radius; j++)
|
||||||
|
{
|
||||||
|
int y = width_interpolate[j];
|
||||||
|
if (y != -1)
|
||||||
|
memcpy(temp + i * temp_step + j * size, src_data + x * src_step + y * size, size);
|
||||||
|
y = width_interpolate[j + radius];
|
||||||
|
if (y != -1)
|
||||||
|
memcpy(temp + i * temp_step + (width + j + radius) * size, src_data + x * src_step + y * size, size);
|
||||||
|
}
|
||||||
|
memcpy(temp + i * temp_step + radius * size, src_data + x * src_step, width * size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<float> _space_weight(d*d);
|
||||||
|
std::vector<int> _space_ofs(d*d);
|
||||||
|
float* space_weight = _space_weight.data();
|
||||||
|
int* space_ofs = _space_ofs.data();
|
||||||
|
int maxk = 0;
|
||||||
|
for (int i = -radius; i <= radius; i++)
|
||||||
|
{
|
||||||
|
for (int j = -radius; j <= radius; j++)
|
||||||
|
{
|
||||||
|
double r = std::sqrt((double)i*i + (double)j*j);
|
||||||
|
if (r <= radius && (depth == CV_8U || i != 0 || j != 0))
|
||||||
|
{
|
||||||
|
space_weight[maxk] = static_cast<float>(r*r*gauss_space_coeff);
|
||||||
|
space_ofs[maxk++] = (i * (temp_step / size) + j) * cn;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cv::rvv_hal::core::exp32f(space_weight, space_weight, maxk);
|
||||||
|
|
||||||
|
if (depth == CV_8U)
|
||||||
|
{
|
||||||
|
std::vector<float> _color_weight(cn*256);
|
||||||
|
float* color_weight = _color_weight.data();
|
||||||
|
for (int i = 0; i < 256*cn; i++)
|
||||||
|
color_weight[i] = static_cast<float>(i*i*gauss_color_coeff);
|
||||||
|
cv::rvv_hal::core::exp32f(color_weight, color_weight, 256*cn);
|
||||||
|
|
||||||
|
switch (cn)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
return common::invoke(height, {bilateralFilter8UC1}, temp, temp_step, dst_data, dst_step, width, radius, maxk, space_ofs, space_weight, color_weight);
|
||||||
|
case 3:
|
||||||
|
return common::invoke(height, {bilateralFilter8UC3}, temp, temp_step, dst_data, dst_step, width, radius, maxk, space_ofs, space_weight, color_weight);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
double minValSrc = -1, maxValSrc = 1;
|
||||||
|
cv::rvv_hal::core::minMaxIdx(src_data, src_step, width * cn, height, CV_32F, &minValSrc, &maxValSrc, nullptr, nullptr, nullptr);
|
||||||
|
if(std::abs(minValSrc - maxValSrc) < FLT_EPSILON)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < width; i++)
|
||||||
|
memcpy(dst_data + i * dst_step, src_data + i * src_step, width * size);
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int kExpNumBinsPerChannel = 1 << 12;
|
||||||
|
const int kExpNumBins = kExpNumBinsPerChannel * cn;
|
||||||
|
const float scale_index = kExpNumBins / static_cast<float>((maxValSrc - minValSrc) * cn);
|
||||||
|
std::vector<float> _expLUT(kExpNumBins+2, 0);
|
||||||
|
float* expLUT = _expLUT.data();
|
||||||
|
for (int i = 0; i < kExpNumBins+2; i++)
|
||||||
|
{
|
||||||
|
double val = i / scale_index;
|
||||||
|
expLUT[i] = static_cast<float>(val * val * gauss_color_coeff);
|
||||||
|
}
|
||||||
|
cv::rvv_hal::core::exp32f(expLUT, expLUT, kExpNumBins+2);
|
||||||
|
|
||||||
|
switch (cn)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
return common::invoke(height, {bilateralFilter32FC1}, temp, temp_step, dst_data, dst_step, width, radius, maxk, space_ofs, space_weight, expLUT, scale_index);
|
||||||
|
case 3:
|
||||||
|
return common::invoke(height, {bilateralFilter32FC3}, temp, temp_step, dst_data, dst_step, width, radius, maxk, space_ofs, space_weight, expLUT, scale_index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::imgproc
|
392
hal/riscv-rvv/src/imgproc/box_filter.cpp
Normal file
392
hal/riscv-rvv/src/imgproc/box_filter.cpp
Normal file
@ -0,0 +1,392 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
template<typename T> struct rvv;
|
||||||
|
template<> struct rvv<uchar>
|
||||||
|
{
|
||||||
|
static inline vuint16m8_t vcvt0(vuint8m4_t a, size_t b) { return __riscv_vzext_vf2(a, b); }
|
||||||
|
static inline vuint8m4_t vcvt1(vuint16m8_t a, size_t b) { return __riscv_vnclipu(a, 0, __RISCV_VXRM_RNU, b); }
|
||||||
|
static inline vuint16m8_t vdiv(vuint16m8_t a, ushort b, size_t c) { return __riscv_vdivu(__riscv_vadd(a, b / 2, c), b, c); }
|
||||||
|
};
|
||||||
|
template<> struct rvv<short>
|
||||||
|
{
|
||||||
|
static inline vint32m8_t vcvt0(vint16m4_t a, size_t b) { return __riscv_vsext_vf2(a, b); }
|
||||||
|
static inline vint16m4_t vcvt1(vint32m8_t a, size_t b) { return __riscv_vnclip(a, 0, __RISCV_VXRM_RNU, b); }
|
||||||
|
static inline vint32m8_t vdiv(vint32m8_t a, int b, size_t c) { return __riscv_vdiv(__riscv_vadd(a, b / 2, c), b, c); }
|
||||||
|
};
|
||||||
|
template<> struct rvv<int>
|
||||||
|
{
|
||||||
|
static inline vint32m8_t vcvt0(vint32m8_t a, size_t) { return a; }
|
||||||
|
static inline vint32m8_t vcvt1(vint32m8_t a, size_t) { return a; }
|
||||||
|
static inline vint32m8_t vdiv(vint32m8_t a, int b, size_t c) { return __riscv_vdiv(__riscv_vadd(a, b / 2, c), b, c); }
|
||||||
|
};
|
||||||
|
template<> struct rvv<float>
|
||||||
|
{
|
||||||
|
static inline vfloat32m8_t vcvt0(vfloat32m8_t a, size_t) { return a; }
|
||||||
|
static inline vfloat32m8_t vcvt1(vfloat32m8_t a, size_t) { return a; }
|
||||||
|
static inline vfloat32m8_t vdiv(vfloat32m8_t a, float b, size_t c) { return __riscv_vfdiv(a, b, c); }
|
||||||
|
};
|
||||||
|
|
||||||
|
// the algorithm is same as cv_hal_sepFilter
|
||||||
|
template<int ksize, typename helperT, typename helperWT, bool cast>
|
||||||
|
static inline int boxFilterC1(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int full_width, int full_height, int offset_x, int offset_y, int anchor_x, int anchor_y, bool normalize, int border_type)
|
||||||
|
{
|
||||||
|
using T = typename helperT::ElemType;
|
||||||
|
using WT = typename helperWT::ElemType;
|
||||||
|
|
||||||
|
constexpr int noval = std::numeric_limits<int>::max();
|
||||||
|
auto accessX = [&](int x) {
|
||||||
|
int pi = common::borderInterpolate(offset_y + x - anchor_y, full_height, border_type);
|
||||||
|
return pi < 0 ? noval : pi - offset_y;
|
||||||
|
};
|
||||||
|
auto accessY = [&](int y) {
|
||||||
|
int pj = common::borderInterpolate(offset_x + y - anchor_x, full_width, border_type);
|
||||||
|
return pj < 0 ? noval : pj - offset_x;
|
||||||
|
};
|
||||||
|
auto p2idx = [&](int x, int y){ return (x + ksize) % ksize * width + y; };
|
||||||
|
|
||||||
|
std::vector<WT> res(width * ksize);
|
||||||
|
auto process = [&](int x, int y) {
|
||||||
|
WT sum = 0;
|
||||||
|
for (int i = 0; i < ksize; i++)
|
||||||
|
{
|
||||||
|
int p = accessY(y + i);
|
||||||
|
if (p != noval)
|
||||||
|
{
|
||||||
|
sum += reinterpret_cast<const T*>(src_data + x * src_step)[p];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res[p2idx(x, y)] = sum;
|
||||||
|
};
|
||||||
|
|
||||||
|
const int left = anchor_x, right = width - (ksize - 1 - anchor_x);
|
||||||
|
for (int i = start - anchor_y; i < end + (ksize - 1 - anchor_y); i++)
|
||||||
|
{
|
||||||
|
if (i + offset_y >= 0 && i + offset_y < full_height)
|
||||||
|
{
|
||||||
|
if (left >= right)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int j = 0; j < left; j++)
|
||||||
|
process(i, j);
|
||||||
|
for (int j = right; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = left; j < right; j += vl)
|
||||||
|
{
|
||||||
|
vl = helperT::setvl(right - j);
|
||||||
|
const T* extra = reinterpret_cast<const T*>(src_data + i * src_step) + j - anchor_x;
|
||||||
|
auto src = rvv<T>::vcvt0(helperT::vload(extra, vl), vl);
|
||||||
|
|
||||||
|
extra += vl;
|
||||||
|
auto sum = src;
|
||||||
|
src = helperWT::vslide1down(src, extra[0], vl);
|
||||||
|
sum = helperWT::vadd(sum, src, vl);
|
||||||
|
src = helperWT::vslide1down(src, extra[1], vl);
|
||||||
|
sum = helperWT::vadd(sum, src, vl);
|
||||||
|
if (ksize == 5)
|
||||||
|
{
|
||||||
|
src = helperWT::vslide1down(src, extra[2], vl);
|
||||||
|
sum = helperWT::vadd(sum, src, vl);
|
||||||
|
src = helperWT::vslide1down(src, extra[3], vl);
|
||||||
|
sum = helperWT::vadd(sum, src, vl);
|
||||||
|
}
|
||||||
|
helperWT::vstore(res.data() + p2idx(i, j), sum, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int cur = i - (ksize - 1 - anchor_y);
|
||||||
|
if (cur >= start)
|
||||||
|
{
|
||||||
|
const WT* row0 = accessX(cur ) == noval ? nullptr : res.data() + p2idx(accessX(cur ), 0);
|
||||||
|
const WT* row1 = accessX(cur + 1) == noval ? nullptr : res.data() + p2idx(accessX(cur + 1), 0);
|
||||||
|
const WT* row2 = accessX(cur + 2) == noval ? nullptr : res.data() + p2idx(accessX(cur + 2), 0);
|
||||||
|
const WT* row3 = nullptr, *row4 = nullptr;
|
||||||
|
if (ksize == 5)
|
||||||
|
{
|
||||||
|
row3 = accessX(cur + 3) == noval ? nullptr : res.data() + p2idx(accessX(cur + 3), 0);
|
||||||
|
row4 = accessX(cur + 4) == noval ? nullptr : res.data() + p2idx(accessX(cur + 4), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = helperWT::setvl(width - j);
|
||||||
|
auto sum = row0 ? helperWT::vload(row0 + j, vl) : helperWT::vmv(0, vl);
|
||||||
|
if (row1) sum = helperWT::vadd(sum, helperWT::vload(row1 + j, vl), vl);
|
||||||
|
if (row2) sum = helperWT::vadd(sum, helperWT::vload(row2 + j, vl), vl);
|
||||||
|
if (row3) sum = helperWT::vadd(sum, helperWT::vload(row3 + j, vl), vl);
|
||||||
|
if (row4) sum = helperWT::vadd(sum, helperWT::vload(row4 + j, vl), vl);
|
||||||
|
if (normalize) sum = rvv<T>::vdiv(sum, ksize * ksize, vl);
|
||||||
|
|
||||||
|
if (cast)
|
||||||
|
{
|
||||||
|
helperT::vstore(reinterpret_cast<T*>(dst_data + cur * dst_step) + j, rvv<T>::vcvt1(sum, vl), vl);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
helperWT::vstore(reinterpret_cast<WT*>(dst_data + cur * dst_step) + j, sum, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int ksize>
|
||||||
|
static inline int boxFilterC3(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int full_width, int full_height, int offset_x, int offset_y, int anchor_x, int anchor_y, bool normalize, int border_type)
|
||||||
|
{
|
||||||
|
constexpr int noval = std::numeric_limits<int>::max();
|
||||||
|
auto accessX = [&](int x) {
|
||||||
|
int pi = common::borderInterpolate(offset_y + x - anchor_y, full_height, border_type);
|
||||||
|
return pi < 0 ? noval : pi - offset_y;
|
||||||
|
};
|
||||||
|
auto accessY = [&](int y) {
|
||||||
|
int pj = common::borderInterpolate(offset_x + y - anchor_x, full_width, border_type);
|
||||||
|
return pj < 0 ? noval : pj - offset_x;
|
||||||
|
};
|
||||||
|
auto p2idx = [&](int x, int y){ return ((x + ksize) % ksize * width + y) * 3; };
|
||||||
|
|
||||||
|
std::vector<float> res(width * ksize * 3);
|
||||||
|
auto process = [&](int x, int y) {
|
||||||
|
float sum0, sum1, sum2;
|
||||||
|
sum0 = sum1 = sum2 = 0;
|
||||||
|
for (int i = 0; i < ksize; i++)
|
||||||
|
{
|
||||||
|
int p = accessY(y + i);
|
||||||
|
if (p != noval)
|
||||||
|
{
|
||||||
|
sum0 += reinterpret_cast<const float*>(src_data + x * src_step)[p * 3 ];
|
||||||
|
sum1 += reinterpret_cast<const float*>(src_data + x * src_step)[p * 3 + 1];
|
||||||
|
sum2 += reinterpret_cast<const float*>(src_data + x * src_step)[p * 3 + 2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res[p2idx(x, y) ] = sum0;
|
||||||
|
res[p2idx(x, y) + 1] = sum1;
|
||||||
|
res[p2idx(x, y) + 2] = sum2;
|
||||||
|
};
|
||||||
|
|
||||||
|
const int left = anchor_x, right = width - (ksize - 1 - anchor_x);
|
||||||
|
for (int i = start - anchor_y; i < end + (ksize - 1 - anchor_y); i++)
|
||||||
|
{
|
||||||
|
if (i + offset_y >= 0 && i + offset_y < full_height)
|
||||||
|
{
|
||||||
|
if (left >= right)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int j = 0; j < left; j++)
|
||||||
|
process(i, j);
|
||||||
|
for (int j = right; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = left; j < right; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e32m2(right - j);
|
||||||
|
const float* extra = reinterpret_cast<const float*>(src_data + i * src_step) + (j - anchor_x) * 3;
|
||||||
|
auto src = __riscv_vlseg3e32_v_f32m2x3(extra, vl);
|
||||||
|
auto src0 = __riscv_vget_v_f32m2x3_f32m2(src, 0);
|
||||||
|
auto src1 = __riscv_vget_v_f32m2x3_f32m2(src, 1);
|
||||||
|
auto src2 = __riscv_vget_v_f32m2x3_f32m2(src, 2);
|
||||||
|
|
||||||
|
extra += vl * 3;
|
||||||
|
auto sum0 = src0, sum1 = src1, sum2 = src2;
|
||||||
|
src0 = __riscv_vfslide1down(src0, extra[0], vl);
|
||||||
|
src1 = __riscv_vfslide1down(src1, extra[1], vl);
|
||||||
|
src2 = __riscv_vfslide1down(src2, extra[2], vl);
|
||||||
|
sum0 = __riscv_vfadd(sum0, src0, vl);
|
||||||
|
sum1 = __riscv_vfadd(sum1, src1, vl);
|
||||||
|
sum2 = __riscv_vfadd(sum2, src2, vl);
|
||||||
|
src0 = __riscv_vfslide1down(src0, extra[3], vl);
|
||||||
|
src1 = __riscv_vfslide1down(src1, extra[4], vl);
|
||||||
|
src2 = __riscv_vfslide1down(src2, extra[5], vl);
|
||||||
|
sum0 = __riscv_vfadd(sum0, src0, vl);
|
||||||
|
sum1 = __riscv_vfadd(sum1, src1, vl);
|
||||||
|
sum2 = __riscv_vfadd(sum2, src2, vl);
|
||||||
|
if (ksize == 5)
|
||||||
|
{
|
||||||
|
src0 = __riscv_vfslide1down(src0, extra[6], vl);
|
||||||
|
src1 = __riscv_vfslide1down(src1, extra[7], vl);
|
||||||
|
src2 = __riscv_vfslide1down(src2, extra[8], vl);
|
||||||
|
sum0 = __riscv_vfadd(sum0, src0, vl);
|
||||||
|
sum1 = __riscv_vfadd(sum1, src1, vl);
|
||||||
|
sum2 = __riscv_vfadd(sum2, src2, vl);
|
||||||
|
src0 = __riscv_vfslide1down(src0, extra[ 9], vl);
|
||||||
|
src1 = __riscv_vfslide1down(src1, extra[10], vl);
|
||||||
|
src2 = __riscv_vfslide1down(src2, extra[11], vl);
|
||||||
|
sum0 = __riscv_vfadd(sum0, src0, vl);
|
||||||
|
sum1 = __riscv_vfadd(sum1, src1, vl);
|
||||||
|
sum2 = __riscv_vfadd(sum2, src2, vl);
|
||||||
|
}
|
||||||
|
|
||||||
|
vfloat32m2x3_t dst{};
|
||||||
|
dst = __riscv_vset_v_f32m2_f32m2x3(dst, 0, sum0);
|
||||||
|
dst = __riscv_vset_v_f32m2_f32m2x3(dst, 1, sum1);
|
||||||
|
dst = __riscv_vset_v_f32m2_f32m2x3(dst, 2, sum2);
|
||||||
|
__riscv_vsseg3e32(res.data() + p2idx(i, j), dst, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int cur = i - (ksize - 1 - anchor_y);
|
||||||
|
if (cur >= start)
|
||||||
|
{
|
||||||
|
const float* row0 = accessX(cur ) == noval ? nullptr : res.data() + p2idx(accessX(cur ), 0);
|
||||||
|
const float* row1 = accessX(cur + 1) == noval ? nullptr : res.data() + p2idx(accessX(cur + 1), 0);
|
||||||
|
const float* row2 = accessX(cur + 2) == noval ? nullptr : res.data() + p2idx(accessX(cur + 2), 0);
|
||||||
|
const float* row3 = nullptr, *row4 = nullptr;
|
||||||
|
if (ksize == 5)
|
||||||
|
{
|
||||||
|
row3 = accessX(cur + 3) == noval ? nullptr : res.data() + p2idx(accessX(cur + 3), 0);
|
||||||
|
row4 = accessX(cur + 4) == noval ? nullptr : res.data() + p2idx(accessX(cur + 4), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e32m2(width - j);
|
||||||
|
vfloat32m2_t sum0, sum1, sum2;
|
||||||
|
sum0 = sum1 = sum2 = __riscv_vfmv_v_f_f32m2(0, vl);
|
||||||
|
auto loadres = [&](const float* row) {
|
||||||
|
if (!row) return;
|
||||||
|
auto src = __riscv_vlseg3e32_v_f32m2x3(row + j * 3, vl);
|
||||||
|
sum0 = __riscv_vfadd(sum0, __riscv_vget_v_f32m2x3_f32m2(src, 0), vl);
|
||||||
|
sum1 = __riscv_vfadd(sum1, __riscv_vget_v_f32m2x3_f32m2(src, 1), vl);
|
||||||
|
sum2 = __riscv_vfadd(sum2, __riscv_vget_v_f32m2x3_f32m2(src, 2), vl);
|
||||||
|
};
|
||||||
|
loadres(row0);
|
||||||
|
loadres(row1);
|
||||||
|
loadres(row2);
|
||||||
|
loadres(row3);
|
||||||
|
loadres(row4);
|
||||||
|
if (normalize)
|
||||||
|
{
|
||||||
|
sum0 = __riscv_vfdiv(sum0, ksize * ksize, vl);
|
||||||
|
sum1 = __riscv_vfdiv(sum1, ksize * ksize, vl);
|
||||||
|
sum2 = __riscv_vfdiv(sum2, ksize * ksize, vl);
|
||||||
|
}
|
||||||
|
|
||||||
|
vfloat32m2x3_t dst{};
|
||||||
|
dst = __riscv_vset_v_f32m2_f32m2x3(dst, 0, sum0);
|
||||||
|
dst = __riscv_vset_v_f32m2_f32m2x3(dst, 1, sum1);
|
||||||
|
dst = __riscv_vset_v_f32m2_f32m2x3(dst, 2, sum2);
|
||||||
|
__riscv_vsseg3e32(reinterpret_cast<float*>(dst_data + cur * dst_step) + j * 3, dst, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
|
int boxFilter(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int src_depth, int dst_depth, int cn, int margin_left, int margin_top, int margin_right, int margin_bottom, size_t ksize_width, size_t ksize_height, int anchor_x, int anchor_y, bool normalize, int border_type)
|
||||||
|
{
|
||||||
|
const int src_type = CV_MAKETYPE(src_depth, cn), dst_type = CV_MAKETYPE(dst_depth, cn);
|
||||||
|
if (ksize_width != ksize_height || (ksize_width != 3 && ksize_width != 5))
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
if (border_type & BORDER_ISOLATED || border_type == BORDER_WRAP)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
|
uchar* _dst_data = dst_data;
|
||||||
|
size_t _dst_step = dst_step;
|
||||||
|
const size_t size = CV_ELEM_SIZE(dst_type);
|
||||||
|
std::vector<uchar> dst;
|
||||||
|
if (src_data == _dst_data)
|
||||||
|
{
|
||||||
|
dst = std::vector<uchar>(width * height * size);
|
||||||
|
dst_data = dst.data();
|
||||||
|
dst_step = width * size;
|
||||||
|
}
|
||||||
|
|
||||||
|
int res = CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
anchor_x = anchor_x < 0 ? ksize_width / 2 : anchor_x;
|
||||||
|
anchor_y = anchor_y < 0 ? ksize_height / 2 : anchor_y;
|
||||||
|
if (src_type != dst_type)
|
||||||
|
{
|
||||||
|
if (src_type == CV_8UC1 && dst_type == CV_16UC1)
|
||||||
|
{
|
||||||
|
if (ksize_width == 3)
|
||||||
|
{
|
||||||
|
res = common::invoke(height, {boxFilterC1<3, RVV_U8M4, RVV_U16M8, false>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
}
|
||||||
|
if (ksize_width == 5)
|
||||||
|
{
|
||||||
|
res = common::invoke(height, {boxFilterC1<5, RVV_U8M4, RVV_U16M8, false>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
switch (ksize_width*100 + src_type)
|
||||||
|
{
|
||||||
|
case 300 + CV_8UC1:
|
||||||
|
res = common::invoke(height, {boxFilterC1<3, RVV_U8M4, RVV_U16M8, true>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
break;
|
||||||
|
case 500 + CV_8UC1:
|
||||||
|
res = common::invoke(height, {boxFilterC1<5, RVV_U8M4, RVV_U16M8, true>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
break;
|
||||||
|
case 300 + CV_16SC1:
|
||||||
|
res = common::invoke(height, {boxFilterC1<3, RVV_I16M4, RVV_I32M8, true>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
break;
|
||||||
|
case 500 + CV_16SC1:
|
||||||
|
res = common::invoke(height, {boxFilterC1<5, RVV_I16M4, RVV_I32M8, true>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
break;
|
||||||
|
case 300 + CV_32SC1:
|
||||||
|
res = common::invoke(height, {boxFilterC1<3, RVV_I32M8, RVV_I32M8, true>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
break;
|
||||||
|
case 500 + CV_32SC1:
|
||||||
|
res = common::invoke(height, {boxFilterC1<5, RVV_I32M8, RVV_I32M8, true>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
break;
|
||||||
|
case 300 + CV_32FC1:
|
||||||
|
res = common::invoke(height, {boxFilterC1<3, RVV_F32M8, RVV_F32M8, true>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
break;
|
||||||
|
case 500 + CV_32FC1:
|
||||||
|
res = common::invoke(height, {boxFilterC1<5, RVV_F32M8, RVV_F32M8, true>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
break;
|
||||||
|
case 300 + CV_32FC3:
|
||||||
|
res = common::invoke(height, {boxFilterC3<3>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
break;
|
||||||
|
case 500 + CV_32FC3:
|
||||||
|
res = common::invoke(height, {boxFilterC3<5>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, anchor_x, anchor_y, normalize, border_type);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (res == CV_HAL_ERROR_NOT_IMPLEMENTED)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
|
if (src_data == _dst_data)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < height; i++)
|
||||||
|
memcpy(_dst_data + i * _dst_step, dst.data() + i * dst_step, dst_step);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::imgproc
|
@ -4,12 +4,12 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_COLOR_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_COLOR_HPP_INCLUDED
|
#include <limits>
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
namespace color {
|
namespace color {
|
||||||
class ColorInvoker : public ParallelLoopBody
|
class ColorInvoker : public ParallelLoopBody
|
||||||
@ -41,11 +41,9 @@ namespace color {
|
|||||||
{
|
{
|
||||||
return val - std::remainder(val, 1.0);
|
return val - std::remainder(val, 1.0);
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::color
|
} // cv::rvv_hal::color
|
||||||
|
|
||||||
namespace BGRtoBGR {
|
namespace BGRtoBGR {
|
||||||
#undef cv_hal_cvtBGRtoBGR
|
|
||||||
#define cv_hal_cvtBGRtoBGR cv::cv_hal_rvv::BGRtoBGR::cvtBGRtoBGR
|
|
||||||
|
|
||||||
template<typename T> struct rvv;
|
template<typename T> struct rvv;
|
||||||
template<> struct rvv<uchar>
|
template<> struct rvv<uchar>
|
||||||
@ -206,27 +204,26 @@ static inline int cvtBGRtoBGR(int start, int end, const T * src, size_t src_step
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtBGRtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, int dcn, bool swapBlue)
|
} // BGRtoBGR
|
||||||
|
|
||||||
|
int cvtBGRtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, int dcn, bool swapBlue)
|
||||||
{
|
{
|
||||||
if ((scn != 3 && scn != 4) || (dcn != 3 && dcn != 4))
|
if ((scn != 3 && scn != 4) || (dcn != 3 && dcn != 4))
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
switch (depth)
|
switch (depth)
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
return cvtBGRtoBGR<uchar>(0, height, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, dcn, swapBlue);
|
return BGRtoBGR::cvtBGRtoBGR<uchar>(0, height, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, dcn, swapBlue);
|
||||||
case CV_16U:
|
case CV_16U:
|
||||||
return cvtBGRtoBGR<ushort>(0, height, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, scn, dcn, swapBlue);
|
return BGRtoBGR::cvtBGRtoBGR<ushort>(0, height, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, scn, dcn, swapBlue);
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
return cvtBGRtoBGR<float>(0, height, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, dcn, swapBlue);
|
return BGRtoBGR::cvtBGRtoBGR<float>(0, height, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, dcn, swapBlue);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::BGRtoBGR
|
|
||||||
|
|
||||||
namespace GraytoBGR {
|
namespace GraytoBGR {
|
||||||
#undef cv_hal_cvtGraytoBGR
|
|
||||||
#define cv_hal_cvtGraytoBGR cv::cv_hal_rvv::GraytoBGR::cvtGraytoBGR
|
|
||||||
|
|
||||||
template<typename T> struct rvv;
|
template<typename T> struct rvv;
|
||||||
template<> struct rvv<uchar>
|
template<> struct rvv<uchar>
|
||||||
@ -337,27 +334,26 @@ static inline int cvtGraytoBGR(int start, int end, const T * src, size_t src_ste
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtGraytoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn)
|
} // GraytoBGR
|
||||||
|
|
||||||
|
int cvtGraytoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn)
|
||||||
{
|
{
|
||||||
if (dcn != 3 && dcn != 4)
|
if (dcn != 3 && dcn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
switch (depth)
|
switch (depth)
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
return cvtGraytoBGR<uchar>(0, height, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn);
|
return GraytoBGR::cvtGraytoBGR<uchar>(0, height, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn);
|
||||||
case CV_16U:
|
case CV_16U:
|
||||||
return cvtGraytoBGR<ushort>(0, height, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, dcn);
|
return GraytoBGR::cvtGraytoBGR<ushort>(0, height, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, dcn);
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
return cvtGraytoBGR<float>(0, height, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, dcn);
|
return GraytoBGR::cvtGraytoBGR<float>(0, height, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, dcn);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::GraytoBGR
|
|
||||||
|
|
||||||
namespace BGRtoGray {
|
namespace BGRtoGray {
|
||||||
#undef cv_hal_cvtBGRtoGray
|
|
||||||
#define cv_hal_cvtBGRtoGray cv::cv_hal_rvv::BGRtoGray::cvtBGRtoGray
|
|
||||||
|
|
||||||
template<typename T> struct rvv;
|
template<typename T> struct rvv;
|
||||||
template<> struct rvv<uchar>
|
template<> struct rvv<uchar>
|
||||||
@ -462,27 +458,26 @@ static inline int cvtBGRtoGray(int start, int end, const T * src, size_t src_ste
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtBGRtoGray(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue)
|
} // BGRtoGray
|
||||||
|
|
||||||
|
int cvtBGRtoGray(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue)
|
||||||
{
|
{
|
||||||
if (scn != 3 && scn != 4)
|
if (scn != 3 && scn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
switch (depth)
|
switch (depth)
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
return color::invoke(width, height, {cvtBGRtoGray<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, swapBlue);
|
return color::invoke(width, height, {BGRtoGray::cvtBGRtoGray<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, swapBlue);
|
||||||
case CV_16U:
|
case CV_16U:
|
||||||
return color::invoke(width, height, {cvtBGRtoGray<ushort>}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, scn, swapBlue);
|
return color::invoke(width, height, {BGRtoGray::cvtBGRtoGray<ushort>}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, scn, swapBlue);
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
return color::invoke(width, height, {cvtBGRtoGray<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, swapBlue);
|
return color::invoke(width, height, {BGRtoGray::cvtBGRtoGray<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, swapBlue);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::BGRtoGray
|
|
||||||
|
|
||||||
namespace BGR5x5toBGR {
|
namespace BGR5x5toBGR {
|
||||||
#undef cv_hal_cvtBGR5x5toBGR
|
|
||||||
#define cv_hal_cvtBGR5x5toBGR cv::cv_hal_rvv::BGR5x5toBGR::cvtBGR5x5toBGR
|
|
||||||
|
|
||||||
// the algorithm is copied from imgproc/src/color_rgb.simd.cpp,
|
// the algorithm is copied from imgproc/src/color_rgb.simd.cpp,
|
||||||
// in the functor struct RGB5x52RGB
|
// in the functor struct RGB5x52RGB
|
||||||
@ -540,18 +535,17 @@ static inline int cvtBGR5x5toBGR_u(int start, int end, const ushort * src, size_
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtBGR5x5toBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int dcn, bool swapBlue, int greenBits)
|
} // BGR5x5toBGR
|
||||||
|
|
||||||
|
int cvtBGR5x5toBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int dcn, bool swapBlue, int greenBits)
|
||||||
{
|
{
|
||||||
if ((dcn != 3 && dcn != 4) || (greenBits != 5 && greenBits != 6))
|
if ((dcn != 3 && dcn != 4) || (greenBits != 5 && greenBits != 6))
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
return color::invoke(width, height, {cvtBGR5x5toBGR_u}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn, swapBlue, greenBits);
|
return color::invoke(width, height, {BGR5x5toBGR::cvtBGR5x5toBGR_u}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn, swapBlue, greenBits);
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::BGR5x5toBGR
|
|
||||||
|
|
||||||
namespace BGRtoBGR5x5 {
|
namespace BGRtoBGR5x5 {
|
||||||
#undef cv_hal_cvtBGRtoBGR5x5
|
|
||||||
#define cv_hal_cvtBGRtoBGR5x5 cv::cv_hal_rvv::BGRtoBGR5x5::cvtBGRtoBGR5x5
|
|
||||||
|
|
||||||
// the algorithm is copied from imgproc/src/color_rgb.simd.cpp,
|
// the algorithm is copied from imgproc/src/color_rgb.simd.cpp,
|
||||||
// in the functor struct RGB2RGB5x5
|
// in the functor struct RGB2RGB5x5
|
||||||
@ -604,18 +598,17 @@ static inline int cvtBGRtoBGR5x5_u(int start, int end, const uchar * src, size_t
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtBGRtoBGR5x5(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int greenBits)
|
} // BGRtoBGR5x5
|
||||||
|
|
||||||
|
int cvtBGRtoBGR5x5(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int greenBits)
|
||||||
{
|
{
|
||||||
if ((scn != 3 && scn != 4) || (greenBits != 5 && greenBits != 6))
|
if ((scn != 3 && scn != 4) || (greenBits != 5 && greenBits != 6))
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
return color::invoke(width, height, {cvtBGRtoBGR5x5_u}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, scn, swapBlue, greenBits);
|
return color::invoke(width, height, {BGRtoBGR5x5::cvtBGRtoBGR5x5_u}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, scn, swapBlue, greenBits);
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::BGRtoBGR5x5
|
|
||||||
|
|
||||||
namespace BGR5x5toGray {
|
namespace BGR5x5toGray {
|
||||||
#undef cv_hal_cvtBGR5x5toGray
|
|
||||||
#define cv_hal_cvtBGR5x5toGray cv::cv_hal_rvv::BGR5x5toGray::cvtBGR5x5toGray
|
|
||||||
|
|
||||||
// the algorithm is copied from imgproc/src/color_rgb.simd.cpp,
|
// the algorithm is copied from imgproc/src/color_rgb.simd.cpp,
|
||||||
// in the functor struct RGB5x52Gray
|
// in the functor struct RGB5x52Gray
|
||||||
@ -654,18 +647,17 @@ static inline int cvtBGR5x5toGray_u(int start, int end, const ushort * src, size
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtBGR5x5toGray(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int greenBits)
|
} // BGR5x5toGray
|
||||||
|
|
||||||
|
int cvtBGR5x5toGray(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int greenBits)
|
||||||
{
|
{
|
||||||
if (greenBits != 5 && greenBits != 6)
|
if (greenBits != 5 && greenBits != 6)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
return color::invoke(width, height, {cvtBGR5x5toGray_u}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, greenBits);
|
return color::invoke(width, height, {BGR5x5toGray::cvtBGR5x5toGray_u}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, greenBits);
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::BGR5x5toGray
|
|
||||||
|
|
||||||
namespace GraytoBGR5x5 {
|
namespace GraytoBGR5x5 {
|
||||||
#undef cv_hal_cvtGraytoBGR5x5
|
|
||||||
#define cv_hal_cvtGraytoBGR5x5 cv::cv_hal_rvv::GraytoBGR5x5::cvtGraytoBGR5x5
|
|
||||||
|
|
||||||
// the algorithm is copied from imgproc/src/color_rgb.simd.cpp,
|
// the algorithm is copied from imgproc/src/color_rgb.simd.cpp,
|
||||||
// in the functor struct Gray2RGB5x5
|
// in the functor struct Gray2RGB5x5
|
||||||
@ -697,18 +689,17 @@ static inline int cvtGraytoBGR5x5_u(int start, int end, const uchar * src, size_
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtGraytoBGR5x5(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int greenBits)
|
} // GraytoBGR5x5
|
||||||
|
|
||||||
|
int cvtGraytoBGR5x5(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int greenBits)
|
||||||
{
|
{
|
||||||
if (greenBits != 5 && greenBits != 6)
|
if (greenBits != 5 && greenBits != 6)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
return color::invoke(width, height, {cvtGraytoBGR5x5_u}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, greenBits);
|
return color::invoke(width, height, {GraytoBGR5x5::cvtGraytoBGR5x5_u}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, greenBits);
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::GraytoBGR5x5
|
|
||||||
|
|
||||||
namespace YUVtoBGR {
|
namespace YUVtoBGR {
|
||||||
#undef cv_hal_cvtYUVtoBGR
|
|
||||||
#define cv_hal_cvtYUVtoBGR cv::cv_hal_rvv::YUVtoBGR::cvtYUVtoBGR
|
|
||||||
|
|
||||||
template<typename T> struct rvv;
|
template<typename T> struct rvv;
|
||||||
template<> struct rvv<uchar>
|
template<> struct rvv<uchar>
|
||||||
@ -857,27 +848,26 @@ static inline int cvtYUVtoBGR(int start, int end, const T * src, size_t src_step
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isCbCr)
|
} // YUVtoBGR
|
||||||
|
|
||||||
|
int cvtYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isCbCr)
|
||||||
{
|
{
|
||||||
if (dcn != 3 && dcn != 4)
|
if (dcn != 3 && dcn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
switch (depth)
|
switch (depth)
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
return color::invoke(width, height, {cvtYUVtoBGR<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn, swapBlue, isCbCr);
|
return color::invoke(width, height, {YUVtoBGR::cvtYUVtoBGR<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn, swapBlue, isCbCr);
|
||||||
case CV_16U:
|
case CV_16U:
|
||||||
return color::invoke(width, height, {cvtYUVtoBGR<ushort>}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, dcn, swapBlue, isCbCr);
|
return color::invoke(width, height, {YUVtoBGR::cvtYUVtoBGR<ushort>}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, dcn, swapBlue, isCbCr);
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
return color::invoke(width, height, {cvtYUVtoBGR<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, dcn, swapBlue, isCbCr);
|
return color::invoke(width, height, {YUVtoBGR::cvtYUVtoBGR<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, dcn, swapBlue, isCbCr);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::YUVtoBGR
|
|
||||||
|
|
||||||
namespace BGRtoYUV {
|
namespace BGRtoYUV {
|
||||||
#undef cv_hal_cvtBGRtoYUV
|
|
||||||
#define cv_hal_cvtBGRtoYUV cv::cv_hal_rvv::BGRtoYUV::cvtBGRtoYUV
|
|
||||||
|
|
||||||
template<typename T> struct rvv;
|
template<typename T> struct rvv;
|
||||||
template<> struct rvv<uchar>
|
template<> struct rvv<uchar>
|
||||||
@ -1027,31 +1017,26 @@ static inline int cvtBGRtoYUV(int start, int end, const T * src, size_t src_step
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isCbCr)
|
} // BGRtoYUV
|
||||||
|
|
||||||
|
int cvtBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isCbCr)
|
||||||
{
|
{
|
||||||
if (scn != 3 && scn != 4)
|
if (scn != 3 && scn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
switch (depth)
|
switch (depth)
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
return color::invoke(width, height, {cvtBGRtoYUV<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, swapBlue, isCbCr);
|
return color::invoke(width, height, {BGRtoYUV::cvtBGRtoYUV<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, swapBlue, isCbCr);
|
||||||
case CV_16U:
|
case CV_16U:
|
||||||
return color::invoke(width, height, {cvtBGRtoYUV<ushort>}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, scn, swapBlue, isCbCr);
|
return color::invoke(width, height, {BGRtoYUV::cvtBGRtoYUV<ushort>}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, scn, swapBlue, isCbCr);
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
return color::invoke(width, height, {cvtBGRtoYUV<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, swapBlue, isCbCr);
|
return color::invoke(width, height, {BGRtoYUV::cvtBGRtoYUV<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, swapBlue, isCbCr);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::BGRtoYUV
|
|
||||||
|
|
||||||
namespace PlaneYUVtoBGR {
|
namespace PlaneYUVtoBGR {
|
||||||
#undef cv_hal_cvtOnePlaneYUVtoBGR
|
|
||||||
#define cv_hal_cvtOnePlaneYUVtoBGR cv::cv_hal_rvv::PlaneYUVtoBGR::cvtOnePlaneYUVtoBGR
|
|
||||||
#undef cv_hal_cvtTwoPlaneYUVtoBGR
|
|
||||||
#define cv_hal_cvtTwoPlaneYUVtoBGR cv::cv_hal_rvv::PlaneYUVtoBGR::cvtTwoPlaneYUVtoBGR
|
|
||||||
#undef cv_hal_cvtThreePlaneYUVtoBGR
|
|
||||||
#define cv_hal_cvtThreePlaneYUVtoBGR cv::cv_hal_rvv::PlaneYUVtoBGR::cvtThreePlaneYUVtoBGR
|
|
||||||
|
|
||||||
static const int ITUR_BT_601_SHIFT = 20;
|
static const int ITUR_BT_601_SHIFT = 20;
|
||||||
static const int ITUR_BT_601_CY = 1220542;
|
static const int ITUR_BT_601_CY = 1220542;
|
||||||
@ -1241,22 +1226,24 @@ static inline int cvtMultiPlaneYUVtoBGR(int start, int end, uchar * dst_data, si
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx, int yIdx)
|
} // PlaneYUVtoBGR
|
||||||
|
|
||||||
|
int cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx, int yIdx)
|
||||||
{
|
{
|
||||||
if (dcn != 3 && dcn != 4)
|
if (dcn != 3 && dcn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
return color::invoke(dst_width, dst_height, {cvtSinglePlaneYUVtoBGR}, dst_data, dst_step, dst_width, src_step, src_data, dcn, swapBlue, uIdx, yIdx);
|
return color::invoke(dst_width, dst_height, {PlaneYUVtoBGR::cvtSinglePlaneYUVtoBGR}, dst_data, dst_step, dst_width, src_step, src_data, dcn, swapBlue, uIdx, yIdx);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx)
|
int cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx)
|
||||||
{
|
{
|
||||||
if (dcn != 3 && dcn != 4)
|
if (dcn != 3 && dcn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
const uchar* uv = src_data + src_step * static_cast<size_t>(dst_height);
|
const uchar* uv = src_data + src_step * static_cast<size_t>(dst_height);
|
||||||
return color::invoke(dst_width, dst_height / 2, {cvtMultiPlaneYUVtoBGR}, dst_data, dst_step, dst_width, src_step, src_data, uv, uv, 0, 0, dcn, swapBlue, uIdx);
|
return color::invoke(dst_width, dst_height / 2, {PlaneYUVtoBGR::cvtMultiPlaneYUVtoBGR}, dst_data, dst_step, dst_width, src_step, src_data, uv, uv, 0, 0, dcn, swapBlue, uIdx);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx)
|
int cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx)
|
||||||
{
|
{
|
||||||
if (dcn != 3 && dcn != 4)
|
if (dcn != 3 && dcn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
@ -1267,17 +1254,10 @@ inline int cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar
|
|||||||
int vstepIdx = dst_height % 4 == 2 ? 1 : 0;
|
int vstepIdx = dst_height % 4 == 2 ? 1 : 0;
|
||||||
if (uIdx == 1) { std::swap(u ,v), std::swap(ustepIdx, vstepIdx); }
|
if (uIdx == 1) { std::swap(u ,v), std::swap(ustepIdx, vstepIdx); }
|
||||||
|
|
||||||
return color::invoke(dst_width, dst_height / 2, {cvtMultiPlaneYUVtoBGR}, dst_data, dst_step, dst_width, src_step, src_data, u, v, ustepIdx, vstepIdx, dcn, swapBlue, -1);
|
return color::invoke(dst_width, dst_height / 2, {PlaneYUVtoBGR::cvtMultiPlaneYUVtoBGR}, dst_data, dst_step, dst_width, src_step, src_data, u, v, ustepIdx, vstepIdx, dcn, swapBlue, -1);
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::PlaneYUVtoBGR
|
|
||||||
|
|
||||||
namespace PlaneBGRtoYUV {
|
namespace PlaneBGRtoYUV {
|
||||||
#undef cv_hal_cvtOnePlaneBGRtoYUV
|
|
||||||
#define cv_hal_cvtOnePlaneBGRtoYUV cv::cv_hal_rvv::PlaneBGRtoYUV::cvtOnePlaneBGRtoYUV
|
|
||||||
#undef cv_hal_cvtBGRtoTwoPlaneYUV
|
|
||||||
#define cv_hal_cvtBGRtoTwoPlaneYUV cv::cv_hal_rvv::PlaneBGRtoYUV::cvtBGRtoTwoPlaneYUV
|
|
||||||
#undef cv_hal_cvtBGRtoThreePlaneYUV
|
|
||||||
#define cv_hal_cvtBGRtoThreePlaneYUV cv::cv_hal_rvv::PlaneBGRtoYUV::cvtBGRtoThreePlaneYUV
|
|
||||||
|
|
||||||
static const int ITUR_BT_601_SHIFT = 20;
|
static const int ITUR_BT_601_SHIFT = 20;
|
||||||
static const int ITUR_BT_601_CBY = 102760; // 0.114035 * (236-16)/256 * (1 << ITUR_BT_601_SHIFT)
|
static const int ITUR_BT_601_CBY = 102760; // 0.114035 * (236-16)/256 * (1 << ITUR_BT_601_SHIFT)
|
||||||
@ -1512,35 +1492,34 @@ static inline int cvtBGRtoMultiPlaneYUV(int start, int end, uchar * yData, uchar
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtOnePlaneBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx, int yIdx)
|
} // PlaneBGRtoYUV
|
||||||
|
|
||||||
|
int cvtOnePlaneBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx, int yIdx)
|
||||||
{
|
{
|
||||||
if (scn != 3 && scn != 4)
|
if (scn != 3 && scn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
return color::invoke(width, height, {cvtBGRtoSinglePlaneYUV}, dst_data, dst_step, width, src_step, src_data, scn, swapBlue, uIdx, yIdx);
|
return color::invoke(width, height, {PlaneBGRtoYUV::cvtBGRtoSinglePlaneYUV}, dst_data, dst_step, width, src_step, src_data, scn, swapBlue, uIdx, yIdx);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step,
|
int cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step,
|
||||||
uchar * y_data, size_t y_step, uchar * uv_data, size_t uv_step,
|
uchar * y_data, size_t y_step, uchar * uv_data, size_t uv_step,
|
||||||
int width, int height,
|
int width, int height,
|
||||||
int scn, bool swapBlue, int uIdx)
|
int scn, bool swapBlue, int uIdx)
|
||||||
{
|
{
|
||||||
if (y_step != uv_step || (scn != 3 && scn != 4))
|
if (y_step != uv_step || (scn != 3 && scn != 4))
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
return color::invoke(width, height / 2, {cvtBGRtoMultiPlaneYUV}, y_data, uv_data, y_step, width, height, src_step, src_data, scn, swapBlue, uIdx == 2);
|
return color::invoke(width, height / 2, {PlaneBGRtoYUV::cvtBGRtoMultiPlaneYUV}, y_data, uv_data, y_step, width, height, src_step, src_data, scn, swapBlue, uIdx == 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx)
|
int cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx)
|
||||||
{
|
{
|
||||||
if (scn != 3 && scn != 4)
|
if (scn != 3 && scn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
uchar* uv_data = dst_data + dst_step * static_cast<size_t>(height);
|
uchar* uv_data = dst_data + dst_step * static_cast<size_t>(height);
|
||||||
return color::invoke(width, height / 2, {cvtBGRtoMultiPlaneYUV}, dst_data, uv_data, dst_step, width, height, src_step, src_data, scn, swapBlue, uIdx == 2 ? 3 : 2);
|
return color::invoke(width, height / 2, {PlaneBGRtoYUV::cvtBGRtoMultiPlaneYUV}, dst_data, uv_data, dst_step, width, height, src_step, src_data, scn, swapBlue, uIdx == 2 ? 3 : 2);
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::PlaneBGRtoYUV
|
|
||||||
|
|
||||||
namespace HSVtoBGR {
|
namespace HSVtoBGR {
|
||||||
#undef cv_hal_cvtHSVtoBGR
|
|
||||||
#define cv_hal_cvtHSVtoBGR cv::cv_hal_rvv::HSVtoBGR::cvtHSVtoBGR
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline int cvtHSVtoBGR(int start, int end, const T * src, size_t src_step, T * dst, size_t dst_step, int width, int dcn, bool swapBlue, bool isFullRange, bool isHSV);
|
static inline int cvtHSVtoBGR(int start, int end, const T * src, size_t src_step, T * dst, size_t dst_step, int width, int dcn, bool swapBlue, bool isFullRange, bool isHSV);
|
||||||
@ -1710,25 +1689,24 @@ inline int cvtHSVtoBGR<float>(int start, int end, const float * src, size_t src_
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtHSVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV)
|
} // HSVtoBGR
|
||||||
|
|
||||||
|
int cvtHSVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV)
|
||||||
{
|
{
|
||||||
if (dcn != 3 && dcn != 4)
|
if (dcn != 3 && dcn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
switch (depth)
|
switch (depth)
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
return color::invoke(width, height, {cvtHSVtoBGR<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn, swapBlue, isFullRange, isHSV);
|
return color::invoke(width, height, {HSVtoBGR::cvtHSVtoBGR<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn, swapBlue, isFullRange, isHSV);
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
return color::invoke(width, height, {cvtHSVtoBGR<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, dcn, swapBlue, isFullRange, isHSV);
|
return color::invoke(width, height, {HSVtoBGR::cvtHSVtoBGR<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, dcn, swapBlue, isFullRange, isHSV);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::HSVtoBGR
|
|
||||||
|
|
||||||
namespace BGRtoHSV {
|
namespace BGRtoHSV {
|
||||||
#undef cv_hal_cvtBGRtoHSV
|
|
||||||
#define cv_hal_cvtBGRtoHSV cv::cv_hal_rvv::BGRtoHSV::cvtBGRtoHSV
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline int cvtBGRtoHSV(int start, int end, const T * src, size_t src_step, T * dst, size_t dst_step, int width, int scn, bool swapBlue, bool isFullRange, bool isHSV);
|
static inline int cvtBGRtoHSV(int start, int end, const T * src, size_t src_step, T * dst, size_t dst_step, int width, int scn, bool swapBlue, bool isFullRange, bool isHSV);
|
||||||
@ -1870,25 +1848,24 @@ inline int cvtBGRtoHSV<float>(int start, int end, const float * src, size_t src_
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtBGRtoHSV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV)
|
} // BGRtoHSV
|
||||||
|
|
||||||
|
int cvtBGRtoHSV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV)
|
||||||
{
|
{
|
||||||
if (scn != 3 && scn != 4)
|
if (scn != 3 && scn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
switch (depth)
|
switch (depth)
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
return color::invoke(width, height, {cvtBGRtoHSV<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, swapBlue, isFullRange, isHSV);
|
return color::invoke(width, height, {BGRtoHSV::cvtBGRtoHSV<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, swapBlue, isFullRange, isHSV);
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
return color::invoke(width, height, {cvtBGRtoHSV<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, swapBlue, isFullRange, isHSV);
|
return color::invoke(width, height, {BGRtoHSV::cvtBGRtoHSV<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, swapBlue, isFullRange, isHSV);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::BGRtoHSV
|
|
||||||
|
|
||||||
namespace XYZtoBGR {
|
namespace XYZtoBGR {
|
||||||
#undef cv_hal_cvtXYZtoBGR
|
|
||||||
#define cv_hal_cvtXYZtoBGR cv::cv_hal_rvv::XYZtoBGR::cvtXYZtoBGR
|
|
||||||
|
|
||||||
template<typename T> struct rvv;
|
template<typename T> struct rvv;
|
||||||
template<> struct rvv<uchar>
|
template<> struct rvv<uchar>
|
||||||
@ -2042,27 +2019,26 @@ static inline int cvtXYZtoBGR(int start, int end, const T * src, size_t src_step
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtXYZtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue)
|
} // XYZtoBGR
|
||||||
|
|
||||||
|
int cvtXYZtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue)
|
||||||
{
|
{
|
||||||
if (dcn != 3 && dcn != 4)
|
if (dcn != 3 && dcn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
switch (depth)
|
switch (depth)
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
return color::invoke(width, height, {cvtXYZtoBGR<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn, swapBlue);
|
return color::invoke(width, height, {XYZtoBGR::cvtXYZtoBGR<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn, swapBlue);
|
||||||
case CV_16U:
|
case CV_16U:
|
||||||
return color::invoke(width, height, {cvtXYZtoBGR<ushort>}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, dcn, swapBlue);
|
return color::invoke(width, height, {XYZtoBGR::cvtXYZtoBGR<ushort>}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, dcn, swapBlue);
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
return color::invoke(width, height, {cvtXYZtoBGR<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, dcn, swapBlue);
|
return color::invoke(width, height, {XYZtoBGR::cvtXYZtoBGR<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, dcn, swapBlue);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::XYZtoBGR
|
|
||||||
|
|
||||||
namespace BGRtoXYZ {
|
namespace BGRtoXYZ {
|
||||||
#undef cv_hal_cvtBGRtoXYZ
|
|
||||||
#define cv_hal_cvtBGRtoXYZ cv::cv_hal_rvv::BGRtoXYZ::cvtBGRtoXYZ
|
|
||||||
|
|
||||||
template<typename T> struct rvv;
|
template<typename T> struct rvv;
|
||||||
template<> struct rvv<uchar>
|
template<> struct rvv<uchar>
|
||||||
@ -2209,23 +2185,24 @@ static inline int cvtBGRtoXYZ(int start, int end, const T * src, size_t src_step
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtBGRtoXYZ(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue)
|
} // BGRtoXYZ
|
||||||
|
|
||||||
|
int cvtBGRtoXYZ(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue)
|
||||||
{
|
{
|
||||||
if (scn != 3 && scn != 4)
|
if (scn != 3 && scn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
switch (depth)
|
switch (depth)
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
return color::invoke(width, height, {cvtBGRtoXYZ<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, swapBlue);
|
return color::invoke(width, height, {BGRtoXYZ::cvtBGRtoXYZ<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, swapBlue);
|
||||||
case CV_16U:
|
case CV_16U:
|
||||||
return color::invoke(width, height, {cvtBGRtoXYZ<ushort>}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, scn, swapBlue);
|
return color::invoke(width, height, {BGRtoXYZ::cvtBGRtoXYZ<ushort>}, reinterpret_cast<const ushort*>(src_data), src_step, reinterpret_cast<ushort*>(dst_data), dst_step, width, scn, swapBlue);
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
return color::invoke(width, height, {cvtBGRtoXYZ<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, swapBlue);
|
return color::invoke(width, height, {BGRtoXYZ::cvtBGRtoXYZ<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, swapBlue);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::BGRtoXYZ
|
|
||||||
|
|
||||||
namespace LabTable
|
namespace LabTable
|
||||||
{
|
{
|
||||||
@ -2495,11 +2472,9 @@ namespace LabTable
|
|||||||
return __riscv_vfmadd(__riscv_vfmadd(__riscv_vfmadd(__riscv_vget_v_f32m2x4_f32m2(val, 3), x, __riscv_vget_v_f32m2x4_f32m2(val, 2), vl), x, __riscv_vget_v_f32m2x4_f32m2(val, 1), vl), x, __riscv_vget_v_f32m2x4_f32m2(val, 0), vl);
|
return __riscv_vfmadd(__riscv_vfmadd(__riscv_vfmadd(__riscv_vget_v_f32m2x4_f32m2(val, 3), x, __riscv_vget_v_f32m2x4_f32m2(val, 2), vl), x, __riscv_vget_v_f32m2x4_f32m2(val, 1), vl), x, __riscv_vget_v_f32m2x4_f32m2(val, 0), vl);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} // cv::cv_hal_rvv::LabTable
|
} // cv::rvv_hal::imgproc::LabTable
|
||||||
|
|
||||||
namespace LabtoBGR {
|
namespace LabtoBGR {
|
||||||
#undef cv_hal_cvtLabtoBGR
|
|
||||||
#define cv_hal_cvtLabtoBGR cv::cv_hal_rvv::LabtoBGR::cvtLabtoBGR
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline int cvtLabtoBGR(int start, int end, const T * src, size_t src_step, T * dst, size_t dst_step, int width, int dcn, bool swapBlue, bool isLab, bool srgb);
|
static inline int cvtLabtoBGR(int start, int end, const T * src, size_t src_step, T * dst, size_t dst_step, int width, int dcn, bool swapBlue, bool isLab, bool srgb);
|
||||||
@ -2713,25 +2688,24 @@ inline int cvtLabtoBGR<float>(int start, int end, const float * src, size_t src_
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtLabtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isLab, bool srgb)
|
} // LabtoBGR
|
||||||
|
|
||||||
|
int cvtLabtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isLab, bool srgb)
|
||||||
{
|
{
|
||||||
if (dcn != 3 && dcn != 4)
|
if (dcn != 3 && dcn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
switch (depth)
|
switch (depth)
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
return color::invoke(width, height, {cvtLabtoBGR<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn, swapBlue, isLab, srgb);
|
return color::invoke(width, height, {LabtoBGR::cvtLabtoBGR<uchar>}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, dcn, swapBlue, isLab, srgb);
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
return color::invoke(width, height, {cvtLabtoBGR<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, dcn, swapBlue, isLab, srgb);
|
return color::invoke(width, height, {LabtoBGR::cvtLabtoBGR<float>}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, dcn, swapBlue, isLab, srgb);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::LabtoBGR
|
|
||||||
|
|
||||||
namespace BGRtoLab {
|
namespace BGRtoLab {
|
||||||
#undef cv_hal_cvtBGRtoLab
|
|
||||||
#define cv_hal_cvtBGRtoLab cv::cv_hal_rvv::BGRtoLab::cvtBGRtoLab
|
|
||||||
|
|
||||||
struct rvv_base
|
struct rvv_base
|
||||||
{
|
{
|
||||||
@ -3060,31 +3034,126 @@ static inline int cvtBGRtoLab_f(int start, int end, const float * src, size_t sr
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int cvtBGRtoLab(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isLab, bool srgb)
|
} // BGRtoLab
|
||||||
|
|
||||||
|
int cvtBGRtoLab(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isLab, bool srgb)
|
||||||
{
|
{
|
||||||
if (scn != 3 && scn != 4)
|
if (scn != 3 && scn != 4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
auto cvtBGRtoLab_b = cvtBGRtoLab_u<true, true>;
|
auto cvtBGRtoLab_b = BGRtoLab::cvtBGRtoLab_u<true, true>;
|
||||||
if (!isLab && !srgb)
|
if (!isLab && !srgb)
|
||||||
cvtBGRtoLab_b = cvtBGRtoLab_u<false, false>;
|
cvtBGRtoLab_b = BGRtoLab::cvtBGRtoLab_u<false, false>;
|
||||||
else if (!isLab && srgb)
|
else if (!isLab && srgb)
|
||||||
cvtBGRtoLab_b = cvtBGRtoLab_u<false, true>;
|
cvtBGRtoLab_b = BGRtoLab::cvtBGRtoLab_u<false, true>;
|
||||||
else if (isLab && !srgb)
|
else if (isLab && !srgb)
|
||||||
cvtBGRtoLab_b = cvtBGRtoLab_u<true, false>;
|
cvtBGRtoLab_b = BGRtoLab::cvtBGRtoLab_u<true, false>;
|
||||||
|
|
||||||
switch (depth)
|
switch (depth)
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
return color::invoke(width, height, {cvtBGRtoLab_b}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, swapBlue);
|
return color::invoke(width, height, {cvtBGRtoLab_b}, reinterpret_cast<const uchar*>(src_data), src_step, reinterpret_cast<uchar*>(dst_data), dst_step, width, scn, swapBlue);
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
return color::invoke(width, height, {cvtBGRtoLab_f}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, swapBlue, isLab, srgb);
|
return color::invoke(width, height, {BGRtoLab::cvtBGRtoLab_f}, reinterpret_cast<const float*>(src_data), src_step, reinterpret_cast<float*>(dst_data), dst_step, width, scn, swapBlue, isLab, srgb);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::BGRtoLab
|
|
||||||
|
|
||||||
}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
#if CV_HAL_RVV_071_ENABLED
|
||||||
|
|
||||||
|
static const unsigned char index_array_32 [32]
|
||||||
|
{ 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15, 18, 17, 16, 19, 22, 21, 20, 23, 26, 25, 24, 27, 30, 29, 28, 31 };
|
||||||
|
|
||||||
|
static const unsigned char index_array_24 [24]
|
||||||
|
{ 2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17, 16, 15, 20, 19, 18, 23, 22, 21 };
|
||||||
|
|
||||||
|
static void vBGRtoBGR(const unsigned char* src, unsigned char * dst, const unsigned char * index, int n, int scn, int dcn, int vsize_pixels, const int vsize)
|
||||||
|
{
|
||||||
|
vuint8m2_t vec_index = vle8_v_u8m2(index, vsize);
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
|
for ( ; i <= n-vsize; i += vsize_pixels, src += vsize, dst += vsize)
|
||||||
|
{
|
||||||
|
vuint8m2_t vec_src = vle8_v_u8m2(src, vsize);
|
||||||
|
vuint8m2_t vec_dst = vrgather_vv_u8m2(vec_src, vec_index, vsize);
|
||||||
|
vse8_v_u8m2(dst, vec_dst, vsize);
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( ; i < n; i++, src += scn, dst += dcn )
|
||||||
|
{
|
||||||
|
unsigned char t0 = src[0], t1 = src[1], t2 = src[2];
|
||||||
|
dst[2] = t0;
|
||||||
|
dst[1] = t1;
|
||||||
|
dst[0] = t2;
|
||||||
|
if(dcn == 4)
|
||||||
|
{
|
||||||
|
unsigned char d = src[3];
|
||||||
|
dst[3] = d;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void sBGRtoBGR(const unsigned char* src, unsigned char * dst, int n, int scn, int dcn, int bi)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < n; i++, src += scn, dst += dcn)
|
||||||
|
{
|
||||||
|
unsigned char t0 = src[0], t1 = src[1], t2 = src[2];
|
||||||
|
dst[bi ] = t0;
|
||||||
|
dst[1] = t1;
|
||||||
|
dst[bi^2] = t2;
|
||||||
|
if(dcn == 4)
|
||||||
|
{
|
||||||
|
unsigned char d = scn == 4 ? src[3] : std::numeric_limits<unsigned char>::max();
|
||||||
|
dst[3] = d;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int cvtBGRtoBGR(const unsigned char * src_data, size_t src_step, unsigned char * dst_data, size_t dst_step, int width, int height, int depth, int scn, int dcn, bool swapBlue)
|
||||||
|
{
|
||||||
|
if (depth != CV_8U)
|
||||||
|
{
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int blueIdx = swapBlue ? 2 : 0;
|
||||||
|
if (scn == dcn)
|
||||||
|
{
|
||||||
|
if (!swapBlue)
|
||||||
|
{
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int vsize_pixels = 8;
|
||||||
|
|
||||||
|
if (scn == 4)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < height; i++, src_data += src_step, dst_data += dst_step)
|
||||||
|
{
|
||||||
|
vBGRtoBGR(src_data, dst_data, index_array_32, width, scn, dcn, vsize_pixels, 32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int i = 0; i < height; i++, src_data += src_step, dst_data += dst_step)
|
||||||
|
{
|
||||||
|
vBGRtoBGR(src_data, dst_data, index_array_24, width, scn, dcn, vsize_pixels, 24);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int i = 0; i < height; i++, src_data += src_step, dst_data += dst_step)
|
||||||
|
sBGRtoBGR(src_data, dst_data, width, scn, dcn, blueIdx);
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_071_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::imgproc
|
76
hal/riscv-rvv/src/imgproc/common.hpp
Normal file
76
hal/riscv-rvv/src/imgproc/common.hpp
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
|
#ifndef OPENCV_HAL_RVV_IMGPROC_COMMON_HPP_INCLUDED
|
||||||
|
#define OPENCV_HAL_RVV_IMGPROC_COMMON_HPP_INCLUDED
|
||||||
|
|
||||||
|
#include "opencv2/core/hal/interface.h"
|
||||||
|
#include "opencv2/imgproc/hal/interface.h"
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace imgproc { namespace common {
|
||||||
|
|
||||||
|
inline int borderInterpolate( int p, int len, int borderType )
|
||||||
|
{
|
||||||
|
if ((unsigned)p < (unsigned)len)
|
||||||
|
;
|
||||||
|
else if (borderType == CV_HAL_BORDER_REPLICATE)
|
||||||
|
p = p < 0 ? 0 : len - 1;
|
||||||
|
else if (borderType == CV_HAL_BORDER_REFLECT || borderType == CV_HAL_BORDER_REFLECT_101)
|
||||||
|
{
|
||||||
|
int delta = borderType == CV_HAL_BORDER_REFLECT_101;
|
||||||
|
if (len == 1)
|
||||||
|
return 0;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if (p < 0)
|
||||||
|
p = -p - 1 + delta;
|
||||||
|
else
|
||||||
|
p = len - 1 - (p - len) - delta;
|
||||||
|
}
|
||||||
|
while( (unsigned)p >= (unsigned)len );
|
||||||
|
}
|
||||||
|
else if (borderType == CV_HAL_BORDER_WRAP)
|
||||||
|
{
|
||||||
|
if (p < 0)
|
||||||
|
p -= ((p-len+1)/len)*len;
|
||||||
|
if (p >= len)
|
||||||
|
p %= len;
|
||||||
|
}
|
||||||
|
else if (borderType == CV_HAL_BORDER_CONSTANT)
|
||||||
|
p = -1;
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
class FilterInvoker : public ParallelLoopBody
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
template<typename... Args>
|
||||||
|
FilterInvoker(std::function<int(int, int, Args...)> _func, Args&&... args)
|
||||||
|
{
|
||||||
|
func = std::bind(_func, std::placeholders::_1, std::placeholders::_2, std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void operator()(const Range& range) const override
|
||||||
|
{
|
||||||
|
func(range.start, range.end);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::function<int(int, int)> func;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename... Args>
|
||||||
|
inline int invoke(int height, std::function<int(int, int, Args...)> func, Args&&... args)
|
||||||
|
{
|
||||||
|
cv::parallel_for_(Range(1, height), FilterInvoker(func, std::forward<Args>(args)...), cv::getNumThreads());
|
||||||
|
return func(0, 1, std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
}}}} // cv::rvv_hal::imgproc::common
|
||||||
|
|
||||||
|
#endif // OPENCV_HAL_RVV_IMGPROC_COMMON_HPP_INCLUDED
|
264
hal/riscv-rvv/src/imgproc/filter.cpp
Normal file
264
hal/riscv-rvv/src/imgproc/filter.cpp
Normal file
@ -0,0 +1,264 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
struct Filter2D
|
||||||
|
{
|
||||||
|
const uchar* kernel_data;
|
||||||
|
size_t kernel_step;
|
||||||
|
int kernel_type;
|
||||||
|
int kernel_width;
|
||||||
|
int kernel_height;
|
||||||
|
int src_type;
|
||||||
|
int dst_type;
|
||||||
|
int borderType;
|
||||||
|
double delta;
|
||||||
|
int anchor_x;
|
||||||
|
int anchor_y;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void process3(int anchor, int left, int right, float delta, const float* kernel, const uchar* row0, const uchar* row1, const uchar* row2, uchar* dst)
|
||||||
|
{
|
||||||
|
int vl;
|
||||||
|
for (int i = left; i < right; i += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m1(right - i);
|
||||||
|
auto s0 = __riscv_vfmv_v_f_f32m4(delta, vl);
|
||||||
|
auto s1 = __riscv_vfmv_v_f_f32m4(delta, vl);
|
||||||
|
auto s2 = __riscv_vfmv_v_f_f32m4(delta, vl);
|
||||||
|
auto s3 = __riscv_vfmv_v_f_f32m4(delta, vl);
|
||||||
|
|
||||||
|
auto addshift = [&](vfloat32m4_t a, vfloat32m4_t b, float k0, float k1, float k2, float r1, float r2) {
|
||||||
|
a = __riscv_vfmacc(a, k0, b, vl);
|
||||||
|
b = __riscv_vfslide1down(b, r1, vl);
|
||||||
|
a = __riscv_vfmacc(a, k1, b, vl);
|
||||||
|
b = __riscv_vfslide1down(b, r2, vl);
|
||||||
|
return __riscv_vfmacc(a, k2, b, vl);
|
||||||
|
};
|
||||||
|
auto loadsrc = [&](const uchar* row, float k0, float k1, float k2) {
|
||||||
|
if (!row) return;
|
||||||
|
|
||||||
|
const uchar* extra = row + (i - anchor) * 4;
|
||||||
|
auto src = __riscv_vlseg4e8_v_u8m1x4(extra, vl);
|
||||||
|
auto v0 = __riscv_vfwcvt_f(__riscv_vwcvtu_x(__riscv_vget_v_u8m1x4_u8m1(src, 0), vl), vl);
|
||||||
|
auto v1 = __riscv_vfwcvt_f(__riscv_vwcvtu_x(__riscv_vget_v_u8m1x4_u8m1(src, 1), vl), vl);
|
||||||
|
auto v2 = __riscv_vfwcvt_f(__riscv_vwcvtu_x(__riscv_vget_v_u8m1x4_u8m1(src, 2), vl), vl);
|
||||||
|
auto v3 = __riscv_vfwcvt_f(__riscv_vwcvtu_x(__riscv_vget_v_u8m1x4_u8m1(src, 3), vl), vl);
|
||||||
|
|
||||||
|
extra += vl * 4;
|
||||||
|
s0 = addshift(s0, v0, k0, k1, k2, extra[0], extra[4]);
|
||||||
|
s1 = addshift(s1, v1, k0, k1, k2, extra[1], extra[5]);
|
||||||
|
s2 = addshift(s2, v2, k0, k1, k2, extra[2], extra[6]);
|
||||||
|
s3 = addshift(s3, v3, k0, k1, k2, extra[3], extra[7]);
|
||||||
|
};
|
||||||
|
|
||||||
|
loadsrc(row0, kernel[0], kernel[1], kernel[2]);
|
||||||
|
loadsrc(row1, kernel[3], kernel[4], kernel[5]);
|
||||||
|
loadsrc(row2, kernel[6], kernel[7], kernel[8]);
|
||||||
|
vuint8m1x4_t val{};
|
||||||
|
val = __riscv_vset_v_u8m1_u8m1x4(val, 0, __riscv_vnclipu(__riscv_vfncvt_xu(s0, vl), 0, __RISCV_VXRM_RNU, vl));
|
||||||
|
val = __riscv_vset_v_u8m1_u8m1x4(val, 1, __riscv_vnclipu(__riscv_vfncvt_xu(s1, vl), 0, __RISCV_VXRM_RNU, vl));
|
||||||
|
val = __riscv_vset_v_u8m1_u8m1x4(val, 2, __riscv_vnclipu(__riscv_vfncvt_xu(s2, vl), 0, __RISCV_VXRM_RNU, vl));
|
||||||
|
val = __riscv_vset_v_u8m1_u8m1x4(val, 3, __riscv_vnclipu(__riscv_vfncvt_xu(s3, vl), 0, __RISCV_VXRM_RNU, vl));
|
||||||
|
__riscv_vsseg4e8(dst + i * 4, val, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void process5(int anchor, int left, int right, float delta, const float* kernel, const uchar* row0, const uchar* row1, const uchar* row2, const uchar* row3, const uchar* row4, uchar* dst)
|
||||||
|
{
|
||||||
|
int vl;
|
||||||
|
for (int i = left; i < right; i += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m1(right - i);
|
||||||
|
auto s0 = __riscv_vfmv_v_f_f32m4(delta, vl);
|
||||||
|
auto s1 = __riscv_vfmv_v_f_f32m4(delta, vl);
|
||||||
|
auto s2 = __riscv_vfmv_v_f_f32m4(delta, vl);
|
||||||
|
auto s3 = __riscv_vfmv_v_f_f32m4(delta, vl);
|
||||||
|
|
||||||
|
auto addshift = [&](vfloat32m4_t a, vfloat32m4_t b, float k0, float k1, float k2, float k3, float k4, float r1, float r2, float r3, float r4) {
|
||||||
|
a = __riscv_vfmacc(a, k0, b, vl);
|
||||||
|
b = __riscv_vfslide1down(b, r1, vl);
|
||||||
|
a = __riscv_vfmacc(a, k1, b, vl);
|
||||||
|
b = __riscv_vfslide1down(b, r2, vl);
|
||||||
|
a = __riscv_vfmacc(a, k2, b, vl);
|
||||||
|
b = __riscv_vfslide1down(b, r3, vl);
|
||||||
|
a = __riscv_vfmacc(a, k3, b, vl);
|
||||||
|
b = __riscv_vfslide1down(b, r4, vl);
|
||||||
|
return __riscv_vfmacc(a, k4, b, vl);
|
||||||
|
};
|
||||||
|
auto loadsrc = [&](const uchar* row, float k0, float k1, float k2, float k3, float k4) {
|
||||||
|
if (!row) return;
|
||||||
|
|
||||||
|
const uchar* extra = row + (i - anchor) * 4;
|
||||||
|
auto src = __riscv_vlseg4e8_v_u8m1x4(extra, vl);
|
||||||
|
auto v0 = __riscv_vfwcvt_f(__riscv_vwcvtu_x(__riscv_vget_v_u8m1x4_u8m1(src, 0), vl), vl);
|
||||||
|
auto v1 = __riscv_vfwcvt_f(__riscv_vwcvtu_x(__riscv_vget_v_u8m1x4_u8m1(src, 1), vl), vl);
|
||||||
|
auto v2 = __riscv_vfwcvt_f(__riscv_vwcvtu_x(__riscv_vget_v_u8m1x4_u8m1(src, 2), vl), vl);
|
||||||
|
auto v3 = __riscv_vfwcvt_f(__riscv_vwcvtu_x(__riscv_vget_v_u8m1x4_u8m1(src, 3), vl), vl);
|
||||||
|
|
||||||
|
extra += vl * 4;
|
||||||
|
s0 = addshift(s0, v0, k0, k1, k2, k3, k4, extra[0], extra[4], extra[ 8], extra[12]);
|
||||||
|
s1 = addshift(s1, v1, k0, k1, k2, k3, k4, extra[1], extra[5], extra[ 9], extra[13]);
|
||||||
|
s2 = addshift(s2, v2, k0, k1, k2, k3, k4, extra[2], extra[6], extra[10], extra[14]);
|
||||||
|
s3 = addshift(s3, v3, k0, k1, k2, k3, k4, extra[3], extra[7], extra[11], extra[15]);
|
||||||
|
};
|
||||||
|
|
||||||
|
loadsrc(row0, kernel[ 0], kernel[ 1], kernel[ 2], kernel[ 3], kernel[ 4]);
|
||||||
|
loadsrc(row1, kernel[ 5], kernel[ 6], kernel[ 7], kernel[ 8], kernel[ 9]);
|
||||||
|
loadsrc(row2, kernel[10], kernel[11], kernel[12], kernel[13], kernel[14]);
|
||||||
|
loadsrc(row3, kernel[15], kernel[16], kernel[17], kernel[18], kernel[19]);
|
||||||
|
loadsrc(row4, kernel[20], kernel[21], kernel[22], kernel[23], kernel[24]);
|
||||||
|
vuint8m1x4_t val{};
|
||||||
|
val = __riscv_vset_v_u8m1_u8m1x4(val, 0, __riscv_vnclipu(__riscv_vfncvt_xu(s0, vl), 0, __RISCV_VXRM_RNU, vl));
|
||||||
|
val = __riscv_vset_v_u8m1_u8m1x4(val, 1, __riscv_vnclipu(__riscv_vfncvt_xu(s1, vl), 0, __RISCV_VXRM_RNU, vl));
|
||||||
|
val = __riscv_vset_v_u8m1_u8m1x4(val, 2, __riscv_vnclipu(__riscv_vfncvt_xu(s2, vl), 0, __RISCV_VXRM_RNU, vl));
|
||||||
|
val = __riscv_vset_v_u8m1_u8m1x4(val, 3, __riscv_vnclipu(__riscv_vfncvt_xu(s3, vl), 0, __RISCV_VXRM_RNU, vl));
|
||||||
|
__riscv_vsseg4e8(dst + i * 4, val, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// the algorithm is copied from 3rdparty/carotene/src/convolution.cpp,
|
||||||
|
// in the function void CAROTENE_NS::convolution
|
||||||
|
template<int ksize>
|
||||||
|
static inline int filter(int start, int end, Filter2D* data, const uchar* src_data, size_t src_step, uchar* dst_data, int width, int height, int full_width, int full_height, int offset_x, int offset_y)
|
||||||
|
{
|
||||||
|
float kernel[ksize * ksize];
|
||||||
|
for (int i = 0; i < ksize * ksize; i++)
|
||||||
|
{
|
||||||
|
kernel[i] = reinterpret_cast<const float*>(data->kernel_data + (i / ksize) * data->kernel_step)[i % ksize];
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr int noval = std::numeric_limits<int>::max();
|
||||||
|
auto access = [&](int x, int y) {
|
||||||
|
int pi, pj;
|
||||||
|
if (data->borderType & BORDER_ISOLATED)
|
||||||
|
{
|
||||||
|
pi = common::borderInterpolate(x - data->anchor_y, height, data->borderType & ~BORDER_ISOLATED);
|
||||||
|
pj = common::borderInterpolate(y - data->anchor_x, width , data->borderType & ~BORDER_ISOLATED);
|
||||||
|
pi = pi < 0 ? noval : pi;
|
||||||
|
pj = pj < 0 ? noval : pj;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pi = common::borderInterpolate(offset_y + x - data->anchor_y, full_height, data->borderType);
|
||||||
|
pj = common::borderInterpolate(offset_x + y - data->anchor_x, full_width , data->borderType);
|
||||||
|
pi = pi < 0 ? noval : pi - offset_y;
|
||||||
|
pj = pj < 0 ? noval : pj - offset_x;
|
||||||
|
}
|
||||||
|
return std::make_pair(pi, pj);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto process = [&](int x, int y) {
|
||||||
|
float sum0, sum1, sum2, sum3;
|
||||||
|
sum0 = sum1 = sum2 = sum3 = data->delta;
|
||||||
|
for (int i = 0; i < ksize * ksize; i++)
|
||||||
|
{
|
||||||
|
auto p = access(x + i / ksize, y + i % ksize);
|
||||||
|
if (p.first != noval && p.second != noval)
|
||||||
|
{
|
||||||
|
sum0 += kernel[i] * src_data[p.first * src_step + p.second * 4 ];
|
||||||
|
sum1 += kernel[i] * src_data[p.first * src_step + p.second * 4 + 1];
|
||||||
|
sum2 += kernel[i] * src_data[p.first * src_step + p.second * 4 + 2];
|
||||||
|
sum3 += kernel[i] * src_data[p.first * src_step + p.second * 4 + 3];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dst_data[(x * width + y) * 4 ] = std::max(0, std::min((int)std::round(sum0), (int)std::numeric_limits<uchar>::max()));
|
||||||
|
dst_data[(x * width + y) * 4 + 1] = std::max(0, std::min((int)std::round(sum1), (int)std::numeric_limits<uchar>::max()));
|
||||||
|
dst_data[(x * width + y) * 4 + 2] = std::max(0, std::min((int)std::round(sum2), (int)std::numeric_limits<uchar>::max()));
|
||||||
|
dst_data[(x * width + y) * 4 + 3] = std::max(0, std::min((int)std::round(sum3), (int)std::numeric_limits<uchar>::max()));
|
||||||
|
};
|
||||||
|
|
||||||
|
const int left = data->anchor_x, right = width - (ksize - 1 - data->anchor_x);
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
if (left >= right)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int j = 0; j < left; j++)
|
||||||
|
process(i, j);
|
||||||
|
for (int j = right; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
|
||||||
|
const uchar* row0 = access(i , 0).first == noval ? nullptr : src_data + access(i , 0).first * src_step;
|
||||||
|
const uchar* row1 = access(i + 1, 0).first == noval ? nullptr : src_data + access(i + 1, 0).first * src_step;
|
||||||
|
const uchar* row2 = access(i + 2, 0).first == noval ? nullptr : src_data + access(i + 2, 0).first * src_step;
|
||||||
|
if (ksize == 3)
|
||||||
|
{
|
||||||
|
process3(data->anchor_x, left, right, data->delta, kernel, row0, row1, row2, dst_data + i * width * 4);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const uchar* row3 = access(i + 3, 0).first == noval ? nullptr : src_data + access(i + 3, 0).first * src_step;
|
||||||
|
const uchar* row4 = access(i + 4, 0).first == noval ? nullptr : src_data + access(i + 4, 0).first * src_step;
|
||||||
|
process5(data->anchor_x, left, right, data->delta, kernel, row0, row1, row2, row3, row4, dst_data + i * width * 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
|
// Creates the filter context for the RVV filter implementation.
// Supported: CV_32FC1 kernel, CV_8UC4 source and destination, square 3x3 or 5x5
// kernel, any border mode except BORDER_WRAP (with or without BORDER_ISOLATED).
// Anything else returns CV_HAL_ERROR_NOT_IMPLEMENTED and leaves *context untouched.
// Negative anchors are replaced by the kernel centre.
// On success *context owns a heap-allocated Filter2D; release it with filterFree().
int filterInit(cvhalFilter2D** context, uchar* kernel_data, size_t kernel_step, int kernel_type, int kernel_width, int kernel_height, int /*max_width*/, int /*max_height*/, int src_type, int dst_type, int borderType, double delta, int anchor_x, int anchor_y, bool /*allowSubmatrix*/, bool /*allowInplace*/)
{
    const bool typesOk  = kernel_type == CV_32FC1 && src_type == CV_8UC4 && dst_type == CV_8UC4;
    const bool shapeOk  = kernel_width == kernel_height && (kernel_width == 3 || kernel_width == 5);
    const bool borderOk = (borderType & ~BORDER_ISOLATED) != BORDER_WRAP;
    if (!(typesOk && shapeOk && borderOk))
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    if (anchor_x < 0)
        anchor_x = kernel_width / 2;
    if (anchor_y < 0)
        anchor_y = kernel_height / 2;

    Filter2D* state = new Filter2D{kernel_data, kernel_step, kernel_type, kernel_width, kernel_height, src_type, dst_type, borderType, delta, anchor_x, anchor_y};
    *context = reinterpret_cast<cvhalFilter2D*>(state);
    return CV_HAL_ERROR_OK;
}
|
||||||
|
|
||||||
|
// Runs the filter described by `context` (created by filterInit) on a 4-channel
// 8-bit image region.
// The result is first written to a tightly packed scratch buffer and copied to
// dst_data (row stride dst_step) afterwards, so the worker never writes to
// dst_data while source rows are still being read.
// Returns the status of the dispatched worker, or CV_HAL_ERROR_NOT_IMPLEMENTED
// when the kernel width is neither 3 nor 5; dst_data is only written on success.
int filter(cvhalFilter2D* context, uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int full_width, int full_height, int offset_x, int offset_y)
{
    Filter2D* data = reinterpret_cast<Filter2D*>(context);

    // Sizes in size_t: width * height * 4 overflows int for large images.
    const size_t row_bytes = static_cast<size_t>(width) * 4;
    std::vector<uchar> dst(row_bytes * height);

    int res = CV_HAL_ERROR_NOT_IMPLEMENTED;
    switch (data->kernel_width)
    {
    case 3:
        res = common::invoke(height, {filter<3>}, data, src_data, src_step, dst.data(), width, height, full_width, full_height, offset_x, offset_y);
        break;
    case 5:
        res = common::invoke(height, {filter<5>}, data, src_data, src_step, dst.data(), width, height, full_width, full_height, offset_x, offset_y);
        break;
    }

    // Do not overwrite the destination (which may alias the source) with an
    // unfilled scratch buffer when nothing was computed.
    if (res != CV_HAL_ERROR_OK)
        return res;

    for (int i = 0; i < height; i++)
        memcpy(dst_data + i * dst_step, dst.data() + i * row_bytes, row_bytes);
    return res;
}
|
||||||
|
|
||||||
|
// Destroys the Filter2D object that filterInit() stored behind `context`.
// Always returns CV_HAL_ERROR_OK.
int filterFree(cvhalFilter2D* context)
{
    Filter2D* state = reinterpret_cast<Filter2D*>(context);
    delete state;
    return CV_HAL_ERROR_OK;
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::imgproc
|
389
hal/riscv-rvv/src/imgproc/gaussian_blur.cpp
Normal file
389
hal/riscv-rvv/src/imgproc/gaussian_blur.cpp
Normal file
@ -0,0 +1,389 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// the algorithm is same as cv_hal_sepFilter
//
// Separable binomial blur ([1 2 1] or [1 4 6 4 1] in each direction) for
// single-channel images, producing destination rows [start, end).
//   helperT   vector helper for the element type T (uses ElemType, setvl, vload, vstore)
//   helperWT  vector helper for the wider accumulator type WT
//             (uses ElemType, VecType, setvl, vload, vstore, vmv)
//   full_width/full_height, offset_x/offset_y
//             size of the enclosing image and position of this region inside it,
//             used for border interpolation
// Each source row is filtered horizontally into a ring buffer of ksize rows
// (`res`); as soon as the rows needed for an output row are available they are
// combined vertically, rounded and written to dst_data.
// Always returns CV_HAL_ERROR_OK.
template<int ksize, typename helperT, typename helperWT>
static inline int gaussianBlurC1(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int full_width, int full_height, int offset_x, int offset_y, int border_type)
{
    using T = typename helperT::ElemType;
    using WT = typename helperWT::ElemType;

    // Marker for "no source sample" (borderInterpolate returned a negative value).
    constexpr int noval = std::numeric_limits<int>::max();
    // Window row x (before subtracting the kernel radius) -> source row relative to src_data, or noval.
    auto accessX = [&](int x) {
        int pi = common::borderInterpolate(offset_y + x - ksize / 2, full_height, border_type); // [TODO] fix dependencies
        return pi < 0 ? noval : pi - offset_y;
    };
    // Window column y (before subtracting the kernel radius) -> source column, or noval.
    auto accessY = [&](int y) {
        int pj = common::borderInterpolate(offset_x + y - ksize / 2, full_width, border_type);
        return pj < 0 ? noval : pj - offset_x;
    };
    // Index into `res`: rows are stored modulo ksize (x + ksize keeps the
    // dividend non-negative for the slightly negative rows that occur here).
    auto p2idx = [&](int x, int y){ return (x + ksize) % ksize * width + y; };

    // Binomial taps; row 0 is the 3-tap kernel, row 1 the 5-tap kernel.
    constexpr uint kernel[2][5] = {{1, 2, 1}, {1, 4, 6, 4, 1}};
    // Ring buffer holding the horizontally filtered version of the last ksize rows.
    std::vector<WT> res(width * ksize);
    // Scalar horizontal pass for one pixel (x = source row, y = column); used near the left/right edges.
    auto process = [&](int x, int y) {
        WT sum = 0;
        for (int i = 0; i < ksize; i++)
        {
            int p = accessY(y + i);
            if (p != noval)
            {
                sum += kernel[ksize == 5][i] * static_cast<WT>(reinterpret_cast<const T*>(src_data + x * src_step)[p]);
            }
        }
        res[p2idx(x, y)] = sum;
    };

    // Columns in [left, right) have the whole horizontal window inside the row.
    const int left = ksize / 2, right = width - ksize / 2;
    // i is the source row being filtered horizontally; it runs ksize / 2 rows
    // beyond [start, end) on both sides so every output row has its neighbours.
    for (int i = start - ksize / 2; i < end + ksize / 2; i++)
    {
        // Horizontal pass, only for rows that exist in the enclosing image.
        if (i + offset_y >= 0 && i + offset_y < full_height)
        {
            if (left >= right)
            {
                // Row too narrow for the vector path.
                for (int j = 0; j < width; j++)
                    process(i, j);
            }
            else
            {
                for (int j = 0; j < left; j++)
                    process(i, j);
                for (int j = right; j < width; j++)
                    process(i, j);

                int vl;
                for (int j = left; j < right; j += vl)
                {
                    vl = helperT::setvl(right - j);
                    // Leftmost sample of the window of output column j.
                    const T* extra = reinterpret_cast<const T*>(src_data + i * src_step) + j - ksize / 2;
                    auto src = __riscv_vzext_vf2(helperT::vload(extra, vl), vl);

                    // From here on `extra` points at the samples just past the loaded
                    // vector; they are shifted in one at a time by vslide1down.
                    extra += vl;
                    auto sum = src;
                    if (ksize == 3)
                    {
                        // taps 1 2 1
                        src = __riscv_vslide1down(src, extra[0], vl);
                        sum = __riscv_vadd(sum, __riscv_vsll(src, 1, vl), vl);
                        src = __riscv_vslide1down(src, extra[1], vl);
                        sum = __riscv_vadd(sum, src, vl);
                    }
                    else
                    {
                        // taps 1 4 6 4 1 (6 * v is computed as (v << 1) + (v << 2))
                        src = __riscv_vslide1down(src, extra[0], vl);
                        sum = __riscv_vadd(sum, __riscv_vsll(src, 2, vl), vl);
                        src = __riscv_vslide1down(src, extra[1], vl);
                        sum = __riscv_vadd(sum, __riscv_vadd(__riscv_vsll(src, 1, vl), __riscv_vsll(src, 2, vl), vl), vl);
                        src = __riscv_vslide1down(src, extra[2], vl);
                        sum = __riscv_vadd(sum, __riscv_vsll(src, 2, vl), vl);
                        src = __riscv_vslide1down(src, extra[3], vl);
                        sum = __riscv_vadd(sum, src, vl);
                    }
                    helperWT::vstore(res.data() + p2idx(i, j), sum, vl);
                }
            }
        }

        // Vertical pass: row i is the last one needed for output row cur.
        int cur = i - ksize / 2;
        if (cur >= start)
        {
            // Ring-buffer rows feeding output row `cur`; nullptr means the row contributes zero.
            const WT* row0 = accessX(cur    ) == noval ? nullptr : res.data() + p2idx(accessX(cur    ), 0);
            const WT* row1 = accessX(cur + 1) == noval ? nullptr : res.data() + p2idx(accessX(cur + 1), 0);
            const WT* row2 = accessX(cur + 2) == noval ? nullptr : res.data() + p2idx(accessX(cur + 2), 0);
            const WT* row3 = nullptr, *row4 = nullptr;
            if (ksize == 5)
            {
                row3 = accessX(cur + 3) == noval ? nullptr : res.data() + p2idx(accessX(cur + 3), 0);
                row4 = accessX(cur + 4) == noval ? nullptr : res.data() + p2idx(accessX(cur + 4), 0);
            }

            int vl;
            for (int j = 0; j < width; j += vl)
            {
                vl = helperWT::setvl(width - j);
                auto v0 = row0 ? helperWT::vload(row0 + j, vl) : helperWT::vmv(0, vl);
                auto v1 = row1 ? helperWT::vload(row1 + j, vl) : helperWT::vmv(0, vl);
                auto v2 = row2 ? helperWT::vload(row2 + j, vl) : helperWT::vmv(0, vl);
                typename helperWT::VecType sum;
                if (ksize == 3)
                {
                    // v0 + 2 * v1 + v2
                    sum = __riscv_vadd(__riscv_vadd(v0, v2, vl), __riscv_vsll(v1, 1, vl), vl);
                }
                else
                {
                    // v0 + 4 * (v1 + v3) + 6 * v2 + v4
                    sum = __riscv_vadd(v0, __riscv_vadd(__riscv_vsll(v2, 1, vl), __riscv_vsll(v2, 2, vl), vl), vl);
                    auto v3 = row3 ? helperWT::vload(row3 + j, vl) : helperWT::vmv(0, vl);
                    sum = __riscv_vadd(sum, __riscv_vsll(__riscv_vadd(v1, v3, vl), 2, vl), vl);
                    auto v4 = row4 ? helperWT::vload(row4 + j, vl) : helperWT::vmv(0, vl);
                    sum = __riscv_vadd(sum, v4, vl);
                }
                // Divide by the total kernel weight (16 = 2^4 for 3x3, 256 = 2^8 for 5x5)
                // with a rounding, narrowing shift and store as T.
                helperT::vstore(reinterpret_cast<T*>(dst_data + cur * dst_step) + j, __riscv_vnclipu(sum, ksize == 5 ? 8 : 4, __RISCV_VXRM_RNU, vl), vl);
            }
        }
    }

    return CV_HAL_ERROR_OK;
}
|
||||||
|
|
||||||
|
// Separable binomial blur ([1 2 1] or [1 4 6 4 1] in each direction) for
// interleaved 4-channel 8-bit images, producing destination rows [start, end).
// Same structure as gaussianBlurC1: each source row is filtered horizontally
// into a ring buffer of ksize rows of 16-bit sums (`res`, 4 values per pixel),
// then the rows needed for an output row are combined vertically, rounded and
// stored. full_width/full_height and offset_x/offset_y describe the enclosing
// image and are used for border interpolation.
// Always returns CV_HAL_ERROR_OK.
template<int ksize>
static inline int gaussianBlurC4(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int full_width, int full_height, int offset_x, int offset_y, int border_type)
{
    // Marker for "no source sample" (borderInterpolate returned a negative value).
    constexpr int noval = std::numeric_limits<int>::max();
    // Window row x (before subtracting the kernel radius) -> source row relative to src_data, or noval.
    auto accessX = [&](int x) {
        int pi = common::borderInterpolate(offset_y + x - ksize / 2, full_height, border_type);
        return pi < 0 ? noval : pi - offset_y;
    };
    // Window column y (before subtracting the kernel radius) -> source column, or noval.
    auto accessY = [&](int y) {
        int pj = common::borderInterpolate(offset_x + y - ksize / 2, full_width, border_type);
        return pj < 0 ? noval : pj - offset_x;
    };
    // Index of the first channel of pixel (x, y) in `res`; rows are stored modulo ksize.
    auto p2idx = [&](int x, int y){ return ((x + ksize) % ksize * width + y) * 4; };

    // Binomial taps; row 0 is the 3-tap kernel, row 1 the 5-tap kernel.
    constexpr uint kernel[2][5] = {{1, 2, 1}, {1, 4, 6, 4, 1}};
    // Ring buffer holding the horizontally filtered version of the last ksize rows.
    std::vector<ushort> res(width * ksize * 4);
    // Scalar horizontal pass for one pixel (x = source row, y = column); used near the left/right edges.
    auto process = [&](int x, int y) {
        ushort sum0, sum1, sum2, sum3;
        sum0 = sum1 = sum2 = sum3 = 0;
        for (int i = 0; i < ksize; i++)
        {
            int p = accessY(y + i);
            if (p != noval)
            {
                sum0 += kernel[ksize == 5][i] * static_cast<ushort>((src_data + x * src_step)[p * 4    ]);
                sum1 += kernel[ksize == 5][i] * static_cast<ushort>((src_data + x * src_step)[p * 4 + 1]);
                sum2 += kernel[ksize == 5][i] * static_cast<ushort>((src_data + x * src_step)[p * 4 + 2]);
                sum3 += kernel[ksize == 5][i] * static_cast<ushort>((src_data + x * src_step)[p * 4 + 3]);
            }
        }
        res[p2idx(x, y)    ] = sum0;
        res[p2idx(x, y) + 1] = sum1;
        res[p2idx(x, y) + 2] = sum2;
        res[p2idx(x, y) + 3] = sum3;
    };

    // Columns in [left, right) have the whole horizontal window inside the row.
    const int left = ksize / 2, right = width - ksize / 2;
    // i is the source row being filtered horizontally; it runs ksize / 2 rows
    // beyond [start, end) on both sides so every output row has its neighbours.
    for (int i = start - ksize / 2; i < end + ksize / 2; i++)
    {
        // Horizontal pass, only for rows that exist in the enclosing image.
        if (i + offset_y >= 0 && i + offset_y < full_height)
        {
            if (left >= right)
            {
                // Row too narrow for the vector path.
                for (int j = 0; j < width; j++)
                    process(i, j);
            }
            else
            {
                for (int j = 0; j < left; j++)
                    process(i, j);
                for (int j = right; j < width; j++)
                    process(i, j);

                int vl;
                for (int j = left; j < right; j += vl)
                {
                    vl = __riscv_vsetvl_e8m1(right - j);
                    // Leftmost pixel of the window of output column j.
                    const uchar* extra = src_data + i * src_step + (j - ksize / 2) * 4;
                    // Segment load de-interleaves the 4 channels; each is widened to 16 bits.
                    auto src = __riscv_vlseg4e8_v_u8m1x4(extra, vl);
                    auto src0 = __riscv_vzext_vf2(__riscv_vget_v_u8m1x4_u8m1(src, 0), vl);
                    auto src1 = __riscv_vzext_vf2(__riscv_vget_v_u8m1x4_u8m1(src, 1), vl);
                    auto src2 = __riscv_vzext_vf2(__riscv_vget_v_u8m1x4_u8m1(src, 2), vl);
                    auto src3 = __riscv_vzext_vf2(__riscv_vget_v_u8m1x4_u8m1(src, 3), vl);

                    // From here on `extra` points at the pixels just past the loaded
                    // vector: extra[4 * k + c] is channel c of the k-th following pixel.
                    extra += vl * 4;
                    auto sum0 = src0, sum1 = src1, sum2 = src2, sum3 = src3;
                    if (ksize == 3)
                    {
                        // taps 1 2 1
                        src0 = __riscv_vslide1down(src0, extra[0], vl);
                        src1 = __riscv_vslide1down(src1, extra[1], vl);
                        src2 = __riscv_vslide1down(src2, extra[2], vl);
                        src3 = __riscv_vslide1down(src3, extra[3], vl);
                        sum0 = __riscv_vadd(sum0, __riscv_vsll(src0, 1, vl), vl);
                        sum1 = __riscv_vadd(sum1, __riscv_vsll(src1, 1, vl), vl);
                        sum2 = __riscv_vadd(sum2, __riscv_vsll(src2, 1, vl), vl);
                        sum3 = __riscv_vadd(sum3, __riscv_vsll(src3, 1, vl), vl);
                        src0 = __riscv_vslide1down(src0, extra[4], vl);
                        src1 = __riscv_vslide1down(src1, extra[5], vl);
                        src2 = __riscv_vslide1down(src2, extra[6], vl);
                        src3 = __riscv_vslide1down(src3, extra[7], vl);
                        sum0 = __riscv_vadd(sum0, src0, vl);
                        sum1 = __riscv_vadd(sum1, src1, vl);
                        sum2 = __riscv_vadd(sum2, src2, vl);
                        sum3 = __riscv_vadd(sum3, src3, vl);
                    }
                    else
                    {
                        // taps 1 4 6 4 1 (6 * v is computed as (v << 1) + (v << 2))
                        src0 = __riscv_vslide1down(src0, extra[0], vl);
                        src1 = __riscv_vslide1down(src1, extra[1], vl);
                        src2 = __riscv_vslide1down(src2, extra[2], vl);
                        src3 = __riscv_vslide1down(src3, extra[3], vl);
                        sum0 = __riscv_vadd(sum0, __riscv_vsll(src0, 2, vl), vl);
                        sum1 = __riscv_vadd(sum1, __riscv_vsll(src1, 2, vl), vl);
                        sum2 = __riscv_vadd(sum2, __riscv_vsll(src2, 2, vl), vl);
                        sum3 = __riscv_vadd(sum3, __riscv_vsll(src3, 2, vl), vl);
                        src0 = __riscv_vslide1down(src0, extra[4], vl);
                        src1 = __riscv_vslide1down(src1, extra[5], vl);
                        src2 = __riscv_vslide1down(src2, extra[6], vl);
                        src3 = __riscv_vslide1down(src3, extra[7], vl);
                        sum0 = __riscv_vadd(sum0, __riscv_vadd(__riscv_vsll(src0, 1, vl), __riscv_vsll(src0, 2, vl), vl), vl);
                        sum1 = __riscv_vadd(sum1, __riscv_vadd(__riscv_vsll(src1, 1, vl), __riscv_vsll(src1, 2, vl), vl), vl);
                        sum2 = __riscv_vadd(sum2, __riscv_vadd(__riscv_vsll(src2, 1, vl), __riscv_vsll(src2, 2, vl), vl), vl);
                        sum3 = __riscv_vadd(sum3, __riscv_vadd(__riscv_vsll(src3, 1, vl), __riscv_vsll(src3, 2, vl), vl), vl);
                        src0 = __riscv_vslide1down(src0, extra[ 8], vl);
                        src1 = __riscv_vslide1down(src1, extra[ 9], vl);
                        src2 = __riscv_vslide1down(src2, extra[10], vl);
                        src3 = __riscv_vslide1down(src3, extra[11], vl);
                        sum0 = __riscv_vadd(sum0, __riscv_vsll(src0, 2, vl), vl);
                        sum1 = __riscv_vadd(sum1, __riscv_vsll(src1, 2, vl), vl);
                        sum2 = __riscv_vadd(sum2, __riscv_vsll(src2, 2, vl), vl);
                        sum3 = __riscv_vadd(sum3, __riscv_vsll(src3, 2, vl), vl);
                        src0 = __riscv_vslide1down(src0, extra[12], vl);
                        src1 = __riscv_vslide1down(src1, extra[13], vl);
                        src2 = __riscv_vslide1down(src2, extra[14], vl);
                        src3 = __riscv_vslide1down(src3, extra[15], vl);
                        sum0 = __riscv_vadd(sum0, src0, vl);
                        sum1 = __riscv_vadd(sum1, src1, vl);
                        sum2 = __riscv_vadd(sum2, src2, vl);
                        sum3 = __riscv_vadd(sum3, src3, vl);
                    }

                    // Re-interleave the four 16-bit channel sums and store them in the ring buffer.
                    vuint16m2x4_t dst{};
                    dst = __riscv_vset_v_u16m2_u16m2x4(dst, 0, sum0);
                    dst = __riscv_vset_v_u16m2_u16m2x4(dst, 1, sum1);
                    dst = __riscv_vset_v_u16m2_u16m2x4(dst, 2, sum2);
                    dst = __riscv_vset_v_u16m2_u16m2x4(dst, 3, sum3);
                    __riscv_vsseg4e16(res.data() + p2idx(i, j), dst, vl);
                }
            }
        }

        // Vertical pass: row i is the last one needed for output row cur.
        int cur = i - ksize / 2;
        if (cur >= start)
        {
            // Ring-buffer rows feeding output row `cur`; nullptr means the row contributes zero.
            const ushort* row0 = accessX(cur    ) == noval ? nullptr : res.data() + p2idx(accessX(cur    ), 0);
            const ushort* row1 = accessX(cur + 1) == noval ? nullptr : res.data() + p2idx(accessX(cur + 1), 0);
            const ushort* row2 = accessX(cur + 2) == noval ? nullptr : res.data() + p2idx(accessX(cur + 2), 0);
            const ushort* row3 = nullptr, *row4 = nullptr;
            if (ksize == 5)
            {
                row3 = accessX(cur + 3) == noval ? nullptr : res.data() + p2idx(accessX(cur + 3), 0);
                row4 = accessX(cur + 4) == noval ? nullptr : res.data() + p2idx(accessX(cur + 4), 0);
            }

            int vl;
            for (int j = 0; j < width; j += vl)
            {
                vl = __riscv_vsetvl_e16m2(width - j);
                vuint16m2_t sum0, sum1, sum2, sum3, src0{}, src1{}, src2{}, src3{};
                sum0 = sum1 = sum2 = sum3 = __riscv_vmv_v_x_u16m2(0, vl);

                // Loads vl pixels of one ring-buffer row into src0..src3 (one vector per channel).
                auto loadres = [&](const ushort* row) {
                    auto src = __riscv_vlseg4e16_v_u16m2x4(row + j * 4, vl);
                    src0 = __riscv_vget_v_u16m2x4_u16m2(src, 0);
                    src1 = __riscv_vget_v_u16m2x4_u16m2(src, 1);
                    src2 = __riscv_vget_v_u16m2x4_u16m2(src, 2);
                    src3 = __riscv_vget_v_u16m2x4_u16m2(src, 3);
                };
                // Weight 1 (first row).
                if (row0)
                {
                    loadres(row0);
                    sum0 = src0;
                    sum1 = src1;
                    sum2 = src2;
                    sum3 = src3;
                }
                // Weight 2 for the 3-tap kernel, 4 for the 5-tap kernel.
                if (row1)
                {
                    loadres(row1);
                    sum0 = __riscv_vadd(sum0, __riscv_vsll(src0, ksize == 5 ? 2 : 1, vl), vl);
                    sum1 = __riscv_vadd(sum1, __riscv_vsll(src1, ksize == 5 ? 2 : 1, vl), vl);
                    sum2 = __riscv_vadd(sum2, __riscv_vsll(src2, ksize == 5 ? 2 : 1, vl), vl);
                    sum3 = __riscv_vadd(sum3, __riscv_vsll(src3, ksize == 5 ? 2 : 1, vl), vl);
                }
                // Weight 1 for the 3-tap kernel, 6 for the 5-tap kernel.
                if (row2)
                {
                    loadres(row2);
                    if (ksize == 5)
                    {
                        src0 = __riscv_vadd(__riscv_vsll(src0, 1, vl), __riscv_vsll(src0, 2, vl), vl);
                        src1 = __riscv_vadd(__riscv_vsll(src1, 1, vl), __riscv_vsll(src1, 2, vl), vl);
                        src2 = __riscv_vadd(__riscv_vsll(src2, 1, vl), __riscv_vsll(src2, 2, vl), vl);
                        src3 = __riscv_vadd(__riscv_vsll(src3, 1, vl), __riscv_vsll(src3, 2, vl), vl);
                    }
                    sum0 = __riscv_vadd(sum0, src0, vl);
                    sum1 = __riscv_vadd(sum1, src1, vl);
                    sum2 = __riscv_vadd(sum2, src2, vl);
                    sum3 = __riscv_vadd(sum3, src3, vl);
                }
                // row3 and row4 are only non-null for the 5-tap kernel: weights 4 and 1.
                if (row3)
                {
                    loadres(row3);
                    sum0 = __riscv_vadd(sum0, __riscv_vsll(src0, 2, vl), vl);
                    sum1 = __riscv_vadd(sum1, __riscv_vsll(src1, 2, vl), vl);
                    sum2 = __riscv_vadd(sum2, __riscv_vsll(src2, 2, vl), vl);
                    sum3 = __riscv_vadd(sum3, __riscv_vsll(src3, 2, vl), vl);
                }
                if (row4)
                {
                    loadres(row4);
                    sum0 = __riscv_vadd(sum0, src0, vl);
                    sum1 = __riscv_vadd(sum1, src1, vl);
                    sum2 = __riscv_vadd(sum2, src2, vl);
                    sum3 = __riscv_vadd(sum3, src3, vl);
                }

                // Divide by the total kernel weight (16 = 2^4 for 3x3, 256 = 2^8 for 5x5)
                // with a rounding, narrowing shift, re-interleave and store.
                vuint8m1x4_t dst{};
                dst = __riscv_vset_v_u8m1_u8m1x4(dst, 0, __riscv_vnclipu(sum0, ksize == 5 ? 8 : 4, __RISCV_VXRM_RNU, vl));
                dst = __riscv_vset_v_u8m1_u8m1x4(dst, 1, __riscv_vnclipu(sum1, ksize == 5 ? 8 : 4, __RISCV_VXRM_RNU, vl));
                dst = __riscv_vset_v_u8m1_u8m1x4(dst, 2, __riscv_vnclipu(sum2, ksize == 5 ? 8 : 4, __RISCV_VXRM_RNU, vl));
                dst = __riscv_vset_v_u8m1_u8m1x4(dst, 3, __riscv_vnclipu(sum3, ksize == 5 ? 8 : 4, __RISCV_VXRM_RNU, vl));
                __riscv_vsseg4e8(dst_data + cur * dst_step + j * 4, dst, vl);
            }
        }
    }

    return CV_HAL_ERROR_OK;
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
|
int gaussianBlurBinomial(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int depth, int cn, size_t margin_left, size_t margin_top, size_t margin_right, size_t margin_bottom, size_t ksize, int border_type)
|
||||||
|
{
|
||||||
|
const int type = CV_MAKETYPE(depth, cn);
|
||||||
|
if ((type != CV_8UC1 && type != CV_8UC4 && type != CV_16UC1) || src_data == dst_data)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
if ((ksize != 3 && ksize != 5) || border_type & BORDER_ISOLATED || border_type == BORDER_WRAP)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
|
switch (ksize*100 + type)
|
||||||
|
{
|
||||||
|
case 300 + CV_8UC1:
|
||||||
|
return common::invoke(height, {gaussianBlurC1<3, RVV_U8M4, RVV_U16M8>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, border_type);
|
||||||
|
case 500 + CV_8UC1:
|
||||||
|
return common::invoke(height, {gaussianBlurC1<5, RVV_U8M4, RVV_U16M8>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, border_type);
|
||||||
|
case 300 + CV_16UC1:
|
||||||
|
return common::invoke(height, {gaussianBlurC1<3, RVV_U16M4, RVV_U32M8>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, border_type);
|
||||||
|
case 500 + CV_16UC1:
|
||||||
|
return common::invoke(height, {gaussianBlurC1<5, RVV_U16M4, RVV_U32M8>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, border_type);
|
||||||
|
case 300 + CV_8UC4:
|
||||||
|
return common::invoke(height, {gaussianBlurC4<3>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, border_type);
|
||||||
|
case 500 + CV_8UC4:
|
||||||
|
return common::invoke(height, {gaussianBlurC4<5>}, src_data, src_step, dst_data, dst_step, width, margin_left + width + margin_right, margin_top + height + margin_bottom, margin_left, margin_top, border_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::imgproc
|
282
hal/riscv-rvv/src/imgproc/histogram.cpp
Normal file
282
hal/riscv-rvv/src/imgproc/histogram.cpp
Normal file
@ -0,0 +1,282 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
|
#include <cstring>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class HistogramInvoker : public ParallelLoopBody
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
template<typename... Args>
|
||||||
|
HistogramInvoker(std::function<void(int, int, Args...)> _func, Args&&... args)
|
||||||
|
{
|
||||||
|
func = std::bind(_func, std::placeholders::_1, std::placeholders::_2, std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void operator()(const Range& range) const override
|
||||||
|
{
|
||||||
|
func(range.start, range.end);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::function<void(int, int)> func;
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr int HIST_SZ = std::numeric_limits<uchar>::max() + 1;
|
||||||
|
|
||||||
|
static inline void hist_invoke(int start, int end, const uchar* src_data, size_t src_step, int width, int* hist, std::mutex* m)
|
||||||
|
{
|
||||||
|
int h[HIST_SZ] = {0};
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
const uchar* src = src_data + i * src_step;
|
||||||
|
int j;
|
||||||
|
for (j = 0; j + 3 < width; j += 4)
|
||||||
|
{
|
||||||
|
int t0 = src[j], t1 = src[j+1];
|
||||||
|
h[t0]++; h[t1]++;
|
||||||
|
t0 = src[j+2]; t1 = src[j+3];
|
||||||
|
h[t0]++; h[t1]++;
|
||||||
|
}
|
||||||
|
for (; j < width; j++)
|
||||||
|
{
|
||||||
|
h[src[j]]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lk(*m);
|
||||||
|
for (int i = 0; i < HIST_SZ; i++)
|
||||||
|
{
|
||||||
|
hist[i] += h[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void lut_invoke(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, const uchar* lut)
|
||||||
|
{
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m8(width - j);
|
||||||
|
auto src = __riscv_vle8_v_u8m8(src_data + i * src_step + j, vl);
|
||||||
|
auto dst = __riscv_vloxei8_v_u8m8(lut, src, vl);
|
||||||
|
__riscv_vse8(dst_data + i * dst_step + j, dst, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // equalize_hist
|
||||||
|
|
||||||
|
// the algorithm is copied from imgproc/src/histogram.cpp,
|
||||||
|
// in the function void cv::equalizeHist
|
||||||
|
int equalize_hist(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height)
|
||||||
|
{
|
||||||
|
int hist[HIST_SZ] = {0};
|
||||||
|
uchar lut[HIST_SZ];
|
||||||
|
|
||||||
|
std::mutex m;
|
||||||
|
cv::parallel_for_(Range(0, height), HistogramInvoker({hist_invoke}, src_data, src_step, width, reinterpret_cast<int *>(hist), &m), static_cast<double>(width * height) / (1 << 15));
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
while (!hist[i]) ++i;
|
||||||
|
|
||||||
|
float scale = (HIST_SZ - 1.f)/(width * height - hist[i]);
|
||||||
|
int sum = 0;
|
||||||
|
for (lut[i++] = 0; i < HIST_SZ; i++)
|
||||||
|
{
|
||||||
|
sum += hist[i];
|
||||||
|
lut[i] = std::min(std::max(static_cast<int>(std::round(sum * scale)), 0), HIST_SZ - 1);
|
||||||
|
}
|
||||||
|
cv::parallel_for_(Range(0, height), HistogramInvoker({lut_invoke}, src_data, src_step, dst_data, dst_step, width, reinterpret_cast<const uchar*>(lut)), static_cast<double>(width * height) / (1 << 15));
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ############ calc_hist ############
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
constexpr int MAX_VLEN = 1024;
|
||||||
|
constexpr int MAX_E8M1 = MAX_VLEN / 8;
|
||||||
|
|
||||||
|
inline void cvt_32s32f(const int* ihist, float* fhist, int hist_size) {
|
||||||
|
int vl;
|
||||||
|
for (int i = 0; i < hist_size; i += vl) {
|
||||||
|
vl = __riscv_vsetvl_e32m8(hist_size - i);
|
||||||
|
auto iv = __riscv_vle32_v_i32m8(ihist + i, vl);
|
||||||
|
__riscv_vse32(fhist + i, __riscv_vfcvt_f(iv, vl), vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void cvt32s32f_add32f(const int* ihist, float* fhist, int hist_size) {
|
||||||
|
int vl;
|
||||||
|
for (int i = 0; i < hist_size; i += vl) {
|
||||||
|
vl = __riscv_vsetvl_e32m8(hist_size - i);
|
||||||
|
auto iv = __riscv_vle32_v_i32m8(ihist + i, vl);
|
||||||
|
auto fv = __riscv_vle32_v_f32m8(fhist + i, vl);
|
||||||
|
auto s = __riscv_vfadd(__riscv_vfcvt_f(iv, vl), fv, vl);
|
||||||
|
__riscv_vse32(fhist + i, s, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int calc_hist(const uchar* src_data, size_t src_step, int src_type, int src_width, int src_height,
|
||||||
|
float* hist_data, int hist_size, const float** ranges, bool uniform, bool accumulate) {
|
||||||
|
int depth = CV_MAT_DEPTH(src_type), cn = CV_MAT_CN(src_type);
|
||||||
|
|
||||||
|
// [TODO] support non-uniform
|
||||||
|
// In case of CV_8U, it is already fast enough with lut
|
||||||
|
if ((depth != CV_16U && depth != CV_32F) || !uniform) {
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<int> buf_ihist(hist_size+1, 0);
|
||||||
|
int* ihist = buf_ihist.data();
|
||||||
|
|
||||||
|
double low = ranges[0][0], high = ranges[0][1];
|
||||||
|
double t = hist_size / (high - low);
|
||||||
|
double a = t, b = -t * low;
|
||||||
|
double v0_lo = low, v0_hi = high;
|
||||||
|
|
||||||
|
int sz = hist_size, d0 = cn, step0 = (int)(src_step / CV_ELEM_SIZE1(src_type));
|
||||||
|
int buf_idx[MAX_E8M1];
|
||||||
|
|
||||||
|
if (depth == CV_16U) {
|
||||||
|
const ushort* p0 = (const ushort*)src_data;
|
||||||
|
if (d0 == 1) {
|
||||||
|
while (src_height--) {
|
||||||
|
int vl;
|
||||||
|
for (int x = 0; x < src_width; x += vl) {
|
||||||
|
vl = __riscv_vsetvl_e16m2(src_width - x);
|
||||||
|
|
||||||
|
auto v = __riscv_vfcvt_f(__riscv_vwcvtu_x(__riscv_vwcvtu_x(__riscv_vle16_v_u16m2(p0 + x, vl), vl), vl), vl);
|
||||||
|
|
||||||
|
auto m0 = __riscv_vmflt(v, v0_lo, vl);
|
||||||
|
auto m1 = __riscv_vmfge(v, v0_hi, vl);
|
||||||
|
auto m = __riscv_vmor(m0, m1, vl);
|
||||||
|
|
||||||
|
auto fidx = __riscv_vfadd(__riscv_vfmul(v, a, vl), b, vl);
|
||||||
|
auto idx = __riscv_vfncvt_x(__riscv_vfsub(fidx, 0.5f - 1e-6, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, 0, __riscv_vmslt(idx, 0, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, sz-1, __riscv_vmsgt(idx, sz-1, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, -1, m, vl);
|
||||||
|
__riscv_vse32(buf_idx, idx, vl);
|
||||||
|
|
||||||
|
for (int i = 0; i < vl; i++) {
|
||||||
|
int _idx = buf_idx[i] + 1;
|
||||||
|
ihist[_idx]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p0 += step0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
while (src_height--) {
|
||||||
|
int vl;
|
||||||
|
for (int x = 0; x < src_width; x += vl) {
|
||||||
|
vl = __riscv_vsetvl_e16m2(src_width - x);
|
||||||
|
|
||||||
|
auto v = __riscv_vfcvt_f(__riscv_vwcvtu_x(__riscv_vwcvtu_x(__riscv_vlse16_v_u16m2(p0 + x*d0, sizeof(ushort)*d0, vl), vl), vl), vl);
|
||||||
|
|
||||||
|
auto m0 = __riscv_vmflt(v, v0_lo, vl);
|
||||||
|
auto m1 = __riscv_vmfge(v, v0_hi, vl);
|
||||||
|
auto m = __riscv_vmor(m0, m1, vl);
|
||||||
|
|
||||||
|
auto fidx = __riscv_vfadd(__riscv_vfmul(v, a, vl), b, vl);
|
||||||
|
auto idx = __riscv_vfncvt_x(__riscv_vfsub(fidx, 0.5f - 1e-6, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, 0, __riscv_vmslt(idx, 0, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, sz-1, __riscv_vmsgt(idx, sz-1, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, -1, m, vl);
|
||||||
|
__riscv_vse32(buf_idx, idx, vl);
|
||||||
|
|
||||||
|
for (int i = 0; i < vl; i++) {
|
||||||
|
int _idx = buf_idx[i] + 1;
|
||||||
|
ihist[_idx]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p0 += step0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (depth == CV_32F) {
|
||||||
|
const float* p0 = (const float*)src_data;
|
||||||
|
if (d0 == 1) {
|
||||||
|
while (src_height--) {
|
||||||
|
int vl;
|
||||||
|
for (int x = 0; x < src_width; x += vl) {
|
||||||
|
vl = __riscv_vsetvl_e32m4(src_width - x);
|
||||||
|
|
||||||
|
auto v = __riscv_vfwcvt_f(__riscv_vle32_v_f32m4(p0 + x, vl), vl);
|
||||||
|
|
||||||
|
auto m0 = __riscv_vmflt(v, v0_lo, vl);
|
||||||
|
auto m1 = __riscv_vmfge(v, v0_hi, vl);
|
||||||
|
auto m = __riscv_vmor(m0, m1, vl);
|
||||||
|
|
||||||
|
auto fidx = __riscv_vfadd(__riscv_vfmul(v, a, vl), b, vl);
|
||||||
|
auto idx = __riscv_vfncvt_x(__riscv_vfsub(fidx, 0.5f - 1e-6, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, 0, __riscv_vmslt(idx, 0, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, sz-1, __riscv_vmsgt(idx, sz-1, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, -1, m, vl);
|
||||||
|
__riscv_vse32(buf_idx, idx, vl);
|
||||||
|
|
||||||
|
for (int i = 0; i < vl; i++) {
|
||||||
|
int _idx = buf_idx[i] + 1;
|
||||||
|
ihist[_idx]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p0 += step0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
while (src_height--) {
|
||||||
|
int vl;
|
||||||
|
for (int x = 0; x < src_width; x += vl) {
|
||||||
|
vl = __riscv_vsetvl_e32m4(src_width - x);
|
||||||
|
|
||||||
|
auto v = __riscv_vfwcvt_f(__riscv_vlse32_v_f32m4(p0 + x*d0, sizeof(float)*d0, vl), vl);
|
||||||
|
|
||||||
|
auto m0 = __riscv_vmflt(v, v0_lo, vl);
|
||||||
|
auto m1 = __riscv_vmfge(v, v0_hi, vl);
|
||||||
|
auto m = __riscv_vmor(m0, m1, vl);
|
||||||
|
|
||||||
|
auto fidx = __riscv_vfadd(__riscv_vfmul(v, a, vl), b, vl);
|
||||||
|
auto idx = __riscv_vfncvt_x(__riscv_vfsub(fidx, 0.5f - 1e-6, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, 0, __riscv_vmslt(idx, 0, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, sz-1, __riscv_vmsgt(idx, sz-1, vl), vl);
|
||||||
|
idx = __riscv_vmerge(idx, -1, m, vl);
|
||||||
|
__riscv_vse32(buf_idx, idx, vl);
|
||||||
|
|
||||||
|
for (int i = 0; i < vl; i++) {
|
||||||
|
int _idx = buf_idx[i] + 1;
|
||||||
|
ihist[_idx]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p0 += step0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (accumulate) {
|
||||||
|
cvt32s32f_add32f(ihist+1, hist_data, hist_size);
|
||||||
|
} else {
|
||||||
|
std::memset(hist_data, 0, sizeof(float)*hist_size);
|
||||||
|
cvt_32s32f(ihist+1, hist_data, hist_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::imgproc
|
@ -4,16 +4,13 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_INTEGRAL_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_INTEGRAL_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
#include "types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#undef cv_hal_integral
|
namespace {
|
||||||
#define cv_hal_integral cv::cv_hal_rvv::integral
|
|
||||||
|
|
||||||
template <typename vec_t>
|
template <typename vec_t>
|
||||||
inline typename vec_t::VecType repeat_last_n(typename vec_t::VecType vs, int n, size_t vl) {
|
inline typename vec_t::VecType repeat_last_n(typename vec_t::VecType vs, int n, size_t vl) {
|
||||||
@ -87,6 +84,8 @@ inline int integral(const uchar* src_data, size_t src_step, uchar* sum_data, siz
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@brief Calculate integral image
|
@brief Calculate integral image
|
||||||
@param depth Depth of source image
|
@param depth Depth of source image
|
||||||
@ -119,12 +118,12 @@ inline int integral(const uchar* src_data, size_t src_step, uchar* sum_data, siz
|
|||||||
CV_32F | CV_64F | CV_64F
|
CV_32F | CV_64F | CV_64F
|
||||||
CV_64F | CV_64F | CV_64F
|
CV_64F | CV_64F | CV_64F
|
||||||
*/
|
*/
|
||||||
inline int integral(int depth, int sdepth, int sqdepth,
|
int integral(int depth, int sdepth, int sqdepth,
|
||||||
const uchar* src_data, size_t src_step,
|
const uchar* src_data, size_t src_step,
|
||||||
uchar* sum_data, size_t sum_step,
|
uchar* sum_data, size_t sum_step,
|
||||||
uchar* sqsum_data, size_t sqsum_step,
|
uchar* sqsum_data, size_t sqsum_step,
|
||||||
uchar* tilted_data, [[maybe_unused]] size_t tilted_step,
|
uchar* tilted_data, [[maybe_unused]] size_t tilted_step,
|
||||||
int width, int height, int cn) {
|
int width, int height, int cn) {
|
||||||
// tilted sum and cn == 3 cases are not supported
|
// tilted sum and cn == 3 cases are not supported
|
||||||
if (tilted_data || cn == 3) {
|
if (tilted_data || cn == 3) {
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
@ -168,6 +167,6 @@ inline int integral(int depth, int sdepth, int sqdepth,
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::imgproc
|
575
hal/riscv-rvv/src/imgproc/median_blur.cpp
Normal file
575
hal/riscv-rvv/src/imgproc/median_blur.cpp
Normal file
@ -0,0 +1,575 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// the algorithm is copied from imgproc/src/median_blur.simd.cpp
|
||||||
|
// in the function template static void medianBlur_SortNet
|
||||||
|
template<int ksize, typename helper>
|
||||||
|
static inline int medianBlurC1(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height)
|
||||||
|
{
|
||||||
|
using T = typename helper::ElemType;
|
||||||
|
using VT = typename helper::VecType;
|
||||||
|
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
const T* row0 = reinterpret_cast<const T*>(src_data + std::min(std::max(i - ksize / 2, 0), height - 1) * src_step);
|
||||||
|
const T* row1 = reinterpret_cast<const T*>(src_data + std::min(std::max(i + 1 - ksize / 2, 0), height - 1) * src_step);
|
||||||
|
const T* row2 = reinterpret_cast<const T*>(src_data + std::min(std::max(i + 2 - ksize / 2, 0), height - 1) * src_step);
|
||||||
|
const T* row3 = reinterpret_cast<const T*>(src_data + std::min(std::max(i + 3 - ksize / 2, 0), height - 1) * src_step);
|
||||||
|
const T* row4 = reinterpret_cast<const T*>(src_data + std::min(std::max(i + 4 - ksize / 2, 0), height - 1) * src_step);
|
||||||
|
int vl;
|
||||||
|
auto vop = [&vl](VT& a, VT& b) {
|
||||||
|
auto t = a;
|
||||||
|
a = helper::vmin(a, b, vl);
|
||||||
|
b = helper::vmax(t, b, vl);
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = helper::setvl(width - j);
|
||||||
|
if (ksize == 3)
|
||||||
|
{
|
||||||
|
VT p0, p1, p2;
|
||||||
|
VT p3, p4, p5;
|
||||||
|
VT p6, p7, p8;
|
||||||
|
if (j != 0)
|
||||||
|
{
|
||||||
|
p0 = helper::vload(row0 + j - 1, vl);
|
||||||
|
p3 = helper::vload(row1 + j - 1, vl);
|
||||||
|
p6 = helper::vload(row2 + j - 1, vl);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
p0 = helper::vslide1up(helper::vload(row0, vl), row0[0], vl);
|
||||||
|
p3 = helper::vslide1up(helper::vload(row1, vl), row1[0], vl);
|
||||||
|
p6 = helper::vslide1up(helper::vload(row2, vl), row2[0], vl);
|
||||||
|
}
|
||||||
|
p1 = helper::vslide1down(p0, row0[j + vl - 1], vl);
|
||||||
|
p4 = helper::vslide1down(p3, row1[j + vl - 1], vl);
|
||||||
|
p7 = helper::vslide1down(p6, row2[j + vl - 1], vl);
|
||||||
|
p2 = helper::vslide1down(p1, row0[std::min(width - 1, j + vl)], vl);
|
||||||
|
p5 = helper::vslide1down(p4, row1[std::min(width - 1, j + vl)], vl);
|
||||||
|
p8 = helper::vslide1down(p7, row2[std::min(width - 1, j + vl)], vl);
|
||||||
|
|
||||||
|
vop(p1, p2); vop(p4, p5); vop(p7, p8); vop(p0, p1);
|
||||||
|
vop(p3, p4); vop(p6, p7); vop(p1, p2); vop(p4, p5);
|
||||||
|
vop(p7, p8); vop(p0, p3); vop(p5, p8); vop(p4, p7);
|
||||||
|
vop(p3, p6); vop(p1, p4); vop(p2, p5); vop(p4, p7);
|
||||||
|
vop(p4, p2); vop(p6, p4); vop(p4, p2);
|
||||||
|
helper::vstore(reinterpret_cast<T*>(dst_data + i * dst_step) + j, p4, vl);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
VT p0, p1, p2, p3, p4;
|
||||||
|
VT p5, p6, p7, p8, p9;
|
||||||
|
VT p10, p11, p12, p13, p14;
|
||||||
|
VT p15, p16, p17, p18, p19;
|
||||||
|
VT p20, p21, p22, p23, p24;
|
||||||
|
if (j >= 2)
|
||||||
|
{
|
||||||
|
p0 = helper::vload(row0 + j - 2, vl);
|
||||||
|
p5 = helper::vload(row1 + j - 2, vl);
|
||||||
|
p10 = helper::vload(row2 + j - 2, vl);
|
||||||
|
p15 = helper::vload(row3 + j - 2, vl);
|
||||||
|
p20 = helper::vload(row4 + j - 2, vl);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
p0 = helper::vslide1up(helper::vload(row0, vl), row0[0], vl);
|
||||||
|
p5 = helper::vslide1up(helper::vload(row1, vl), row1[0], vl);
|
||||||
|
p10 = helper::vslide1up(helper::vload(row2, vl), row2[0], vl);
|
||||||
|
p15 = helper::vslide1up(helper::vload(row3, vl), row3[0], vl);
|
||||||
|
p20 = helper::vslide1up(helper::vload(row4, vl), row4[0], vl);
|
||||||
|
if (j == 0)
|
||||||
|
{
|
||||||
|
p0 = helper::vslide1up(p0, row0[0], vl);
|
||||||
|
p5 = helper::vslide1up(p5, row1[0], vl);
|
||||||
|
p10 = helper::vslide1up(p10, row2[0], vl);
|
||||||
|
p15 = helper::vslide1up(p15, row3[0], vl);
|
||||||
|
p20 = helper::vslide1up(p20, row4[0], vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p1 = helper::vslide1down(p0, row0[j + vl - 2], vl);
|
||||||
|
p6 = helper::vslide1down(p5, row1[j + vl - 2], vl);
|
||||||
|
p11 = helper::vslide1down(p10, row2[j + vl - 2], vl);
|
||||||
|
p16 = helper::vslide1down(p15, row3[j + vl - 2], vl);
|
||||||
|
p21 = helper::vslide1down(p20, row4[j + vl - 2], vl);
|
||||||
|
p2 = helper::vslide1down(p1, row0[j + vl - 1], vl);
|
||||||
|
p7 = helper::vslide1down(p6, row1[j + vl - 1], vl);
|
||||||
|
p12 = helper::vslide1down(p11, row2[j + vl - 1], vl);
|
||||||
|
p17 = helper::vslide1down(p16, row3[j + vl - 1], vl);
|
||||||
|
p22 = helper::vslide1down(p21, row4[j + vl - 1], vl);
|
||||||
|
p3 = helper::vslide1down(p2, row0[std::min(width - 1, j + vl)], vl);
|
||||||
|
p8 = helper::vslide1down(p7, row1[std::min(width - 1, j + vl)], vl);
|
||||||
|
p13 = helper::vslide1down(p12, row2[std::min(width - 1, j + vl)], vl);
|
||||||
|
p18 = helper::vslide1down(p17, row3[std::min(width - 1, j + vl)], vl);
|
||||||
|
p23 = helper::vslide1down(p22, row4[std::min(width - 1, j + vl)], vl);
|
||||||
|
p4 = helper::vslide1down(p3, row0[std::min(width - 1, j + vl + 1)], vl);
|
||||||
|
p9 = helper::vslide1down(p8, row1[std::min(width - 1, j + vl + 1)], vl);
|
||||||
|
p14 = helper::vslide1down(p13, row2[std::min(width - 1, j + vl + 1)], vl);
|
||||||
|
p19 = helper::vslide1down(p18, row3[std::min(width - 1, j + vl + 1)], vl);
|
||||||
|
p24 = helper::vslide1down(p23, row4[std::min(width - 1, j + vl + 1)], vl);
|
||||||
|
|
||||||
|
vop(p1, p2); vop(p0, p1); vop(p1, p2); vop(p4, p5); vop(p3, p4);
|
||||||
|
vop(p4, p5); vop(p0, p3); vop(p2, p5); vop(p2, p3); vop(p1, p4);
|
||||||
|
vop(p1, p2); vop(p3, p4); vop(p7, p8); vop(p6, p7); vop(p7, p8);
|
||||||
|
vop(p10, p11); vop(p9, p10); vop(p10, p11); vop(p6, p9); vop(p8, p11);
|
||||||
|
vop(p8, p9); vop(p7, p10); vop(p7, p8); vop(p9, p10); vop(p0, p6);
|
||||||
|
vop(p4, p10); vop(p4, p6); vop(p2, p8); vop(p2, p4); vop(p6, p8);
|
||||||
|
vop(p1, p7); vop(p5, p11); vop(p5, p7); vop(p3, p9); vop(p3, p5);
|
||||||
|
vop(p7, p9); vop(p1, p2); vop(p3, p4); vop(p5, p6); vop(p7, p8);
|
||||||
|
vop(p9, p10); vop(p13, p14); vop(p12, p13); vop(p13, p14); vop(p16, p17);
|
||||||
|
vop(p15, p16); vop(p16, p17); vop(p12, p15); vop(p14, p17); vop(p14, p15);
|
||||||
|
vop(p13, p16); vop(p13, p14); vop(p15, p16); vop(p19, p20); vop(p18, p19);
|
||||||
|
vop(p19, p20); vop(p21, p22); vop(p23, p24); vop(p21, p23); vop(p22, p24);
|
||||||
|
vop(p22, p23); vop(p18, p21); vop(p20, p23); vop(p20, p21); vop(p19, p22);
|
||||||
|
vop(p22, p24); vop(p19, p20); vop(p21, p22); vop(p23, p24); vop(p12, p18);
|
||||||
|
vop(p16, p22); vop(p16, p18); vop(p14, p20); vop(p20, p24); vop(p14, p16);
|
||||||
|
vop(p18, p20); vop(p22, p24); vop(p13, p19); vop(p17, p23); vop(p17, p19);
|
||||||
|
vop(p15, p21); vop(p15, p17); vop(p19, p21); vop(p13, p14); vop(p15, p16);
|
||||||
|
vop(p17, p18); vop(p19, p20); vop(p21, p22); vop(p23, p24); vop(p0, p12);
|
||||||
|
vop(p8, p20); vop(p8, p12); vop(p4, p16); vop(p16, p24); vop(p12, p16);
|
||||||
|
vop(p2, p14); vop(p10, p22); vop(p10, p14); vop(p6, p18); vop(p6, p10);
|
||||||
|
vop(p10, p12); vop(p1, p13); vop(p9, p21); vop(p9, p13); vop(p5, p17);
|
||||||
|
vop(p13, p17); vop(p3, p15); vop(p11, p23); vop(p11, p15); vop(p7, p19);
|
||||||
|
vop(p7, p11); vop(p11, p13); vop(p11, p12);
|
||||||
|
helper::vstore(reinterpret_cast<T*>(dst_data + i * dst_step) + j, p12, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int ksize>
|
||||||
|
static inline int medianBlurC4(int start, int end, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height)
|
||||||
|
{
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
const uchar* row0 = src_data + std::min(std::max(i - ksize / 2, 0), height - 1) * src_step;
|
||||||
|
const uchar* row1 = src_data + std::min(std::max(i + 1 - ksize / 2, 0), height - 1) * src_step;
|
||||||
|
const uchar* row2 = src_data + std::min(std::max(i + 2 - ksize / 2, 0), height - 1) * src_step;
|
||||||
|
const uchar* row3 = src_data + std::min(std::max(i + 3 - ksize / 2, 0), height - 1) * src_step;
|
||||||
|
const uchar* row4 = src_data + std::min(std::max(i + 4 - ksize / 2, 0), height - 1) * src_step;
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
if (ksize == 3)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m1(width - j);
|
||||||
|
vuint8m1_t p00, p01, p02;
|
||||||
|
vuint8m1_t p03, p04, p05;
|
||||||
|
vuint8m1_t p06, p07, p08;
|
||||||
|
vuint8m1_t p10, p11, p12;
|
||||||
|
vuint8m1_t p13, p14, p15;
|
||||||
|
vuint8m1_t p16, p17, p18;
|
||||||
|
vuint8m1_t p20, p21, p22;
|
||||||
|
vuint8m1_t p23, p24, p25;
|
||||||
|
vuint8m1_t p26, p27, p28;
|
||||||
|
vuint8m1_t p30, p31, p32;
|
||||||
|
vuint8m1_t p33, p34, p35;
|
||||||
|
vuint8m1_t p36, p37, p38;
|
||||||
|
auto loadsrc = [&vl](const uchar* row, vuint8m1_t& p0, vuint8m1_t& p1, vuint8m1_t& p2, vuint8m1_t& p3) {
|
||||||
|
auto src = __riscv_vlseg4e8_v_u8m1x4(row, vl);
|
||||||
|
p0 = __riscv_vget_v_u8m1x4_u8m1(src, 0);
|
||||||
|
p1 = __riscv_vget_v_u8m1x4_u8m1(src, 1);
|
||||||
|
p2 = __riscv_vget_v_u8m1x4_u8m1(src, 2);
|
||||||
|
p3 = __riscv_vget_v_u8m1x4_u8m1(src, 3);
|
||||||
|
};
|
||||||
|
if (j != 0)
|
||||||
|
{
|
||||||
|
loadsrc(row0 + (j - 1) * 4, p00, p10, p20, p30);
|
||||||
|
loadsrc(row1 + (j - 1) * 4, p03, p13, p23, p33);
|
||||||
|
loadsrc(row2 + (j - 1) * 4, p06, p16, p26, p36);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
loadsrc(row0, p00, p10, p20, p30);
|
||||||
|
loadsrc(row1, p03, p13, p23, p33);
|
||||||
|
loadsrc(row2, p06, p16, p26, p36);
|
||||||
|
p00 = __riscv_vslide1up(p00, row0[0], vl);
|
||||||
|
p10 = __riscv_vslide1up(p10, row0[1], vl);
|
||||||
|
p20 = __riscv_vslide1up(p20, row0[2], vl);
|
||||||
|
p30 = __riscv_vslide1up(p30, row0[3], vl);
|
||||||
|
p03 = __riscv_vslide1up(p03, row1[0], vl);
|
||||||
|
p13 = __riscv_vslide1up(p13, row1[1], vl);
|
||||||
|
p23 = __riscv_vslide1up(p23, row1[2], vl);
|
||||||
|
p33 = __riscv_vslide1up(p33, row1[3], vl);
|
||||||
|
p06 = __riscv_vslide1up(p06, row2[0], vl);
|
||||||
|
p16 = __riscv_vslide1up(p16, row2[1], vl);
|
||||||
|
p26 = __riscv_vslide1up(p26, row2[2], vl);
|
||||||
|
p36 = __riscv_vslide1up(p36, row2[3], vl);
|
||||||
|
}
|
||||||
|
p01 = __riscv_vslide1down(p00, row0[(j + vl - 1) * 4 ], vl);
|
||||||
|
p11 = __riscv_vslide1down(p10, row0[(j + vl - 1) * 4 + 1], vl);
|
||||||
|
p21 = __riscv_vslide1down(p20, row0[(j + vl - 1) * 4 + 2], vl);
|
||||||
|
p31 = __riscv_vslide1down(p30, row0[(j + vl - 1) * 4 + 3], vl);
|
||||||
|
p04 = __riscv_vslide1down(p03, row1[(j + vl - 1) * 4 ], vl);
|
||||||
|
p14 = __riscv_vslide1down(p13, row1[(j + vl - 1) * 4 + 1], vl);
|
||||||
|
p24 = __riscv_vslide1down(p23, row1[(j + vl - 1) * 4 + 2], vl);
|
||||||
|
p34 = __riscv_vslide1down(p33, row1[(j + vl - 1) * 4 + 3], vl);
|
||||||
|
p07 = __riscv_vslide1down(p06, row2[(j + vl - 1) * 4 ], vl);
|
||||||
|
p17 = __riscv_vslide1down(p16, row2[(j + vl - 1) * 4 + 1], vl);
|
||||||
|
p27 = __riscv_vslide1down(p26, row2[(j + vl - 1) * 4 + 2], vl);
|
||||||
|
p37 = __riscv_vslide1down(p36, row2[(j + vl - 1) * 4 + 3], vl);
|
||||||
|
p02 = __riscv_vslide1down(p01, row0[std::min(width - 1, j + vl) * 4 ], vl);
|
||||||
|
p12 = __riscv_vslide1down(p11, row0[std::min(width - 1, j + vl) * 4 + 1], vl);
|
||||||
|
p22 = __riscv_vslide1down(p21, row0[std::min(width - 1, j + vl) * 4 + 2], vl);
|
||||||
|
p32 = __riscv_vslide1down(p31, row0[std::min(width - 1, j + vl) * 4 + 3], vl);
|
||||||
|
p05 = __riscv_vslide1down(p04, row1[std::min(width - 1, j + vl) * 4 ], vl);
|
||||||
|
p15 = __riscv_vslide1down(p14, row1[std::min(width - 1, j + vl) * 4 + 1], vl);
|
||||||
|
p25 = __riscv_vslide1down(p24, row1[std::min(width - 1, j + vl) * 4 + 2], vl);
|
||||||
|
p35 = __riscv_vslide1down(p34, row1[std::min(width - 1, j + vl) * 4 + 3], vl);
|
||||||
|
p08 = __riscv_vslide1down(p07, row2[std::min(width - 1, j + vl) * 4 ], vl);
|
||||||
|
p18 = __riscv_vslide1down(p17, row2[std::min(width - 1, j + vl) * 4 + 1], vl);
|
||||||
|
p28 = __riscv_vslide1down(p27, row2[std::min(width - 1, j + vl) * 4 + 2], vl);
|
||||||
|
p38 = __riscv_vslide1down(p37, row2[std::min(width - 1, j + vl) * 4 + 3], vl);
|
||||||
|
|
||||||
|
auto vop = [&vl](vuint8m1_t& a, vuint8m1_t& b) {
|
||||||
|
auto t = a;
|
||||||
|
a = __riscv_vminu(a, b, vl);
|
||||||
|
b = __riscv_vmaxu(t, b, vl);
|
||||||
|
};
|
||||||
|
vuint8m1x4_t dst{};
|
||||||
|
vop(p01, p02); vop(p04, p05); vop(p07, p08); vop(p00, p01);
|
||||||
|
vop(p03, p04); vop(p06, p07); vop(p01, p02); vop(p04, p05);
|
||||||
|
vop(p07, p08); vop(p00, p03); vop(p05, p08); vop(p04, p07);
|
||||||
|
vop(p03, p06); vop(p01, p04); vop(p02, p05); vop(p04, p07);
|
||||||
|
vop(p04, p02); vop(p06, p04); vop(p04, p02);
|
||||||
|
dst = __riscv_vset_v_u8m1_u8m1x4(dst, 0, p04);
|
||||||
|
vop(p11, p12); vop(p14, p15); vop(p17, p18); vop(p10, p11);
|
||||||
|
vop(p13, p14); vop(p16, p17); vop(p11, p12); vop(p14, p15);
|
||||||
|
vop(p17, p18); vop(p10, p13); vop(p15, p18); vop(p14, p17);
|
||||||
|
vop(p13, p16); vop(p11, p14); vop(p12, p15); vop(p14, p17);
|
||||||
|
vop(p14, p12); vop(p16, p14); vop(p14, p12);
|
||||||
|
dst = __riscv_vset_v_u8m1_u8m1x4(dst, 1, p14);
|
||||||
|
vop(p21, p22); vop(p24, p25); vop(p27, p28); vop(p20, p21);
|
||||||
|
vop(p23, p24); vop(p26, p27); vop(p21, p22); vop(p24, p25);
|
||||||
|
vop(p27, p28); vop(p20, p23); vop(p25, p28); vop(p24, p27);
|
||||||
|
vop(p23, p26); vop(p21, p24); vop(p22, p25); vop(p24, p27);
|
||||||
|
vop(p24, p22); vop(p26, p24); vop(p24, p22);
|
||||||
|
dst = __riscv_vset_v_u8m1_u8m1x4(dst, 2, p24);
|
||||||
|
vop(p31, p32); vop(p34, p35); vop(p37, p38); vop(p30, p31);
|
||||||
|
vop(p33, p34); vop(p36, p37); vop(p31, p32); vop(p34, p35);
|
||||||
|
vop(p37, p38); vop(p30, p33); vop(p35, p38); vop(p34, p37);
|
||||||
|
vop(p33, p36); vop(p31, p34); vop(p32, p35); vop(p34, p37);
|
||||||
|
vop(p34, p32); vop(p36, p34); vop(p34, p32);
|
||||||
|
dst = __riscv_vset_v_u8m1_u8m1x4(dst, 3, p34);
|
||||||
|
__riscv_vsseg4e8(dst_data + i * dst_step + j * 4, dst, vl);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m2(width - j);
|
||||||
|
vuint8m2_t p00, p01, p02, p03, p04;
|
||||||
|
vuint8m2_t p05, p06, p07, p08, p09;
|
||||||
|
vuint8m2_t p010, p011, p012, p013, p014;
|
||||||
|
vuint8m2_t p015, p016, p017, p018, p019;
|
||||||
|
vuint8m2_t p020, p021, p022, p023, p024;
|
||||||
|
vuint8m2_t p10, p11, p12, p13, p14;
|
||||||
|
vuint8m2_t p15, p16, p17, p18, p19;
|
||||||
|
vuint8m2_t p110, p111, p112, p113, p114;
|
||||||
|
vuint8m2_t p115, p116, p117, p118, p119;
|
||||||
|
vuint8m2_t p120, p121, p122, p123, p124;
|
||||||
|
vuint8m2_t p20, p21, p22, p23, p24;
|
||||||
|
vuint8m2_t p25, p26, p27, p28, p29;
|
||||||
|
vuint8m2_t p210, p211, p212, p213, p214;
|
||||||
|
vuint8m2_t p215, p216, p217, p218, p219;
|
||||||
|
vuint8m2_t p220, p221, p222, p223, p224;
|
||||||
|
vuint8m2_t p30, p31, p32, p33, p34;
|
||||||
|
vuint8m2_t p35, p36, p37, p38, p39;
|
||||||
|
vuint8m2_t p310, p311, p312, p313, p314;
|
||||||
|
vuint8m2_t p315, p316, p317, p318, p319;
|
||||||
|
vuint8m2_t p320, p321, p322, p323, p324;
|
||||||
|
auto loadsrc = [&vl](const uchar* row, vuint8m2_t& p0, vuint8m2_t& p1, vuint8m2_t& p2, vuint8m2_t& p3) {
|
||||||
|
auto src = __riscv_vlseg4e8_v_u8m2x4(row, vl);
|
||||||
|
p0 = __riscv_vget_v_u8m2x4_u8m2(src, 0);
|
||||||
|
p1 = __riscv_vget_v_u8m2x4_u8m2(src, 1);
|
||||||
|
p2 = __riscv_vget_v_u8m2x4_u8m2(src, 2);
|
||||||
|
p3 = __riscv_vget_v_u8m2x4_u8m2(src, 3);
|
||||||
|
};
|
||||||
|
if (j >= 2)
|
||||||
|
{
|
||||||
|
loadsrc(row0 + (j - 2) * 4, p00, p10, p20, p30);
|
||||||
|
loadsrc(row1 + (j - 2) * 4, p05, p15, p25, p35);
|
||||||
|
loadsrc(row2 + (j - 2) * 4, p010, p110, p210, p310);
|
||||||
|
loadsrc(row3 + (j - 2) * 4, p015, p115, p215, p315);
|
||||||
|
loadsrc(row4 + (j - 2) * 4, p020, p120, p220, p320);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
loadsrc(row0, p00, p10, p20, p30);
|
||||||
|
loadsrc(row1, p05, p15, p25, p35);
|
||||||
|
loadsrc(row2, p010, p110, p210, p310);
|
||||||
|
loadsrc(row3, p015, p115, p215, p315);
|
||||||
|
loadsrc(row4, p020, p120, p220, p320);
|
||||||
|
auto slideup = [&] {
|
||||||
|
p00 = __riscv_vslide1up(p00, row0[0], vl);
|
||||||
|
p10 = __riscv_vslide1up(p10, row0[1], vl);
|
||||||
|
p20 = __riscv_vslide1up(p20, row0[2], vl);
|
||||||
|
p30 = __riscv_vslide1up(p30, row0[3], vl);
|
||||||
|
p05 = __riscv_vslide1up(p05, row1[0], vl);
|
||||||
|
p15 = __riscv_vslide1up(p15, row1[1], vl);
|
||||||
|
p25 = __riscv_vslide1up(p25, row1[2], vl);
|
||||||
|
p35 = __riscv_vslide1up(p35, row1[3], vl);
|
||||||
|
p010 = __riscv_vslide1up(p010, row2[0], vl);
|
||||||
|
p110 = __riscv_vslide1up(p110, row2[1], vl);
|
||||||
|
p210 = __riscv_vslide1up(p210, row2[2], vl);
|
||||||
|
p310 = __riscv_vslide1up(p310, row2[3], vl);
|
||||||
|
p015 = __riscv_vslide1up(p015, row3[0], vl);
|
||||||
|
p115 = __riscv_vslide1up(p115, row3[1], vl);
|
||||||
|
p215 = __riscv_vslide1up(p215, row3[2], vl);
|
||||||
|
p315 = __riscv_vslide1up(p315, row3[3], vl);
|
||||||
|
p020 = __riscv_vslide1up(p020, row4[0], vl);
|
||||||
|
p120 = __riscv_vslide1up(p120, row4[1], vl);
|
||||||
|
p220 = __riscv_vslide1up(p220, row4[2], vl);
|
||||||
|
p320 = __riscv_vslide1up(p320, row4[3], vl);
|
||||||
|
};
|
||||||
|
slideup();
|
||||||
|
if (j == 0)
|
||||||
|
{
|
||||||
|
slideup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p01 = __riscv_vslide1down(p00, row0[(j + vl - 2) * 4 ], vl);
|
||||||
|
p11 = __riscv_vslide1down(p10, row0[(j + vl - 2) * 4 + 1], vl);
|
||||||
|
p21 = __riscv_vslide1down(p20, row0[(j + vl - 2) * 4 + 2], vl);
|
||||||
|
p31 = __riscv_vslide1down(p30, row0[(j + vl - 2) * 4 + 3], vl);
|
||||||
|
p06 = __riscv_vslide1down(p05, row1[(j + vl - 2) * 4 ], vl);
|
||||||
|
p16 = __riscv_vslide1down(p15, row1[(j + vl - 2) * 4 + 1], vl);
|
||||||
|
p26 = __riscv_vslide1down(p25, row1[(j + vl - 2) * 4 + 2], vl);
|
||||||
|
p36 = __riscv_vslide1down(p35, row1[(j + vl - 2) * 4 + 3], vl);
|
||||||
|
p011 = __riscv_vslide1down(p010, row2[(j + vl - 2) * 4 ], vl);
|
||||||
|
p111 = __riscv_vslide1down(p110, row2[(j + vl - 2) * 4 + 1], vl);
|
||||||
|
p211 = __riscv_vslide1down(p210, row2[(j + vl - 2) * 4 + 2], vl);
|
||||||
|
p311 = __riscv_vslide1down(p310, row2[(j + vl - 2) * 4 + 3], vl);
|
||||||
|
p016 = __riscv_vslide1down(p015, row3[(j + vl - 2) * 4 ], vl);
|
||||||
|
p116 = __riscv_vslide1down(p115, row3[(j + vl - 2) * 4 + 1], vl);
|
||||||
|
p216 = __riscv_vslide1down(p215, row3[(j + vl - 2) * 4 + 2], vl);
|
||||||
|
p316 = __riscv_vslide1down(p315, row3[(j + vl - 2) * 4 + 3], vl);
|
||||||
|
p021 = __riscv_vslide1down(p020, row4[(j + vl - 2) * 4 ], vl);
|
||||||
|
p121 = __riscv_vslide1down(p120, row4[(j + vl - 2) * 4 + 1], vl);
|
||||||
|
p221 = __riscv_vslide1down(p220, row4[(j + vl - 2) * 4 + 2], vl);
|
||||||
|
p321 = __riscv_vslide1down(p320, row4[(j + vl - 2) * 4 + 3], vl);
|
||||||
|
p02 = __riscv_vslide1down(p01, row0[(j + vl - 1) * 4 ], vl);
|
||||||
|
p12 = __riscv_vslide1down(p11, row0[(j + vl - 1) * 4 + 1], vl);
|
||||||
|
p22 = __riscv_vslide1down(p21, row0[(j + vl - 1) * 4 + 2], vl);
|
||||||
|
p32 = __riscv_vslide1down(p31, row0[(j + vl - 1) * 4 + 3], vl);
|
||||||
|
p07 = __riscv_vslide1down(p06, row1[(j + vl - 1) * 4 ], vl);
|
||||||
|
p17 = __riscv_vslide1down(p16, row1[(j + vl - 1) * 4 + 1], vl);
|
||||||
|
p27 = __riscv_vslide1down(p26, row1[(j + vl - 1) * 4 + 2], vl);
|
||||||
|
p37 = __riscv_vslide1down(p36, row1[(j + vl - 1) * 4 + 3], vl);
|
||||||
|
p012 = __riscv_vslide1down(p011, row2[(j + vl - 1) * 4 ], vl);
|
||||||
|
p112 = __riscv_vslide1down(p111, row2[(j + vl - 1) * 4 + 1], vl);
|
||||||
|
p212 = __riscv_vslide1down(p211, row2[(j + vl - 1) * 4 + 2], vl);
|
||||||
|
p312 = __riscv_vslide1down(p311, row2[(j + vl - 1) * 4 + 3], vl);
|
||||||
|
p017 = __riscv_vslide1down(p016, row3[(j + vl - 1) * 4 ], vl);
|
||||||
|
p117 = __riscv_vslide1down(p116, row3[(j + vl - 1) * 4 + 1], vl);
|
||||||
|
p217 = __riscv_vslide1down(p216, row3[(j + vl - 1) * 4 + 2], vl);
|
||||||
|
p317 = __riscv_vslide1down(p316, row3[(j + vl - 1) * 4 + 3], vl);
|
||||||
|
p022 = __riscv_vslide1down(p021, row4[(j + vl - 1) * 4 ], vl);
|
||||||
|
p122 = __riscv_vslide1down(p121, row4[(j + vl - 1) * 4 + 1], vl);
|
||||||
|
p222 = __riscv_vslide1down(p221, row4[(j + vl - 1) * 4 + 2], vl);
|
||||||
|
p322 = __riscv_vslide1down(p321, row4[(j + vl - 1) * 4 + 3], vl);
|
||||||
|
p03 = __riscv_vslide1down(p02, row0[std::min(width - 1, j + vl) * 4 ], vl);
|
||||||
|
p13 = __riscv_vslide1down(p12, row0[std::min(width - 1, j + vl) * 4 + 1], vl);
|
||||||
|
p23 = __riscv_vslide1down(p22, row0[std::min(width - 1, j + vl) * 4 + 2], vl);
|
||||||
|
p33 = __riscv_vslide1down(p32, row0[std::min(width - 1, j + vl) * 4 + 3], vl);
|
||||||
|
p08 = __riscv_vslide1down(p07, row1[std::min(width - 1, j + vl) * 4 ], vl);
|
||||||
|
p18 = __riscv_vslide1down(p17, row1[std::min(width - 1, j + vl) * 4 + 1], vl);
|
||||||
|
p28 = __riscv_vslide1down(p27, row1[std::min(width - 1, j + vl) * 4 + 2], vl);
|
||||||
|
p38 = __riscv_vslide1down(p37, row1[std::min(width - 1, j + vl) * 4 + 3], vl);
|
||||||
|
p013 = __riscv_vslide1down(p012, row2[std::min(width - 1, j + vl) * 4 ], vl);
|
||||||
|
p113 = __riscv_vslide1down(p112, row2[std::min(width - 1, j + vl) * 4 + 1], vl);
|
||||||
|
p213 = __riscv_vslide1down(p212, row2[std::min(width - 1, j + vl) * 4 + 2], vl);
|
||||||
|
p313 = __riscv_vslide1down(p312, row2[std::min(width - 1, j + vl) * 4 + 3], vl);
|
||||||
|
p018 = __riscv_vslide1down(p017, row3[std::min(width - 1, j + vl) * 4 ], vl);
|
||||||
|
p118 = __riscv_vslide1down(p117, row3[std::min(width - 1, j + vl) * 4 + 1], vl);
|
||||||
|
p218 = __riscv_vslide1down(p217, row3[std::min(width - 1, j + vl) * 4 + 2], vl);
|
||||||
|
p318 = __riscv_vslide1down(p317, row3[std::min(width - 1, j + vl) * 4 + 3], vl);
|
||||||
|
p023 = __riscv_vslide1down(p022, row4[std::min(width - 1, j + vl) * 4 ], vl);
|
||||||
|
p123 = __riscv_vslide1down(p122, row4[std::min(width - 1, j + vl) * 4 + 1], vl);
|
||||||
|
p223 = __riscv_vslide1down(p222, row4[std::min(width - 1, j + vl) * 4 + 2], vl);
|
||||||
|
p323 = __riscv_vslide1down(p322, row4[std::min(width - 1, j + vl) * 4 + 3], vl);
|
||||||
|
p04 = __riscv_vslide1down(p03, row0[std::min(width - 1, j + vl + 1) * 4 ], vl);
|
||||||
|
p14 = __riscv_vslide1down(p13, row0[std::min(width - 1, j + vl + 1) * 4 + 1], vl);
|
||||||
|
p24 = __riscv_vslide1down(p23, row0[std::min(width - 1, j + vl + 1) * 4 + 2], vl);
|
||||||
|
p34 = __riscv_vslide1down(p33, row0[std::min(width - 1, j + vl + 1) * 4 + 3], vl);
|
||||||
|
p09 = __riscv_vslide1down(p08, row1[std::min(width - 1, j + vl + 1) * 4 ], vl);
|
||||||
|
p19 = __riscv_vslide1down(p18, row1[std::min(width - 1, j + vl + 1) * 4 + 1], vl);
|
||||||
|
p29 = __riscv_vslide1down(p28, row1[std::min(width - 1, j + vl + 1) * 4 + 2], vl);
|
||||||
|
p39 = __riscv_vslide1down(p38, row1[std::min(width - 1, j + vl + 1) * 4 + 3], vl);
|
||||||
|
p014 = __riscv_vslide1down(p013, row2[std::min(width - 1, j + vl + 1) * 4 ], vl);
|
||||||
|
p114 = __riscv_vslide1down(p113, row2[std::min(width - 1, j + vl + 1) * 4 + 1], vl);
|
||||||
|
p214 = __riscv_vslide1down(p213, row2[std::min(width - 1, j + vl + 1) * 4 + 2], vl);
|
||||||
|
p314 = __riscv_vslide1down(p313, row2[std::min(width - 1, j + vl + 1) * 4 + 3], vl);
|
||||||
|
p019 = __riscv_vslide1down(p018, row3[std::min(width - 1, j + vl + 1) * 4 ], vl);
|
||||||
|
p119 = __riscv_vslide1down(p118, row3[std::min(width - 1, j + vl + 1) * 4 + 1], vl);
|
||||||
|
p219 = __riscv_vslide1down(p218, row3[std::min(width - 1, j + vl + 1) * 4 + 2], vl);
|
||||||
|
p319 = __riscv_vslide1down(p318, row3[std::min(width - 1, j + vl + 1) * 4 + 3], vl);
|
||||||
|
p024 = __riscv_vslide1down(p023, row4[std::min(width - 1, j + vl + 1) * 4 ], vl);
|
||||||
|
p124 = __riscv_vslide1down(p123, row4[std::min(width - 1, j + vl + 1) * 4 + 1], vl);
|
||||||
|
p224 = __riscv_vslide1down(p223, row4[std::min(width - 1, j + vl + 1) * 4 + 2], vl);
|
||||||
|
p324 = __riscv_vslide1down(p323, row4[std::min(width - 1, j + vl + 1) * 4 + 3], vl);
|
||||||
|
|
||||||
|
auto vop = [&vl](vuint8m2_t& a, vuint8m2_t& b) {
|
||||||
|
auto t = a;
|
||||||
|
a = __riscv_vminu(a, b, vl);
|
||||||
|
b = __riscv_vmaxu(t, b, vl);
|
||||||
|
};
|
||||||
|
vuint8m2x4_t dst{};
|
||||||
|
vop(p01, p02); vop(p00, p01); vop(p01, p02); vop(p04, p05); vop(p03, p04);
|
||||||
|
vop(p04, p05); vop(p00, p03); vop(p02, p05); vop(p02, p03); vop(p01, p04);
|
||||||
|
vop(p01, p02); vop(p03, p04); vop(p07, p08); vop(p06, p07); vop(p07, p08);
|
||||||
|
vop(p010, p011); vop(p09, p010); vop(p010, p011); vop(p06, p09); vop(p08, p011);
|
||||||
|
vop(p08, p09); vop(p07, p010); vop(p07, p08); vop(p09, p010); vop(p00, p06);
|
||||||
|
vop(p04, p010); vop(p04, p06); vop(p02, p08); vop(p02, p04); vop(p06, p08);
|
||||||
|
vop(p01, p07); vop(p05, p011); vop(p05, p07); vop(p03, p09); vop(p03, p05);
|
||||||
|
vop(p07, p09); vop(p01, p02); vop(p03, p04); vop(p05, p06); vop(p07, p08);
|
||||||
|
vop(p09, p010); vop(p013, p014); vop(p012, p013); vop(p013, p014); vop(p016, p017);
|
||||||
|
vop(p015, p016); vop(p016, p017); vop(p012, p015); vop(p014, p017); vop(p014, p015);
|
||||||
|
vop(p013, p016); vop(p013, p014); vop(p015, p016); vop(p019, p020); vop(p018, p019);
|
||||||
|
vop(p019, p020); vop(p021, p022); vop(p023, p024); vop(p021, p023); vop(p022, p024);
|
||||||
|
vop(p022, p023); vop(p018, p021); vop(p020, p023); vop(p020, p021); vop(p019, p022);
|
||||||
|
vop(p022, p024); vop(p019, p020); vop(p021, p022); vop(p023, p024); vop(p012, p018);
|
||||||
|
vop(p016, p022); vop(p016, p018); vop(p014, p020); vop(p020, p024); vop(p014, p016);
|
||||||
|
vop(p018, p020); vop(p022, p024); vop(p013, p019); vop(p017, p023); vop(p017, p019);
|
||||||
|
vop(p015, p021); vop(p015, p017); vop(p019, p021); vop(p013, p014); vop(p015, p016);
|
||||||
|
vop(p017, p018); vop(p019, p020); vop(p021, p022); vop(p023, p024); vop(p00, p012);
|
||||||
|
vop(p08, p020); vop(p08, p012); vop(p04, p016); vop(p016, p024); vop(p012, p016);
|
||||||
|
vop(p02, p014); vop(p010, p022); vop(p010, p014); vop(p06, p018); vop(p06, p010);
|
||||||
|
vop(p010, p012); vop(p01, p013); vop(p09, p021); vop(p09, p013); vop(p05, p017);
|
||||||
|
vop(p013, p017); vop(p03, p015); vop(p011, p023); vop(p011, p015); vop(p07, p019);
|
||||||
|
vop(p07, p011); vop(p011, p013); vop(p011, p012);
|
||||||
|
dst = __riscv_vset_v_u8m2_u8m2x4(dst, 0, p012);
|
||||||
|
vop(p11, p12); vop(p10, p11); vop(p11, p12); vop(p14, p15); vop(p13, p14);
|
||||||
|
vop(p14, p15); vop(p10, p13); vop(p12, p15); vop(p12, p13); vop(p11, p14);
|
||||||
|
vop(p11, p12); vop(p13, p14); vop(p17, p18); vop(p16, p17); vop(p17, p18);
|
||||||
|
vop(p110, p111); vop(p19, p110); vop(p110, p111); vop(p16, p19); vop(p18, p111);
|
||||||
|
vop(p18, p19); vop(p17, p110); vop(p17, p18); vop(p19, p110); vop(p10, p16);
|
||||||
|
vop(p14, p110); vop(p14, p16); vop(p12, p18); vop(p12, p14); vop(p16, p18);
|
||||||
|
vop(p11, p17); vop(p15, p111); vop(p15, p17); vop(p13, p19); vop(p13, p15);
|
||||||
|
vop(p17, p19); vop(p11, p12); vop(p13, p14); vop(p15, p16); vop(p17, p18);
|
||||||
|
vop(p19, p110); vop(p113, p114); vop(p112, p113); vop(p113, p114); vop(p116, p117);
|
||||||
|
vop(p115, p116); vop(p116, p117); vop(p112, p115); vop(p114, p117); vop(p114, p115);
|
||||||
|
vop(p113, p116); vop(p113, p114); vop(p115, p116); vop(p119, p120); vop(p118, p119);
|
||||||
|
vop(p119, p120); vop(p121, p122); vop(p123, p124); vop(p121, p123); vop(p122, p124);
|
||||||
|
vop(p122, p123); vop(p118, p121); vop(p120, p123); vop(p120, p121); vop(p119, p122);
|
||||||
|
vop(p122, p124); vop(p119, p120); vop(p121, p122); vop(p123, p124); vop(p112, p118);
|
||||||
|
vop(p116, p122); vop(p116, p118); vop(p114, p120); vop(p120, p124); vop(p114, p116);
|
||||||
|
vop(p118, p120); vop(p122, p124); vop(p113, p119); vop(p117, p123); vop(p117, p119);
|
||||||
|
vop(p115, p121); vop(p115, p117); vop(p119, p121); vop(p113, p114); vop(p115, p116);
|
||||||
|
vop(p117, p118); vop(p119, p120); vop(p121, p122); vop(p123, p124); vop(p10, p112);
|
||||||
|
vop(p18, p120); vop(p18, p112); vop(p14, p116); vop(p116, p124); vop(p112, p116);
|
||||||
|
vop(p12, p114); vop(p110, p122); vop(p110, p114); vop(p16, p118); vop(p16, p110);
|
||||||
|
vop(p110, p112); vop(p11, p113); vop(p19, p121); vop(p19, p113); vop(p15, p117);
|
||||||
|
vop(p113, p117); vop(p13, p115); vop(p111, p123); vop(p111, p115); vop(p17, p119);
|
||||||
|
vop(p17, p111); vop(p111, p113); vop(p111, p112);
|
||||||
|
dst = __riscv_vset_v_u8m2_u8m2x4(dst, 1, p112);
|
||||||
|
vop(p21, p22); vop(p20, p21); vop(p21, p22); vop(p24, p25); vop(p23, p24);
|
||||||
|
vop(p24, p25); vop(p20, p23); vop(p22, p25); vop(p22, p23); vop(p21, p24);
|
||||||
|
vop(p21, p22); vop(p23, p24); vop(p27, p28); vop(p26, p27); vop(p27, p28);
|
||||||
|
vop(p210, p211); vop(p29, p210); vop(p210, p211); vop(p26, p29); vop(p28, p211);
|
||||||
|
vop(p28, p29); vop(p27, p210); vop(p27, p28); vop(p29, p210); vop(p20, p26);
|
||||||
|
vop(p24, p210); vop(p24, p26); vop(p22, p28); vop(p22, p24); vop(p26, p28);
|
||||||
|
vop(p21, p27); vop(p25, p211); vop(p25, p27); vop(p23, p29); vop(p23, p25);
|
||||||
|
vop(p27, p29); vop(p21, p22); vop(p23, p24); vop(p25, p26); vop(p27, p28);
|
||||||
|
vop(p29, p210); vop(p213, p214); vop(p212, p213); vop(p213, p214); vop(p216, p217);
|
||||||
|
vop(p215, p216); vop(p216, p217); vop(p212, p215); vop(p214, p217); vop(p214, p215);
|
||||||
|
vop(p213, p216); vop(p213, p214); vop(p215, p216); vop(p219, p220); vop(p218, p219);
|
||||||
|
vop(p219, p220); vop(p221, p222); vop(p223, p224); vop(p221, p223); vop(p222, p224);
|
||||||
|
vop(p222, p223); vop(p218, p221); vop(p220, p223); vop(p220, p221); vop(p219, p222);
|
||||||
|
vop(p222, p224); vop(p219, p220); vop(p221, p222); vop(p223, p224); vop(p212, p218);
|
||||||
|
vop(p216, p222); vop(p216, p218); vop(p214, p220); vop(p220, p224); vop(p214, p216);
|
||||||
|
vop(p218, p220); vop(p222, p224); vop(p213, p219); vop(p217, p223); vop(p217, p219);
|
||||||
|
vop(p215, p221); vop(p215, p217); vop(p219, p221); vop(p213, p214); vop(p215, p216);
|
||||||
|
vop(p217, p218); vop(p219, p220); vop(p221, p222); vop(p223, p224); vop(p20, p212);
|
||||||
|
vop(p28, p220); vop(p28, p212); vop(p24, p216); vop(p216, p224); vop(p212, p216);
|
||||||
|
vop(p22, p214); vop(p210, p222); vop(p210, p214); vop(p26, p218); vop(p26, p210);
|
||||||
|
vop(p210, p212); vop(p21, p213); vop(p29, p221); vop(p29, p213); vop(p25, p217);
|
||||||
|
vop(p213, p217); vop(p23, p215); vop(p211, p223); vop(p211, p215); vop(p27, p219);
|
||||||
|
vop(p27, p211); vop(p211, p213); vop(p211, p212);
|
||||||
|
dst = __riscv_vset_v_u8m2_u8m2x4(dst, 2, p212);
|
||||||
|
vop(p31, p32); vop(p30, p31); vop(p31, p32); vop(p34, p35); vop(p33, p34);
|
||||||
|
vop(p34, p35); vop(p30, p33); vop(p32, p35); vop(p32, p33); vop(p31, p34);
|
||||||
|
vop(p31, p32); vop(p33, p34); vop(p37, p38); vop(p36, p37); vop(p37, p38);
|
||||||
|
vop(p310, p311); vop(p39, p310); vop(p310, p311); vop(p36, p39); vop(p38, p311);
|
||||||
|
vop(p38, p39); vop(p37, p310); vop(p37, p38); vop(p39, p310); vop(p30, p36);
|
||||||
|
vop(p34, p310); vop(p34, p36); vop(p32, p38); vop(p32, p34); vop(p36, p38);
|
||||||
|
vop(p31, p37); vop(p35, p311); vop(p35, p37); vop(p33, p39); vop(p33, p35);
|
||||||
|
vop(p37, p39); vop(p31, p32); vop(p33, p34); vop(p35, p36); vop(p37, p38);
|
||||||
|
vop(p39, p310); vop(p313, p314); vop(p312, p313); vop(p313, p314); vop(p316, p317);
|
||||||
|
vop(p315, p316); vop(p316, p317); vop(p312, p315); vop(p314, p317); vop(p314, p315);
|
||||||
|
vop(p313, p316); vop(p313, p314); vop(p315, p316); vop(p319, p320); vop(p318, p319);
|
||||||
|
vop(p319, p320); vop(p321, p322); vop(p323, p324); vop(p321, p323); vop(p322, p324);
|
||||||
|
vop(p322, p323); vop(p318, p321); vop(p320, p323); vop(p320, p321); vop(p319, p322);
|
||||||
|
vop(p322, p324); vop(p319, p320); vop(p321, p322); vop(p323, p324); vop(p312, p318);
|
||||||
|
vop(p316, p322); vop(p316, p318); vop(p314, p320); vop(p320, p324); vop(p314, p316);
|
||||||
|
vop(p318, p320); vop(p322, p324); vop(p313, p319); vop(p317, p323); vop(p317, p319);
|
||||||
|
vop(p315, p321); vop(p315, p317); vop(p319, p321); vop(p313, p314); vop(p315, p316);
|
||||||
|
vop(p317, p318); vop(p319, p320); vop(p321, p322); vop(p323, p324); vop(p30, p312);
|
||||||
|
vop(p38, p320); vop(p38, p312); vop(p34, p316); vop(p316, p324); vop(p312, p316);
|
||||||
|
vop(p32, p314); vop(p310, p322); vop(p310, p314); vop(p36, p318); vop(p36, p310);
|
||||||
|
vop(p310, p312); vop(p31, p313); vop(p39, p321); vop(p39, p313); vop(p35, p317);
|
||||||
|
vop(p313, p317); vop(p33, p315); vop(p311, p323); vop(p311, p315); vop(p37, p319);
|
||||||
|
vop(p37, p311); vop(p311, p313); vop(p311, p312);
|
||||||
|
dst = __riscv_vset_v_u8m2_u8m2x4(dst, 3, p312);
|
||||||
|
__riscv_vsseg4e8(dst_data + i * dst_step + j * 4, dst, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
|
int medianBlur(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int depth, int cn, int ksize)
|
||||||
|
{
|
||||||
|
const int type = CV_MAKETYPE(depth, cn);
|
||||||
|
if (type != CV_8UC1 && type != CV_8UC4 && type != CV_16UC1 && type != CV_16SC1 && type != CV_32FC1)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
if ((ksize != 3 && ksize != 5) || src_data == dst_data)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
|
switch (ksize*100 + type)
|
||||||
|
{
|
||||||
|
case 300 + CV_8UC1:
|
||||||
|
return common::invoke(height, {medianBlurC1<3, RVV_U8M4>}, src_data, src_step, dst_data, dst_step, width, height);
|
||||||
|
case 300 + CV_16UC1:
|
||||||
|
return common::invoke(height, {medianBlurC1<3, RVV_U16M4>}, src_data, src_step, dst_data, dst_step, width, height);
|
||||||
|
case 300 + CV_16SC1:
|
||||||
|
return common::invoke(height, {medianBlurC1<3, RVV_I16M4>}, src_data, src_step, dst_data, dst_step, width, height);
|
||||||
|
case 300 + CV_32FC1:
|
||||||
|
return common::invoke(height, {medianBlurC1<3, RVV_F32M4>}, src_data, src_step, dst_data, dst_step, width, height);
|
||||||
|
case 500 + CV_8UC1:
|
||||||
|
return common::invoke(height, {medianBlurC1<5, RVV_U8M1>}, src_data, src_step, dst_data, dst_step, width, height);
|
||||||
|
case 500 + CV_16UC1:
|
||||||
|
return common::invoke(height, {medianBlurC1<5, RVV_U16M1>}, src_data, src_step, dst_data, dst_step, width, height);
|
||||||
|
case 500 + CV_16SC1:
|
||||||
|
return common::invoke(height, {medianBlurC1<5, RVV_I16M1>}, src_data, src_step, dst_data, dst_step, width, height);
|
||||||
|
case 500 + CV_32FC1:
|
||||||
|
return common::invoke(height, {medianBlurC1<5, RVV_F32M1>}, src_data, src_step, dst_data, dst_step, width, height);
|
||||||
|
|
||||||
|
case 300 + CV_8UC4:
|
||||||
|
return common::invoke(height, {medianBlurC4<3>}, src_data, src_step, dst_data, dst_step, width, height);
|
||||||
|
case 500 + CV_8UC4:
|
||||||
|
return common::invoke(height, {medianBlurC4<5>}, src_data, src_step, dst_data, dst_step, width, height);
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::imgproc
|
@ -4,16 +4,13 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_MOMENTS_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_MOMENTS_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
namespace imageMoments {
|
namespace {
|
||||||
#undef cv_hal_imageMoments
|
|
||||||
#define cv_hal_imageMoments cv::cv_hal_rvv::imageMoments::imageMoments
|
|
||||||
|
|
||||||
class MomentsInvoker : public ParallelLoopBody
|
class MomentsInvoker : public ParallelLoopBody
|
||||||
{
|
{
|
||||||
@ -152,9 +149,11 @@ static inline int imageMoments(int start, int end, const uchar* src_data, size_t
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
// the algorithm is copied from imgproc/src/moments.cpp,
|
// the algorithm is copied from imgproc/src/moments.cpp,
|
||||||
// in the function cv::Moments cv::moments
|
// in the function cv::Moments cv::moments
|
||||||
inline int imageMoments(const uchar* src_data, size_t src_step, int src_type, int width, int height, bool binary, double m[10])
|
int imageMoments(const uchar* src_data, size_t src_step, int src_type, int width, int height, bool binary, double m[10])
|
||||||
{
|
{
|
||||||
if (src_type != CV_16UC1 && src_type != CV_16SC1 && src_type != CV_32FC1 && src_type != CV_64FC1)
|
if (src_type != CV_16UC1 && src_type != CV_16SC1 && src_type != CV_32FC1 && src_type != CV_64FC1)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
@ -184,8 +183,7 @@ inline int imageMoments(const uchar* src_data, size_t src_step, int src_type, in
|
|||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::imageMoments
|
|
||||||
|
|
||||||
}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::imgproc
|
331
hal/riscv-rvv/src/imgproc/morph.cpp
Normal file
331
hal/riscv-rvv/src/imgproc/morph.cpp
Normal file
@ -0,0 +1,331 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
struct Morph2D
|
||||||
|
{
|
||||||
|
int operation;
|
||||||
|
int src_type;
|
||||||
|
int dst_type;
|
||||||
|
int kernel_type;
|
||||||
|
uchar* kernel_data;
|
||||||
|
size_t kernel_step;
|
||||||
|
int kernel_width;
|
||||||
|
int kernel_height;
|
||||||
|
int anchor_x;
|
||||||
|
int anchor_y;
|
||||||
|
int borderType;
|
||||||
|
const uchar* borderValue;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<int op> struct rvv;
|
||||||
|
template<> struct rvv<CV_HAL_MORPH_ERODE>
|
||||||
|
{
|
||||||
|
static inline uchar init() { return std::numeric_limits<uchar>::max(); }
|
||||||
|
static inline uchar mop(uchar a, uchar b) { return a < b ? a : b; }
|
||||||
|
static inline vuint8m4_t vop(vuint8m4_t a, vuint8m4_t b, size_t c) { return __riscv_vminu(a, b, c); }
|
||||||
|
static inline vuint8m4_t vop(vuint8m4_t a, uchar b, size_t c) { return __riscv_vminu(a, b, c); }
|
||||||
|
};
|
||||||
|
template<> struct rvv<CV_HAL_MORPH_DILATE>
|
||||||
|
{
|
||||||
|
static inline uchar init() { return std::numeric_limits<uchar>::min(); }
|
||||||
|
static inline uchar mop(uchar a, uchar b) { return a > b ? a : b; }
|
||||||
|
static inline vuint8m4_t vop(vuint8m4_t a, vuint8m4_t b, size_t c) { return __riscv_vmaxu(a, b, c); }
|
||||||
|
static inline vuint8m4_t vop(vuint8m4_t a, uchar b, size_t c) { return __riscv_vmaxu(a, b, c); }
|
||||||
|
};
|
||||||
|
|
||||||
|
// the algorithm is copied from 3rdparty/carotene/src/morph.cpp,
|
||||||
|
// in the function template void morph3x3
|
||||||
|
template<int op>
|
||||||
|
static inline int morph(int start, int end, Morph2D* data, const uchar* src_data, size_t src_step, uchar* dst_data, int width, int height, int full_width, int full_height, int offset_x, int offset_y)
|
||||||
|
{
|
||||||
|
bool kernel[9];
|
||||||
|
for (int i = 0; i < 9; i++)
|
||||||
|
{
|
||||||
|
kernel[i] = data->kernel_data[(i / 3) * data->kernel_step + i % 3] != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr int noval = std::numeric_limits<int>::max();
|
||||||
|
auto access = [&](int x, int y) {
|
||||||
|
int pi, pj;
|
||||||
|
if (data->borderType & BORDER_ISOLATED)
|
||||||
|
{
|
||||||
|
pi = common::borderInterpolate(x - data->anchor_y, height, data->borderType & ~BORDER_ISOLATED);
|
||||||
|
pj = common::borderInterpolate(y - data->anchor_x, width , data->borderType & ~BORDER_ISOLATED);
|
||||||
|
pi = pi < 0 ? noval : pi;
|
||||||
|
pj = pj < 0 ? noval : pj;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pi = common::borderInterpolate(offset_y + x - data->anchor_y, full_height, data->borderType);
|
||||||
|
pj = common::borderInterpolate(offset_x + y - data->anchor_x, full_width , data->borderType);
|
||||||
|
pi = pi < 0 ? noval : pi - offset_y;
|
||||||
|
pj = pj < 0 ? noval : pj - offset_x;
|
||||||
|
}
|
||||||
|
return std::make_pair(pi, pj);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto process = [&](int x, int y) {
|
||||||
|
if (data->src_type == CV_8UC1)
|
||||||
|
{
|
||||||
|
uchar val = rvv<op>::init();
|
||||||
|
for (int i = 0; i < 9; i++)
|
||||||
|
{
|
||||||
|
if (kernel[i])
|
||||||
|
{
|
||||||
|
auto p = access(x + i / 3, y + i % 3);
|
||||||
|
if (p.first != noval && p.second != noval)
|
||||||
|
{
|
||||||
|
val = rvv<op>::mop(val, src_data[p.first * src_step + p.second]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
val = rvv<op>::mop(val, data->borderValue[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dst_data[x * width + y] = val;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
uchar val0, val1, val2, val3;
|
||||||
|
val0 = val1 = val2 = val3 = rvv<op>::init();
|
||||||
|
for (int i = 0; i < 9; i++)
|
||||||
|
{
|
||||||
|
if (kernel[i])
|
||||||
|
{
|
||||||
|
auto p = access(x + i / 3, y + i % 3);
|
||||||
|
if (p.first != noval && p.second != noval)
|
||||||
|
{
|
||||||
|
val0 = rvv<op>::mop(val0, src_data[p.first * src_step + p.second * 4 ]);
|
||||||
|
val1 = rvv<op>::mop(val1, src_data[p.first * src_step + p.second * 4 + 1]);
|
||||||
|
val2 = rvv<op>::mop(val2, src_data[p.first * src_step + p.second * 4 + 2]);
|
||||||
|
val3 = rvv<op>::mop(val3, src_data[p.first * src_step + p.second * 4 + 3]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
val0 = rvv<op>::mop(val0, data->borderValue[0]);
|
||||||
|
val1 = rvv<op>::mop(val1, data->borderValue[1]);
|
||||||
|
val2 = rvv<op>::mop(val2, data->borderValue[2]);
|
||||||
|
val3 = rvv<op>::mop(val3, data->borderValue[3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dst_data[(x * width + y) * 4 ] = val0;
|
||||||
|
dst_data[(x * width + y) * 4 + 1] = val1;
|
||||||
|
dst_data[(x * width + y) * 4 + 2] = val2;
|
||||||
|
dst_data[(x * width + y) * 4 + 3] = val3;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const int left = data->anchor_x, right = width - (2 - data->anchor_x);
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
if (left >= right)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int j = 0; j < left; j++)
|
||||||
|
process(i, j);
|
||||||
|
for (int j = right; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
|
||||||
|
const uchar* row0 = access(i , 0).first == noval ? nullptr : src_data + access(i , 0).first * src_step;
|
||||||
|
const uchar* row1 = access(i + 1, 0).first == noval ? nullptr : src_data + access(i + 1, 0).first * src_step;
|
||||||
|
const uchar* row2 = access(i + 2, 0).first == noval ? nullptr : src_data + access(i + 2, 0).first * src_step;
|
||||||
|
if (data->src_type == CV_8UC1)
|
||||||
|
{
|
||||||
|
int vl;
|
||||||
|
for (int j = left; j < right; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m4(right - j);
|
||||||
|
auto m0 = __riscv_vmv_v_x_u8m4(rvv<op>::init(), vl);
|
||||||
|
auto loadsrc = [&](const uchar* row, bool k0, bool k1, bool k2) {
|
||||||
|
if (!row)
|
||||||
|
{
|
||||||
|
m0 = rvv<op>::vop(m0, data->borderValue[0], vl);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const uchar* extra = row + j - data->anchor_x;
|
||||||
|
auto v0 = __riscv_vle8_v_u8m4(extra, vl);
|
||||||
|
|
||||||
|
if (k0) m0 = rvv<op>::vop(m0, v0, vl);
|
||||||
|
v0 = __riscv_vslide1down(v0, extra[vl], vl);
|
||||||
|
if (k1) m0 = rvv<op>::vop(m0, v0, vl);
|
||||||
|
if (!k2) return;
|
||||||
|
v0 = __riscv_vslide1down(v0, extra[vl + 1], vl);
|
||||||
|
m0 = rvv<op>::vop(m0, v0, vl);
|
||||||
|
};
|
||||||
|
|
||||||
|
loadsrc(row0, kernel[0], kernel[1], kernel[2]);
|
||||||
|
loadsrc(row1, kernel[3], kernel[4], kernel[5]);
|
||||||
|
loadsrc(row2, kernel[6], kernel[7], kernel[8]);
|
||||||
|
__riscv_vse8(dst_data + i * width + j, m0, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int vl, vl0, vl1;
|
||||||
|
for (int j = left; j < right; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m4(right - j);
|
||||||
|
vl0 = std::min(vl, (int)__riscv_vlenb() * 2);
|
||||||
|
vl1 = vl - vl0;
|
||||||
|
auto m0 = __riscv_vmv_v_x_u8m4(rvv<op>::init(), vl);
|
||||||
|
auto m1 = __riscv_vmv_v_x_u8m4(rvv<op>::init(), vl);
|
||||||
|
auto m2 = __riscv_vmv_v_x_u8m4(rvv<op>::init(), vl);
|
||||||
|
auto m3 = __riscv_vmv_v_x_u8m4(rvv<op>::init(), vl);
|
||||||
|
|
||||||
|
auto opshift = [&](vuint8m4_t a, vuint8m4_t b, bool k0, bool k1, bool k2, uchar r1, uchar r2) {
|
||||||
|
if (k0) a = rvv<op>::vop(a, b, vl);
|
||||||
|
b = __riscv_vslide1down(b, r1, vl);
|
||||||
|
if (k1) a = rvv<op>::vop(a, b, vl);
|
||||||
|
if (!k2) return a;
|
||||||
|
b = __riscv_vslide1down(b, r2, vl);
|
||||||
|
return rvv<op>::vop(a, b, vl);
|
||||||
|
};
|
||||||
|
auto loadsrc = [&](const uchar* row, bool k0, bool k1, bool k2) {
|
||||||
|
if (!row)
|
||||||
|
{
|
||||||
|
m0 = rvv<op>::vop(m0, data->borderValue[0], vl);
|
||||||
|
m1 = rvv<op>::vop(m1, data->borderValue[1], vl);
|
||||||
|
m2 = rvv<op>::vop(m2, data->borderValue[2], vl);
|
||||||
|
m3 = rvv<op>::vop(m3, data->borderValue[3], vl);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
vuint8m4_t v0{}, v1{}, v2{}, v3{};
|
||||||
|
const uchar* extra = row + (j - data->anchor_x) * 4;
|
||||||
|
auto src = __riscv_vlseg4e8_v_u8m2x4(extra, vl0);
|
||||||
|
v0 = __riscv_vset_v_u8m2_u8m4(v0, 0, __riscv_vget_v_u8m2x4_u8m2(src, 0));
|
||||||
|
v1 = __riscv_vset_v_u8m2_u8m4(v1, 0, __riscv_vget_v_u8m2x4_u8m2(src, 1));
|
||||||
|
v2 = __riscv_vset_v_u8m2_u8m4(v2, 0, __riscv_vget_v_u8m2x4_u8m2(src, 2));
|
||||||
|
v3 = __riscv_vset_v_u8m2_u8m4(v3, 0, __riscv_vget_v_u8m2x4_u8m2(src, 3));
|
||||||
|
src = __riscv_vlseg4e8_v_u8m2x4(extra + vl0 * 4, vl1);
|
||||||
|
v0 = __riscv_vset_v_u8m2_u8m4(v0, 1, __riscv_vget_v_u8m2x4_u8m2(src, 0));
|
||||||
|
v1 = __riscv_vset_v_u8m2_u8m4(v1, 1, __riscv_vget_v_u8m2x4_u8m2(src, 1));
|
||||||
|
v2 = __riscv_vset_v_u8m2_u8m4(v2, 1, __riscv_vget_v_u8m2x4_u8m2(src, 2));
|
||||||
|
v3 = __riscv_vset_v_u8m2_u8m4(v3, 1, __riscv_vget_v_u8m2x4_u8m2(src, 3));
|
||||||
|
|
||||||
|
extra += vl * 4;
|
||||||
|
m0 = opshift(m0, v0, k0, k1, k2, extra[0], extra[4]);
|
||||||
|
m1 = opshift(m1, v1, k0, k1, k2, extra[1], extra[5]);
|
||||||
|
m2 = opshift(m2, v2, k0, k1, k2, extra[2], extra[6]);
|
||||||
|
m3 = opshift(m3, v3, k0, k1, k2, extra[3], extra[7]);
|
||||||
|
};
|
||||||
|
|
||||||
|
loadsrc(row0, kernel[0], kernel[1], kernel[2]);
|
||||||
|
loadsrc(row1, kernel[3], kernel[4], kernel[5]);
|
||||||
|
loadsrc(row2, kernel[6], kernel[7], kernel[8]);
|
||||||
|
vuint8m2x4_t val{};
|
||||||
|
val = __riscv_vset_v_u8m2_u8m2x4(val, 0, __riscv_vget_v_u8m4_u8m2(m0, 0));
|
||||||
|
val = __riscv_vset_v_u8m2_u8m2x4(val, 1, __riscv_vget_v_u8m4_u8m2(m1, 0));
|
||||||
|
val = __riscv_vset_v_u8m2_u8m2x4(val, 2, __riscv_vget_v_u8m4_u8m2(m2, 0));
|
||||||
|
val = __riscv_vset_v_u8m2_u8m2x4(val, 3, __riscv_vget_v_u8m4_u8m2(m3, 0));
|
||||||
|
__riscv_vsseg4e8(dst_data + (i * width + j) * 4, val, vl0);
|
||||||
|
val = __riscv_vset_v_u8m2_u8m2x4(val, 0, __riscv_vget_v_u8m4_u8m2(m0, 1));
|
||||||
|
val = __riscv_vset_v_u8m2_u8m2x4(val, 1, __riscv_vget_v_u8m4_u8m2(m1, 1));
|
||||||
|
val = __riscv_vset_v_u8m2_u8m2x4(val, 2, __riscv_vget_v_u8m4_u8m2(m2, 1));
|
||||||
|
val = __riscv_vset_v_u8m2_u8m2x4(val, 3, __riscv_vget_v_u8m4_u8m2(m3, 1));
|
||||||
|
__riscv_vsseg4e8(dst_data + (i * width + j + vl0) * 4, val, vl1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
|
// Creates the context used by morph()/morphFree().
//
// Supported configuration: 3x3 CV_8UC1 kernel, one iteration, erode or dilate,
// CV_8UC1 or CV_8UC4 images with identical source and destination type, and any
// border mode except BORDER_WRAP. Everything else returns
// CV_HAL_ERROR_NOT_IMPLEMENTED and leaves *context untouched.
//
// On success *context owns a Morph2D plus a border-value buffer (one uchar for
// CV_8UC1, four for CV_8UC4). kernel_data is only referenced, not copied.
// Negative anchors are replaced by the kernel centre.
int morphInit(cvhalFilter2D** context, int operation, int src_type, int dst_type, int /*max_width*/, int /*max_height*/, int kernel_type, uchar* kernel_data, size_t kernel_step, int kernel_width, int kernel_height, int anchor_x, int anchor_y, int borderType, const double borderValue[4], int iterations, bool /*allowSubmatrix*/, bool /*allowInplace*/)
{
    if (kernel_type != CV_8UC1 || src_type != dst_type)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
    if (src_type != CV_8UC1 && src_type != CV_8UC4)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
    if (kernel_width != kernel_height || kernel_width != 3)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
    if (iterations != 1)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
    if (operation != CV_HAL_MORPH_ERODE && operation != CV_HAL_MORPH_DILATE)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
    if ((borderType & ~BORDER_ISOLATED) == BORDER_WRAP)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    // Converts one border component to 8 bits.
    // Converting an out-of-range double (for example the DBL_MAX default) with a
    // plain static_cast is undefined behaviour, so the value is clamped to
    // [0, 255] first. In-range values are truncated exactly as before, and
    // DBL_MAX still maps to 0 for dilation.
    auto toBorderByte = [operation](double v) -> uchar {
        if (operation == CV_HAL_MORPH_DILATE && v == DBL_MAX)
            return 0;
        if (!(v > 0.0))   // negative values, zero and NaN
            return 0;
        if (v >= 255.0)
            return 255;
        return static_cast<uchar>(v);
    };

    uchar* borderV;
    if (src_type == CV_8UC1)
    {
        // Single-channel contexts own one scalar uchar.
        borderV = new uchar{toBorderByte(borderValue[0])};
    }
    else
    {
        // Four-channel contexts own an array of four uchars.
        borderV = new uchar[4]{toBorderByte(borderValue[0]), toBorderByte(borderValue[1]), toBorderByte(borderValue[2]), toBorderByte(borderValue[3])};
    }

    anchor_x = anchor_x < 0 ? kernel_width / 2 : anchor_x;
    anchor_y = anchor_y < 0 ? kernel_height / 2 : anchor_y;
    *context = reinterpret_cast<cvhalFilter2D*>(new Morph2D{operation, src_type, dst_type, kernel_type, kernel_data, kernel_step, kernel_width, kernel_height, anchor_x, anchor_y, borderType, borderV});
    return CV_HAL_ERROR_OK;
}
|
||||||
|
|
||||||
|
// Runs the 3x3 erode/dilate described by `context` (created by morphInit()).
//
// The row workers write into a densely packed scratch buffer, which is then
// copied row by row into dst_data using dst_step. Returns the worker status,
// or CV_HAL_ERROR_NOT_IMPLEMENTED if the stored operation is neither erode
// nor dilate.
// NOTE(review): the scratch buffer presumably exists so that the source is
// never overwritten while it is still being read - confirm.
int morph(cvhalFilter2D* context, uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int src_full_width, int src_full_height, int src_roi_x, int src_roi_y, int /*dst_full_width*/, int /*dst_full_height*/, int /*dst_roi_x*/, int /*dst_roi_y*/)
{
    Morph2D* data = reinterpret_cast<Morph2D*>(context);
    const size_t cn = data->src_type == CV_8UC1 ? 1 : 4;
    // Sizes and offsets are computed in size_t: the int product
    // width * height * cn can overflow for large images.
    const size_t row_bytes = static_cast<size_t>(width) * cn;
    std::vector<uchar> dst(row_bytes * static_cast<size_t>(height));

    int res = CV_HAL_ERROR_NOT_IMPLEMENTED;
    switch (data->operation)
    {
    case CV_HAL_MORPH_ERODE:
        res = common::invoke(height, {morph<CV_HAL_MORPH_ERODE>}, data, src_data, src_step, dst.data(), width, height, src_full_width, src_full_height, src_roi_x, src_roi_y);
        break;
    case CV_HAL_MORPH_DILATE:
        res = common::invoke(height, {morph<CV_HAL_MORPH_DILATE>}, data, src_data, src_step, dst.data(), width, height, src_full_width, src_full_height, src_roi_x, src_roi_y);
        break;
    }

    // Leave the destination untouched unless the filter actually ran, so the
    // caller never sees scratch contents from a failed run.
    if (res != CV_HAL_ERROR_OK)
        return res;

    for (int i = 0; i < height; i++)
        memcpy(dst_data + i * dst_step, dst.data() + i * row_bytes, row_bytes);
    return res;
}
|
||||||
|
|
||||||
|
// Releases a context created by morphInit, including its border-value buffer.
//
// morphInit allocates the border buffer with scalar `new uchar{...}` for
// CV_8UC1 sources and with array `new uchar[4]{...}` otherwise, so the matching
// form of delete has to be chosen here; using scalar delete on the array
// allocation is undefined behaviour.
int morphFree(cvhalFilter2D* context)
{
    Morph2D* data = reinterpret_cast<Morph2D*>(context);
    if (data->src_type == CV_8UC1)
        delete data->borderValue;
    else
        delete[] data->borderValue;
    delete data;
    return CV_HAL_ERROR_OK;
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::imgproc
|
@ -4,18 +4,13 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv { namespace pyramids {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#undef cv_hal_pyrdown
|
namespace {
|
||||||
#define cv_hal_pyrdown cv::cv_hal_rvv::pyramids::pyrDown
|
|
||||||
#undef cv_hal_pyrup
|
|
||||||
#define cv_hal_pyrup cv::cv_hal_rvv::pyramids::pyrUp
|
|
||||||
|
|
||||||
template<typename T> struct rvv;
|
template<typename T> struct rvv;
|
||||||
|
|
||||||
@ -562,7 +557,9 @@ inline int pyrUp(const uchar* src_data, size_t src_step, int src_width, int src_
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int depth, int cn, int border_type)
|
} // anonymous
|
||||||
|
|
||||||
|
int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int depth, int cn, int border_type)
|
||||||
{
|
{
|
||||||
if (border_type == BORDER_CONSTANT || (depth == CV_32F && cn == 1))
|
if (border_type == BORDER_CONSTANT || (depth == CV_32F && cn == 1))
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
@ -580,7 +577,7 @@ inline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int sr
|
|||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int pyrUp(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int depth, int cn, int border_type)
|
int pyrUp(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int depth, int cn, int border_type)
|
||||||
{
|
{
|
||||||
if (border_type != BORDER_DEFAULT)
|
if (border_type != BORDER_DEFAULT)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
@ -598,6 +595,6 @@ inline int pyrUp(const uchar* src_data, size_t src_step, int src_width, int src_
|
|||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::imgproc
|
@ -4,17 +4,15 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_RESIZE_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_RESIZE_HPP_INCLUDED
|
#include "common.hpp"
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
#include <list>
|
#include <list>
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
namespace resize {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
#undef cv_hal_resize
|
|
||||||
#define cv_hal_resize cv::cv_hal_rvv::resize::resize
|
namespace {
|
||||||
|
|
||||||
class ResizeInvoker : public ParallelLoopBody
|
class ResizeInvoker : public ParallelLoopBody
|
||||||
{
|
{
|
||||||
@ -986,7 +984,9 @@ static inline int resizeArea(int src_type, const uchar *src_data, size_t src_ste
|
|||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int resize(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, double inv_scale_x, double inv_scale_y, int interpolation)
|
} // anonymous
|
||||||
|
|
||||||
|
int resize(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, double inv_scale_x, double inv_scale_y, int interpolation)
|
||||||
{
|
{
|
||||||
inv_scale_x = 1 / inv_scale_x;
|
inv_scale_x = 1 / inv_scale_x;
|
||||||
inv_scale_y = 1 / inv_scale_y;
|
inv_scale_y = 1 / inv_scale_y;
|
||||||
@ -999,8 +999,7 @@ inline int resize(int src_type, const uchar *src_data, size_t src_step, int src_
|
|||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::resize
|
|
||||||
|
|
||||||
}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::imgproc
|
259
hal/riscv-rvv/src/imgproc/sep_filter.cpp
Normal file
259
hal/riscv-rvv/src/imgproc/sep_filter.cpp
Normal file
@ -0,0 +1,259 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
|
#include "rvv_hal.hpp"
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
struct sepFilter2D
|
||||||
|
{
|
||||||
|
int src_type;
|
||||||
|
int dst_type;
|
||||||
|
int kernel_type;
|
||||||
|
const uchar* kernelx_data;
|
||||||
|
int kernelx_length;
|
||||||
|
const uchar* kernely_data;
|
||||||
|
int kernely_length;
|
||||||
|
int anchor_x;
|
||||||
|
int anchor_y;
|
||||||
|
double delta;
|
||||||
|
int borderType;
|
||||||
|
};
|
||||||
|
|
||||||
|
// the algorithm is copied from 3rdparty/carotene/src/separable_filter.hpp,
|
||||||
|
// in the functor RowFilter3x3S16Generic and ColFilter3x3S16Generic
|
||||||
|
template<int ksize, typename T>
|
||||||
|
static inline int sepFilter(int start, int end, sepFilter2D* data, const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int full_width, int full_height, int offset_x, int offset_y)
|
||||||
|
{
|
||||||
|
constexpr int noval = std::numeric_limits<int>::max();
|
||||||
|
auto accessX = [&](int x) {
|
||||||
|
int pi;
|
||||||
|
if (data->borderType & BORDER_ISOLATED)
|
||||||
|
{
|
||||||
|
pi = common::borderInterpolate(x - data->anchor_y, height, data->borderType & ~BORDER_ISOLATED);
|
||||||
|
pi = pi < 0 ? noval : pi;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pi = common::borderInterpolate(offset_y + x - data->anchor_y, full_height, data->borderType);
|
||||||
|
pi = pi < 0 ? noval : pi - offset_y;
|
||||||
|
}
|
||||||
|
return pi;
|
||||||
|
};
|
||||||
|
auto accessY = [&](int y) {
|
||||||
|
int pj;
|
||||||
|
if (data->borderType & BORDER_ISOLATED)
|
||||||
|
{
|
||||||
|
pj = common::borderInterpolate(y - data->anchor_x, width, data->borderType & ~BORDER_ISOLATED);
|
||||||
|
pj = pj < 0 ? noval : pj;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pj = common::borderInterpolate(offset_x + y - data->anchor_x, full_width, data->borderType);
|
||||||
|
pj = pj < 0 ? noval : pj - offset_x;
|
||||||
|
}
|
||||||
|
return pj;
|
||||||
|
};
|
||||||
|
auto p2idx = [&](int x, int y){ return (x + ksize) % ksize * width + y; };
|
||||||
|
|
||||||
|
const float* kx = reinterpret_cast<const float*>(data->kernelx_data);
|
||||||
|
const float* ky = reinterpret_cast<const float*>(data->kernely_data);
|
||||||
|
std::vector<float> res(width * ksize);
|
||||||
|
auto process = [&](int x, int y) {
|
||||||
|
float sum = 0;
|
||||||
|
for (int i = 0; i < ksize; i++)
|
||||||
|
{
|
||||||
|
int p = accessY(y + i);
|
||||||
|
if (p != noval)
|
||||||
|
{
|
||||||
|
sum += kx[i] * reinterpret_cast<const T*>(src_data + x * src_step)[p];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res[p2idx(x, y)] = sum;
|
||||||
|
};
|
||||||
|
|
||||||
|
const int left = data->anchor_x, right = width - (ksize - 1 - data->anchor_x);
|
||||||
|
for (int i = start - data->anchor_y; i < end + (ksize - 1 - data->anchor_y); i++)
|
||||||
|
{
|
||||||
|
if (i + offset_y >= 0 && i + offset_y < full_height)
|
||||||
|
{
|
||||||
|
if (left >= right)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int j = 0; j < left; j++)
|
||||||
|
process(i, j);
|
||||||
|
for (int j = right; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = left; j < right; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m2(right - j);
|
||||||
|
const T* extra = reinterpret_cast<const T*>(src_data + i * src_step) + j - data->anchor_x;
|
||||||
|
vfloat32m8_t src;
|
||||||
|
if (std::is_same<T, uchar>::value)
|
||||||
|
{
|
||||||
|
src = __riscv_vfwcvt_f(__riscv_vwcvtu_x(__riscv_vle8_v_u8m2(reinterpret_cast<const uchar*>(extra), vl), vl), vl);
|
||||||
|
}
|
||||||
|
else if (std::is_same<T, short>::value)
|
||||||
|
{
|
||||||
|
src = __riscv_vfwcvt_f(__riscv_vle16_v_i16m4(reinterpret_cast<const short*>(extra), vl), vl);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
src = __riscv_vle32_v_f32m8(reinterpret_cast<const float*>(extra), vl);
|
||||||
|
}
|
||||||
|
|
||||||
|
extra += vl;
|
||||||
|
auto sum = __riscv_vfmul(src, kx[0], vl);
|
||||||
|
src = __riscv_vfslide1down(src, extra[0], vl);
|
||||||
|
sum = __riscv_vfmacc(sum, kx[1], src, vl);
|
||||||
|
src = __riscv_vfslide1down(src, extra[1], vl);
|
||||||
|
sum = __riscv_vfmacc(sum, kx[2], src, vl);
|
||||||
|
if (ksize == 5)
|
||||||
|
{
|
||||||
|
src = __riscv_vfslide1down(src, extra[2], vl);
|
||||||
|
sum = __riscv_vfmacc(sum, kx[3], src, vl);
|
||||||
|
src = __riscv_vfslide1down(src, extra[3], vl);
|
||||||
|
sum = __riscv_vfmacc(sum, kx[4], src, vl);
|
||||||
|
}
|
||||||
|
__riscv_vse32(res.data() + p2idx(i, j), sum, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int cur = i - (ksize - 1 - data->anchor_y);
|
||||||
|
if (cur >= start)
|
||||||
|
{
|
||||||
|
const float* row0 = accessX(cur ) == noval ? nullptr : res.data() + p2idx(accessX(cur ), 0);
|
||||||
|
const float* row1 = accessX(cur + 1) == noval ? nullptr : res.data() + p2idx(accessX(cur + 1), 0);
|
||||||
|
const float* row2 = accessX(cur + 2) == noval ? nullptr : res.data() + p2idx(accessX(cur + 2), 0);
|
||||||
|
const float* row3 = nullptr, *row4 = nullptr;
|
||||||
|
if (ksize == 5)
|
||||||
|
{
|
||||||
|
row3 = accessX(cur + 3) == noval ? nullptr : res.data() + p2idx(accessX(cur + 3), 0);
|
||||||
|
row4 = accessX(cur + 4) == noval ? nullptr : res.data() + p2idx(accessX(cur + 4), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e32m4(width - j);
|
||||||
|
auto v0 = row0 ? __riscv_vle32_v_f32m4(row0 + j, vl) : __riscv_vfmv_v_f_f32m4(0, vl);
|
||||||
|
auto v1 = row1 ? __riscv_vle32_v_f32m4(row1 + j, vl) : __riscv_vfmv_v_f_f32m4(0, vl);
|
||||||
|
auto v2 = row2 ? __riscv_vle32_v_f32m4(row2 + j, vl) : __riscv_vfmv_v_f_f32m4(0, vl);
|
||||||
|
auto sum = __riscv_vfmacc(__riscv_vfmacc(__riscv_vfmacc(__riscv_vfmv_v_f_f32m4(data->delta, vl), ky[0], v0, vl), ky[1], v1, vl), ky[2], v2, vl);
|
||||||
|
|
||||||
|
if (ksize == 5)
|
||||||
|
{
|
||||||
|
auto v3 = row3 ? __riscv_vle32_v_f32m4(row3 + j, vl) : __riscv_vfmv_v_f_f32m4(0, vl);
|
||||||
|
auto v4 = row4 ? __riscv_vle32_v_f32m4(row4 + j, vl) : __riscv_vfmv_v_f_f32m4(0, vl);
|
||||||
|
sum = __riscv_vfmacc(__riscv_vfmacc(sum, ky[3], v3, vl), ky[4], v4, vl);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data->dst_type == CV_16SC1)
|
||||||
|
{
|
||||||
|
__riscv_vse16(reinterpret_cast<short*>(dst_data + cur * dst_step) + j, __riscv_vfncvt_x(sum, vl), vl);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
__riscv_vse32(reinterpret_cast<float*>(dst_data + cur * dst_step) + j, sum, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
|
// Creates the context for the RVV separable filter.
//
// Supported configurations: CV_32FC1 kernels of equal length 3 or 5,
// CV_8UC1 / CV_16SC1 / CV_32FC1 sources, CV_16SC1 / CV_32FC1 destinations and
// any border mode except BORDER_WRAP. Anything else returns
// CV_HAL_ERROR_NOT_IMPLEMENTED without touching *context.
//
// Negative anchors are replaced by the kernel centre. The kernel pointers are
// stored as-is (not copied), and BORDER_ISOLATED is cleared from the stored
// border type. The context must be released with sepFilterFree.
int sepFilterInit(cvhalFilter2D **context, int src_type, int dst_type, int kernel_type, uchar* kernelx_data, int kernelx_length, uchar* kernely_data, int kernely_length, int anchor_x, int anchor_y, double delta, int borderType)
{
    const bool kernelOk = kernel_type == CV_32FC1;
    const bool srcOk = src_type == CV_8UC1 || src_type == CV_16SC1 || src_type == CV_32FC1;
    const bool dstOk = dst_type == CV_16SC1 || dst_type == CV_32FC1;
    const bool sizeOk = (kernelx_length == 3 || kernelx_length == 5) && kernelx_length == kernely_length;
    const bool borderOk = (borderType & ~BORDER_ISOLATED) != BORDER_WRAP;
    if (!(kernelOk && srcOk && dstOk && sizeOk && borderOk))
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    if (anchor_x < 0)
        anchor_x = kernelx_length / 2;
    if (anchor_y < 0)
        anchor_y = kernely_length / 2;

    sepFilter2D* state = new sepFilter2D{src_type, dst_type, kernel_type, kernelx_data, kernelx_length, kernely_data, kernely_length, anchor_x, anchor_y, delta, borderType & ~BORDER_ISOLATED};
    *context = reinterpret_cast<cvhalFilter2D*>(state);
    return CV_HAL_ERROR_OK;
}
|
||||||
|
|
||||||
|
// Applies the separable filter described by `context` (see sepFilterInit) to a
// width x height region and writes the result to dst_data.
//
// When src_data and dst_data are the same pointer, the workers write into a
// tightly packed temporary buffer that is copied back row by row afterwards,
// because each output row depends on neighbouring source rows.
//
// Returns the status from common::invoke; when that is
// CV_HAL_ERROR_NOT_IMPLEMENTED the destination is left untouched.
int sepFilter(cvhalFilter2D *context, uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int full_width, int full_height, int offset_x, int offset_y)
{
    sepFilter2D* data = reinterpret_cast<sepFilter2D*>(context);

    uchar* const user_dst = dst_data;
    const size_t user_dst_step = dst_step;
    const bool in_place = src_data == user_dst;

    std::vector<uchar> scratch;
    if (in_place)
    {
        // Widen before multiplying: width * height * elemSize can exceed INT_MAX.
        const size_t row_bytes = static_cast<size_t>(width) * CV_ELEM_SIZE(data->dst_type);
        scratch.resize(row_bytes * height);
        dst_data = scratch.data();
        dst_step = row_bytes;
    }

    // Dispatch on (kernel length, source type); other combinations are rejected
    // by sepFilterInit and fall through with NOT_IMPLEMENTED.
    int res = CV_HAL_ERROR_NOT_IMPLEMENTED;
    switch (data->kernelx_length*100 + data->src_type)
    {
    case 300 + CV_8UC1:
        res = common::invoke(height, {sepFilter<3, uchar>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);
        break;
    case 500 + CV_8UC1:
        res = common::invoke(height, {sepFilter<5, uchar>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);
        break;
    case 300 + CV_16SC1:
        res = common::invoke(height, {sepFilter<3, short>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);
        break;
    case 500 + CV_16SC1:
        res = common::invoke(height, {sepFilter<5, short>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);
        break;
    case 300 + CV_32FC1:
        res = common::invoke(height, {sepFilter<3, float>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);
        break;
    case 500 + CV_32FC1:
        res = common::invoke(height, {sepFilter<5, float>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);
        break;
    }
    if (res == CV_HAL_ERROR_NOT_IMPLEMENTED)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    if (in_place)
    {
        // Copy the packed rows back into the caller's (possibly padded) image.
        for (int i = 0; i < height; i++)
            memcpy(user_dst + i * user_dst_step, scratch.data() + i * dst_step, dst_step);
    }

    return res;
}
|
||||||
|
|
||||||
|
// Destroys a context created by sepFilterInit. The kernel buffers referenced
// by the context are not freed here.
int sepFilterFree(cvhalFilter2D* context)
{
    sepFilter2D* state = reinterpret_cast<sepFilter2D*>(context);
    delete state;
    return CV_HAL_ERROR_OK;
}
|
||||||
|
|
||||||
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
|
}}} // cv::rvv_hal::imgproc
|
@ -4,18 +4,15 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_THRESH_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_THRESH_HPP_INCLUDED
|
#include "common.hpp"
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
namespace threshold {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
// disabled since UI is fast enough, only called in threshold_otsu
|
|
||||||
// #undef cv_hal_threshold
|
namespace {
|
||||||
// #define cv_hal_threshold cv::cv_hal_rvv::threshold::threshold
|
|
||||||
|
|
||||||
class ThresholdInvoker : public ParallelLoopBody
|
class ThresholdInvoker : public ParallelLoopBody
|
||||||
{
|
{
|
||||||
@ -182,16 +179,6 @@ static inline int threshold_range(int start, int end, const uchar* src_data, siz
|
|||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int threshold(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int depth, int cn, double thresh, double maxValue, int thresholdType)
|
|
||||||
{
|
|
||||||
return threshold_range(0, height, src_data, src_step, dst_data, dst_step, width, depth, cn, thresh, maxValue, thresholdType);
|
|
||||||
}
|
|
||||||
} // cv::cv_hal_rvv::threshold
|
|
||||||
|
|
||||||
namespace threshold_otsu {
|
|
||||||
#undef cv_hal_threshold_otsu
|
|
||||||
#define cv_hal_threshold_otsu cv::cv_hal_rvv::threshold_otsu::threshold_otsu
|
|
||||||
|
|
||||||
static inline int otsu(int start, int end, const uchar* src_data, size_t src_step, int width, std::atomic<int>* cnt, int N, int* h)
|
static inline int otsu(int start, int end, const uchar* src_data, size_t src_step, int width, std::atomic<int>* cnt, int N, int* h)
|
||||||
{
|
{
|
||||||
const int c = cnt->fetch_add(1) % cv::getNumThreads();
|
const int c = cnt->fetch_add(1) % cv::getNumThreads();
|
||||||
@ -205,69 +192,6 @@ static inline int otsu(int start, int end, const uchar* src_data, size_t src_ste
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
// the algorithm is copied from imgproc/src/thresh.cpp,
|
|
||||||
// in the function template static double getThreshVal_Otsu
|
|
||||||
inline int threshold_otsu(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int depth, double maxValue, int thresholdType, double* thresh)
|
|
||||||
{
|
|
||||||
if (depth != CV_8UC1 || width * height < (1 << 15))
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
|
||||||
|
|
||||||
const int N = std::numeric_limits<uchar>::max() + 1;
|
|
||||||
const int nums = cv::getNumThreads();
|
|
||||||
std::vector<int> _h(N * nums, 0);
|
|
||||||
int* h = _h.data();
|
|
||||||
|
|
||||||
std::atomic<int> cnt(0);
|
|
||||||
cv::parallel_for_(Range(0, height), threshold::ThresholdInvoker({otsu}, src_data, src_step, width, &cnt, N, h), nums);
|
|
||||||
for (int i = N; i < nums * N; i++)
|
|
||||||
{
|
|
||||||
h[i % N] += h[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
double mu = 0, scale = 1. / (width*height);
|
|
||||||
for (int i = 0; i < N; i++)
|
|
||||||
{
|
|
||||||
mu += i*(double)h[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
mu *= scale;
|
|
||||||
double mu1 = 0, q1 = 0;
|
|
||||||
double max_sigma = 0, max_val = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < N; i++)
|
|
||||||
{
|
|
||||||
double p_i, q2, mu2, sigma;
|
|
||||||
|
|
||||||
p_i = h[i]*scale;
|
|
||||||
mu1 *= q1;
|
|
||||||
q1 += p_i;
|
|
||||||
q2 = 1. - q1;
|
|
||||||
|
|
||||||
if (std::min(q1,q2) < FLT_EPSILON || std::max(q1,q2) > 1. - FLT_EPSILON)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
mu1 = (mu1 + i*p_i)/q1;
|
|
||||||
mu2 = (mu - q1*mu1)/q2;
|
|
||||||
sigma = q1*q2*(mu1 - mu2)*(mu1 - mu2);
|
|
||||||
if (sigma > max_sigma)
|
|
||||||
{
|
|
||||||
max_sigma = sigma;
|
|
||||||
max_val = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*thresh = max_val;
|
|
||||||
if (dst_data == nullptr)
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
|
|
||||||
return threshold::invoke(width, height, {threshold::threshold_range}, src_data, src_step, dst_data, dst_step, width, depth, 1, max_val, maxValue, thresholdType);
|
|
||||||
}
|
|
||||||
} // cv::cv_hal_rvv::threshold_otsu
|
|
||||||
|
|
||||||
namespace adaptiveThreshold {
|
|
||||||
#undef cv_hal_adaptiveThreshold
|
|
||||||
#define cv_hal_adaptiveThreshold cv::cv_hal_rvv::adaptiveThreshold::adaptiveThreshold
|
|
||||||
|
|
||||||
// the algorithm is copied from imgproc/src/thresh.cpp,
|
// the algorithm is copied from imgproc/src/thresh.cpp,
|
||||||
// in the function void cv::adaptiveThreshold
|
// in the function void cv::adaptiveThreshold
|
||||||
template<int ksize, int method, int type>
|
template<int ksize, int method, int type>
|
||||||
@ -444,7 +368,72 @@ static inline int adaptiveThreshold(int start, int end, const uchar* src_data, s
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int adaptiveThreshold(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, double maxValue, int adaptiveMethod, int thresholdType, int blockSize, double C)
|
} // anonymous
|
||||||
|
|
||||||
|
int threshold(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int depth, int cn, double thresh, double maxValue, int thresholdType)
|
||||||
|
{
|
||||||
|
return threshold_range(0, height, src_data, src_step, dst_data, dst_step, width, depth, cn, thresh, maxValue, thresholdType);
|
||||||
|
}
|
||||||
|
|
||||||
|
// the algorithm is copied from imgproc/src/thresh.cpp,
|
||||||
|
// in the function template static double getThreshVal_Otsu
|
||||||
|
int threshold_otsu(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int depth, double maxValue, int thresholdType, double* thresh)
|
||||||
|
{
|
||||||
|
if (depth != CV_8UC1 || width * height < (1 << 15))
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
|
const int N = std::numeric_limits<uchar>::max() + 1;
|
||||||
|
const int nums = cv::getNumThreads();
|
||||||
|
std::vector<int> _h(N * nums, 0);
|
||||||
|
int* h = _h.data();
|
||||||
|
|
||||||
|
std::atomic<int> cnt(0);
|
||||||
|
cv::parallel_for_(Range(0, height), ThresholdInvoker({otsu}, src_data, src_step, width, &cnt, N, h), nums);
|
||||||
|
for (int i = N; i < nums * N; i++)
|
||||||
|
{
|
||||||
|
h[i % N] += h[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
double mu = 0, scale = 1. / (width*height);
|
||||||
|
for (int i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
mu += i*(double)h[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
mu *= scale;
|
||||||
|
double mu1 = 0, q1 = 0;
|
||||||
|
double max_sigma = 0, max_val = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
double p_i, q2, mu2, sigma;
|
||||||
|
|
||||||
|
p_i = h[i]*scale;
|
||||||
|
mu1 *= q1;
|
||||||
|
q1 += p_i;
|
||||||
|
q2 = 1. - q1;
|
||||||
|
|
||||||
|
if (std::min(q1,q2) < FLT_EPSILON || std::max(q1,q2) > 1. - FLT_EPSILON)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
mu1 = (mu1 + i*p_i)/q1;
|
||||||
|
mu2 = (mu - q1*mu1)/q2;
|
||||||
|
sigma = q1*q2*(mu1 - mu2)*(mu1 - mu2);
|
||||||
|
if (sigma > max_sigma)
|
||||||
|
{
|
||||||
|
max_sigma = sigma;
|
||||||
|
max_val = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*thresh = max_val;
|
||||||
|
if (dst_data == nullptr)
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
|
||||||
|
return invoke(width, height, {threshold_range}, src_data, src_step, dst_data, dst_step, width, depth, 1, max_val, maxValue, thresholdType);
|
||||||
|
}
|
||||||
|
|
||||||
|
int adaptiveThreshold(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, double maxValue, int adaptiveMethod, int thresholdType, int blockSize, double C)
|
||||||
{
|
{
|
||||||
if (thresholdType != CV_HAL_THRESH_BINARY && thresholdType != CV_HAL_THRESH_BINARY_INV)
|
if (thresholdType != CV_HAL_THRESH_BINARY && thresholdType != CV_HAL_THRESH_BINARY_INV)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
@ -456,27 +445,26 @@ inline int adaptiveThreshold(const uchar* src_data, size_t src_step, uchar* dst_
|
|||||||
switch (blockSize*100 + adaptiveMethod*10 + thresholdType)
|
switch (blockSize*100 + adaptiveMethod*10 + thresholdType)
|
||||||
{
|
{
|
||||||
case 300 + CV_HAL_ADAPTIVE_THRESH_MEAN_C*10 + CV_HAL_THRESH_BINARY:
|
case 300 + CV_HAL_ADAPTIVE_THRESH_MEAN_C*10 + CV_HAL_THRESH_BINARY:
|
||||||
return threshold::invoke(width, height, {adaptiveThreshold<3, CV_HAL_ADAPTIVE_THRESH_MEAN_C, CV_HAL_THRESH_BINARY>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
return invoke(width, height, {adaptiveThreshold<3, CV_HAL_ADAPTIVE_THRESH_MEAN_C, CV_HAL_THRESH_BINARY>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
||||||
case 300 + CV_HAL_ADAPTIVE_THRESH_MEAN_C*10 + CV_HAL_THRESH_BINARY_INV:
|
case 300 + CV_HAL_ADAPTIVE_THRESH_MEAN_C*10 + CV_HAL_THRESH_BINARY_INV:
|
||||||
return threshold::invoke(width, height, {adaptiveThreshold<3, CV_HAL_ADAPTIVE_THRESH_MEAN_C, CV_HAL_THRESH_BINARY_INV>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
return invoke(width, height, {adaptiveThreshold<3, CV_HAL_ADAPTIVE_THRESH_MEAN_C, CV_HAL_THRESH_BINARY_INV>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
||||||
case 500 + CV_HAL_ADAPTIVE_THRESH_MEAN_C*10 + CV_HAL_THRESH_BINARY:
|
case 500 + CV_HAL_ADAPTIVE_THRESH_MEAN_C*10 + CV_HAL_THRESH_BINARY:
|
||||||
return threshold::invoke(width, height, {adaptiveThreshold<5, CV_HAL_ADAPTIVE_THRESH_MEAN_C, CV_HAL_THRESH_BINARY>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
return invoke(width, height, {adaptiveThreshold<5, CV_HAL_ADAPTIVE_THRESH_MEAN_C, CV_HAL_THRESH_BINARY>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
||||||
case 500 + CV_HAL_ADAPTIVE_THRESH_MEAN_C*10 + CV_HAL_THRESH_BINARY_INV:
|
case 500 + CV_HAL_ADAPTIVE_THRESH_MEAN_C*10 + CV_HAL_THRESH_BINARY_INV:
|
||||||
return threshold::invoke(width, height, {adaptiveThreshold<5, CV_HAL_ADAPTIVE_THRESH_MEAN_C, CV_HAL_THRESH_BINARY_INV>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
return invoke(width, height, {adaptiveThreshold<5, CV_HAL_ADAPTIVE_THRESH_MEAN_C, CV_HAL_THRESH_BINARY_INV>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
||||||
case 300 + CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C*10 + CV_HAL_THRESH_BINARY:
|
case 300 + CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C*10 + CV_HAL_THRESH_BINARY:
|
||||||
return threshold::invoke(width, height, {adaptiveThreshold<3, CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C, CV_HAL_THRESH_BINARY>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
return invoke(width, height, {adaptiveThreshold<3, CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C, CV_HAL_THRESH_BINARY>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
||||||
case 300 + CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C*10 + CV_HAL_THRESH_BINARY_INV:
|
case 300 + CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C*10 + CV_HAL_THRESH_BINARY_INV:
|
||||||
return threshold::invoke(width, height, {adaptiveThreshold<3, CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C, CV_HAL_THRESH_BINARY_INV>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
return invoke(width, height, {adaptiveThreshold<3, CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C, CV_HAL_THRESH_BINARY_INV>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
||||||
case 500 + CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C*10 + CV_HAL_THRESH_BINARY:
|
case 500 + CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C*10 + CV_HAL_THRESH_BINARY:
|
||||||
return threshold::invoke(width, height, {adaptiveThreshold<5, CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C, CV_HAL_THRESH_BINARY>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
return invoke(width, height, {adaptiveThreshold<5, CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C, CV_HAL_THRESH_BINARY>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
||||||
case 500 + CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C*10 + CV_HAL_THRESH_BINARY_INV:
|
case 500 + CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C*10 + CV_HAL_THRESH_BINARY_INV:
|
||||||
return threshold::invoke(width, height, {adaptiveThreshold<5, CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C, CV_HAL_THRESH_BINARY_INV>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
return invoke(width, height, {adaptiveThreshold<5, CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C, CV_HAL_THRESH_BINARY_INV>}, src_data, src_step, dst_data, dst_step, width, height, maxValue, C);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::adaptiveThreshold
|
|
||||||
|
|
||||||
}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::imgproc
|
@ -4,22 +4,14 @@
|
|||||||
|
|
||||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_WARP_HPP_INCLUDED
|
#include "rvv_hal.hpp"
|
||||||
#define OPENCV_HAL_RVV_WARP_HPP_INCLUDED
|
#include "common.hpp"
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
namespace cv { namespace rvv_hal { namespace imgproc {
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
#if CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
namespace remap {
|
namespace {
|
||||||
|
|
||||||
// BUG: https://github.com/opencv/opencv/issues/27279
|
|
||||||
// #undef cv_hal_remap32f
|
|
||||||
// #define cv_hal_remap32f cv::cv_hal_rvv::remap::remap32f
|
|
||||||
// #undef cv_hal_remap32fc2
|
|
||||||
// #define cv_hal_remap32fc2 cv::cv_hal_rvv::remap::remap32fc2
|
|
||||||
// #undef cv_hal_remap16s
|
|
||||||
// #define cv_hal_remap16s cv::cv_hal_rvv::remap::remap16s
|
|
||||||
|
|
||||||
class RemapInvoker : public ParallelLoopBody
|
class RemapInvoker : public ParallelLoopBody
|
||||||
{
|
{
|
||||||
@ -794,6 +786,8 @@ static inline int remap32fC4(int start, int end, const uchar *src_data, size_t s
|
|||||||
return CV_HAL_ERROR_OK;
|
return CV_HAL_ERROR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
// the algorithm is copied from 3rdparty/carotene/src/remap.cpp,
|
// the algorithm is copied from 3rdparty/carotene/src/remap.cpp,
|
||||||
// in the function void CAROTENE_NS::remapNearestNeighbor and void CAROTENE_NS::remapLinear
|
// in the function void CAROTENE_NS::remapNearestNeighbor and void CAROTENE_NS::remapLinear
|
||||||
template<bool s16 = false>
|
template<bool s16 = false>
|
||||||
@ -880,17 +874,6 @@ inline int remap16s(int src_type, const uchar *src_data, size_t src_step, int sr
|
|||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
return remap32f<true>(src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, reinterpret_cast<float*>(mapx), mapx_step, reinterpret_cast<float*>(mapy), mapy_step, interpolation, border_type, border_value);
|
return remap32f<true>(src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, reinterpret_cast<float*>(mapx), mapx_step, reinterpret_cast<float*>(mapy), mapy_step, interpolation, border_type, border_value);
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::remap
|
|
||||||
|
|
||||||
namespace warp {
|
|
||||||
|
|
||||||
// BUG: https://github.com/opencv/opencv/issues/27280
|
|
||||||
//#undef cv_hal_warpAffine
|
|
||||||
//#define cv_hal_warpAffine cv::cv_hal_rvv::warp::warpAffine
|
|
||||||
|
|
||||||
// BUG: https://github.com/opencv/opencv/issues/27281
|
|
||||||
//#undef cv_hal_warpPerspective
|
|
||||||
//#define cv_hal_warpPerspective cv::cv_hal_rvv::warp::warpPerspective
|
|
||||||
|
|
||||||
template<bool perspective>
|
template<bool perspective>
|
||||||
static inline int warpC1(int start, int end, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, const double* M, int interpolation, int borderType, const double* borderValue)
|
static inline int warpC1(int start, int end, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, const double* M, int interpolation, int borderType, const double* borderValue)
|
||||||
@ -1162,7 +1145,7 @@ static inline int warpC4(int start, int end, const uchar *src_data, size_t src_s
|
|||||||
|
|
||||||
// the algorithm is copied from 3rdparty/carotene/src/warp_affine.cpp,
|
// the algorithm is copied from 3rdparty/carotene/src/warp_affine.cpp,
|
||||||
// in the function void CAROTENE_NS::warpAffineNearestNeighbor and void CAROTENE_NS::warpAffineLinear
|
// in the function void CAROTENE_NS::warpAffineNearestNeighbor and void CAROTENE_NS::warpAffineLinear
|
||||||
inline int warpAffine(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[6], int interpolation, int borderType, const double borderValue[4])
|
int warpAffine(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[6], int interpolation, int borderType, const double borderValue[4])
|
||||||
{
|
{
|
||||||
if (src_type != CV_8UC1 && src_type != CV_8UC3 && src_type != CV_8UC4)
|
if (src_type != CV_8UC1 && src_type != CV_8UC3 && src_type != CV_8UC4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
@ -1174,11 +1157,11 @@ inline int warpAffine(int src_type, const uchar *src_data, size_t src_step, int
|
|||||||
switch (src_type)
|
switch (src_type)
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
return remap::invoke(dst_width, dst_height, {warpC1<false>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
return invoke(dst_width, dst_height, {warpC1<false>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
||||||
case CV_8UC3:
|
case CV_8UC3:
|
||||||
return remap::invoke(dst_width, dst_height, {warpC3<false>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
return invoke(dst_width, dst_height, {warpC3<false>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
return remap::invoke(dst_width, dst_height, {warpC4<false>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
return invoke(dst_width, dst_height, {warpC4<false>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
@ -1186,7 +1169,7 @@ inline int warpAffine(int src_type, const uchar *src_data, size_t src_step, int
|
|||||||
|
|
||||||
// the algorithm is copied from 3rdparty/carotene/src/warp_perspective.cpp,
|
// the algorithm is copied from 3rdparty/carotene/src/warp_perspective.cpp,
|
||||||
// in the function void CAROTENE_NS::warpPerspectiveNearestNeighbor and void CAROTENE_NS::warpPerspectiveLinear
|
// in the function void CAROTENE_NS::warpPerspectiveNearestNeighbor and void CAROTENE_NS::warpPerspectiveLinear
|
||||||
inline int warpPerspective(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[9], int interpolation, int borderType, const double borderValue[4])
|
int warpPerspective(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[9], int interpolation, int borderType, const double borderValue[4])
|
||||||
{
|
{
|
||||||
if (src_type != CV_8UC1 && src_type != CV_8UC3 && src_type != CV_8UC4)
|
if (src_type != CV_8UC1 && src_type != CV_8UC3 && src_type != CV_8UC4)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
@ -1198,17 +1181,16 @@ inline int warpPerspective(int src_type, const uchar *src_data, size_t src_step,
|
|||||||
switch (src_type)
|
switch (src_type)
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
return remap::invoke(dst_width, dst_height, {warpC1<true>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
return invoke(dst_width, dst_height, {warpC1<true>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
||||||
case CV_8UC3:
|
case CV_8UC3:
|
||||||
return remap::invoke(dst_width, dst_height, {warpC3<true>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
return invoke(dst_width, dst_height, {warpC3<true>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
return remap::invoke(dst_width, dst_height, {warpC4<true>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
return invoke(dst_width, dst_height, {warpC4<true>}, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, M, interpolation, borderType, borderValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::warp
|
|
||||||
|
|
||||||
}}
|
#endif // CV_HAL_RVV_1P0_ENABLED
|
||||||
|
|
||||||
#endif
|
}}} // cv::rvv_hal::imgproc
|
@ -1,109 +0,0 @@
|
|||||||
// This file is part of OpenCV project.
|
|
||||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
||||||
// of this distribution and at http://opencv.org/license.html.
|
|
||||||
|
|
||||||
#ifndef OPENCV_HAL_RVV_071_HPP_INCLUDED
|
|
||||||
#define OPENCV_HAL_RVV_071_HPP_INCLUDED
|
|
||||||
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
|
|
||||||
#include <limits>
|
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
|
||||||
|
|
||||||
#undef cv_hal_cvtBGRtoBGR
|
|
||||||
#define cv_hal_cvtBGRtoBGR cv::cv_hal_rvv::cvtBGRtoBGR
|
|
||||||
|
|
||||||
static const unsigned char index_array_32 [32]
|
|
||||||
{ 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15, 18, 17, 16, 19, 22, 21, 20, 23, 26, 25, 24, 27, 30, 29, 28, 31 };
|
|
||||||
|
|
||||||
static const unsigned char index_array_24 [24]
|
|
||||||
{ 2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17, 16, 15, 20, 19, 18, 23, 22, 21 };
|
|
||||||
|
|
||||||
static void vBGRtoBGR(const unsigned char* src, unsigned char * dst, const unsigned char * index, int n, int scn, int dcn, int vsize_pixels, const int vsize)
|
|
||||||
{
|
|
||||||
vuint8m2_t vec_index = vle8_v_u8m2(index, vsize);
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
|
|
||||||
for ( ; i <= n-vsize; i += vsize_pixels, src += vsize, dst += vsize)
|
|
||||||
{
|
|
||||||
vuint8m2_t vec_src = vle8_v_u8m2(src, vsize);
|
|
||||||
vuint8m2_t vec_dst = vrgather_vv_u8m2(vec_src, vec_index, vsize);
|
|
||||||
vse8_v_u8m2(dst, vec_dst, vsize);
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( ; i < n; i++, src += scn, dst += dcn )
|
|
||||||
{
|
|
||||||
unsigned char t0 = src[0], t1 = src[1], t2 = src[2];
|
|
||||||
dst[2] = t0;
|
|
||||||
dst[1] = t1;
|
|
||||||
dst[0] = t2;
|
|
||||||
if(dcn == 4)
|
|
||||||
{
|
|
||||||
unsigned char d = src[3];
|
|
||||||
dst[3] = d;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void sBGRtoBGR(const unsigned char* src, unsigned char * dst, int n, int scn, int dcn, int bi)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < n; i++, src += scn, dst += dcn)
|
|
||||||
{
|
|
||||||
unsigned char t0 = src[0], t1 = src[1], t2 = src[2];
|
|
||||||
dst[bi ] = t0;
|
|
||||||
dst[1] = t1;
|
|
||||||
dst[bi^2] = t2;
|
|
||||||
if(dcn == 4)
|
|
||||||
{
|
|
||||||
unsigned char d = scn == 4 ? src[3] : std::numeric_limits<unsigned char>::max();
|
|
||||||
dst[3] = d;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int cvtBGRtoBGR(const unsigned char * src_data, size_t src_step, unsigned char * dst_data, size_t dst_step, int width, int height, int depth, int scn, int dcn, bool swapBlue)
|
|
||||||
{
|
|
||||||
if (depth != CV_8U)
|
|
||||||
{
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int blueIdx = swapBlue ? 2 : 0;
|
|
||||||
if (scn == dcn)
|
|
||||||
{
|
|
||||||
if (!swapBlue)
|
|
||||||
{
|
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int vsize_pixels = 8;
|
|
||||||
|
|
||||||
if (scn == 4)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < height; i++, src_data += src_step, dst_data += dst_step)
|
|
||||||
{
|
|
||||||
vBGRtoBGR(src_data, dst_data, index_array_32, width, scn, dcn, vsize_pixels, 32);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (int i = 0; i < height; i++, src_data += src_step, dst_data += dst_step)
|
|
||||||
{
|
|
||||||
vBGRtoBGR(src_data, dst_data, index_array_24, width, scn, dcn, vsize_pixels, 24);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (int i = 0; i < height; i++, src_data += src_step, dst_data += dst_step)
|
|
||||||
sBGRtoBGR(src_data, dst_data, width, scn, dcn, blueIdx);
|
|
||||||
}
|
|
||||||
|
|
||||||
return CV_HAL_ERROR_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
}}
|
|
||||||
|
|
||||||
#endif
|
|
@ -99,7 +99,8 @@ void drawFrameAxes(InputOutputArray image, InputArray cameraMatrix, InputArray d
|
|||||||
CV_CheckType(type, cn == 1 || cn == 3 || cn == 4,
|
CV_CheckType(type, cn == 1 || cn == 3 || cn == 4,
|
||||||
"Number of channels must be 1, 3 or 4" );
|
"Number of channels must be 1, 3 or 4" );
|
||||||
|
|
||||||
CV_Assert(image.getMat().total() > 0);
|
cv::Mat img = image.getMat();
|
||||||
|
CV_Assert(img.total() > 0);
|
||||||
CV_Assert(length > 0);
|
CV_Assert(length > 0);
|
||||||
|
|
||||||
// project axes points
|
// project axes points
|
||||||
@ -111,6 +112,18 @@ void drawFrameAxes(InputOutputArray image, InputArray cameraMatrix, InputArray d
|
|||||||
std::vector<Point2f> imagePoints;
|
std::vector<Point2f> imagePoints;
|
||||||
projectPoints(axesPoints, rvec, tvec, cameraMatrix, distCoeffs, imagePoints);
|
projectPoints(axesPoints, rvec, tvec, cameraMatrix, distCoeffs, imagePoints);
|
||||||
|
|
||||||
|
cv::Rect imageRect(0, 0, img.cols, img.rows);
|
||||||
|
bool allIn = true;
|
||||||
|
for (size_t i = 0; i < imagePoints.size(); i++)
|
||||||
|
{
|
||||||
|
allIn &= imageRect.contains(imagePoints[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!allIn)
|
||||||
|
{
|
||||||
|
CV_LOG_WARNING(NULL, "Some of projected axes endpoints are out of frame. The drawn axes may be not reliable.");
|
||||||
|
}
|
||||||
|
|
||||||
// draw axes lines
|
// draw axes lines
|
||||||
line(image, imagePoints[0], imagePoints[1], Scalar(0, 0, 255), thickness);
|
line(image, imagePoints[0], imagePoints[1], Scalar(0, 0, 255), thickness);
|
||||||
line(image, imagePoints[0], imagePoints[2], Scalar(0, 255, 0), thickness);
|
line(image, imagePoints[0], imagePoints[2], Scalar(0, 255, 0), thickness);
|
||||||
|
@ -17,7 +17,7 @@ class Error : public Algorithm {
|
|||||||
public:
|
public:
|
||||||
// set model to use getError() function
|
// set model to use getError() function
|
||||||
virtual void setModelParameters (const Mat &model) = 0;
|
virtual void setModelParameters (const Mat &model) = 0;
|
||||||
// returns error of point wih @point_idx w.r.t. model
|
// returns error of point with @point_idx w.r.t. model
|
||||||
virtual float getError (int point_idx) const = 0;
|
virtual float getError (int point_idx) const = 0;
|
||||||
virtual const std::vector<float> &getErrors (const Mat &model) = 0;
|
virtual const std::vector<float> &getErrors (const Mat &model) = 0;
|
||||||
};
|
};
|
||||||
|
@ -175,7 +175,7 @@ TEST_F(fisheyeTest, CalibrationWithFixedFocalLength)
|
|||||||
cv::fisheye::calibrate(objectPoints, imagePoints, imageSize, theK, theD,
|
cv::fisheye::calibrate(objectPoints, imagePoints, imageSize, theK, theD,
|
||||||
cv::noArray(), cv::noArray(), flag, cv::TermCriteria(3, 20, 1e-6));
|
cv::noArray(), cv::noArray(), flag, cv::TermCriteria(3, 20, 1e-6));
|
||||||
|
|
||||||
// ensure that CALIB_FIX_FOCAL_LENGTH works and focal lenght has not changed
|
// ensure that CALIB_FIX_FOCAL_LENGTH works and focal length has not changed
|
||||||
EXPECT_EQ(theK(0,0), K(0,0));
|
EXPECT_EQ(theK(0,0), K(0,0));
|
||||||
EXPECT_EQ(theK(1,1), K(1,1));
|
EXPECT_EQ(theK(1,1), K(1,1));
|
||||||
|
|
||||||
|
@ -1965,8 +1965,8 @@ The function solveCubic finds the real roots of a cubic equation:
|
|||||||
|
|
||||||
The roots are stored in the roots array.
|
The roots are stored in the roots array.
|
||||||
@param coeffs equation coefficients, an array of 3 or 4 elements.
|
@param coeffs equation coefficients, an array of 3 or 4 elements.
|
||||||
@param roots output array of real roots that has 1 or 3 elements.
|
@param roots output array of real roots that has 0, 1, 2 or 3 elements.
|
||||||
@return number of real roots. It can be 0, 1 or 2.
|
@return number of real roots. It can be -1 (all real numbers), 0, 1, 2 or 3.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W int solveCubic(InputArray coeffs, OutputArray roots);
|
CV_EXPORTS_W int solveCubic(InputArray coeffs, OutputArray roots);
|
||||||
|
|
||||||
|
@ -225,32 +225,30 @@ These operations allow to reorder or recombine elements in one or multiple vecto
|
|||||||
Element-wise binary and unary operations.
|
Element-wise binary and unary operations.
|
||||||
|
|
||||||
- Arithmetics:
|
- Arithmetics:
|
||||||
@ref v_add(const v_reg &a, const v_reg &b) "+",
|
@ref v_add,
|
||||||
@ref v_sub(const v_reg &a, const v_reg &b) "-",
|
@ref v_sub,
|
||||||
@ref v_mul(const v_reg &a, const v_reg &b) "*",
|
@ref v_mul,
|
||||||
@ref v_div(const v_reg &a, const v_reg &b) "/",
|
@ref v_div,
|
||||||
@ref v_mul_expand
|
@ref v_mul_expand
|
||||||
|
|
||||||
- Non-saturating arithmetics: @ref v_add_wrap, @ref v_sub_wrap
|
- Non-saturating arithmetics: @ref v_add_wrap, @ref v_sub_wrap
|
||||||
|
|
||||||
- Bitwise shifts:
|
- Bitwise shifts:
|
||||||
@ref v_shl(const v_reg &a, int s) "<<",
|
|
||||||
@ref v_shr(const v_reg &a, int s) ">>",
|
|
||||||
@ref v_shl, @ref v_shr
|
@ref v_shl, @ref v_shr
|
||||||
|
|
||||||
- Bitwise logic:
|
- Bitwise logic:
|
||||||
@ref v_and(const v_reg &a, const v_reg &b) "&",
|
@ref v_and,
|
||||||
@ref v_or(const v_reg &a, const v_reg &b) "|",
|
@ref v_or,
|
||||||
@ref v_xor(const v_reg &a, const v_reg &b) "^",
|
@ref v_xor,
|
||||||
@ref v_not(const v_reg &a) "~"
|
@ref v_not
|
||||||
|
|
||||||
- Comparison:
|
- Comparison:
|
||||||
@ref v_gt(const v_reg &a, const v_reg &b) ">",
|
@ref v_gt,
|
||||||
@ref v_ge(const v_reg &a, const v_reg &b) ">=",
|
@ref v_ge,
|
||||||
@ref v_lt(const v_reg &a, const v_reg &b) "<",
|
@ref v_lt,
|
||||||
@ref v_le(const v_reg &a, const v_reg &b) "<=",
|
@ref v_le,
|
||||||
@ref v_eq(const v_reg &a, const v_reg &b) "==",
|
@ref v_eq,
|
||||||
@ref v_ne(const v_reg &a, const v_reg &b) "!="
|
@ref v_ne
|
||||||
|
|
||||||
- min/max: @ref v_min, @ref v_max
|
- min/max: @ref v_min, @ref v_max
|
||||||
|
|
||||||
|
111
modules/core/include/opencv2/core/hal/intrin_legacy_ops.h
Normal file
111
modules/core/include/opencv2/core/hal/intrin_legacy_ops.h
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
|
// This file has been created for compatibility with older versions of Universal Intrinsics
|
||||||
|
// Binary operators for vector types have been removed since version 4.11
|
||||||
|
// Include this file manually after OpenCV headers if you need these operators
|
||||||
|
|
||||||
|
#ifndef OPENCV_HAL_INTRIN_LEGACY_OPS_HPP
|
||||||
|
#define OPENCV_HAL_INTRIN_LEGACY_OPS_HPP
|
||||||
|
|
||||||
|
#ifdef __OPENCV_BUILD
|
||||||
|
#error "Universal Intrinsics operators are deprecated and should not be used in OpenCV library"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __riscv
|
||||||
|
#warning "Operators might conflict with built-in functions on RISC-V platform"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(CV_VERSION) && CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR < 9
|
||||||
|
#warning "Older versions of OpenCV (<4.9) already have Universal Intrinsics operators"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace cv { namespace hal {
|
||||||
|
|
||||||
|
#define BIN_OP(OP, FUN) \
|
||||||
|
template <typename R> R operator OP (const R & lhs, const R & rhs) { return FUN(lhs, rhs); }
|
||||||
|
|
||||||
|
#define BIN_A_OP(OP, FUN) \
|
||||||
|
template <typename R> R & operator OP (R & res, const R & val) { res = FUN(res, val); return res; }
|
||||||
|
|
||||||
|
#define UN_OP(OP, FUN) \
|
||||||
|
template <typename R> R operator OP (const R & val) { return FUN(val); }
|
||||||
|
|
||||||
|
BIN_OP(+, v_add)
|
||||||
|
BIN_OP(-, v_sub)
|
||||||
|
BIN_OP(*, v_mul)
|
||||||
|
BIN_OP(/, v_div)
|
||||||
|
BIN_OP(&, v_and)
|
||||||
|
BIN_OP(|, v_or)
|
||||||
|
BIN_OP(^, v_xor)
|
||||||
|
|
||||||
|
BIN_OP(==, v_eq)
|
||||||
|
BIN_OP(!=, v_ne)
|
||||||
|
BIN_OP(<, v_lt)
|
||||||
|
BIN_OP(>, v_gt)
|
||||||
|
BIN_OP(<=, v_le)
|
||||||
|
BIN_OP(>=, v_ge)
|
||||||
|
|
||||||
|
BIN_A_OP(+=, v_add)
|
||||||
|
BIN_A_OP(-=, v_sub)
|
||||||
|
BIN_A_OP(*=, v_mul)
|
||||||
|
BIN_A_OP(/=, v_div)
|
||||||
|
BIN_A_OP(&=, v_and)
|
||||||
|
BIN_A_OP(|=, v_or)
|
||||||
|
BIN_A_OP(^=, v_xor)
|
||||||
|
|
||||||
|
UN_OP(~, v_not)
|
||||||
|
|
||||||
|
// TODO: shift operators?
|
||||||
|
|
||||||
|
}} // cv::hal::
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
#ifdef OPENCV_ENABLE_INLINE_INTRIN_OPERATOR_TEST
|
||||||
|
|
||||||
|
namespace cv { namespace hal {
|
||||||
|
|
||||||
|
inline static void opencv_operator_compile_test()
|
||||||
|
{
|
||||||
|
using namespace cv;
|
||||||
|
v_float32 a, b, c;
|
||||||
|
uint8_t shift = 1;
|
||||||
|
a = b + c;
|
||||||
|
a = b - c;
|
||||||
|
a = b * c;
|
||||||
|
a = b / c;
|
||||||
|
a = b & c;
|
||||||
|
a = b | c;
|
||||||
|
a = b ^ c;
|
||||||
|
// a = b >> shift;
|
||||||
|
// a = b << shift;
|
||||||
|
|
||||||
|
a = (b == c);
|
||||||
|
a = (b != c);
|
||||||
|
a = (b < c);
|
||||||
|
a = (b > c);
|
||||||
|
a = (b <= c);
|
||||||
|
a = (b >= c);
|
||||||
|
|
||||||
|
a += b;
|
||||||
|
a -= b;
|
||||||
|
a *= b;
|
||||||
|
a /= b;
|
||||||
|
a &= b;
|
||||||
|
a |= b;
|
||||||
|
a ^= b;
|
||||||
|
// a <<= shift;
|
||||||
|
// a >>= shift;
|
||||||
|
|
||||||
|
a = ~b;
|
||||||
|
}
|
||||||
|
|
||||||
|
}} // cv::hal::
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#endif // OPENCV_HAL_INTRIN_LEGACY_OPS_HPP
|
@ -3184,6 +3184,12 @@ Mat_<_Tp>& Mat_<_Tp>::operator = (const MatExpr& e)
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> inline
|
||||||
|
MatExpr Mat_<_Tp>::zeros(int _ndims, const int* _sizes)
|
||||||
|
{
|
||||||
|
return Mat::zeros(_ndims, _sizes, traits::Type<_Tp>::value);
|
||||||
|
}
|
||||||
|
|
||||||
template<typename _Tp> inline
|
template<typename _Tp> inline
|
||||||
MatExpr Mat_<_Tp>::zeros(int rows, int cols)
|
MatExpr Mat_<_Tp>::zeros(int rows, int cols)
|
||||||
{
|
{
|
||||||
|
@ -147,7 +147,23 @@ namespace cv { namespace cuda
|
|||||||
inline explicit NppStreamHandler(cudaStream_t newStream)
|
inline explicit NppStreamHandler(cudaStream_t newStream)
|
||||||
{
|
{
|
||||||
nppStreamContext = {};
|
nppStreamContext = {};
|
||||||
nppSafeCall(nppGetStreamContext(&nppStreamContext));
|
#if CUDA_VERSION < 12090
|
||||||
|
nppSafeCall(nppGetStreamContext(&nppStreamContext));
|
||||||
|
#else
|
||||||
|
int device = 0;
|
||||||
|
cudaSafeCall(cudaGetDevice(&device));
|
||||||
|
|
||||||
|
cudaDeviceProp prop{};
|
||||||
|
cudaSafeCall(cudaGetDeviceProperties(&prop, device));
|
||||||
|
|
||||||
|
nppStreamContext.nCudaDeviceId = device;
|
||||||
|
nppStreamContext.nMultiProcessorCount = prop.multiProcessorCount;
|
||||||
|
nppStreamContext.nMaxThreadsPerMultiProcessor = prop.maxThreadsPerMultiProcessor;
|
||||||
|
nppStreamContext.nMaxThreadsPerBlock = prop.maxThreadsPerBlock;
|
||||||
|
nppStreamContext.nSharedMemPerBlock = prop.sharedMemPerBlock;
|
||||||
|
nppStreamContext.nCudaDevAttrComputeCapabilityMajor = prop.major;
|
||||||
|
nppStreamContext.nCudaDevAttrComputeCapabilityMinor = prop.minor;
|
||||||
|
#endif
|
||||||
nppStreamContext.hStream = newStream;
|
nppStreamContext.hStream = newStream;
|
||||||
cudaSafeCall(cudaStreamGetFlags(nppStreamContext.hStream, &nppStreamContext.nStreamFlags));
|
cudaSafeCall(cudaStreamGetFlags(nppStreamContext.hStream, &nppStreamContext.nStreamFlags));
|
||||||
}
|
}
|
||||||
|
@ -694,7 +694,7 @@ OCL_PERF_TEST_P(PowFixture, Pow, ::testing::Combine(
|
|||||||
|
|
||||||
///////////// iPow ////////////////////////
|
///////////// iPow ////////////////////////
|
||||||
OCL_PERF_TEST_P(PowFixture, iPow, ::testing::Combine(
|
OCL_PERF_TEST_P(PowFixture, iPow, ::testing::Combine(
|
||||||
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8UC1, CV_8SC1,CV_16UC1,CV_16SC1,CV_32SC1)))
|
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8UC1, CV_8UC3, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1)))
|
||||||
{
|
{
|
||||||
const Size_MatType_t params = GetParam();
|
const Size_MatType_t params = GetParam();
|
||||||
const Size srcSize = get<0>(params);
|
const Size srcSize = get<0>(params);
|
||||||
@ -706,7 +706,7 @@ OCL_PERF_TEST_P(PowFixture, iPow, ::testing::Combine(
|
|||||||
randu(src, 0, 100);
|
randu(src, 0, 100);
|
||||||
declare.in(src).out(dst);
|
declare.in(src).out(dst);
|
||||||
|
|
||||||
OCL_TEST_CYCLE() cv::pow(src, 7.0, dst);
|
OCL_TEST_CYCLE() cv::pow(src, 3, dst);
|
||||||
|
|
||||||
SANITY_CHECK_NOTHING();
|
SANITY_CHECK_NOTHING();
|
||||||
}
|
}
|
||||||
|
@ -1223,8 +1223,22 @@ inline int hal_ni_copyToMasked(const uchar* src_data, size_t src_step, uchar* ds
|
|||||||
#define cv_hal_copyToMasked hal_ni_copyToMasked
|
#define cv_hal_copyToMasked hal_ni_copyToMasked
|
||||||
//! @endcond
|
//! @endcond
|
||||||
|
|
||||||
//! @}
|
/**
|
||||||
|
@brief sum
|
||||||
|
@param src_data Source image data
|
||||||
|
@param src_step Source image step
|
||||||
|
@param src_type Source image type
|
||||||
|
@param width, height Source image dimensions
|
||||||
|
@param result Pointer to save the sum result to.
|
||||||
|
*/
|
||||||
|
inline int hal_ni_sum(const uchar *src_data, size_t src_step, int src_type, int width, int height, double *result)
|
||||||
|
{ return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
#define cv_hal_sum hal_ni_sum
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
|
||||||
#if defined(__clang__)
|
#if defined(__clang__)
|
||||||
#pragma clang diagnostic pop
|
#pragma clang diagnostic pop
|
||||||
|
@ -938,9 +938,40 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
|
|||||||
bool issqrt = std::abs(power - 0.5) < DBL_EPSILON;
|
bool issqrt = std::abs(power - 0.5) < DBL_EPSILON;
|
||||||
const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
|
const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
|
||||||
|
|
||||||
|
// Note: channels are unrolled
|
||||||
|
|
||||||
|
std::string extra_opts ="";
|
||||||
|
if (is_ipower)
|
||||||
|
{
|
||||||
|
int wdepth = CV_32F;
|
||||||
|
if (depth == CV_64F)
|
||||||
|
wdepth = CV_64F;
|
||||||
|
else if (depth == CV_16F)
|
||||||
|
wdepth = CV_16F;
|
||||||
|
|
||||||
|
char cvt[2][50];
|
||||||
|
extra_opts = format(
|
||||||
|
" -D srcT1=%s -DsrcT1_C1=%s"
|
||||||
|
" -D srcT2=int -D workST=int"
|
||||||
|
" -D workT=%s -D wdepth=%d -D convertToWT1=%s"
|
||||||
|
" -D convertToDT=%s"
|
||||||
|
" -D workT1=%s",
|
||||||
|
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
|
||||||
|
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
|
||||||
|
ocl::typeToStr(CV_MAKE_TYPE(wdepth, 1)),
|
||||||
|
wdepth,
|
||||||
|
ocl::convertTypeStr(depth, wdepth, 1, cvt[0], sizeof(cvt[0])),
|
||||||
|
ocl::convertTypeStr(wdepth, depth, 1, cvt[1], sizeof(cvt[1])),
|
||||||
|
ocl::typeToStr(wdepth)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
||||||
format("-D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s",
|
format("-D cn=%d -D dstT=%s -D dstT_C1=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D %s%s%s%s",
|
||||||
ocl::typeToStr(depth), depth, rowsPerWI, op,
|
1,
|
||||||
|
ocl::typeToStr(depth), ocl::typeToStr(depth), depth, rowsPerWI, op,
|
||||||
|
" -D UNARY_OP=1",
|
||||||
|
extra_opts.empty() ? "" : extra_opts.c_str(),
|
||||||
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
||||||
if (k.empty())
|
if (k.empty())
|
||||||
return false;
|
return false;
|
||||||
@ -1396,7 +1427,7 @@ int cv::solveCubic( InputArray _coeffs, OutputArray _roots )
|
|||||||
{
|
{
|
||||||
if( a1 == 0 )
|
if( a1 == 0 )
|
||||||
{
|
{
|
||||||
if( a2 == 0 )
|
if( a2 == 0 ) // constant
|
||||||
n = a3 == 0 ? -1 : 0;
|
n = a3 == 0 ? -1 : 0;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1430,15 +1461,23 @@ int cv::solveCubic( InputArray _coeffs, OutputArray _roots )
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
// cubic equation
|
||||||
a0 = 1./a0;
|
a0 = 1./a0;
|
||||||
a1 *= a0;
|
a1 *= a0;
|
||||||
a2 *= a0;
|
a2 *= a0;
|
||||||
a3 *= a0;
|
a3 *= a0;
|
||||||
|
|
||||||
double Q = (a1 * a1 - 3 * a2) * (1./9);
|
double Q = (a1 * a1 - 3 * a2) * (1./9);
|
||||||
double R = (2 * a1 * a1 * a1 - 9 * a1 * a2 + 27 * a3) * (1./54);
|
double R = (a1 * (2 * a1 * a1 - 9 * a2) + 27 * a3) * (1./54);
|
||||||
double Qcubed = Q * Q * Q;
|
double Qcubed = Q * Q * Q;
|
||||||
double d = Qcubed - R * R;
|
/*
|
||||||
|
Here we expand expression `Qcubed - R * R` for `d` variable
|
||||||
|
to reduce common terms `a1^6 / 729` and `-a1^4 * a2 / 81`
|
||||||
|
and thus decrease rounding error (in case of quite big coefficients).
|
||||||
|
|
||||||
|
And then we additionally group terms to further reduce rounding error.
|
||||||
|
*/
|
||||||
|
double d = (a1 * a1 * (a2 * a2 - 4 * a1 * a3) + 2 * a2 * (9 * a1 * a3 - 2 * a2 * a2) - 27 * a3 * a3) * (1./108);
|
||||||
|
|
||||||
if( d > 0 )
|
if( d > 0 )
|
||||||
{
|
{
|
||||||
|
@ -559,7 +559,7 @@ double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask
|
|||||||
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
|
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
|
||||||
|
|
||||||
NormDiffFunc func = getNormDiffFunc(normType >> 1, depth);
|
NormDiffFunc func = getNormDiffFunc(normType >> 1, depth);
|
||||||
CV_Assert( func != 0 );
|
CV_Assert( (normType >> 1) >= 3 || func != 0 );
|
||||||
|
|
||||||
if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
|
if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
|
||||||
{
|
{
|
||||||
|
@ -1581,6 +1581,7 @@ NormDiffFunc getNormDiffFunc(int normType, int depth)
|
|||||||
0
|
0
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
if (normType >= 3 || normType < 0) return nullptr;
|
||||||
|
|
||||||
return normDiffTab[normType][depth];
|
return normDiffTab[normType][depth];
|
||||||
}
|
}
|
||||||
|
@ -80,6 +80,10 @@
|
|||||||
#error "Kernel configuration error: ambiguous 'depth' value is defined, use 'DEPTH_dst' instead"
|
#error "Kernel configuration error: ambiguous 'depth' value is defined, use 'DEPTH_dst' instead"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define CAT__(x, y) x ## y
|
||||||
|
#define CAT_(x, y) CAT__(x, y)
|
||||||
|
#define CAT(x, y) CAT_(x, y)
|
||||||
|
|
||||||
|
|
||||||
#if DEPTH_dst < 5 /* CV_32F */
|
#if DEPTH_dst < 5 /* CV_32F */
|
||||||
#define CV_DST_TYPE_IS_INTEGER
|
#define CV_DST_TYPE_IS_INTEGER
|
||||||
@ -325,9 +329,12 @@
|
|||||||
#define PROCESS_ELEM storedst(pow(srcelem1, srcelem2))
|
#define PROCESS_ELEM storedst(pow(srcelem1, srcelem2))
|
||||||
|
|
||||||
#elif defined OP_POWN
|
#elif defined OP_POWN
|
||||||
#undef workT
|
#if cn > 1
|
||||||
#define workT int
|
#define PROCESS_INIT CAT(int, cn) powi = (CAT(int, cn))srcelem2;
|
||||||
#define PROCESS_ELEM storedst(pown(srcelem1, srcelem2))
|
#else // cn
|
||||||
|
#define PROCESS_INIT int powi = srcelem2;
|
||||||
|
#endif
|
||||||
|
#define PROCESS_ELEM storedst(convertToDT(pown(srcelem1, powi)))
|
||||||
|
|
||||||
#elif defined OP_SQRT
|
#elif defined OP_SQRT
|
||||||
#if CV_DST_TYPE_FIT_32F
|
#if CV_DST_TYPE_FIT_32F
|
||||||
@ -469,7 +476,7 @@
|
|||||||
#define srcelem2 srcelem2_
|
#define srcelem2 srcelem2_
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if cn == 3
|
#if !defined(PROCESS_INIT) && cn == 3
|
||||||
#undef srcelem2
|
#undef srcelem2
|
||||||
#define srcelem2 (workT)(srcelem2_.x, srcelem2_.y, srcelem2_.z)
|
#define srcelem2 (workT)(srcelem2_.x, srcelem2_.y, srcelem2_.z)
|
||||||
#endif
|
#endif
|
||||||
@ -517,6 +524,10 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
|
|||||||
int x = get_global_id(0);
|
int x = get_global_id(0);
|
||||||
int y0 = get_global_id(1) * rowsPerWI;
|
int y0 = get_global_id(1) * rowsPerWI;
|
||||||
|
|
||||||
|
#ifdef PROCESS_INIT
|
||||||
|
PROCESS_INIT
|
||||||
|
#endif
|
||||||
|
|
||||||
if (x < cols)
|
if (x < cols)
|
||||||
{
|
{
|
||||||
int mask_index = mad24(y0, maskstep, x + maskoffset);
|
int mask_index = mad24(y0, maskstep, x + maskoffset);
|
||||||
@ -542,6 +553,10 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
|
|||||||
int x = get_global_id(0);
|
int x = get_global_id(0);
|
||||||
int y0 = get_global_id(1) * rowsPerWI;
|
int y0 = get_global_id(1) * rowsPerWI;
|
||||||
|
|
||||||
|
#ifdef PROCESS_INIT
|
||||||
|
PROCESS_INIT
|
||||||
|
#endif
|
||||||
|
|
||||||
if (x < cols)
|
if (x < cols)
|
||||||
{
|
{
|
||||||
int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
|
int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
|
||||||
@ -564,6 +579,10 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
|
|||||||
int x = get_global_id(0);
|
int x = get_global_id(0);
|
||||||
int y0 = get_global_id(1) * rowsPerWI;
|
int y0 = get_global_id(1) * rowsPerWI;
|
||||||
|
|
||||||
|
#ifdef PROCESS_INIT
|
||||||
|
PROCESS_INIT
|
||||||
|
#endif
|
||||||
|
|
||||||
if (x < cols)
|
if (x < cols)
|
||||||
{
|
{
|
||||||
int mask_index = mad24(y0, maskstep, x + maskoffset);
|
int mask_index = mad24(y0, maskstep, x + maskoffset);
|
||||||
|
@ -10,14 +10,6 @@
|
|||||||
#include "sum.simd.hpp"
|
#include "sum.simd.hpp"
|
||||||
#include "sum.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
|
#include "sum.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
|
||||||
|
|
||||||
#ifndef OPENCV_IPP_SUM
|
|
||||||
#undef HAVE_IPP
|
|
||||||
#undef CV_IPP_RUN_FAST
|
|
||||||
#define CV_IPP_RUN_FAST(f, ...)
|
|
||||||
#undef CV_IPP_RUN
|
|
||||||
#define CV_IPP_RUN(c, f, ...)
|
|
||||||
#endif // OPENCV_IPP_SUM
|
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -126,95 +118,45 @@ bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask,
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_IPP
|
|
||||||
static bool ipp_sum(Mat &src, Scalar &_res)
|
|
||||||
{
|
|
||||||
CV_INSTRUMENT_REGION_IPP();
|
|
||||||
|
|
||||||
#if IPP_VERSION_X100 >= 700
|
|
||||||
int cn = src.channels();
|
|
||||||
if (cn > 4)
|
|
||||||
return false;
|
|
||||||
size_t total_size = src.total();
|
|
||||||
int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
|
|
||||||
if( src.dims <= 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
|
|
||||||
{
|
|
||||||
IppiSize sz = { cols, rows };
|
|
||||||
int type = src.type();
|
|
||||||
typedef IppStatus (CV_STDCALL* ippiSumFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
|
|
||||||
typedef IppStatus (CV_STDCALL* ippiSumFuncNoHint)(const void*, int, IppiSize, double *);
|
|
||||||
ippiSumFuncHint ippiSumHint =
|
|
||||||
type == CV_32FC1 ? (ippiSumFuncHint)ippiSum_32f_C1R :
|
|
||||||
type == CV_32FC3 ? (ippiSumFuncHint)ippiSum_32f_C3R :
|
|
||||||
type == CV_32FC4 ? (ippiSumFuncHint)ippiSum_32f_C4R :
|
|
||||||
0;
|
|
||||||
ippiSumFuncNoHint ippiSum =
|
|
||||||
type == CV_8UC1 ? (ippiSumFuncNoHint)ippiSum_8u_C1R :
|
|
||||||
type == CV_8UC3 ? (ippiSumFuncNoHint)ippiSum_8u_C3R :
|
|
||||||
type == CV_8UC4 ? (ippiSumFuncNoHint)ippiSum_8u_C4R :
|
|
||||||
type == CV_16UC1 ? (ippiSumFuncNoHint)ippiSum_16u_C1R :
|
|
||||||
type == CV_16UC3 ? (ippiSumFuncNoHint)ippiSum_16u_C3R :
|
|
||||||
type == CV_16UC4 ? (ippiSumFuncNoHint)ippiSum_16u_C4R :
|
|
||||||
type == CV_16SC1 ? (ippiSumFuncNoHint)ippiSum_16s_C1R :
|
|
||||||
type == CV_16SC3 ? (ippiSumFuncNoHint)ippiSum_16s_C3R :
|
|
||||||
type == CV_16SC4 ? (ippiSumFuncNoHint)ippiSum_16s_C4R :
|
|
||||||
0;
|
|
||||||
CV_Assert(!ippiSumHint || !ippiSum);
|
|
||||||
if( ippiSumHint || ippiSum )
|
|
||||||
{
|
|
||||||
Ipp64f res[4];
|
|
||||||
IppStatus ret = ippiSumHint ?
|
|
||||||
CV_INSTRUMENT_FUN_IPP(ippiSumHint, src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
|
|
||||||
CV_INSTRUMENT_FUN_IPP(ippiSum, src.ptr(), (int)src.step[0], sz, res);
|
|
||||||
if( ret >= 0 )
|
|
||||||
{
|
|
||||||
for( int i = 0; i < cn; i++ )
|
|
||||||
_res[i] = res[i];
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
CV_UNUSED(src); CV_UNUSED(_res);
|
|
||||||
#endif
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
Scalar sum(InputArray _src)
|
Scalar sum(InputArray _src)
|
||||||
{
|
{
|
||||||
CV_INSTRUMENT_REGION();
|
CV_INSTRUMENT_REGION();
|
||||||
|
|
||||||
#if defined HAVE_OPENCL || defined HAVE_IPP
|
|
||||||
Scalar _res;
|
Scalar _res;
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
|
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
|
||||||
ocl_sum(_src, _res, OCL_OP_SUM),
|
ocl_sum(_src, _res, OCL_OP_SUM),
|
||||||
_res)
|
_res);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Mat src = _src.getMat();
|
Mat src = _src.getMat();
|
||||||
CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_sum(src, _res), _res);
|
int cn = src.channels();
|
||||||
|
CV_CheckLE( cn, 4, "cv::sum does not support more than 4 channels" );
|
||||||
|
|
||||||
int k, cn = src.channels(), depth = src.depth();
|
if (_src.dims() <= 2)
|
||||||
|
{
|
||||||
|
CALL_HAL_RET2(sum, cv_hal_sum, _res, src.data, src.step, src.type(), src.cols, src.rows, &_res[0]);
|
||||||
|
}
|
||||||
|
else if (_src.isContinuous())
|
||||||
|
{
|
||||||
|
CALL_HAL_RET2(sum, cv_hal_sum, _res, src.data, 0, src.type(), (int)src.total(), 1, &_res[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
int k, depth = src.depth();
|
||||||
SumFunc func = getSumFunc(depth);
|
SumFunc func = getSumFunc(depth);
|
||||||
if (func == nullptr) {
|
if (func == nullptr) {
|
||||||
if (depth == CV_Bool && cn == 1)
|
if (depth == CV_Bool && cn == 1)
|
||||||
return Scalar((double)countNonZero(src));
|
return Scalar((double)countNonZero(src));
|
||||||
CV_Error(Error::StsNotImplemented, "");
|
CV_Error(Error::StsNotImplemented, "");
|
||||||
}
|
}
|
||||||
CV_Assert( cn <= 4 && func != 0 );
|
|
||||||
|
|
||||||
const Mat* arrays[] = {&src, 0};
|
const Mat* arrays[] = {&src, 0};
|
||||||
uchar* ptrs[1] = {};
|
uchar* ptrs[1] = {};
|
||||||
NAryMatIterator it(arrays, ptrs);
|
NAryMatIterator it(arrays, ptrs);
|
||||||
Scalar s;
|
|
||||||
int total = (int)it.size, blockSize = total, partialBlockSize = 0;
|
int total = (int)it.size, blockSize = total, partialBlockSize = 0;
|
||||||
int j, count = 0;
|
int j, count = 0;
|
||||||
int _buf[CV_CN_MAX];
|
int _buf[CV_CN_MAX];
|
||||||
int* buf = (int*)&s[0];
|
int* buf = (int*)&_res[0];
|
||||||
size_t esz = 0;
|
size_t esz = 0;
|
||||||
bool partialSumIsInt = depth < CV_32S;
|
bool partialSumIsInt = depth < CV_32S;
|
||||||
bool blockSum = partialSumIsInt || depth == CV_16F || depth == CV_16BF;
|
bool blockSum = partialSumIsInt || depth == CV_16F || depth == CV_16BF;
|
||||||
@ -241,13 +183,13 @@ Scalar sum(InputArray _src)
|
|||||||
if (partialSumIsInt) {
|
if (partialSumIsInt) {
|
||||||
for( k = 0; k < cn; k++ )
|
for( k = 0; k < cn; k++ )
|
||||||
{
|
{
|
||||||
s[k] += buf[k];
|
_res[k] += buf[k];
|
||||||
buf[k] = 0;
|
buf[k] = 0;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for( k = 0; k < cn; k++ )
|
for( k = 0; k < cn; k++ )
|
||||||
{
|
{
|
||||||
s[k] += ((float*)buf)[k];
|
_res[k] += ((float*)buf)[k];
|
||||||
buf[k] = 0;
|
buf[k] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -256,7 +198,7 @@ Scalar sum(InputArray _src)
|
|||||||
ptrs[0] += bsz*esz;
|
ptrs[0] += bsz*esz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return s;
|
return _res;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -132,19 +132,25 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
|
|||||||
use_roi = GET_PARAM(2);
|
use_roi = GET_PARAM(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void generateTestData(bool with_val_in_range = false)
|
void generateTestData(bool with_val_in_range = false,
|
||||||
|
double minVal1 = std::numeric_limits<double>::quiet_NaN(), double maxVal1 = std::numeric_limits<double>::quiet_NaN(),
|
||||||
|
double minVal2 = std::numeric_limits<double>::quiet_NaN(), double maxVal2 = std::numeric_limits<double>::quiet_NaN()
|
||||||
|
)
|
||||||
{
|
{
|
||||||
const int type = CV_MAKE_TYPE(depth, cn);
|
const int type = CV_MAKE_TYPE(depth, cn);
|
||||||
|
|
||||||
double minV = cvtest::getMinVal(type);
|
double minV1 = cvIsNaN(minVal1) ? 2 : minVal1;
|
||||||
double maxV = cvtest::getMaxVal(type);
|
double maxV1 = cvIsNaN(maxVal1) ? 11 : maxVal1;
|
||||||
|
|
||||||
|
double minV2 = cvIsNaN(minVal2) ? std::max(-1540., cvtest::getMinVal(type)) : minVal2;
|
||||||
|
double maxV2 = cvIsNaN(maxVal2) ? std::min(1740., cvtest::getMaxVal(type)) : maxVal2;
|
||||||
|
|
||||||
Size roiSize = randomSize(1, MAX_VALUE);
|
Size roiSize = randomSize(1, MAX_VALUE);
|
||||||
Border src1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
Border src1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||||
randomSubMat(src1, src1_roi, roiSize, src1Border, type, 2, 11); // FIXIT: Test with minV, maxV
|
randomSubMat(src1, src1_roi, roiSize, src1Border, type, minV1, maxV1); // FIXIT: Test with minV, maxV
|
||||||
|
|
||||||
Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||||
randomSubMat(src2, src2_roi, roiSize, src2Border, type, std::max(-1540., minV), std::min(1740., maxV));
|
randomSubMat(src2, src2_roi, roiSize, src2Border, type, minV2, maxV2);
|
||||||
|
|
||||||
Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||||
randomSubMat(dst1, dst1_roi, roiSize, dst1Border, type, 5, 16);
|
randomSubMat(dst1, dst1_roi, roiSize, dst1Border, type, 5, 16);
|
||||||
@ -162,8 +168,8 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
|
|||||||
|
|
||||||
if (with_val_in_range)
|
if (with_val_in_range)
|
||||||
{
|
{
|
||||||
val_in_range = cv::Scalar(rng.uniform(minV, maxV), rng.uniform(minV, maxV),
|
val_in_range = cv::Scalar(rng.uniform(minV1, maxV1), rng.uniform(minV1, maxV1),
|
||||||
rng.uniform(minV, maxV), rng.uniform(minV, maxV));
|
rng.uniform(minV1, maxV1), rng.uniform(minV1, maxV1));
|
||||||
}
|
}
|
||||||
|
|
||||||
UMAT_UPLOAD_INPUT_PARAMETER(src1);
|
UMAT_UPLOAD_INPUT_PARAMETER(src1);
|
||||||
@ -844,14 +850,30 @@ OCL_TEST_P(Pow, Mat)
|
|||||||
for (int j = 0; j < 1/*test_loop_times*/; j++)
|
for (int j = 0; j < 1/*test_loop_times*/; j++)
|
||||||
for (int k = 0, size = sizeof(pows) / sizeof(double); k < size; ++k)
|
for (int k = 0, size = sizeof(pows) / sizeof(double); k < size; ++k)
|
||||||
{
|
{
|
||||||
SCOPED_TRACE(pows[k]);
|
SCOPED_TRACE(cv::format("POW=%g", pows[k]));
|
||||||
|
|
||||||
generateTestData();
|
generateTestData(false, 1, 3);
|
||||||
|
|
||||||
OCL_OFF(cv::pow(src1_roi, pows[k], dst1_roi));
|
OCL_OFF(cv::pow(src1_roi, pows[k], dst1_roi));
|
||||||
OCL_ON(cv::pow(usrc1_roi, pows[k], udst1_roi));
|
OCL_ON(cv::pow(usrc1_roi, pows[k], udst1_roi));
|
||||||
|
|
||||||
OCL_EXPECT_MATS_NEAR_RELATIVE(dst1, 1e-5);
|
OCL_EXPECT_MATS_NEAR_RELATIVE(dst1, 1e-5);
|
||||||
|
|
||||||
|
if (cvtest::debugLevel >= 100)
|
||||||
|
{
|
||||||
|
cv::Rect roi(0, 0, 4, 4);
|
||||||
|
std::cout << src1_roi(roi) << std::endl;
|
||||||
|
std::cout << dst1_roi(roi) << std::endl;
|
||||||
|
std::cout << udst1_roi(roi) << std::endl;
|
||||||
|
|
||||||
|
Mat diff;
|
||||||
|
cv::absdiff(dst1_roi, udst1_roi, diff);
|
||||||
|
std::cout << std::endl << diff(roi) << std::endl;
|
||||||
|
|
||||||
|
std::cout << std::endl << dst1_roi << std::endl;
|
||||||
|
std::cout << std::endl << udst1_roi << std::endl;
|
||||||
|
std::cout << std::endl << diff << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user