Merge pull request #25307 from MaximMilashchenko:halrvv071

* Added a HAL implementation of cv_hal_cvtBGRtoBGR for RVV 0.7.1
This commit is contained in:
Maxim Milashchenko 2024-06-06 15:31:59 +03:00 committed by GitHub
parent cbf3b1187d
commit adcb070396
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 153 additions and 0 deletions

9
3rdparty/hal_rvv/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,9 @@
# Build description of the RVV HAL.
# No target is created in this script: it only publishes the RVV_HAL_* cache
# variables below.
# NOTE(review): MIN_VER_CMAKE is not set in this file; presumably it is provided
# by the including project - confirm.
cmake_minimum_required(VERSION ${MIN_VER_CMAKE} FATAL_ERROR)
# Left empty, so RVV_HAL_LIBRARIES below ends up empty as well (nothing to link).
set(HAL_LIB_NAME "")
# The values are stored as CACHE INTERNAL entries, which keeps them readable
# after this directory scope has been left.
set(RVV_HAL_FOUND TRUE CACHE INTERNAL "")
set(RVV_HAL_VERSION "0.0.1" CACHE INTERNAL "")
set(RVV_HAL_LIBRARIES ${HAL_LIB_NAME} CACHE INTERNAL "")
# Header name and the directory that contains it (this script's own directory).
set(RVV_HAL_HEADERS "hal_rvv.hpp" CACHE INTERNAL "")
set(RVV_HAL_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}" CACHE INTERNAL "")

24
3rdparty/hal_rvv/hal_rvv.hpp vendored Normal file
View File

@ -0,0 +1,24 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_HAL_RVV_HPP_INCLUDED
#define OPENCV_HAL_RVV_HPP_INCLUDED

// Entry header of the RVV HAL: decides which version-specific implementation
// (if any) is compiled in and includes it.

#include "opencv2/core/hal/interface.h"

// CV_HAL_RVV_071_ENABLED can be predefined by the build to force the decision.
// Otherwise the RVV 0.7.1 implementation is enabled only for the exact toolchain
// it was written against: GCC 10.4 with __THEAD_VERSION__ defined and
// __riscv_v == 7000.
#ifndef CV_HAL_RVV_071_ENABLED
#  if defined(__GNUC__) && __GNUC__ == 10 && __GNUC_MINOR__ == 4 && defined(__THEAD_VERSION__) && defined(__riscv_v) && __riscv_v == 7000
#    define CV_HAL_RVV_071_ENABLED 1
#  else
#    define CV_HAL_RVV_071_ENABLED 0
#  endif
#endif

#if CV_HAL_RVV_071_ENABLED
// The intrinsics header is pulled in only when an RVV implementation is really
// compiled. Including it unconditionally breaks the build on RISC-V toolchains
// that lack the header or have the vector extension switched off, although
// nothing from it would be used in that configuration.
#include <riscv_vector.h>
#include "version/hal_rvv_071.hpp"
#endif

#endif

107
3rdparty/hal_rvv/version/hal_rvv_071.hpp vendored Normal file
View File

@ -0,0 +1,107 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_HAL_RVV_071_HPP_INCLUDED
#define OPENCV_HAL_RVV_071_HPP_INCLUDED
#include <limits>
namespace cv { namespace cv_hal_rvv {
// Redirect the cv_hal_cvtBGRtoBGR hook to the implementation defined in this
// namespace (any earlier definition of the macro is dropped first).
#undef cv_hal_cvtBGRtoBGR
#define cv_hal_cvtBGRtoBGR cv::cv_hal_rvv::cvtBGRtoBGR
// Byte permutation covering 32 bytes in groups of four: inside each group the
// entries 0 and 2 are exchanged, entries 1 and 3 keep their position.
// Used as the gather index for 4-channel rows.
static const unsigned char index_array_32 [32]
{ 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15, 18, 17, 16, 19, 22, 21, 20, 23, 26, 25, 24, 27, 30, 29, 28, 31 };
// Byte permutation covering 24 bytes in groups of three: inside each group the
// entries 0 and 2 are exchanged, entry 1 keeps its position.
// Used as the gather index for rows with three bytes per pixel.
static const unsigned char index_array_24 [24]
{ 2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17, 16, 15, 20, 19, 18, 23, 22, 21 };
// Exchanges the first and third byte of every pixel in one image row whose
// source and destination have the same channel count.
//
//   src, dst      input and output row
//   index         byte permutation with `vsize` entries, loaded once and applied
//                 to every vector chunk with vrgather
//   n             row width in pixels
//   scn, dcn      bytes per source / destination pixel (used by the scalar tail)
//   vsize_pixels  pixels consumed per vector iteration
//   vsize         bytes consumed per vector iteration
//
// Assumes vsize == vsize_pixels * scn and scn == dcn, which is what the callers
// in this file pass (8 pixels / 32 bytes for 4 channels, 8 pixels / 24 bytes
// otherwise).
static void vBGRtoBGR(const unsigned char* src, unsigned char * dst, const unsigned char * index, int n, int scn, int dcn, int vsize_pixels, const int vsize)
{
    vuint8m2_t vec_index = vle8_v_u8m2(index, vsize);

    int i = 0;

    // `i` and `n` count pixels, so the bound has to be expressed in pixels too.
    // Bounding by the byte count `vsize` (as before) is safe, but it keeps rows
    // narrower than `vsize` pixels out of the vector path entirely and leaves
    // up to vsize - 1 pixels of every row to the scalar tail.
    for ( ; i <= n - vsize_pixels; i += vsize_pixels, src += vsize, dst += vsize)
    {
        vuint8m2_t vec_src = vle8_v_u8m2(src, vsize);
        vuint8m2_t vec_dst = vrgather_vv_u8m2(vec_src, vec_index, vsize);
        vse8_v_u8m2(dst, vec_dst, vsize);
    }

    // Scalar tail for the pixels that do not fill a whole vector chunk.
    for ( ; i < n; i++, src += scn, dst += dcn )
    {
        // All three bytes are read before any store.
        unsigned char t0 = src[0], t1 = src[1], t2 = src[2];
        dst[2] = t0;
        dst[1] = t1;
        dst[0] = t2;
        if (dcn == 4)
        {
            // Fourth byte is copied unchanged.
            unsigned char d = src[3];
            dst[3] = d;
        }
    }
}
// Scalar per-pixel conversion of one row of `n` pixels.
//
// Source byte 0 is written to dst[bi], source byte 1 to dst[1] and source
// byte 2 to dst[bi ^ 2]. When the destination has four bytes per pixel
// (dcn == 4), dst[3] receives src[3] if the source also has four bytes per
// pixel (scn == 4), otherwise the maximum unsigned char value.
// `src` advances by scn bytes and `dst` by dcn bytes per pixel.
static void sBGRtoBGR(const unsigned char* src, unsigned char * dst, int n, int scn, int dcn, int bi)
{
    const int firstSlot = bi;       // destination position of source byte 0
    const int thirdSlot = bi ^ 2;   // destination position of source byte 2
    const bool emitFourth = (dcn == 4);
    const bool copyFourth = (scn == 4);

    for (int left = n; left > 0; --left)
    {
        // Fetch the three bytes up front, before anything is stored.
        const unsigned char c0 = src[0];
        const unsigned char c1 = src[1];
        const unsigned char c2 = src[2];

        dst[firstSlot] = c0;
        dst[1] = c1;
        dst[thirdSlot] = c2;

        if (emitFourth)
        {
            if (copyFourth)
                dst[3] = src[3];
            else
                dst[3] = std::numeric_limits<unsigned char>::max();
        }

        src += scn;
        dst += dcn;
    }
}
// HAL entry point for the cv_hal_cvtBGRtoBGR hook.
//
// Returns CV_HAL_ERROR_NOT_IMPLEMENTED when `depth` is not CV_8U, or when the
// channel count stays the same and no swap is requested. In every other case
// each of the `height` rows is converted and CV_HAL_ERROR_OK is returned:
//   - scn != dcn: scalar per-pixel routine (sBGRtoBGR);
//   - scn == dcn with swapBlue: vector routine (vBGRtoBGR) with the 32-byte
//     table when scn == 4 and the 24-byte table otherwise.
// Rows are `src_step` / `dst_step` bytes apart.
static int cvtBGRtoBGR(const unsigned char * src_data, size_t src_step, unsigned char * dst_data, size_t dst_step, int width, int height, int depth, int scn, int dcn, bool swapBlue)
{
    if (depth != CV_8U)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    if (scn != dcn)
    {
        // Channel count changes: handled pixel by pixel.
        const int blueIdx = swapBlue ? 2 : 0;
        for (int row = 0; row < height; ++row)
        {
            sBGRtoBGR(src_data, dst_data, width, scn, dcn, blueIdx);
            src_data += src_step;
            dst_data += dst_step;
        }
        return CV_HAL_ERROR_OK;
    }

    // Same channel count and nothing to swap: not handled here.
    if (!swapBlue)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    const int vsize_pixels = 8;
    const bool fourChannels = (scn == 4);
    const unsigned char * const shuffle = fourChannels ? index_array_32 : index_array_24;
    const int vsize = fourChannels ? 32 : 24;

    for (int row = 0; row < height; ++row)
    {
        vBGRtoBGR(src_data, dst_data, shuffle, width, scn, dcn, vsize_pixels, vsize);
        src_data += src_step;
        dst_data += dst_step;
    }
    return CV_HAL_ERROR_OK;
}
}}
#endif

View File

@ -265,6 +265,8 @@ OCV_OPTION(WITH_KLEIDICV "Use KleidiCV library for ARM platforms" OFF
VISIBLE_IF (AARCH64 AND (ANDROID OR UNIX AND NOT IOS AND NOT XROS)))
OCV_OPTION(WITH_NDSRVP "Use Andes RVP extension" (NOT CV_DISABLE_OPTIMIZATION)
VISIBLE_IF RISCV)
# RVV HAL switch: shown only for RISC-V targets; its default is ON unless
# CV_DISABLE_OPTIMIZATION is set.
OCV_OPTION(WITH_HAL_RVV "Use HAL RVV optimizations" (NOT CV_DISABLE_OPTIMIZATION)
VISIBLE_IF RISCV)
OCV_OPTION(WITH_CPUFEATURES "Use cpufeatures Android library" ON
VISIBLE_IF ANDROID
VERIFY HAVE_CPUFEATURES)
@ -994,6 +996,13 @@ if(WITH_NDSRVP)
endif()
endif()
# When the RVV HAL is requested, put "halrvv" at the front of OpenCV_HAL so that
# the foreach(hal ${OpenCV_HAL}) loop below processes it.
if(WITH_HAL_RVV)
ocv_debug_message(STATUS "Enable HAL RVV acceleration")
# Wrapping the list in ';' makes the regex match a whole list entry only, so an
# already present "halrvv" entry is not added a second time.
if(NOT ";${OpenCV_HAL};" MATCHES ";halrvv;")
set(OpenCV_HAL "halrvv;${OpenCV_HAL}")
endif()
endif()
foreach(hal ${OpenCV_HAL})
if(hal STREQUAL "carotene")
if(";${CPU_BASELINE_FINAL};" MATCHES ";NEON;")
@ -1023,6 +1032,10 @@ foreach(hal ${OpenCV_HAL})
else()
message(STATUS "NDSRVP: Andes GNU Toolchain DSP extension is not open, disabling ndsrvp...")
endif()
elseif(hal STREQUAL "halrvv")
# 3rdparty/hal_rvv/CMakeLists.txt defines the RVV_HAL_* cache variables that are
# handed to ocv_hal_register and used in the summary entry below.
add_subdirectory(3rdparty/hal_rvv/)
ocv_hal_register(RVV_HAL_LIBRARIES RVV_HAL_HEADERS RVV_HAL_INCLUDE_DIRS)
list(APPEND OpenCV_USED_HAL "HAL RVV (ver ${RVV_HAL_VERSION})")
elseif(hal STREQUAL "openvx")
add_subdirectory(3rdparty/openvx)
ocv_hal_register(OPENVX_HAL_LIBRARIES OPENVX_HAL_HEADERS OPENVX_HAL_INCLUDE_DIRS)