From 33d632f85e4c1cc4d70bc7210c2f126fa8a4b0bb Mon Sep 17 00:00:00 2001
From: GenshinImpactStarts
Date: Thu, 13 Feb 2025 14:16:44 +0000
Subject: [PATCH] impl hal_rvv norm_hamming

Co-authored-by: Liutong HAN
---
 3rdparty/hal_rvv/hal_rvv.hpp                  |   1 +
 3rdparty/hal_rvv/hal_rvv_1p0/norm_hamming.hpp | 205 ++++++++++++++++++
 modules/core/src/norm.dispatch.cpp            |   2 +-
 3 files changed, 207 insertions(+), 1 deletion(-)
 create mode 100644 3rdparty/hal_rvv/hal_rvv_1p0/norm_hamming.hpp

diff --git a/3rdparty/hal_rvv/hal_rvv.hpp b/3rdparty/hal_rvv/hal_rvv.hpp
index a32e0971b7..f09e5aca7a 100644
--- a/3rdparty/hal_rvv/hal_rvv.hpp
+++ b/3rdparty/hal_rvv/hal_rvv.hpp
@@ -24,6 +24,7 @@
 #include "hal_rvv_1p0/mean.hpp" // core
 #include "hal_rvv_1p0/norm.hpp" // core
 #include "hal_rvv_1p0/norm_diff.hpp" // core
+#include "hal_rvv_1p0/norm_hamming.hpp" // core
 #include "hal_rvv_1p0/convert_scale.hpp" // core
 #include "hal_rvv_1p0/minmax.hpp" // core
 #include "hal_rvv_1p0/atan.hpp" // core
diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/norm_hamming.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/norm_hamming.hpp
new file mode 100644
index 0000000000..4fa2fe5da3
--- /dev/null
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/norm_hamming.hpp
@@ -0,0 +1,205 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#pragma once
+
+#include <riscv_vector.h>
+#include <opencv2/core/base.hpp> // NOTE(review): header name was lost in extraction; presumed - confirm
+
+namespace cv { namespace cv_hal_rvv {
+
+#undef cv_hal_normHamming8u
+#define cv_hal_normHamming8u cv::cv_hal_rvv::normHamming8u
+#undef cv_hal_normHammingDiff8u
+#define cv_hal_normHammingDiff8u cv::cv_hal_rvv::normHammingDiff8u
+
+// Adds to `result` the number of cells counted by CellType::popcount() in an
+// LMUL=8 byte vector. The group is split into its eight LMUL=1 parts, each of
+// which is reinterpreted as a vbool1_t bit mask and counted separately.
+template <typename CellType>
+inline void normHammingCnt_m8(vuint8m8_t v, vbool1_t mask, size_t len_bool, size_t& result)
+{
+    auto v_bool0 = __riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 0));
+    auto v_bool1 = __riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 1));
+    auto v_bool2 = __riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 2));
+    auto v_bool3 = __riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 3));
+    auto v_bool4 = __riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 4));
+    auto v_bool5 = __riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 5));
+    auto v_bool6 = __riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 6));
+    auto v_bool7 = __riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 7));
+    result += CellType::popcount(v_bool0, mask, len_bool);
+    result += CellType::popcount(v_bool1, mask, len_bool);
+    result += CellType::popcount(v_bool2, mask, len_bool);
+    result += CellType::popcount(v_bool3, mask, len_bool);
+    result += CellType::popcount(v_bool4, mask, len_bool);
+    result += CellType::popcount(v_bool5, mask, len_bool);
+    result += CellType::popcount(v_bool6, mask, len_bool);
+    result += CellType::popcount(v_bool7, mask, len_bool);
+}
+
+// Same accumulation as normHammingCnt_m8(), for a single LMUL=1 byte vector.
+template <typename CellType>
+inline void normHammingCnt_m1(vuint8m1_t v, vbool1_t mask, size_t len_bool, size_t& result)
+{
+    auto v_bool = __riscv_vreinterpret_b1(v);
+    result += CellType::popcount(v_bool, mask, len_bool);
+}
+
+// Cell policy selected for cellSize == 1: every set bit is counted.
+struct NormHammingCell1
+{
+    // No mask is needed here; a value-initialized placeholder is returned.
+    static inline vbool1_t generateMask([[maybe_unused]] size_t len)
+    {
+        return vbool1_t();
+    }
+
+    // Nothing to fold: the input bits are counted as they are.
+    template <typename T>
+    static inline void preprocess([[maybe_unused]] T& v, [[maybe_unused]] size_t len)
+    {
+    }
+
+    // Unmasked population count of v with vector length len_bool.
+    template <typename T>
+    static inline size_t popcount(T v, [[maybe_unused]] vbool1_t mask, size_t len_bool)
+    {
+        return __riscv_vcpop(v, len_bool);
+    }
+};
+
+// Cell policy selected for cellSize == 2: a 2-bit cell counts once if any of its bits is set.
+struct NormHammingCell2
+{
+    // 0x55 keeps bit 0 of every 2-bit cell in each byte.
+    static inline vbool1_t generateMask(size_t len)
+    {
+        return __riscv_vreinterpret_b1(__riscv_vmv_v_x_u8m1(0x55, len));
+    }
+
+    // ORs each cell's upper bit into its lowest bit; the other bit positions are masked out by popcount().
+    template <typename T>
+    static inline void preprocess(T& v, size_t len)
+    {
+        v = __riscv_vor(v, __riscv_vsrl(v, 1, len), len);
+    }
+
+    // Population count restricted to the bit positions enabled in mask.
+    template <typename T>
+    static inline size_t popcount(T v, vbool1_t mask, size_t len_bool)
+    {
+        return __riscv_vcpop(mask, v, len_bool);
+    }
+};
+
+// Cell policy selected for cellSize == 4: a 4-bit cell counts once if any of its bits is set.
+struct NormHammingCell4
+{
+    // 0x11 keeps bit 0 of every 4-bit cell in each byte.
+    static inline vbool1_t generateMask(size_t len)
+    {
+        return __riscv_vreinterpret_b1(__riscv_vmv_v_x_u8m1(0x11, len));
+    }
+
+    // Two shift-and-OR steps fold all four bits of a cell into its lowest bit.
+    template <typename T>
+    static inline void preprocess(T& v, size_t len)
+    {
+        v = __riscv_vor(v, __riscv_vsrl(v, 2, len), len);
+        v = __riscv_vor(v, __riscv_vsrl(v, 1, len), len);
+    }
+
+    // Population count restricted to the bit positions enabled in mask.
+    template <typename T>
+    static inline size_t popcount(T v, vbool1_t mask, size_t len_bool)
+    {
+        return __riscv_vcpop(mask, v, len_bool);
+    }
+};
+
+// Accumulates into `result` the CellType cell count of the n bytes starting at `a`.
+// Whole LMUL=8 vectors are consumed first; the remainder is processed in LMUL=1 pieces.
+template <typename CellType>
+inline void normHamming8uLoop(const uchar* a, size_t n, size_t& result)
+{
+    size_t len = __riscv_vsetvlmax_e8m8();
+    size_t len_bool = len * 8;
+    vbool1_t mask = CellType::generateMask(len);
+
+    for (; n >= len; n -= len, a += len)
+    {
+        auto v = __riscv_vle8_v_u8m8(a, len);
+        CellType::preprocess(v, len);
+        normHammingCnt_m8<CellType>(v, mask, len_bool, result);
+    }
+    for (; n > 0; n -= len, a += len)
+    {
+        len = __riscv_vsetvl_e8m1(n);
+        auto v = __riscv_vle8_v_u8m1(a, len);
+        CellType::preprocess(v, len);
+        normHammingCnt_m1<CellType>(v, mask, len * 8, result);
+    }
+}
+
+// Same traversal as normHamming8uLoop(), but the cells counted are those of a[i] ^ b[i].
+template <typename CellType>
+inline void normHammingDiff8uLoop(const uchar* a, const uchar* b, size_t n, size_t& result)
+{
+    size_t len = __riscv_vsetvlmax_e8m8();
+    size_t len_bool = len * 8;
+    vbool1_t mask = CellType::generateMask(len);
+
+    for (; n >= len; n -= len, a += len, b += len)
+    {
+        auto v_a = __riscv_vle8_v_u8m8(a, len);
+        auto v_b = __riscv_vle8_v_u8m8(b, len);
+        auto v = __riscv_vxor(v_a, v_b, len);
+        CellType::preprocess(v, len);
+        normHammingCnt_m8<CellType>(v, mask, len_bool, result);
+    }
+    for (; n > 0; n -= len, a += len, b += len)
+    {
+        len = __riscv_vsetvl_e8m1(n);
+        auto v_a = __riscv_vle8_v_u8m1(a, len);
+        auto v_b = __riscv_vle8_v_u8m1(b, len);
+        auto v = __riscv_vxor(v_a, v_b, len);
+        CellType::preprocess(v, len);
+        normHammingCnt_m1<CellType>(v, mask, len * 8, result);
+    }
+}
+
+// HAL entry point: stores in *result the cell count of the n bytes at `a` and returns CV_HAL_ERROR_OK.
+// Only cellSize 1, 2 and 4 are handled; any other value returns CV_HAL_ERROR_NOT_IMPLEMENTED.
+inline int normHamming8u(const uchar* a, int n, int cellSize, int* result)
+{
+    size_t _result = 0;
+
+    switch (cellSize)
+    {
+    case 1: normHamming8uLoop<NormHammingCell1>(a, n, _result); break;
+    case 2: normHamming8uLoop<NormHammingCell2>(a, n, _result); break;
+    case 4: normHamming8uLoop<NormHammingCell4>(a, n, _result); break;
+    default: return CV_HAL_ERROR_NOT_IMPLEMENTED;
+    }
+    *result = static_cast<int>(_result);
+    return CV_HAL_ERROR_OK;
+}
+
+// HAL entry point: stores in *result the cell count of a[i] ^ b[i] over n bytes and returns CV_HAL_ERROR_OK.
+// Only cellSize 1, 2 and 4 are handled; any other value returns CV_HAL_ERROR_NOT_IMPLEMENTED.
+inline int normHammingDiff8u(const uchar* a, const uchar* b, int n, int cellSize, int* result)
+{
+    size_t _result = 0;
+
+    switch (cellSize)
+    {
+    case 1: normHammingDiff8uLoop<NormHammingCell1>(a, b, n, _result); break;
+    case 2: normHammingDiff8uLoop<NormHammingCell2>(a, b, n, _result); break;
+    case 4: normHammingDiff8uLoop<NormHammingCell4>(a, b, n, _result); break;
+    default: return CV_HAL_ERROR_NOT_IMPLEMENTED;
+    }
+    *result = static_cast<int>(_result);
+    return CV_HAL_ERROR_OK;
+}
+
+}} // namespace cv::cv_hal_rvv
diff --git a/modules/core/src/norm.dispatch.cpp b/modules/core/src/norm.dispatch.cpp
index e43e33c92e..a67df07eba 100644
--- a/modules/core/src/norm.dispatch.cpp
+++ b/modules/core/src/norm.dispatch.cpp
@@ -586,7 +586,7 @@ double norm( InputArray _src, int normType, InputArray _mask )
 
                 if( normType == NORM_HAMMING )
                 {
-                    return hal::normHamming(data, (int)len);
+                    return hal::normHamming(data, (int)len, 1);
                 }
 
                 if( normType == NORM_HAMMING2 )