impl hal_rvv norm_hamming

Co-authored-by: Liutong HAN <liutong2020@iscas.ac.cn>
This commit is contained in:
GenshinImpactStarts 2025-02-13 14:16:44 +00:00
parent 6a6a5a765d
commit 33d632f85e
3 changed files with 184 additions and 1 deletions

View File

@ -24,6 +24,7 @@
#include "hal_rvv_1p0/mean.hpp" // core
#include "hal_rvv_1p0/norm.hpp" // core
#include "hal_rvv_1p0/norm_diff.hpp" // core
#include "hal_rvv_1p0/norm_hamming.hpp" // core
#include "hal_rvv_1p0/convert_scale.hpp" // core
#include "hal_rvv_1p0/minmax.hpp" // core
#include "hal_rvv_1p0/atan.hpp" // core

View File

@ -0,0 +1,182 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#pragma once
#include <riscv_vector.h>
#include <opencv2/core/base.hpp>
namespace cv { namespace cv_hal_rvv {
// Point the cv_hal_normHamming8u / cv_hal_normHammingDiff8u HAL macros at the
// functions defined in this namespace. Any earlier definition of each macro is
// dropped first so the replacement does not trigger a redefinition.
#undef cv_hal_normHamming8u
#define cv_hal_normHamming8u cv::cv_hal_rvv::normHamming8u
#undef cv_hal_normHammingDiff8u
#define cv_hal_normHammingDiff8u cv::cv_hal_rvv::normHammingDiff8u
// Adds the cell count of an LMUL=8 byte vector group to `result`.
//
// The group is taken apart into its eight m1 parts; each part is reinterpreted
// as a vbool1_t bit vector and passed to CellType::popcount.
//
//   v        - vector group to count
//   mask     - forwarded unchanged to CellType::popcount
//   len_bool - vector length (in mask bits) forwarded to CellType::popcount
//              for every part
//   result   - running total; the sum of the eight partial counts is added
template <typename CellType>
inline void normHammingCnt_m8(vuint8m8_t v, vbool1_t mask, size_t len_bool, size_t& result)
{
    // The part index given to vget has to be a constant, so the eight parts
    // are spelled out instead of being walked in a loop.
    size_t total = 0;
    total += CellType::popcount(__riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 0)), mask, len_bool);
    total += CellType::popcount(__riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 1)), mask, len_bool);
    total += CellType::popcount(__riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 2)), mask, len_bool);
    total += CellType::popcount(__riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 3)), mask, len_bool);
    total += CellType::popcount(__riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 4)), mask, len_bool);
    total += CellType::popcount(__riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 5)), mask, len_bool);
    total += CellType::popcount(__riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 6)), mask, len_bool);
    total += CellType::popcount(__riscv_vreinterpret_b1(__riscv_vget_u8m1(v, 7)), mask, len_bool);
    result += total;
}
// Adds the cell count of a single LMUL=1 byte vector to `result`.
//
// The vector is reinterpreted as a vbool1_t bit vector and counted by
// CellType::popcount with the given mask and vector length (in mask bits).
template <typename CellType>
inline void normHammingCnt_m1(vuint8m1_t v, vbool1_t mask, size_t len_bool, size_t& result)
{
    const size_t counted = CellType::popcount(__riscv_vreinterpret_b1(v), mask, len_bool);
    result += counted;
}
// Cell policy selected by the entry points for cellSize == 1:
// every set bit is counted individually, so no mask and no preprocessing
// are needed.
struct NormHammingCell1
{
    // Returns a value-initialized mask; popcount() of this policy ignores it.
    static inline vbool1_t generateMask(size_t /*len*/)
    {
        return vbool1_t();
    }

    // Nothing to do: the input bits are counted as they are.
    template <typename Vec>
    static inline void preprocess(Vec& /*v*/, size_t /*len*/)
    {
    }

    // Counts the set bits among the first `len_bool` bits of `v`.
    template <typename Bits>
    static inline size_t popcount(Bits v, vbool1_t /*mask*/, size_t len_bool)
    {
        const size_t set_bits = __riscv_vcpop(v, len_bool);
        return set_bits;
    }
};
// Cell policy selected by the entry points for cellSize == 2:
// a 2-bit cell contributes one to the count when either of its bits is set.
struct NormHammingCell2
{
    // Builds a bit mask from bytes of 0x55, i.e. bits 0, 2, 4 and 6 of every
    // byte are selected (the low bit of each 2-bit cell).
    static inline vbool1_t generateMask(size_t len)
    {
        vuint8m1_t pattern = __riscv_vmv_v_x_u8m1(0x55, len);
        return __riscv_vreinterpret_b1(pattern);
    }

    // ORs every bit with its upper neighbour so that the low bit of each
    // 2-bit cell ends up set when the cell is non-zero.
    template <typename Vec>
    static inline void preprocess(Vec& v, size_t len)
    {
        Vec upper = __riscv_vsrl(v, 1, len);
        v = __riscv_vor(v, upper, len);
    }

    // Counts the set bits of `v` at the positions selected by `mask`,
    // within the first `len_bool` bits.
    template <typename Bits>
    static inline size_t popcount(Bits v, vbool1_t mask, size_t len_bool)
    {
        const size_t cells = __riscv_vcpop(mask, v, len_bool);
        return cells;
    }
};
// Cell policy selected by the entry points for cellSize == 4:
// a 4-bit cell contributes one to the count when any of its bits is set.
struct NormHammingCell4
{
    // Builds a bit mask from bytes of 0x11, i.e. bits 0 and 4 of every byte
    // are selected (the low bit of each 4-bit cell).
    static inline vbool1_t generateMask(size_t len)
    {
        vuint8m1_t pattern = __riscv_vmv_v_x_u8m1(0x11, len);
        return __riscv_vreinterpret_b1(pattern);
    }

    // Folds each 4-bit cell onto its low bit: first OR in the bits two
    // positions up, then the bits one position up.
    template <typename Vec>
    static inline void preprocess(Vec& v, size_t len)
    {
        Vec by_two = __riscv_vsrl(v, 2, len);
        v = __riscv_vor(v, by_two, len);
        Vec by_one = __riscv_vsrl(v, 1, len);
        v = __riscv_vor(v, by_one, len);
    }

    // Counts the set bits of `v` at the positions selected by `mask`,
    // within the first `len_bool` bits.
    template <typename Bits>
    static inline size_t popcount(Bits v, vbool1_t mask, size_t len_bool)
    {
        const size_t cells = __riscv_vcpop(mask, v, len_bool);
        return cells;
    }
};
template <typename CellType>
inline void normHamming8uLoop(const uchar* a, size_t n, size_t& result)
{
size_t len = __riscv_vsetvlmax_e8m8();
size_t len_bool = len * 8;
vbool1_t mask = CellType::generateMask(len);
for (; n >= len; n -= len, a += len)
{
auto v = __riscv_vle8_v_u8m8(a, len);
CellType::preprocess(v, len);
normHammingCnt_m8<CellType>(v, mask, len_bool, result);
}
for (; n > 0; n -= len, a += len)
{
len = __riscv_vsetvl_e8m1(n);
auto v = __riscv_vle8_v_u8m1(a, len);
CellType::preprocess(v, len);
normHammingCnt_m1<CellType>(v, mask, len * 8, result);
}
}
template <typename CellType>
inline void normHammingDiff8uLoop(const uchar* a, const uchar* b, size_t n, size_t& result)
{
size_t len = __riscv_vsetvlmax_e8m8();
size_t len_bool = len * 8;
vbool1_t mask = CellType::generateMask(len);
for (; n >= len; n -= len, a += len, b += len)
{
auto v_a = __riscv_vle8_v_u8m8(a, len);
auto v_b = __riscv_vle8_v_u8m8(b, len);
auto v = __riscv_vxor(v_a, v_b, len);
CellType::preprocess(v, len);
normHammingCnt_m8<CellType>(v, mask, len_bool, result);
}
for (; n > 0; n -= len, a += len, b += len)
{
len = __riscv_vsetvl_e8m1(n);
auto v_a = __riscv_vle8_v_u8m1(a, len);
auto v_b = __riscv_vle8_v_u8m1(b, len);
auto v = __riscv_vxor(v_a, v_b, len);
CellType::preprocess(v, len);
normHammingCnt_m1<CellType>(v, mask, len * 8, result);
}
}
// HAL entry point: cell count over the `n` bytes at `a`.
//
//   a        - input bytes
//   n        - number of bytes
//   cellSize - 1, 2 or 4; selects NormHammingCell1/2/4
//   result   - receives the count (converted to int) on success
//
// Returns CV_HAL_ERROR_OK on success. For any other cellSize it returns
// CV_HAL_ERROR_NOT_IMPLEMENTED and leaves *result untouched.
inline int normHamming8u(const uchar* a, int n, int cellSize, int* result)
{
    size_t count = 0;

    if (cellSize == 1)
        normHamming8uLoop<NormHammingCell1>(a, n, count);
    else if (cellSize == 2)
        normHamming8uLoop<NormHammingCell2>(a, n, count);
    else if (cellSize == 4)
        normHamming8uLoop<NormHammingCell4>(a, n, count);
    else
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    *result = static_cast<int>(count);
    return CV_HAL_ERROR_OK;
}
// HAL entry point: cell count of (a XOR b) over `n` bytes.
//
//   a, b     - input byte arrays
//   n        - number of bytes
//   cellSize - 1, 2 or 4; selects NormHammingCell1/2/4
//   result   - receives the count (converted to int) on success
//
// Returns CV_HAL_ERROR_OK on success. For any other cellSize it returns
// CV_HAL_ERROR_NOT_IMPLEMENTED and leaves *result untouched.
inline int normHammingDiff8u(const uchar* a, const uchar* b, int n, int cellSize, int* result)
{
    size_t count = 0;

    if (cellSize == 1)
        normHammingDiff8uLoop<NormHammingCell1>(a, b, n, count);
    else if (cellSize == 2)
        normHammingDiff8uLoop<NormHammingCell2>(a, b, n, count);
    else if (cellSize == 4)
        normHammingDiff8uLoop<NormHammingCell4>(a, b, n, count);
    else
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    *result = static_cast<int>(count);
    return CV_HAL_ERROR_OK;
}
}} // namespace cv::cv_hal_rvv

View File

@ -586,7 +586,7 @@ double norm( InputArray _src, int normType, InputArray _mask )
if( normType == NORM_HAMMING )
{
return hal::normHamming(data, (int)len);
return hal::normHamming(data, (int)len, 1);
}
if( normType == NORM_HAMMING2 )