mirror of
https://github.com/opencv/opencv.git
synced 2025-06-22 03:22:10 +08:00
perf: implemented flip_inplace in hal_rvv to boost cv::rotate on RISC-V platforms
This commit is contained in:
parent
fe5bd15cdd
commit
ab5a65b5a2
@ -13,6 +13,17 @@
|
|||||||
#include <opencv2/core/base.hpp>
|
#include <opencv2/core/base.hpp>
|
||||||
#include "hal_rvv_1p0/types.hpp"
|
#include "hal_rvv_1p0/types.hpp"
|
||||||
|
|
||||||
|
#if defined (__clang__) && __clang_major__ < 18
|
||||||
|
#define OPENCV_HAL_IMPL_RVV_VCREATE_x3(suffix, width, v0, v1, v2) \
|
||||||
|
__riscv_vset_v_##suffix##m##width##_##suffix##m##width##x3(v, 0, v0); \
|
||||||
|
v = __riscv_vset(v, 1, v1); \
|
||||||
|
v = __riscv_vset(v, 2, v2);
|
||||||
|
#define __riscv_vcreate_v_u8m2x3(v0, v1, v2) OPENCV_HAL_IMPL_RVV_VCREATE_x3(u8, 2, v0, v1, v2)
|
||||||
|
#define __riscv_vcreate_v_u16m2x3(v0, v1, v2) OPENCV_HAL_IMPL_RVV_VCREATE_x3(u16, 2, v0, v1, v2)
|
||||||
|
#define __riscv_vcreate_v_u32m2x3(v0, v1, v2) OPENCV_HAL_IMPL_RVV_VCREATE_x3(u32, 2, v0, v1, v2)
|
||||||
|
#define __riscv_vcreate_v_u64m2x3(v0, v1, v2) OPENCV_HAL_IMPL_RVV_VCREATE_x3(u64, 2, v0, v1, v2)
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace cv { namespace cv_hal_rvv {
|
namespace cv { namespace cv_hal_rvv {
|
||||||
|
|
||||||
#undef cv_hal_flip
|
#undef cv_hal_flip
|
||||||
@ -20,7 +31,7 @@ namespace cv { namespace cv_hal_rvv {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
#define CV_HAL_RVV_FLIPY_C1(name, _Tps, RVV) \
|
#define CV_HAL_RVV_FLIP_C1(name, _Tps, RVV) \
|
||||||
inline void flip_##name(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int src_width, int src_height, int flip_mode) { \
|
inline void flip_##name(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int src_width, int src_height, int flip_mode) { \
|
||||||
for (int h = 0; h < src_height; h++) { \
|
for (int h = 0; h < src_height; h++) { \
|
||||||
const _Tps* src_row = (const _Tps*)(src_data + src_step * h); \
|
const _Tps* src_row = (const _Tps*)(src_data + src_step * h); \
|
||||||
@ -34,23 +45,35 @@ inline void flip_##name(const uchar* src_data, size_t src_step, uchar* dst_data,
|
|||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
CV_HAL_RVV_FLIPY_C1(8UC1, uchar, RVV_U8M8)
|
CV_HAL_RVV_FLIP_C1(8UC1, uchar, RVV_U8M8)
|
||||||
CV_HAL_RVV_FLIPY_C1(16UC1, ushort, RVV_U16M8)
|
CV_HAL_RVV_FLIP_C1(16UC1, ushort, RVV_U16M8)
|
||||||
CV_HAL_RVV_FLIPY_C1(32UC1, unsigned, RVV_U32M8)
|
CV_HAL_RVV_FLIP_C1(32UC1, unsigned, RVV_U32M8)
|
||||||
CV_HAL_RVV_FLIPY_C1(64UC1, uint64_t, RVV_U64M8)
|
CV_HAL_RVV_FLIP_C1(64UC1, uint64_t, RVV_U64M8)
|
||||||
|
|
||||||
#if defined (__clang__) && __clang_major__ < 18
|
#define CV_HAL_RVV_FLIP_INPLACE_C1(name, _Tps, RVV) \
|
||||||
#define OPENCV_HAL_IMPL_RVV_VCREATE_x3(suffix, width, v0, v1, v2) \
|
inline void flip_inplace_##name(uchar* data, size_t step, int width, int height, int flip_mode) { \
|
||||||
__riscv_vset_v_##suffix##m##width##_##suffix##m##width##x3(v, 0, v0); \
|
auto new_height = (flip_mode < 0 ? height / 2 : height); \
|
||||||
v = __riscv_vset(v, 1, v1); \
|
auto new_width = width / 2; \
|
||||||
v = __riscv_vset(v, 2, v2);
|
for (int h = 0; h < new_height; h++) { \
|
||||||
#define __riscv_vcreate_v_u8m2x3(v0, v1, v2) OPENCV_HAL_IMPL_RVV_VCREATE_x3(u8, 2, v0, v1, v2)
|
_Tps* row_begin = (_Tps*)(data + step * h); \
|
||||||
#define __riscv_vcreate_v_u16m2x3(v0, v1, v2) OPENCV_HAL_IMPL_RVV_VCREATE_x3(u16, 2, v0, v1, v2)
|
_Tps* row_end = (_Tps*)(data + step * (flip_mode < 0 ? (new_height - h) : (h + 1))); \
|
||||||
#define __riscv_vcreate_v_u32m2x3(v0, v1, v2) OPENCV_HAL_IMPL_RVV_VCREATE_x3(u32, 2, v0, v1, v2)
|
int vl; \
|
||||||
#define __riscv_vcreate_v_u64m2x3(v0, v1, v2) OPENCV_HAL_IMPL_RVV_VCREATE_x3(u64, 2, v0, v1, v2)
|
for (int w = 0; w < new_width; w += vl) { \
|
||||||
#endif
|
vl = RVV::setvl(new_width - w); \
|
||||||
|
RVV::VecType indices = __riscv_vrsub(RVV::vid(vl), vl - 1, vl); \
|
||||||
|
auto v_left = RVV::vload(row_begin + w, vl); \
|
||||||
|
auto v_right = RVV::vload(row_end - w - vl, vl); \
|
||||||
|
RVV::vstore(row_begin + w, __riscv_vrgather(v_right, indices, vl), vl); \
|
||||||
|
RVV::vstore(row_end - w - vl, __riscv_vrgather(v_left, indices, vl), vl); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
CV_HAL_RVV_FLIP_INPLACE_C1(8UC1, uchar, RVV_U8M8)
|
||||||
|
CV_HAL_RVV_FLIP_INPLACE_C1(16UC1, ushort, RVV_U16M8)
|
||||||
|
CV_HAL_RVV_FLIP_INPLACE_C1(32UC1, unsigned, RVV_U32M8)
|
||||||
|
CV_HAL_RVV_FLIP_INPLACE_C1(64UC1, uint64_t, RVV_U64M8)
|
||||||
|
|
||||||
#define CV_HAL_RVV_FLIPY_C3_TYPES(width) \
|
#define CV_HAL_RVV_FLIP_C3_TYPES(width) \
|
||||||
struct RVV_C3_U##width##M2 : RVV_U##width##M2 { \
|
struct RVV_C3_U##width##M2 : RVV_U##width##M2 { \
|
||||||
static inline vuint##width##m2x3_t vload3(const uint##width##_t *base, size_t vl) { return __riscv_vlseg3e##width##_v_u##width##m2x3(base, vl); } \
|
static inline vuint##width##m2x3_t vload3(const uint##width##_t *base, size_t vl) { return __riscv_vlseg3e##width##_v_u##width##m2x3(base, vl); } \
|
||||||
static inline vuint##width##m2x3_t vflip3(const vuint##width##m2x3_t &v_tuple, const vuint##width##m2_t &indices, size_t vl) { \
|
static inline vuint##width##m2x3_t vflip3(const vuint##width##m2x3_t &v_tuple, const vuint##width##m2_t &indices, size_t vl) { \
|
||||||
@ -62,12 +85,12 @@ struct RVV_C3_U##width##M2 : RVV_U##width##M2 { \
|
|||||||
} \
|
} \
|
||||||
static inline void vstore3(uint##width##_t *base, const vuint##width##m2x3_t &v_tuple, size_t vl) { __riscv_vsseg3e##width(base, v_tuple, vl); } \
|
static inline void vstore3(uint##width##_t *base, const vuint##width##m2x3_t &v_tuple, size_t vl) { __riscv_vsseg3e##width(base, v_tuple, vl); } \
|
||||||
};
|
};
|
||||||
CV_HAL_RVV_FLIPY_C3_TYPES(8)
|
CV_HAL_RVV_FLIP_C3_TYPES(8)
|
||||||
CV_HAL_RVV_FLIPY_C3_TYPES(16)
|
CV_HAL_RVV_FLIP_C3_TYPES(16)
|
||||||
CV_HAL_RVV_FLIPY_C3_TYPES(32)
|
CV_HAL_RVV_FLIP_C3_TYPES(32)
|
||||||
CV_HAL_RVV_FLIPY_C3_TYPES(64)
|
CV_HAL_RVV_FLIP_C3_TYPES(64)
|
||||||
|
|
||||||
#define CV_HAL_RVV_FLIPY_C3(name, _Tps, RVV) \
|
#define CV_HAL_RVV_FLIP_C3(name, _Tps, RVV) \
|
||||||
inline void flip_##name(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int src_width, int src_height, int flip_mode) { \
|
inline void flip_##name(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int src_width, int src_height, int flip_mode) { \
|
||||||
for (int h = 0; h < src_height; h++) { \
|
for (int h = 0; h < src_height; h++) { \
|
||||||
const _Tps* src_row = (const _Tps*)(src_data + src_step * h); \
|
const _Tps* src_row = (const _Tps*)(src_data + src_step * h); \
|
||||||
@ -82,10 +105,35 @@ inline void flip_##name(const uchar* src_data, size_t src_step, uchar* dst_data,
|
|||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
CV_HAL_RVV_FLIPY_C3(8UC3, uchar, RVV_C3_U8M2)
|
CV_HAL_RVV_FLIP_C3(8UC3, uchar, RVV_C3_U8M2)
|
||||||
CV_HAL_RVV_FLIPY_C3(16UC3, ushort, RVV_C3_U16M2)
|
CV_HAL_RVV_FLIP_C3(16UC3, ushort, RVV_C3_U16M2)
|
||||||
CV_HAL_RVV_FLIPY_C3(32UC3, unsigned, RVV_C3_U32M2)
|
CV_HAL_RVV_FLIP_C3(32UC3, unsigned, RVV_C3_U32M2)
|
||||||
CV_HAL_RVV_FLIPY_C3(64UC3, uint64_t, RVV_C3_U64M2)
|
CV_HAL_RVV_FLIP_C3(64UC3, uint64_t, RVV_C3_U64M2)
|
||||||
|
|
||||||
|
#define CV_HAL_RVV_FLIP_INPLACE_C3(name, _Tps, RVV) \
|
||||||
|
inline void flip_inplace_##name(uchar* data, size_t step, int width, int height, int flip_mode) { \
|
||||||
|
auto new_height = (flip_mode < 0 ? height / 2 : height); \
|
||||||
|
auto new_width = width / 2; \
|
||||||
|
for (int h = 0; h < new_height; h++) { \
|
||||||
|
_Tps* row_begin = (_Tps*)(data + step * h); \
|
||||||
|
_Tps* row_end = (_Tps*)(data + step * (flip_mode < 0 ? (new_height - h) : (h + 1))); \
|
||||||
|
int vl; \
|
||||||
|
for (int w = 0; w < new_width; w += vl) { \
|
||||||
|
vl = RVV::setvl(new_width - w); \
|
||||||
|
RVV::VecType indices = __riscv_vrsub(RVV::vid(vl), vl - 1, vl); \
|
||||||
|
auto v_left = RVV::vload3(row_begin + 3 * w, vl); \
|
||||||
|
auto flipped_left = RVV::vflip3(v_left, indices, vl); \
|
||||||
|
auto v_right = RVV::vload3(row_end - 3 * (w + vl), vl); \
|
||||||
|
auto flipped_right = RVV::vflip3(v_right, indices, vl); \
|
||||||
|
RVV::vstore3(row_begin + 3 * w, flipped_right, vl); \
|
||||||
|
RVV::vstore3(row_end - 3 * (w + vl), flipped_left, vl); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
CV_HAL_RVV_FLIP_INPLACE_C3(8UC3, uchar, RVV_C3_U8M2)
|
||||||
|
CV_HAL_RVV_FLIP_INPLACE_C3(16UC3, ushort, RVV_C3_U16M2)
|
||||||
|
CV_HAL_RVV_FLIP_INPLACE_C3(32UC3, unsigned, RVV_C3_U32M2)
|
||||||
|
CV_HAL_RVV_FLIP_INPLACE_C3(64UC3, uint64_t, RVV_C3_U64M2)
|
||||||
|
|
||||||
struct FlipVlen256
|
struct FlipVlen256
|
||||||
{
|
{
|
||||||
@ -232,13 +280,48 @@ inline void flipXY(int esz,
|
|||||||
|
|
||||||
} // namespace anonymous
|
} // namespace anonymous
|
||||||
|
|
||||||
|
inline int flip_inplace(int esz, uchar* data, size_t step, int width, int height, int flip_mode) {
|
||||||
|
if (flip_mode == 0) {
|
||||||
|
for (int h = 0; h < (height / 2); h++) {
|
||||||
|
uchar* top_row = data + step * h;
|
||||||
|
uchar* bottom_row = data + step * (height - h - 1);
|
||||||
|
std::swap_ranges(top_row, top_row + esz * width, bottom_row);
|
||||||
|
}
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
using FlipInplaceFunc = void (*)(uchar*, size_t, int, int, int);
|
||||||
|
static FlipInplaceFunc flip_inplace_func_tab[] = {
|
||||||
|
0, flip_inplace_8UC1, flip_inplace_16UC1, flip_inplace_8UC3,
|
||||||
|
flip_inplace_32UC1, 0, flip_inplace_16UC3, 0,
|
||||||
|
flip_inplace_64UC1, 0, 0, 0,
|
||||||
|
flip_inplace_32UC3, 0, 0, 0,
|
||||||
|
0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0,
|
||||||
|
flip_inplace_64UC3, 0, 0, 0,
|
||||||
|
0, 0, 0, 0,
|
||||||
|
0
|
||||||
|
};
|
||||||
|
FlipInplaceFunc func = flip_inplace_func_tab[esz];
|
||||||
|
if (!func) {
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
func(data, step, width, height, flip_mode);
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
inline int flip(int src_type, const uchar* src_data, size_t src_step, int src_width, int src_height,
|
inline int flip(int src_type, const uchar* src_data, size_t src_step, int src_width, int src_height,
|
||||||
uchar* dst_data, size_t dst_step, int flip_mode)
|
uchar* dst_data, size_t dst_step, int flip_mode)
|
||||||
{
|
{
|
||||||
int esz = CV_ELEM_SIZE(src_type);
|
int esz = CV_ELEM_SIZE(src_type);
|
||||||
if (src_width < 0 || src_height < 0 || src_data == dst_data || esz > 32)
|
if (src_width < 0 || src_height < 0 || esz > 32)
|
||||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
|
||||||
|
if (src_data == dst_data) {
|
||||||
|
return flip_inplace(esz, dst_data, dst_step, src_width, src_height, flip_mode);
|
||||||
|
}
|
||||||
|
|
||||||
if (flip_mode == 0)
|
if (flip_mode == 0)
|
||||||
{
|
{
|
||||||
for (int h = 0; h < src_height; h++) {
|
for (int h = 0; h < src_height; h++) {
|
||||||
|
Loading…
Reference in New Issue
Block a user