diff --git a/3rdparty/hal_rvv/hal_rvv.hpp b/3rdparty/hal_rvv/hal_rvv.hpp index 6f62d8471a..a32e0971b7 100644 --- a/3rdparty/hal_rvv/hal_rvv.hpp +++ b/3rdparty/hal_rvv/hal_rvv.hpp @@ -28,6 +28,7 @@ #include "hal_rvv_1p0/minmax.hpp" // core #include "hal_rvv_1p0/atan.hpp" // core #include "hal_rvv_1p0/split.hpp" // core +#include "hal_rvv_1p0/flip.hpp" // core #endif #endif diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/flip.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/flip.hpp new file mode 100644 index 0000000000..fe5c5a8951 --- /dev/null +++ b/3rdparty/hal_rvv/hal_rvv_1p0/flip.hpp @@ -0,0 +1,248 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#pragma once + +#include +#include +#include + +namespace cv { namespace cv_hal_rvv { + +#undef cv_hal_flip +#define cv_hal_flip cv::cv_hal_rvv::flip + +struct FlipVlen256 +{ + using TableElemType = uchar; + using TableType = vuint8m8_t; + + static inline size_t setvlmax() + { + return __riscv_vsetvlmax_e8m8(); + } + + static inline TableType vid(size_t vl) + { + return __riscv_vid_v_u8m8(vl); + } + + static inline TableType loadTable(const TableElemType* ptr, size_t vl) + { + return __riscv_vle8_v_u8m8(ptr, vl); + } + + static inline void gather(const uchar* src, TableType tab, uchar* dst, size_t vl) + { + auto v = __riscv_vle8_v_u8m8(src, vl); + __riscv_vse8(dst, __riscv_vrgather(v, tab, vl), vl); + } +}; + +struct FlipVlen512 +{ + using TableElemType = uint16_t; + using TableType = vuint16m8_t; + + static inline size_t setvlmax() + { + return __riscv_vsetvlmax_e8m4(); + } + + static inline TableType vid(size_t vl) + { + return __riscv_vid_v_u16m8(vl); + } + + static inline TableType loadTable(const TableElemType* ptr, size_t vl) + { + return __riscv_vle16_v_u16m8(ptr, vl); + } + + static inline void gather(const uchar* src, TableType tab, uchar* dst, size_t vl) + { + auto v = __riscv_vle8_v_u8m4(src, vl); + __riscv_vse8(dst, __riscv_vrgatherei16(v, tab, vl), vl); + } +}; + +template +inline void flipFillBuffer(cv::AutoBuffer& _buf, size_t len, int esz) +{ + T* buf = _buf.data(); + for (int i = (int)len - esz; i >= 0; i -= esz, buf += esz) + for (int j = 0; j < esz; j++) + buf[j] = (T)(i + j); +} + +inline void flipX(int esz, + const uchar* src_data, + size_t src_step, + int src_width, + int src_height, + uchar* dst_data, + size_t dst_step) +{ + size_t w = (size_t)src_width * esz; + auto src0 = src_data, src1 = src_data + src_step * (src_height - 1); + auto dst0 = dst_data, dst1 = dst_data + dst_step * (src_height - 1); + size_t vl; + for (src_height -= 2; src_height >= 0; + src_height -= 2, src0 += src_step, dst0 += dst_step, src1 -= src_step, dst1 -= dst_step) + { + for (size_t i = 0; i < w; i += vl) + { + vl = __riscv_vsetvl_e8m8(w - i); + __riscv_vse8(dst1 + i, __riscv_vle8_v_u8m8(src0 + i, vl), vl); + __riscv_vse8(dst0 + i, __riscv_vle8_v_u8m8(src1 + i, vl), vl); + } + } + if (src_height == -1) + { + for (size_t i = 0; i < w; i += (int)vl) + { + vl = __riscv_vsetvl_e8m8(w - i); + __riscv_vse8(dst0 + i, __riscv_vle8_v_u8m8(src1 + i, vl), vl); + } + } +} + +template +inline void flipY(int esz, + const uchar* src_data, + size_t src_step, + int src_width, + int src_height, + uchar* dst_data, + size_t dst_step) +{ + size_t w = (size_t)src_width * esz; + size_t vl = std::min(FlipVlen::setvlmax() / esz * esz, w); + typename FlipVlen::TableType tab_v; + if (esz == 1) + tab_v = __riscv_vrsub(FlipVlen::vid(vl), vl - 1, vl); + else + { + cv::AutoBuffer buf(vl); + flipFillBuffer(buf, vl, esz); + tab_v = FlipVlen::loadTable(buf.data(), vl); + } + if (vl == w) + for (; src_height; src_height--, src_data += src_step, dst_data += dst_step) + FlipVlen::gather(src_data, tab_v, dst_data, vl); + else + for (; src_height; src_height--, src_data += src_step, dst_data += dst_step) + { + auto src0 = src_data, src1 = src_data + w - vl; + auto dst0 = dst_data, dst1 = dst_data + w - vl; + for (; src0 < src1 + vl; src0 += vl, src1 -= vl, dst0 += vl, dst1 -= vl) + { + FlipVlen::gather(src0, tab_v, dst1, vl); + FlipVlen::gather(src1, tab_v, dst0, vl); + } + } +} + +template +inline void flipXY(int esz, + const uchar* src_data, + size_t src_step, + int src_width, + int src_height, + uchar* dst_data, + size_t dst_step) +{ + size_t w = (size_t)src_width * esz; + size_t vl = std::min(FlipVlen::setvlmax() / esz * esz, w); + typename FlipVlen::TableType tab_v; + if (esz == 1) + tab_v = __riscv_vrsub(FlipVlen::vid(vl), vl - 1, vl); + else + { + cv::AutoBuffer buf(vl); + flipFillBuffer(buf, vl, esz); + tab_v = FlipVlen::loadTable(buf.data(), vl); + } + auto src0 = src_data, src1 = src_data + src_step * (src_height - 1); + auto dst0 = dst_data, dst1 = dst_data + dst_step * (src_height - 1); + if (vl == w) + { + for (src_height -= 2; src_height >= 0; + src_height -= 2, + src0 += src_step, + dst0 += dst_step, + src1 -= src_step, + dst1 -= dst_step) + { + FlipVlen::gather(src0, tab_v, dst1, vl); + FlipVlen::gather(src1, tab_v, dst0, vl); + } + if (src_height == -1) + { + FlipVlen::gather(src1, tab_v, dst0, vl); + } + } + else + { + for (src_height -= 2; src_height >= 0; + src_height -= 2, + src0 += src_step, + dst0 += dst_step, + src1 -= src_step, + dst1 -= dst_step) + { + for (size_t i = 0; 2 * i < w; i += vl) + { + FlipVlen::gather(src0 + i, tab_v, dst1 + w - i - vl, vl); + FlipVlen::gather(src0 + w - i - vl, tab_v, dst1 + i, vl); + FlipVlen::gather(src1 + i, tab_v, dst0 + w - i - vl, vl); + FlipVlen::gather(src1 + w - i - vl, tab_v, dst0 + i, vl); + } + } + if (src_height == -1) + { + for (size_t i = 0; 2 * i < w; i += vl) + { + FlipVlen::gather(src1 + i, tab_v, dst0 + w - i - vl, vl); + FlipVlen::gather(src1 + w - i - vl, tab_v, dst0 + i, vl); + } + } + } +} + +inline int flip(int src_type, + const uchar* src_data, + size_t src_step, + int src_width, + int src_height, + uchar* dst_data, + size_t dst_step, + int flip_mode) +{ + if (src_width < 0 || src_height < 0 || src_data == dst_data) + return CV_HAL_ERROR_NOT_IMPLEMENTED; + + int esz = CV_ELEM_SIZE(src_type); + if (flip_mode == 0) + { + flipX(esz, src_data, src_step, src_width, src_height, dst_data, dst_step); + } + else if (flip_mode > 0) + { + if (__riscv_vlenb() * 8 <= 256) + flipY(esz, src_data, src_step, src_width, src_height, dst_data, dst_step); + else + flipY(esz, src_data, src_step, src_width, src_height, dst_data, dst_step); + } + else + { + if (__riscv_vlenb() * 8 <= 256) + flipXY(esz, src_data, src_step, src_width, src_height, dst_data, dst_step); + else + flipXY(esz, src_data, src_step, src_width, src_height, dst_data, dst_step); + } + + return CV_HAL_ERROR_OK; +} + +}} // namespace cv::cv_hal_rvv diff --git a/modules/core/perf/perf_flip.cpp b/modules/core/perf/perf_flip.cpp new file mode 100644 index 0000000000..2a24d394c0 --- /dev/null +++ b/modules/core/perf/perf_flip.cpp @@ -0,0 +1,45 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { +using namespace perf; + +enum +{ + FLIP_XY = 0, + FLIP_X = 1, + FLIP_Y = 2, +}; + +#define FLIP_SIZES szQVGA, szVGA, sz1080p +#define FLIP_TYPES CV_8UC1, CV_8UC3, CV_8UC4 +#define FLIP_CODES FLIP_X, FLIP_Y, FLIP_XY + +CV_FLAGS(FlipCode, FLIP_X, FLIP_Y, FLIP_XY); +typedef tuple Size_MatType_FlipCode_t; +typedef perf::TestBaseWithParam Size_MatType_FlipCode; + +PERF_TEST_P(Size_MatType_FlipCode, + flip, + testing::Combine(testing::Values(FLIP_SIZES), + testing::Values(FLIP_TYPES), + testing::Values(FLIP_CODES))) +{ + Size sz = get<0>(GetParam()); + int matType = get<1>(GetParam()); + int flipCode = get<2>(GetParam()) - 1; + + Mat src(sz, matType); + Mat dst(sz, matType); + + declare.in(src, WARMUP_RNG).out(dst); + + TEST_CYCLE() cv::flip(src, dst, flipCode); + + SANITY_CHECK_NOTHING(); +} + +}} // namespace opencv_test