mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 14:36:36 +08:00
Merge pull request #26943 from GenshinImpactStarts:flip_hal_rvv
Impl RISC-V HAL for cv::flip | Add perf test for flip #26943 Implement through the existing `cv_hal_flip` interfaces. Add perf test for `cv::flip`. The reason why select these args for testing: - **size**: copied from perf_lut - **type**: - U8C1: basic situation - U8C3: unaligned element size - U8C4: large element size Tested on - MUSE-PI (vlen=256) - Compiler: gcc 14.2 (riscv-collab/riscv-gnu-toolchain Nightly: December 16, 2024) ```sh $ opencv_test_core --gtest_filter="Core_Flip/ElemWiseTest.*" $ opencv_perf_core --gtest_filter="Size_MatType_FlipCode*" --perf_min_samples=300 --perf_force_samples=300 ``` ``` Geometric mean (ms) Name of Test scalar ui rvv ui rvv vs vs scalar scalar (x-factor) (x-factor) flip::Size_MatType_FlipCode::(320x240, 8UC1, FLIP_X) 0.026 0.033 0.031 0.81 0.84 flip::Size_MatType_FlipCode::(320x240, 8UC1, FLIP_XY) 0.206 0.212 0.091 0.97 2.26 flip::Size_MatType_FlipCode::(320x240, 8UC1, FLIP_Y) 0.185 0.189 0.082 0.98 2.25 flip::Size_MatType_FlipCode::(320x240, 8UC3, FLIP_X) 0.070 0.084 0.084 0.83 0.83 flip::Size_MatType_FlipCode::(320x240, 8UC3, FLIP_XY) 0.616 0.612 0.235 1.01 2.62 flip::Size_MatType_FlipCode::(320x240, 8UC3, FLIP_Y) 0.587 0.603 0.204 0.97 2.88 flip::Size_MatType_FlipCode::(320x240, 8UC4, FLIP_X) 0.263 0.110 0.109 2.40 2.41 flip::Size_MatType_FlipCode::(320x240, 8UC4, FLIP_XY) 0.930 0.831 0.316 1.12 2.95 flip::Size_MatType_FlipCode::(320x240, 8UC4, FLIP_Y) 1.175 1.129 0.313 1.04 3.75 flip::Size_MatType_FlipCode::(640x480, 8UC1, FLIP_X) 0.303 0.118 0.111 2.57 2.73 flip::Size_MatType_FlipCode::(640x480, 8UC1, FLIP_XY) 0.949 0.836 0.405 1.14 2.34 flip::Size_MatType_FlipCode::(640x480, 8UC1, FLIP_Y) 0.784 0.783 0.409 1.00 1.92 flip::Size_MatType_FlipCode::(640x480, 8UC3, FLIP_X) 1.084 0.360 0.355 3.01 3.06 flip::Size_MatType_FlipCode::(640x480, 8UC3, FLIP_XY) 3.768 3.348 1.364 1.13 2.76 flip::Size_MatType_FlipCode::(640x480, 8UC3, FLIP_Y) 4.361 4.473 1.296 0.97 3.37 flip::Size_MatType_FlipCode::(640x480, 8UC4, FLIP_X) 1.252 0.469 0.451 2.67 2.78 flip::Size_MatType_FlipCode::(640x480, 8UC4, FLIP_XY) 5.732 5.220 1.303 1.10 4.40 flip::Size_MatType_FlipCode::(640x480, 8UC4, FLIP_Y) 5.041 5.105 1.203 0.99 4.19 flip::Size_MatType_FlipCode::(1920x1080, 8UC1, FLIP_X) 2.382 0.903 0.903 2.64 2.64 flip::Size_MatType_FlipCode::(1920x1080, 8UC1, FLIP_XY) 8.606 7.508 2.581 1.15 3.33 flip::Size_MatType_FlipCode::(1920x1080, 8UC1, FLIP_Y) 8.421 8.535 2.219 0.99 3.80 flip::Size_MatType_FlipCode::(1920x1080, 8UC3, FLIP_X) 6.312 2.416 2.429 2.61 2.60 flip::Size_MatType_FlipCode::(1920x1080, 8UC3, FLIP_XY) 29.174 26.055 12.761 1.12 2.29 flip::Size_MatType_FlipCode::(1920x1080, 8UC3, FLIP_Y) 25.373 25.500 13.382 1.00 1.90 flip::Size_MatType_FlipCode::(1920x1080, 8UC4, FLIP_X) 7.620 3.204 3.115 2.38 2.45 flip::Size_MatType_FlipCode::(1920x1080, 8UC4, FLIP_XY) 32.876 29.310 12.976 1.12 2.53 flip::Size_MatType_FlipCode::(1920x1080, 8UC4, FLIP_Y) 28.831 29.094 14.919 0.99 1.93 ``` The optimization for vlen <= 256 and > 256 are different, but I have no real hardware with vlen > 256. So accuracy tests for that like 512 and 1024 are conducted on QEMU built from the `riscv-collab/riscv-gnu-toolchain`. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
b5f5540e8a
commit
6a6a5a765d
1
3rdparty/hal_rvv/hal_rvv.hpp
vendored
1
3rdparty/hal_rvv/hal_rvv.hpp
vendored
@ -28,6 +28,7 @@
|
||||
#include "hal_rvv_1p0/minmax.hpp" // core
|
||||
#include "hal_rvv_1p0/atan.hpp" // core
|
||||
#include "hal_rvv_1p0/split.hpp" // core
|
||||
#include "hal_rvv_1p0/flip.hpp" // core
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
248
3rdparty/hal_rvv/hal_rvv_1p0/flip.hpp
vendored
Normal file
248
3rdparty/hal_rvv/hal_rvv_1p0/flip.hpp
vendored
Normal file
@ -0,0 +1,248 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#pragma once
|
||||
|
||||
#include <riscv_vector.h>
|
||||
#include <opencv2/core/base.hpp>
|
||||
#include <opencv2/core/utility.hpp>
|
||||
|
||||
namespace cv { namespace cv_hal_rvv {
|
||||
|
||||
#undef cv_hal_flip
|
||||
#define cv_hal_flip cv::cv_hal_rvv::flip
|
||||
|
||||
struct FlipVlen256
|
||||
{
|
||||
using TableElemType = uchar;
|
||||
using TableType = vuint8m8_t;
|
||||
|
||||
static inline size_t setvlmax()
|
||||
{
|
||||
return __riscv_vsetvlmax_e8m8();
|
||||
}
|
||||
|
||||
static inline TableType vid(size_t vl)
|
||||
{
|
||||
return __riscv_vid_v_u8m8(vl);
|
||||
}
|
||||
|
||||
static inline TableType loadTable(const TableElemType* ptr, size_t vl)
|
||||
{
|
||||
return __riscv_vle8_v_u8m8(ptr, vl);
|
||||
}
|
||||
|
||||
static inline void gather(const uchar* src, TableType tab, uchar* dst, size_t vl)
|
||||
{
|
||||
auto v = __riscv_vle8_v_u8m8(src, vl);
|
||||
__riscv_vse8(dst, __riscv_vrgather(v, tab, vl), vl);
|
||||
}
|
||||
};
|
||||
|
||||
struct FlipVlen512
|
||||
{
|
||||
using TableElemType = uint16_t;
|
||||
using TableType = vuint16m8_t;
|
||||
|
||||
static inline size_t setvlmax()
|
||||
{
|
||||
return __riscv_vsetvlmax_e8m4();
|
||||
}
|
||||
|
||||
static inline TableType vid(size_t vl)
|
||||
{
|
||||
return __riscv_vid_v_u16m8(vl);
|
||||
}
|
||||
|
||||
static inline TableType loadTable(const TableElemType* ptr, size_t vl)
|
||||
{
|
||||
return __riscv_vle16_v_u16m8(ptr, vl);
|
||||
}
|
||||
|
||||
static inline void gather(const uchar* src, TableType tab, uchar* dst, size_t vl)
|
||||
{
|
||||
auto v = __riscv_vle8_v_u8m4(src, vl);
|
||||
__riscv_vse8(dst, __riscv_vrgatherei16(v, tab, vl), vl);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
inline void flipFillBuffer(cv::AutoBuffer<T>& _buf, size_t len, int esz)
|
||||
{
|
||||
T* buf = _buf.data();
|
||||
for (int i = (int)len - esz; i >= 0; i -= esz, buf += esz)
|
||||
for (int j = 0; j < esz; j++)
|
||||
buf[j] = (T)(i + j);
|
||||
}
|
||||
|
||||
inline void flipX(int esz,
|
||||
const uchar* src_data,
|
||||
size_t src_step,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uchar* dst_data,
|
||||
size_t dst_step)
|
||||
{
|
||||
size_t w = (size_t)src_width * esz;
|
||||
auto src0 = src_data, src1 = src_data + src_step * (src_height - 1);
|
||||
auto dst0 = dst_data, dst1 = dst_data + dst_step * (src_height - 1);
|
||||
size_t vl;
|
||||
for (src_height -= 2; src_height >= 0;
|
||||
src_height -= 2, src0 += src_step, dst0 += dst_step, src1 -= src_step, dst1 -= dst_step)
|
||||
{
|
||||
for (size_t i = 0; i < w; i += vl)
|
||||
{
|
||||
vl = __riscv_vsetvl_e8m8(w - i);
|
||||
__riscv_vse8(dst1 + i, __riscv_vle8_v_u8m8(src0 + i, vl), vl);
|
||||
__riscv_vse8(dst0 + i, __riscv_vle8_v_u8m8(src1 + i, vl), vl);
|
||||
}
|
||||
}
|
||||
if (src_height == -1)
|
||||
{
|
||||
for (size_t i = 0; i < w; i += (int)vl)
|
||||
{
|
||||
vl = __riscv_vsetvl_e8m8(w - i);
|
||||
__riscv_vse8(dst0 + i, __riscv_vle8_v_u8m8(src1 + i, vl), vl);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename FlipVlen>
|
||||
inline void flipY(int esz,
|
||||
const uchar* src_data,
|
||||
size_t src_step,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uchar* dst_data,
|
||||
size_t dst_step)
|
||||
{
|
||||
size_t w = (size_t)src_width * esz;
|
||||
size_t vl = std::min(FlipVlen::setvlmax() / esz * esz, w);
|
||||
typename FlipVlen::TableType tab_v;
|
||||
if (esz == 1)
|
||||
tab_v = __riscv_vrsub(FlipVlen::vid(vl), vl - 1, vl);
|
||||
else
|
||||
{
|
||||
cv::AutoBuffer<typename FlipVlen::TableElemType> buf(vl);
|
||||
flipFillBuffer(buf, vl, esz);
|
||||
tab_v = FlipVlen::loadTable(buf.data(), vl);
|
||||
}
|
||||
if (vl == w)
|
||||
for (; src_height; src_height--, src_data += src_step, dst_data += dst_step)
|
||||
FlipVlen::gather(src_data, tab_v, dst_data, vl);
|
||||
else
|
||||
for (; src_height; src_height--, src_data += src_step, dst_data += dst_step)
|
||||
{
|
||||
auto src0 = src_data, src1 = src_data + w - vl;
|
||||
auto dst0 = dst_data, dst1 = dst_data + w - vl;
|
||||
for (; src0 < src1 + vl; src0 += vl, src1 -= vl, dst0 += vl, dst1 -= vl)
|
||||
{
|
||||
FlipVlen::gather(src0, tab_v, dst1, vl);
|
||||
FlipVlen::gather(src1, tab_v, dst0, vl);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename FlipVlen>
|
||||
inline void flipXY(int esz,
|
||||
const uchar* src_data,
|
||||
size_t src_step,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uchar* dst_data,
|
||||
size_t dst_step)
|
||||
{
|
||||
size_t w = (size_t)src_width * esz;
|
||||
size_t vl = std::min(FlipVlen::setvlmax() / esz * esz, w);
|
||||
typename FlipVlen::TableType tab_v;
|
||||
if (esz == 1)
|
||||
tab_v = __riscv_vrsub(FlipVlen::vid(vl), vl - 1, vl);
|
||||
else
|
||||
{
|
||||
cv::AutoBuffer<typename FlipVlen::TableElemType> buf(vl);
|
||||
flipFillBuffer(buf, vl, esz);
|
||||
tab_v = FlipVlen::loadTable(buf.data(), vl);
|
||||
}
|
||||
auto src0 = src_data, src1 = src_data + src_step * (src_height - 1);
|
||||
auto dst0 = dst_data, dst1 = dst_data + dst_step * (src_height - 1);
|
||||
if (vl == w)
|
||||
{
|
||||
for (src_height -= 2; src_height >= 0;
|
||||
src_height -= 2,
|
||||
src0 += src_step,
|
||||
dst0 += dst_step,
|
||||
src1 -= src_step,
|
||||
dst1 -= dst_step)
|
||||
{
|
||||
FlipVlen::gather(src0, tab_v, dst1, vl);
|
||||
FlipVlen::gather(src1, tab_v, dst0, vl);
|
||||
}
|
||||
if (src_height == -1)
|
||||
{
|
||||
FlipVlen::gather(src1, tab_v, dst0, vl);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (src_height -= 2; src_height >= 0;
|
||||
src_height -= 2,
|
||||
src0 += src_step,
|
||||
dst0 += dst_step,
|
||||
src1 -= src_step,
|
||||
dst1 -= dst_step)
|
||||
{
|
||||
for (size_t i = 0; 2 * i < w; i += vl)
|
||||
{
|
||||
FlipVlen::gather(src0 + i, tab_v, dst1 + w - i - vl, vl);
|
||||
FlipVlen::gather(src0 + w - i - vl, tab_v, dst1 + i, vl);
|
||||
FlipVlen::gather(src1 + i, tab_v, dst0 + w - i - vl, vl);
|
||||
FlipVlen::gather(src1 + w - i - vl, tab_v, dst0 + i, vl);
|
||||
}
|
||||
}
|
||||
if (src_height == -1)
|
||||
{
|
||||
for (size_t i = 0; 2 * i < w; i += vl)
|
||||
{
|
||||
FlipVlen::gather(src1 + i, tab_v, dst0 + w - i - vl, vl);
|
||||
FlipVlen::gather(src1 + w - i - vl, tab_v, dst0 + i, vl);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline int flip(int src_type,
|
||||
const uchar* src_data,
|
||||
size_t src_step,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uchar* dst_data,
|
||||
size_t dst_step,
|
||||
int flip_mode)
|
||||
{
|
||||
if (src_width < 0 || src_height < 0 || src_data == dst_data)
|
||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||
|
||||
int esz = CV_ELEM_SIZE(src_type);
|
||||
if (flip_mode == 0)
|
||||
{
|
||||
flipX(esz, src_data, src_step, src_width, src_height, dst_data, dst_step);
|
||||
}
|
||||
else if (flip_mode > 0)
|
||||
{
|
||||
if (__riscv_vlenb() * 8 <= 256)
|
||||
flipY<FlipVlen256>(esz, src_data, src_step, src_width, src_height, dst_data, dst_step);
|
||||
else
|
||||
flipY<FlipVlen512>(esz, src_data, src_step, src_width, src_height, dst_data, dst_step);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (__riscv_vlenb() * 8 <= 256)
|
||||
flipXY<FlipVlen256>(esz, src_data, src_step, src_width, src_height, dst_data, dst_step);
|
||||
else
|
||||
flipXY<FlipVlen512>(esz, src_data, src_step, src_width, src_height, dst_data, dst_step);
|
||||
}
|
||||
|
||||
return CV_HAL_ERROR_OK;
|
||||
}
|
||||
|
||||
}} // namespace cv::cv_hal_rvv
|
45
modules/core/perf/perf_flip.cpp
Normal file
45
modules/core/perf/perf_flip.cpp
Normal file
@ -0,0 +1,45 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
|
||||
namespace opencv_test { namespace {
|
||||
using namespace perf;
|
||||
|
||||
enum
|
||||
{
|
||||
FLIP_XY = 0,
|
||||
FLIP_X = 1,
|
||||
FLIP_Y = 2,
|
||||
};
|
||||
|
||||
#define FLIP_SIZES szQVGA, szVGA, sz1080p
|
||||
#define FLIP_TYPES CV_8UC1, CV_8UC3, CV_8UC4
|
||||
#define FLIP_CODES FLIP_X, FLIP_Y, FLIP_XY
|
||||
|
||||
CV_FLAGS(FlipCode, FLIP_X, FLIP_Y, FLIP_XY);
|
||||
typedef tuple<Size, MatType, FlipCode> Size_MatType_FlipCode_t;
|
||||
typedef perf::TestBaseWithParam<Size_MatType_FlipCode_t> Size_MatType_FlipCode;
|
||||
|
||||
PERF_TEST_P(Size_MatType_FlipCode,
|
||||
flip,
|
||||
testing::Combine(testing::Values(FLIP_SIZES),
|
||||
testing::Values(FLIP_TYPES),
|
||||
testing::Values(FLIP_CODES)))
|
||||
{
|
||||
Size sz = get<0>(GetParam());
|
||||
int matType = get<1>(GetParam());
|
||||
int flipCode = get<2>(GetParam()) - 1;
|
||||
|
||||
Mat src(sz, matType);
|
||||
Mat dst(sz, matType);
|
||||
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
TEST_CYCLE() cv::flip(src, dst, flipCode);
|
||||
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
|
||||
}} // namespace opencv_test
|
Loading…
Reference in New Issue
Block a user