mirror of
https://github.com/opencv/opencv.git
synced 2025-06-23 04:01:31 +08:00
Add sepFilter.
Co-authored-by: Liutong HAN <liutong2020@iscas.ac.cn>
This commit is contained in:
parent
83104bed32
commit
a2d784b6f5
244
3rdparty/hal_rvv/hal_rvv_1p0/filter.hpp
vendored
244
3rdparty/hal_rvv/hal_rvv_1p0/filter.hpp
vendored
@ -37,10 +37,10 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<typename... Args>
|
template<typename... Args>
|
||||||
static inline int invoke(int height, std::function<int(int, int, Args...)> func, Args&&... args)
|
static inline int invoke(int start, int end, std::function<int(int, int, Args...)> func, Args&&... args)
|
||||||
{
|
{
|
||||||
cv::parallel_for_(Range(1, height), FilterInvoker(func, std::forward<Args>(args)...), cv::getNumThreads());
|
cv::parallel_for_(Range(start + 1, end), FilterInvoker(func, std::forward<Args>(args)...), cv::getNumThreads());
|
||||||
return func(0, 1, std::forward<Args>(args)...);
|
return func(start, start + 1, std::forward<Args>(args)...);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Filter2D
|
struct Filter2D
|
||||||
@ -182,21 +182,22 @@ static inline int filter(int start, int end, Filter2D* data, const uchar* src_da
|
|||||||
kernel[i] = reinterpret_cast<const float*>(data->kernel_data + (i / ksize) * data->kernel_step)[i % ksize];
|
kernel[i] = reinterpret_cast<const float*>(data->kernel_data + (i / ksize) * data->kernel_step)[i % ksize];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr int noval = std::numeric_limits<int>::max();
|
||||||
auto access = [&](int x, int y) {
|
auto access = [&](int x, int y) {
|
||||||
int pi, pj;
|
int pi, pj;
|
||||||
if (data->borderType & BORDER_ISOLATED)
|
if (data->borderType & BORDER_ISOLATED)
|
||||||
{
|
{
|
||||||
pi = borderInterpolate(x - data->anchor_y, height, data->borderType & ~BORDER_ISOLATED);
|
pi = borderInterpolate(x - data->anchor_y, height, data->borderType & ~BORDER_ISOLATED);
|
||||||
pj = borderInterpolate(y - data->anchor_x, width , data->borderType & ~BORDER_ISOLATED);
|
pj = borderInterpolate(y - data->anchor_x, width , data->borderType & ~BORDER_ISOLATED);
|
||||||
if (pi >= 0)
|
pi = pi < 0 ? noval : pi;
|
||||||
pi += offset_y;
|
pj = pj < 0 ? noval : pj;
|
||||||
if (pj >= 0)
|
|
||||||
pj += offset_x;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
pi = borderInterpolate(offset_y + x - data->anchor_y, full_height, data->borderType);
|
pi = borderInterpolate(offset_y + x - data->anchor_y, full_height, data->borderType);
|
||||||
pj = borderInterpolate(offset_x + y - data->anchor_x, full_width , data->borderType);
|
pj = borderInterpolate(offset_x + y - data->anchor_x, full_width , data->borderType);
|
||||||
|
pi = pi < 0 ? noval : pi - offset_y;
|
||||||
|
pj = pj < 0 ? noval : pj - offset_x;
|
||||||
}
|
}
|
||||||
return std::make_pair(pi, pj);
|
return std::make_pair(pi, pj);
|
||||||
};
|
};
|
||||||
@ -207,7 +208,7 @@ static inline int filter(int start, int end, Filter2D* data, const uchar* src_da
|
|||||||
for (int i = 0; i < ksize * ksize; i++)
|
for (int i = 0; i < ksize * ksize; i++)
|
||||||
{
|
{
|
||||||
auto p = access(x + i / ksize, y + i % ksize);
|
auto p = access(x + i / ksize, y + i % ksize);
|
||||||
if (p.first >= 0 && p.second >= 0)
|
if (p.first != noval && p.second != noval)
|
||||||
{
|
{
|
||||||
sum0 += kernel[i] * src_data[p.first * src_step + p.second * 4 ];
|
sum0 += kernel[i] * src_data[p.first * src_step + p.second * 4 ];
|
||||||
sum1 += kernel[i] * src_data[p.first * src_step + p.second * 4 + 1];
|
sum1 += kernel[i] * src_data[p.first * src_step + p.second * 4 + 1];
|
||||||
@ -236,17 +237,17 @@ static inline int filter(int start, int end, Filter2D* data, const uchar* src_da
|
|||||||
for (int j = right; j < width; j++)
|
for (int j = right; j < width; j++)
|
||||||
process(i, j);
|
process(i, j);
|
||||||
|
|
||||||
const uchar* row0 = access(i , 0).first < 0 ? nullptr : src_data + access(i , 0).first * src_step;
|
const uchar* row0 = access(i , 0).first == noval ? nullptr : src_data + access(i , 0).first * src_step;
|
||||||
const uchar* row1 = access(i + 1, 0).first < 0 ? nullptr : src_data + access(i + 1, 0).first * src_step;
|
const uchar* row1 = access(i + 1, 0).first == noval ? nullptr : src_data + access(i + 1, 0).first * src_step;
|
||||||
const uchar* row2 = access(i + 2, 0).first < 0 ? nullptr : src_data + access(i + 2, 0).first * src_step;
|
const uchar* row2 = access(i + 2, 0).first == noval ? nullptr : src_data + access(i + 2, 0).first * src_step;
|
||||||
if (ksize == 3)
|
if (ksize == 3)
|
||||||
{
|
{
|
||||||
process3(data->anchor_x, left, right, data->delta, kernel, row0, row1, row2, dst_data + i * width * 4);
|
process3(data->anchor_x, left, right, data->delta, kernel, row0, row1, row2, dst_data + i * width * 4);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const uchar* row3 = access(i + 3, 0).first < 0 ? nullptr : src_data + access(i + 3, 0).first * src_step;
|
const uchar* row3 = access(i + 3, 0).first == noval ? nullptr : src_data + access(i + 3, 0).first * src_step;
|
||||||
const uchar* row4 = access(i + 4, 0).first < 0 ? nullptr : src_data + access(i + 4, 0).first * src_step;
|
const uchar* row4 = access(i + 4, 0).first == noval ? nullptr : src_data + access(i + 4, 0).first * src_step;
|
||||||
process5(data->anchor_x, left, right, data->delta, kernel, row0, row1, row2, row3, row4, dst_data + i * width * 4);
|
process5(data->anchor_x, left, right, data->delta, kernel, row0, row1, row2, row3, row4, dst_data + i * width * 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -257,7 +258,6 @@ static inline int filter(int start, int end, Filter2D* data, const uchar* src_da
|
|||||||
|
|
||||||
inline int filter(cvhalFilter2D* context, uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int full_width, int full_height, int offset_x, int offset_y)
|
inline int filter(cvhalFilter2D* context, uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int full_width, int full_height, int offset_x, int offset_y)
|
||||||
{
|
{
|
||||||
src_data -= offset_y * src_step - offset_x * 4;
|
|
||||||
Filter2D* data = reinterpret_cast<Filter2D*>(context);
|
Filter2D* data = reinterpret_cast<Filter2D*>(context);
|
||||||
std::vector<uchar> dst(width * height * 4);
|
std::vector<uchar> dst(width * height * 4);
|
||||||
|
|
||||||
@ -265,10 +265,10 @@ inline int filter(cvhalFilter2D* context, uchar* src_data, size_t src_step, ucha
|
|||||||
switch (data->kernel_width)
|
switch (data->kernel_width)
|
||||||
{
|
{
|
||||||
case 3:
|
case 3:
|
||||||
res = invoke(height, {filter<3>}, data, src_data, src_step, dst.data(), width, height, full_width, full_height, offset_x, offset_y);
|
res = invoke(0, height, {filter<3>}, data, src_data, src_step, dst.data(), width, height, full_width, full_height, offset_x, offset_y);
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
res = invoke(height, {filter<5>}, data, src_data, src_step, dst.data(), width, height, full_width, full_height, offset_x, offset_y);
|
res = invoke(0, height, {filter<5>}, data, src_data, src_step, dst.data(), width, height, full_width, full_height, offset_x, offset_y);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -284,6 +284,218 @@ inline int filterFree(cvhalFilter2D* context)
|
|||||||
}
|
}
|
||||||
} // cv::cv_hal_rvv::filter
|
} // cv::cv_hal_rvv::filter
|
||||||
|
|
||||||
|
namespace sepFilter {
|
||||||
|
// Bind OpenCV's separable-filter HAL hooks to the RVV implementations in this
// namespace. Each hook is undefined first so that replacing an earlier binding
// does not trigger a macro redefinition.
#undef cv_hal_sepFilterInit
#define cv_hal_sepFilterInit cv::cv_hal_rvv::sepFilter::sepFilterInit
#undef cv_hal_sepFilter
#define cv_hal_sepFilter cv::cv_hal_rvv::sepFilter::sepFilter
#undef cv_hal_sepFilterFree
#define cv_hal_sepFilterFree cv::cv_hal_rvv::sepFilter::sepFilterFree
|
||||||
|
|
||||||
|
struct sepFilter2D
|
||||||
|
{
|
||||||
|
int src_type;
|
||||||
|
int dst_type;
|
||||||
|
int kernel_type;
|
||||||
|
const uchar* kernelx_data;
|
||||||
|
int kernelx_length;
|
||||||
|
const uchar* kernely_data;
|
||||||
|
int kernely_length;
|
||||||
|
int anchor_x;
|
||||||
|
int anchor_y;
|
||||||
|
double delta;
|
||||||
|
int borderType;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Creates the context used by sepFilter() / sepFilterFree().
//
// Supported configuration: CV_8UC1 source, CV_32FC1 kernels, CV_16SC1 or
// CV_32FC1 destination, and two kernels of the same length (3 or 5). Anything
// else returns CV_HAL_ERROR_NOT_IMPLEMENTED and leaves *context untouched.
//
// context          receives the newly allocated sepFilter2D (opaque to the caller)
// kernelx_data /
// kernely_data     kernel coefficients; only the pointers are stored, not copies
// anchor_x /
// anchor_y         negative values select the kernel centre
// delta            stored for the column pass
// borderType       stored with BORDER_ISOLATED cleared
//
// Returns CV_HAL_ERROR_OK on success.
inline int sepFilterInit(cvhalFilter2D **context, int src_type, int dst_type, int kernel_type, uchar *kernelx_data, int kernelx_length, uchar *kernely_data, int kernely_length, int anchor_x, int anchor_y, double delta, int borderType)
{
    if (kernel_type != CV_32FC1 || src_type != CV_8UC1 || (dst_type != CV_16SC1 && dst_type != CV_32FC1))
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
    if (kernelx_length != kernely_length)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
    if (kernelx_length != 3 && kernelx_length != 5)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    // The column pass addresses the intermediate row buffer with
    // borderInterpolate(..., full_height, ...) - offset_y, and that buffer only
    // holds `height` rows plus (ksize - 1) padding rows on each side. With
    // BORDER_WRAP a tap can land at the opposite end of the full image, which
    // is outside that buffer when the source is a ROI, so this mode is declined
    // here.
    if ((borderType & ~BORDER_ISOLATED) == BORDER_WRAP)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    anchor_x = anchor_x < 0 ? kernelx_length / 2 : anchor_x;
    anchor_y = anchor_y < 0 ? kernely_length / 2 : anchor_y;

    // Ownership passes to the caller through the opaque handle; it is released
    // by sepFilterFree().
    *context = reinterpret_cast<cvhalFilter2D*>(new sepFilter2D{src_type, dst_type, kernel_type, kernelx_data, kernelx_length, kernely_data, kernely_length, anchor_x, anchor_y, delta, borderType & ~BORDER_ISOLATED});
    return CV_HAL_ERROR_OK;
}
|
||||||
|
|
||||||
|
template<int ksize>
|
||||||
|
static inline int sepFilterRow(int start, int end, sepFilter2D* data, const uchar* src_data, size_t src_step, float* dst_data, int width, int full_width, int offset_x)
|
||||||
|
{
|
||||||
|
constexpr int noval = std::numeric_limits<int>::max();
|
||||||
|
auto access = [&](int y) {
|
||||||
|
int pj;
|
||||||
|
if (data->borderType & BORDER_ISOLATED)
|
||||||
|
{
|
||||||
|
pj = borderInterpolate(y - data->anchor_x, width, data->borderType & ~BORDER_ISOLATED);
|
||||||
|
pj = pj < 0 ? noval : pj;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pj = borderInterpolate(offset_x + y - data->anchor_x, full_width, data->borderType);
|
||||||
|
pj = pj < 0 ? noval : pj - offset_x;
|
||||||
|
}
|
||||||
|
return pj;
|
||||||
|
};
|
||||||
|
|
||||||
|
const float* kx = reinterpret_cast<const float*>(data->kernelx_data);
|
||||||
|
auto process = [&](int x, int y) {
|
||||||
|
float sum = 0;
|
||||||
|
for (int i = 0; i < ksize; i++)
|
||||||
|
{
|
||||||
|
int p = access(y + i);
|
||||||
|
if (p != noval)
|
||||||
|
{
|
||||||
|
sum += kx[i] * src_data[x * src_step + p];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dst_data[x * width + y] = sum;
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
const int left = ksize - 1, right = width - (ksize - 1);
|
||||||
|
if (left >= right)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int j = 0; j < left; j++)
|
||||||
|
process(i, j);
|
||||||
|
for (int j = right; j < width; j++)
|
||||||
|
process(i, j);
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = left; j < right; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e8m2(right - j);
|
||||||
|
const uchar* extra = src_data + i * src_step + j - data->anchor_x;
|
||||||
|
auto sum = __riscv_vfmv_v_f_f32m8(0, vl);
|
||||||
|
auto src = __riscv_vfwcvt_f(__riscv_vwcvtu_x(__riscv_vle8_v_u8m2(extra, vl), vl), vl);
|
||||||
|
sum = __riscv_vfmacc(sum, kx[0], src, vl);
|
||||||
|
src = __riscv_vfslide1down(src, extra[vl], vl);
|
||||||
|
sum = __riscv_vfmacc(sum, kx[1], src, vl);
|
||||||
|
src = __riscv_vfslide1down(src, extra[vl + 1], vl);
|
||||||
|
sum = __riscv_vfmacc(sum, kx[2], src, vl);
|
||||||
|
if (ksize == 5)
|
||||||
|
{
|
||||||
|
src = __riscv_vfslide1down(src, extra[vl + 2], vl);
|
||||||
|
sum = __riscv_vfmacc(sum, kx[3], src, vl);
|
||||||
|
src = __riscv_vfslide1down(src, extra[vl + 3], vl);
|
||||||
|
sum = __riscv_vfmacc(sum, kx[4], src, vl);
|
||||||
|
}
|
||||||
|
__riscv_vse32(dst_data + i * width + j, sum, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int ksize>
|
||||||
|
static inline int sepFilterCol(int start, int end, sepFilter2D* data, const float* src_data, uchar* dst_data, size_t dst_step, int width, int height, int full_height, int offset_y)
|
||||||
|
{
|
||||||
|
constexpr int noval = std::numeric_limits<int>::max();
|
||||||
|
auto access = [&](int x) {
|
||||||
|
int pi;
|
||||||
|
if (data->borderType & BORDER_ISOLATED)
|
||||||
|
{
|
||||||
|
pi = borderInterpolate(x - data->anchor_y, height, data->borderType & ~BORDER_ISOLATED);
|
||||||
|
pi = pi < 0 ? noval : pi;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pi = borderInterpolate(offset_y + x - data->anchor_y, full_height, data->borderType);
|
||||||
|
pi = pi < 0 ? noval : pi - offset_y;
|
||||||
|
}
|
||||||
|
return pi;
|
||||||
|
};
|
||||||
|
|
||||||
|
const float* ky = reinterpret_cast<const float*>(data->kernely_data);
|
||||||
|
for (int i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
const float* row0 = access(i ) == noval ? nullptr : src_data + access(i ) * width;
|
||||||
|
const float* row1 = access(i + 1) == noval ? nullptr : src_data + access(i + 1) * width;
|
||||||
|
const float* row2 = access(i + 2) == noval ? nullptr : src_data + access(i + 2) * width;
|
||||||
|
const float* row3, *row4;
|
||||||
|
if (ksize == 5)
|
||||||
|
{
|
||||||
|
row3 = access(i + 3) == noval ? nullptr : src_data + access(i + 3) * width;
|
||||||
|
row4 = access(i + 4) == noval ? nullptr : src_data + access(i + 4) * width;
|
||||||
|
}
|
||||||
|
|
||||||
|
int vl;
|
||||||
|
for (int j = 0; j < width; j += vl)
|
||||||
|
{
|
||||||
|
vl = __riscv_vsetvl_e32m4(width - j);
|
||||||
|
auto v0 = row0 ? __riscv_vle32_v_f32m4(row0 + j, vl) : __riscv_vfmv_v_f_f32m4(0, vl);
|
||||||
|
auto v1 = row1 ? __riscv_vle32_v_f32m4(row1 + j, vl) : __riscv_vfmv_v_f_f32m4(0, vl);
|
||||||
|
auto v2 = row2 ? __riscv_vle32_v_f32m4(row2 + j, vl) : __riscv_vfmv_v_f_f32m4(0, vl);
|
||||||
|
auto sum = __riscv_vfmacc(__riscv_vfmacc(__riscv_vfmacc(__riscv_vfmv_v_f_f32m4(data->delta, vl), ky[0], v0, vl), ky[1], v1, vl), ky[2], v2, vl);
|
||||||
|
|
||||||
|
if (ksize == 5)
|
||||||
|
{
|
||||||
|
auto v3 = row3 ? __riscv_vle32_v_f32m4(row3 + j, vl) : __riscv_vfmv_v_f_f32m4(0, vl);
|
||||||
|
auto v4 = row4 ? __riscv_vle32_v_f32m4(row4 + j, vl) : __riscv_vfmv_v_f_f32m4(0, vl);
|
||||||
|
sum = __riscv_vfmacc(__riscv_vfmacc(sum, ky[3], v3, vl), ky[4], v4, vl);
|
||||||
|
}
|
||||||
|
if (data->dst_type == CV_16SC1)
|
||||||
|
{
|
||||||
|
__riscv_vse16(reinterpret_cast<short*>(dst_data + i * dst_step) + j, __riscv_vfncvt_x(sum, vl), vl);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
__riscv_vse32(reinterpret_cast<float*>(dst_data + i * dst_step) + j, sum, vl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs the separable filter described by `context` on a region of an image.
//
// The work is split into two passes, each dispatched through filter::invoke:
//   1. sepFilterRow writes horizontally filtered rows into a temporary float
//      buffer, including up to (ksize - 1) extra rows above and below the
//      region when the enclosing image provides them;
//   2. sepFilterCol combines those rows vertically and writes dst_data.
//
// src_data, src_step   first pixel of the region and its row stride in bytes
// dst_data, dst_step   destination and its row stride in bytes
// width, height        size of the region
// full_width,
// full_height,
// offset_x, offset_y   size of the enclosing image and position of the region
//                      inside it
//
// Returns the status of the column pass, or CV_HAL_ERROR_NOT_IMPLEMENTED when
// the kernel length is neither 3 nor 5 or the row pass reports that status.
inline int sepFilter(cvhalFilter2D *context, uchar *src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int full_width, int full_height, int offset_x, int offset_y)
{
    sepFilter2D* data = reinterpret_cast<sepFilter2D*>(context);
    const int padding = data->kernelx_length - 1;

    // Intermediate rows; `result` points at the row matching source row 0 so
    // that the padding rows above it are reachable with negative indices.
    std::vector<float> _result(width * (height + 2 * padding));
    float* result = _result.data() + width * padding;

    // Rows to pre-filter: the region itself plus as many neighbouring rows as
    // exist in the enclosing image, capped at `padding` on each side.
    const int rowBegin = -std::min(offset_y, padding);
    const int rowEnd = height + std::min(full_height - height - offset_y, padding);

    const int ksize = data->kernelx_length;

    int rowStatus;
    if (ksize == 3)
        rowStatus = filter::invoke(rowBegin, rowEnd, {sepFilterRow<3>}, data, src_data, src_step, result, width, full_width, offset_x);
    else if (ksize == 5)
        rowStatus = filter::invoke(rowBegin, rowEnd, {sepFilterRow<5>}, data, src_data, src_step, result, width, full_width, offset_x);
    else
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    if (rowStatus == CV_HAL_ERROR_NOT_IMPLEMENTED)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;

    if (ksize == 3)
        return filter::invoke(0, height, {sepFilterCol<3>}, data, result, dst_data, dst_step, width, height, full_height, offset_y);
    return filter::invoke(0, height, {sepFilterCol<5>}, data, result, dst_data, dst_step, width, height, full_height, offset_y);
}
|
||||||
|
|
||||||
|
// Destroys a context created by sepFilterInit().
// The handle is cast back to the sepFilter2D it was allocated as and deleted.
// Always returns CV_HAL_ERROR_OK.
inline int sepFilterFree(cvhalFilter2D* context)
{
    sepFilter2D* const state = reinterpret_cast<sepFilter2D*>(context);
    delete state;
    return CV_HAL_ERROR_OK;
}
|
||||||
|
} // cv::cv_hal_rvv::sepFilter
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user