fix underflow; add tests for the new nearest interpolation; add warpAffine nearest interpolation

This commit is contained in:
Yuantao Feng 2024-11-19 13:29:11 +08:00
parent 2ed6d0f590
commit 63e0b287b4
7 changed files with 3747 additions and 576 deletions

View File

@ -1761,6 +1761,35 @@ OPENCV_HAL_IMPL_RVV_PACK(v_int16, short, v_int32, 16, i16, i32, __riscv_vnclip,
OPENCV_HAL_IMPL_RVV_PACK_32(v_uint32, unsigned, v_uint64, 32, u32, u64, __riscv_vnclipu, __riscv_vnsrl)
OPENCV_HAL_IMPL_RVV_PACK_32(v_int32, int, v_int64, 32, i32, i64, __riscv_vnclip, __riscv_vnsra)
template <int N = VTraits<v_uint16>::max_nlanes>
inline v_uint16 v_pack(const v_uint32& a, const v_uint32& b)
{
// Narrow two u32 vectors into one u16 vector: the low half of the result
// holds the packed lanes of 'a', the high half the packed lanes of 'b'.
// v_pack_store() emits VTraits<v_uint32>::max_nlanes == N/2 ushorts, so
// only the first N/2 entries of bufa/bufb are initialized.
ushort bufa[N];
ushort bufb[N];
v_pack_store(bufa, a);
v_pack_store(bufb, b);
ushort buf[N];
// Loop over the N/2 valid lanes only. The previous bound (i < N) read
// uninitialized bufa/bufb entries and wrote buf[i+N/2] out of bounds for
// i >= N/2 (indices up to N+N/2-1 in an N-element array).
for (int i = 0; i < N/2; i++) {
    buf[i] = bufa[i];
    buf[i+N/2] = bufb[i];
}
return v_load(buf);
}
// Fully unrolled specialization selected when the template argument is 4
// (i.e. each u32 input vector carries 4 lanes). Packs 'a' into the low four
// result lanes and 'b' into the high four.
template <> inline v_uint16 v_pack<4>(const v_uint32& a, const v_uint32& b)
{
// Lane count of the u16 result vector; the unrolled stores below assume
// N >= 8 so buf[0..7] are in bounds — TODO confirm for this RVV config.
constexpr int N = VTraits<v_uint16>::max_nlanes;
ushort bufa[N];
ushort bufb[N];
// Each v_pack_store writes the 4 narrowed lanes of its u32 input.
v_pack_store(bufa, a);
v_pack_store(bufb, b);
ushort buf[N];
// Interleave: low half from 'a', high half from 'b'.
buf[0] = bufa[0]; buf[1] = bufa[1]; buf[2] = bufa[2]; buf[3] = bufa[3];
buf[4] = bufb[0]; buf[5] = bufb[1]; buf[6] = bufb[2]; buf[7] = bufb[3];
return v_load(buf);
}
#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, hwidth, width, hsuffix, suffix, cast, hvl, vl) \
inline _Tpvec v_pack_u(const _wTpvec& a, const _wTpvec& b) \
{ \

View File

@ -111,48 +111,6 @@ PERF_TEST_P( TestWarpPerspective, WarpPerspective,
SANITY_CHECK(dst, 1);
}
// Performance test: warpPerspective over large frames with a mild
// quadrilateral distortion, parameterized over image size, interpolation
// mode, border mode, and pixel type.
PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear,
Combine(
Values( Size(640,480), Size(1920,1080), Size(2592,1944) ),
InterType::all(),
BorderMode::all(),
Values( CV_8UC1, CV_8UC4 )
)
)
{
Size size;
int borderMode, interType, type;
// Unpack the Combine() tuple in declaration order.
size = get<0>(GetParam());
interType = get<1>(GetParam());
borderMode = get<2>(GetParam());
type = get<3>(GetParam());
Scalar borderColor = Scalar::all(150);
Mat src(size, type), dst(size, type);
// Fill the source with a gradient; for constant borders also blend the
// image edge toward borderColor so SANITY_CHECK tolerances hold at the rim.
cvtest::fillGradient<uint8_t>(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<uint8_t>(src, borderColor, 1);
// Distortion amount: 4% of the width, applied unevenly to the 4 corners.
int shift = static_cast<int>(src.cols*0.04);
Mat srcVertices = (Mat_<Vec2f>(1, 4) << Vec2f(0, 0),
Vec2f(static_cast<float>(size.width-1), 0),
Vec2f(static_cast<float>(size.width-1), static_cast<float>(size.height-1)),
Vec2f(0, static_cast<float>(size.height-1)));
Mat dstVertices = (Mat_<Vec2f>(1, 4) << Vec2f(0, static_cast<float>(shift)),
Vec2f(static_cast<float>(size.width-shift/2), 0),
Vec2f(static_cast<float>(size.width-shift), static_cast<float>(size.height-shift)),
Vec2f(static_cast<float>(shift/2), static_cast<float>(size.height-1)));
// Homography mapping the full source rectangle onto the shifted quad.
Mat warpMat = getPerspectiveTransform(srcVertices, dstVertices);
declare.in(src).out(dst);
declare.time(100);
TEST_CYCLE()
{
warpPerspective( src, dst, warpMat, size, interType, borderMode, borderColor );
}
// Allow per-pixel deviation of 1 against the stored reference output.
SANITY_CHECK(dst, 1);
}
PERF_TEST_P( TestRemap, map1_32fc1,
Combine(
Values( szVGA, sz1080p ),

View File

@ -1672,6 +1672,56 @@ void cv::remap( InputArray _src, OutputArray _dst,
int type = src.type(), depth = CV_MAT_DEPTH(type);
if (interpolation == INTER_NEAREST && map1.depth() == CV_32F) {
const auto *src_data = src.ptr<const uchar>();
auto *dst_data = dst.ptr<uchar>();
size_t src_step = src.step, dst_step = dst.step,
map1_step = map1.step, map2_step = map2.step;
int src_rows = src.rows, src_cols = src.cols;
int dst_rows = dst.rows, dst_cols = dst.cols;
const float *map1_data = map1.ptr<const float>();
const float *map2_data = map2.ptr<const float>();
switch (src.type()) {
case CV_8UC1: {
CV_CPU_DISPATCH(remapNearestInvoker_8UC1, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_8UC3: {
CV_CPU_DISPATCH(remapNearestInvoker_8UC3, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_8UC4: {
CV_CPU_DISPATCH(remapNearestInvoker_8UC4, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC1: {
CV_CPU_DISPATCH(remapNearestInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC3: {
CV_CPU_DISPATCH(remapNearestInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC4: {
CV_CPU_DISPATCH(remapNearestInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC1: {
CV_CPU_DISPATCH(remapNearestInvoker_32FC1, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC3: {
CV_CPU_DISPATCH(remapNearestInvoker_32FC3, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC4: {
CV_CPU_DISPATCH(remapNearestInvoker_32FC4, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
// no default
}
}
if (interpolation == INTER_LINEAR) {
if (map1.depth() == CV_32F) {
const auto *src_data = src.ptr<const uint8_t>();
@ -1708,27 +1758,27 @@ void cv::remap( InputArray _src, OutputArray _dst,
break;
}
case CV_16UC1: {
CV_CPU_DISPATCH(remapLinearInvoker_16UC1, ((uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
CV_CPU_DISPATCH(remapLinearInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC3: {
CV_CPU_DISPATCH(remapLinearInvoker_16UC3, ((uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
CV_CPU_DISPATCH(remapLinearInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC4: {
CV_CPU_DISPATCH(remapLinearInvoker_16UC4, ((uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
CV_CPU_DISPATCH(remapLinearInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC1: {
CV_CPU_DISPATCH(remapLinearInvoker_32FC1, ((float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
CV_CPU_DISPATCH(remapLinearInvoker_32FC1, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC3: {
CV_CPU_DISPATCH(remapLinearInvoker_32FC3, ((float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
CV_CPU_DISPATCH(remapLinearInvoker_32FC3, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC4: {
CV_CPU_DISPATCH(remapLinearInvoker_32FC4, ((float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
CV_CPU_DISPATCH(remapLinearInvoker_32FC4, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
// no default
@ -2657,6 +2707,48 @@ static void warpAffine(int src_type,
Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
if (interpolation == INTER_NEAREST) {
switch (src_type) {
case CV_8UC1: {
CV_CPU_DISPATCH(warpAffineNearestInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_8UC3: {
CV_CPU_DISPATCH(warpAffineNearestInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_8UC4: {
CV_CPU_DISPATCH(warpAffineNearestInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC1: {
CV_CPU_DISPATCH(warpAffineNearestInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC3: {
CV_CPU_DISPATCH(warpAffineNearestInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC4: {
CV_CPU_DISPATCH(warpAffineNearestInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC1: {
CV_CPU_DISPATCH(warpAffineNearestInvoker_32FC1, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC3: {
CV_CPU_DISPATCH(warpAffineNearestInvoker_32FC3, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC4: {
CV_CPU_DISPATCH(warpAffineNearestInvoker_32FC4, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
// no default
}
}
if (interpolation == INTER_LINEAR) {
switch (src_type) {
case CV_8UC1: {
@ -3324,46 +3416,99 @@ static void warpPerspective(int src_type,
{
CALL_HAL(warpPerspective, cv_hal_warpPerspective, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue);
if (interpolation == INTER_NEAREST) {
switch (src_type) {
case CV_8UC1: {
CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_8UC3: {
CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_8UC4: {
CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC1: {
CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC3: {
CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC4: {
CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC1: {
CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_32FC1, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC3: {
CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_32FC3, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC4: {
CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_32FC4, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
}
}
if (interpolation == INTER_LINEAR) {
switch (src_type) {
case CV_8UC1: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(warpPerspectiveLinearApproxInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
} else {
CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
}
case CV_8UC3: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(warpPerspectiveLinearApproxInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
} else {
CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
}
case CV_8UC4: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(warpPerspectiveLinearApproxInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
} else {
CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
}
case CV_16UC1: {
CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC3: {
CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC4: {
CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC1: {
CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_32FC1, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC3: {
CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_32FC3, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC4: {
CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_32FC4, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
// no default
}

View File

@ -3,57 +3,61 @@
// of this distribution and at http://opencv.org/license.html.
// Shuffle
#define CV_WARP_NEAREST_SCALAR_SHUFFLE_DEF(cn, dtype_reg) \
dtype_reg p00##cn;
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(cn, dtype_reg) \
dtype_reg p00##cn, p01##cn, p10##cn, p11##cn;
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_C1(dtype_reg, dtype_ptr) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
#define CV_WARP_SCALAR_SHUFFLE_DEF_C1(inter, dtype_reg, dtype_ptr) \
CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
const dtype_ptr *srcptr = src + srcstep * iy + ix;
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_C3(dtype_reg, dtype_ptr) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(r, dtype_reg) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(b, dtype_reg) \
#define CV_WARP_SCALAR_SHUFFLE_DEF_C3(inter, dtype_reg, dtype_ptr) \
CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(r, dtype_reg) \
CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(b, dtype_reg) \
const dtype_ptr *srcptr = src + srcstep * iy + ix*3;
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_C4(dtype_reg, dtype_ptr) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(r, dtype_reg) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(b, dtype_reg) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(a, dtype_reg) \
#define CV_WARP_SCALAR_SHUFFLE_DEF_C4(inter, dtype_reg, dtype_ptr) \
CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(r, dtype_reg) \
CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(b, dtype_reg) \
CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(a, dtype_reg) \
const dtype_ptr *srcptr = src + srcstep * iy + ix*4;
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_8U(CN) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##CN(int, uint8_t)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_16U(CN) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##CN(int, uint16_t)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_32F(CN) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##CN(float, float)
#define CV_WARP_SCALAR_SHUFFLE_DEF_8U(INTER, CN) \
CV_WARP_SCALAR_SHUFFLE_DEF_##CN(INTER, int, uint8_t)
#define CV_WARP_SCALAR_SHUFFLE_DEF_16U(INTER, CN) \
CV_WARP_SCALAR_SHUFFLE_DEF_##CN(INTER, int, uint16_t)
#define CV_WARP_SCALAR_SHUFFLE_DEF_32F(INTER, CN) \
CV_WARP_SCALAR_SHUFFLE_DEF_##CN(INTER, float, float)
#define CV_WARP_NEAREST_SCALAR_SHUFFLE_LOAD(CN, cn, i) \
p00##CN = srcptr[i];
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(CN, cn, i) \
p00##CN = srcptr[i]; p01##CN = srcptr[i + cn]; \
p10##CN = srcptr[srcstep + i]; p11##CN = srcptr[srcstep + cn + i];
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C1() \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 1, 0)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C3() \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(r, 3, 0) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 3, 1) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(b, 3, 2)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C4() \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(r, 4, 0) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 4, 1) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(b, 4, 2) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(a, 4, 3)
#define CV_WARP_SCALAR_SHUFFLE_LOAD_C1(inter) \
CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(g, 1, 0)
#define CV_WARP_SCALAR_SHUFFLE_LOAD_C3(inter) \
CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(r, 3, 0) \
CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(g, 3, 1) \
CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(b, 3, 2)
#define CV_WARP_SCALAR_SHUFFLE_LOAD_C4(inter) \
CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(r, 4, 0) \
CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(g, 4, 1) \
CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(b, 4, 2) \
CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(a, 4, 3)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C1() \
#define CV_WARP_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C1() \
dstptr[x] = bval[0];
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C3() \
#define CV_WARP_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C3() \
dstptr[x*3] = bval[0]; \
dstptr[x*3+1] = bval[1]; \
dstptr[x*3+2] = bval[2];
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C4() \
#define CV_WARP_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C4() \
dstptr[x*4] = bval[0]; \
dstptr[x*4+1] = bval[1]; \
dstptr[x*4+2] = bval[2]; \
dstptr[x*4+3] = bval[3];
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C1(dy, dx, pxy) \
#define CV_WARP_SCALAR_FETCH_PIXEL_C1(dy, dx, pxy) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx; \
pxy##g = srcptr[ofs]; \
@ -67,7 +71,7 @@
size_t glob_ofs = iy_*srcstep + ix_; \
pxy##g = src[glob_ofs]; \
}
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C3(dy, dx, pxy) \
#define CV_WARP_SCALAR_FETCH_PIXEL_C3(dy, dx, pxy) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx*3; \
pxy##r = srcptr[ofs]; \
@ -89,7 +93,7 @@
pxy##g = src[glob_ofs+1]; \
pxy##b = src[glob_ofs+2]; \
}
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C4(dy, dx, pxy) \
#define CV_WARP_SCALAR_FETCH_PIXEL_C4(dy, dx, pxy) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx*4; \
pxy##r = srcptr[ofs]; \
@ -115,27 +119,31 @@
pxy##b = src[glob_ofs+2]; \
pxy##a = src[glob_ofs+3]; \
}
#define CV_WARP_NEAREST_SCALAR_FETCH_PIXEL(CN) \
CV_WARP_SCALAR_FETCH_PIXEL_##CN(0, 0, p00)
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL(CN) \
CV_WARP_SCALAR_FETCH_PIXEL_##CN(0, 0, p00) \
CV_WARP_SCALAR_FETCH_PIXEL_##CN(0, 1, p01) \
CV_WARP_SCALAR_FETCH_PIXEL_##CN(1, 0, p10) \
CV_WARP_SCALAR_FETCH_PIXEL_##CN(1, 1, p11)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE(CN, DEPTH) \
#define CV_WARP_SCALAR_SHUFFLE(INTER, CN, DEPTH) \
int ix = cvFloor(sx), iy = cvFloor(sy); \
sx -= ix; sy -= iy; \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##DEPTH(CN); \
CV_WARP_SCALAR_SHUFFLE_DEF_##DEPTH(INTER, CN); \
if ((((unsigned)ix < (unsigned)(srccols-1)) & \
((unsigned)iy < (unsigned)(srcrows-1))) != 0) { \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_##CN() \
CV_WARP_SCALAR_SHUFFLE_LOAD_##CN(INTER) \
} else { \
if ((border_type == BORDER_CONSTANT || border_type == BORDER_TRANSPARENT) && \
(((unsigned)(ix+1) >= (unsigned)(srccols+1))| \
((unsigned)(iy+1) >= (unsigned)(srcrows+1))) != 0) { \
if (border_type == BORDER_CONSTANT) { \
CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_##CN() \
CV_WARP_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_##CN() \
} \
continue; \
} \
CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(0, 0, p00); \
CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(0, 1, p01); \
CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(1, 0, p10); \
CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(1, 1, p11); \
CV_WARP_##INTER##_SCALAR_FETCH_PIXEL(CN); \
}
@ -175,23 +183,27 @@
// Store
#define CV_WARP_LINEAR_SCALAR_STORE_C1(dtype) \
dstptr[x] = saturate_cast<dtype>(v0g);
#define CV_WARP_LINEAR_SCALAR_STORE_C3(dtype) \
dstptr[x*3] = saturate_cast<dtype>(v0r); \
dstptr[x*3+1] = saturate_cast<dtype>(v0g); \
dstptr[x*3+2] = saturate_cast<dtype>(v0b);
#define CV_WARP_LINEAR_SCALAR_STORE_C4(dtype) \
dstptr[x*4] = saturate_cast<dtype>(v0r); \
dstptr[x*4+1] = saturate_cast<dtype>(v0g); \
dstptr[x*4+2] = saturate_cast<dtype>(v0b); \
dstptr[x*4+3] = saturate_cast<dtype>(v0a);
#define CV_WARP_LINEAR_SCALAR_STORE_8U(CN) \
CV_WARP_LINEAR_SCALAR_STORE_##CN(uint8_t)
#define CV_WARP_LINEAR_SCALAR_STORE_16U(CN) \
CV_WARP_LINEAR_SCALAR_STORE_##CN(uint16_t)
#define CV_WARP_LINEAR_SCALAR_STORE_32F(CN) \
CV_WARP_LINEAR_SCALAR_STORE_##CN(float)
#define CV_WARP_SCALAR_STORE_C1(dtype, var) \
dstptr[x] = saturate_cast<dtype>(var##g);
#define CV_WARP_SCALAR_STORE_C3(dtype, var) \
dstptr[x*3] = saturate_cast<dtype>(var##r); \
dstptr[x*3+1] = saturate_cast<dtype>(var##g); \
dstptr[x*3+2] = saturate_cast<dtype>(var##b);
#define CV_WARP_SCALAR_STORE_C4(dtype, var) \
dstptr[x*4] = saturate_cast<dtype>(var##r); \
dstptr[x*4+1] = saturate_cast<dtype>(var##g); \
dstptr[x*4+2] = saturate_cast<dtype>(var##b); \
dstptr[x*4+3] = saturate_cast<dtype>(var##a);
#define CV_WARP_SCALAR_STORE_8U(CN, var) \
CV_WARP_SCALAR_STORE_##CN(uint8_t, var)
#define CV_WARP_SCALAR_STORE_16U(CN, var) \
CV_WARP_SCALAR_STORE_##CN(uint16_t, var)
#define CV_WARP_SCALAR_STORE_32F(CN, var) \
CV_WARP_SCALAR_STORE_##CN(float, var)
#define CV_WARP_NEAREST_SCALAR_STORE(CN, DEPTH) \
CV_WARP_SCALAR_STORE_##DEPTH(CN, p00)
#define CV_WARP_LINEAR_SCALAR_STORE(CN, DEPTH) \
CV_WARP_LINEAR_SCALAR_STORE_##DEPTH(CN)
CV_WARP_SCALAR_STORE_##DEPTH(CN, v0)
#define CV_WARP_SCALAR_STORE(INTER, CN, DEPTH) \
CV_WARP_##INTER##_SCALAR_STORE(CN, DEPTH)

View File

@ -3,6 +3,26 @@
// of this distribution and at http://opencv.org/license.html.
// Shuffle (all pixels within image)
#define CV_WARP_NEAREST_VECTOR_SHUFFLE_ALLWITHIN_C1(dtype) \
for (int i = 0; i < uf; i++) { \
const dtype* srcptr = src + addr[i]; \
pixbuf[i] = srcptr[0];\
}
#define CV_WARP_NEAREST_VECTOR_SHUFFLE_ALLWITHIN_C3(dtype) \
for (int i = 0; i < uf; i++) { \
const dtype* srcptr = src + addr[i]; \
pixbuf[3*i] = srcptr[0];\
pixbuf[3*i + 1] = srcptr[1]; \
pixbuf[3*i + 2] = srcptr[2]; \
}
#define CV_WARP_NEAREST_VECTOR_SHUFFLE_ALLWITHIN_C4(dtype) \
for (int i = 0; i < uf; i++) { \
const dtype* srcptr = src + addr[i]; \
pixbuf[4*i] = srcptr[0];\
pixbuf[4*i + 1] = srcptr[1]; \
pixbuf[4*i + 2] = srcptr[2]; \
pixbuf[4*i + 3] = srcptr[3]; \
}
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_C1(dtype) \
for (int i = 0; i < uf; i++) { \
const dtype* srcptr = src + addr[i]; \
@ -47,15 +67,14 @@
pixbuf[i + uf*11] = srcptr[srcstep + 6]; \
pixbuf[i + uf*15] = srcptr[srcstep + 7]; \
}
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_8U(CN) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint8_t)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_16U(CN) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint16_t)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_32F(CN) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(float)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN(CN, DEPTH) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##DEPTH(CN)
#define CV_WARP_VECTOR_SHUFFLE_ALLWITHIN_8U(INTER, CN) \
CV_WARP_##INTER##_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint8_t)
#define CV_WARP_VECTOR_SHUFFLE_ALLWITHIN_16U(INTER, CN) \
CV_WARP_##INTER##_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint16_t)
#define CV_WARP_VECTOR_SHUFFLE_ALLWITHIN_32F(INTER, CN) \
CV_WARP_##INTER##_VECTOR_SHUFFLE_ALLWITHIN_##CN(float)
#define CV_WARP_VECTOR_SHUFFLE_ALLWITHIN(INTER, CN, DEPTH) \
CV_WARP_VECTOR_SHUFFLE_ALLWITHIN_##DEPTH(INTER, CN)
// Shuffle (ARM NEON)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_LOAD() \
@ -112,39 +131,39 @@
// Shuffle (not all pixels within image)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC1() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC1() \
v_store_low(dstptr + x, bval_v0);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC3() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC3() \
v_store_low(dstptr + x*3, bval_v0); \
v_store_low(dstptr + x*3 + uf, bval_v1); \
v_store_low(dstptr + x*3 + uf*2, bval_v2);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC4() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC4() \
v_store_low(dstptr + x*4, bval_v0); \
v_store_low(dstptr + x*4 + uf, bval_v1); \
v_store_low(dstptr + x*4 + uf*2, bval_v2); \
v_store_low(dstptr + x*4 + uf*3, bval_v3);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC1() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC1() \
v_store(dstptr + x, bval_v0);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC3() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC3() \
v_store(dstptr + x*3, bval_v0); \
v_store(dstptr + x*3 + uf, bval_v1); \
v_store(dstptr + x*3 + uf*2, bval_v2);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC4() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC4() \
v_store(dstptr + x*4, bval_v0); \
v_store(dstptr + x*4 + uf, bval_v1); \
v_store(dstptr + x*4 + uf*2, bval_v2); \
v_store(dstptr + x*4 + uf*3, bval_v3);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC1() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC1() \
v_store(dstptr + x, bval_v0_l); \
v_store(dstptr + x + vlanes_32, bval_v0_h);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC3() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC3() \
v_store(dstptr + x*3, bval_v0_l); \
v_store(dstptr + x*3 + vlanes_32, bval_v0_h); \
v_store(dstptr + x*3 + uf, bval_v1_l); \
v_store(dstptr + x*3 + uf + vlanes_32, bval_v1_h); \
v_store(dstptr + x*3 + uf*2, bval_v2_l); \
v_store(dstptr + x*3 + uf*2 + vlanes_32, bval_v2_h);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC4() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC4() \
v_store(dstptr + x*4, bval_v0_l); \
v_store(dstptr + x*4 + vlanes_32, bval_v0_h); \
v_store(dstptr + x*4 + uf, bval_v1_l); \
@ -154,70 +173,83 @@
v_store(dstptr + x*4 + uf*3, bval_v3_l); \
v_store(dstptr + x*4 + uf*3 + vlanes_32, bval_v3_h);
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C1(dy, dx, pixbuf_ofs) \
#define CV_WARP_VECTOR_FETCH_PIXEL_C1(dy, dx, pixbuf_ofs0, pixbuf_ofs1) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t addr_i = addr[i] + dy*srcstep + dx; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
} else if (border_type == BORDER_CONSTANT) { \
pixbuf[i + pixbuf_ofs] = bval[0]; \
pixbuf[i + pixbuf_ofs0] = bval[0]; \
} else if (border_type == BORDER_TRANSPARENT) { \
pixbuf[i + pixbuf_ofs] = dstptr[x + i]; \
pixbuf[i + pixbuf_ofs0] = dstptr[x + i]; \
} else { \
int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
size_t addr_i = iy_*srcstep + ix_; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
}
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C3(dy, dx, pixbuf_ofs) \
#define CV_WARP_VECTOR_FETCH_PIXEL_C3(dy, dx, pixbuf_ofs0, pixbuf_ofs1) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t addr_i = addr[i] + dy*srcstep + dx*3; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = src[addr_i+2]; \
} else if (border_type == BORDER_CONSTANT) { \
pixbuf[i + pixbuf_ofs] = bval[0]; \
pixbuf[i + pixbuf_ofs + uf*4] = bval[1]; \
pixbuf[i + pixbuf_ofs + uf*8] = bval[2]; \
pixbuf[i + pixbuf_ofs0] = bval[0]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = bval[1]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = bval[2]; \
} else if (border_type == BORDER_TRANSPARENT) { \
pixbuf[i + pixbuf_ofs] = dstptr[(x + i)*3]; \
pixbuf[i + pixbuf_ofs + uf*4] = dstptr[(x + i)*3 + 1]; \
pixbuf[i + pixbuf_ofs + uf*8] = dstptr[(x + i)*3 + 2]; \
pixbuf[i + pixbuf_ofs0] = dstptr[(x + i)*3]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = dstptr[(x + i)*3 + 1]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = dstptr[(x + i)*3 + 2]; \
} else { \
int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
size_t addr_i = iy_*srcstep + ix_*3; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = src[addr_i+2]; \
}
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C4(dy, dx, pixbuf_ofs) \
#define CV_WARP_VECTOR_FETCH_PIXEL_C4(dy, dx, pixbuf_ofs0, pixbuf_ofs1) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t addr_i = addr[i] + dy*srcstep + dx*4; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
pixbuf[i + pixbuf_ofs + uf*12] = src[addr_i+3]; \
pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = src[addr_i+2]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*3] = src[addr_i+3]; \
} else if (border_type == BORDER_CONSTANT) { \
pixbuf[i + pixbuf_ofs] = bval[0]; \
pixbuf[i + pixbuf_ofs + uf*4] = bval[1]; \
pixbuf[i + pixbuf_ofs + uf*8] = bval[2]; \
pixbuf[i + pixbuf_ofs + uf*12] = bval[3]; \
pixbuf[i + pixbuf_ofs0] = bval[0]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = bval[1]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = bval[2]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*3] = bval[3]; \
} else if (border_type == BORDER_TRANSPARENT) { \
pixbuf[i + pixbuf_ofs] = dstptr[(x + i)*4]; \
pixbuf[i + pixbuf_ofs + uf*4] = dstptr[(x + i)*4 + 1]; \
pixbuf[i + pixbuf_ofs + uf*8] = dstptr[(x + i)*4 + 2]; \
pixbuf[i + pixbuf_ofs + uf*12] = dstptr[(x + i)*4 + 3]; \
pixbuf[i + pixbuf_ofs0] = dstptr[(x + i)*4]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = dstptr[(x + i)*4 + 1]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = dstptr[(x + i)*4 + 2]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*3] = dstptr[(x + i)*4 + 3]; \
} else { \
int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
size_t addr_i = iy_*srcstep + ix_*4; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
pixbuf[i + pixbuf_ofs + uf*12] = src[addr_i+3]; \
pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = src[addr_i+2]; \
pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*3] = src[addr_i+3]; \
}
// Nearest-neighbor fetch: a single source sample per lane (dy = dx = 0).
// The lane offset (pixbuf_ofs0) is chosen so the channels of lane i land
// contiguously in pixbuf: C1 -> pixbuf[i]; C3 -> pixbuf[3*i..3*i+2]
// (i + 2*i base, channel stride 1); C4 -> pixbuf[4*i..4*i+3] (i + 3*i base).
#define CV_WARP_NEAREST_VECTOR_FETCH_PIXEL_C1() \
CV_WARP_VECTOR_FETCH_PIXEL_C1(0, 0, 0, 1);
#define CV_WARP_NEAREST_VECTOR_FETCH_PIXEL_C3() \
CV_WARP_VECTOR_FETCH_PIXEL_C3(0, 0, 2*i, 1);
#define CV_WARP_NEAREST_VECTOR_FETCH_PIXEL_C4() \
CV_WARP_VECTOR_FETCH_PIXEL_C4(0, 0, 3*i, 1);
// Dispatch on channel count (CN is one of C1/C3/C4).
#define CV_WARP_NEAREST_VECTOR_FETCH_PIXEL(CN) \
CV_WARP_NEAREST_VECTOR_FETCH_PIXEL_##CN()
// Bilinear fetch: the four neighbors (0,0), (0,1), (1,0), (1,1) of each lane,
// stored plane-wise in pixbuf at lane offsets 0, uf, uf*2, uf*3 with a
// per-channel stride of uf*4.
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL(CN) \
CV_WARP_VECTOR_FETCH_PIXEL_##CN(0, 0, 0, uf*4); \
CV_WARP_VECTOR_FETCH_PIXEL_##CN(0, 1, uf, uf*4); \
CV_WARP_VECTOR_FETCH_PIXEL_##CN(1, 0, uf*2, uf*4); \
CV_WARP_VECTOR_FETCH_PIXEL_##CN(1, 1, uf*3, uf*4);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN(CN, DEPTH) \
#define CV_WARP_VECTOR_SHUFFLE_NOTALLWITHIN(INTER, CN, DEPTH) \
if (border_type == BORDER_CONSTANT || border_type == BORDER_TRANSPARENT) { \
mask_0 = v_lt(v_reinterpret_as_u32(v_add(src_ix0, one)), outer_scols); \
mask_1 = v_lt(v_reinterpret_as_u32(v_add(src_ix1, one)), outer_scols); \
@ -226,7 +258,7 @@
v_uint16 outer_mask = v_pack(mask_0, mask_1); \
if (v_reduce_max(outer_mask) == 0) { \
if (border_type == BORDER_CONSTANT) { \
CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_##DEPTH##CN() \
CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_##DEPTH##CN() \
} \
continue; \
} \
@ -237,10 +269,7 @@
vx_store(src_iy + vlanes_32, src_iy1); \
for (int i = 0; i < uf; i++) { \
int ix = src_ix[i], iy = src_iy[i]; \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(0, 0, 0); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(0, 1, uf); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(1, 0, uf*2); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(1, 1, uf*3); \
CV_WARP_##INTER##_VECTOR_FETCH_PIXEL(CN) \
}
// Shuffle (not all pixels within image) (ARM NEON)
@ -263,25 +292,94 @@
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8(CN) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_##CN()
// Load pixels for linear interpolation (uint8_t -> int16_t)
// [New] Load pixels for interpolation
// Nearest-neighbor loads: read the gathered sample for channel <cn> from
// pixbuf + uf*i into a wide register. 8U->16U widens via vx_load_expand,
// 16U->16U loads directly, 32F->32F splits into low/high halves (suffix l/h)
// of vlanes_32 lanes each.
#define CV_WARP_VECTOR_NEAREST_LOAD_CN_8U_16U(cn, i) \
v_uint16 f00##cn = vx_load_expand(pixbuf + uf * i);
#define CV_WARP_VECTOR_NEAREST_LOAD_CN_16U_16U(cn, i) \
v_uint16 f00##cn = vx_load(pixbuf + uf * i);
#define CV_WARP_VECTOR_NEAREST_LOAD_CN_32F_32F(cn, i) \
v_float32 f00##cn##l = vx_load(pixbuf + uf * i); \
v_float32 f00##cn##h = vx_load(pixbuf + uf * i + vlanes_32);
// Channel-count dispatchers: one load per channel (g, or r/g/b[/a]),
// parameterized on interpolation kind (INTER) and source/dest depth.
#define CV_WARP_VECTOR_INTER_LOAD_C1(INTER, SDEPTH, DDEPTH) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(g, 0)
#define CV_WARP_VECTOR_INTER_LOAD_C3(INTER, SDEPTH, DDEPTH) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(r, 0) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(g, 1) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(b, 2)
#define CV_WARP_VECTOR_INTER_LOAD_C4(INTER, SDEPTH, DDEPTH) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(r, 0) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(g, 1) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(b, 2) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(a, 3)
// Entry point used by the warp kernels: INTER in {NEAREST, LINEAR}.
#define CV_WARP_VECTOR_INTER_LOAD(INTER, CN, SDEPTH, DDEPTH) \
CV_WARP_VECTOR_INTER_LOAD_##CN(INTER, SDEPTH, DDEPTH)
// [New] Store
// Nearest-neighbor stores: write the f00* registers to dstptr at column x.
// Channels are written plane-wise with stride uf between channel planes.
// 16U -> 8U: narrow with saturation via v_pack_store.
#define CV_WARP_VECTOR_NEAREST_STORE_C1_16U_8U() \
v_pack_store(dstptr + x, f00g);
#define CV_WARP_VECTOR_NEAREST_STORE_C3_16U_8U() \
v_pack_store(dstptr + 3*x, f00r); \
v_pack_store(dstptr + 3*x + uf, f00g); \
v_pack_store(dstptr + 3*x + uf*2, f00b);
#define CV_WARP_VECTOR_NEAREST_STORE_C4_16U_8U() \
v_pack_store(dstptr + 4*x, f00r); \
v_pack_store(dstptr + 4*x + uf, f00g); \
v_pack_store(dstptr + 4*x + uf*2, f00b); \
v_pack_store(dstptr + 4*x + uf*3, f00a);
// 16U -> 16U: store as-is.
#define CV_WARP_VECTOR_NEAREST_STORE_C1_16U_16U() \
vx_store(dstptr + x, f00g);
#define CV_WARP_VECTOR_NEAREST_STORE_C3_16U_16U() \
vx_store(dstptr + 3*x, f00r); \
vx_store(dstptr + 3*x + uf, f00g); \
vx_store(dstptr + 3*x + uf*2, f00b);
#define CV_WARP_VECTOR_NEAREST_STORE_C4_16U_16U() \
vx_store(dstptr + 4*x, f00r); \
vx_store(dstptr + 4*x + uf, f00g); \
vx_store(dstptr + 4*x + uf*2, f00b); \
vx_store(dstptr + 4*x + uf*3, f00a);
// 32F -> 32F: each channel is held in two registers (l/h halves of
// vlanes_32 lanes), stored back-to-back.
#define CV_WARP_VECTOR_NEAREST_STORE_C1_32F_32F() \
vx_store(dstptr + x, f00gl); \
vx_store(dstptr + x + vlanes_32, f00gh);
#define CV_WARP_VECTOR_NEAREST_STORE_C3_32F_32F() \
vx_store(dstptr + 3*x, f00rl); \
vx_store(dstptr + 3*x + vlanes_32, f00rh); \
vx_store(dstptr + 3*x + uf, f00gl); \
vx_store(dstptr + 3*x + uf + vlanes_32, f00gh); \
vx_store(dstptr + 3*x + uf*2, f00bl); \
vx_store(dstptr + 3*x + uf*2 + vlanes_32, f00bh);
#define CV_WARP_VECTOR_NEAREST_STORE_C4_32F_32F() \
vx_store(dstptr + 4*x, f00rl); \
vx_store(dstptr + 4*x + vlanes_32, f00rh); \
vx_store(dstptr + 4*x + uf, f00gl); \
vx_store(dstptr + 4*x + uf + vlanes_32, f00gh); \
vx_store(dstptr + 4*x + uf*2, f00bl); \
vx_store(dstptr + 4*x + uf*2 + vlanes_32, f00bh); \
vx_store(dstptr + 4*x + uf*3, f00al); \
vx_store(dstptr + 4*x + uf*3 + vlanes_32, f00ah);
// Entry point: dispatch on interpolation, channels and depths.
#define CV_WARP_VECTOR_INTER_STORE(INTER, CN, SDEPTH, DDEPTH) \
CV_WARP_VECTOR_##INTER##_STORE_##CN##_##SDEPTH##_##DDEPTH()
// uint8 -> int16 loads for interpolation. f00 is the (top-left) sample;
// the LINEAR variant additionally loads the f01/f10/f11 neighbor planes,
// spaced uf elements apart in pixbuf.
#define CV_WARP_NEAREST_VECTOR_INTER_LOAD_CN_U8S16(cn, i) \
v_int16 f00##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * i));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(cn, i) \
v_int16 f00##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * i)), \
f01##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+1))), \
f10##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+2))), \
f11##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+3)));
// NOTE(review): the CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_* wrappers below
// look like the pre-refactor, LINEAR-only variants of the INTER-parameterized
// wrappers that follow -- possibly a diff-render remnant; confirm upstream.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 0)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(b, 8)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(b, 8) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(a, 12)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_##CN();
// INTER-parameterized wrappers: NEAREST expands only f00 per channel,
// LINEAR expands all four neighbor registers.
#define CV_WARP_VECTOR_INTER_LOAD_U8S16_C1(INTER) \
CV_WARP_##INTER##_VECTOR_INTER_LOAD_CN_U8S16(g, 0)
#define CV_WARP_VECTOR_INTER_LOAD_U8S16_C3(INTER) \
CV_WARP_##INTER##_VECTOR_INTER_LOAD_CN_U8S16(r, 0) \
CV_WARP_##INTER##_VECTOR_INTER_LOAD_CN_U8S16(g, 4) \
CV_WARP_##INTER##_VECTOR_INTER_LOAD_CN_U8S16(b, 8)
#define CV_WARP_VECTOR_INTER_LOAD_U8S16_C4(INTER) \
CV_WARP_##INTER##_VECTOR_INTER_LOAD_CN_U8S16(r, 0) \
CV_WARP_##INTER##_VECTOR_INTER_LOAD_CN_U8S16(g, 4) \
CV_WARP_##INTER##_VECTOR_INTER_LOAD_CN_U8S16(b, 8) \
CV_WARP_##INTER##_VECTOR_INTER_LOAD_CN_U8S16(a, 12)
#define CV_WARP_VECTOR_INTER_LOAD_U8S16(INTER, CN) \
CV_WARP_VECTOR_INTER_LOAD_U8S16_##CN(INTER);
// Load pixels for linear interpolation (uint8_t -> int16_t) (ARM NEON)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16_NEON(cn) \
@ -556,9 +654,30 @@
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F16U8(CN) \
CV_WARP_LINEAR_VECTOR_INTER_STORE_F16U8_##CN()
// Special case for C4 load, shuffle and bilinear interpolation
#define CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_8UC4_I(ofs) \
// Special case for C4 shuffle, interpolation and store
// SIMD128, nearest
// SIMD128 nearest, C4 fast path: each lane (ofs) loads one whole 4-channel
// pixel straight from src + addr[i+ofs] into a wide register, so no pixbuf
// round-trip is needed. 8U widens via vx_load_expand_q, 16U via
// vx_load_expand, 32F loads directly.
#define CV_WARP_SIMD128_NEAREST_SHUFFLE_INTER_8UC4_I(ofs) \
const uint8_t *srcptr##ofs = src + addr[i+ofs]; \
v_uint32 i##ofs##_pix0 = vx_load_expand_q(srcptr##ofs);
#define CV_WARP_SIMD128_NEAREST_SHUFFLE_INTER_16UC4_I(ofs) \
const uint16_t *srcptr##ofs = src + addr[i+ofs]; \
v_uint32 i##ofs##_pix0 = vx_load_expand(srcptr##ofs);
#define CV_WARP_SIMD128_NEAREST_SHUFFLE_INTER_32FC4_I(ofs) \
const float *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_pix0 = vx_load(srcptr##ofs);
// Stores: pack two u32 pixel registers into one u16 register, then narrow
// (with saturation) to u8 for the 8U case; 32F stores each pixel directly.
#define CV_WARP_SIMD128_NEAREST_STORE_8UC4_I() \
v_pack_store(dstptr + 4*(x+i), v_pack(i0_pix0, i1_pix0)); \
v_pack_store(dstptr + 4*(x+i+2), v_pack(i2_pix0, i3_pix0));
#define CV_WARP_SIMD128_NEAREST_STORE_16UC4_I() \
vx_store(dstptr + 4*(x+i), v_pack(i0_pix0, i1_pix0)); \
vx_store(dstptr + 4*(x+i+2), v_pack(i2_pix0, i3_pix0));
#define CV_WARP_SIMD128_NEAREST_STORE_32FC4_I() \
vx_store(dstptr + 4*(x+i), i0_pix0); \
vx_store(dstptr + 4*(x+i+1), i1_pix0); \
vx_store(dstptr + 4*(x+i+2), i2_pix0); \
vx_store(dstptr + 4*(x+i+3), i3_pix0);
// SIMD128, bilinear
#define CV_WARP_SIMD128_LINEAR_SHUFFLE_INTER_8UC4_I(ofs) \
const uint8_t *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_pix0 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(srcptr##ofs))); \
v_float32 i##ofs##_pix1 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(srcptr##ofs+4))); \
@ -569,7 +688,7 @@
i##ofs##_pix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix1, i##ofs##_pix0), i##ofs##_pix0); \
i##ofs##_pix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix3, i##ofs##_pix2), i##ofs##_pix2); \
i##ofs##_pix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_pix2, i##ofs##_pix0), i##ofs##_pix0);
#define CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_16UC4_I(ofs) \
#define CV_WARP_SIMD128_LINEAR_SHUFFLE_INTER_16UC4_I(ofs) \
const uint16_t *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_pix0 = v_cvt_f32(v_reinterpret_as_s32(vx_load_expand(srcptr##ofs))); \
v_float32 i##ofs##_pix1 = v_cvt_f32(v_reinterpret_as_s32(vx_load_expand(srcptr##ofs+4))); \
@ -580,7 +699,7 @@
i##ofs##_pix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix1, i##ofs##_pix0), i##ofs##_pix0); \
i##ofs##_pix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix3, i##ofs##_pix2), i##ofs##_pix2); \
i##ofs##_pix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_pix2, i##ofs##_pix0), i##ofs##_pix0);
#define CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_32FC4_I(ofs) \
#define CV_WARP_SIMD128_LINEAR_SHUFFLE_INTER_32FC4_I(ofs) \
const float *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_pix0 = vx_load(srcptr##ofs); \
v_float32 i##ofs##_pix1 = vx_load(srcptr##ofs+4); \
@ -591,30 +710,59 @@
i##ofs##_pix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix1, i##ofs##_pix0), i##ofs##_pix0); \
i##ofs##_pix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix3, i##ofs##_pix2), i##ofs##_pix2); \
i##ofs##_pix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_pix2, i##ofs##_pix0), i##ofs##_pix0);
#define CV_WARP_SIMD128_STORE_8UC4_I() \
#define CV_WARP_SIMD128_LINEAR_STORE_8UC4_I() \
v_uint16 i01_pix = v_pack_u(v_round(i0_pix0), v_round(i1_pix0)); \
v_uint16 i23_pix = v_pack_u(v_round(i2_pix0), v_round(i3_pix0)); \
v_pack_store(dstptr + 4*(x+i), i01_pix); \
v_pack_store(dstptr + 4*(x+i+2), i23_pix);
#define CV_WARP_SIMD128_STORE_16UC4_I() \
#define CV_WARP_SIMD128_LINEAR_STORE_16UC4_I() \
v_uint16 i01_pix = v_pack_u(v_round(i0_pix0), v_round(i1_pix0)); \
v_uint16 i23_pix = v_pack_u(v_round(i2_pix0), v_round(i3_pix0)); \
vx_store(dstptr + 4*(x+i), i01_pix); \
vx_store(dstptr + 4*(x+i+2), i23_pix);
#define CV_WARP_SIMD128_STORE_32FC4_I() \
vx_store(dstptr + 4*(x+i), i0_pix0); \
vx_store(dstptr + 4*(x+i)+4, i1_pix0); \
vx_store(dstptr + 4*(x+i)+8, i2_pix0); \
vx_store(dstptr + 4*(x+i)+12, i3_pix0);
#define CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_C4(DEPTH) \
#define CV_WARP_SIMD128_LINEAR_STORE_32FC4_I() \
vx_store(dstptr + 4*(x+i), i0_pix0); \
vx_store(dstptr + 4*(x+i+1), i1_pix0); \
vx_store(dstptr + 4*(x+i+2), i2_pix0); \
vx_store(dstptr + 4*(x+i+3), i3_pix0);
#define CV_WARP_SIMD128_SHUFFLE_INTER_STORE_C4(INTER, DEPTH) \
for (int i = 0; i < uf; i+=vlanes_32) { \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(0); \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(1); \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(2); \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(3); \
CV_WARP_SIMD128_STORE_##DEPTH##C4_I(); \
CV_WARP_SIMD128_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(0); \
CV_WARP_SIMD128_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(1); \
CV_WARP_SIMD128_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(2); \
CV_WARP_SIMD128_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(3); \
CV_WARP_SIMD128_##INTER##_STORE_##DEPTH##C4_I(); \
}
#define CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_8UC4_I(ofs0, ofs1) \
// SIMD256, nearest
// (Section comment corrected: these are the 256-bit variants.)
// C4 fast path: each macro handles TWO pixels (ofs0, ofs1), loading each
// 4-channel pixel into the low half of a 256-bit register and combining the
// halves with v_combine_low (or vx_load_halves for 32F).
#define CV_WARP_SIMD256_NEAREST_SHUFFLE_INTER_8UC4_I(ofs0, ofs1) \
const uint8_t *srcptr##ofs0 = src + addr[i+ofs0]; \
const uint8_t *srcptr##ofs1 = src + addr[i+ofs1]; \
v_uint32 i##ofs0##_pix0x = v256_load_expand_q(srcptr##ofs0); \
v_uint32 i##ofs1##_pix0x = v256_load_expand_q(srcptr##ofs1); \
v_uint32 i##ofs0##ofs1##_pix00 = v_combine_low(i##ofs0##_pix0x, i##ofs1##_pix0x);
#define CV_WARP_SIMD256_NEAREST_SHUFFLE_INTER_16UC4_I(ofs0, ofs1) \
const uint16_t *srcptr##ofs0 = src + addr[i+ofs0]; \
const uint16_t *srcptr##ofs1 = src + addr[i+ofs1]; \
v_uint32 i##ofs0##_pix0x = v256_load_expand(srcptr##ofs0); \
v_uint32 i##ofs1##_pix0x = v256_load_expand(srcptr##ofs1); \
v_uint32 i##ofs0##ofs1##_pix00 = v_combine_low(i##ofs0##_pix0x, i##ofs1##_pix0x);
#define CV_WARP_SIMD256_NEAREST_SHUFFLE_INTER_32FC4_I(ofs0, ofs1) \
const float *srcptr##ofs0 = src + addr[i+ofs0]; \
const float *srcptr##ofs1 = src + addr[i+ofs1]; \
v_float32 i##ofs0##ofs1##_fpix00 = vx_load_halves(srcptr##ofs0, srcptr##ofs1);
// Stores: u32 pairs are packed to u16 (and further narrowed to u8 with
// saturation via v_pack_store for the 8U case); 32F stores directly.
#define CV_WARP_SIMD256_NEAREST_STORE_8UC4_I() \
v_pack_store(dstptr + 4*(x+i), v_pack(i01_pix00, i23_pix00)); \
v_pack_store(dstptr + 4*(x+i+4), v_pack(i45_pix00, i67_pix00));
#define CV_WARP_SIMD256_NEAREST_STORE_16UC4_I() \
vx_store(dstptr + 4*(x+i), v_pack(i01_pix00, i23_pix00)); \
vx_store(dstptr + 4*(x+i+4), v_pack(i45_pix00, i67_pix00));
#define CV_WARP_SIMD256_NEAREST_STORE_32FC4_I() \
vx_store(dstptr + 4*(x+i), i01_fpix00); \
vx_store(dstptr + 4*(x+i)+8, i23_fpix00); \
vx_store(dstptr + 4*(x+i)+16, i45_fpix00); \
vx_store(dstptr + 4*(x+i)+24, i67_fpix00);
// SIMD256, bilinear
#define CV_WARP_SIMD256_LINEAR_SHUFFLE_INTER_8UC4_I(ofs0, ofs1) \
const uint8_t *srcptr##ofs0 = src + addr[i+ofs0]; \
const uint8_t *srcptr##ofs1 = src + addr[i+ofs1]; \
v_int32 i##ofs0##_pix01 = v_reinterpret_as_s32(v256_load_expand_q(srcptr##ofs0)), \
@ -635,8 +783,9 @@
i##ofs0##ofs1##_beta = v_combine_low(i##ofs0##_beta, i##ofs1##_beta); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix11, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00); \
i##ofs0##ofs1##_fpix22 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix33, i##ofs0##ofs1##_fpix22), i##ofs0##ofs1##_fpix22); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_beta, v_sub(i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00);
#define CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_16UC4_I(ofs0, ofs1) \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_beta, v_sub(i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00); \
auto i##ofs0##ofs1##_pix00 = v_round(i##ofs0##ofs1##_fpix00);
#define CV_WARP_SIMD256_LINEAR_SHUFFLE_INTER_16UC4_I(ofs0, ofs1) \
const uint16_t *srcptr##ofs0 = src + addr[i+ofs0]; \
const uint16_t *srcptr##ofs1 = src + addr[i+ofs1]; \
v_int32 i##ofs0##_pix01 = v_reinterpret_as_s32(v256_load_expand(srcptr##ofs0)), \
@ -657,8 +806,9 @@
i##ofs0##ofs1##_beta = v_combine_low(i##ofs0##_beta, i##ofs1##_beta); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix11, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00); \
i##ofs0##ofs1##_fpix22 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix33, i##ofs0##ofs1##_fpix22), i##ofs0##ofs1##_fpix22); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_beta, v_sub(i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00);
#define CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_32FC4_I(ofs0, ofs1) \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_beta, v_sub(i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00); \
auto i##ofs0##ofs1##_pix00 = v_round(i##ofs0##ofs1##_fpix00);
#define CV_WARP_SIMD256_LINEAR_SHUFFLE_INTER_32FC4_I(ofs0, ofs1) \
const float *srcptr##ofs0 = src + addr[i+ofs0]; \
const float *srcptr##ofs1 = src + addr[i+ofs1]; \
v_float32 i##ofs0##_fpix01 = v256_load(srcptr##ofs0), \
@ -678,30 +828,48 @@
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix11, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00); \
i##ofs0##ofs1##_fpix22 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix33, i##ofs0##ofs1##_fpix22), i##ofs0##ofs1##_fpix22); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_beta, v_sub(i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00);
#define CV_WARP_SIMD256_STORE_8UC4_I() \
auto i01_pix = v_round(i01_fpix00), i23_pix = v_round(i23_fpix00); \
v_pack_store(dstptr + 4*(x+i), v_pack_u(i01_pix, i23_pix)); \
auto i45_pix = v_round(i45_fpix00), i67_pix = v_round(i67_fpix00); \
v_pack_store(dstptr + 4*(x+i+4), v_pack_u(i45_pix, i67_pix));
#define CV_WARP_SIMD256_STORE_16UC4_I() \
auto i01_pix = v_round(i01_fpix00), i23_pix = v_round(i23_fpix00); \
vx_store(dstptr + 4*(x+i), v_pack_u(i01_pix, i23_pix)); \
auto i45_pix = v_round(i45_fpix00), i67_pix = v_round(i67_fpix00); \
vx_store(dstptr + 4*(x+i+4), v_pack_u(i45_pix, i67_pix));
#define CV_WARP_SIMD256_STORE_32FC4_I() \
// SIMD256 bilinear stores: consume the rounded i*_pix00 registers produced by
// the LINEAR shuffle macros (v_round of the interpolated floats), packing with
// unsigned saturation (v_pack_u) for 8U/16U; 32F stores the float results.
#define CV_WARP_SIMD256_LINEAR_STORE_8UC4_I() \
v_pack_store(dstptr + 4*(x+i), v_pack_u(i01_pix00, i23_pix00)); \
v_pack_store(dstptr + 4*(x+i+4), v_pack_u(i45_pix00, i67_pix00));
#define CV_WARP_SIMD256_LINEAR_STORE_16UC4_I() \
vx_store(dstptr + 4*(x+i), v_pack_u(i01_pix00, i23_pix00)); \
vx_store(dstptr + 4*(x+i+4), v_pack_u(i45_pix00, i67_pix00));
#define CV_WARP_SIMD256_LINEAR_STORE_32FC4_I() \
vx_store(dstptr + 4*(x+i), i01_fpix00); \
vx_store(dstptr + 4*(x+i)+8, i23_fpix00); \
vx_store(dstptr + 4*(x+i)+16, i45_fpix00); \
vx_store(dstptr + 4*(x+i)+24, i67_fpix00);
#define CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_C4(DEPTH) \
#define CV_WARP_SIMD256_SHUFFLE_INTER_STORE_C4(INTER, DEPTH) \
for (int i = 0; i < uf; i+=vlanes_32) { \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(0, 1); \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(2, 3); \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(4, 5); \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(6, 7); \
CV_WARP_SIMD256_STORE_##DEPTH##C4_I(); \
CV_WARP_SIMD256_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(0, 1) \
CV_WARP_SIMD256_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(2, 3) \
CV_WARP_SIMD256_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(4, 5) \
CV_WARP_SIMD256_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(6, 7) \
CV_WARP_SIMD256_##INTER##_STORE_##DEPTH##C4_I() \
}
#define CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_8UC4_I(ofs) \
// SIMD_SCALABLE (SIMDX), nearest
// Scalable-vector C4 fast path: identical structure to the SIMD128 variant but
// using the explicit fixed-length intrinsics (v_load_expand_q<4>, v_load<4>,
// v_pack<4>, ...) so only 4 lanes are touched regardless of VLEN.
#define CV_WARP_SIMDX_NEAREST_SHUFFLE_INTER_8UC4_I(ofs) \
const uint8_t *srcptr##ofs = src + addr[i+ofs]; \
v_uint32 i##ofs##_pix0 = v_load_expand_q<4>(srcptr##ofs);
#define CV_WARP_SIMDX_NEAREST_SHUFFLE_INTER_16UC4_I(ofs) \
const uint16_t *srcptr##ofs = src + addr[i+ofs]; \
v_uint32 i##ofs##_pix0 = v_load_expand<4>(srcptr##ofs);
#define CV_WARP_SIMDX_NEAREST_SHUFFLE_INTER_32FC4_I(ofs) \
const float *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_fpix0 = v_load<4>(srcptr##ofs);
// Stores: pack pairs of u32 pixels to u16 via the fixed-length v_pack<4>
// overload, then narrow to u8 with v_pack_store<8> for the 8U case.
#define CV_WARP_SIMDX_NEAREST_STORE_8UC4_I() \
v_pack_store<8>(dstptr + 4*(x+i), v_pack<4>(i0_pix0, i1_pix0)); \
v_pack_store<8>(dstptr + 4*(x+i+2), v_pack<4>(i2_pix0, i3_pix0));
#define CV_WARP_SIMDX_NEAREST_STORE_16UC4_I() \
v_store<8>(dstptr + 4*(x+i), v_pack<4>(i0_pix0, i1_pix0)); \
v_store<8>(dstptr + 4*(x+i+2), v_pack<4>(i2_pix0, i3_pix0));
#define CV_WARP_SIMDX_NEAREST_STORE_32FC4_I() \
v_store<4>(dstptr + 4*(x+i), i0_fpix0); \
v_store<4>(dstptr + 4*(x+i)+4, i1_fpix0); \
v_store<4>(dstptr + 4*(x+i)+8, i2_fpix0); \
v_store<4>(dstptr + 4*(x+i)+12, i3_fpix0);
// SIMD_SCALABLE (SIMDX), bilinear
#define CV_WARP_SIMDX_LINEAR_SHUFFLE_INTER_8UC4_I(ofs) \
const uint8_t *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_fpix0 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q<4>(srcptr##ofs))), \
i##ofs##_fpix1 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q<4>(srcptr##ofs+4))), \
@ -711,8 +879,9 @@
i##ofs##_beta = vx_setall_f32(vbeta[i+ofs]); \
i##ofs##_fpix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix1, i##ofs##_fpix0), i##ofs##_fpix0); \
i##ofs##_fpix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix3, i##ofs##_fpix2), i##ofs##_fpix2); \
i##ofs##_fpix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_fpix2, i##ofs##_fpix0), i##ofs##_fpix0);
#define CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_16UC4_I(ofs) \
i##ofs##_fpix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_fpix2, i##ofs##_fpix0), i##ofs##_fpix0); \
auto i##ofs##_pix0 = v_round(i##ofs##_fpix0);
#define CV_WARP_SIMDX_LINEAR_SHUFFLE_INTER_16UC4_I(ofs) \
const uint16_t *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_fpix0 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand<4>(srcptr##ofs))), \
i##ofs##_fpix1 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand<4>(srcptr##ofs+4))), \
@ -722,8 +891,9 @@
i##ofs##_beta = vx_setall_f32(vbeta[i+ofs]); \
i##ofs##_fpix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix1, i##ofs##_fpix0), i##ofs##_fpix0); \
i##ofs##_fpix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix3, i##ofs##_fpix2), i##ofs##_fpix2); \
i##ofs##_fpix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_fpix2, i##ofs##_fpix0), i##ofs##_fpix0);
#define CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_32FC4_I(ofs) \
i##ofs##_fpix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_fpix2, i##ofs##_fpix0), i##ofs##_fpix0); \
auto i##ofs##_pix0 = v_round(i##ofs##_fpix0);
#define CV_WARP_SIMDX_LINEAR_SHUFFLE_INTER_32FC4_I(ofs) \
const float *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_fpix0 = v_load<4>(srcptr##ofs), \
i##ofs##_fpix1 = v_load<4>(srcptr##ofs+4), \
@ -734,26 +904,25 @@
i##ofs##_fpix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix1, i##ofs##_fpix0), i##ofs##_fpix0); \
i##ofs##_fpix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix3, i##ofs##_fpix2), i##ofs##_fpix2); \
i##ofs##_fpix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_fpix2, i##ofs##_fpix0), i##ofs##_fpix0);
#define CV_WARP_SIMDX_STORE_8UC4_I() \
auto i01_pix = v_pack_u<4>(v_round(i0_fpix0), v_round(i1_fpix0)), \
i23_pix = v_pack_u<4>(v_round(i2_fpix0), v_round(i3_fpix0)); \
v_pack_store<8>(dstptr + 4*(x+i), i01_pix); \
v_pack_store<8>(dstptr + 4*(x+i+2), i23_pix);
#define CV_WARP_SIMDX_STORE_16UC4_I() \
auto i01_pix = v_pack_u<4>(v_round(i0_fpix0), v_round(i1_fpix0)), \
i23_pix = v_pack_u<4>(v_round(i2_fpix0), v_round(i3_fpix0)); \
v_store<8>(dstptr + 4*(x+i), i01_pix); \
v_store<8>(dstptr + 4*(x+i+2), i23_pix);
#define CV_WARP_SIMDX_STORE_32FC4_I() \
// SIMD_SCALABLE bilinear stores: consume the rounded i*_pix0 registers from
// the LINEAR shuffle macros; v_pack_u<4> packs with unsigned saturation,
// v_pack_store<8> narrows 16U -> 8U; 32F stores the float results directly.
#define CV_WARP_SIMDX_LINEAR_STORE_8UC4_I() \
v_pack_store<8>(dstptr + 4*(x+i), v_pack_u<4>(i0_pix0, i1_pix0)); \
v_pack_store<8>(dstptr + 4*(x+i+2), v_pack_u<4>(i2_pix0, i3_pix0));
#define CV_WARP_SIMDX_LINEAR_STORE_16UC4_I() \
v_store<8>(dstptr + 4*(x+i), v_pack_u<4>(i0_pix0, i1_pix0)); \
v_store<8>(dstptr + 4*(x+i+2), v_pack_u<4>(i2_pix0, i3_pix0));
#define CV_WARP_SIMDX_LINEAR_STORE_32FC4_I() \
v_store<4>(dstptr + 4*(x+i), i0_fpix0); \
v_store<4>(dstptr + 4*(x+i)+4, i1_fpix0); \
v_store<4>(dstptr + 4*(x+i)+8, i2_fpix0); \
v_store<4>(dstptr + 4*(x+i)+12, i3_fpix0);
#define CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_C4(DEPTH) \
#define CV_WARP_SIMDX_SHUFFLE_INTER_STORE_C4(INTER, DEPTH) \
for (int i = 0; i < uf; i+=4) { \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(0); \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(1); \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(2); \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(3); \
CV_WARP_SIMDX_STORE_##DEPTH##C4_I(); \
CV_WARP_SIMDX_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(0); \
CV_WARP_SIMDX_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(1); \
CV_WARP_SIMDX_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(2); \
CV_WARP_SIMDX_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(3); \
CV_WARP_SIMDX_##INTER##_STORE_##DEPTH##C4_I(); \
}
#define CV_WARP_VECTOR_SHUFFLE_INTER_STORE_C4(SIMD, INTER, DEPTH) \
CV_WARP_##SIMD##_SHUFFLE_INTER_STORE_C4(INTER, DEPTH)

File diff suppressed because it is too large Load Diff

View File

@ -703,6 +703,16 @@ protected:
virtual void run_func();
virtual void run_reference_func();
template<typename T>
void new_nearest_c1(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
template<typename T>
void new_nearest_c3(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
template<typename T>
void new_nearest_c4(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
template<typename T>
void new_linear_c1(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
@ -720,7 +730,7 @@ protected:
remap_func funcs[2];
private:
template <typename T> void new_remap(const Mat&, Mat&);
template <typename T> void new_remap(const Mat&, Mat&, int);
void remap_nearest(const Mat&, Mat&);
void remap_generic(const Mat&, Mat&);
@ -879,19 +889,19 @@ void CV_Remap_Test::run_reference_func()
if (interpolation == INTER_AREA)
interpolation = INTER_LINEAR;
if (interpolation == INTER_LINEAR && mapx.depth() == CV_32F) {
if ((interpolation == INTER_LINEAR || interpolation == INTER_NEAREST) && mapx.depth() == CV_32F) {
int src_depth = src.depth(), src_channels = src.channels();
Mat tmp = Mat::zeros(dst.size(), dst.type());
if (src_depth == CV_8U && (src_channels == 1 || src_channels == 3 || src_channels == 4)) {
new_remap<uint8_t>(src, tmp);
new_remap<uint8_t>(src, tmp, interpolation);
tmp.convertTo(reference_dst, reference_dst.depth());
return;
} else if (src_depth == CV_16U && (src_channels == 1 || src_channels == 3 || src_channels == 4)) {
new_remap<uint16_t>(src, tmp);
new_remap<uint16_t>(src, tmp, interpolation);
tmp.convertTo(reference_dst, reference_dst.depth());
return;
} else if (src_depth == CV_32F && (src_channels == 1 || src_channels == 3 || src_channels == 4)) {
new_remap<float>(src, tmp);
new_remap<float>(src, tmp, interpolation);
tmp.convertTo(reference_dst, reference_dst.depth());
return;
}
@ -903,7 +913,7 @@ void CV_Remap_Test::run_reference_func()
(this->*funcs[index])(src, reference_dst);
}
#define FETCH_PIXEL_SCALAR(cn, dy, dx) \
#define WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, dy, dx) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr[ofs+ci];} \
@ -917,16 +927,28 @@ void CV_Remap_Test::run_reference_func()
size_t glob_ofs = iy_*srcstep + ix_*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr_[glob_ofs+ci];} \
}
#define WARPAFFINE_SHUFFLE(cn) \
#define WARP_NEAREST_SHUFFLE_FETCH_PIXEL_IN_RANGE(cn) \
for (int ci = 0; ci < cn; ci++) { \
pxy[ci] = srcptr[ci]; \
}
#define WARP_LINEAR_SHUFFLE_FETCH_PIXEL_IN_RANGE(cn) \
for (int ci = 0; ci < cn; ci++) { \
pxy[ci] = srcptr[ci]; \
pxy[ci+cn] = srcptr[ci+cn]; \
pxy[ci+cn*2] = srcptr[srcstep+ci]; \
pxy[ci+cn*3] = srcptr[srcstep+ci+cn]; \
}
#define WARP_NEAREST_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn) \
WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, 0, 0);
#define WARP_LINEAR_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn) \
WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, 0, 0); \
WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, 0, 1); \
WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, 1, 0); \
WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, 1, 1);
#define WARP_SHUFFLE(inter, cn) \
if ((((unsigned)ix < (unsigned)(srccols-1)) & \
((unsigned)iy < (unsigned)(srcrows-1))) != 0) { \
for (int ci = 0; ci < cn; ci++) { \
pxy[ci] = srcptr[ci]; \
pxy[ci+cn] = srcptr[ci+cn]; \
pxy[ci+cn*2] = srcptr[srcstep+ci]; \
pxy[ci+cn*3] = srcptr[srcstep+ci+cn]; \
} \
WARP_##inter##_SHUFFLE_FETCH_PIXEL_IN_RANGE(cn) \
} else { \
if ((borderType == BORDER_CONSTANT || borderType == BORDER_TRANSPARENT) && \
(((unsigned)(ix+1) >= (unsigned)(srccols+1))| \
@ -936,14 +958,53 @@ void CV_Remap_Test::run_reference_func()
} \
return; \
} \
FETCH_PIXEL_SCALAR(cn, 0, 0); \
FETCH_PIXEL_SCALAR(cn, 0, 1); \
FETCH_PIXEL_SCALAR(cn, 1, 0); \
FETCH_PIXEL_SCALAR(cn, 1, 1); \
WARP_##inter##_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn) \
}
template<typename T>
static inline void warpaffine_linear_calc(int cn, const T *pxy, T *dst, float sx, float sy)
// NOTE(review): the declaration line above appears to be a stray remnant of
// the old warpaffine_linear_calc signature (diff-render artifact) -- confirm
// against the repository; new_nearest_c1 below is the intended definition.
// Scalar reference implementation of nearest-neighbor remap for one 1-channel
// destination pixel at column x; (sx, sy) are absolute source coordinates.
void CV_Remap_Test::new_nearest_c1(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y) {
// Nearest sample is taken at (floor(sx), floor(sy)); the fractional parts
// are computed for symmetry with the linear path but unused by NEAREST.
int ix = (int)floorf(sx), iy = (int)floorf(sy);
sx -= ix; sy -= iy;
T pxy[1];
const T *srcptr = srcptr_ + srcstep*iy + ix;
// Fetches pxy[0] with border handling; may return early for fully
// out-of-range CONSTANT/TRANSPARENT borders (see WARP_SHUFFLE).
WARP_SHUFFLE(NEAREST, 1);
dstptr[x+0] = saturate_cast<T>(pxy[0]);
}
// Reference nearest-neighbor sampling of one 3-channel destination pixel.
// Same contract as new_nearest_c1, but srcptr/dstptr are indexed with a
// 3-element pixel stride and all three channels of the source pixel at
// (floor(sx), floor(sy)) are written to dstptr[x*3 .. x*3+2].
template<typename T>
void CV_Remap_Test::new_nearest_c3(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y) {
int ix = (int)floorf(sx), iy = (int)floorf(sy); // floorf avoids underflow for negative coords
sx -= ix; sy -= iy; // fractional parts; kept for parity with the linear path
T pxy[3];
const T *srcptr = srcptr_ + srcstep*iy + ix*3;
WARP_SHUFFLE(NEAREST, 3);
dstptr[x*3+0] = saturate_cast<T>(pxy[0]);
dstptr[x*3+1] = saturate_cast<T>(pxy[1]);
dstptr[x*3+2] = saturate_cast<T>(pxy[2]);
}
// Reference nearest-neighbor sampling of one 4-channel destination pixel.
// Same contract as new_nearest_c1, but srcptr/dstptr are indexed with a
// 4-element pixel stride and all four channels of the source pixel at
// (floor(sx), floor(sy)) are written to dstptr[x*4 .. x*4+3].
template<typename T>
void CV_Remap_Test::new_nearest_c4(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y) {
int ix = (int)floorf(sx), iy = (int)floorf(sy); // floorf avoids underflow for negative coords
sx -= ix; sy -= iy; // fractional parts; kept for parity with the linear path
T pxy[4];
const T *srcptr = srcptr_ + srcstep*iy + ix*4;
WARP_SHUFFLE(NEAREST, 4);
dstptr[x*4+0] = saturate_cast<T>(pxy[0]);
dstptr[x*4+1] = saturate_cast<T>(pxy[1]);
dstptr[x*4+2] = saturate_cast<T>(pxy[2]);
dstptr[x*4+3] = saturate_cast<T>(pxy[3]);
}
template<typename T>
static inline void warp_linear_calc(int cn, const T *pxy, T *dst, float sx, float sy)
{
for (int ci = 0; ci < cn; ci++) {
float p00 = pxy[ci];
@ -956,7 +1017,6 @@ static inline void warpaffine_linear_calc(int cn, const T *pxy, T *dst, float sx
dst[ci] = saturate_cast<T>(v0);
}
}
template<typename T>
void CV_Remap_Test::new_linear_c1(int x, float sx, float sy, const T *srcptr_, T *dstptr,
int srccols, int srcrows, size_t srcstep,
@ -968,11 +1028,10 @@ void CV_Remap_Test::new_linear_c1(int x, float sx, float sy, const T *srcptr_, T
T pxy[4];
const T *srcptr = srcptr_ + srcstep*iy + ix;
WARPAFFINE_SHUFFLE(1);
WARP_SHUFFLE(LINEAR, 1);
warpaffine_linear_calc(1, pxy, dstptr+x, sx, sy);
warp_linear_calc(1, pxy, dstptr+x, sx, sy);
}
template<typename T>
void CV_Remap_Test::new_linear_c3(int x, float sx, float sy, const T *srcptr_, T *dstptr,
int srccols, int srcrows, size_t srcstep,
@ -984,11 +1043,10 @@ void CV_Remap_Test::new_linear_c3(int x, float sx, float sy, const T *srcptr_, T
T pxy[12];
const T *srcptr = srcptr_ + srcstep*iy + ix*3;
WARPAFFINE_SHUFFLE(3);
WARP_SHUFFLE(LINEAR, 3);
warpaffine_linear_calc(3, pxy, dstptr+x*3, sx, sy);
warp_linear_calc(3, pxy, dstptr+x*3, sx, sy);
}
template<typename T>
void CV_Remap_Test::new_linear_c4(int x, float sx, float sy, const T *srcptr_, T *dstptr,
int srccols, int srcrows, size_t srcstep,
@ -1000,13 +1058,13 @@ void CV_Remap_Test::new_linear_c4(int x, float sx, float sy, const T *srcptr_, T
T pxy[16];
const T *srcptr = srcptr_ + srcstep*iy + ix*4;
WARPAFFINE_SHUFFLE(4);
WARP_SHUFFLE(LINEAR, 4);
warpaffine_linear_calc(4, pxy, dstptr+x*4, sx, sy);
warp_linear_calc(4, pxy, dstptr+x*4, sx, sy);
}
template <typename T>
void CV_Remap_Test::new_remap(const Mat &_src, Mat &_dst) {
void CV_Remap_Test::new_remap(const Mat &_src, Mat &_dst, int inter) {
int src_channels = _src.channels();
CV_CheckTrue(_src.channels() == 1 || _src.channels() == 3 || _src.channels() == 4, "");
CV_CheckTrue(mapx.depth() == CV_32F, "");
@ -1048,12 +1106,22 @@ void CV_Remap_Test::new_remap(const Mat &_src, Mat &_dst) {
sy = mapx_data[2*offset+1];
}
if (src_channels == 3) {
new_linear_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (src_channels == 4) {
new_linear_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
if (inter == INTER_NEAREST) {
if (src_channels == 3) {
new_nearest_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (src_channels == 4) {
new_nearest_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else {
new_nearest_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
}
} else {
new_linear_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
if (src_channels == 3) {
new_linear_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (src_channels == 4) {
new_linear_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else {
new_linear_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
}
}
}
}
@ -1232,7 +1300,7 @@ private:
void warpAffine(const Mat&, Mat&);
template<typename T>
void newWarpAffine(const Mat&, Mat&, const Mat&);
void new_warpAffine(const Mat&, Mat&, const Mat&, int);
};
CV_WarpAffine_Test::CV_WarpAffine_Test() :
@ -1287,7 +1355,7 @@ void CV_WarpAffine_Test::run_reference_func()
}
template<typename T>
void CV_WarpAffine_Test::newWarpAffine(const Mat &_src, Mat &_dst, const Mat &tM)
void CV_WarpAffine_Test::new_warpAffine(const Mat &_src, Mat &_dst, const Mat &tM, int inter)
{
int num_channels = _dst.channels();
CV_CheckTrue(num_channels == 1 || num_channels == 3 || num_channels == 4, "");
@ -1322,12 +1390,22 @@ void CV_WarpAffine_Test::newWarpAffine(const Mat &_src, Mat &_dst, const Mat &tM
float sx = x*_M[0] + y*_M[1] + _M[2];
float sy = x*_M[3] + y*_M[4] + _M[5];
if (num_channels == 3) {
new_linear_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (num_channels == 4) {
new_linear_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
if (inter == INTER_NEAREST) {
if (num_channels == 3) {
new_nearest_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (num_channels == 4) {
new_nearest_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else {
new_nearest_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
}
} else {
new_linear_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
if (num_channels == 3) {
new_linear_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (num_channels == 4) {
new_linear_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else {
new_linear_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
}
}
}
}
@ -1357,14 +1435,14 @@ void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
if (!(interpolation & cv::WARP_INVERSE_MAP))
invertAffineTransform(tM.clone(), tM);
if (inter == INTER_LINEAR) {
if (inter == INTER_LINEAR || inter == INTER_NEAREST) {
int dst_depth = _dst.depth(), dst_channels = _dst.channels();
if (dst_depth == CV_8U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<uint8_t>(_src, _dst, tM);
return new_warpAffine<uint8_t>(_src, _dst, tM, inter);
} else if (dst_depth == CV_16U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<uint16_t>(_src, _dst, tM);
return new_warpAffine<uint16_t>(_src, _dst, tM, inter);
} else if (dst_depth == CV_32F && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<float>(_src, _dst, tM);
return new_warpAffine<float>(_src, _dst, tM, inter);
}
}
@ -1420,7 +1498,7 @@ private:
void warpPerspective(const Mat&, Mat&);
template<typename T>
void newWarpPerspective(const Mat&, Mat&, const Mat&);
void new_warpPerspective(const Mat&, Mat&, const Mat&, int);
};
CV_WarpPerspective_Test::CV_WarpPerspective_Test() :
@ -1470,7 +1548,7 @@ void CV_WarpPerspective_Test::run_reference_func()
}
template<typename T>
void CV_WarpPerspective_Test::newWarpPerspective(const Mat &_src, Mat &_dst, const Mat &tM)
void CV_WarpPerspective_Test::new_warpPerspective(const Mat &_src, Mat &_dst, const Mat &tM, int inter)
{
int num_channels = _dst.channels();
CV_CheckTrue(num_channels == 1 || num_channels == 3 || num_channels == 4, "");
@ -1506,12 +1584,22 @@ void CV_WarpPerspective_Test::newWarpPerspective(const Mat &_src, Mat &_dst, con
float sx = (x*_M[0] + y*_M[1] + _M[2]) / w;
float sy = (x*_M[3] + y*_M[4] + _M[5]) / w;
if (num_channels == 3) {
new_linear_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (num_channels == 4) {
new_linear_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
if (inter == INTER_NEAREST) {
if (num_channels == 3) {
new_nearest_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (num_channels == 4) {
new_nearest_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else {
new_nearest_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
}
} else {
new_linear_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
if (num_channels == 3) {
new_linear_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (num_channels == 4) {
new_linear_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else {
new_linear_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
}
}
}
}
@ -1543,14 +1631,14 @@ void CV_WarpPerspective_Test::warpPerspective(const Mat& _src, Mat& _dst)
if (inter == INTER_AREA)
inter = INTER_LINEAR;
if (inter == INTER_LINEAR) {
if (inter == INTER_LINEAR || inter == INTER_NEAREST) {
int dst_depth = _dst.depth(), dst_channels = _dst.channels();
if (dst_depth == CV_8U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpPerspective<uint8_t>(_src, _dst, M);
return new_warpPerspective<uint8_t>(_src, _dst, M, inter);
} else if (dst_depth == CV_16U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpPerspective<uint16_t>(_src, _dst, M);
return new_warpPerspective<uint16_t>(_src, _dst, M, inter);
} else if (dst_depth == CV_32F && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpPerspective<float>(_src, _dst, M);
return new_warpPerspective<float>(_src, _dst, M, inter);
}
}