mirror of https://github.com/opencv/opencv.git
synced 2024-12-04 16:59:12 +08:00
Merge pull request #26505 from fengyuentau:imgproc/new_nearest_inter
imgproc: optimized nearest neighbour interpolation for warpAffine, warpPerspective and remap #26505

The PR description has a limit of 65536 characters, so performance stats are attached below.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent b7dacbd5e3
commit b476ed6d06
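For context, a minimal sketch (not part of the PR; sizes and transform values are illustrative) of the three user-facing calls whose `INTER_NEAREST` paths this commit optimizes:

```cpp
// Illustrative only: exercising the three optimized nearest-neighbour paths.
#include <opencv2/imgproc.hpp>

void run_nearest_paths(const cv::Mat& src)
{
    cv::Mat dst;

    // warpAffine with INTER_NEAREST
    cv::Mat A = cv::getRotationMatrix2D(cv::Point2f(src.cols / 2.f, src.rows / 2.f), 30.0, 1.0);
    cv::warpAffine(src, dst, A, src.size(), cv::INTER_NEAREST, cv::BORDER_CONSTANT, cv::Scalar::all(0));

    // warpPerspective with INTER_NEAREST
    cv::Mat M = cv::Mat::eye(3, 3, CV_64F);
    cv::warpPerspective(src, dst, M, src.size(), cv::INTER_NEAREST);

    // remap with INTER_NEAREST and two CV_32FC1 maps (the map1.depth() == CV_32F path below)
    cv::Mat mapx(src.size(), CV_32FC1), mapy(src.size(), CV_32FC1);
    for (int y = 0; y < src.rows; y++)
        for (int x = 0; x < src.cols; x++)
        {
            mapx.at<float>(y, x) = (float)(src.cols - 1 - x); // horizontal flip
            mapy.at<float>(y, x) = (float)y;
        }
    cv::remap(src, dst, mapx, mapy, cv::INTER_NEAREST, cv::BORDER_REPLICATE);
}
```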
@@ -1761,6 +1761,35 @@ OPENCV_HAL_IMPL_RVV_PACK(v_int16, short, v_int32, 16, i16, i32, __riscv_vnclip,
OPENCV_HAL_IMPL_RVV_PACK_32(v_uint32, unsigned, v_uint64, 32, u32, u64, __riscv_vnclipu, __riscv_vnsrl)
OPENCV_HAL_IMPL_RVV_PACK_32(v_int32, int, v_int64, 32, i32, i64, __riscv_vnclip, __riscv_vnsra)

template <int N = VTraits<v_uint16>::max_nlanes>
inline v_uint16 v_pack(const v_uint32& a, const v_uint32& b)
{
    ushort bufa[N];
    ushort bufb[N];
    v_pack_store(bufa, a);
    v_pack_store(bufb, b);
    ushort buf[N];
    for (int i = 0; i < N/2; i++) { // each v_uint32 input packs N/2 lanes; N would overrun buf
        buf[i] = bufa[i];
        buf[i+N/2] = bufb[i];
    }
    return v_load(buf);
}

template <> inline v_uint16 v_pack<4>(const v_uint32& a, const v_uint32& b)
{
    constexpr int N = VTraits<v_uint16>::max_nlanes;
    ushort bufa[N];
    ushort bufb[N];
    v_pack_store(bufa, a);
    v_pack_store(bufb, b);

    ushort buf[N];
    buf[0] = bufa[0]; buf[1] = bufa[1]; buf[2] = bufa[2]; buf[3] = bufa[3];
    buf[4] = bufb[0]; buf[5] = bufb[1]; buf[6] = bufb[2]; buf[7] = bufb[3];
    return v_load(buf);
}

#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, hwidth, width, hsuffix, suffix, cast, hvl, vl) \
inline _Tpvec v_pack_u(const _wTpvec& a, const _wTpvec& b) \
{ \
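For reference, `v_pack` narrows two 32-bit vectors into one 16-bit vector with unsigned saturation: the lanes of `a` fill the low half of the result, the lanes of `b` the high half. A scalar model of those semantics (the 8-lane count is an assumption matching the specialization above):

```cpp
#include <algorithm>
#include <cstdint>

// Scalar sketch (assumption: 8 output lanes) of the v_pack semantics above:
// each 32-bit value is saturated to the uint16_t range before narrowing.
void v_pack_scalar(const uint32_t a[4], const uint32_t b[4], uint16_t out[8])
{
    for (int i = 0; i < 4; i++)
    {
        out[i]     = (uint16_t)std::min<uint32_t>(a[i], 0xFFFF);
        out[i + 4] = (uint16_t)std::min<uint32_t>(b[i], 0xFFFF);
    }
}
```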
@@ -111,48 +111,6 @@ PERF_TEST_P( TestWarpPerspective, WarpPerspective,
    SANITY_CHECK(dst, 1);
}

PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear,
             Combine(
                 Values( Size(640,480), Size(1920,1080), Size(2592,1944) ),
                 InterType::all(),
                 BorderMode::all(),
                 Values( CV_8UC1, CV_8UC4 )
             )
)
{
    Size size;
    int borderMode, interType, type;
    size = get<0>(GetParam());
    interType = get<1>(GetParam());
    borderMode = get<2>(GetParam());
    type = get<3>(GetParam());
    Scalar borderColor = Scalar::all(150);

    Mat src(size, type), dst(size, type);
    cvtest::fillGradient<uint8_t>(src);
    if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<uint8_t>(src, borderColor, 1);
    int shift = static_cast<int>(src.cols*0.04);
    Mat srcVertices = (Mat_<Vec2f>(1, 4) << Vec2f(0, 0),
            Vec2f(static_cast<float>(size.width-1), 0),
            Vec2f(static_cast<float>(size.width-1), static_cast<float>(size.height-1)),
            Vec2f(0, static_cast<float>(size.height-1)));
    Mat dstVertices = (Mat_<Vec2f>(1, 4) << Vec2f(0, static_cast<float>(shift)),
            Vec2f(static_cast<float>(size.width-shift/2), 0),
            Vec2f(static_cast<float>(size.width-shift), static_cast<float>(size.height-shift)),
            Vec2f(static_cast<float>(shift/2), static_cast<float>(size.height-1)));
    Mat warpMat = getPerspectiveTransform(srcVertices, dstVertices);

    declare.in(src).out(dst);
    declare.time(100);

    TEST_CYCLE()
    {
        warpPerspective( src, dst, warpMat, size, interType, borderMode, borderColor );
    }

    SANITY_CHECK(dst, 1);
}

PERF_TEST_P( TestRemap, map1_32fc1,
             Combine(
                 Values( szVGA, sz1080p ),
@@ -1672,6 +1672,56 @@ void cv::remap( InputArray _src, OutputArray _dst,

    int type = src.type(), depth = CV_MAT_DEPTH(type);

    if (interpolation == INTER_NEAREST && map1.depth() == CV_32F) {
        const auto *src_data = src.ptr<const uchar>();
        auto *dst_data = dst.ptr<uchar>();
        size_t src_step = src.step, dst_step = dst.step,
               map1_step = map1.step, map2_step = map2.step;
        int src_rows = src.rows, src_cols = src.cols;
        int dst_rows = dst.rows, dst_cols = dst.cols;
        const float *map1_data = map1.ptr<const float>();
        const float *map2_data = map2.ptr<const float>();
        switch (src.type()) {
            case CV_8UC1: {
                CV_CPU_DISPATCH(remapNearestInvoker_8UC1, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_8UC3: {
                CV_CPU_DISPATCH(remapNearestInvoker_8UC3, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_8UC4: {
                CV_CPU_DISPATCH(remapNearestInvoker_8UC4, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC1: {
                CV_CPU_DISPATCH(remapNearestInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC3: {
                CV_CPU_DISPATCH(remapNearestInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC4: {
                CV_CPU_DISPATCH(remapNearestInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC1: {
                CV_CPU_DISPATCH(remapNearestInvoker_32FC1, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC3: {
                CV_CPU_DISPATCH(remapNearestInvoker_32FC3, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC4: {
                CV_CPU_DISPATCH(remapNearestInvoker_32FC4, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            // no default
        }
    }

    if (interpolation == INTER_LINEAR) {
        if (map1.depth() == CV_32F) {
            const auto *src_data = src.ptr<const uint8_t>();
@@ -1708,27 +1758,27 @@ void cv::remap( InputArray _src, OutputArray _dst,
                break;
            }
            case CV_16UC1: {
                CV_CPU_DISPATCH(remapLinearInvoker_16UC1, ((uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                CV_CPU_DISPATCH(remapLinearInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC3: {
                CV_CPU_DISPATCH(remapLinearInvoker_16UC3, ((uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                CV_CPU_DISPATCH(remapLinearInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC4: {
                CV_CPU_DISPATCH(remapLinearInvoker_16UC4, ((uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                CV_CPU_DISPATCH(remapLinearInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC1: {
                CV_CPU_DISPATCH(remapLinearInvoker_32FC1, ((float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                CV_CPU_DISPATCH(remapLinearInvoker_32FC1, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC3: {
                CV_CPU_DISPATCH(remapLinearInvoker_32FC3, ((float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                CV_CPU_DISPATCH(remapLinearInvoker_32FC3, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC4: {
                CV_CPU_DISPATCH(remapLinearInvoker_32FC4, ((float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                CV_CPU_DISPATCH(remapLinearInvoker_32FC4, ((const float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            // no default
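Conceptually, each `remapNearestInvoker_*` specialization does the following per-pixel work; a scalar sketch for the 8UC1 constant-border case (the helper name and signature here are hypothetical, and the dispatched implementations are vectorized and handle all border modes):

```cpp
#include <cmath>
#include <cstdint>

// Illustrative scalar model (hypothetical helper, not the dispatched code).
// src_step/dst_step are taken in elements; for 8-bit single channel that
// equals the byte step used by the real invokers.
static void remap_nearest_8uc1_ref(const uint8_t* src, size_t src_step, int src_rows, int src_cols,
                                   uint8_t* dst, size_t dst_step, int dst_rows, int dst_cols,
                                   const uint8_t* mapx_base, size_t mapx_step,
                                   const uint8_t* mapy_base, size_t mapy_step, uint8_t border_val)
{
    for (int y = 0; y < dst_rows; y++)
    {
        const float* mx = (const float*)(mapx_base + y * mapx_step);
        const float* my = (const float*)(mapy_base + y * mapy_step);
        uint8_t* drow = dst + y * dst_step;
        for (int x = 0; x < dst_cols; x++)
        {
            int sx = (int)std::lrintf(mx[x]); // round to nearest source column
            int sy = (int)std::lrintf(my[x]); // round to nearest source row
            drow[x] = ((unsigned)sx < (unsigned)src_cols && (unsigned)sy < (unsigned)src_rows)
                      ? src[sy * src_step + sx]
                      : border_val;
        }
    }
}
```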
@@ -2657,6 +2707,48 @@ static void warpAffine(int src_type,
    Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
    Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);

    if (interpolation == INTER_NEAREST) {
        switch (src_type) {
            case CV_8UC1: {
                CV_CPU_DISPATCH(warpAffineNearestInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_8UC3: {
                CV_CPU_DISPATCH(warpAffineNearestInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_8UC4: {
                CV_CPU_DISPATCH(warpAffineNearestInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC1: {
                CV_CPU_DISPATCH(warpAffineNearestInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC3: {
                CV_CPU_DISPATCH(warpAffineNearestInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC4: {
                CV_CPU_DISPATCH(warpAffineNearestInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC1: {
                CV_CPU_DISPATCH(warpAffineNearestInvoker_32FC1, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC3: {
                CV_CPU_DISPATCH(warpAffineNearestInvoker_32FC3, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC4: {
                CV_CPU_DISPATCH(warpAffineNearestInvoker_32FC4, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            // no default
        }
    }

    if (interpolation == INTER_LINEAR) {
        switch (src_type) {
            case CV_8UC1: {
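The nearest-neighbour invokers only need the inverse mapping plus one rounding step per axis. A scalar sketch of the coordinate computation (illustrative; it assumes `M` is the 2x3 matrix that maps destination to source coordinates, which is how these internal invokers receive it):

```cpp
#include <cmath>

// Illustrative: for destination pixel (x, y), the nearest source position
// under a 2x3 matrix M mapping destination -> source coordinates.
inline void affine_nearest_coord(const double M[6], int x, int y, int& sx, int& sy)
{
    sx = (int)std::lrint(M[0] * x + M[1] * y + M[2]);
    sy = (int)std::lrint(M[3] * x + M[4] * y + M[5]);
}
```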
@@ -3324,46 +3416,99 @@ static void warpPerspective(int src_type,
{
    CALL_HAL(warpPerspective, cv_hal_warpPerspective, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue);

    if (interpolation == INTER_NEAREST) {
        switch (src_type) {
            case CV_8UC1: {
                CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_8UC3: {
                CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_8UC4: {
                CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC1: {
                CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC3: {
                CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC4: {
                CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC1: {
                CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_32FC1, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC3: {
                CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_32FC3, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC4: {
                CV_CPU_DISPATCH(warpPerspectiveNearestInvoker_32FC4, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
        }
    }

    if (interpolation == INTER_LINEAR) {
        switch (src_type) {
            case CV_8UC1: {
                if (hint == cv::ALGO_HINT_APPROX) {
                    CV_CPU_DISPATCH(warpPerspectiveLinearApproxInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                    break;
                } else {
                    CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                    break;
                }
            }
            case CV_8UC3: {
                if (hint == cv::ALGO_HINT_APPROX) {
                    CV_CPU_DISPATCH(warpPerspectiveLinearApproxInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                    break;
                } else {
                    CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                    break;
                }
            }
            case CV_8UC4: {
                if (hint == cv::ALGO_HINT_APPROX) {
                    CV_CPU_DISPATCH(warpPerspectiveLinearApproxInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                    break;
                } else {
                    CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                    break;
                }
            }
            case CV_16UC1: {
                CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC3: {
                CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_16UC4: {
                CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC1: {
                CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_32FC1, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC3: {
                CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_32FC3, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            case CV_32FC4: {
                CV_CPU_DISPATCH(warpPerspectiveLinearInvoker_32FC4, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
                break;
            }
            // no default
        }
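The perspective case is the same idea with an extra homogeneous divide; a scalar sketch under the same destination-to-source matrix convention (illustrative only, mirroring the divide-by-zero guard used in the OpenCL kernel below):

```cpp
#include <cmath>

// Illustrative: nearest source position under a 3x3 matrix M mapping
// destination -> source homogeneous coordinates.
inline void perspective_nearest_coord(const double M[9], int x, int y, int& sx, int& sy)
{
    double w = M[6] * x + M[7] * y + M[8];
    w = (w != 0.0) ? 1.0 / w : 0.0;          // guard against division by zero
    sx = (int)std::lrint((M[0] * x + M[1] * y + M[2]) * w);
    sy = (int)std::lrint((M[3] * x + M[4] * y + M[5]) * w);
}
```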
@@ -152,42 +152,36 @@ __kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src
    if (x < dst_cols)
    {
        T scalar = convertScalar(nVal);

        int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(float), map1_offset));
        int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(float), map2_offset));
        int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));

        #pragma unroll
        for (int i = 0; i < ROWS_PER_WI; ++i, ++y,
             map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
            if (y < dst_rows)
            {
                __global const float * map1 = (__global const float *)(map1ptr + map1_index);
                __global const float * map2 = (__global const float *)(map2ptr + map2_index);
                __global T * dst = (__global T *)(dstptr + dst_index);
        for (int dy = y, dy1 = min(dst_rows, y + ROWS_PER_WI); dy < dy1; ++dy, map1_index += map1_step, map2_index += map2_step)
        {
            __global const float * map1 = (__global const float *)(map1ptr + map1_index);
            __global const float * map2 = (__global const float *)(map2ptr + map2_index);

                int gx = convert_int_sat_rte(map1[0]);
                int gy = convert_int_sat_rte(map2[0]);
#if WARP_RELATIVE
                gx += x;
                gy += y;
#endif
            float X0 = map1[0];
            float Y0 = map2[0];
#if WARP_RELATIVE
            X0 += x;
            Y0 += dy;
#endif

                if (NEED_EXTRAPOLATION(gx, gy))
                {
#ifndef BORDER_CONSTANT
                    int2 gxy = (int2)(gx, gy);
#endif
                    T v;
                    EXTRAPOLATE(gxy, v)
                    storepix(v, dst);
                }
                else
                {
                    int src_index = mad24(gy, src_step, mad24(gx, TSIZE, src_offset));
                    storepix(loadpix((__global const T*)(srcptr + src_index)), dst);
                }
            int sx = convert_int_sat(rint(X0));
            int sy = convert_int_sat(rint(Y0));

            int2 map_data0 = (int2)(sx, sy);

            T v0 = scalar;
            if (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) {
                v0 = loadpix((__global const T *)(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset))));
            } else {
                EXTRAPOLATE(map_data0, v0);
            }

            int dst_index = mad24(dy, dst_step, mad24(x, TSIZE, dst_offset));
            storepix(v0, dstptr + dst_index);
        }
    }
}

@@ -202,36 +196,34 @@ __kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_o
    if (x < dst_cols)
    {
        T scalar = convertScalar(nVal);
        int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
        int map_index = mad24(y, map_step, mad24(x, (int)sizeof(float2), map_offset));

        #pragma unroll
        for (int i = 0; i < ROWS_PER_WI; ++i, ++y,
             map_index += map_step, dst_index += dst_step)
            if (y < dst_rows)
            {
                __global const float2 * map = (__global const float2 *)(mapptr + map_index);
                __global T * dst = (__global T *)(dstptr + dst_index);
        for (int dy = y, dy1 = min(dst_rows, y + ROWS_PER_WI); dy < dy1; ++dy, map_index += map_step)
        {
            __global const float2 * map = (__global const float2 *)(mapptr + map_index);
            float2 map_data = map[0];

                int2 gxy = convert_int2_sat_rte(map[0]);
#if WARP_RELATIVE
                gxy.x += x;
                gxy.y += y;
#endif
            float X0 = map_data.x;
            float Y0 = map_data.y;
#if WARP_RELATIVE
            X0 += x;
            Y0 += dy;
#endif

                int gx = gxy.x, gy = gxy.y;
            int sx = convert_int_sat(rint(X0));
            int sy = convert_int_sat(rint(Y0));

                if (NEED_EXTRAPOLATION(gx, gy))
                {
                    T v;
                    EXTRAPOLATE(gxy, v)
                    storepix(v, dst);
                }
                else
                {
                    int src_index = mad24(gy, src_step, mad24(gx, TSIZE, src_offset));
                    storepix(loadpix((__global const T *)(srcptr + src_index)), dst);
                }
            int2 map_data0 = (int2)(sx, sy);

            T v0 = scalar;
            if (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) {
                v0 = loadpix((__global const T *)(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset))));
            } else {
                EXTRAPOLATE(map_data0, v0);
            }

            int dst_index = mad24(dy, dst_step, mad24(x, TSIZE, dst_offset));
            storepix(v0, dstptr + dst_index);
        }
    }
}

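Both the old and new kernels round half-way map coordinates to the nearest even integer: the old code via the saturating `_rte` conversions, the new code via `rint` followed by a saturating int conversion. In C the same behaviour comes from `rint` under the default rounding mode; a small illustrative check:

```cpp
#include <cmath>
#include <cstdio>

int main()
{
    // rint rounds half-way cases to the nearest even integer under the
    // default FE_TONEAREST mode, matching OpenCL's _rte conversions.
    printf("%.0f %.0f %.0f %.0f\n", rint(0.5), rint(1.5), rint(2.5), rint(-0.5));
    // prints: 0 2 2 -0
    return 0;
}
```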
@@ -93,27 +93,25 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of

    if (dx < dst_cols)
    {
        int round_delta = (AB_SCALE >> 1);
        float X0_ = fma(M[0], (CT)dx, M[2]);
        float Y0_ = fma(M[3], (CT)dx, M[5]);

        int X0_ = rint(M[0] * dx * AB_SCALE);
        int Y0_ = rint(M[3] * dx * AB_SCALE);
        int dst_index = mad24(dy0, dst_step, mad24(dx, pixsize, dst_offset));

        for (int dy = dy0, dy1 = min(dst_rows, dy0 + ROWS_PER_WI); dy < dy1; ++dy, dst_index += dst_step)
        for (int dy = dy0, dy1 = min(dst_rows, dy0 + ROWS_PER_WI); dy < dy1; ++dy)
        {
            int X0 = X0_ + rint(fma(M[1], (CT)dy, M[2]) * AB_SCALE) + round_delta;
            int Y0 = Y0_ + rint(fma(M[4], (CT)dy, M[5]) * AB_SCALE) + round_delta;
            float X0 = fma(M[1], (CT)dy, X0_);
            float Y0 = fma(M[4], (CT)dy, Y0_);

            short sx = convert_short_sat(X0 >> AB_BITS);
            short sy = convert_short_sat(Y0 >> AB_BITS);
            int sx = convert_int_sat(rint(X0));
            int sy = convert_int_sat(rint(Y0));

            T v0 = scalar;
            if (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows)
            {
                int src_index = mad24(sy, src_step, mad24(sx, pixsize, src_offset));
                storepix(loadpix(srcptr + src_index), dstptr + dst_index);
                v0 = loadpix(srcptr + mad24(sy, src_step, mad24(sx, pixsize, src_offset)));
            }
            else
                storepix(scalar, dstptr + dst_index);

            int dst_index = mad24(dy, dst_step, mad24(dx, pixsize, dst_offset));
            storepix(v0, dstptr + dst_index);
        }
    }
}

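The hunk above replaces the fixed-point coordinate path (scale by `AB_SCALE`, add a `round_delta` half-unit, shift by `AB_BITS`) with a plain float `fma` plus a single `rint`. A scalar sketch contrasting the two roundings (the `ab_bits` value is an assumption for illustration; note the two can differ by one on exact half-way cases, since shift-plus-half rounds halves upward while `rint` rounds them to even):

```cpp
#include <cmath>

// Old path: fixed-point with ab_bits fractional bits (ab_scale = 1 << ab_bits).
inline int nearest_fixed_point(double coord, int ab_bits /* assumption: e.g. 10 */)
{
    const int ab_scale = 1 << ab_bits;
    int fixed = (int)std::lrint(coord * ab_scale) + (ab_scale >> 1); // add round_delta
    return fixed >> ab_bits;                                        // truncating shift
}

// New path: keep the coordinate in float and round once.
inline int nearest_float(float coord)
{
    return (int)std::lrintf(coord);
}
```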
@@ -92,22 +92,21 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s

    if (dx < dst_cols && dy < dst_rows)
    {
        CT X0 = M[0] * dx + M[1] * dy + M[2];
        CT Y0 = M[3] * dx + M[4] * dy + M[5];
        CT W = M[6] * dx + M[7] * dy + M[8];
        W = W != 0.0f ? 1.f / W : 0.0f;
        short sx = convert_short_sat_rte(X0*W);
        short sy = convert_short_sat_rte(Y0*W);
        float W = fma(M[6], (CT)dx, fma(M[7], (CT)dy, M[8]));
        float X0 = fma(M[0], (CT)dx, fma(M[1], (CT)dy, M[2])) / W;
        float Y0 = fma(M[3], (CT)dx, fma(M[4], (CT)dy, M[5])) / W;

        int dst_index = mad24(dy, dst_step, dx * pixsize + dst_offset);
        int sx = convert_int_sat(rint(X0));
        int sy = convert_int_sat(rint(Y0));

        T v0 = scalar;
        if (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows)
        {
            int src_index = mad24(sy, src_step, sx * pixsize + src_offset);
            storepix(loadpix(srcptr + src_index), dstptr + dst_index);
            v0 = loadpix(srcptr + mad24(sy, src_step, mad24(sx, pixsize, src_offset)));
        }
        else
            storepix(scalar, dstptr + dst_index);

        int dst_index = mad24(dy, dst_step, mad24(dx, pixsize, dst_offset));
        storepix(v0, dstptr + dst_index);
    }
}

@@ -3,57 +3,61 @@
// of this distribution and at http://opencv.org/license.html.

// Shuffle
#define CV_WARP_NEAREST_SCALAR_SHUFFLE_DEF(cn, dtype_reg) \
    dtype_reg p00##cn;
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(cn, dtype_reg) \
    dtype_reg p00##cn, p01##cn, p10##cn, p11##cn;
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_C1(dtype_reg, dtype_ptr) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
#define CV_WARP_SCALAR_SHUFFLE_DEF_C1(inter, dtype_reg, dtype_ptr) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
    const dtype_ptr *srcptr = src + srcstep * iy + ix;
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_C3(dtype_reg, dtype_ptr) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(r, dtype_reg) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(b, dtype_reg) \
#define CV_WARP_SCALAR_SHUFFLE_DEF_C3(inter, dtype_reg, dtype_ptr) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(r, dtype_reg) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(b, dtype_reg) \
    const dtype_ptr *srcptr = src + srcstep * iy + ix*3;
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_C4(dtype_reg, dtype_ptr) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(r, dtype_reg) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(b, dtype_reg) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(a, dtype_reg) \
#define CV_WARP_SCALAR_SHUFFLE_DEF_C4(inter, dtype_reg, dtype_ptr) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(r, dtype_reg) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(b, dtype_reg) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_DEF(a, dtype_reg) \
    const dtype_ptr *srcptr = src + srcstep * iy + ix*4;
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_8U(CN) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##CN(int, uint8_t)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_16U(CN) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##CN(int, uint16_t)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_32F(CN) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##CN(float, float)
#define CV_WARP_SCALAR_SHUFFLE_DEF_8U(INTER, CN) \
    CV_WARP_SCALAR_SHUFFLE_DEF_##CN(INTER, int, uint8_t)
#define CV_WARP_SCALAR_SHUFFLE_DEF_16U(INTER, CN) \
    CV_WARP_SCALAR_SHUFFLE_DEF_##CN(INTER, int, uint16_t)
#define CV_WARP_SCALAR_SHUFFLE_DEF_32F(INTER, CN) \
    CV_WARP_SCALAR_SHUFFLE_DEF_##CN(INTER, float, float)

#define CV_WARP_NEAREST_SCALAR_SHUFFLE_LOAD(CN, cn, i) \
    p00##CN = srcptr[i];
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(CN, cn, i) \
    p00##CN = srcptr[i]; p01##CN = srcptr[i + cn]; \
    p10##CN = srcptr[srcstep + i]; p11##CN = srcptr[srcstep + cn + i];
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C1() \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 1, 0)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C3() \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(r, 3, 0) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 3, 1) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(b, 3, 2)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C4() \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(r, 4, 0) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 4, 1) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(b, 4, 2) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(a, 4, 3)
#define CV_WARP_SCALAR_SHUFFLE_LOAD_C1(inter) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(g, 1, 0)
#define CV_WARP_SCALAR_SHUFFLE_LOAD_C3(inter) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(r, 3, 0) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(g, 3, 1) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(b, 3, 2)
#define CV_WARP_SCALAR_SHUFFLE_LOAD_C4(inter) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(r, 4, 0) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(g, 4, 1) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(b, 4, 2) \
    CV_WARP_##inter##_SCALAR_SHUFFLE_LOAD(a, 4, 3)

#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C1() \
#define CV_WARP_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C1() \
    dstptr[x] = bval[0];
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C3() \
#define CV_WARP_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C3() \
    dstptr[x*3] = bval[0]; \
    dstptr[x*3+1] = bval[1]; \
    dstptr[x*3+2] = bval[2];
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C4() \
#define CV_WARP_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C4() \
    dstptr[x*4] = bval[0]; \
    dstptr[x*4+1] = bval[1]; \
    dstptr[x*4+2] = bval[2]; \
    dstptr[x*4+3] = bval[3];

#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C1(dy, dx, pxy) \
#define CV_WARP_SCALAR_FETCH_PIXEL_C1(dy, dx, pxy) \
    if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
        size_t ofs = dy*srcstep + dx; \
        pxy##g = srcptr[ofs]; \
@@ -67,7 +71,7 @@
        size_t glob_ofs = iy_*srcstep + ix_; \
        pxy##g = src[glob_ofs]; \
    }
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C3(dy, dx, pxy) \
#define CV_WARP_SCALAR_FETCH_PIXEL_C3(dy, dx, pxy) \
    if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
        size_t ofs = dy*srcstep + dx*3; \
        pxy##r = srcptr[ofs]; \
@@ -89,7 +93,7 @@
        pxy##g = src[glob_ofs+1]; \
        pxy##b = src[glob_ofs+2]; \
    }
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C4(dy, dx, pxy) \
#define CV_WARP_SCALAR_FETCH_PIXEL_C4(dy, dx, pxy) \
    if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
        size_t ofs = dy*srcstep + dx*4; \
        pxy##r = srcptr[ofs]; \
@@ -115,83 +119,96 @@
        pxy##b = src[glob_ofs+2]; \
        pxy##a = src[glob_ofs+3]; \
    }
#define CV_WARP_NEAREST_SCALAR_FETCH_PIXEL(CN) \
    CV_WARP_SCALAR_FETCH_PIXEL_##CN(0, 0, p00)
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL(CN) \
    CV_WARP_SCALAR_FETCH_PIXEL_##CN(0, 0, p00) \
    CV_WARP_SCALAR_FETCH_PIXEL_##CN(0, 1, p01) \
    CV_WARP_SCALAR_FETCH_PIXEL_##CN(1, 0, p10) \
    CV_WARP_SCALAR_FETCH_PIXEL_##CN(1, 1, p11)

#define CV_WARP_LINEAR_SCALAR_SHUFFLE(CN, DEPTH) \
#define CV_WARP_SCALAR_NEAREST_COMPUTE_COORD() \
    int ix = cvRound(sx), iy = cvRound(sy);
#define CV_WARP_SCALAR_LINEAR_COMPUTE_COORD() \
    int ix = cvFloor(sx), iy = cvFloor(sy); \
    sx -= ix; sy -= iy; \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##DEPTH(CN); \
    sx -= ix; sy -= iy;

#define CV_WARP_SCALAR_SHUFFLE(INTER, CN, DEPTH) \
    CV_WARP_SCALAR_##INTER##_COMPUTE_COORD() \
    CV_WARP_SCALAR_SHUFFLE_DEF_##DEPTH(INTER, CN) \
    if ((((unsigned)ix < (unsigned)(srccols-1)) & \
         ((unsigned)iy < (unsigned)(srcrows-1))) != 0) { \
        CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_##CN() \
        CV_WARP_SCALAR_SHUFFLE_LOAD_##CN(INTER) \
    } else { \
        if ((border_type == BORDER_CONSTANT || border_type == BORDER_TRANSPARENT) && \
            (((unsigned)(ix+1) >= (unsigned)(srccols+1))| \
             ((unsigned)(iy+1) >= (unsigned)(srcrows+1))) != 0) { \
            if (border_type == BORDER_CONSTANT) { \
                CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_##CN() \
                CV_WARP_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_##CN() \
            } \
            continue; \
        } \
        CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(0, 0, p00); \
        CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(0, 1, p01); \
        CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(1, 0, p10); \
        CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(1, 1, p11); \
        CV_WARP_##INTER##_SCALAR_FETCH_PIXEL(CN) \
    }


// Linear interpolation calculation
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(cn) \
#define CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32(cn) \
    float v0##cn = p00##cn + sx*(p01##cn - p00##cn); \
    float v1##cn = p10##cn + sx*(p11##cn - p10##cn);
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_C1() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(g)
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_C3() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(r) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(g) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(b)
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_C4() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(r) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(g) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(b) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(a)
#define CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32_C1() \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32(g)
#define CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32_C3() \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32(r) \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32(g) \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32(b)
#define CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32_C4() \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32(r) \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32(g) \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32(b) \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32(a)

#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(cn) \
#define CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32(cn) \
    v0##cn += sy*(v1##cn - v0##cn);
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_C1() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(g)
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_C3() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(r) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(g) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(b)
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_C4() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(r) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(g) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(b) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(a)
#define CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32_C1() \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32(g)
#define CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32_C3() \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32(r) \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32(g) \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32(b)
#define CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32_C4() \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32(r) \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32(g) \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32(b) \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32(a)

#define CV_WARP_LINEAR_SCALAR_INTER_CALC_F32(CN) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_##CN() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_##CN()
#define CV_WARP_SCALAR_LINEAR_INTER_CALC_F32(CN) \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_ALPHA_F32_##CN() \
    CV_WARP_SCALAR_LINEAR_INTER_CALC_BETA_F32_##CN()


// Store
#define CV_WARP_LINEAR_SCALAR_STORE_C1(dtype) \
    dstptr[x] = saturate_cast<dtype>(v0g);
#define CV_WARP_LINEAR_SCALAR_STORE_C3(dtype) \
    dstptr[x*3] = saturate_cast<dtype>(v0r); \
    dstptr[x*3+1] = saturate_cast<dtype>(v0g); \
    dstptr[x*3+2] = saturate_cast<dtype>(v0b);
#define CV_WARP_LINEAR_SCALAR_STORE_C4(dtype) \
    dstptr[x*4] = saturate_cast<dtype>(v0r); \
    dstptr[x*4+1] = saturate_cast<dtype>(v0g); \
    dstptr[x*4+2] = saturate_cast<dtype>(v0b); \
    dstptr[x*4+3] = saturate_cast<dtype>(v0a);
#define CV_WARP_LINEAR_SCALAR_STORE_8U(CN) \
    CV_WARP_LINEAR_SCALAR_STORE_##CN(uint8_t)
#define CV_WARP_LINEAR_SCALAR_STORE_16U(CN) \
    CV_WARP_LINEAR_SCALAR_STORE_##CN(uint16_t)
#define CV_WARP_LINEAR_SCALAR_STORE_32F(CN) \
    CV_WARP_LINEAR_SCALAR_STORE_##CN(float)
#define CV_WARP_SCALAR_STORE_C1(dtype, var) \
    dstptr[x] = saturate_cast<dtype>(var##g);
#define CV_WARP_SCALAR_STORE_C3(dtype, var) \
    dstptr[x*3] = saturate_cast<dtype>(var##r); \
    dstptr[x*3+1] = saturate_cast<dtype>(var##g); \
    dstptr[x*3+2] = saturate_cast<dtype>(var##b);
#define CV_WARP_SCALAR_STORE_C4(dtype, var) \
    dstptr[x*4] = saturate_cast<dtype>(var##r); \
    dstptr[x*4+1] = saturate_cast<dtype>(var##g); \
    dstptr[x*4+2] = saturate_cast<dtype>(var##b); \
    dstptr[x*4+3] = saturate_cast<dtype>(var##a);
#define CV_WARP_SCALAR_STORE_8U(CN, var) \
    CV_WARP_SCALAR_STORE_##CN(uint8_t, var)
#define CV_WARP_SCALAR_STORE_16U(CN, var) \
    CV_WARP_SCALAR_STORE_##CN(uint16_t, var)
#define CV_WARP_SCALAR_STORE_32F(CN, var) \
    CV_WARP_SCALAR_STORE_##CN(float, var)

#define CV_WARP_NEAREST_SCALAR_STORE(CN, DEPTH) \
    CV_WARP_SCALAR_STORE_##DEPTH(CN, p00)
#define CV_WARP_LINEAR_SCALAR_STORE(CN, DEPTH) \
    CV_WARP_LINEAR_SCALAR_STORE_##DEPTH(CN)
    CV_WARP_SCALAR_STORE_##DEPTH(CN, v0)
#define CV_WARP_SCALAR_STORE(INTER, CN, DEPTH) \
    CV_WARP_##INTER##_SCALAR_STORE(CN, DEPTH)
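The ALPHA/BETA macros above implement a standard two-step bilinear interpolation; written out for one channel (here `sx`, `sy` are the fractional parts left over after `cvFloor`, while the NEAREST path reduces to a single `cvRound` plus one fetch):

```cpp
// One-channel reference for the ALPHA/BETA macros above: bilinear
// interpolation as two lerps, with sx, sy in [0, 1).
inline float bilinear(float p00, float p01, float p10, float p11, float sx, float sy)
{
    float v0 = p00 + sx * (p01 - p00); // ALPHA step, top row
    float v1 = p10 + sx * (p11 - p10); // ALPHA step, bottom row
    return v0 + sy * (v1 - v0);        // BETA step, across rows
}
```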
@@ -3,6 +3,26 @@
// of this distribution and at http://opencv.org/license.html.

// Shuffle (all pixels within image)
#define CV_WARP_NEAREST_VECTOR_SHUFFLE_ALLWITHIN_C1(dtype) \
    for (int i = 0; i < uf; i++) { \
        const dtype* srcptr = src + addr[i]; \
        pixbuf[i] = srcptr[0];\
    }
#define CV_WARP_NEAREST_VECTOR_SHUFFLE_ALLWITHIN_C3(dtype) \
    for (int i = 0; i < uf; i++) { \
        const dtype* srcptr = src + addr[i]; \
        pixbuf[3*i] = srcptr[0];\
        pixbuf[3*i + 1] = srcptr[1]; \
        pixbuf[3*i + 2] = srcptr[2]; \
    }
#define CV_WARP_NEAREST_VECTOR_SHUFFLE_ALLWITHIN_C4(dtype) \
    for (int i = 0; i < uf; i++) { \
        const dtype* srcptr = src + addr[i]; \
        pixbuf[4*i] = srcptr[0];\
        pixbuf[4*i + 1] = srcptr[1]; \
        pixbuf[4*i + 2] = srcptr[2]; \
        pixbuf[4*i + 3] = srcptr[3]; \
    }
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_C1(dtype) \
    for (int i = 0; i < uf; i++) { \
        const dtype* srcptr = src + addr[i]; \
@@ -47,18 +67,17 @@
        pixbuf[i + uf*11] = srcptr[srcstep + 6]; \
        pixbuf[i + uf*15] = srcptr[srcstep + 7]; \
    }
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_8U(CN) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint8_t)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_16U(CN) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint16_t)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_32F(CN) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(float)

#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN(CN, DEPTH) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##DEPTH(CN)
#define CV_WARP_VECTOR_SHUFFLE_ALLWITHIN_8U(INTER, CN) \
    CV_WARP_##INTER##_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint8_t)
#define CV_WARP_VECTOR_SHUFFLE_ALLWITHIN_16U(INTER, CN) \
    CV_WARP_##INTER##_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint16_t)
#define CV_WARP_VECTOR_SHUFFLE_ALLWITHIN_32F(INTER, CN) \
    CV_WARP_##INTER##_VECTOR_SHUFFLE_ALLWITHIN_##CN(float)
#define CV_WARP_VECTOR_SHUFFLE_ALLWITHIN(INTER, CN, DEPTH) \
    CV_WARP_VECTOR_SHUFFLE_ALLWITHIN_##DEPTH(INTER, CN)

// Shuffle (ARM NEON)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_LOAD() \
#define CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_LOAD() \
    uint8x8x4_t t00 = { \
        vld1_u8(src + addr[0]), \
        vld1_u8(src + addr[1]), \
@@ -84,7 +103,7 @@
        vld1_u8(src + addr[7] + srcstep) \
    }; \
    uint32x2_t p00_, p01_, p10_, p11_;
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(coords, cn) \
#define CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(coords, cn) \
    p00_ = vreinterpret_u32_u8(vtbl4_u8(t00, coords)); \
    p01_ = vreinterpret_u32_u8(vtbl4_u8(t01, coords)); \
    p10_ = vreinterpret_u32_u8(vtbl4_u8(t10, coords)); \
@@ -93,58 +112,58 @@
    p01##cn = vreinterpret_u8_u32(vtrn2_u32(p00_, p01_)); \
    p10##cn = vreinterpret_u8_u32(vtrn1_u32(p10_, p11_)); \
    p11##cn = vreinterpret_u8_u32(vtrn2_u32(p10_, p11_));
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_C1() \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_LOAD() \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(grays, g)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_C3() \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_LOAD() \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(reds, r) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(greens, g) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(blues, b)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_C4() \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_LOAD() \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(reds, r) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(greens, g) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(blues, b) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(alphas, a)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8(CN) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_NEON_U8_##CN()
#define CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_C1() \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_LOAD() \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(grays, g)
#define CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_C3() \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_LOAD() \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(reds, r) \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(greens, g) \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(blues, b)
#define CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_C4() \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_LOAD() \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(reds, r) \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(greens, g) \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(blues, b) \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_TRN(alphas, a)
#define CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8(CN) \
    CV_WARP_VECTOR_LINEAR_SHUFFLE_ALLWITHIN_NEON_U8_##CN()


// Shuffle (not all pixels within image)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC1() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC1() \
    v_store_low(dstptr + x, bval_v0);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC3() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC3() \
    v_store_low(dstptr + x*3, bval_v0); \
    v_store_low(dstptr + x*3 + uf, bval_v1); \
    v_store_low(dstptr + x*3 + uf*2, bval_v2);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC4() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC4() \
    v_store_low(dstptr + x*4, bval_v0); \
    v_store_low(dstptr + x*4 + uf, bval_v1); \
    v_store_low(dstptr + x*4 + uf*2, bval_v2); \
    v_store_low(dstptr + x*4 + uf*3, bval_v3);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC1() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC1() \
    v_store(dstptr + x, bval_v0);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC3() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC3() \
    v_store(dstptr + x*3, bval_v0); \
    v_store(dstptr + x*3 + uf, bval_v1); \
    v_store(dstptr + x*3 + uf*2, bval_v2);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC4() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC4() \
    v_store(dstptr + x*4, bval_v0); \
    v_store(dstptr + x*4 + uf, bval_v1); \
    v_store(dstptr + x*4 + uf*2, bval_v2); \
    v_store(dstptr + x*4 + uf*3, bval_v3);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC1() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC1() \
    v_store(dstptr + x, bval_v0_l); \
    v_store(dstptr + x + vlanes_32, bval_v0_h);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC3() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC3() \
    v_store(dstptr + x*3, bval_v0_l); \
    v_store(dstptr + x*3 + vlanes_32, bval_v0_h); \
    v_store(dstptr + x*3 + uf, bval_v1_l); \
    v_store(dstptr + x*3 + uf + vlanes_32, bval_v1_h); \
    v_store(dstptr + x*3 + uf*2, bval_v2_l); \
    v_store(dstptr + x*3 + uf*2 + vlanes_32, bval_v2_h);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC4() \
#define CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC4() \
    v_store(dstptr + x*4, bval_v0_l); \
    v_store(dstptr + x*4 + vlanes_32, bval_v0_h); \
    v_store(dstptr + x*4 + uf, bval_v1_l); \
@@ -154,70 +173,83 @@
    v_store(dstptr + x*4 + uf*3, bval_v3_l); \
    v_store(dstptr + x*4 + uf*3 + vlanes_32, bval_v3_h);

#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C1(dy, dx, pixbuf_ofs) \
#define CV_WARP_VECTOR_FETCH_PIXEL_C1(dy, dx, pixbuf_ofs0, pixbuf_ofs1) \
    if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
        size_t addr_i = addr[i] + dy*srcstep + dx; \
        pixbuf[i + pixbuf_ofs] = src[addr_i]; \
        pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
    } else if (border_type == BORDER_CONSTANT) { \
        pixbuf[i + pixbuf_ofs] = bval[0]; \
        pixbuf[i + pixbuf_ofs0] = bval[0]; \
    } else if (border_type == BORDER_TRANSPARENT) { \
        pixbuf[i + pixbuf_ofs] = dstptr[x + i]; \
        pixbuf[i + pixbuf_ofs0] = dstptr[x + i]; \
    } else { \
        int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
        int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
        size_t addr_i = iy_*srcstep + ix_; \
        pixbuf[i + pixbuf_ofs] = src[addr_i]; \
        pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
    }
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C3(dy, dx, pixbuf_ofs) \
#define CV_WARP_VECTOR_FETCH_PIXEL_C3(dy, dx, pixbuf_ofs0, pixbuf_ofs1) \
    if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
        size_t addr_i = addr[i] + dy*srcstep + dx*3; \
        pixbuf[i + pixbuf_ofs] = src[addr_i]; \
        pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
        pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
        pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = src[addr_i+1]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = src[addr_i+2]; \
    } else if (border_type == BORDER_CONSTANT) { \
        pixbuf[i + pixbuf_ofs] = bval[0]; \
        pixbuf[i + pixbuf_ofs + uf*4] = bval[1]; \
        pixbuf[i + pixbuf_ofs + uf*8] = bval[2]; \
        pixbuf[i + pixbuf_ofs0] = bval[0]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = bval[1]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = bval[2]; \
    } else if (border_type == BORDER_TRANSPARENT) { \
        pixbuf[i + pixbuf_ofs] = dstptr[(x + i)*3]; \
        pixbuf[i + pixbuf_ofs + uf*4] = dstptr[(x + i)*3 + 1]; \
        pixbuf[i + pixbuf_ofs + uf*8] = dstptr[(x + i)*3 + 2]; \
        pixbuf[i + pixbuf_ofs0] = dstptr[(x + i)*3]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = dstptr[(x + i)*3 + 1]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = dstptr[(x + i)*3 + 2]; \
    } else { \
        int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
        int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
        size_t addr_i = iy_*srcstep + ix_*3; \
        pixbuf[i + pixbuf_ofs] = src[addr_i]; \
        pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
        pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
        pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = src[addr_i+1]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = src[addr_i+2]; \
    }
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C4(dy, dx, pixbuf_ofs) \
#define CV_WARP_VECTOR_FETCH_PIXEL_C4(dy, dx, pixbuf_ofs0, pixbuf_ofs1) \
    if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
        size_t addr_i = addr[i] + dy*srcstep + dx*4; \
        pixbuf[i + pixbuf_ofs] = src[addr_i]; \
        pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
        pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
        pixbuf[i + pixbuf_ofs + uf*12] = src[addr_i+3]; \
        pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = src[addr_i+1]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = src[addr_i+2]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*3] = src[addr_i+3]; \
    } else if (border_type == BORDER_CONSTANT) { \
        pixbuf[i + pixbuf_ofs] = bval[0]; \
        pixbuf[i + pixbuf_ofs + uf*4] = bval[1]; \
        pixbuf[i + pixbuf_ofs + uf*8] = bval[2]; \
        pixbuf[i + pixbuf_ofs + uf*12] = bval[3]; \
        pixbuf[i + pixbuf_ofs0] = bval[0]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = bval[1]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = bval[2]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*3] = bval[3]; \
    } else if (border_type == BORDER_TRANSPARENT) { \
        pixbuf[i + pixbuf_ofs] = dstptr[(x + i)*4]; \
        pixbuf[i + pixbuf_ofs + uf*4] = dstptr[(x + i)*4 + 1]; \
        pixbuf[i + pixbuf_ofs + uf*8] = dstptr[(x + i)*4 + 2]; \
        pixbuf[i + pixbuf_ofs + uf*12] = dstptr[(x + i)*4 + 3]; \
        pixbuf[i + pixbuf_ofs0] = dstptr[(x + i)*4]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = dstptr[(x + i)*4 + 1]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = dstptr[(x + i)*4 + 2]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*3] = dstptr[(x + i)*4 + 3]; \
    } else { \
        int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
        int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
        size_t addr_i = iy_*srcstep + ix_*4; \
        pixbuf[i + pixbuf_ofs] = src[addr_i]; \
        pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
        pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
        pixbuf[i + pixbuf_ofs + uf*12] = src[addr_i+3]; \
        pixbuf[i + pixbuf_ofs0] = src[addr_i]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1] = src[addr_i+1]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*2] = src[addr_i+2]; \
        pixbuf[i + pixbuf_ofs0 + pixbuf_ofs1*3] = src[addr_i+3]; \
    }
#define CV_WARP_NEAREST_VECTOR_FETCH_PIXEL_C1() \
    CV_WARP_VECTOR_FETCH_PIXEL_C1(0, 0, 0, 1);
#define CV_WARP_NEAREST_VECTOR_FETCH_PIXEL_C3() \
    CV_WARP_VECTOR_FETCH_PIXEL_C3(0, 0, 2*i, 1);
#define CV_WARP_NEAREST_VECTOR_FETCH_PIXEL_C4() \
    CV_WARP_VECTOR_FETCH_PIXEL_C4(0, 0, 3*i, 1);
#define CV_WARP_NEAREST_VECTOR_FETCH_PIXEL(CN) \
    CV_WARP_NEAREST_VECTOR_FETCH_PIXEL_##CN()
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL(CN) \
    CV_WARP_VECTOR_FETCH_PIXEL_##CN(0, 0, 0, uf*4); \
    CV_WARP_VECTOR_FETCH_PIXEL_##CN(0, 1, uf, uf*4); \
    CV_WARP_VECTOR_FETCH_PIXEL_##CN(1, 0, uf*2, uf*4); \
    CV_WARP_VECTOR_FETCH_PIXEL_##CN(1, 1, uf*3, uf*4);

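The fetch-pixel macros fill a small staging buffer (`pixbuf`) whose layout differs between the two modes: the nearest path gathers one pixel per unrolled lane with channels strided by the `pixbuf_ofs1` parameter, while the linear path stores four neighbours per lane in `uf*4` groups. A scalar sketch of the single-channel nearest gather, mirroring `CV_WARP_NEAREST_VECTOR_SHUFFLE_ALLWITHIN_C1` above (illustrative only):

```cpp
#include <cstddef>
#include <cstdint>

// Illustrative model of the C1 nearest gather: for each of the `uf` unrolled
// lanes, copy the source pixel at precomputed offset addr[i] into the staging
// buffer that the vector code subsequently loads from.
static void gather_nearest_c1(const uint8_t* src, const size_t addr[], int uf, uint8_t* pixbuf)
{
    for (int i = 0; i < uf; i++)
        pixbuf[i] = src[addr[i]];
}
```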
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN(CN, DEPTH) \
#define CV_WARP_VECTOR_SHUFFLE_NOTALLWITHIN(INTER, CN, DEPTH) \
if (border_type == BORDER_CONSTANT || border_type == BORDER_TRANSPARENT) { \
mask_0 = v_lt(v_reinterpret_as_u32(v_add(src_ix0, one)), outer_scols); \
mask_1 = v_lt(v_reinterpret_as_u32(v_add(src_ix1, one)), outer_scols); \
@@ -226,7 +258,7 @@
v_uint16 outer_mask = v_pack(mask_0, mask_1); \
if (v_reduce_max(outer_mask) == 0) { \
if (border_type == BORDER_CONSTANT) { \
CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_##DEPTH##CN() \
CV_WARP_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_##DEPTH##CN() \
} \
continue; \
} \
@@ -237,111 +269,135 @@
vx_store(src_iy + vlanes_32, src_iy1); \
for (int i = 0; i < uf; i++) { \
int ix = src_ix[i], iy = src_iy[i]; \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(0, 0, 0); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(0, 1, uf); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(1, 0, uf*2); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(1, 1, uf*3); \
CV_WARP_##INTER##_VECTOR_FETCH_PIXEL(CN) \
}
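
The early-out above packs per-lane bounds masks and skips the whole gather when no lane can touch the source image. A self-contained sketch of that test (assumes OpenCV universal intrinsics; the [-1, srccols-1] window is what the +1 bias encodes):

#include <opencv2/core/hal/intrin.hpp>

// A lane may still contribute if (unsigned)(ix + 1) < (unsigned)(srccols + 1),
// i.e. ix lies in [-1, srccols-1]; if the reduced mask is zero, every lane is
// fully outside and the batch can be filled with the border value directly.
static bool any_lane_touches_source(const cv::v_int32& src_ix, int srccols)
{
    cv::v_uint32 outer_scols = cv::vx_setall_u32((unsigned)srccols + 1);
    cv::v_int32 one = cv::vx_setall_s32(1);
    cv::v_uint32 mask = cv::v_lt(cv::v_reinterpret_as_u32(cv::v_add(src_ix, one)), outer_scols);
    return cv::v_reduce_max(mask) != 0;
}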
// Shuffle (not all pixels within image) (ARM NEON)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(cn, offset)\
#define CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(cn, offset)\
p00##cn = vld1_u8(pixbuf + offset); \
p01##cn = vld1_u8(pixbuf + offset + 8); \
p10##cn = vld1_u8(pixbuf + offset + 16); \
p11##cn = vld1_u8(pixbuf + offset + 24);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_C1() \
CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(g, 0)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_C3() \
CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(r, 0) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(g, 32) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(b, 64)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_C4() \
CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(r, 0) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(g, 32) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(b, 64) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(a, 96)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8(CN) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN_NEON_U8_##CN()
#define CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_C1() \
CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(g, 0)
#define CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_C3() \
CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(r, 0) \
CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(g, 32) \
CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(b, 64)
#define CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_C4() \
CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(r, 0) \
CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(g, 32) \
CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(b, 64) \
CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_LOAD(a, 96)
#define CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8(CN) \
CV_WARP_VECTOR_LINEAR_SHUFFLE_NOTALLWITHIN_NEON_U8_##CN()
// Load pixels for linear interpolation (uint8_t -> int16_t)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(cn, i) \
v_int16 f00##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * i)), \
f01##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+1))), \
f10##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+2))), \
f11##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+3)));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 0)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(b, 8)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(b, 8) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(a, 12)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_##CN();
// [New] Load pixels for interpolation
#define CV_WARP_VECTOR_NEAREST_LOAD_CN_8U_16U(cn, i) \
v_uint16 f00##cn = vx_load_expand(pixbuf + uf * i);
#define CV_WARP_VECTOR_NEAREST_LOAD_CN_16U_16U(cn, i) \
v_uint16 f00##cn = vx_load(pixbuf + uf * i);
#define CV_WARP_VECTOR_NEAREST_LOAD_CN_32F_32F(cn, i) \
v_float32 f00##cn##l = vx_load(pixbuf + uf * i); \
v_float32 f00##cn##h = vx_load(pixbuf + uf * i + vlanes_32);
#define CV_WARP_VECTOR_LINEAR_LOAD_CN_8U_16U(cn, i) \
v_uint16 f00##cn = vx_load_expand(pixbuf + uf * 4*i), \
f01##cn = vx_load_expand(pixbuf + uf * (4*i+1)), \
f10##cn = vx_load_expand(pixbuf + uf * (4*i+2)), \
f11##cn = vx_load_expand(pixbuf + uf * (4*i+3));
#define CV_WARP_VECTOR_LINEAR_LOAD_CN_16U_16U(cn, i) \
v_uint16 f00##cn = vx_load(pixbuf + uf * 4*i), \
f01##cn = vx_load(pixbuf + uf * (4*i+1)), \
f10##cn = vx_load(pixbuf + uf * (4*i+2)), \
f11##cn = vx_load(pixbuf + uf * (4*i+3));
#define CV_WARP_VECTOR_LINEAR_LOAD_CN_32F_32F(cn, i) \
v_float32 f00##cn##l = vx_load(pixbuf + uf * 4*i), \
f00##cn##h = vx_load(pixbuf + uf * 4*i + vlanes_32); \
v_float32 f01##cn##l = vx_load(pixbuf + uf * (4*i+1)), \
f01##cn##h = vx_load(pixbuf + uf * (4*i+1) + vlanes_32); \
v_float32 f10##cn##l = vx_load(pixbuf + uf * (4*i+2)), \
f10##cn##h = vx_load(pixbuf + uf * (4*i+2) + vlanes_32); \
v_float32 f11##cn##l = vx_load(pixbuf + uf * (4*i+3)), \
f11##cn##h = vx_load(pixbuf + uf * (4*i+3) + vlanes_32);
#define CV_WARP_VECTOR_INTER_LOAD_C1(INTER, SDEPTH, DDEPTH) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(g, 0)
#define CV_WARP_VECTOR_INTER_LOAD_C3(INTER, SDEPTH, DDEPTH) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(r, 0) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(g, 1) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(b, 2)
#define CV_WARP_VECTOR_INTER_LOAD_C4(INTER, SDEPTH, DDEPTH) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(r, 0) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(g, 1) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(b, 2) \
CV_WARP_VECTOR_##INTER##_LOAD_CN_##SDEPTH##_##DDEPTH(a, 3)
#define CV_WARP_VECTOR_INTER_LOAD(INTER, CN, SDEPTH, DDEPTH) \
CV_WARP_VECTOR_INTER_LOAD_##CN(INTER, SDEPTH, DDEPTH)
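
The token-pasted dispatcher resolves to one concrete loader per (interpolation, channel count, source depth, working depth) combination; one expansion spelled out for illustration:

// Illustrative macro expansion (not additional source code):
// CV_WARP_VECTOR_INTER_LOAD(NEAREST, C1, 8U, 16U)
//   -> CV_WARP_VECTOR_INTER_LOAD_C1(NEAREST, 8U, 16U)
//   -> CV_WARP_VECTOR_NEAREST_LOAD_CN_8U_16U(g, 0)
//   -> v_uint16 f00g = vx_load_expand(pixbuf + uf * 0);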
// Load pixels for linear interpolation (uint8_t -> int16_t) (ARM NEON)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16_NEON(cn) \
v_int16 f00##cn = v_reinterpret_as_s16(v_uint16(vmovl_u8(p00##cn))), \
f01##cn = v_reinterpret_as_s16(v_uint16(vmovl_u8(p01##cn))), \
f10##cn = v_reinterpret_as_s16(v_uint16(vmovl_u8(p10##cn))), \
f11##cn = v_reinterpret_as_s16(v_uint16(vmovl_u8(p11##cn)));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_NEON_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16_NEON(g)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_NEON_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16_NEON(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16_NEON(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16_NEON(b)
// [New] Store
#define CV_WARP_VECTOR_NEAREST_STORE_C1_16U_8U() \
v_pack_store(dstptr + x, f00g);
#define CV_WARP_VECTOR_NEAREST_STORE_C3_16U_8U() \
v_pack_store(dstptr + 3*x, f00r); \
v_pack_store(dstptr + 3*x + uf, f00g); \
v_pack_store(dstptr + 3*x + uf*2, f00b);
#define CV_WARP_VECTOR_NEAREST_STORE_C4_16U_8U() \
v_pack_store(dstptr + 4*x, f00r); \
v_pack_store(dstptr + 4*x + uf, f00g); \
v_pack_store(dstptr + 4*x + uf*2, f00b); \
v_pack_store(dstptr + 4*x + uf*3, f00a);
#define CV_WARP_VECTOR_NEAREST_STORE_C1_16U_16U() \
vx_store(dstptr + x, f00g);
#define CV_WARP_VECTOR_NEAREST_STORE_C3_16U_16U() \
vx_store(dstptr + 3*x, f00r); \
vx_store(dstptr + 3*x + uf, f00g); \
vx_store(dstptr + 3*x + uf*2, f00b);
#define CV_WARP_VECTOR_NEAREST_STORE_C4_16U_16U() \
vx_store(dstptr + 4*x, f00r); \
vx_store(dstptr + 4*x + uf, f00g); \
vx_store(dstptr + 4*x + uf*2, f00b); \
vx_store(dstptr + 4*x + uf*3, f00a);
#define CV_WARP_VECTOR_NEAREST_STORE_C1_32F_32F() \
vx_store(dstptr + x, f00gl); \
vx_store(dstptr + x + vlanes_32, f00gh);
#define CV_WARP_VECTOR_NEAREST_STORE_C3_32F_32F() \
vx_store(dstptr + 3*x, f00rl); \
vx_store(dstptr + 3*x + vlanes_32, f00rh); \
vx_store(dstptr + 3*x + uf, f00gl); \
vx_store(dstptr + 3*x + uf + vlanes_32, f00gh); \
vx_store(dstptr + 3*x + uf*2, f00bl); \
vx_store(dstptr + 3*x + uf*2 + vlanes_32, f00bh);
#define CV_WARP_VECTOR_NEAREST_STORE_C4_32F_32F() \
vx_store(dstptr + 4*x, f00rl); \
vx_store(dstptr + 4*x + vlanes_32, f00rh); \
vx_store(dstptr + 4*x + uf, f00gl); \
vx_store(dstptr + 4*x + uf + vlanes_32, f00gh); \
vx_store(dstptr + 4*x + uf*2, f00bl); \
vx_store(dstptr + 4*x + uf*2 + vlanes_32, f00bh); \
vx_store(dstptr + 4*x + uf*3, f00al); \
vx_store(dstptr + 4*x + uf*3 + vlanes_32, f00ah);
#define CV_WARP_VECTOR_INTER_STORE(INTER, CN, SDEPTH, DDEPTH) \
CV_WARP_VECTOR_##INTER##_STORE_##CN##_##SDEPTH##_##DDEPTH()
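
In the 16U_8U store variants each channel plane is narrowed with a saturating pack on the way out; a minimal standalone sketch of the pattern (the function and buffer names are placeholders, not the PR's API):

#include <opencv2/core/hal/intrin.hpp>
#include <cstdint>

// Saturating u16 -> u8 narrowing store: one register per channel plane,
// written at the per-plane destination offsets used by the macros above.
static void store_plane_u16_to_u8(const cv::v_uint16& plane, uint8_t* dstptr, int offset)
{
    cv::v_pack_store(dstptr + offset, plane);
}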
// Load pixels for linear interpolation (uint8_t -> uint16_t) (ARM NEON)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8U16_NEON(cn) \
v_uint16 f00##cn = v_uint16(vmovl_u8(p00##cn)), \
f01##cn = v_uint16(vmovl_u8(p01##cn)), \
f10##cn = v_uint16(vmovl_u8(p10##cn)), \
f11##cn = v_uint16(vmovl_u8(p11##cn));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8U16_NEON_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8U16_NEON(g)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8U16_NEON_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8U16_NEON(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8U16_NEON(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8U16_NEON(b)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_NEON_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16_NEON(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16_NEON(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16_NEON(b) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16_NEON(a)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_NEON(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_NEON_##CN();

// Load pixels for linear interpolation (uint16_t -> uint16_t)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(cn, i) \
v_uint16 f00##cn = vx_load(pixbuf + uf * i), \
f01##cn = vx_load(pixbuf + uf * (i+1)), \
f10##cn = vx_load(pixbuf + uf * (i+2)), \
f11##cn = vx_load(pixbuf + uf * (i+3));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(g, 0)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(b, 8)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(b, 8) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(a, 12)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_##CN();

// Load pixels for linear interpolation (int16_t -> float)
#define CV_WARP_LINEAR_VECTOR_INTER_CONVERT_CN_S16F32(cn) \
v_float32 f00##cn##l = v_cvt_f32(v_expand_low(f00##cn)), f00##cn##h = v_cvt_f32(v_expand_high(f00##cn)), \
f01##cn##l = v_cvt_f32(v_expand_low(f01##cn)), f01##cn##h = v_cvt_f32(v_expand_high(f01##cn)), \
f10##cn##l = v_cvt_f32(v_expand_low(f10##cn)), f10##cn##h = v_cvt_f32(v_expand_high(f10##cn)), \
f11##cn##l = v_cvt_f32(v_expand_low(f11##cn)), f11##cn##h = v_cvt_f32(v_expand_high(f11##cn));
#define CV_WARP_LINEAR_VECTOR_INTER_CONVERT_S16F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_CONVERT_CN_S16F32(g)
#define CV_WARP_LINEAR_VECTOR_INTER_CONVERT_S16F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_CONVERT_CN_S16F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_CONVERT_CN_S16F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_CONVERT_CN_S16F32(b)
#define CV_WARP_LINEAR_VECTOR_INTER_CONVERT_S16F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_CONVERT_CN_S16F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_CONVERT_CN_S16F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_CONVERT_CN_S16F32(b) \
CV_WARP_LINEAR_VECTOR_INTER_CONVERT_CN_S16F32(a)
#define CV_WARP_LINEAR_VECTOR_INTER_CONVERT_S16F32(CN) \
CV_WARP_LINEAR_VECTOR_INTER_CONVERT_S16F32_##CN()
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8U16_NEON(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8U16_NEON(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8U16_NEON(b) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8U16_NEON(a)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8U16_NEON(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8U16_NEON_##CN();

// Load pixels for linear interpolation (uint16_t -> float)
#define CV_WARP_LINEAR_VECTOR_INTER_CONVERT_CN_U16F32(cn) \
@@ -363,26 +419,6 @@
#define CV_WARP_LINEAR_VECTOR_INTER_CONVERT_U16F32(CN) \
CV_WARP_LINEAR_VECTOR_INTER_CONVERT_U16F32_##CN()

// Load pixels for linear interpolation (float -> float)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(cn, i) \
v_float32 f00##cn##l = vx_load(pixbuf + uf * i), f00##cn##h = vx_load(pixbuf + uf * i + vlanes_32), \
f01##cn##l = vx_load(pixbuf + uf * (i+1)), f01##cn##h = vx_load(pixbuf + uf * (i+1) + vlanes_32), \
f10##cn##l = vx_load(pixbuf + uf * (i+2)), f10##cn##h = vx_load(pixbuf + uf * (i+2) + vlanes_32), \
f11##cn##l = vx_load(pixbuf + uf * (i+3)), f11##cn##h = vx_load(pixbuf + uf * (i+3) + vlanes_32);
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(g, 0)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(b, 8)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(b, 8) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(a, 12)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_##CN()

// Load pixels for linear interpolation (uint8_t -> float16)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8F16(cn) \
v_float16 f00##cn = v_float16(vcvtq_f16_u16(vmovl_u8(p00##cn))), \
@@ -556,9 +592,30 @@
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F16U8(CN) \
CV_WARP_LINEAR_VECTOR_INTER_STORE_F16U8_##CN()
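
The interpolation arithmetic itself runs in f32, so the 16-bit registers loaded above are widened into low/high float halves first; a self-contained sketch of that step (assumes universal intrinsics; helper name is ours):

#include <opencv2/core/hal/intrin.hpp>

// Widen one s16 register into two f32 registers, mirroring the S16F32
// convert macros: low lanes first, then high lanes.
static void widen_s16_to_f32(const cv::v_int16& f, cv::v_float32& lo, cv::v_float32& hi)
{
    lo = cv::v_cvt_f32(cv::v_expand_low(f));
    hi = cv::v_cvt_f32(cv::v_expand_high(f));
}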
// Special case for C4 load, shuffle and bilinear interpolation
#define CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_8UC4_I(ofs) \
// Special case for C4 shuffle, interpolation and store
// SIMD128, nearest
#define CV_WARP_SIMD128_NEAREST_SHUFFLE_INTER_8UC4_I(ofs) \
const uint8_t *srcptr##ofs = src + addr[i+ofs]; \
v_uint32 i##ofs##_pix0 = vx_load_expand_q(srcptr##ofs);
#define CV_WARP_SIMD128_NEAREST_SHUFFLE_INTER_16UC4_I(ofs) \
const uint16_t *srcptr##ofs = src + addr[i+ofs]; \
v_uint32 i##ofs##_pix0 = vx_load_expand(srcptr##ofs);
#define CV_WARP_SIMD128_NEAREST_SHUFFLE_INTER_32FC4_I(ofs) \
const float *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_pix0 = vx_load(srcptr##ofs);
#define CV_WARP_SIMD128_NEAREST_STORE_8UC4_I() \
v_pack_store(dstptr + 4*(x+i), v_pack(i0_pix0, i1_pix0)); \
v_pack_store(dstptr + 4*(x+i+2), v_pack(i2_pix0, i3_pix0));
#define CV_WARP_SIMD128_NEAREST_STORE_16UC4_I() \
vx_store(dstptr + 4*(x+i), v_pack(i0_pix0, i1_pix0)); \
vx_store(dstptr + 4*(x+i+2), v_pack(i2_pix0, i3_pix0));
#define CV_WARP_SIMD128_NEAREST_STORE_32FC4_I() \
vx_store(dstptr + 4*(x+i), i0_pix0); \
vx_store(dstptr + 4*(x+i+1), i1_pix0); \
vx_store(dstptr + 4*(x+i+2), i2_pix0); \
vx_store(dstptr + 4*(x+i+3), i3_pix0);
// SIMD128, bilinear
#define CV_WARP_SIMD128_LINEAR_SHUFFLE_INTER_8UC4_I(ofs) \
const uint8_t *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_pix0 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(srcptr##ofs))); \
v_float32 i##ofs##_pix1 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(srcptr##ofs+4))); \
@@ -569,7 +626,7 @@
i##ofs##_pix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix1, i##ofs##_pix0), i##ofs##_pix0); \
i##ofs##_pix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix3, i##ofs##_pix2), i##ofs##_pix2); \
i##ofs##_pix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_pix2, i##ofs##_pix0), i##ofs##_pix0);
#define CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_16UC4_I(ofs) \
#define CV_WARP_SIMD128_LINEAR_SHUFFLE_INTER_16UC4_I(ofs) \
const uint16_t *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_pix0 = v_cvt_f32(v_reinterpret_as_s32(vx_load_expand(srcptr##ofs))); \
v_float32 i##ofs##_pix1 = v_cvt_f32(v_reinterpret_as_s32(vx_load_expand(srcptr##ofs+4))); \
@@ -580,7 +637,7 @@
i##ofs##_pix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix1, i##ofs##_pix0), i##ofs##_pix0); \
i##ofs##_pix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix3, i##ofs##_pix2), i##ofs##_pix2); \
i##ofs##_pix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_pix2, i##ofs##_pix0), i##ofs##_pix0);
#define CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_32FC4_I(ofs) \
#define CV_WARP_SIMD128_LINEAR_SHUFFLE_INTER_32FC4_I(ofs) \
const float *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_pix0 = vx_load(srcptr##ofs); \
v_float32 i##ofs##_pix1 = vx_load(srcptr##ofs+4); \
@@ -591,30 +648,59 @@
i##ofs##_pix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix1, i##ofs##_pix0), i##ofs##_pix0); \
i##ofs##_pix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix3, i##ofs##_pix2), i##ofs##_pix2); \
i##ofs##_pix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_pix2, i##ofs##_pix0), i##ofs##_pix0);
#define CV_WARP_SIMD128_STORE_8UC4_I() \
#define CV_WARP_SIMD128_LINEAR_STORE_8UC4_I() \
v_uint16 i01_pix = v_pack_u(v_round(i0_pix0), v_round(i1_pix0)); \
v_uint16 i23_pix = v_pack_u(v_round(i2_pix0), v_round(i3_pix0)); \
v_pack_store(dstptr + 4*(x+i), i01_pix); \
v_pack_store(dstptr + 4*(x+i+2), i23_pix);
#define CV_WARP_SIMD128_STORE_16UC4_I() \
#define CV_WARP_SIMD128_LINEAR_STORE_16UC4_I() \
v_uint16 i01_pix = v_pack_u(v_round(i0_pix0), v_round(i1_pix0)); \
v_uint16 i23_pix = v_pack_u(v_round(i2_pix0), v_round(i3_pix0)); \
vx_store(dstptr + 4*(x+i), i01_pix); \
vx_store(dstptr + 4*(x+i+2), i23_pix);
#define CV_WARP_SIMD128_STORE_32FC4_I() \
vx_store(dstptr + 4*(x+i), i0_pix0); \
vx_store(dstptr + 4*(x+i)+4, i1_pix0); \
vx_store(dstptr + 4*(x+i)+8, i2_pix0); \
vx_store(dstptr + 4*(x+i)+12, i3_pix0);
#define CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_C4(DEPTH) \
#define CV_WARP_SIMD128_LINEAR_STORE_32FC4_I() \
vx_store(dstptr + 4*(x+i), i0_pix0); \
vx_store(dstptr + 4*(x+i+1), i1_pix0); \
vx_store(dstptr + 4*(x+i+2), i2_pix0); \
vx_store(dstptr + 4*(x+i+3), i3_pix0);
#define CV_WARP_SIMD128_SHUFFLE_INTER_STORE_C4(INTER, DEPTH) \
for (int i = 0; i < uf; i+=vlanes_32) { \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(0); \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(1); \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(2); \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(3); \
CV_WARP_SIMD128_STORE_##DEPTH##C4_I(); \
CV_WARP_SIMD128_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(0); \
CV_WARP_SIMD128_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(1); \
CV_WARP_SIMD128_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(2); \
CV_WARP_SIMD128_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(3); \
CV_WARP_SIMD128_##INTER##_STORE_##DEPTH##C4_I(); \
}
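
The C4 fast path works because a whole 4-channel pixel occupies one group of 32-bit lanes, so a single expand-load replaces four per-channel gathers. A hedged sketch of the 8UC4 nearest lane, with addr taken to be the precomputed per-pixel offset table used by the macros above:

#include <opencv2/core/hal/intrin.hpp>
#include <cstdint>

// One destination pixel of the 8UC4 nearest fast path: the four u8 channels
// expand to four u32 lanes in a single vx_load_expand_q.
static cv::v_uint32 load_nearest_8uc4(const uint8_t* src, const int* addr, int i, int ofs)
{
    const uint8_t* srcptr = src + addr[i + ofs];
    return cv::vx_load_expand_q(srcptr);
}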
#define CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_8UC4_I(ofs0, ofs1) \
// SIMD256, nearest
#define CV_WARP_SIMD256_NEAREST_SHUFFLE_INTER_8UC4_I(ofs0, ofs1) \
const uint8_t *srcptr##ofs0 = src + addr[i+ofs0]; \
const uint8_t *srcptr##ofs1 = src + addr[i+ofs1]; \
v_uint32 i##ofs0##_pix0x = v256_load_expand_q(srcptr##ofs0); \
v_uint32 i##ofs1##_pix0x = v256_load_expand_q(srcptr##ofs1); \
v_uint32 i##ofs0##ofs1##_pix00 = v_combine_low(i##ofs0##_pix0x, i##ofs1##_pix0x);
#define CV_WARP_SIMD256_NEAREST_SHUFFLE_INTER_16UC4_I(ofs0, ofs1) \
const uint16_t *srcptr##ofs0 = src + addr[i+ofs0]; \
const uint16_t *srcptr##ofs1 = src + addr[i+ofs1]; \
v_uint32 i##ofs0##_pix0x = v256_load_expand(srcptr##ofs0); \
v_uint32 i##ofs1##_pix0x = v256_load_expand(srcptr##ofs1); \
v_uint32 i##ofs0##ofs1##_pix00 = v_combine_low(i##ofs0##_pix0x, i##ofs1##_pix0x);
#define CV_WARP_SIMD256_NEAREST_SHUFFLE_INTER_32FC4_I(ofs0, ofs1) \
const float *srcptr##ofs0 = src + addr[i+ofs0]; \
const float *srcptr##ofs1 = src + addr[i+ofs1]; \
v_float32 i##ofs0##ofs1##_fpix00 = vx_load_halves(srcptr##ofs0, srcptr##ofs1);
#define CV_WARP_SIMD256_NEAREST_STORE_8UC4_I() \
v_pack_store(dstptr + 4*(x+i), v_pack(i01_pix00, i23_pix00)); \
v_pack_store(dstptr + 4*(x+i+4), v_pack(i45_pix00, i67_pix00));
#define CV_WARP_SIMD256_NEAREST_STORE_16UC4_I() \
vx_store(dstptr + 4*(x+i), v_pack(i01_pix00, i23_pix00)); \
vx_store(dstptr + 4*(x+i+4), v_pack(i45_pix00, i67_pix00));
#define CV_WARP_SIMD256_NEAREST_STORE_32FC4_I() \
vx_store(dstptr + 4*(x+i), i01_fpix00); \
vx_store(dstptr + 4*(x+i)+8, i23_fpix00); \
vx_store(dstptr + 4*(x+i)+16, i45_fpix00); \
vx_store(dstptr + 4*(x+i)+24, i67_fpix00);
// SIMD256, bilinear
#define CV_WARP_SIMD256_LINEAR_SHUFFLE_INTER_8UC4_I(ofs0, ofs1) \
const uint8_t *srcptr##ofs0 = src + addr[i+ofs0]; \
const uint8_t *srcptr##ofs1 = src + addr[i+ofs1]; \
v_int32 i##ofs0##_pix01 = v_reinterpret_as_s32(v256_load_expand_q(srcptr##ofs0)), \
@@ -635,8 +721,9 @@
i##ofs0##ofs1##_beta = v_combine_low(i##ofs0##_beta, i##ofs1##_beta); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix11, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00); \
i##ofs0##ofs1##_fpix22 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix33, i##ofs0##ofs1##_fpix22), i##ofs0##ofs1##_fpix22); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_beta, v_sub(i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00);
#define CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_16UC4_I(ofs0, ofs1) \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_beta, v_sub(i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00); \
auto i##ofs0##ofs1##_pix00 = v_round(i##ofs0##ofs1##_fpix00);
#define CV_WARP_SIMD256_LINEAR_SHUFFLE_INTER_16UC4_I(ofs0, ofs1) \
const uint16_t *srcptr##ofs0 = src + addr[i+ofs0]; \
const uint16_t *srcptr##ofs1 = src + addr[i+ofs1]; \
v_int32 i##ofs0##_pix01 = v_reinterpret_as_s32(v256_load_expand(srcptr##ofs0)), \
@@ -657,8 +744,9 @@
i##ofs0##ofs1##_beta = v_combine_low(i##ofs0##_beta, i##ofs1##_beta); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix11, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00); \
i##ofs0##ofs1##_fpix22 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix33, i##ofs0##ofs1##_fpix22), i##ofs0##ofs1##_fpix22); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_beta, v_sub(i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00);
#define CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_32FC4_I(ofs0, ofs1) \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_beta, v_sub(i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00); \
auto i##ofs0##ofs1##_pix00 = v_round(i##ofs0##ofs1##_fpix00);
#define CV_WARP_SIMD256_LINEAR_SHUFFLE_INTER_32FC4_I(ofs0, ofs1) \
const float *srcptr##ofs0 = src + addr[i+ofs0]; \
const float *srcptr##ofs1 = src + addr[i+ofs1]; \
v_float32 i##ofs0##_fpix01 = v256_load(srcptr##ofs0), \
@@ -678,30 +766,48 @@
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix11, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00); \
i##ofs0##ofs1##_fpix22 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix33, i##ofs0##ofs1##_fpix22), i##ofs0##ofs1##_fpix22); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_beta, v_sub(i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00);
#define CV_WARP_SIMD256_STORE_8UC4_I() \
auto i01_pix = v_round(i01_fpix00), i23_pix = v_round(i23_fpix00); \
v_pack_store(dstptr + 4*(x+i), v_pack_u(i01_pix, i23_pix)); \
auto i45_pix = v_round(i45_fpix00), i67_pix = v_round(i67_fpix00); \
v_pack_store(dstptr + 4*(x+i+4), v_pack_u(i45_pix, i67_pix));
#define CV_WARP_SIMD256_STORE_16UC4_I() \
auto i01_pix = v_round(i01_fpix00), i23_pix = v_round(i23_fpix00); \
vx_store(dstptr + 4*(x+i), v_pack_u(i01_pix, i23_pix)); \
auto i45_pix = v_round(i45_fpix00), i67_pix = v_round(i67_fpix00); \
vx_store(dstptr + 4*(x+i+4), v_pack_u(i45_pix, i67_pix));
#define CV_WARP_SIMD256_STORE_32FC4_I() \
#define CV_WARP_SIMD256_LINEAR_STORE_8UC4_I() \
v_pack_store(dstptr + 4*(x+i), v_pack_u(i01_pix00, i23_pix00)); \
v_pack_store(dstptr + 4*(x+i+4), v_pack_u(i45_pix00, i67_pix00));
#define CV_WARP_SIMD256_LINEAR_STORE_16UC4_I() \
vx_store(dstptr + 4*(x+i), v_pack_u(i01_pix00, i23_pix00)); \
vx_store(dstptr + 4*(x+i+4), v_pack_u(i45_pix00, i67_pix00));
#define CV_WARP_SIMD256_LINEAR_STORE_32FC4_I() \
vx_store(dstptr + 4*(x+i), i01_fpix00); \
vx_store(dstptr + 4*(x+i)+8, i23_fpix00); \
vx_store(dstptr + 4*(x+i)+16, i45_fpix00); \
vx_store(dstptr + 4*(x+i)+24, i67_fpix00);
#define CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_C4(DEPTH) \
#define CV_WARP_SIMD256_SHUFFLE_INTER_STORE_C4(INTER, DEPTH) \
for (int i = 0; i < uf; i+=vlanes_32) { \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(0, 1); \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(2, 3); \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(4, 5); \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(6, 7); \
CV_WARP_SIMD256_STORE_##DEPTH##C4_I(); \
CV_WARP_SIMD256_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(0, 1) \
CV_WARP_SIMD256_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(2, 3) \
CV_WARP_SIMD256_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(4, 5) \
CV_WARP_SIMD256_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(6, 7) \
CV_WARP_SIMD256_##INTER##_STORE_##DEPTH##C4_I() \
}
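
On 256-bit registers two 4-channel pixels are fused per register, so every blend or store touches two output pixels at once; a sketch of the pairing step (assumes a CV_SIMD256 build; function name is ours):

#include <opencv2/core/hal/intrin.hpp>
#include <cstdint>

#if CV_SIMD256
// Two 8UC4 pixels -> one 256-bit u32 register: each expand-load fills the low
// four lanes, and v_combine_low concatenates the two low halves.
static cv::v_uint32 pair_two_8uc4(const uint8_t* p0, const uint8_t* p1)
{
    cv::v_uint32 a = cv::v256_load_expand_q(p0);
    cv::v_uint32 b = cv::v256_load_expand_q(p1);
    return cv::v_combine_low(a, b);
}
#endif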
#define CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_8UC4_I(ofs) \
// SIMD_SCALABLE (SIMDX), nearest
#define CV_WARP_SIMDX_NEAREST_SHUFFLE_INTER_8UC4_I(ofs) \
const uint8_t *srcptr##ofs = src + addr[i+ofs]; \
v_uint32 i##ofs##_pix0 = v_load_expand_q<4>(srcptr##ofs);
#define CV_WARP_SIMDX_NEAREST_SHUFFLE_INTER_16UC4_I(ofs) \
const uint16_t *srcptr##ofs = src + addr[i+ofs]; \
v_uint32 i##ofs##_pix0 = v_load_expand<4>(srcptr##ofs);
#define CV_WARP_SIMDX_NEAREST_SHUFFLE_INTER_32FC4_I(ofs) \
const float *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_fpix0 = v_load<4>(srcptr##ofs);
#define CV_WARP_SIMDX_NEAREST_STORE_8UC4_I() \
v_pack_store<8>(dstptr + 4*(x+i), v_pack<4>(i0_pix0, i1_pix0)); \
v_pack_store<8>(dstptr + 4*(x+i+2), v_pack<4>(i2_pix0, i3_pix0));
#define CV_WARP_SIMDX_NEAREST_STORE_16UC4_I() \
v_store<8>(dstptr + 4*(x+i), v_pack<4>(i0_pix0, i1_pix0)); \
v_store<8>(dstptr + 4*(x+i+2), v_pack<4>(i2_pix0, i3_pix0));
#define CV_WARP_SIMDX_NEAREST_STORE_32FC4_I() \
v_store<4>(dstptr + 4*(x+i), i0_fpix0); \
v_store<4>(dstptr + 4*(x+i)+4, i1_fpix0); \
v_store<4>(dstptr + 4*(x+i)+8, i2_fpix0); \
v_store<4>(dstptr + 4*(x+i)+12, i3_fpix0);
// SIMD_SCALABLE (SIMDX), bilinear
#define CV_WARP_SIMDX_LINEAR_SHUFFLE_INTER_8UC4_I(ofs) \
const uint8_t *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_fpix0 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q<4>(srcptr##ofs))), \
i##ofs##_fpix1 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q<4>(srcptr##ofs+4))), \
@@ -711,8 +817,9 @@
i##ofs##_beta = vx_setall_f32(vbeta[i+ofs]); \
i##ofs##_fpix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix1, i##ofs##_fpix0), i##ofs##_fpix0); \
i##ofs##_fpix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix3, i##ofs##_fpix2), i##ofs##_fpix2); \
i##ofs##_fpix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_fpix2, i##ofs##_fpix0), i##ofs##_fpix0);
#define CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_16UC4_I(ofs) \
i##ofs##_fpix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_fpix2, i##ofs##_fpix0), i##ofs##_fpix0); \
auto i##ofs##_pix0 = v_round(i##ofs##_fpix0);
#define CV_WARP_SIMDX_LINEAR_SHUFFLE_INTER_16UC4_I(ofs) \
const uint16_t *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_fpix0 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand<4>(srcptr##ofs))), \
i##ofs##_fpix1 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand<4>(srcptr##ofs+4))), \
@@ -722,8 +829,9 @@
i##ofs##_beta = vx_setall_f32(vbeta[i+ofs]); \
i##ofs##_fpix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix1, i##ofs##_fpix0), i##ofs##_fpix0); \
i##ofs##_fpix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix3, i##ofs##_fpix2), i##ofs##_fpix2); \
i##ofs##_fpix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_fpix2, i##ofs##_fpix0), i##ofs##_fpix0);
#define CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_32FC4_I(ofs) \
i##ofs##_fpix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_fpix2, i##ofs##_fpix0), i##ofs##_fpix0); \
auto i##ofs##_pix0 = v_round(i##ofs##_fpix0);
#define CV_WARP_SIMDX_LINEAR_SHUFFLE_INTER_32FC4_I(ofs) \
const float *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_fpix0 = v_load<4>(srcptr##ofs), \
i##ofs##_fpix1 = v_load<4>(srcptr##ofs+4), \
@@ -734,26 +842,25 @@
i##ofs##_fpix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix1, i##ofs##_fpix0), i##ofs##_fpix0); \
i##ofs##_fpix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix3, i##ofs##_fpix2), i##ofs##_fpix2); \
i##ofs##_fpix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_fpix2, i##ofs##_fpix0), i##ofs##_fpix0);
#define CV_WARP_SIMDX_STORE_8UC4_I() \
auto i01_pix = v_pack_u<4>(v_round(i0_fpix0), v_round(i1_fpix0)), \
i23_pix = v_pack_u<4>(v_round(i2_fpix0), v_round(i3_fpix0)); \
v_pack_store<8>(dstptr + 4*(x+i), i01_pix); \
v_pack_store<8>(dstptr + 4*(x+i+2), i23_pix);
#define CV_WARP_SIMDX_STORE_16UC4_I() \
auto i01_pix = v_pack_u<4>(v_round(i0_fpix0), v_round(i1_fpix0)), \
i23_pix = v_pack_u<4>(v_round(i2_fpix0), v_round(i3_fpix0)); \
v_store<8>(dstptr + 4*(x+i), i01_pix); \
v_store<8>(dstptr + 4*(x+i+2), i23_pix);
#define CV_WARP_SIMDX_STORE_32FC4_I() \
#define CV_WARP_SIMDX_LINEAR_STORE_8UC4_I() \
v_pack_store<8>(dstptr + 4*(x+i), v_pack_u<4>(i0_pix0, i1_pix0)); \
v_pack_store<8>(dstptr + 4*(x+i+2), v_pack_u<4>(i2_pix0, i3_pix0));
#define CV_WARP_SIMDX_LINEAR_STORE_16UC4_I() \
v_store<8>(dstptr + 4*(x+i), v_pack_u<4>(i0_pix0, i1_pix0)); \
v_store<8>(dstptr + 4*(x+i+2), v_pack_u<4>(i2_pix0, i3_pix0));
#define CV_WARP_SIMDX_LINEAR_STORE_32FC4_I() \
v_store<4>(dstptr + 4*(x+i), i0_fpix0); \
v_store<4>(dstptr + 4*(x+i)+4, i1_fpix0); \
v_store<4>(dstptr + 4*(x+i)+8, i2_fpix0); \
v_store<4>(dstptr + 4*(x+i)+12, i3_fpix0);
#define CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_C4(DEPTH) \
#define CV_WARP_SIMDX_SHUFFLE_INTER_STORE_C4(INTER, DEPTH) \
for (int i = 0; i < uf; i+=4) { \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(0); \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(1); \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(2); \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_##DEPTH##C4_I(3); \
CV_WARP_SIMDX_STORE_##DEPTH##C4_I(); \
CV_WARP_SIMDX_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(0); \
CV_WARP_SIMDX_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(1); \
CV_WARP_SIMDX_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(2); \
CV_WARP_SIMDX_##INTER##_SHUFFLE_INTER_##DEPTH##C4_I(3); \
CV_WARP_SIMDX_##INTER##_STORE_##DEPTH##C4_I(); \
}
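
On the scalable backend the hardware vector length is unknown at compile time, so the SIMDX variants pin the element count through the templated intrinsics; spelled out for illustration:

// Illustrative only: the templated forms fix the lane count regardless of the
// actual vector length, so each macro invocation handles exactly one C4 pixel:
//   v_float32 p = v_load<4>(srcptr);        // load exactly 4 floats
//   v_store<4>(dstptr, p);                  // store exactly 4 floats
//   v_pack_store<8>(dst, v_pack<4>(a, b));  // narrow 2x4 u32 lanes to 8 u8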
#define CV_WARP_VECTOR_SHUFFLE_INTER_STORE_C4(SIMD, INTER, DEPTH) \
CV_WARP_##SIMD##_SHUFFLE_INTER_STORE_C4(INTER, DEPTH)

File diff suppressed because it is too large
@@ -703,6 +703,16 @@ protected:
virtual void run_func();
virtual void run_reference_func();

template<typename T>
void new_nearest_c1(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
template<typename T>
void new_nearest_c3(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
template<typename T>
void new_nearest_c4(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);

template<typename T>
void new_linear_c1(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
@@ -720,7 +730,7 @@ protected:
remap_func funcs[2];

private:
template <typename T> void new_remap(const Mat&, Mat&);
template <typename T> void new_remap(const Mat&, Mat&, int);
void remap_nearest(const Mat&, Mat&);
void remap_generic(const Mat&, Mat&);

@@ -879,19 +889,19 @@ void CV_Remap_Test::run_reference_func()
if (interpolation == INTER_AREA)
interpolation = INTER_LINEAR;

if (interpolation == INTER_LINEAR && mapx.depth() == CV_32F) {
if ((interpolation == INTER_LINEAR) && mapx.depth() == CV_32F) {
int src_depth = src.depth(), src_channels = src.channels();
Mat tmp = Mat::zeros(dst.size(), dst.type());
if (src_depth == CV_8U && (src_channels == 1 || src_channels == 3 || src_channels == 4)) {
new_remap<uint8_t>(src, tmp);
new_remap<uint8_t>(src, tmp, interpolation);
tmp.convertTo(reference_dst, reference_dst.depth());
return;
} else if (src_depth == CV_16U && (src_channels == 1 || src_channels == 3 || src_channels == 4)) {
new_remap<uint16_t>(src, tmp);
new_remap<uint16_t>(src, tmp, interpolation);
tmp.convertTo(reference_dst, reference_dst.depth());
return;
} else if (src_depth == CV_32F && (src_channels == 1 || src_channels == 3 || src_channels == 4)) {
new_remap<float>(src, tmp);
new_remap<float>(src, tmp, interpolation);
tmp.convertTo(reference_dst, reference_dst.depth());
return;
}
@@ -903,7 +913,7 @@ void CV_Remap_Test::run_reference_func()
(this->*funcs[index])(src, reference_dst);
}
#define FETCH_PIXEL_SCALAR(cn, dy, dx) \
#define WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, dy, dx) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr[ofs+ci];} \
@@ -917,16 +927,28 @@ void CV_Remap_Test::run_reference_func()
size_t glob_ofs = iy_*srcstep + ix_*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr_[glob_ofs+ci];} \
}

#define WARPAFFINE_SHUFFLE(cn) \
#define WARP_NEAREST_SHUFFLE_FETCH_PIXEL_IN_RANGE(cn) \
for (int ci = 0; ci < cn; ci++) { \
pxy[ci] = srcptr[ci]; \
}
#define WARP_LINEAR_SHUFFLE_FETCH_PIXEL_IN_RANGE(cn) \
for (int ci = 0; ci < cn; ci++) { \
pxy[ci] = srcptr[ci]; \
pxy[ci+cn] = srcptr[ci+cn]; \
pxy[ci+cn*2] = srcptr[srcstep+ci]; \
pxy[ci+cn*3] = srcptr[srcstep+ci+cn]; \
}
#define WARP_NEAREST_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn) \
WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, 0, 0);
#define WARP_LINEAR_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn) \
WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, 0, 0); \
WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, 0, 1); \
WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, 1, 0); \
WARP_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn, 1, 1);
#define WARP_SHUFFLE(inter, cn) \
if ((((unsigned)ix < (unsigned)(srccols-1)) & \
((unsigned)iy < (unsigned)(srcrows-1))) != 0) { \
for (int ci = 0; ci < cn; ci++) { \
pxy[ci] = srcptr[ci]; \
pxy[ci+cn] = srcptr[ci+cn]; \
pxy[ci+cn*2] = srcptr[srcstep+ci]; \
pxy[ci+cn*3] = srcptr[srcstep+ci+cn]; \
} \
WARP_##inter##_SHUFFLE_FETCH_PIXEL_IN_RANGE(cn) \
} else { \
if ((borderType == BORDER_CONSTANT || borderType == BORDER_TRANSPARENT) && \
(((unsigned)(ix+1) >= (unsigned)(srccols+1))| \
@@ -936,14 +958,50 @@ void CV_Remap_Test::run_reference_func()
} \
return; \
} \
FETCH_PIXEL_SCALAR(cn, 0, 0); \
FETCH_PIXEL_SCALAR(cn, 0, 1); \
FETCH_PIXEL_SCALAR(cn, 1, 0); \
FETCH_PIXEL_SCALAR(cn, 1, 1); \
WARP_##inter##_SHUFFLE_FETCH_PIXEL_OUT_RANGE(cn) \
}
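
The reference nearest path below rounds the mapped coordinates to the closest integer pixel before the shuffle; the convention, isolated as a tiny helper for clarity (the helper is ours, not the test's):

#include <cmath>

// Nearest-neighbour coordinate mapping as used by new_nearest_c1/c3/c4:
// roundf rounds halfway cases away from zero.
static inline void map_to_nearest(float sx, float sy, int& ix, int& iy)
{
    ix = (int)roundf(sx);
    iy = (int)roundf(sy);
}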
template<typename T>
static inline void warpaffine_linear_calc(int cn, const T *pxy, T *dst, float sx, float sy)
void CV_Remap_Test::new_nearest_c1(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y) {
int ix = (int)roundf(sx), iy = (int)roundf(sy);

T pxy[1];
const T *srcptr = srcptr_ + srcstep*iy + ix;
WARP_SHUFFLE(NEAREST, 1);

dstptr[x+0] = saturate_cast<T>(pxy[0]);
}
template<typename T>
void CV_Remap_Test::new_nearest_c3(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y) {
int ix = (int)roundf(sx), iy = (int)roundf(sy);

T pxy[3];
const T *srcptr = srcptr_ + srcstep*iy + ix*3;
WARP_SHUFFLE(NEAREST, 3);

dstptr[x*3+0] = saturate_cast<T>(pxy[0]);
dstptr[x*3+1] = saturate_cast<T>(pxy[1]);
dstptr[x*3+2] = saturate_cast<T>(pxy[2]);
}
template<typename T>
void CV_Remap_Test::new_nearest_c4(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y) {
int ix = (int)roundf(sx), iy = (int)roundf(sy);

T pxy[4];
const T *srcptr = srcptr_ + srcstep*iy + ix*4;
WARP_SHUFFLE(NEAREST, 4);

dstptr[x*4+0] = saturate_cast<T>(pxy[0]);
dstptr[x*4+1] = saturate_cast<T>(pxy[1]);
dstptr[x*4+2] = saturate_cast<T>(pxy[2]);
dstptr[x*4+3] = saturate_cast<T>(pxy[3]);
}

template<typename T>
static inline void warp_linear_calc(int cn, const T *pxy, T *dst, float sx, float sy)
{
for (int ci = 0; ci < cn; ci++) {
float p00 = pxy[ci];
@@ -956,7 +1014,6 @@ static inline void warpaffine_linear_calc(int cn, const T *pxy, T *dst, float sx
dst[ci] = saturate_cast<T>(v0);
}
}
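
For reference, warp_linear_calc is the standard bilinear blend over the gathered 2x2 patch, the same three-step form the SIMD kernels express with FMAs; a scalar sketch:

// Bilinear blend of {p00, p01, p10, p11} with fractional offsets sx, sy in [0, 1):
static inline float bilinear_blend(float p00, float p01, float p10, float p11,
                                   float sx, float sy)
{
    float v0 = p00 + sx * (p01 - p00); // top row, along x
    float v1 = p10 + sx * (p11 - p10); // bottom row, along x
    return v0 + sy * (v1 - v0);        // along y
}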
template<typename T>
void CV_Remap_Test::new_linear_c1(int x, float sx, float sy, const T *srcptr_, T *dstptr,
int srccols, int srcrows, size_t srcstep,
@@ -968,11 +1025,10 @@ void CV_Remap_Test::new_linear_c1(int x, float sx, float sy, const T *srcptr_, T
T pxy[4];
const T *srcptr = srcptr_ + srcstep*iy + ix;

WARPAFFINE_SHUFFLE(1);
WARP_SHUFFLE(LINEAR, 1);

warpaffine_linear_calc(1, pxy, dstptr+x, sx, sy);
warp_linear_calc(1, pxy, dstptr+x, sx, sy);
}

template<typename T>
void CV_Remap_Test::new_linear_c3(int x, float sx, float sy, const T *srcptr_, T *dstptr,
int srccols, int srcrows, size_t srcstep,
@@ -984,11 +1040,10 @@ void CV_Remap_Test::new_linear_c3(int x, float sx, float sy, const T *srcptr_, T
T pxy[12];
const T *srcptr = srcptr_ + srcstep*iy + ix*3;

WARPAFFINE_SHUFFLE(3);
WARP_SHUFFLE(LINEAR, 3);

warpaffine_linear_calc(3, pxy, dstptr+x*3, sx, sy);
warp_linear_calc(3, pxy, dstptr+x*3, sx, sy);
}

template<typename T>
void CV_Remap_Test::new_linear_c4(int x, float sx, float sy, const T *srcptr_, T *dstptr,
int srccols, int srcrows, size_t srcstep,
@@ -1000,13 +1055,14 @@ void CV_Remap_Test::new_linear_c4(int x, float sx, float sy, const T *srcptr_, T
T pxy[16];
const T *srcptr = srcptr_ + srcstep*iy + ix*4;

WARPAFFINE_SHUFFLE(4);
WARP_SHUFFLE(LINEAR, 4);

warpaffine_linear_calc(4, pxy, dstptr+x*4, sx, sy);
warp_linear_calc(4, pxy, dstptr+x*4, sx, sy);
}

template <typename T>
void CV_Remap_Test::new_remap(const Mat &_src, Mat &_dst) {
void CV_Remap_Test::new_remap(const Mat &_src, Mat &_dst, int inter) {
CV_UNUSED(inter);
int src_channels = _src.channels();
CV_CheckTrue(_src.channels() == 1 || _src.channels() == 3 || _src.channels() == 4, "");
CV_CheckTrue(mapx.depth() == CV_32F, "");
@@ -1232,7 +1288,7 @@ private:
void warpAffine(const Mat&, Mat&);

template<typename T>
void newWarpAffine(const Mat&, Mat&, const Mat&);
void new_warpAffine(const Mat&, Mat&, const Mat&, int);
};

CV_WarpAffine_Test::CV_WarpAffine_Test() :
@@ -1287,8 +1343,9 @@ void CV_WarpAffine_Test::run_reference_func()
}

template<typename T>
void CV_WarpAffine_Test::newWarpAffine(const Mat &_src, Mat &_dst, const Mat &tM)
void CV_WarpAffine_Test::new_warpAffine(const Mat &_src, Mat &_dst, const Mat &tM, int inter)
{
CV_UNUSED(inter);
int num_channels = _dst.channels();
CV_CheckTrue(num_channels == 1 || num_channels == 3 || num_channels == 4, "");

@@ -1360,11 +1417,11 @@ void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
if (inter == INTER_LINEAR) {
int dst_depth = _dst.depth(), dst_channels = _dst.channels();
if (dst_depth == CV_8U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<uint8_t>(_src, _dst, tM);
return new_warpAffine<uint8_t>(_src, _dst, tM, inter);
} else if (dst_depth == CV_16U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<uint16_t>(_src, _dst, tM);
return new_warpAffine<uint16_t>(_src, _dst, tM, inter);
} else if (dst_depth == CV_32F && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<float>(_src, _dst, tM);
return new_warpAffine<float>(_src, _dst, tM, inter);
}
}

@@ -1420,7 +1477,7 @@ private:
void warpPerspective(const Mat&, Mat&);

template<typename T>
void newWarpPerspective(const Mat&, Mat&, const Mat&);
void new_warpPerspective(const Mat&, Mat&, const Mat&, int);
};

CV_WarpPerspective_Test::CV_WarpPerspective_Test() :
@@ -1470,8 +1527,9 @@ void CV_WarpPerspective_Test::run_reference_func()
}

template<typename T>
void CV_WarpPerspective_Test::newWarpPerspective(const Mat &_src, Mat &_dst, const Mat &tM)
void CV_WarpPerspective_Test::new_warpPerspective(const Mat &_src, Mat &_dst, const Mat &tM, int inter)
{
CV_UNUSED(inter);
int num_channels = _dst.channels();
CV_CheckTrue(num_channels == 1 || num_channels == 3 || num_channels == 4, "");

@@ -1546,11 +1604,11 @@ void CV_WarpPerspective_Test::warpPerspective(const Mat& _src, Mat& _dst)
if (inter == INTER_LINEAR) {
int dst_depth = _dst.depth(), dst_channels = _dst.channels();
if (dst_depth == CV_8U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpPerspective<uint8_t>(_src, _dst, M);
return new_warpPerspective<uint8_t>(_src, _dst, M, inter);
} else if (dst_depth == CV_16U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpPerspective<uint16_t>(_src, _dst, M);
return new_warpPerspective<uint16_t>(_src, _dst, M, inter);
} else if (dst_depth == CV_32F && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpPerspective<float>(_src, _dst, M);
return new_warpPerspective<float>(_src, _dst, M, inter);
}
}