Merge pull request #25984 from fengyuentau:imgproc/warpaffine_opt

imgproc: add optimized warpAffine kernels for 8U/16U/32F + C1/C3/C4 inputs #25984

Merge with https://github.com/opencv/opencv_extra/pull/1198.
Merge with https://github.com/opencv/opencv_contrib/pull/3787.


### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Yuantao Feng 2024-10-03 19:01:36 +08:00 committed by GitHub
parent ebf11d36f4
commit 97681bdfce
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 3070 additions and 179 deletions

View File

@ -2228,9 +2228,7 @@ inline v_int16x8 v_round(const v_float16x8 &a)
inline v_int16x8 v_floor(const v_float16x8 &a)
{
int16x8_t a1 = vcvtq_s16_f16(a.val);
uint16x8_t mask = vcgtq_f16(vcvtq_f16_s16(a1), a.val);
return v_int16x8(vaddq_s16(a1, vreinterpretq_s16_u16(mask)));
return v_int16x8(vcvtmq_s16_f16(a.val));
}
inline v_int16x8 v_ceil(const v_float16x8 &a)
@ -2271,9 +2269,13 @@ inline v_int32x4 v_round(const v_float32x4& a)
#endif
inline v_int32x4 v_floor(const v_float32x4& a)
{
#if __ARM_ARCH > 7
return v_int32x4(vcvtmq_s32_f32(a.val));
#else
int32x4_t a1 = vcvtq_s32_f32(a.val);
uint32x4_t mask = vcgtq_f32(vcvtq_f32_s32(a1), a.val);
return v_int32x4(vaddq_s32(a1, vreinterpretq_s32_u32(mask)));
#endif
}
inline v_int32x4 v_ceil(const v_float32x4& a)

View File

@ -261,7 +261,7 @@ private:
h = rect.height; w = rect.width;
pose = Matx23f(c, -s, -(float)rect.x,
s, c, -(float)rect.y);
warpAffine(image, rotImage, pose, Size(w, h), INTER_LINEAR, BORDER_REPLICATE);
warpAffine(image, rotImage, pose, Size(w, h), INTER_LINEAR, BORDER_REPLICATE, Scalar(), cv::ALGO_HINT_ACCURATE);
}
if( tilt == 1 )
warpedImage = rotImage;
@ -275,7 +275,7 @@ private:
pose(0, 2) /= tilt;
}
if( phi != 0 || tilt != 1 )
warpAffine(mask0, warpedMask, pose, warpedImage.size(), INTER_NEAREST);
warpAffine(mask0, warpedMask, pose, warpedImage.size(), INTER_NEAREST, BORDER_CONSTANT, Scalar(), cv::ALGO_HINT_ACCURATE);
else
warpedMask = mask0;
}

View File

@ -10,6 +10,7 @@ ocv_add_dispatched_file(median_blur SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(morph SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX)
ocv_add_dispatched_file(warp_kernels SSE2 SSE4_1 AVX2 NEON NEON_FP16 RVV LASX)
ocv_define_module(imgproc opencv_core WRAP java objc python js)
ocv_module_include_directories(opencv_imgproc ${ZLIB_INCLUDE_DIRS})

View File

@ -2474,6 +2474,7 @@ flag #WARP_INVERSE_MAP that means that M is the inverse transformation (
borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image corresponding to
the "outliers" in the source image are not modified by the function.
@param borderValue value used in case of a constant border; by default, it is 0.
@param hint Implementation modification flags. See #AlgorithmHint
@sa warpPerspective, resize, remap, getRectSubPix, transform
*/
@ -2481,7 +2482,8 @@ CV_EXPORTS_W void warpAffine( InputArray src, OutputArray dst,
InputArray M, Size dsize,
int flags = INTER_LINEAR,
int borderMode = BORDER_CONSTANT,
const Scalar& borderValue = Scalar());
const Scalar& borderValue = Scalar(),
AlgorithmHint hint = cv::ALGO_HINT_DEFAULT);
/** @example samples/cpp/snippets/warpPerspective_demo.cpp
An example program shows using cv::getPerspectiveTransform and cv::warpPerspective for image warping

View File

@ -72,7 +72,10 @@ OCL_PERF_TEST_P(WarpAffineFixture, WarpAffine,
const WarpAffineParams params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), interpolation = get<2>(params);
const double eps = CV_MAT_DEPTH(type) <= CV_32S ? 1 : interpolation == INTER_CUBIC ? 2e-3 : 1e-4;
// BUG: OpenCL and CPU versions diverge a bit
// Ticket: https://github.com/opencv/opencv/issues/26235
const double eps = CV_MAT_DEPTH(type) <= CV_32S ? 2 : interpolation == INTER_CUBIC ? 2e-3 : 3e-2;
checkDeviceMaxMemoryAllocSize(srcSize, type);

View File

@ -15,24 +15,6 @@ typedef TestBaseWithParam<MatInfo_SizePair_t> MatInfo_SizePair;
CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4, \
CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4
// For gradient-ish testing of the other matrix formats
template<typename T>
static void fillFPGradient(Mat& img)
{
const int ch = img.channels();
int r, c, i;
for(r=0; r<img.rows; r++)
{
for(c=0; c<img.cols; c++)
{
T vals[] = {(T)r, (T)c, (T)(r*c), (T)(r*c/(r+c+1))};
T *p = (T*)img.ptr(r, c);
for(i=0; i<ch; i++) p[i] = (T)vals[i];
}
}
}
PERF_TEST_P(MatInfo_Size_Size, resizeUpLinear,
testing::Values(
MatInfo_Size_Size_t(CV_8UC1, szVGA, szqHD),
@ -51,7 +33,7 @@ PERF_TEST_P(MatInfo_Size_Size, resizeUpLinear,
Size to = get<2>(GetParam());
cv::Mat src(from, matType), dst(to, matType);
cvtest::fillGradient(src);
cvtest::fillGradient<uint8_t>(src);
declare.in(src).out(dst);
TEST_CYCLE_MULTIRUN(10) resize(src, dst, to, 0, 0, INTER_LINEAR_EXACT);
@ -79,9 +61,9 @@ PERF_TEST_P(MatInfo_SizePair, resizeUpLinearNonExact,
cv::Mat src(from, matType), dst(to, matType);
switch(src.depth())
{
case CV_8U: cvtest::fillGradient(src); break;
case CV_16U: fillFPGradient<ushort>(src); break;
case CV_32F: fillFPGradient<float>(src); break;
case CV_8U: cvtest::fillGradient<uint8_t>(src); break;
case CV_16U: cvtest::fillGradient<ushort>(src); break;
case CV_32F: cvtest::fillGradient<float>(src); break;
}
declare.in(src).out(dst);
@ -120,7 +102,7 @@ PERF_TEST_P(MatInfo_Size_Size, resizeDownLinear,
Size to = get<2>(GetParam());
cv::Mat src(from, matType), dst(to, matType);
cvtest::fillGradient(src);
cvtest::fillGradient<uint8_t>(src);
declare.in(src).out(dst);
TEST_CYCLE_MULTIRUN(10) resize(src, dst, to, 0, 0, INTER_LINEAR_EXACT);
@ -155,9 +137,9 @@ PERF_TEST_P(MatInfo_SizePair, resizeDownLinearNonExact,
cv::Mat src(from, matType), dst(to, matType);
switch(src.depth())
{
case CV_8U: cvtest::fillGradient(src); break;
case CV_16U: fillFPGradient<ushort>(src); break;
case CV_32F: fillFPGradient<float>(src); break;
case CV_8U: cvtest::fillGradient<uint8_t>(src); break;
case CV_16U: cvtest::fillGradient<ushort>(src); break;
case CV_32F: cvtest::fillGradient<float>(src); break;
}
declare.in(src).out(dst);

View File

@ -12,7 +12,7 @@ CV_ENUM(InterType, INTER_NEAREST, INTER_LINEAR)
CV_ENUM(InterTypeExtended, INTER_NEAREST, INTER_LINEAR, WARP_RELATIVE_MAP)
CV_ENUM(RemapMode, HALF_SIZE, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH)
typedef TestBaseWithParam< tuple<MatType, Size, InterType, BorderMode> > TestWarpAffine;
typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, MatType> > TestWarpAffine;
typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, int> > TestWarpPerspective;
typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, MatType> > TestWarpPerspectiveNear_t;
typedef TestBaseWithParam< tuple<MatType, Size, InterTypeExtended, BorderMode, RemapMode> > TestRemap;
@ -21,24 +21,39 @@ void update_map(const Mat& src, Mat& map_x, Mat& map_y, const int remapMode, boo
PERF_TEST_P( TestWarpAffine, WarpAffine,
Combine(
Values(CV_8UC1, CV_8UC4),
Values( szVGA, sz720p, sz1080p ),
InterType::all(),
BorderMode::all()
BorderMode::all(),
Values(CV_8UC3, CV_16UC3, CV_32FC3, CV_8UC1, CV_16UC1, CV_32FC1, CV_8UC4, CV_16UC4, CV_32FC4)
)
)
{
Size sz, szSrc(512, 512);
int borderMode, interType, dataType;
dataType = get<0>(GetParam());
sz = get<1>(GetParam());
interType = get<2>(GetParam());
borderMode = get<3>(GetParam());
int type, borderMode, interType;
sz = get<0>(GetParam());
interType = get<1>(GetParam());
borderMode = get<2>(GetParam());
type = get<3>(GetParam());
Scalar borderColor = Scalar::all(150);
Mat src(szSrc, dataType), dst(sz, dataType);
cvtest::fillGradient(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1);
Mat src(szSrc,type), dst(sz, type);
switch (src.depth()) {
case CV_8U: {
cvtest::fillGradient<uint8_t>(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<uint8_t>(src, borderColor, 1);
break;
}
case CV_16U: {
cvtest::fillGradient<uint16_t>(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<uint16_t>(src, borderColor, 1);
break;
}
case CV_32F: {
cvtest::fillGradient<float>(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<float>(src, borderColor, 1);
break;
}
}
Mat warpMat = getRotationMatrix2D(Point2f(src.cols/2.f, src.rows/2.f), 30., 2.2);
declare.in(src).out(dst);
@ -47,36 +62,6 @@ PERF_TEST_P( TestWarpAffine, WarpAffine,
SANITY_CHECK(dst, 1);
}
PERF_TEST_P(TestWarpAffine, DISABLED_WarpAffine_ovx,
Combine(
Values(CV_8UC1, CV_8UC4),
Values(szVGA, sz720p, sz1080p),
InterType::all(),
BorderMode::all()
)
)
{
Size sz, szSrc(512, 512);
int borderMode, interType, dataType;
dataType = get<0>(GetParam());
sz = get<1>(GetParam());
interType = get<2>(GetParam());
borderMode = get<3>(GetParam());
Scalar borderColor = Scalar::all(150);
Mat src(szSrc, dataType), dst(sz, dataType);
cvtest::fillGradient(src);
if (borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1);
Mat warpMat = getRotationMatrix2D(Point2f(src.cols / 2.f, src.rows / 2.f), 30., 2.2);
declare.in(src).out(dst);
TEST_CYCLE() warpAffine(src, dst, warpMat, sz, interType, borderMode, borderColor);
SANITY_CHECK(dst, 1);
}
PERF_TEST_P( TestWarpPerspective, WarpPerspective,
Combine(
Values( szVGA, sz720p, sz1080p ),
@ -96,8 +81,8 @@ PERF_TEST_P( TestWarpPerspective, WarpPerspective,
Scalar borderColor = Scalar::all(150);
Mat src(szSrc, CV_8UC(channels)), dst(sz, CV_8UC(channels));
cvtest::fillGradient(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1);
cvtest::fillGradient<uint8_t>(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<uint8_t>(src, borderColor, 1);
Mat rotMat = getRotationMatrix2D(Point2f(src.cols/2.f, src.rows/2.f), 30., 2.2);
Mat warpMat(3, 3, CV_64FC1);
for(int r=0; r<2; r++)
@ -114,42 +99,6 @@ PERF_TEST_P( TestWarpPerspective, WarpPerspective,
SANITY_CHECK(dst, 1);
}
PERF_TEST_P(TestWarpPerspective, DISABLED_WarpPerspective_ovx,
Combine(
Values(szVGA, sz720p, sz1080p),
InterType::all(),
BorderMode::all(),
Values(1)
)
)
{
Size sz, szSrc(512, 512);
int borderMode, interType, channels;
sz = get<0>(GetParam());
interType = get<1>(GetParam());
borderMode = get<2>(GetParam());
channels = get<3>(GetParam());
Scalar borderColor = Scalar::all(150);
Mat src(szSrc, CV_8UC(channels)), dst(sz, CV_8UC(channels));
cvtest::fillGradient(src);
if (borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1);
Mat rotMat = getRotationMatrix2D(Point2f(src.cols / 2.f, src.rows / 2.f), 30., 2.2);
Mat warpMat(3, 3, CV_64FC1);
for (int r = 0; r<2; r++)
for (int c = 0; c<3; c++)
warpMat.at<double>(r, c) = rotMat.at<double>(r, c);
warpMat.at<double>(2, 0) = .3 / sz.width;
warpMat.at<double>(2, 1) = .3 / sz.height;
warpMat.at<double>(2, 2) = 1;
declare.in(src).out(dst);
TEST_CYCLE() warpPerspective(src, dst, warpMat, sz, interType, borderMode, borderColor);
SANITY_CHECK(dst, 1);
}
PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear,
Combine(
Values( Size(640,480), Size(1920,1080), Size(2592,1944) ),
@ -168,8 +117,8 @@ PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear,
Scalar borderColor = Scalar::all(150);
Mat src(size, type), dst(size, type);
cvtest::fillGradient(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1);
cvtest::fillGradient<uint8_t>(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<uint8_t>(src, borderColor, 1);
int shift = static_cast<int>(src.cols*0.04);
Mat srcVertices = (Mat_<Vec2f>(1, 4) << Vec2f(0, 0),
Vec2f(static_cast<float>(size.width-1), 0),

View File

@ -55,6 +55,9 @@
#include "opencv2/core/softfloat.hpp"
#include "imgwarp.hpp"
#include "warp_kernels.simd.hpp"
#include "warp_kernels.simd_declarations.hpp"
using namespace cv;
namespace cv
@ -1351,6 +1354,9 @@ static bool ocl_remap(InputArray _src, OutputArray _dst, InputArray _map1, Input
int cn = _src.channels(), type = _src.type(), depth = _src.depth(),
rowsPerWI = dev.isIntel() ? 4 : 1;
if(!dev.hasFP64() && depth == CV_64F)
return false;
if (borderType == BORDER_TRANSPARENT || !(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST)
|| _map1.type() == CV_16SC1 || _map2.type() == CV_16SC1)
return false;
@ -2571,16 +2577,70 @@ static bool ipp_warpAffine( InputArray _src, OutputArray _dst, int interpolation
namespace hal {
void warpAffine(int src_type,
const uchar * src_data, size_t src_step, int src_width, int src_height,
uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
const double M[6], int interpolation, int borderType, const double borderValue[4])
static void warpAffine(int src_type,
const uchar * src_data, size_t src_step, int src_width, int src_height,
uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
const double M[6], int interpolation, int borderType, const double borderValue[4], AlgorithmHint hint)
{
CALL_HAL(warpAffine, cv_hal_warpAffine, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue);
Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
if (interpolation == INTER_LINEAR) {
switch (src_type) {
case CV_8UC1: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(warpAffineLinearApproxInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
} else {
CV_CPU_DISPATCH(warpAffineLinearInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
}
break;
}
case CV_8UC3: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(warpAffineLinearApproxInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
} else {
CV_CPU_DISPATCH(warpAffineLinearInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
}
break;
}
case CV_8UC4: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(warpAffineLinearApproxInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
} else {
CV_CPU_DISPATCH(warpAffineLinearInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
}
break;
}
case CV_16UC1: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC3: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC4: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC1: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_32FC1, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC3: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_32FC3, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC4: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_32FC4, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
// no default
}
}
int x;
AutoBuffer<int> _abdelta(dst.cols*2);
int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
@ -2697,10 +2757,14 @@ void warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int
void cv::warpAffine( InputArray _src, OutputArray _dst,
InputArray _M0, Size dsize,
int flags, int borderType, const Scalar& borderValue )
int flags, int borderType, const Scalar& borderValue,
AlgorithmHint hint )
{
CV_INSTRUMENT_REGION();
if (hint == cv::ALGO_HINT_DEFAULT)
hint = cv::getDefaultAlgorithmHint();
int interpolation = flags & INTER_MAX;
CV_Assert( _src.channels() <= 4 || (interpolation != INTER_LANCZOS4 &&
interpolation != INTER_CUBIC) );
@ -2808,7 +2872,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
#endif
hal::warpAffine(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
M, interpolation, borderType, borderValue.val);
M, interpolation, borderType, borderValue.val, hint);
}

View File

@ -0,0 +1,11 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef __OPENCV_IMGPROC_WARP_COMMON_HPP__
#define __OPENCV_IMGPROC_WARP_COMMON_HPP__
#include "warp_common.vector.hpp"
#include "warp_common.scalar.hpp"
#endif // __OPENCV_IMGPROC_WARP_COMMON_HPP__

View File

@ -0,0 +1,171 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Shuffle
//
// Scalar path: read the 2x2 neighbourhood needed for bilinear interpolation.
//   SUFFIX : channel suffix of the receiving variables (p00<SUFFIX> ... p11<SUFFIX>)
//   nch    : number of interleaved channels per pixel
//   ch     : index of the channel inside the pixel
// `srcptr` is indexed at [ch], [ch + nch] for the first row and at the same
// positions shifted by `srcstep` for the second row.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(SUFFIX, nch, ch) \
    p00##SUFFIX = srcptr[ch]; \
    p01##SUFFIX = srcptr[ch + nch]; \
    p10##SUFFIX = srcptr[srcstep + ch]; \
    p11##SUFFIX = srcptr[srcstep + nch + ch];

// Single-channel variant: fills p00g, p01g, p10g, p11g.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C1() \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 1, 0)

// Three-channel variant: fills the r/g/b sets of p00..p11.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C3() \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(r, 3, 0) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 3, 1) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(b, 3, 2)

// Four-channel variant: fills the r/g/b/a sets of p00..p11.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C4() \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(r, 4, 0) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 4, 1) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(b, 4, 2) \
    CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(a, 4, 3)
// Write the constant border value `bval` straight into destination pixel `x`
// (one, three or four interleaved channels).
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C1() \
    dstptr[x] = bval[0];

#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C3() \
    dstptr[x*3 + 2] = bval[2]; \
    dstptr[x*3 + 1] = bval[1]; \
    dstptr[x*3]     = bval[0];

#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C4() \
    dstptr[x*4 + 3] = bval[3]; \
    dstptr[x*4 + 2] = bval[2]; \
    dstptr[x*4 + 1] = bval[1]; \
    dstptr[x*4]     = bval[0];
// Fetch one bilinear tap of a single-channel image (scalar path).
//   dy, dx : offset of the tap from sample (ix, iy)
//   pxy    : variable prefix; the result is written to <pxy>g
// A tap inside the image is read from `srcptr` at offset dy*srcstep + dx.
// A tap outside takes bval[0] (BORDER_CONSTANT), the current destination
// pixel dstptr[x] (BORDER_TRANSPARENT), or the source pixel selected by
// borderInterpolate_fast() for every other border mode.
// dy/dx are parenthesized so that expression arguments (e.g. `a+1`) keep
// their intended value inside the offset arithmetic.
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C1(dy, dx, pxy) \
    if ((((unsigned)(ix + (dx)) < (unsigned)srccols) & ((unsigned)(iy + (dy)) < (unsigned)srcrows)) != 0) { \
        size_t ofs = (dy)*srcstep + (dx); \
        pxy##g = srcptr[ofs]; \
    } else if (border_type == BORDER_CONSTANT) { \
        pxy##g = bval[0]; \
    } else if (border_type == BORDER_TRANSPARENT) { \
        pxy##g = dstptr[x]; \
    } else { \
        int ix_ = borderInterpolate_fast(ix + (dx), srccols, border_type_x); \
        int iy_ = borderInterpolate_fast(iy + (dy), srcrows, border_type_y); \
        size_t glob_ofs = iy_*srcstep + ix_; \
        pxy##g = src[glob_ofs]; \
    }
// Fetch one bilinear tap of a three-channel image (scalar path).
//   dy, dx : offset of the tap from sample (ix, iy)
//   pxy    : variable prefix; results go to <pxy>r, <pxy>g, <pxy>b
// A tap inside the image is read from `srcptr` at offset dy*srcstep + dx*3.
// A tap outside takes bval[0..2] (BORDER_CONSTANT), the current destination
// pixel (BORDER_TRANSPARENT), or the source pixel selected by
// borderInterpolate_fast() for every other border mode.
// dy/dx are parenthesized so that expression arguments (e.g. `a+1`) are
// scaled as a whole instead of only their last term.
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C3(dy, dx, pxy) \
    if ((((unsigned)(ix + (dx)) < (unsigned)srccols) & ((unsigned)(iy + (dy)) < (unsigned)srcrows)) != 0) { \
        size_t ofs = (dy)*srcstep + (dx)*3; \
        pxy##r = srcptr[ofs]; \
        pxy##g = srcptr[ofs+1]; \
        pxy##b = srcptr[ofs+2]; \
    } else if (border_type == BORDER_CONSTANT) { \
        pxy##r = bval[0]; \
        pxy##g = bval[1]; \
        pxy##b = bval[2]; \
    } else if (border_type == BORDER_TRANSPARENT) { \
        pxy##r = dstptr[x*3]; \
        pxy##g = dstptr[x*3+1]; \
        pxy##b = dstptr[x*3+2]; \
    } else { \
        int ix_ = borderInterpolate_fast(ix + (dx), srccols, border_type_x); \
        int iy_ = borderInterpolate_fast(iy + (dy), srcrows, border_type_y); \
        size_t glob_ofs = iy_*srcstep + ix_*3; \
        pxy##r = src[glob_ofs]; \
        pxy##g = src[glob_ofs+1]; \
        pxy##b = src[glob_ofs+2]; \
    }
// Fetch one bilinear tap of a four-channel image (scalar path).
//   dy, dx : offset of the tap from sample (ix, iy)
//   pxy    : variable prefix; results go to <pxy>r, <pxy>g, <pxy>b, <pxy>a
// A tap inside the image is read from `srcptr` at offset dy*srcstep + dx*4.
// A tap outside takes bval[0..3] (BORDER_CONSTANT), the current destination
// pixel (BORDER_TRANSPARENT), or the source pixel selected by
// borderInterpolate_fast() for every other border mode.
// dy/dx are parenthesized so that expression arguments (e.g. `a+1`) are
// scaled as a whole instead of only their last term.
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C4(dy, dx, pxy) \
    if ((((unsigned)(ix + (dx)) < (unsigned)srccols) & ((unsigned)(iy + (dy)) < (unsigned)srcrows)) != 0) { \
        size_t ofs = (dy)*srcstep + (dx)*4; \
        pxy##r = srcptr[ofs]; \
        pxy##g = srcptr[ofs+1]; \
        pxy##b = srcptr[ofs+2]; \
        pxy##a = srcptr[ofs+3]; \
    } else if (border_type == BORDER_CONSTANT) { \
        pxy##r = bval[0]; \
        pxy##g = bval[1]; \
        pxy##b = bval[2]; \
        pxy##a = bval[3]; \
    } else if (border_type == BORDER_TRANSPARENT) { \
        pxy##r = dstptr[x*4]; \
        pxy##g = dstptr[x*4+1]; \
        pxy##b = dstptr[x*4+2]; \
        pxy##a = dstptr[x*4+3]; \
    } else { \
        int ix_ = borderInterpolate_fast(ix + (dx), srccols, border_type_x); \
        int iy_ = borderInterpolate_fast(iy + (dy), srcrows, border_type_y); \
        size_t glob_ofs = iy_*srcstep + ix_*4; \
        pxy##r = src[glob_ofs]; \
        pxy##g = src[glob_ofs+1]; \
        pxy##b = src[glob_ofs+2]; \
        pxy##a = src[glob_ofs+3]; \
    }
// Gather the four bilinear taps (p00, p01, p10, p11) for destination pixel `x`.
// Must be expanded inside a loop body: pixels whose taps all lie outside the
// source are finished here and skipped with `continue`.
//   - whole 2x2 window inside the source  -> direct loads from `srcptr`
//   - window completely outside, constant
//     or transparent border               -> store border value (constant
//                                            border only) and continue
//   - anything else                       -> per-tap fetch with border handling
#define CV_WARP_LINEAR_SCALAR_SHUFFLE(CN) \
    if (((unsigned)ix >= (unsigned)(srccols-1)) || \
        ((unsigned)iy >= (unsigned)(srcrows-1))) { \
        const bool skip_allowed_ = (border_type == BORDER_CONSTANT) || \
                                   (border_type == BORDER_TRANSPARENT); \
        const bool all_outside_ = ((unsigned)(ix+1) >= (unsigned)(srccols+1)) || \
                                  ((unsigned)(iy+1) >= (unsigned)(srcrows+1)); \
        if (skip_allowed_ && all_outside_) { \
            if (border_type == BORDER_CONSTANT) { \
                CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_##CN() \
            } \
            continue; \
        } \
        CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(0, 0, p00); \
        CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(0, 1, p01); \
        CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(1, 0, p10); \
        CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(1, 1, p11); \
    } else { \
        CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_##CN() \
    }
// Linear interpolation calculation
//
// Step 1 (alpha): blend horizontally with weight `sx`, producing
// v0<ch> (upper row) and v1<ch> (lower row) as new float variables.
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(ch) \
    float v0##ch = p00##ch + sx*(p01##ch - p00##ch), \
          v1##ch = p10##ch + sx*(p11##ch - p10##ch);

#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_C1() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(g)

#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_C3() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(r) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(g) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(b)

#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_C4() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(r) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(g) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(b) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(a)

// Step 2 (beta): blend the two rows vertically with weight `sy`;
// the final value is left in v0<ch>.
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(ch) \
    v0##ch = v0##ch + sy*(v1##ch - v0##ch);

#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_C1() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(g)

#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_C3() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(r) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(g) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(b)

#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_C4() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(r) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(g) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(b) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(a)

// Full bilinear blend for channel layout NCH (C1, C3 or C4).
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_F32(NCH) \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_##NCH() \
    CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_##NCH()
// Store
//
// Write the interpolated value(s) v0<ch> to destination pixel `x`,
// converting to the output element type T with saturate_cast.
#define CV_WARP_LINEAR_SCALAR_STORE_C1(T) \
    dstptr[x] = saturate_cast<T>(v0g);

#define CV_WARP_LINEAR_SCALAR_STORE_C3(T) \
    dstptr[x*3]     = saturate_cast<T>(v0r); \
    dstptr[x*3 + 1] = saturate_cast<T>(v0g); \
    dstptr[x*3 + 2] = saturate_cast<T>(v0b);

#define CV_WARP_LINEAR_SCALAR_STORE_C4(T) \
    dstptr[x*4]     = saturate_cast<T>(v0r); \
    dstptr[x*4 + 1] = saturate_cast<T>(v0g); \
    dstptr[x*4 + 2] = saturate_cast<T>(v0b); \
    dstptr[x*4 + 3] = saturate_cast<T>(v0a);

// Depth selectors: map the depth tag to the element type.
#define CV_WARP_LINEAR_SCALAR_STORE_8U(NCH) \
    CV_WARP_LINEAR_SCALAR_STORE_##NCH(uint8_t)

#define CV_WARP_LINEAR_SCALAR_STORE_16U(NCH) \
    CV_WARP_LINEAR_SCALAR_STORE_##NCH(uint16_t)

#define CV_WARP_LINEAR_SCALAR_STORE_32F(NCH) \
    CV_WARP_LINEAR_SCALAR_STORE_##NCH(float)

// Entry point: NCH is C1/C3/C4, DEPTH is 8U/16U/32F.
#define CV_WARP_LINEAR_SCALAR_STORE(NCH, DEPTH) \
    CV_WARP_LINEAR_SCALAR_STORE_##DEPTH(NCH)

View File

@ -0,0 +1,387 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Shuffle (all pixels within image)
//
// For each of the `uf` lanes, copy the 2x2 neighbourhood that starts at
// src + addr[lane] into `pixbuf`, de-interleaving it into planes of `uf`
// elements. Plane order per channel: top-left, top-right, bottom-left,
// bottom-right; consecutive channels are 4 planes apart.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_C1(dtype) \
    for (int lane = 0; lane < uf; lane++) { \
        const dtype* top = src + addr[lane]; \
        const dtype* bottom = top + srcstep; \
        pixbuf[lane]        = top[0]; \
        pixbuf[lane + uf]   = top[1]; \
        pixbuf[lane + uf*2] = bottom[0]; \
        pixbuf[lane + uf*3] = bottom[1]; \
    }

#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_C3(dtype) \
    for (int lane = 0; lane < uf; lane++) { \
        const dtype* top = src + addr[lane]; \
        const dtype* bottom = top + srcstep; \
        /* channel 0 */ \
        pixbuf[lane]         = top[0]; \
        pixbuf[lane + uf]    = top[3]; \
        pixbuf[lane + uf*2]  = bottom[0]; \
        pixbuf[lane + uf*3]  = bottom[3]; \
        /* channel 1 */ \
        pixbuf[lane + uf*4]  = top[1]; \
        pixbuf[lane + uf*5]  = top[4]; \
        pixbuf[lane + uf*6]  = bottom[1]; \
        pixbuf[lane + uf*7]  = bottom[4]; \
        /* channel 2 */ \
        pixbuf[lane + uf*8]  = top[2]; \
        pixbuf[lane + uf*9]  = top[5]; \
        pixbuf[lane + uf*10] = bottom[2]; \
        pixbuf[lane + uf*11] = bottom[5]; \
    }

#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_C4(dtype) \
    for (int lane = 0; lane < uf; lane++) { \
        const dtype* top = src + addr[lane]; \
        const dtype* bottom = top + srcstep; \
        /* channel 0 */ \
        pixbuf[lane]         = top[0]; \
        pixbuf[lane + uf]    = top[4]; \
        pixbuf[lane + uf*2]  = bottom[0]; \
        pixbuf[lane + uf*3]  = bottom[4]; \
        /* channel 1 */ \
        pixbuf[lane + uf*4]  = top[1]; \
        pixbuf[lane + uf*5]  = top[5]; \
        pixbuf[lane + uf*6]  = bottom[1]; \
        pixbuf[lane + uf*7]  = bottom[5]; \
        /* channel 2 */ \
        pixbuf[lane + uf*8]  = top[2]; \
        pixbuf[lane + uf*9]  = top[6]; \
        pixbuf[lane + uf*10] = bottom[2]; \
        pixbuf[lane + uf*11] = bottom[6]; \
        /* channel 3 */ \
        pixbuf[lane + uf*12] = top[3]; \
        pixbuf[lane + uf*13] = top[7]; \
        pixbuf[lane + uf*14] = bottom[3]; \
        pixbuf[lane + uf*15] = bottom[7]; \
    }

// Depth selectors: map the depth tag to the element type.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_8U(CN) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint8_t)

#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_16U(CN) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint16_t)

#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_32F(CN) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(float)

// Entry point: CN is C1/C3/C4, DEPTH is 8U/16U/32F.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN(CN, DEPTH) \
    CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##DEPTH(CN)
// Shuffle (not all pixels within image)
//
// Constant-border stores for the vector path: write pre-built border-value
// vectors over the destination pixels starting at `x` (x*3 / x*4 elements for
// the multi-channel variants). The bval_v* vectors are defined by the
// expanding kernel, outside this header.
// NOTE(review): presumably they hold the border value already arranged in
// interleaved channel order - confirm against the kernel.

// 8-bit, 1 channel: a single v_store_low of bval_v0.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC1() \
v_store_low(dstptr + x, bval_v0);
// 8-bit, 3 channels: three v_store_low calls, `uf` elements apart.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC3() \
v_store_low(dstptr + x*3, bval_v0); \
v_store_low(dstptr + x*3 + uf, bval_v1); \
v_store_low(dstptr + x*3 + uf*2, bval_v2);
// 8-bit, 4 channels: four v_store_low calls, `uf` elements apart.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC4() \
v_store_low(dstptr + x*4, bval_v0); \
v_store_low(dstptr + x*4 + uf, bval_v1); \
v_store_low(dstptr + x*4 + uf*2, bval_v2); \
v_store_low(dstptr + x*4 + uf*3, bval_v3);
// 16-bit, 1 channel: a single v_store of bval_v0.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC1() \
v_store(dstptr + x, bval_v0);
// 16-bit, 3 channels: three v_store calls, `uf` elements apart.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC3() \
v_store(dstptr + x*3, bval_v0); \
v_store(dstptr + x*3 + uf, bval_v1); \
v_store(dstptr + x*3 + uf*2, bval_v2);
// 16-bit, 4 channels: four v_store calls, `uf` elements apart.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC4() \
v_store(dstptr + x*4, bval_v0); \
v_store(dstptr + x*4 + uf, bval_v1); \
v_store(dstptr + x*4 + uf*2, bval_v2); \
v_store(dstptr + x*4 + uf*3, bval_v3);
// 32-bit float, 1 channel: each `uf` step is written as two stores, the
// `_l` vector at offset 0 and the `_h` vector at offset vlanes_32.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC1() \
v_store(dstptr + x, bval_v0_l); \
v_store(dstptr + x + vlanes_32, bval_v0_h);
// 32-bit float, 3 channels: three `uf` steps, each split into _l/_h stores.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC3() \
v_store(dstptr + x*3, bval_v0_l); \
v_store(dstptr + x*3 + vlanes_32, bval_v0_h); \
v_store(dstptr + x*3 + uf, bval_v1_l); \
v_store(dstptr + x*3 + uf + vlanes_32, bval_v1_h); \
v_store(dstptr + x*3 + uf*2, bval_v2_l); \
v_store(dstptr + x*3 + uf*2 + vlanes_32, bval_v2_h);
// 32-bit float, 4 channels: four `uf` steps, each split into _l/_h stores.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC4() \
v_store(dstptr + x*4, bval_v0_l); \
v_store(dstptr + x*4 + vlanes_32, bval_v0_h); \
v_store(dstptr + x*4 + uf, bval_v1_l); \
v_store(dstptr + x*4 + uf + vlanes_32, bval_v1_h); \
v_store(dstptr + x*4 + uf*2, bval_v2_l); \
v_store(dstptr + x*4 + uf*2 + vlanes_32, bval_v2_h); \
v_store(dstptr + x*4 + uf*3, bval_v3_l); \
v_store(dstptr + x*4 + uf*3 + vlanes_32, bval_v3_h);
// Fetch one bilinear tap of a single-channel image for lane `i` (vector path).
//   dy, dx     : offset of the tap from sample (ix, iy)
//   pixbuf_ofs : offset of the tap's plane inside `pixbuf`
// A tap inside the image is read from src at addr[i] + dy*srcstep + dx.
// A tap outside takes bval[0] (BORDER_CONSTANT), the destination pixel
// dstptr[x + i] (BORDER_TRANSPARENT), or the source pixel selected by
// borderInterpolate_fast() for every other border mode.
// All macro parameters are parenthesized so that expression arguments keep
// their intended value inside the offset arithmetic.
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C1(dy, dx, pixbuf_ofs) \
    if ((((unsigned)(ix + (dx)) < (unsigned)srccols) & ((unsigned)(iy + (dy)) < (unsigned)srcrows)) != 0) { \
        size_t addr_i = addr[i] + (dy)*srcstep + (dx); \
        pixbuf[i + (pixbuf_ofs)] = src[addr_i]; \
    } else if (border_type == BORDER_CONSTANT) { \
        pixbuf[i + (pixbuf_ofs)] = bval[0]; \
    } else if (border_type == BORDER_TRANSPARENT) { \
        pixbuf[i + (pixbuf_ofs)] = dstptr[x + i]; \
    } else { \
        int ix_ = borderInterpolate_fast(ix + (dx), srccols, border_type_x); \
        int iy_ = borderInterpolate_fast(iy + (dy), srcrows, border_type_y); \
        size_t addr_i = iy_*srcstep + ix_; \
        pixbuf[i + (pixbuf_ofs)] = src[addr_i]; \
    }
// Fetch one bilinear tap of a three-channel image for lane `i` (vector path).
//   dy, dx     : offset of the tap from sample (ix, iy)
//   pixbuf_ofs : offset of the tap's plane inside `pixbuf`; the three
//                channels land uf*4 elements apart
// A tap inside the image is read from src at addr[i] + dy*srcstep + dx*3.
// A tap outside takes bval[0..2] (BORDER_CONSTANT), the destination pixel
// x + i (BORDER_TRANSPARENT), or the source pixel selected by
// borderInterpolate_fast() for every other border mode.
// All macro parameters are parenthesized so that expression arguments are
// scaled as a whole instead of only their last term.
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C3(dy, dx, pixbuf_ofs) \
    if ((((unsigned)(ix + (dx)) < (unsigned)srccols) & ((unsigned)(iy + (dy)) < (unsigned)srcrows)) != 0) { \
        size_t addr_i = addr[i] + (dy)*srcstep + (dx)*3; \
        pixbuf[i + (pixbuf_ofs)] = src[addr_i]; \
        pixbuf[i + (pixbuf_ofs) + uf*4] = src[addr_i+1]; \
        pixbuf[i + (pixbuf_ofs) + uf*8] = src[addr_i+2]; \
    } else if (border_type == BORDER_CONSTANT) { \
        pixbuf[i + (pixbuf_ofs)] = bval[0]; \
        pixbuf[i + (pixbuf_ofs) + uf*4] = bval[1]; \
        pixbuf[i + (pixbuf_ofs) + uf*8] = bval[2]; \
    } else if (border_type == BORDER_TRANSPARENT) { \
        pixbuf[i + (pixbuf_ofs)] = dstptr[(x + i)*3]; \
        pixbuf[i + (pixbuf_ofs) + uf*4] = dstptr[(x + i)*3 + 1]; \
        pixbuf[i + (pixbuf_ofs) + uf*8] = dstptr[(x + i)*3 + 2]; \
    } else { \
        int ix_ = borderInterpolate_fast(ix + (dx), srccols, border_type_x); \
        int iy_ = borderInterpolate_fast(iy + (dy), srcrows, border_type_y); \
        size_t addr_i = iy_*srcstep + ix_*3; \
        pixbuf[i + (pixbuf_ofs)] = src[addr_i]; \
        pixbuf[i + (pixbuf_ofs) + uf*4] = src[addr_i+1]; \
        pixbuf[i + (pixbuf_ofs) + uf*8] = src[addr_i+2]; \
    }
// Fetch one bilinear tap of a four-channel image for lane `i` (vector path).
//   dy, dx     : offset of the tap from sample (ix, iy)
//   pixbuf_ofs : offset of the tap's plane inside `pixbuf`; the four
//                channels land uf*4 elements apart
// A tap inside the image is read from src at addr[i] + dy*srcstep + dx*4.
// A tap outside takes bval[0..3] (BORDER_CONSTANT), the destination pixel
// x + i (BORDER_TRANSPARENT), or the source pixel selected by
// borderInterpolate_fast() for every other border mode.
// All macro parameters are parenthesized so that expression arguments are
// scaled as a whole instead of only their last term.
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C4(dy, dx, pixbuf_ofs) \
    if ((((unsigned)(ix + (dx)) < (unsigned)srccols) & ((unsigned)(iy + (dy)) < (unsigned)srcrows)) != 0) { \
        size_t addr_i = addr[i] + (dy)*srcstep + (dx)*4; \
        pixbuf[i + (pixbuf_ofs)] = src[addr_i]; \
        pixbuf[i + (pixbuf_ofs) + uf*4] = src[addr_i+1]; \
        pixbuf[i + (pixbuf_ofs) + uf*8] = src[addr_i+2]; \
        pixbuf[i + (pixbuf_ofs) + uf*12] = src[addr_i+3]; \
    } else if (border_type == BORDER_CONSTANT) { \
        pixbuf[i + (pixbuf_ofs)] = bval[0]; \
        pixbuf[i + (pixbuf_ofs) + uf*4] = bval[1]; \
        pixbuf[i + (pixbuf_ofs) + uf*8] = bval[2]; \
        pixbuf[i + (pixbuf_ofs) + uf*12] = bval[3]; \
    } else if (border_type == BORDER_TRANSPARENT) { \
        pixbuf[i + (pixbuf_ofs)] = dstptr[(x + i)*4]; \
        pixbuf[i + (pixbuf_ofs) + uf*4] = dstptr[(x + i)*4 + 1]; \
        pixbuf[i + (pixbuf_ofs) + uf*8] = dstptr[(x + i)*4 + 2]; \
        pixbuf[i + (pixbuf_ofs) + uf*12] = dstptr[(x + i)*4 + 3]; \
    } else { \
        int ix_ = borderInterpolate_fast(ix + (dx), srccols, border_type_x); \
        int iy_ = borderInterpolate_fast(iy + (dy), srcrows, border_type_y); \
        size_t addr_i = iy_*srcstep + ix_*4; \
        pixbuf[i + (pixbuf_ofs)] = src[addr_i]; \
        pixbuf[i + (pixbuf_ofs) + uf*4] = src[addr_i+1]; \
        pixbuf[i + (pixbuf_ofs) + uf*8] = src[addr_i+2]; \
        pixbuf[i + (pixbuf_ofs) + uf*12] = src[addr_i+3]; \
    }
// Slow-path gather used when not every 2x2 neighbourhood of the current batch
// lies inside the source image.
//   CN    - channel suffix pasted into the fetch macro name (e.g. C3, C4).
//   DEPTH - depth tag pasted, together with CN, into the name of the
//           constant-border store macro.
// Step 1 (BORDER_CONSTANT / BORDER_TRANSPARENT only): build per-lane masks
//   testing (src_ix + 1) < outer_scols and (src_iy + 1) < outer_srows as
//   unsigned values. If no lane passes, the batch is finished early: for
//   BORDER_CONSTANT the border value is stored via
//   CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_<DEPTH><CN>(), for
//   BORDER_TRANSPARENT nothing is written, and `continue` moves on to the next
//   iteration of the enclosing loop.
//   NOTE(review): outer_scols / outer_srows are defined outside this view;
//   presumably srccols + 1 / srcrows + 1 - confirm at the definition.
// Step 2: spill the integer coordinates to the src_ix / src_iy arrays and, for
//   each of the `uf` lanes, fetch the four neighbours (dy, dx) = (0,0), (0,1),
//   (1,0), (1,1) into pixbuf sections 0, uf, uf*2 and uf*3 respectively.
// Because of the embedded `continue`, the macro must be expanded directly
// inside a loop body.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN(CN, DEPTH) \
if (border_type == BORDER_CONSTANT || border_type == BORDER_TRANSPARENT) { \
mask_0 = v_lt(v_reinterpret_as_u32(v_add(src_ix0, one)), outer_scols); \
mask_1 = v_lt(v_reinterpret_as_u32(v_add(src_ix1, one)), outer_scols); \
mask_0 = v_and(mask_0, v_lt(v_reinterpret_as_u32(v_add(src_iy0, one)), outer_srows)); \
mask_1 = v_and(mask_1, v_lt(v_reinterpret_as_u32(v_add(src_iy1, one)), outer_srows)); \
v_uint16 outer_mask = v_pack(mask_0, mask_1); \
if (v_reduce_max(outer_mask) == 0) { \
if (border_type == BORDER_CONSTANT) { \
CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_##DEPTH##CN() \
} \
continue; \
} \
} \
vx_store(src_ix, src_ix0); \
vx_store(src_iy, src_iy0); \
vx_store(src_ix + vlanes_32, src_ix1); \
vx_store(src_iy + vlanes_32, src_iy1); \
for (int i = 0; i < uf; i++) { \
int ix = src_ix[i], iy = src_iy[i]; \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(0, 0, 0); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(0, 1, uf); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(1, 0, uf*2); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(1, 1, uf*3); \
}
// Load pixels for linear interpolation (uint8_t -> int16_t)
//
// CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(cn, i) declares four v_int16
// variables f00<cn>, f01<cn>, f10<cn>, f11<cn> for one channel. They are loaded
// with vx_load_expand from the consecutive pixbuf sections i, i+1, i+2, i+3
// (each section is `uf` elements long) and reinterpreted as signed 16-bit.
//   cn - one-letter channel tag pasted into the variable names (g, r, b, a).
//   i  - index of the channel's first section: 0, 4, 8 or 12.
// The _C1/_C3/_C4 variants expand the per-channel macro once per channel, and
// CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16(CN) dispatches on the channel suffix.
// The dispatcher appends an extra ';' after the expansion, which yields a
// harmless empty statement.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(cn, i) \
v_int16 f00##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * i)), \
f01##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+1))), \
f10##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+2))), \
f11##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+3)));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 0)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(b, 8)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(b, 8) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(a, 12)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_##CN();
// Load pixels for linear interpolation (uint16_t -> uint16_t)
//
// CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(cn, i) declares four v_uint16
// variables f00<cn>, f01<cn>, f10<cn>, f11<cn> for one channel, loaded with a
// plain vx_load (no widening) from pixbuf sections i, i+1, i+2, i+3, each
// `uf` elements long.
//   cn - one-letter channel tag pasted into the variable names (g, r, b, a).
//   i  - index of the channel's first section: 0, 4, 8 or 12.
// The _C1/_C3/_C4 variants expand the per-channel macro once per channel, and
// CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16(CN) dispatches on the channel suffix
// (and appends a redundant ';').
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(cn, i) \
v_uint16 f00##cn = vx_load(pixbuf + uf * i), \
f01##cn = vx_load(pixbuf + uf * (i+1)), \
f10##cn = vx_load(pixbuf + uf * (i+2)), \
f11##cn = vx_load(pixbuf + uf * (i+3));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(g, 0)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(b, 8)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(b, 8) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(a, 12)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_##CN();
// Load pixels for linear interpolation (int16_t -> float)
//
// CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(cn) widens the four 16-bit
// vectors f00<cn>, f01<cn>, f10<cn>, f11<cn> (which must already be declared,
// e.g. by the U8S16 load macros) to float. Each input yields two v_float32
// variables: the <name>l suffix holds the converted low half
// (v_expand_low) and the <name>h suffix the converted high half
// (v_expand_high).
//   cn - one-letter channel tag pasted into the variable names (g, r, b, a).
// The _C1/_C3/_C4 variants repeat the conversion per channel and
// CV_WARP_LINEAR_VECTOR_INTER_LOAD_S16F32(CN) dispatches on the channel suffix.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(cn) \
v_float32 f00##cn##l = v_cvt_f32(v_expand_low(f00##cn)), f00##cn##h = v_cvt_f32(v_expand_high(f00##cn)), \
f01##cn##l = v_cvt_f32(v_expand_low(f01##cn)), f01##cn##h = v_cvt_f32(v_expand_high(f01##cn)), \
f10##cn##l = v_cvt_f32(v_expand_low(f10##cn)), f10##cn##h = v_cvt_f32(v_expand_high(f10##cn)), \
f11##cn##l = v_cvt_f32(v_expand_low(f11##cn)), f11##cn##h = v_cvt_f32(v_expand_high(f11##cn));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_S16F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(g)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_S16F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(b)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_S16F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(b) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(a)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_S16F32(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_S16F32_##CN()
// Load pixels for linear interpolation (uint16_t -> float)
//
// CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(cn) widens the four unsigned
// 16-bit vectors f00<cn>, f01<cn>, f10<cn>, f11<cn> (which must already be
// declared, e.g. by the U16 load macros) to float. Each half is expanded,
// reinterpreted as signed 32-bit and then converted with v_cvt_f32; the
// reinterpretation is value-preserving because an expanded 16-bit value
// always fits in the non-negative int32 range. Results are named <name>l
// (low half) and <name>h (high half).
//   cn - one-letter channel tag pasted into the variable names (g, r, b, a).
// The _C1/_C3/_C4 variants repeat the conversion per channel and
// CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16F32(CN) dispatches on the channel suffix.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(cn) \
v_float32 f00##cn##l = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(f00##cn))), f00##cn##h = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(f00##cn))), \
f01##cn##l = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(f01##cn))), f01##cn##h = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(f01##cn))), \
f10##cn##l = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(f10##cn))), f10##cn##h = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(f10##cn))), \
f11##cn##l = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(f11##cn))), f11##cn##h = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(f11##cn)));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(g)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(b)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(b) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(a)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16F32(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16F32_##CN()
// Load pixels for linear interpolation (float -> float)
//
// CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(cn, i) declares, for one channel,
// eight v_float32 variables: f00/f01/f10/f11 each split into an `l` vector
// loaded from the start of its pixbuf section and an `h` vector loaded
// vlanes_32 elements further on. The four neighbours come from the
// consecutive sections i, i+1, i+2, i+3, each `uf` elements long.
//   cn - one-letter channel tag pasted into the variable names (g, r, b, a).
//   i  - index of the channel's first section: 0, 4, 8 or 12.
// The _C1/_C3/_C4 variants expand the per-channel macro once per channel, and
// CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32(CN) dispatches on the channel suffix.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(cn, i) \
v_float32 f00##cn##l = vx_load(pixbuf + uf * i), f00##cn##h = vx_load(pixbuf + uf * i + vlanes_32), \
f01##cn##l = vx_load(pixbuf + uf * (i+1)), f01##cn##h = vx_load(pixbuf + uf * (i+1) + vlanes_32), \
f10##cn##l = vx_load(pixbuf + uf * (i+2)), f10##cn##h = vx_load(pixbuf + uf * (i+2) + vlanes_32), \
f11##cn##l = vx_load(pixbuf + uf * (i+3)), f11##cn##h = vx_load(pixbuf + uf * (i+3) + vlanes_32);
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(g, 0)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(b, 8)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(b, 8) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(a, 12)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_##CN()
// Linear interpolation calculation
//
// The interpolation runs in two passes over the float vectors produced by the
// load macros, overwriting them in place:
//   ALPHA pass: f00 = v_fma(alpha, f01 - f00, f00) and
//               f10 = v_fma(alpha, f11 - f10, f10),
//               blending the two neighbours that differ in dx.
//   BETA pass:  f00 = v_fma(beta, f10 - f00, f00),
//               blending the two intermediate results that differ in dy.
// Assuming v_fma(a, b, c) evaluates a*b + c, this is the same
// p00 + sx*(p01 - p00) form used by the scalar reference code in the tests.
// After both passes the result for channel <cn> is in f00<cn>l / f00<cn>h;
// f10<cn>l / f10<cn>h hold an intermediate value.
//   cn - one-letter channel tag pasted into the variable names (g, r, b, a).
// CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(CN) first declares the weights
// (alphal/alphah from src_x0/src_x1, betal/betah from src_y0/src_y1, which
// must be in scope) and then runs both passes for every channel of CN.
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(cn) \
f00##cn##l = v_fma(alphal, v_sub(f01##cn##l, f00##cn##l), f00##cn##l); f00##cn##h = v_fma(alphah, v_sub(f01##cn##h, f00##cn##h), f00##cn##h); \
f10##cn##l = v_fma(alphal, v_sub(f11##cn##l, f10##cn##l), f10##cn##l); f10##cn##h = v_fma(alphah, v_sub(f11##cn##h, f10##cn##h), f10##cn##h);
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(g)
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(b)
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(b) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(a)
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(cn) \
f00##cn##l = v_fma(betal, v_sub(f10##cn##l, f00##cn##l), f00##cn##l); f00##cn##h = v_fma(betah, v_sub(f10##cn##h, f00##cn##h), f00##cn##h);
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(g)
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(b)
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(b) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(a)
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(CN) \
v_float32 alphal = src_x0, alphah = src_x1, \
betal = src_y0, betah = src_y1; \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32_##CN() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32_##CN()
// Store
//
// CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8_*: write the interpolated float
// results (f00<cn>l / f00<cn>h) to the destination row at pixel x,
// presumably an 8-bit destination as the F32U8 name suggests.
// Each channel is rounded with v_round and packed to v_uint16 with v_pack_u.
//   C1: the 16-bit vector is packed together with a zero vector and only the
//       low half of the resulting v_uint8 is stored at dstptr + x.
//   C3/C4: the channels are interleaved into the local 16-bit scratch buffer
//       `tbuf` (sized with max_vlanes_16 per channel), then each run of
//       vlanes_16 interleaved values is reloaded and written with
//       v_pack_store at dstptr + x*CN + k*vlanes_16.
// The macros declare locals (f00*_u16, f00_u8, tbuf) in the expansion scope,
// so each may be expanded at most once per scope.
// CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8(CN) dispatches on the channel suffix.
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8_C1() \
v_uint16 f00_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)); \
v_uint8 f00_u8 = v_pack(f00_u16, vx_setall_u16(0)); \
v_store_low(dstptr + x, f00_u8);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8_C3() \
v_uint16 f00r_u16 = v_pack_u(v_round(f00rl), v_round(f00rh)), \
f00g_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)), \
f00b_u16 = v_pack_u(v_round(f00bl), v_round(f00bh)); \
uint16_t tbuf[max_vlanes_16*3]; \
v_store_interleave(tbuf, f00r_u16, f00g_u16, f00b_u16); \
v_pack_store(dstptr + x*3, vx_load(tbuf)); \
v_pack_store(dstptr + x*3 + vlanes_16, vx_load(tbuf + vlanes_16)); \
v_pack_store(dstptr + x*3 + vlanes_16*2, vx_load(tbuf + vlanes_16*2));
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8_C4() \
v_uint16 f00r_u16 = v_pack_u(v_round(f00rl), v_round(f00rh)), \
f00g_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)), \
f00b_u16 = v_pack_u(v_round(f00bl), v_round(f00bh)), \
f00a_u16 = v_pack_u(v_round(f00al), v_round(f00ah)); \
uint16_t tbuf[max_vlanes_16*4]; \
v_store_interleave(tbuf, f00r_u16, f00g_u16, f00b_u16, f00a_u16); \
v_pack_store(dstptr + x*4, vx_load(tbuf)); \
v_pack_store(dstptr + x*4 + vlanes_16, vx_load(tbuf + vlanes_16)); \
v_pack_store(dstptr + x*4 + vlanes_16*2, vx_load(tbuf + vlanes_16*2)); \
v_pack_store(dstptr + x*4 + vlanes_16*3, vx_load(tbuf + vlanes_16*3));
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8(CN) \
CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8_##CN()
// CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16_*: write the interpolated float
// results (f00<cn>l / f00<cn>h) to the destination row at pixel x,
// presumably a 16-bit destination as the F32U16 name suggests.
// Each channel is rounded with v_round and packed to v_uint16 with v_pack_u.
//   C1: the packed vector is stored directly at dstptr + x.
//   C3/C4: the per-channel vectors are written interleaved with
//       v_store_interleave at dstptr + x*CN (no scratch buffer needed).
// The macros declare locals (f00*_u16) in the expansion scope.
// CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16(CN) dispatches on the channel suffix.
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16_C1() \
v_uint16 f00_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)); \
v_store(dstptr + x, f00_u16);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16_C3() \
v_uint16 f00r_u16 = v_pack_u(v_round(f00rl), v_round(f00rh)), \
f00g_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)), \
f00b_u16 = v_pack_u(v_round(f00bl), v_round(f00bh)); \
v_store_interleave(dstptr + x*3, f00r_u16, f00g_u16, f00b_u16);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16_C4() \
v_uint16 f00r_u16 = v_pack_u(v_round(f00rl), v_round(f00rh)), \
f00g_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)), \
f00b_u16 = v_pack_u(v_round(f00bl), v_round(f00bh)), \
f00a_u16 = v_pack_u(v_round(f00al), v_round(f00ah)); \
v_store_interleave(dstptr + x*4, f00r_u16, f00g_u16, f00b_u16, f00a_u16);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16(CN) \
CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16_##CN()
// CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32_*: write the interpolated float
// results to the destination row at pixel x without any conversion.
//   C1: the low vector goes to dstptr + x and the high vector to
//       dstptr + x + vlanes_32.
//   C3/C4: the low vectors of all channels are written interleaved at
//       dstptr + x*CN, followed by the high vectors at an extra offset of
//       vlanes_32*CN elements.
// CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32(CN) dispatches on the channel suffix.
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32_C1() \
vx_store(dstptr + x, f00gl); \
vx_store(dstptr + x + vlanes_32, f00gh);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32_C3() \
v_store_interleave(dstptr + x*3, f00rl, f00gl, f00bl); \
v_store_interleave(dstptr + x*3 + vlanes_32*3, f00rh, f00gh, f00bh);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32_C4() \
v_store_interleave(dstptr + x*4, f00rl, f00gl, f00bl, f00al); \
v_store_interleave(dstptr + x*4 + vlanes_32*4, f00rh, f00gh, f00bh, f00ah);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32(CN) \
CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32_##CN()

File diff suppressed because it is too large Load Diff

View File

@ -172,7 +172,7 @@ OCL_TEST_P(WarpAffine, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
double eps = depth < CV_32F ? 0.04 : 0.06;
double eps = depth < CV_32F ? ( depth < CV_16U ? 0.09 : 0.04 ) : 0.06;
random_roi();
Mat M = getRotationMatrix2D(Point2f(src_roi.cols / 2.0f, src_roi.rows / 2.0f),
@ -189,7 +189,7 @@ OCL_TEST_P(WarpAffine, inplace_25853) // when src and dst are the same variable,
{
for (int j = 0; j < test_loop_times; j++)
{
double eps = depth < CV_32F ? 0.04 : 0.06;
double eps = depth < CV_32F ? ( depth < CV_16U ? 0.09 : 0.04 ) : 0.06;
random_roi();
Mat M = getRotationMatrix2D(Point2f(src_roi.cols / 2.0f, src_roi.rows / 2.0f),

View File

@ -150,7 +150,7 @@ void CV_ImageWarpBaseTest::generate_test_data()
while (depth == CV_8S || depth == CV_32S)
depth = rng.uniform(0, CV_64F);
int cn = rng.uniform(1, 4);
int cn = rng.uniform(1, 5);
src.create(ssize, CV_MAKE_TYPE(depth, cn));
@ -1045,6 +1045,13 @@ protected:
Mat M;
private:
void warpAffine(const Mat&, Mat&);
template<typename T>
void newWarpAffine(const Mat&, Mat&, const Mat&);
template<int channels, typename T>
void newLinear(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
};
CV_WarpAffine_Test::CV_WarpAffine_Test() :
@ -1088,7 +1095,7 @@ void CV_WarpAffine_Test::run_func()
float CV_WarpAffine_Test::get_success_error_level(int _interpolation, int _depth) const
{
return _depth == CV_8U ? 0 : CV_ImageWarpBaseTest::get_success_error_level(_interpolation, _depth);
return _depth == CV_8U ? 0.f : CV_ImageWarpBaseTest::get_success_error_level(_interpolation, _depth);
}
void CV_WarpAffine_Test::run_reference_func()
@ -1098,6 +1105,152 @@ void CV_WarpAffine_Test::run_reference_func()
tmp.convertTo(reference_dst, reference_dst.depth());
}
// Scalar reference fetch of one neighbour of the 2x2 interpolation window.
//   cn     - number of channels per pixel.
//   dy, dx - neighbour offset from (ix, iy); the call sites pass 0 or 1.
// The cn channel values are written to pxy starting at index 2*dy*cn + dx*cn,
// i.e. the neighbours are laid out in the order (0,0), (0,1), (1,0), (1,1).
// Cases, in order:
//   1. (ix+dx, iy+dy) is inside the image (unsigned compare also rejects
//      negative coordinates): read relative to `srcptr`, which already points
//      at pixel (ix, iy);
//   2. BORDER_CONSTANT: use bval;
//   3. BORDER_TRANSPARENT: keep the existing destination pixel dstptr[x*cn + ci];
//   4. otherwise: remap both coordinates with borderInterpolate and read from
//      the image origin `srcptr_`.
// Relies on these names at the expansion site: ix, iy, x, srcptr, srcptr_,
// srcstep, srccols, srcrows, pxy, bval, dstptr, borderType, borderType_x,
// borderType_y.
#define FETCH_PIXEL_SCALAR(cn, dy, dx) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr[ofs+ci];} \
} else if (borderType == BORDER_CONSTANT) { \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = bval[ci];} \
} else if (borderType == BORDER_TRANSPARENT) { \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = dstptr[x*cn+ci];} \
} else { \
int ix_ = borderInterpolate(ix + dx, srccols, borderType_x); \
int iy_ = borderInterpolate(iy + dy, srcrows, borderType_y); \
size_t glob_ofs = iy_*srcstep + ix_*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr_[glob_ofs+ci];} \
}
// Fills pxy with the 2x2 neighbourhood of (ix, iy) for a cn-channel image.
// Fast path: when both ix < srccols-1 and iy < srcrows-1 hold (as unsigned
//   compares, so negative coordinates fail), all four neighbours are inside
//   the image and are copied straight from `srcptr` / `srcptr + srcstep`.
// Slow path:
//   - for BORDER_CONSTANT / BORDER_TRANSPARENT, if the window lies completely
//     outside the image (ix+1 or iy+1 is outside [0, srccols] / [0, srcrows]),
//     the destination pixel is set to bval (constant) or left untouched
//     (transparent) and the enclosing function returns immediately;
//   - otherwise each neighbour is fetched individually through
//     FETCH_PIXEL_SCALAR, which applies the border handling per sample.
// Because of the embedded `return;`, the macro may only be expanded in a
// function returning void. It relies on the same call-site names as
// FETCH_PIXEL_SCALAR.
#define WARPAFFINE_SHUFFLE(cn) \
if ((((unsigned)ix < (unsigned)(srccols-1)) & \
((unsigned)iy < (unsigned)(srcrows-1))) != 0) { \
for (int ci = 0; ci < cn; ci++) { \
pxy[ci] = srcptr[ci]; \
pxy[ci+cn] = srcptr[ci+cn]; \
pxy[ci+cn*2] = srcptr[srcstep+ci]; \
pxy[ci+cn*3] = srcptr[srcstep+ci+cn]; \
} \
} else { \
if ((borderType == BORDER_CONSTANT || borderType == BORDER_TRANSPARENT) && \
(((unsigned)(ix+1) >= (unsigned)(srccols+1))| \
((unsigned)(iy+1) >= (unsigned)(srcrows+1))) != 0) { \
if (borderType == BORDER_CONSTANT) { \
for (int ci = 0; ci < cn; ci++) { dstptr[x*cn+ci] = bval[ci]; } \
} \
return; \
} \
FETCH_PIXEL_SCALAR(cn, 0, 0); \
FETCH_PIXEL_SCALAR(cn, 0, 1); \
FETCH_PIXEL_SCALAR(cn, 1, 0); \
FETCH_PIXEL_SCALAR(cn, 1, 1); \
}
// Bilinear blend of a gathered 2x2 neighbourhood, one result per channel.
//   cn     - number of channels.
//   pxy    - four consecutive groups of cn values, ordered as the neighbours
//            (dy,dx) = (0,0), (0,1), (1,0), (1,1).
//   dst    - receives cn blended values, saturated to T.
//   sx, sy - fractional weights along x and y.
template<typename T>
static inline void warpaffine_linear_calc(int cn, const T *pxy, T *dst, float sx, float sy)
{
    const T *r0c0 = pxy;
    const T *r0c1 = pxy + cn;
    const T *r1c0 = pxy + cn*2;
    const T *r1c1 = pxy + cn*3;
    for (int ch = 0; ch < cn; ++ch) {
        // Widen to float before any arithmetic so integer T cannot wrap.
        const float a = r0c0[ch];
        const float b = r0c1[ch];
        const float c = r1c0[ch];
        const float d = r1c1[ch];
        float blended = a + sx*(b - a);        // horizontal blend, row 0
        const float lower = c + sx*(d - c);    // horizontal blend, row 1
        blended += sy*(lower - blended);       // vertical blend
        dst[ch] = saturate_cast<T>(blended);
    }
}
// float specialization of the bilinear blend: same arithmetic as the generic
// version, but the result is stored directly without saturate_cast.
//   cn     - number of channels.
//   pxy    - four consecutive groups of cn values, ordered as the neighbours
//            (dy,dx) = (0,0), (0,1), (1,0), (1,1).
//   dst    - receives cn blended values.
//   sx, sy - fractional weights along x and y.
template<>
inline void warpaffine_linear_calc<float>(int cn, const float *pxy, float *dst, float sx, float sy)
{
    const float *r0c0 = pxy;
    const float *r0c1 = pxy + cn;
    const float *r1c0 = pxy + cn*2;
    const float *r1c1 = pxy + cn*3;
    for (int ch = 0; ch < cn; ++ch) {
        const float a = r0c0[ch];
        const float b = r0c1[ch];
        const float c = r1c0[ch];
        const float d = r1c1[ch];
        float blended = a + sx*(b - a);        // horizontal blend, row 0
        const float lower = c + sx*(d - c);    // horizontal blend, row 1
        blended += sy*(lower - blended);       // vertical blend
        dst[ch] = blended;
    }
}
// Reference bilinear sampling for a single destination pixel.
//   x        - destination column; the result is written to dstptr[x*channels ...].
//   sx, sy   - source coordinates of that pixel (floating point).
//   srcptr_  - pointer to the first element of the source image.
//   dstptr   - pointer to the start of the current destination row.
//   srccols, srcrows - source image size in pixels.
//   srcstep  - source row stride, counted in elements of T.
//   bval     - border value, one entry per channel.
//   borderType_x, borderType_y - per-axis border modes used when a sample has
//              to be remapped into the image.
// The local names ix, iy, pxy and srcptr are consumed by the
// WARPAFFINE_SHUFFLE macro and must keep these exact names. The macro may
// return early, in which case no interpolation is performed.
template<int channels, typename T>
void CV_WarpAffine_Test::newLinear(int x, float sx, float sy, const T *srcptr_, T *dstptr,
int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y)
{
// Split the coordinate into the integer cell and the fractional weights.
int ix = (int)floorf(sx), iy = (int)floorf(sy);
sx -= ix; sy -= iy;
// Four neighbours x `channels` values each.
T pxy[channels*4];
// Address of pixel (ix, iy); only dereferenced by the macro after its bounds checks.
const T *srcptr = srcptr_ + srcstep*iy + ix*channels;
WARPAFFINE_SHUFFLE(channels);
warpaffine_linear_calc(channels, pxy, dstptr+x*channels, sx, sy);
}
// Explicit specialization of newLinear for 3-channel float images. The body
// mirrors the generic template with channels == 3 and T == float written out.
// NOTE(review): the reason for keeping a separate specialization is not
// visible here - confirm before folding it into the generic template.
// The local names ix, iy, pxy and srcptr are consumed by the
// WARPAFFINE_SHUFFLE macro and must keep these exact names.
template<>
void CV_WarpAffine_Test::newLinear<3, float>(int x, float sx, float sy, const float *srcptr_, float *dstptr,
int srccols, int srcrows, size_t srcstep,
const float *bval, int borderType_x, int borderType_y)
{
// Split the coordinate into the integer cell and the fractional weights.
int ix = (int)floorf(sx), iy = (int)floorf(sy);
sx -= ix; sy -= iy;
// Four neighbours x three channels.
float pxy[12];
// Address of pixel (ix, iy); only dereferenced by the macro after its bounds checks.
const float *srcptr = srcptr_ + srcstep*iy + ix*3;
WARPAFFINE_SHUFFLE(3);
warpaffine_linear_calc(3, pxy, dstptr+x*3, sx, sy);
}
template<typename T>
void CV_WarpAffine_Test::newWarpAffine(const Mat &_src, Mat &_dst, const Mat &tM)
{
int num_channels = _dst.channels();
CV_CheckTrue(num_channels == 1 || num_channels == 3 || num_channels == 4, "");
auto *srcptr_ = _src.ptr<const T>();
auto *dstptr_ = _dst.ptr<T>();
size_t srcstep = _src.step/sizeof(T), dststep = _dst.step/sizeof(T);
int srccols = _src.cols, srcrows = _src.rows;
int dstcols = _dst.cols, dstrows = _dst.rows;
Mat ttM;
tM.convertTo(ttM, CV_32F);
auto *_M = ttM.ptr<const float>();
T bval[] = {
saturate_cast<T>(borderValue[0]),
saturate_cast<T>(borderValue[1]),
saturate_cast<T>(borderValue[2]),
saturate_cast<T>(borderValue[3]),
};
int borderType_x = borderType != BORDER_CONSTANT &&
borderType != BORDER_TRANSPARENT &&
srccols <= 1 ? BORDER_REPLICATE : borderType;
int borderType_y = borderType != BORDER_CONSTANT &&
borderType != BORDER_TRANSPARENT &&
srcrows <= 1 ? BORDER_REPLICATE : borderType;
for (int y = 0; y < dstrows; y++) {
T* dstptr = dstptr_ + y*dststep;
for (int x = 0; x < dstcols; x++) {
float sx = x*_M[0] + y*_M[1] + _M[2];
float sy = x*_M[3] + y*_M[4] + _M[5];
if (num_channels == 3) {
newLinear<3>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (num_channels == 4) {
newLinear<4>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else {
newLinear<1>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
}
}
}
}
void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
{
Size dsize = _dst.size();
@ -1122,6 +1275,17 @@ void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
if (!(interpolation & cv::WARP_INVERSE_MAP))
invertAffineTransform(tM.clone(), tM);
if (inter == INTER_LINEAR) {
int dst_depth = _dst.depth(), dst_channels = _dst.channels();
if (dst_depth == CV_8U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<uint8_t>(_src, _dst, tM);
} else if (dst_depth == CV_16U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<uint16_t>(_src, _dst, tM);
} else if (dst_depth == CV_32F && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<float>(_src, _dst, tM);
}
}
const int AB_BITS = MAX(10, (int)INTER_BITS);
const int AB_SCALE = 1 << AB_BITS;
int round_delta = (inter == INTER_NEAREST) ? AB_SCALE / 2 : (AB_SCALE / INTER_TAB_SIZE / 2);
@ -1134,7 +1298,7 @@ void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
{
int v1 = saturate_cast<int>(saturate_cast<int>(data_tM[0] * dx * AB_SCALE) +
saturate_cast<int>((data_tM[1] * dy + data_tM[2]) * AB_SCALE) + round_delta),
v2 = saturate_cast<int>(saturate_cast<int>(data_tM[3] * dx * AB_SCALE) +
v2 = saturate_cast<int>(saturate_cast<int>(data_tM[3] * dx * AB_SCALE) +
saturate_cast<int>((data_tM[4] * dy + data_tM[5]) * AB_SCALE) + round_delta);
v1 >>= AB_BITS - INTER_BITS;
v2 >>= AB_BITS - INTER_BITS;

View File

@ -748,8 +748,76 @@ struct DefaultRngAuto
// test images generation functions
void fillGradient(Mat& img, int delta = 5);
void smoothBorder(Mat& img, const Scalar& color, int delta = 3);
// Fills every pixel of `img` with a deterministic pattern derived from its
// position: channel 0 = row, channel 1 = column, channel 2 = row*column and
// channel 3 = row*column/(row+column+1), each converted to T.
//   img   - image to fill; its element type must be T and it may have at most
//           four channels.
//   delta - unused by the generic implementation (kept for signature
//           compatibility with the uint8_t specialization).
template<typename T>
void fillGradient(Mat& img, int delta = 5)
{
    CV_UNUSED(delta);
    const int ch = img.channels();
    // The per-pixel value table below has exactly four entries; reject images
    // with more channels instead of reading past its end.
    CV_Assert(ch <= 4);
    for (int r = 0; r < img.rows; r++)
    {
        for (int c = 0; c < img.cols; c++)
        {
            const T vals[] = {
                static_cast<T>(r),
                static_cast<T>(c),
                static_cast<T>(r*c),
                static_cast<T>(r*c/(r+c+1))
            };
            T *p = img.ptr<T>(r, c);
            for (int i = 0; i < ch; i++) p[i] = vals[i];
        }
    }
}
template<>
void fillGradient<uint8_t>(Mat& img, int delta);
// Fades the border of `img` towards `color`.
// A band of up to n = 100/delta pixels along each edge is blended as
//   pixel = pixel * k1 + color * (1 - k1),  k1 = distance_from_edge * delta / 100,
// so the outermost row/column becomes `color` and the blend weakens inwards.
//   img   - non-empty image with element type T and at most four channels.
//   color - colour the border fades to.
//   delta - fade step in percent per pixel; must be non-zero.
// The band is clamped to half of the image in both directions (nR rows,
// nC columns) so that opposite edges never run past the image.
template<typename T>
void smoothBorder(Mat& img, const Scalar& color, int delta = 3)
{
    const int ch = img.channels();
    CV_Assert(!img.empty() && ch <= 4);
    CV_Assert(delta != 0); // 100/delta below would divide by zero
    const int n = 100/delta;
    const int nR = std::min(n, (img.rows+1)/2), nC = std::min(n, (img.cols+1)/2);

    // Blend one pixel towards `color`, keeping the weight k1 of the original.
    auto blend = [&](int row, int col, double k1)
    {
        const double k2 = 1-k1;
        T *p = img.ptr<T>(row, col);
        Scalar s;
        for (int i = 0; i < ch; i++) s[i] = p[i];
        s = s * k1 + color * k2;
        for (int i = 0; i < ch; i++) p[i] = static_cast<T>(s[i]);
    };

    // Top and bottom bands.
    for (int r = 0; r < nR; r++)
    {
        const double k1 = r*delta/100.;
        for (int c = 0; c < img.cols; c++)
            blend(r, c, k1);
        for (int c = 0; c < img.cols; c++)
            blend(img.rows-r-1, c, k1);
    }

    // Left and right bands.
    for (int r = 0; r < img.rows; r++)
    {
        for (int c = 0; c < nC; c++)
            blend(r, c, c*delta/100.);
        // Bounded by nC, not n: with n the index img.cols-c-1 becomes negative
        // for images narrower than n columns.
        for (int c = 0; c < nC; c++)
            blend(r, img.cols-c-1, c*delta/100.);
    }
}
// Utility functions

View File

@ -686,7 +686,8 @@ TS* TS::ptr()
return &ts;
}
void fillGradient(Mat& img, int delta)
template<>
void fillGradient<uint8_t>(Mat& img, int delta)
{
const int ch = img.channels();
CV_Assert(!img.empty() && img.depth() == CV_8U && ch <= 4);
@ -708,57 +709,6 @@ void fillGradient(Mat& img, int delta)
}
}
// Fades the border of an 8-bit image towards `color`.
// A band of up to n = 100/delta pixels along each edge is blended as
//   pixel = pixel * k1 + color * (1 - k1),  k1 = distance_from_edge * delta / 100,
// so the outermost row/column becomes `color` and the blend weakens inwards.
//   img   - non-empty CV_8U image with at most four channels.
//   color - colour the border fades to.
//   delta - fade step in percent per pixel; must be non-zero.
// The band is clamped to half of the image in both directions (nR rows,
// nC columns) so that opposite edges never run past the image.
void smoothBorder(Mat& img, const Scalar& color, int delta)
{
    const int ch = img.channels();
    CV_Assert(!img.empty() && img.depth() == CV_8U && ch <= 4);
    CV_Assert(delta != 0); // 100/delta below would divide by zero
    const int n = 100/delta;
    const int nR = std::min(n, (img.rows+1)/2), nC = std::min(n, (img.cols+1)/2);

    // Blend one pixel towards `color`, keeping the weight k1 of the original.
    auto blend = [&](int row, int col, double k1)
    {
        const double k2 = 1-k1;
        uchar *p = img.ptr(row, col);
        Scalar s;
        for (int i = 0; i < ch; i++) s[i] = p[i];
        s = s * k1 + color * k2;
        for (int i = 0; i < ch; i++) p[i] = uchar(s[i]);
    };

    // Top and bottom bands.
    for (int r = 0; r < nR; r++)
    {
        const double k1 = r*delta/100.;
        for (int c = 0; c < img.cols; c++)
            blend(r, c, k1);
        for (int c = 0; c < img.cols; c++)
            blend(img.rows-r-1, c, k1);
    }

    // Left and right bands.
    for (int r = 0; r < img.rows; r++)
    {
        for (int c = 0; c < nC; c++)
            blend(r, c, c*delta/100.);
        // Bounded by nC, not n: with n the index img.cols-c-1 becomes negative
        // for images narrower than n columns.
        for (int c = 0; c < nC; c++)
            blend(r, img.cols-c-1, c*delta/100.);
    }
}
bool test_ipp_check = false;