Merge pull request #25984 from fengyuentau:imgproc/warpaffine_opt

imgproc: add optimized warpAffine kernels for 8U/16U/32F + C1/C3/C4 inputs #25984

Merge with https://github.com/opencv/opencv_extra/pull/1198.
Merge with https://github.com/opencv/opencv_contrib/pull/3787.


### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Yuantao Feng 2024-10-03 19:01:36 +08:00 committed by GitHub
parent ebf11d36f4
commit 97681bdfce
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 3070 additions and 179 deletions

View File

@ -2228,9 +2228,7 @@ inline v_int16x8 v_round(const v_float16x8 &a)
inline v_int16x8 v_floor(const v_float16x8 &a) inline v_int16x8 v_floor(const v_float16x8 &a)
{ {
int16x8_t a1 = vcvtq_s16_f16(a.val); return v_int16x8(vcvtmq_s16_f16(a.val));
uint16x8_t mask = vcgtq_f16(vcvtq_f16_s16(a1), a.val);
return v_int16x8(vaddq_s16(a1, vreinterpretq_s16_u16(mask)));
} }
inline v_int16x8 v_ceil(const v_float16x8 &a) inline v_int16x8 v_ceil(const v_float16x8 &a)
@ -2271,9 +2269,13 @@ inline v_int32x4 v_round(const v_float32x4& a)
#endif #endif
inline v_int32x4 v_floor(const v_float32x4& a) inline v_int32x4 v_floor(const v_float32x4& a)
{ {
#if __ARM_ARCH > 7
return v_int32x4(vcvtmq_s32_f32(a.val));
#else
int32x4_t a1 = vcvtq_s32_f32(a.val); int32x4_t a1 = vcvtq_s32_f32(a.val);
uint32x4_t mask = vcgtq_f32(vcvtq_f32_s32(a1), a.val); uint32x4_t mask = vcgtq_f32(vcvtq_f32_s32(a1), a.val);
return v_int32x4(vaddq_s32(a1, vreinterpretq_s32_u32(mask))); return v_int32x4(vaddq_s32(a1, vreinterpretq_s32_u32(mask)));
#endif
} }
inline v_int32x4 v_ceil(const v_float32x4& a) inline v_int32x4 v_ceil(const v_float32x4& a)

View File

@ -261,7 +261,7 @@ private:
h = rect.height; w = rect.width; h = rect.height; w = rect.width;
pose = Matx23f(c, -s, -(float)rect.x, pose = Matx23f(c, -s, -(float)rect.x,
s, c, -(float)rect.y); s, c, -(float)rect.y);
warpAffine(image, rotImage, pose, Size(w, h), INTER_LINEAR, BORDER_REPLICATE); warpAffine(image, rotImage, pose, Size(w, h), INTER_LINEAR, BORDER_REPLICATE, Scalar(), cv::ALGO_HINT_ACCURATE);
} }
if( tilt == 1 ) if( tilt == 1 )
warpedImage = rotImage; warpedImage = rotImage;
@ -275,7 +275,7 @@ private:
pose(0, 2) /= tilt; pose(0, 2) /= tilt;
} }
if( phi != 0 || tilt != 1 ) if( phi != 0 || tilt != 1 )
warpAffine(mask0, warpedMask, pose, warpedImage.size(), INTER_NEAREST); warpAffine(mask0, warpedMask, pose, warpedImage.size(), INTER_NEAREST, BORDER_CONSTANT, Scalar(), cv::ALGO_HINT_ACCURATE);
else else
warpedMask = mask0; warpedMask = mask0;
} }

View File

@ -10,6 +10,7 @@ ocv_add_dispatched_file(median_blur SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(morph SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(morph SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX) ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX)
ocv_add_dispatched_file(warp_kernels SSE2 SSE4_1 AVX2 NEON NEON_FP16 RVV LASX)
ocv_define_module(imgproc opencv_core WRAP java objc python js) ocv_define_module(imgproc opencv_core WRAP java objc python js)
ocv_module_include_directories(opencv_imgproc ${ZLIB_INCLUDE_DIRS}) ocv_module_include_directories(opencv_imgproc ${ZLIB_INCLUDE_DIRS})

View File

@ -2474,6 +2474,7 @@ flag #WARP_INVERSE_MAP that means that M is the inverse transformation (
borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image corresponding to borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image corresponding to
the "outliers" in the source image are not modified by the function. the "outliers" in the source image are not modified by the function.
@param borderValue value used in case of a constant border; by default, it is 0. @param borderValue value used in case of a constant border; by default, it is 0.
@param hint Implementation modification flags. See #AlgorithmHint
@sa warpPerspective, resize, remap, getRectSubPix, transform @sa warpPerspective, resize, remap, getRectSubPix, transform
*/ */
@ -2481,7 +2482,8 @@ CV_EXPORTS_W void warpAffine( InputArray src, OutputArray dst,
InputArray M, Size dsize, InputArray M, Size dsize,
int flags = INTER_LINEAR, int flags = INTER_LINEAR,
int borderMode = BORDER_CONSTANT, int borderMode = BORDER_CONSTANT,
const Scalar& borderValue = Scalar()); const Scalar& borderValue = Scalar(),
AlgorithmHint hint = cv::ALGO_HINT_DEFAULT);
/** @example samples/cpp/snippets/warpPerspective_demo.cpp /** @example samples/cpp/snippets/warpPerspective_demo.cpp
An example program shows using cv::getPerspectiveTransform and cv::warpPerspective for image warping An example program shows using cv::getPerspectiveTransform and cv::warpPerspective for image warping

View File

@ -72,7 +72,10 @@ OCL_PERF_TEST_P(WarpAffineFixture, WarpAffine,
const WarpAffineParams params = GetParam(); const WarpAffineParams params = GetParam();
const Size srcSize = get<0>(params); const Size srcSize = get<0>(params);
const int type = get<1>(params), interpolation = get<2>(params); const int type = get<1>(params), interpolation = get<2>(params);
const double eps = CV_MAT_DEPTH(type) <= CV_32S ? 1 : interpolation == INTER_CUBIC ? 2e-3 : 1e-4;
// BUG: OpenCL and CPU version diverges a bit
// Ticket: https://github.com/opencv/opencv/issues/26235
const double eps = CV_MAT_DEPTH(type) <= CV_32S ? 2 : interpolation == INTER_CUBIC ? 2e-3 : 3e-2;
checkDeviceMaxMemoryAllocSize(srcSize, type); checkDeviceMaxMemoryAllocSize(srcSize, type);

View File

@ -15,24 +15,6 @@ typedef TestBaseWithParam<MatInfo_SizePair_t> MatInfo_SizePair;
CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4, \ CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4, \
CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4 CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4
// For gradient-ish testing of the other matrix formats
template<typename T>
static void fillFPGradient(Mat& img)
{
const int ch = img.channels();
int r, c, i;
for(r=0; r<img.rows; r++)
{
for(c=0; c<img.cols; c++)
{
T vals[] = {(T)r, (T)c, (T)(r*c), (T)(r*c/(r+c+1))};
T *p = (T*)img.ptr(r, c);
for(i=0; i<ch; i++) p[i] = (T)vals[i];
}
}
}
PERF_TEST_P(MatInfo_Size_Size, resizeUpLinear, PERF_TEST_P(MatInfo_Size_Size, resizeUpLinear,
testing::Values( testing::Values(
MatInfo_Size_Size_t(CV_8UC1, szVGA, szqHD), MatInfo_Size_Size_t(CV_8UC1, szVGA, szqHD),
@ -51,7 +33,7 @@ PERF_TEST_P(MatInfo_Size_Size, resizeUpLinear,
Size to = get<2>(GetParam()); Size to = get<2>(GetParam());
cv::Mat src(from, matType), dst(to, matType); cv::Mat src(from, matType), dst(to, matType);
cvtest::fillGradient(src); cvtest::fillGradient<uint8_t>(src);
declare.in(src).out(dst); declare.in(src).out(dst);
TEST_CYCLE_MULTIRUN(10) resize(src, dst, to, 0, 0, INTER_LINEAR_EXACT); TEST_CYCLE_MULTIRUN(10) resize(src, dst, to, 0, 0, INTER_LINEAR_EXACT);
@ -79,9 +61,9 @@ PERF_TEST_P(MatInfo_SizePair, resizeUpLinearNonExact,
cv::Mat src(from, matType), dst(to, matType); cv::Mat src(from, matType), dst(to, matType);
switch(src.depth()) switch(src.depth())
{ {
case CV_8U: cvtest::fillGradient(src); break; case CV_8U: cvtest::fillGradient<uint8_t>(src); break;
case CV_16U: fillFPGradient<ushort>(src); break; case CV_16U: cvtest::fillGradient<ushort>(src); break;
case CV_32F: fillFPGradient<float>(src); break; case CV_32F: cvtest::fillGradient<float>(src); break;
} }
declare.in(src).out(dst); declare.in(src).out(dst);
@ -120,7 +102,7 @@ PERF_TEST_P(MatInfo_Size_Size, resizeDownLinear,
Size to = get<2>(GetParam()); Size to = get<2>(GetParam());
cv::Mat src(from, matType), dst(to, matType); cv::Mat src(from, matType), dst(to, matType);
cvtest::fillGradient(src); cvtest::fillGradient<uint8_t>(src);
declare.in(src).out(dst); declare.in(src).out(dst);
TEST_CYCLE_MULTIRUN(10) resize(src, dst, to, 0, 0, INTER_LINEAR_EXACT); TEST_CYCLE_MULTIRUN(10) resize(src, dst, to, 0, 0, INTER_LINEAR_EXACT);
@ -155,9 +137,9 @@ PERF_TEST_P(MatInfo_SizePair, resizeDownLinearNonExact,
cv::Mat src(from, matType), dst(to, matType); cv::Mat src(from, matType), dst(to, matType);
switch(src.depth()) switch(src.depth())
{ {
case CV_8U: cvtest::fillGradient(src); break; case CV_8U: cvtest::fillGradient<uint8_t>(src); break;
case CV_16U: fillFPGradient<ushort>(src); break; case CV_16U: cvtest::fillGradient<ushort>(src); break;
case CV_32F: fillFPGradient<float>(src); break; case CV_32F: cvtest::fillGradient<float>(src); break;
} }
declare.in(src).out(dst); declare.in(src).out(dst);

View File

@ -12,7 +12,7 @@ CV_ENUM(InterType, INTER_NEAREST, INTER_LINEAR)
CV_ENUM(InterTypeExtended, INTER_NEAREST, INTER_LINEAR, WARP_RELATIVE_MAP) CV_ENUM(InterTypeExtended, INTER_NEAREST, INTER_LINEAR, WARP_RELATIVE_MAP)
CV_ENUM(RemapMode, HALF_SIZE, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH) CV_ENUM(RemapMode, HALF_SIZE, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH)
typedef TestBaseWithParam< tuple<MatType, Size, InterType, BorderMode> > TestWarpAffine; typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, MatType> > TestWarpAffine;
typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, int> > TestWarpPerspective; typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, int> > TestWarpPerspective;
typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, MatType> > TestWarpPerspectiveNear_t; typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, MatType> > TestWarpPerspectiveNear_t;
typedef TestBaseWithParam< tuple<MatType, Size, InterTypeExtended, BorderMode, RemapMode> > TestRemap; typedef TestBaseWithParam< tuple<MatType, Size, InterTypeExtended, BorderMode, RemapMode> > TestRemap;
@ -21,24 +21,39 @@ void update_map(const Mat& src, Mat& map_x, Mat& map_y, const int remapMode, boo
PERF_TEST_P( TestWarpAffine, WarpAffine, PERF_TEST_P( TestWarpAffine, WarpAffine,
Combine( Combine(
Values(CV_8UC1, CV_8UC4),
Values( szVGA, sz720p, sz1080p ), Values( szVGA, sz720p, sz1080p ),
InterType::all(), InterType::all(),
BorderMode::all() BorderMode::all(),
Values(CV_8UC3, CV_16UC3, CV_32FC3, CV_8UC1, CV_16UC1, CV_32FC1, CV_8UC4, CV_16UC4, CV_32FC4)
) )
) )
{ {
Size sz, szSrc(512, 512); Size sz, szSrc(512, 512);
int borderMode, interType, dataType; int type, borderMode, interType;
dataType = get<0>(GetParam()); sz = get<0>(GetParam());
sz = get<1>(GetParam()); interType = get<1>(GetParam());
interType = get<2>(GetParam()); borderMode = get<2>(GetParam());
borderMode = get<3>(GetParam()); type = get<3>(GetParam());
Scalar borderColor = Scalar::all(150); Scalar borderColor = Scalar::all(150);
Mat src(szSrc, dataType), dst(sz, dataType); Mat src(szSrc,type), dst(sz, type);
cvtest::fillGradient(src); switch (src.depth()) {
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1); case CV_8U: {
cvtest::fillGradient<uint8_t>(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<uint8_t>(src, borderColor, 1);
break;
}
case CV_16U: {
cvtest::fillGradient<uint16_t>(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<uint16_t>(src, borderColor, 1);
break;
}
case CV_32F: {
cvtest::fillGradient<float>(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<float>(src, borderColor, 1);
break;
}
}
Mat warpMat = getRotationMatrix2D(Point2f(src.cols/2.f, src.rows/2.f), 30., 2.2); Mat warpMat = getRotationMatrix2D(Point2f(src.cols/2.f, src.rows/2.f), 30., 2.2);
declare.in(src).out(dst); declare.in(src).out(dst);
@ -47,36 +62,6 @@ PERF_TEST_P( TestWarpAffine, WarpAffine,
SANITY_CHECK(dst, 1); SANITY_CHECK(dst, 1);
} }
PERF_TEST_P(TestWarpAffine, DISABLED_WarpAffine_ovx,
Combine(
Values(CV_8UC1, CV_8UC4),
Values(szVGA, sz720p, sz1080p),
InterType::all(),
BorderMode::all()
)
)
{
Size sz, szSrc(512, 512);
int borderMode, interType, dataType;
dataType = get<0>(GetParam());
sz = get<1>(GetParam());
interType = get<2>(GetParam());
borderMode = get<3>(GetParam());
Scalar borderColor = Scalar::all(150);
Mat src(szSrc, dataType), dst(sz, dataType);
cvtest::fillGradient(src);
if (borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1);
Mat warpMat = getRotationMatrix2D(Point2f(src.cols / 2.f, src.rows / 2.f), 30., 2.2);
declare.in(src).out(dst);
TEST_CYCLE() warpAffine(src, dst, warpMat, sz, interType, borderMode, borderColor);
SANITY_CHECK(dst, 1);
}
PERF_TEST_P( TestWarpPerspective, WarpPerspective, PERF_TEST_P( TestWarpPerspective, WarpPerspective,
Combine( Combine(
Values( szVGA, sz720p, sz1080p ), Values( szVGA, sz720p, sz1080p ),
@ -96,8 +81,8 @@ PERF_TEST_P( TestWarpPerspective, WarpPerspective,
Scalar borderColor = Scalar::all(150); Scalar borderColor = Scalar::all(150);
Mat src(szSrc, CV_8UC(channels)), dst(sz, CV_8UC(channels)); Mat src(szSrc, CV_8UC(channels)), dst(sz, CV_8UC(channels));
cvtest::fillGradient(src); cvtest::fillGradient<uint8_t>(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1); if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<uint8_t>(src, borderColor, 1);
Mat rotMat = getRotationMatrix2D(Point2f(src.cols/2.f, src.rows/2.f), 30., 2.2); Mat rotMat = getRotationMatrix2D(Point2f(src.cols/2.f, src.rows/2.f), 30., 2.2);
Mat warpMat(3, 3, CV_64FC1); Mat warpMat(3, 3, CV_64FC1);
for(int r=0; r<2; r++) for(int r=0; r<2; r++)
@ -114,42 +99,6 @@ PERF_TEST_P( TestWarpPerspective, WarpPerspective,
SANITY_CHECK(dst, 1); SANITY_CHECK(dst, 1);
} }
PERF_TEST_P(TestWarpPerspective, DISABLED_WarpPerspective_ovx,
Combine(
Values(szVGA, sz720p, sz1080p),
InterType::all(),
BorderMode::all(),
Values(1)
)
)
{
Size sz, szSrc(512, 512);
int borderMode, interType, channels;
sz = get<0>(GetParam());
interType = get<1>(GetParam());
borderMode = get<2>(GetParam());
channels = get<3>(GetParam());
Scalar borderColor = Scalar::all(150);
Mat src(szSrc, CV_8UC(channels)), dst(sz, CV_8UC(channels));
cvtest::fillGradient(src);
if (borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1);
Mat rotMat = getRotationMatrix2D(Point2f(src.cols / 2.f, src.rows / 2.f), 30., 2.2);
Mat warpMat(3, 3, CV_64FC1);
for (int r = 0; r<2; r++)
for (int c = 0; c<3; c++)
warpMat.at<double>(r, c) = rotMat.at<double>(r, c);
warpMat.at<double>(2, 0) = .3 / sz.width;
warpMat.at<double>(2, 1) = .3 / sz.height;
warpMat.at<double>(2, 2) = 1;
declare.in(src).out(dst);
TEST_CYCLE() warpPerspective(src, dst, warpMat, sz, interType, borderMode, borderColor);
SANITY_CHECK(dst, 1);
}
PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear, PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear,
Combine( Combine(
Values( Size(640,480), Size(1920,1080), Size(2592,1944) ), Values( Size(640,480), Size(1920,1080), Size(2592,1944) ),
@ -168,8 +117,8 @@ PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear,
Scalar borderColor = Scalar::all(150); Scalar borderColor = Scalar::all(150);
Mat src(size, type), dst(size, type); Mat src(size, type), dst(size, type);
cvtest::fillGradient(src); cvtest::fillGradient<uint8_t>(src);
if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1); if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder<uint8_t>(src, borderColor, 1);
int shift = static_cast<int>(src.cols*0.04); int shift = static_cast<int>(src.cols*0.04);
Mat srcVertices = (Mat_<Vec2f>(1, 4) << Vec2f(0, 0), Mat srcVertices = (Mat_<Vec2f>(1, 4) << Vec2f(0, 0),
Vec2f(static_cast<float>(size.width-1), 0), Vec2f(static_cast<float>(size.width-1), 0),

View File

@ -55,6 +55,9 @@
#include "opencv2/core/softfloat.hpp" #include "opencv2/core/softfloat.hpp"
#include "imgwarp.hpp" #include "imgwarp.hpp"
#include "warp_kernels.simd.hpp"
#include "warp_kernels.simd_declarations.hpp"
using namespace cv; using namespace cv;
namespace cv namespace cv
@ -1351,6 +1354,9 @@ static bool ocl_remap(InputArray _src, OutputArray _dst, InputArray _map1, Input
int cn = _src.channels(), type = _src.type(), depth = _src.depth(), int cn = _src.channels(), type = _src.type(), depth = _src.depth(),
rowsPerWI = dev.isIntel() ? 4 : 1; rowsPerWI = dev.isIntel() ? 4 : 1;
if(!dev.hasFP64() && depth == CV_64F)
return false;
if (borderType == BORDER_TRANSPARENT || !(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST) if (borderType == BORDER_TRANSPARENT || !(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST)
|| _map1.type() == CV_16SC1 || _map2.type() == CV_16SC1) || _map1.type() == CV_16SC1 || _map2.type() == CV_16SC1)
return false; return false;
@ -2571,16 +2577,70 @@ static bool ipp_warpAffine( InputArray _src, OutputArray _dst, int interpolation
namespace hal { namespace hal {
void warpAffine(int src_type, static void warpAffine(int src_type,
const uchar * src_data, size_t src_step, int src_width, int src_height, const uchar * src_data, size_t src_step, int src_width, int src_height,
uchar * dst_data, size_t dst_step, int dst_width, int dst_height, uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
const double M[6], int interpolation, int borderType, const double borderValue[4]) const double M[6], int interpolation, int borderType, const double borderValue[4], AlgorithmHint hint)
{ {
CALL_HAL(warpAffine, cv_hal_warpAffine, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue); CALL_HAL(warpAffine, cv_hal_warpAffine, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue);
Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step); Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step); Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
if (interpolation == INTER_LINEAR) {
switch (src_type) {
case CV_8UC1: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(warpAffineLinearApproxInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
} else {
CV_CPU_DISPATCH(warpAffineLinearInvoker_8UC1, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
}
break;
}
case CV_8UC3: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(warpAffineLinearApproxInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
} else {
CV_CPU_DISPATCH(warpAffineLinearInvoker_8UC3, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
}
break;
}
case CV_8UC4: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(warpAffineLinearApproxInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
} else {
CV_CPU_DISPATCH(warpAffineLinearInvoker_8UC4, (src_data, src_step, src_height, src_width, dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
}
break;
}
case CV_16UC1: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_16UC1, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC3: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_16UC3, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC4: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_16UC4, ((const uint16_t*)src_data, src_step, src_height, src_width, (uint16_t*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC1: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_32FC1, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC3: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_32FC3, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC4: {
CV_CPU_DISPATCH(warpAffineLinearInvoker_32FC4, ((const float*)src_data, src_step, src_height, src_width, (float*)dst_data, dst_step, dst_height, dst_width, M, borderType, borderValue), CV_CPU_DISPATCH_MODES_ALL);
break;
}
// no default
}
}
int x; int x;
AutoBuffer<int> _abdelta(dst.cols*2); AutoBuffer<int> _abdelta(dst.cols*2);
int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols; int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
@ -2697,10 +2757,14 @@ void warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int
void cv::warpAffine( InputArray _src, OutputArray _dst, void cv::warpAffine( InputArray _src, OutputArray _dst,
InputArray _M0, Size dsize, InputArray _M0, Size dsize,
int flags, int borderType, const Scalar& borderValue ) int flags, int borderType, const Scalar& borderValue,
AlgorithmHint hint )
{ {
CV_INSTRUMENT_REGION(); CV_INSTRUMENT_REGION();
if (hint == cv::ALGO_HINT_DEFAULT)
hint = cv::getDefaultAlgorithmHint();
int interpolation = flags & INTER_MAX; int interpolation = flags & INTER_MAX;
CV_Assert( _src.channels() <= 4 || (interpolation != INTER_LANCZOS4 && CV_Assert( _src.channels() <= 4 || (interpolation != INTER_LANCZOS4 &&
interpolation != INTER_CUBIC) ); interpolation != INTER_CUBIC) );
@ -2808,7 +2872,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
#endif #endif
hal::warpAffine(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows, hal::warpAffine(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
M, interpolation, borderType, borderValue.val); M, interpolation, borderType, borderValue.val, hint);
} }

View File

@ -0,0 +1,11 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef __OPENCV_IMGPROC_WARP_COMMON_HPP__
#define __OPENCV_IMGPROC_WARP_COMMON_HPP__
#include "warp_common.vector.hpp"
#include "warp_common.scalar.hpp"
#endif // __OPENCV_IMGPROC_WARP_COMMON_HPP__

View File

@ -0,0 +1,171 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Shuffle
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(CN, cn, i) \
p00##CN = srcptr[i]; p01##CN = srcptr[i + cn]; \
p10##CN = srcptr[srcstep + i]; p11##CN = srcptr[srcstep + cn + i];
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C1() \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 1, 0)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C3() \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(r, 3, 0) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 3, 1) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(b, 3, 2)
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_C4() \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(r, 4, 0) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(g, 4, 1) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(b, 4, 2) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(a, 4, 3)
// Writes the constant border value directly to destination pixel x (1 channel).
// Expects dstptr, x and bval at the expansion site.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C1() \
dstptr[x] = bval[0];
// Same for 3 interleaved channels: bval[0..2] go to dstptr[x*3 .. x*3+2].
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C3() \
dstptr[x*3] = bval[0]; \
dstptr[x*3+1] = bval[1]; \
dstptr[x*3+2] = bval[2];
// Same for 4 interleaved channels: bval[0..3] go to dstptr[x*4 .. x*4+3].
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_C4() \
dstptr[x*4] = bval[0]; \
dstptr[x*4+1] = bval[1]; \
dstptr[x*4+2] = bval[2]; \
dstptr[x*4+3] = bval[3];
// Fetches the single-channel neighbour at offset (dy, dx) from (iy, ix) into <pxy>g.
//   - Inside the image (the unsigned compares also reject negative coordinates):
//     read from srcptr at dy*srcstep + dx.
//   - BORDER_CONSTANT: use bval[0].
//   - BORDER_TRANSPARENT: use the value currently stored in dstptr[x].
//   - Any other border mode: remap each coordinate with borderInterpolate_fast and
//     read from src using the remapped absolute offset.
// NOTE(review): src is presumably the image origin while srcptr points at pixel
// (iy, ix) - confirm at the expansion site.
// Expects ix, iy, srccols, srcrows, srcstep, srcptr, src, dstptr, x, bval,
// border_type, border_type_x and border_type_y at the expansion site.
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C1(dy, dx, pxy) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx; \
pxy##g = srcptr[ofs]; \
} else if (border_type == BORDER_CONSTANT) { \
pxy##g = bval[0]; \
} else if (border_type == BORDER_TRANSPARENT) { \
pxy##g = dstptr[x]; \
} else { \
int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
size_t glob_ofs = iy_*srcstep + ix_; \
pxy##g = src[glob_ofs]; \
}
// Fetches the 3-channel neighbour at offset (dy, dx) from (iy, ix) into
// <pxy>r, <pxy>g, <pxy>b.
//   - Inside the image (the unsigned compares also reject negative coordinates):
//     read three consecutive values from srcptr at dy*srcstep + dx*3.
//   - BORDER_CONSTANT: use bval[0..2].
//   - BORDER_TRANSPARENT: use the values currently stored in dstptr[x*3 .. x*3+2].
//   - Any other border mode: remap each coordinate with borderInterpolate_fast and
//     read from src using the remapped absolute offset.
// NOTE(review): src is presumably the image origin while srcptr points at pixel
// (iy, ix) - confirm at the expansion site.
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C3(dy, dx, pxy) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx*3; \
pxy##r = srcptr[ofs]; \
pxy##g = srcptr[ofs+1]; \
pxy##b = srcptr[ofs+2]; \
} else if (border_type == BORDER_CONSTANT) { \
pxy##r = bval[0]; \
pxy##g = bval[1]; \
pxy##b = bval[2]; \
} else if (border_type == BORDER_TRANSPARENT) { \
pxy##r = dstptr[x*3]; \
pxy##g = dstptr[x*3+1]; \
pxy##b = dstptr[x*3+2]; \
} else { \
int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
size_t glob_ofs = iy_*srcstep + ix_*3; \
pxy##r = src[glob_ofs]; \
pxy##g = src[glob_ofs+1]; \
pxy##b = src[glob_ofs+2]; \
}
// Fetches the 4-channel neighbour at offset (dy, dx) from (iy, ix) into
// <pxy>r, <pxy>g, <pxy>b, <pxy>a.
//   - Inside the image (the unsigned compares also reject negative coordinates):
//     read four consecutive values from srcptr at dy*srcstep + dx*4.
//   - BORDER_CONSTANT: use bval[0..3].
//   - BORDER_TRANSPARENT: use the values currently stored in dstptr[x*4 .. x*4+3].
//   - Any other border mode: remap each coordinate with borderInterpolate_fast and
//     read from src using the remapped absolute offset.
// NOTE(review): src is presumably the image origin while srcptr points at pixel
// (iy, ix) - confirm at the expansion site.
#define CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_C4(dy, dx, pxy) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx*4; \
pxy##r = srcptr[ofs]; \
pxy##g = srcptr[ofs+1]; \
pxy##b = srcptr[ofs+2]; \
pxy##a = srcptr[ofs+3]; \
} else if (border_type == BORDER_CONSTANT) { \
pxy##r = bval[0]; \
pxy##g = bval[1]; \
pxy##b = bval[2]; \
pxy##a = bval[3]; \
} else if (border_type == BORDER_TRANSPARENT) { \
pxy##r = dstptr[x*4]; \
pxy##g = dstptr[x*4+1]; \
pxy##b = dstptr[x*4+2]; \
pxy##a = dstptr[x*4+3]; \
} else { \
int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
size_t glob_ofs = iy_*srcstep + ix_*4; \
pxy##r = src[glob_ofs]; \
pxy##g = src[glob_ofs+1]; \
pxy##b = src[glob_ofs+2]; \
pxy##a = src[glob_ofs+3]; \
}
// Gathers the 2x2 neighbourhood of the source position (iy, ix) for one
// destination pixel; CN selects the channel-count variant (C1, C3 or C4).
//   1. Fast path: 0 <= ix < srccols-1 and 0 <= iy < srcrows-1, so all four
//      neighbours are inside the image and are loaded directly.
//   2. For BORDER_CONSTANT / BORDER_TRANSPARENT, when ix or iy lies outside
//      [-1, size-1] none of the four neighbours is inside the image: the
//      constant border value is stored (CONSTANT) or the destination is left
//      untouched (TRANSPARENT), and the pixel is skipped.
//   3. Otherwise each neighbour is fetched individually with border handling.
// The macro executes `continue`, so it must be expanded directly inside the
// per-pixel loop.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE(CN) \
if ((((unsigned)ix < (unsigned)(srccols-1)) & \
((unsigned)iy < (unsigned)(srcrows-1))) != 0) { \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_##CN() \
} else { \
if ((border_type == BORDER_CONSTANT || border_type == BORDER_TRANSPARENT) && \
(((unsigned)(ix+1) >= (unsigned)(srccols+1))| \
((unsigned)(iy+1) >= (unsigned)(srcrows+1))) != 0) { \
if (border_type == BORDER_CONSTANT) { \
CV_WARP_LINEAR_SCALAR_SHUFFLE_STORE_CONSTANT_BORDER_##CN() \
} \
continue; \
} \
CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(0, 0, p00); \
CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(0, 1, p01); \
CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(1, 0, p10); \
CV_WARP_LINEAR_SCALAR_FETCH_PIXEL_##CN(1, 1, p11); \
}
// Linear interpolation calculation
// Horizontal pass for one channel: declares float v0<cn> / v1<cn> as the
// interpolation between the left and right neighbours of the top row (p00, p01)
// and of the bottom row (p10, p11), weighted by sx.
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(cn) \
float v0##cn = p00##cn + sx*(p01##cn - p00##cn); \
float v1##cn = p10##cn + sx*(p11##cn - p10##cn);
// Horizontal pass, 1 channel (suffix g).
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_C1() \
CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(g)
// Horizontal pass, 3 channels (suffixes r, g, b).
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_C3() \
CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(r) \
CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(g) \
CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(b)
// Horizontal pass, 4 channels (suffixes r, g, b, a).
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_C4() \
CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(r) \
CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(g) \
CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(b) \
CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32(a)
// Vertical pass for one channel: blends v0<cn> towards v1<cn> by sy; the final
// interpolated value is left in v0<cn>.
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(cn) \
v0##cn += sy*(v1##cn - v0##cn);
// Vertical pass, 1 channel (suffix g).
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_C1() \
CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(g)
// Vertical pass, 3 channels (suffixes r, g, b).
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_C3() \
CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(r) \
CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(g) \
CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(b)
// Vertical pass, 4 channels (suffixes r, g, b, a).
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_C4() \
CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(r) \
CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(g) \
CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(b) \
CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32(a)
// Full bilinear interpolation for channel layout CN (C1, C3 or C4): horizontal
// pass followed by vertical pass. Because it declares the v0*/v1* locals, it can
// be expanded only once per scope.
#define CV_WARP_LINEAR_SCALAR_INTER_CALC_F32(CN) \
CV_WARP_LINEAR_SCALAR_INTER_CALC_ALPHA_F32_##CN() \
CV_WARP_LINEAR_SCALAR_INTER_CALC_BETA_F32_##CN()
// Store
// Writes the interpolated value v0g to destination pixel x, converting it to
// dtype with saturate_cast.
#define CV_WARP_LINEAR_SCALAR_STORE_C1(dtype) \
dstptr[x] = saturate_cast<dtype>(v0g);
// 3-channel store: v0r, v0g, v0b go to dstptr[x*3 .. x*3+2].
#define CV_WARP_LINEAR_SCALAR_STORE_C3(dtype) \
dstptr[x*3] = saturate_cast<dtype>(v0r); \
dstptr[x*3+1] = saturate_cast<dtype>(v0g); \
dstptr[x*3+2] = saturate_cast<dtype>(v0b);
// 4-channel store: v0r, v0g, v0b, v0a go to dstptr[x*4 .. x*4+3].
#define CV_WARP_LINEAR_SCALAR_STORE_C4(dtype) \
dstptr[x*4] = saturate_cast<dtype>(v0r); \
dstptr[x*4+1] = saturate_cast<dtype>(v0g); \
dstptr[x*4+2] = saturate_cast<dtype>(v0b); \
dstptr[x*4+3] = saturate_cast<dtype>(v0a);
// Depth selectors: map the depth tag (8U, 16U, 32F) to the element type used
// in the saturate_cast.
#define CV_WARP_LINEAR_SCALAR_STORE_8U(CN) \
CV_WARP_LINEAR_SCALAR_STORE_##CN(uint8_t)
#define CV_WARP_LINEAR_SCALAR_STORE_16U(CN) \
CV_WARP_LINEAR_SCALAR_STORE_##CN(uint16_t)
#define CV_WARP_LINEAR_SCALAR_STORE_32F(CN) \
CV_WARP_LINEAR_SCALAR_STORE_##CN(float)
// Entry point: CN is C1/C3/C4, DEPTH is 8U/16U/32F.
#define CV_WARP_LINEAR_SCALAR_STORE(CN, DEPTH) \
CV_WARP_LINEAR_SCALAR_STORE_##DEPTH(CN)

View File

@ -0,0 +1,387 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Shuffle (all pixels within image)
// Gathers the 2x2 neighbourhoods of uf single-channel pixels into pixbuf.
// addr[i] is the element offset of the top-left neighbour of pixel i from src.
// pixbuf is filled as four planes of uf values each:
//   [0, uf)      top-left      [uf, 2*uf)   top-right
//   [2*uf, 3*uf) bottom-left   [3*uf, 4*uf) bottom-right
// No bounds checks are performed here.
// Expects uf, src, addr, srcstep and pixbuf at the expansion site.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_C1(dtype) \
for (int i = 0; i < uf; i++) { \
const dtype* srcptr = src + addr[i]; \
pixbuf[i] = srcptr[0]; \
pixbuf[i + uf] = srcptr[1]; \
pixbuf[i + uf*2] = srcptr[srcstep]; \
pixbuf[i + uf*3] = srcptr[srcstep + 1]; \
}
// Gathers the 2x2 neighbourhoods of uf 3-channel pixels into pixbuf and
// de-interleaves the channels on the way.
// addr[i] is the element offset of the top-left neighbour of pixel i from src.
// pixbuf is filled as 12 planes of uf values; plane index = 4*channel + neighbour,
// with neighbour 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.
// No bounds checks are performed here.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_C3(dtype) \
for (int i = 0; i < uf; i++) { \
const dtype* srcptr = src + addr[i]; \
pixbuf[i] = srcptr[0]; \
pixbuf[i + uf*4] = srcptr[1]; \
pixbuf[i + uf*8] = srcptr[2]; \
pixbuf[i + uf] = srcptr[3]; \
pixbuf[i + uf*5] = srcptr[4]; \
pixbuf[i + uf*9] = srcptr[5]; \
pixbuf[i + uf*2] = srcptr[srcstep]; \
pixbuf[i + uf*6] = srcptr[srcstep + 1]; \
pixbuf[i + uf*10] = srcptr[srcstep + 2]; \
pixbuf[i + uf*3] = srcptr[srcstep + 3]; \
pixbuf[i + uf*7] = srcptr[srcstep + 4]; \
pixbuf[i + uf*11] = srcptr[srcstep + 5]; \
}
// Gathers the 2x2 neighbourhoods of uf 4-channel pixels into pixbuf and
// de-interleaves the channels on the way.
// addr[i] is the element offset of the top-left neighbour of pixel i from src.
// pixbuf is filled as 16 planes of uf values; plane index = 4*channel + neighbour,
// with neighbour 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.
// No bounds checks are performed here.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_C4(dtype) \
for (int i = 0; i < uf; i++) { \
const dtype* srcptr = src + addr[i]; \
pixbuf[i] = srcptr[0]; \
pixbuf[i + uf*4] = srcptr[1]; \
pixbuf[i + uf*8] = srcptr[2]; \
pixbuf[i + uf*12] = srcptr[3]; \
pixbuf[i + uf] = srcptr[4]; \
pixbuf[i + uf*5] = srcptr[5]; \
pixbuf[i + uf*9] = srcptr[6]; \
pixbuf[i + uf*13] = srcptr[7]; \
pixbuf[i + uf*2] = srcptr[srcstep]; \
pixbuf[i + uf*6] = srcptr[srcstep + 1]; \
pixbuf[i + uf*10] = srcptr[srcstep + 2]; \
pixbuf[i + uf*14] = srcptr[srcstep + 3]; \
pixbuf[i + uf*3] = srcptr[srcstep + 4]; \
pixbuf[i + uf*7] = srcptr[srcstep + 5]; \
pixbuf[i + uf*11] = srcptr[srcstep + 6]; \
pixbuf[i + uf*15] = srcptr[srcstep + 7]; \
}
// Depth selectors: map the depth tag (8U, 16U, 32F) to the source element type
// passed to the channel-specific gather macro.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_8U(CN) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint8_t)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_16U(CN) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(uint16_t)
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_32F(CN) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##CN(float)
// Entry point: CN is C1/C3/C4, DEPTH is 8U/16U/32F.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN(CN, DEPTH) \
CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN_##DEPTH(CN)
// Shuffle (not all pixels within image)
//
// Constant-border stores, 8-bit destination. Each macro writes the vectors
// bval_v0..bval_vN (declared by the expanding kernel; presumably the border
// value pre-arranged in destination order - confirm at the call site) to the
// destination row starting at pixel `x`, in consecutive chunks of `uf` elements.
// One vector is written for C1, three for C3 and four for C4, so uf*cn
// elements are covered in total.
// NOTE(review): v_store_low is used, presumably because only the low half of
// each 8-bit vector holds `uf` elements - confirm against the kernel.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC1() \
v_store_low(dstptr + x, bval_v0);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC3() \
v_store_low(dstptr + x*3, bval_v0); \
v_store_low(dstptr + x*3 + uf, bval_v1); \
v_store_low(dstptr + x*3 + uf*2, bval_v2);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_8UC4() \
v_store_low(dstptr + x*4, bval_v0); \
v_store_low(dstptr + x*4 + uf, bval_v1); \
v_store_low(dstptr + x*4 + uf*2, bval_v2); \
v_store_low(dstptr + x*4 + uf*3, bval_v3);
// Constant-border stores, 16-bit destination. Same scheme as the 8-bit
// variants, but full vectors are written with v_store: one vector per chunk of
// `uf` elements, starting at pixel `x` of the destination row.
// bval_v0..bval_v3 come from the expanding kernel (presumably the border value
// in destination order - confirm at the call site).
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC1() \
v_store(dstptr + x, bval_v0);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC3() \
v_store(dstptr + x*3, bval_v0); \
v_store(dstptr + x*3 + uf, bval_v1); \
v_store(dstptr + x*3 + uf*2, bval_v2);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_16UC4() \
v_store(dstptr + x*4, bval_v0); \
v_store(dstptr + x*4 + uf, bval_v1); \
v_store(dstptr + x*4 + uf*2, bval_v2); \
v_store(dstptr + x*4 + uf*3, bval_v3);
// Constant-border stores, 32-bit float destination. Each chunk of `uf`
// elements is written as two vectors: the `_l` part at the chunk start and the
// `_h` part `vlanes_32` elements later (this assumes uf == 2*vlanes_32 -
// TODO confirm in the kernel). bval_vK_l / bval_vK_h are declared by the
// expanding kernel.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC1() \
v_store(dstptr + x, bval_v0_l); \
v_store(dstptr + x + vlanes_32, bval_v0_h);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC3() \
v_store(dstptr + x*3, bval_v0_l); \
v_store(dstptr + x*3 + vlanes_32, bval_v0_h); \
v_store(dstptr + x*3 + uf, bval_v1_l); \
v_store(dstptr + x*3 + uf + vlanes_32, bval_v1_h); \
v_store(dstptr + x*3 + uf*2, bval_v2_l); \
v_store(dstptr + x*3 + uf*2 + vlanes_32, bval_v2_h);
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_32FC4() \
v_store(dstptr + x*4, bval_v0_l); \
v_store(dstptr + x*4 + vlanes_32, bval_v0_h); \
v_store(dstptr + x*4 + uf, bval_v1_l); \
v_store(dstptr + x*4 + uf + vlanes_32, bval_v1_h); \
v_store(dstptr + x*4 + uf*2, bval_v2_l); \
v_store(dstptr + x*4 + uf*2 + vlanes_32, bval_v2_h); \
v_store(dstptr + x*4 + uf*3, bval_v3_l); \
v_store(dstptr + x*4 + uf*3 + vlanes_32, bval_v3_h);
// Fetch one tap (dy, dx in {0, 1}) of the 2x2 neighbourhood of pixel i for a
// single-channel image and store it at pixbuf[i + pixbuf_ofs].
// Expects `i`, `ix`, `iy`, `x`, `addr`, `src`, `dstptr`, `bval`, `border_type`,
// `border_type_x` and `border_type_y` to be in scope at the expansion site.
//  - tap inside the image: read relative to addr[i]. The unsigned comparisons
//    reject negative coordinates as well, because they wrap to large values.
//  - BORDER_CONSTANT: use bval[0].
//  - BORDER_TRANSPARENT: use the current destination value.
//  - any other border mode: remap the coordinates with borderInterpolate_fast
//    and read at an absolute offset from `src`.
// The arguments are pasted without parentheses; pass simple expressions only.
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C1(dy, dx, pixbuf_ofs) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t addr_i = addr[i] + dy*srcstep + dx; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
} else if (border_type == BORDER_CONSTANT) { \
pixbuf[i + pixbuf_ofs] = bval[0]; \
} else if (border_type == BORDER_TRANSPARENT) { \
pixbuf[i + pixbuf_ofs] = dstptr[x + i]; \
} else { \
int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
size_t addr_i = iy_*srcstep + ix_; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
}
// Three-channel variant of the tap fetch. The three channel values of tap
// (dy, dx) of pixel i are written to pixbuf[i + pixbuf_ofs + uf*4*c], c = 0..2,
// i.e. into the same de-interleaved plane layout the ALLWITHIN_C3 gather uses.
// Branches, in order: tap inside the image (read relative to addr[i]),
// BORDER_CONSTANT (bval[0..2]), BORDER_TRANSPARENT (current destination pixel),
// otherwise coordinates remapped through borderInterpolate_fast and read at an
// absolute offset from `src`.
// The arguments are pasted without parentheses; pass simple expressions only.
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C3(dy, dx, pixbuf_ofs) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t addr_i = addr[i] + dy*srcstep + dx*3; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
} else if (border_type == BORDER_CONSTANT) { \
pixbuf[i + pixbuf_ofs] = bval[0]; \
pixbuf[i + pixbuf_ofs + uf*4] = bval[1]; \
pixbuf[i + pixbuf_ofs + uf*8] = bval[2]; \
} else if (border_type == BORDER_TRANSPARENT) { \
pixbuf[i + pixbuf_ofs] = dstptr[(x + i)*3]; \
pixbuf[i + pixbuf_ofs + uf*4] = dstptr[(x + i)*3 + 1]; \
pixbuf[i + pixbuf_ofs + uf*8] = dstptr[(x + i)*3 + 2]; \
} else { \
int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
size_t addr_i = iy_*srcstep + ix_*3; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
}
// Four-channel variant of the tap fetch. The four channel values of tap
// (dy, dx) of pixel i are written to pixbuf[i + pixbuf_ofs + uf*4*c], c = 0..3,
// i.e. into the same de-interleaved plane layout the ALLWITHIN_C4 gather uses.
// Branches, in order: tap inside the image (read relative to addr[i]),
// BORDER_CONSTANT (bval[0..3]), BORDER_TRANSPARENT (current destination pixel),
// otherwise coordinates remapped through borderInterpolate_fast and read at an
// absolute offset from `src`.
// The arguments are pasted without parentheses; pass simple expressions only.
#define CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_C4(dy, dx, pixbuf_ofs) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t addr_i = addr[i] + dy*srcstep + dx*4; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
pixbuf[i + pixbuf_ofs + uf*12] = src[addr_i+3]; \
} else if (border_type == BORDER_CONSTANT) { \
pixbuf[i + pixbuf_ofs] = bval[0]; \
pixbuf[i + pixbuf_ofs + uf*4] = bval[1]; \
pixbuf[i + pixbuf_ofs + uf*8] = bval[2]; \
pixbuf[i + pixbuf_ofs + uf*12] = bval[3]; \
} else if (border_type == BORDER_TRANSPARENT) { \
pixbuf[i + pixbuf_ofs] = dstptr[(x + i)*4]; \
pixbuf[i + pixbuf_ofs + uf*4] = dstptr[(x + i)*4 + 1]; \
pixbuf[i + pixbuf_ofs + uf*8] = dstptr[(x + i)*4 + 2]; \
pixbuf[i + pixbuf_ofs + uf*12] = dstptr[(x + i)*4 + 3]; \
} else { \
int ix_ = borderInterpolate_fast(ix + dx, srccols, border_type_x); \
int iy_ = borderInterpolate_fast(iy + dy, srcrows, border_type_y); \
size_t addr_i = iy_*srcstep + ix_*4; \
pixbuf[i + pixbuf_ofs] = src[addr_i]; \
pixbuf[i + pixbuf_ofs + uf*4] = src[addr_i+1]; \
pixbuf[i + pixbuf_ofs + uf*8] = src[addr_i+2]; \
pixbuf[i + pixbuf_ofs + uf*12] = src[addr_i+3]; \
}
// Slow-path gather for a batch of `uf` pixels when at least one tap may fall
// outside the source image.
//
// 1. For BORDER_CONSTANT / BORDER_TRANSPARENT, test every lane with the
//    unsigned comparisons (ix + 1) < outer_scols and (iy + 1) < outer_srows.
//    If no lane passes, nothing needs interpolating: the constant border value
//    is stored (BORDER_CONSTANT) or the destination is left untouched
//    (BORDER_TRANSPARENT), and `continue` skips to the next batch.
//    NOTE(review): outer_scols / outer_srows are presumably srccols + 1 and
//    srcrows + 1, so the test means "the 2x2 window overlaps the image" -
//    confirm where they are initialised.
// 2. Otherwise spill the integer coordinates to the scalar arrays src_ix /
//    src_iy and fetch the four taps of each pixel one at a time.
//
// Because of the `continue`, this macro may only be expanded directly inside
// the loop over `x`. It writes mask_0 / mask_1 and declares `outer_mask`.
#define CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN(CN, DEPTH) \
if (border_type == BORDER_CONSTANT || border_type == BORDER_TRANSPARENT) { \
mask_0 = v_lt(v_reinterpret_as_u32(v_add(src_ix0, one)), outer_scols); \
mask_1 = v_lt(v_reinterpret_as_u32(v_add(src_ix1, one)), outer_scols); \
mask_0 = v_and(mask_0, v_lt(v_reinterpret_as_u32(v_add(src_iy0, one)), outer_srows)); \
mask_1 = v_and(mask_1, v_lt(v_reinterpret_as_u32(v_add(src_iy1, one)), outer_srows)); \
v_uint16 outer_mask = v_pack(mask_0, mask_1); \
if (v_reduce_max(outer_mask) == 0) { \
if (border_type == BORDER_CONSTANT) { \
CV_WARP_LINEAR_VECTOR_SHUFFLE_STORE_CONSTANT_BORDER_##DEPTH##CN() \
} \
continue; \
} \
} \
vx_store(src_ix, src_ix0); \
vx_store(src_iy, src_iy0); \
vx_store(src_ix + vlanes_32, src_ix1); \
vx_store(src_iy + vlanes_32, src_iy1); \
for (int i = 0; i < uf; i++) { \
int ix = src_ix[i], iy = src_iy[i]; \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(0, 0, 0); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(0, 1, uf); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(1, 0, uf*2); \
CV_WARP_LINEAR_VECTOR_FETCH_PIXEL_##CN(1, 1, uf*3); \
}
// Load pixels for linear interpolation (uint8_t -> int16_t)
//
// For one channel, declares four v_int16 variables f00<cn>, f01<cn>, f10<cn>
// and f11<cn>, loaded with vx_load_expand from the pixbuf planes i, i+1, i+2
// and i+3 (each plane is `uf` elements wide).
// `cn` is a name suffix (g for single-channel; r/g/b[/a] otherwise) and `i` is
// the index of the channel's first plane. `i` is pasted without parentheses,
// so pass a literal.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(cn, i) \
v_int16 f00##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * i)), \
f01##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+1))), \
f10##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+2))), \
f11##cn = v_reinterpret_as_s16(vx_load_expand(pixbuf + uf * (i+3)));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 0)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(b, 8)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(b, 8) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U8S16(a, 12)
// Entry point; CN is C1, C3 or C4. The trailing ';' only adds an empty
// statement after the declarations.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_U8S16_##CN();
// Load pixels for linear interpolation (uint16_t -> uint16_t)
//
// For one channel, declares four v_uint16 variables f00<cn>, f01<cn>, f10<cn>
// and f11<cn>, loaded with vx_load from the pixbuf planes i..i+3 without any
// widening. `cn` is a name suffix and `i` the index of the channel's first
// plane (pasted without parentheses - pass a literal).
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(cn, i) \
v_uint16 f00##cn = vx_load(pixbuf + uf * i), \
f01##cn = vx_load(pixbuf + uf * (i+1)), \
f10##cn = vx_load(pixbuf + uf * (i+2)), \
f11##cn = vx_load(pixbuf + uf * (i+3));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(g, 0)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(b, 8)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(b, 8) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16(a, 12)
// Entry point; CN is C1, C3 or C4. The trailing ';' only adds an empty
// statement after the declarations.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16_##CN();
// Load pixels for linear interpolation (int16_t -> float)
//
// Second stage after the U8S16 load: for each of the 16-bit vectors f00<cn>,
// f01<cn>, f10<cn> and f11<cn> already in scope, declares two v_float32
// variables with suffixes `l` and `h`, converted from the expanded low and
// high halves respectively.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(cn) \
v_float32 f00##cn##l = v_cvt_f32(v_expand_low(f00##cn)), f00##cn##h = v_cvt_f32(v_expand_high(f00##cn)), \
f01##cn##l = v_cvt_f32(v_expand_low(f01##cn)), f01##cn##h = v_cvt_f32(v_expand_high(f01##cn)), \
f10##cn##l = v_cvt_f32(v_expand_low(f10##cn)), f10##cn##h = v_cvt_f32(v_expand_high(f10##cn)), \
f11##cn##l = v_cvt_f32(v_expand_low(f11##cn)), f11##cn##h = v_cvt_f32(v_expand_high(f11##cn));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_S16F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(g)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_S16F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(b)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_S16F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(b) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_S16F32(a)
// Entry point; CN is C1, C3 or C4.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_S16F32(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_S16F32_##CN()
// Load pixels for linear interpolation (uint16_t -> float)
//
// Second stage after the U16 load: for each of the v_uint16 vectors f00<cn>,
// f01<cn>, f10<cn> and f11<cn> already in scope, declares two v_float32
// variables with suffixes `l` and `h`. Each half is expanded to 32 bits,
// reinterpreted as signed and then converted to float. Since the expanded
// values come from 16-bit inputs, the reinterpretation cannot change them.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(cn) \
v_float32 f00##cn##l = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(f00##cn))), f00##cn##h = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(f00##cn))), \
f01##cn##l = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(f01##cn))), f01##cn##h = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(f01##cn))), \
f10##cn##l = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(f10##cn))), f10##cn##h = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(f10##cn))), \
f11##cn##l = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(f11##cn))), f11##cn##h = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(f11##cn)));
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(g)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(b)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(b) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_U16F32(a)
// Entry point; CN is C1, C3 or C4.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16F32(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16F32_##CN()
// Load pixels for linear interpolation (float -> float)
//
// For one channel, declares eight v_float32 variables: f00, f01, f10 and f11,
// each with suffix <cn>l (first vlanes_32 elements of the plane) and <cn>h
// (the following vlanes_32 elements), loaded directly from the pixbuf planes
// i..i+3. `i` is pasted without parentheses - pass a literal.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(cn, i) \
v_float32 f00##cn##l = vx_load(pixbuf + uf * i), f00##cn##h = vx_load(pixbuf + uf * i + vlanes_32), \
f01##cn##l = vx_load(pixbuf + uf * (i+1)), f01##cn##h = vx_load(pixbuf + uf * (i+1) + vlanes_32), \
f10##cn##l = vx_load(pixbuf + uf * (i+2)), f10##cn##h = vx_load(pixbuf + uf * (i+2) + vlanes_32), \
f11##cn##l = vx_load(pixbuf + uf * (i+3)), f11##cn##h = vx_load(pixbuf + uf * (i+3) + vlanes_32);
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(g, 0)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(b, 8)
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(r, 0) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(g, 4) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(b, 8) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_CN_F32(a, 12)
// Entry point; CN is C1, C3 or C4.
#define CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32(CN) \
CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32_##CN()
// Linear interpolation calculation
//
// Works in place on the float vectors declared by the INTER_LOAD macros.
// Horizontal pass (ALPHA), for the top and bottom rows of the 2x2 window:
//     f00 = f00 + alpha * (f01 - f00)
//     f10 = f10 + alpha * (f11 - f10)
// Vertical pass (BETA):
//     f00 = f00 + beta * (f10 - f00)
// After both passes f00<cn>l / f00<cn>h hold the interpolated result.
// The formulas assume v_fma(a, b, c) computes a*b + c.
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(cn) \
f00##cn##l = v_fma(alphal, v_sub(f01##cn##l, f00##cn##l), f00##cn##l); f00##cn##h = v_fma(alphah, v_sub(f01##cn##h, f00##cn##h), f00##cn##h); \
f10##cn##l = v_fma(alphal, v_sub(f11##cn##l, f10##cn##l), f10##cn##l); f10##cn##h = v_fma(alphah, v_sub(f11##cn##h, f10##cn##h), f10##cn##h);
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(g)
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(b)
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(b) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32(a)
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(cn) \
f00##cn##l = v_fma(betal, v_sub(f10##cn##l, f00##cn##l), f00##cn##l); f00##cn##h = v_fma(betah, v_sub(f10##cn##h, f00##cn##h), f00##cn##h);
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32_C1() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(g)
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32_C3() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(b)
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32_C4() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(r) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(g) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(b) \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32(a)
// Entry point; CN is C1, C3 or C4. Declares alphal/alphah and betal/betah as
// copies of src_x0/src_x1 and src_y0/src_y1 (presumably the fractional parts
// of the source coordinates at this point - confirm in the kernel), then runs
// the horizontal and the vertical pass.
#define CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(CN) \
v_float32 alphal = src_x0, alphah = src_x1, \
betal = src_y0, betah = src_y1; \
CV_WARP_LINEAR_VECTOR_INTER_CALC_ALPHA_F32_##CN() \
CV_WARP_LINEAR_VECTOR_INTER_CALC_BETA_F32_##CN()
// Store
//
// float -> uint8_t. The interpolated results f00<cn>l / f00<cn>h are rounded
// (v_round) and packed to one v_uint16 per channel (v_pack_u).
//  - C1: packed once more to 8 bits against a zero vector; only the low half,
//    which carries the data, is stored at dstptr + x.
//  - C3 / C4: channels are interleaved into the temporary 16-bit buffer `tbuf`,
//    which is then narrowed to 8 bits and stored in chunks of vlanes_16
//    elements starting at dstptr + x*cn.
// Declares locals (f00*_u16, f00_u8, tbuf), so each macro can be expanded at
// most once per scope.
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8_C1() \
v_uint16 f00_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)); \
v_uint8 f00_u8 = v_pack(f00_u16, vx_setall_u16(0)); \
v_store_low(dstptr + x, f00_u8);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8_C3() \
v_uint16 f00r_u16 = v_pack_u(v_round(f00rl), v_round(f00rh)), \
f00g_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)), \
f00b_u16 = v_pack_u(v_round(f00bl), v_round(f00bh)); \
uint16_t tbuf[max_vlanes_16*3]; \
v_store_interleave(tbuf, f00r_u16, f00g_u16, f00b_u16); \
v_pack_store(dstptr + x*3, vx_load(tbuf)); \
v_pack_store(dstptr + x*3 + vlanes_16, vx_load(tbuf + vlanes_16)); \
v_pack_store(dstptr + x*3 + vlanes_16*2, vx_load(tbuf + vlanes_16*2));
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8_C4() \
v_uint16 f00r_u16 = v_pack_u(v_round(f00rl), v_round(f00rh)), \
f00g_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)), \
f00b_u16 = v_pack_u(v_round(f00bl), v_round(f00bh)), \
f00a_u16 = v_pack_u(v_round(f00al), v_round(f00ah)); \
uint16_t tbuf[max_vlanes_16*4]; \
v_store_interleave(tbuf, f00r_u16, f00g_u16, f00b_u16, f00a_u16); \
v_pack_store(dstptr + x*4, vx_load(tbuf)); \
v_pack_store(dstptr + x*4 + vlanes_16, vx_load(tbuf + vlanes_16)); \
v_pack_store(dstptr + x*4 + vlanes_16*2, vx_load(tbuf + vlanes_16*2)); \
v_pack_store(dstptr + x*4 + vlanes_16*3, vx_load(tbuf + vlanes_16*3));
// Entry point; CN is C1, C3 or C4.
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8(CN) \
CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U8_##CN()
// float -> uint16_t store. The interpolated results f00<cn>l / f00<cn>h are
// rounded (v_round) and packed to one v_uint16 per channel (v_pack_u).
//  - C1: the vector is stored directly at dstptr + x.
//  - C3 / C4: the channel vectors are interleaved straight into the
//    destination row at dstptr + x*cn.
// Declares locals (f00*_u16), so each macro can be expanded at most once per
// scope.
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16_C1() \
v_uint16 f00_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)); \
v_store(dstptr + x, f00_u16);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16_C3() \
v_uint16 f00r_u16 = v_pack_u(v_round(f00rl), v_round(f00rh)), \
f00g_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)), \
f00b_u16 = v_pack_u(v_round(f00bl), v_round(f00bh)); \
v_store_interleave(dstptr + x*3, f00r_u16, f00g_u16, f00b_u16);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16_C4() \
v_uint16 f00r_u16 = v_pack_u(v_round(f00rl), v_round(f00rh)), \
f00g_u16 = v_pack_u(v_round(f00gl), v_round(f00gh)), \
f00b_u16 = v_pack_u(v_round(f00bl), v_round(f00bh)), \
f00a_u16 = v_pack_u(v_round(f00al), v_round(f00ah)); \
v_store_interleave(dstptr + x*4, f00r_u16, f00g_u16, f00b_u16, f00a_u16);
// Entry point; CN is C1, C3 or C4.
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16(CN) \
CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16_##CN()
// float -> float store; no rounding or packing is involved.
//  - C1: the `l` and `h` result vectors are written back to back
//    (vlanes_32 elements apart) starting at dstptr + x.
//  - C3 / C4: the `l` vectors of all channels are interleaved at dstptr + x*cn,
//    the `h` vectors vlanes_32*cn elements further on.
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32_C1() \
vx_store(dstptr + x, f00gl); \
vx_store(dstptr + x + vlanes_32, f00gh);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32_C3() \
v_store_interleave(dstptr + x*3, f00rl, f00gl, f00bl); \
v_store_interleave(dstptr + x*3 + vlanes_32*3, f00rh, f00gh, f00bh);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32_C4() \
v_store_interleave(dstptr + x*4, f00rl, f00gl, f00bl, f00al); \
v_store_interleave(dstptr + x*4 + vlanes_32*4, f00rh, f00gh, f00bh, f00ah);
// Entry point; CN is C1, C3 or C4.
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32(CN) \
CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32_##CN()

File diff suppressed because it is too large Load Diff

View File

@ -172,7 +172,7 @@ OCL_TEST_P(WarpAffine, Mat)
{ {
for (int j = 0; j < test_loop_times; j++) for (int j = 0; j < test_loop_times; j++)
{ {
double eps = depth < CV_32F ? 0.04 : 0.06; double eps = depth < CV_32F ? ( depth < CV_16U ? 0.09 : 0.04 ) : 0.06;
random_roi(); random_roi();
Mat M = getRotationMatrix2D(Point2f(src_roi.cols / 2.0f, src_roi.rows / 2.0f), Mat M = getRotationMatrix2D(Point2f(src_roi.cols / 2.0f, src_roi.rows / 2.0f),
@ -189,7 +189,7 @@ OCL_TEST_P(WarpAffine, inplace_25853) // when src and dst are the same variable,
{ {
for (int j = 0; j < test_loop_times; j++) for (int j = 0; j < test_loop_times; j++)
{ {
double eps = depth < CV_32F ? 0.04 : 0.06; double eps = depth < CV_32F ? ( depth < CV_16U ? 0.09 : 0.04 ) : 0.06;
random_roi(); random_roi();
Mat M = getRotationMatrix2D(Point2f(src_roi.cols / 2.0f, src_roi.rows / 2.0f), Mat M = getRotationMatrix2D(Point2f(src_roi.cols / 2.0f, src_roi.rows / 2.0f),

View File

@ -150,7 +150,7 @@ void CV_ImageWarpBaseTest::generate_test_data()
while (depth == CV_8S || depth == CV_32S) while (depth == CV_8S || depth == CV_32S)
depth = rng.uniform(0, CV_64F); depth = rng.uniform(0, CV_64F);
int cn = rng.uniform(1, 4); int cn = rng.uniform(1, 5);
src.create(ssize, CV_MAKE_TYPE(depth, cn)); src.create(ssize, CV_MAKE_TYPE(depth, cn));
@ -1045,6 +1045,13 @@ protected:
Mat M; Mat M;
private: private:
void warpAffine(const Mat&, Mat&); void warpAffine(const Mat&, Mat&);
template<typename T>
void newWarpAffine(const Mat&, Mat&, const Mat&);
template<int channels, typename T>
void newLinear(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
}; };
CV_WarpAffine_Test::CV_WarpAffine_Test() : CV_WarpAffine_Test::CV_WarpAffine_Test() :
@ -1088,7 +1095,7 @@ void CV_WarpAffine_Test::run_func()
float CV_WarpAffine_Test::get_success_error_level(int _interpolation, int _depth) const float CV_WarpAffine_Test::get_success_error_level(int _interpolation, int _depth) const
{ {
return _depth == CV_8U ? 0 : CV_ImageWarpBaseTest::get_success_error_level(_interpolation, _depth); return _depth == CV_8U ? 0.f : CV_ImageWarpBaseTest::get_success_error_level(_interpolation, _depth);
} }
void CV_WarpAffine_Test::run_reference_func() void CV_WarpAffine_Test::run_reference_func()
@ -1098,6 +1105,152 @@ void CV_WarpAffine_Test::run_reference_func()
tmp.convertTo(reference_dst, reference_dst.depth()); tmp.convertTo(reference_dst, reference_dst.depth());
} }
// Scalar reference fetch of one tap (dy, dx in {0, 1}) of the 2x2 window for a
// pixel with `cn` interleaved channels. The tap is written to
// pxy[(2*dy + dx)*cn + ci], ci = 0..cn-1.
// Expects `ix`, `iy`, `x`, `srcptr` (pointer to the top-left tap), `srcptr_`
// (image origin), `dstptr`, `bval`, `pxy`, `srccols`, `srcrows`, `srcstep`,
// `borderType`, `borderType_x` and `borderType_y` at the expansion site.
//  - tap inside the image: read relative to `srcptr`. The unsigned comparisons
//    reject negative coordinates as well.
//  - BORDER_CONSTANT: use bval.
//  - BORDER_TRANSPARENT: use the current destination pixel.
//  - otherwise: remap the coordinates with borderInterpolate and read relative
//    to the image origin `srcptr_`.
// Arguments are pasted without parentheses; pass literals or plain names.
#define FETCH_PIXEL_SCALAR(cn, dy, dx) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr[ofs+ci];} \
} else if (borderType == BORDER_CONSTANT) { \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = bval[ci];} \
} else if (borderType == BORDER_TRANSPARENT) { \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = dstptr[x*cn+ci];} \
} else { \
int ix_ = borderInterpolate(ix + dx, srccols, borderType_x); \
int iy_ = borderInterpolate(iy + dy, srcrows, borderType_y); \
size_t glob_ofs = iy_*srcstep + ix_*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr_[glob_ofs+ci];} \
}
// Fill pxy[0 .. 4*cn) with the 2x2 window around (ix, iy), laid out as
// [top-left | top-right | bottom-left | bottom-right], `cn` values each.
//  - Fast path: the whole window is inside the image, so it is copied directly
//    from `srcptr`.
//  - Otherwise, for BORDER_CONSTANT / BORDER_TRANSPARENT, if the window lies
//    completely outside the image, the destination pixel is set to bval
//    (CONSTANT) or left untouched (TRANSPARENT) and the macro executes
//    `return`, leaving the enclosing function without interpolating.
//  - In all remaining cases the four taps are fetched one by one with
//    FETCH_PIXEL_SCALAR.
// Because of the `return`, expand this only inside a void function.
#define WARPAFFINE_SHUFFLE(cn) \
if ((((unsigned)ix < (unsigned)(srccols-1)) & \
((unsigned)iy < (unsigned)(srcrows-1))) != 0) { \
for (int ci = 0; ci < cn; ci++) { \
pxy[ci] = srcptr[ci]; \
pxy[ci+cn] = srcptr[ci+cn]; \
pxy[ci+cn*2] = srcptr[srcstep+ci]; \
pxy[ci+cn*3] = srcptr[srcstep+ci+cn]; \
} \
} else { \
if ((borderType == BORDER_CONSTANT || borderType == BORDER_TRANSPARENT) && \
(((unsigned)(ix+1) >= (unsigned)(srccols+1))| \
((unsigned)(iy+1) >= (unsigned)(srcrows+1))) != 0) { \
if (borderType == BORDER_CONSTANT) { \
for (int ci = 0; ci < cn; ci++) { dstptr[x*cn+ci] = bval[ci]; } \
} \
return; \
} \
FETCH_PIXEL_SCALAR(cn, 0, 0); \
FETCH_PIXEL_SCALAR(cn, 0, 1); \
FETCH_PIXEL_SCALAR(cn, 1, 0); \
FETCH_PIXEL_SCALAR(cn, 1, 1); \
}
// Bilinear blend of a gathered 2x2 window.
//   cn     - number of interleaved channels
//   pxy    - 4*cn values: top-left, top-right, bottom-left, bottom-right taps
//   dst    - receives cn interpolated values, converted with saturate_cast<T>
//   sx, sy - horizontal / vertical interpolation weights
template<typename T>
static inline void warpaffine_linear_calc(int cn, const T *pxy, T *dst, float sx, float sy)
{
    for (int ch = 0; ch < cn; ++ch)
    {
        const float topLeft     = pxy[ch];
        const float topRight    = pxy[ch + cn];
        const float bottomLeft  = pxy[ch + cn*2];
        const float bottomRight = pxy[ch + cn*3];

        // Horizontal pass on both rows, then vertical pass between the rows.
        const float top    = topLeft + sx*(topRight - topLeft);
        const float bottom = bottomLeft + sx*(bottomRight - bottomLeft);
        const float result = top + sy*(bottom - top);

        dst[ch] = saturate_cast<T>(result);
    }
}
// float specialization: same bilinear blend as the generic version, but the
// result is written as is, without saturate_cast.
template<>
inline void warpaffine_linear_calc<float>(int cn, const float *pxy, float *dst, float sx, float sy)
{
    for (int ch = 0; ch < cn; ++ch)
    {
        const float topLeft     = pxy[ch];
        const float topRight    = pxy[ch + cn];
        const float bottomLeft  = pxy[ch + cn*2];
        const float bottomRight = pxy[ch + cn*3];

        // Horizontal pass on both rows, then vertical pass between the rows.
        const float top    = topLeft + sx*(topRight - topLeft);
        const float bottom = bottomLeft + sx*(bottomRight - bottomLeft);
        const float result = top + sy*(bottom - top);

        dst[ch] = result;
    }
}
// Reference bilinear sampling of one destination pixel.
//   x            - destination column; the result goes to dstptr[x*channels ..]
//   sx, sy       - source coordinates of the sample (floating point)
//   srcptr_      - origin of the source image
//   dstptr       - start of the destination row
//   srcstep      - source row stride, in elements of T
//   bval         - border value, one entry per channel
//   borderType_x / borderType_y - border modes used when remapping coordinates
// The local names ix, iy, pxy and srcptr are referenced by WARPAFFINE_SHUFFLE
// and must not be renamed.
template<int channels, typename T>
void CV_WarpAffine_Test::newLinear(int x, float sx, float sy, const T *srcptr_, T *dstptr,
int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y)
{
// Split the coordinates into the integer tap position and fractional weights.
int ix = (int)floorf(sx), iy = (int)floorf(sy);
sx -= ix; sy -= iy;
T pxy[channels*4];
// May point outside the image; it is dereferenced only for taps that passed
// the bounds checks inside the macros.
const T *srcptr = srcptr_ + srcstep*iy + ix*channels;
// May return early when the window is entirely outside the image.
WARPAFFINE_SHUFFLE(channels);
warpaffine_linear_calc(channels, pxy, dstptr+x*channels, sx, sy);
}
// Explicit specialization for 3-channel float images. The body performs the
// same steps as the generic template with channels == 3 and T == float.
// NOTE(review): the reason for keeping a separate specialization is not visible
// here - confirm before merging it back into the generic version.
// The local names ix, iy, pxy and srcptr are referenced by WARPAFFINE_SHUFFLE
// and must not be renamed.
template<>
void CV_WarpAffine_Test::newLinear<3, float>(int x, float sx, float sy, const float *srcptr_, float *dstptr,
int srccols, int srcrows, size_t srcstep,
const float *bval, int borderType_x, int borderType_y)
{
// Split the coordinates into the integer tap position and fractional weights.
int ix = (int)floorf(sx), iy = (int)floorf(sy);
sx -= ix; sy -= iy;
float pxy[12];
// May point outside the image; it is dereferenced only for taps that passed
// the bounds checks inside the macros.
const float *srcptr = srcptr_ + srcstep*iy + ix*3;
// May return early when the window is entirely outside the image.
WARPAFFINE_SHUFFLE(3);
warpaffine_linear_calc(3, pxy, dstptr+x*3, sx, sy);
}
template<typename T>
void CV_WarpAffine_Test::newWarpAffine(const Mat &_src, Mat &_dst, const Mat &tM)
{
int num_channels = _dst.channels();
CV_CheckTrue(num_channels == 1 || num_channels == 3 || num_channels == 4, "");
auto *srcptr_ = _src.ptr<const T>();
auto *dstptr_ = _dst.ptr<T>();
size_t srcstep = _src.step/sizeof(T), dststep = _dst.step/sizeof(T);
int srccols = _src.cols, srcrows = _src.rows;
int dstcols = _dst.cols, dstrows = _dst.rows;
Mat ttM;
tM.convertTo(ttM, CV_32F);
auto *_M = ttM.ptr<const float>();
T bval[] = {
saturate_cast<T>(borderValue[0]),
saturate_cast<T>(borderValue[1]),
saturate_cast<T>(borderValue[2]),
saturate_cast<T>(borderValue[3]),
};
int borderType_x = borderType != BORDER_CONSTANT &&
borderType != BORDER_TRANSPARENT &&
srccols <= 1 ? BORDER_REPLICATE : borderType;
int borderType_y = borderType != BORDER_CONSTANT &&
borderType != BORDER_TRANSPARENT &&
srcrows <= 1 ? BORDER_REPLICATE : borderType;
for (int y = 0; y < dstrows; y++) {
T* dstptr = dstptr_ + y*dststep;
for (int x = 0; x < dstcols; x++) {
float sx = x*_M[0] + y*_M[1] + _M[2];
float sy = x*_M[3] + y*_M[4] + _M[5];
if (num_channels == 3) {
newLinear<3>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (num_channels == 4) {
newLinear<4>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else {
newLinear<1>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
}
}
}
}
void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst) void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
{ {
Size dsize = _dst.size(); Size dsize = _dst.size();
@ -1122,6 +1275,17 @@ void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
if (!(interpolation & cv::WARP_INVERSE_MAP)) if (!(interpolation & cv::WARP_INVERSE_MAP))
invertAffineTransform(tM.clone(), tM); invertAffineTransform(tM.clone(), tM);
if (inter == INTER_LINEAR) {
int dst_depth = _dst.depth(), dst_channels = _dst.channels();
if (dst_depth == CV_8U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<uint8_t>(_src, _dst, tM);
} else if (dst_depth == CV_16U && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<uint16_t>(_src, _dst, tM);
} else if (dst_depth == CV_32F && (dst_channels == 1 || dst_channels == 3 || dst_channels == 4)) {
return newWarpAffine<float>(_src, _dst, tM);
}
}
const int AB_BITS = MAX(10, (int)INTER_BITS); const int AB_BITS = MAX(10, (int)INTER_BITS);
const int AB_SCALE = 1 << AB_BITS; const int AB_SCALE = 1 << AB_BITS;
int round_delta = (inter == INTER_NEAREST) ? AB_SCALE / 2 : (AB_SCALE / INTER_TAB_SIZE / 2); int round_delta = (inter == INTER_NEAREST) ? AB_SCALE / 2 : (AB_SCALE / INTER_TAB_SIZE / 2);
@ -1134,7 +1298,7 @@ void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
{ {
int v1 = saturate_cast<int>(saturate_cast<int>(data_tM[0] * dx * AB_SCALE) + int v1 = saturate_cast<int>(saturate_cast<int>(data_tM[0] * dx * AB_SCALE) +
saturate_cast<int>((data_tM[1] * dy + data_tM[2]) * AB_SCALE) + round_delta), saturate_cast<int>((data_tM[1] * dy + data_tM[2]) * AB_SCALE) + round_delta),
v2 = saturate_cast<int>(saturate_cast<int>(data_tM[3] * dx * AB_SCALE) + v2 = saturate_cast<int>(saturate_cast<int>(data_tM[3] * dx * AB_SCALE) +
saturate_cast<int>((data_tM[4] * dy + data_tM[5]) * AB_SCALE) + round_delta); saturate_cast<int>((data_tM[4] * dy + data_tM[5]) * AB_SCALE) + round_delta);
v1 >>= AB_BITS - INTER_BITS; v1 >>= AB_BITS - INTER_BITS;
v2 >>= AB_BITS - INTER_BITS; v2 >>= AB_BITS - INTER_BITS;

View File

@ -748,8 +748,76 @@ struct DefaultRngAuto
// test images generation functions // test images generation functions
void fillGradient(Mat& img, int delta = 5); template<typename T>
void smoothBorder(Mat& img, const Scalar& color, int delta = 3); void fillGradient(Mat& img, int delta = 5)
{
CV_UNUSED(delta);
const int ch = img.channels();
int r, c, i;
for(r=0; r<img.rows; r++)
{
for(c=0; c<img.cols; c++)
{
T vals[] = {(T)r, (T)c, (T)(r*c), (T)(r*c/(r+c+1))};
T *p = (T*)img.ptr(r, c);
for(i=0; i<ch; i++) p[i] = (T)vals[i];
}
}
}
// Declaration of the explicit specialization for 8-bit images; no body is
// given here, so the definition has to be provided by a source file.
template<>
void fillGradient<uint8_t>(Mat& img, int delta);
// Fades the outer frame of `img` towards `color`, in place.
//   img   - non-empty image with at most 4 channels; elements are accessed as T
//   color - colour the border fades to
//   delta - fade step in percent per pixel: a pixel at distance d from an edge
//           keeps d*delta/100 of its own value and takes the rest from `color`.
//           Must not be 0.
// The frame is 100/delta pixels wide, clamped to half of the image size along
// each axis (nR rows, nC columns). Pixels covered by several passes, such as
// corners, are blended once per pass.
template<typename T>
void smoothBorder(Mat& img, const Scalar& color, int delta = 3)
{
    const int ch = img.channels();
    CV_Assert(!img.empty() && ch <= 4);

    Scalar s;
    int n = 100/delta;
    int nR = std::min(n, (img.rows+1)/2), nC = std::min(n, (img.cols+1)/2);

    int r, c, i;
    // Top and bottom bands.
    for(r=0; r<nR; r++)
    {
        double k1 = r*delta/100., k2 = 1-k1;
        for(c=0; c<img.cols; c++)
        {
            auto *p = img.ptr<T>(r, c);
            for(i=0; i<ch; i++) s[i] = p[i];
            s = s * k1 + color * k2;
            for(i=0; i<ch; i++) p[i] = static_cast<T>((s[i]));
        }
        for(c=0; c<img.cols; c++)
        {
            auto *p = img.ptr<T>(img.rows-r-1, c);
            for(i=0; i<ch; i++) s[i] = p[i];
            s = s * k1 + color * k2;
            for(i=0; i<ch; i++) p[i] = static_cast<T>((s[i]));
        }
    }

    // Left and right bands.
    for(r=0; r<img.rows; r++)
    {
        for(c=0; c<nC; c++)
        {
            double k1 = c*delta/100., k2 = 1-k1;
            auto *p = img.ptr<T>(r, c);
            for(i=0; i<ch; i++) s[i] = p[i];
            s = s * k1 + color * k2;
            for(i=0; i<ch; i++) p[i] = static_cast<T>((s[i]));
        }
        // Bounded by nC, not n: with the unclamped width, images narrower than
        // n columns made img.cols-c-1 negative and the access went out of bounds.
        for(c=0; c<nC; c++)
        {
            double k1 = c*delta/100., k2 = 1-k1;
            auto *p = img.ptr<T>(r, img.cols-c-1);
            for(i=0; i<ch; i++) s[i] = p[i];
            s = s * k1 + color * k2;
            for(i=0; i<ch; i++) p[i] = static_cast<T>((s[i]));
        }
    }
}
// Utility functions // Utility functions

View File

@ -686,7 +686,8 @@ TS* TS::ptr()
return &ts; return &ts;
} }
void fillGradient(Mat& img, int delta) template<>
void fillGradient<uint8_t>(Mat& img, int delta)
{ {
const int ch = img.channels(); const int ch = img.channels();
CV_Assert(!img.empty() && img.depth() == CV_8U && ch <= 4); CV_Assert(!img.empty() && img.depth() == CV_8U && ch <= 4);
@ -708,57 +709,6 @@ void fillGradient(Mat& img, int delta)
} }
} }
// Fades the outer frame of an 8-bit image towards `color`, in place.
//   img   - non-empty CV_8U image with at most 4 channels
//   color - colour the border fades to
//   delta - fade step in percent per pixel: a pixel at distance d from an edge
//           keeps d*delta/100 of its own value and takes the rest from `color`.
//           Must not be 0.
// The frame is 100/delta pixels wide, clamped to half of the image size along
// each axis (nR rows, nC columns). Pixels covered by several passes, such as
// corners, are blended once per pass.
void smoothBorder(Mat& img, const Scalar& color, int delta)
{
    const int ch = img.channels();
    CV_Assert(!img.empty() && img.depth() == CV_8U && ch <= 4);

    Scalar s;
    uchar *p = NULL;
    int n = 100/delta;
    int nR = std::min(n, (img.rows+1)/2), nC = std::min(n, (img.cols+1)/2);

    int r, c, i;
    // Top and bottom bands.
    for(r=0; r<nR; r++)
    {
        double k1 = r*delta/100., k2 = 1-k1;
        for(c=0; c<img.cols; c++)
        {
            p = img.ptr(r, c);
            for(i=0; i<ch; i++) s[i] = p[i];
            s = s * k1 + color * k2;
            for(i=0; i<ch; i++) p[i] = uchar(s[i]);
        }
        for(c=0; c<img.cols; c++)
        {
            p = img.ptr(img.rows-r-1, c);
            for(i=0; i<ch; i++) s[i] = p[i];
            s = s * k1 + color * k2;
            for(i=0; i<ch; i++) p[i] = uchar(s[i]);
        }
    }

    // Left and right bands.
    for(r=0; r<img.rows; r++)
    {
        for(c=0; c<nC; c++)
        {
            double k1 = c*delta/100., k2 = 1-k1;
            p = img.ptr(r, c);
            for(i=0; i<ch; i++) s[i] = p[i];
            s = s * k1 + color * k2;
            for(i=0; i<ch; i++) p[i] = uchar(s[i]);
        }
        // Bounded by nC, not n: with the unclamped width, images narrower than
        // n columns made img.cols-c-1 negative and the access went out of bounds.
        for(c=0; c<nC; c++)
        {
            double k1 = c*delta/100., k2 = 1-k1;
            p = img.ptr(r, img.cols-c-1);
            for(i=0; i<ch; i++) s[i] = p[i];
            s = s * k1 + color * k2;
            for(i=0; i<ch; i++) p[i] = uchar(s[i]);
        }
    }
}
bool test_ipp_check = false; bool test_ipp_check = false;