mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 03:00:14 +08:00
Merge pull request #25786 from plctlab:rvp_3rdparty
3rdparty: NDSRVP - Part 1.5: New Interfaces
This commit is contained in:
commit
ecbff5a20c
2
3rdparty/ndsrvp/include/core.hpp
vendored
2
3rdparty/ndsrvp/include/core.hpp
vendored
@ -1,6 +1,6 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_NDSRVP_CORE_HPP
|
||||
#define OPENCV_NDSRVP_CORE_HPP
|
||||
|
45
3rdparty/ndsrvp/include/imgproc.hpp
vendored
45
3rdparty/ndsrvp/include/imgproc.hpp
vendored
@ -1,18 +1,12 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_NDSRVP_IMGPROC_HPP
|
||||
#define OPENCV_NDSRVP_IMGPROC_HPP
|
||||
|
||||
namespace cv {
|
||||
|
||||
// ################ remap ################
|
||||
|
||||
void remap(InputArray _src, OutputArray _dst,
|
||||
InputArray _map1, InputArray _map2,
|
||||
int interpolation, int borderType, const Scalar& borderValue);
|
||||
|
||||
namespace ndsrvp {
|
||||
|
||||
enum InterpolationMasks {
|
||||
@ -36,23 +30,36 @@ int integral(int depth, int sdepth, int sqdepth,
|
||||
|
||||
// ################ warpAffine ################
|
||||
|
||||
int warpAffine(int src_type,
|
||||
const uchar* src_data, size_t src_step, int src_width, int src_height,
|
||||
uchar* dst_data, size_t dst_step, int dst_width, int dst_height,
|
||||
const double M[6], int interpolation, int borderType, const double borderValue[4]);
|
||||
int warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw);
|
||||
|
||||
#undef cv_hal_warpAffine
|
||||
#define cv_hal_warpAffine (cv::ndsrvp::warpAffine)
|
||||
#undef cv_hal_warpAffineBlocklineNN
|
||||
#define cv_hal_warpAffineBlocklineNN (cv::ndsrvp::warpAffineBlocklineNN)
|
||||
|
||||
int warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw);
|
||||
|
||||
#undef cv_hal_warpAffineBlockline
|
||||
#define cv_hal_warpAffineBlockline (cv::ndsrvp::warpAffineBlockline)
|
||||
|
||||
// ################ warpPerspective ################
|
||||
|
||||
int warpPerspective(int src_type,
|
||||
const uchar* src_data, size_t src_step, int src_width, int src_height,
|
||||
uchar* dst_data, size_t dst_step, int dst_width, int dst_height,
|
||||
const double M[9], int interpolation, int borderType, const double borderValue[4]);
|
||||
int warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw);
|
||||
|
||||
#undef cv_hal_warpPerspective
|
||||
#define cv_hal_warpPerspective (cv::ndsrvp::warpPerspective)
|
||||
#undef cv_hal_warpPerspectiveBlocklineNN
|
||||
#define cv_hal_warpPerspectiveBlocklineNN (cv::ndsrvp::warpPerspectiveBlocklineNN)
|
||||
|
||||
int warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw);
|
||||
|
||||
#undef cv_hal_warpPerspectiveBlockline
|
||||
#define cv_hal_warpPerspectiveBlockline (cv::ndsrvp::warpPerspectiveBlockline)
|
||||
|
||||
// ################ remap ################
|
||||
|
||||
int remap32f(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height,
|
||||
uchar *dst_data, size_t dst_step, int dst_width, int dst_height, float* mapx, size_t mapx_step,
|
||||
float* mapy, size_t mapy_step, int interpolation, int border_type, const double border_value[4]);
|
||||
|
||||
#undef cv_hal_remap32f
|
||||
#define cv_hal_remap32f (cv::ndsrvp::remap32f)
|
||||
|
||||
// ################ threshold ################
|
||||
|
||||
|
5
3rdparty/ndsrvp/ndsrvp_hal.hpp
vendored
5
3rdparty/ndsrvp/ndsrvp_hal.hpp
vendored
@ -1,13 +1,14 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_NDSRVP_HAL_HPP
|
||||
#define OPENCV_NDSRVP_HAL_HPP
|
||||
|
||||
#include "opencv2/core/mat.hpp"
|
||||
#include <nds_intrinsic.h>
|
||||
|
||||
#include "opencv2/core/hal/interface.h"
|
||||
|
||||
#include "include/core.hpp"
|
||||
#include "include/imgproc.hpp"
|
||||
#include "include/features2d.hpp"
|
||||
|
78
3rdparty/ndsrvp/src/cvutils.cpp
vendored
Normal file
78
3rdparty/ndsrvp/src/cvutils.cpp
vendored
Normal file
@ -0,0 +1,78 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "cvutils.hpp"
|
||||
|
||||
namespace cv {
|
||||
|
||||
namespace ndsrvp {
|
||||
|
||||
// fastMalloc
|
||||
|
||||
// [0][1][2][3][4][5][6][7][8][9]
|
||||
// ^udata
|
||||
// ^adata
|
||||
// ^adata[-1] == udata
|
||||
|
||||
void* fastMalloc(size_t size)
|
||||
{
|
||||
uchar* udata = (uchar*)malloc(size + sizeof(void*) + CV_MALLOC_ALIGN);
|
||||
if(!udata)
|
||||
ndsrvp_error(Error::StsNoMem, "fastMalloc(): Not enough memory");
|
||||
uchar** adata = (uchar**)align((size_t)((uchar**)udata + 1), CV_MALLOC_ALIGN);
|
||||
adata[-1] = udata;
|
||||
return adata;
|
||||
}
|
||||
|
||||
void fastFree(void* ptr)
|
||||
{
|
||||
if(ptr)
|
||||
{
|
||||
uchar* udata = ((uchar**)ptr)[-1];
|
||||
if(!(udata < (uchar*)ptr && ((uchar*)ptr - udata) <= (ptrdiff_t)(sizeof(void*) + CV_MALLOC_ALIGN)))
|
||||
ndsrvp_error(Error::StsBadArg, "fastFree(): Invalid memory block");
|
||||
free(udata);
|
||||
}
|
||||
}
|
||||
|
||||
// borderInterpolate
|
||||
|
||||
int borderInterpolate(int p, int len, int borderType)
|
||||
{
|
||||
if( (unsigned)p < (unsigned)len )
|
||||
;
|
||||
else if( borderType == CV_HAL_BORDER_REPLICATE )
|
||||
p = p < 0 ? 0 : len - 1;
|
||||
else if( borderType == CV_HAL_BORDER_REFLECT || borderType == CV_HAL_BORDER_REFLECT_101 )
|
||||
{
|
||||
int delta = borderType == CV_HAL_BORDER_REFLECT_101;
|
||||
if( len == 1 )
|
||||
return 0;
|
||||
do
|
||||
{
|
||||
if( p < 0 )
|
||||
p = -p - 1 + delta;
|
||||
else
|
||||
p = len - 1 - (p - len) - delta;
|
||||
}
|
||||
while( (unsigned)p >= (unsigned)len );
|
||||
}
|
||||
else if( borderType == CV_HAL_BORDER_WRAP )
|
||||
{
|
||||
ndsrvp_assert(len > 0);
|
||||
if( p < 0 )
|
||||
p -= ((p - len + 1) / len) * len;
|
||||
if( p >= len )
|
||||
p %= len;
|
||||
}
|
||||
else if( borderType == CV_HAL_BORDER_CONSTANT )
|
||||
p = -1;
|
||||
else
|
||||
ndsrvp_error(Error::StsBadArg, "borderInterpolate(): Unknown/unsupported border type");
|
||||
return p;
|
||||
}
|
||||
|
||||
} // namespace ndsrvp
|
||||
|
||||
} // namespace cv
|
108
3rdparty/ndsrvp/src/cvutils.hpp
vendored
Normal file
108
3rdparty/ndsrvp/src/cvutils.hpp
vendored
Normal file
@ -0,0 +1,108 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_NDSRVP_CVUTILS_HPP
|
||||
#define OPENCV_NDSRVP_CVUTILS_HPP
|
||||
|
||||
#include <nds_intrinsic.h>
|
||||
|
||||
#include "opencv2/core/hal/interface.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <array>
|
||||
#include <climits>
|
||||
#include <algorithm>
|
||||
|
||||
// misc functions that are not exposed to the public interface
|
||||
|
||||
namespace cv {
|
||||
|
||||
namespace ndsrvp {
|
||||
|
||||
void* fastMalloc(size_t size);
|
||||
void fastFree(void* ptr);
|
||||
int borderInterpolate(int p, int len, int borderType);
|
||||
|
||||
#ifndef MAX
|
||||
# define MAX(a,b) ((a) < (b) ? (b) : (a))
|
||||
#endif
|
||||
|
||||
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
|
||||
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
|
||||
|
||||
#define CV_MALLOC_ALIGN 64
|
||||
|
||||
// error codes
|
||||
|
||||
// Status codes passed to ndsrvp_error(); all are negative, so each of them
// makes ndsrvp_error() throw.
enum Error
{
    StsNoMem  = -4,    // allocation failure (raised by fastMalloc)
    StsBadArg = -5,    // invalid argument (raised by fastFree / borderInterpolate)
    StsAssert = -215   // failed ndsrvp_assert() expression
};
|
||||
|
||||
// output error
|
||||
|
||||
#define ndsrvp_assert(expr) { if(!(expr)) ndsrvp_error(Error::StsAssert, std::string(#expr)); }
|
||||
|
||||
// Prints "NDSRVP Error: code <code>" to std::cerr, followed by `msg` on its
// own line when `msg` is non-empty. Negative codes are then thrown as a plain
// `int`; zero and positive codes return normally after printing.
inline void ndsrvp_error(int code, std::string msg = "")
{
    std::cerr << "NDSRVP Error: code " << code << std::endl;

    const bool hasMessage = (msg.size() != 0);
    if(hasMessage)
        std::cerr << msg << std::endl;

    if(code >= 0)
        return;
    throw code;
}
|
||||
|
||||
// clip & vclip
|
||||
|
||||
// Clamps x into the half-open range [a, b): values below a become a,
// values at or above b become b - 1.
inline int clip(int x, int a, int b)
{
    if(x < a)
        return a;
    if(x < b)
        return x;
    return b - 1;
}
|
||||
|
||||
// Lane-wise counterpart of clip(): clamps each 32-bit lane of x into
// [a, b - 1], i.e. per lane  x >= a ? (x < b ? x : b - 1) : a.
//
// __nds__bpick(p, q, mask) yields p where mask bits are set and q elsewhere,
// and a vector comparison produces all-ones lanes where it holds. The value to
// keep when the comparison is TRUE therefore has to be the FIRST operand. The
// previous operand order did the opposite (returned `a` for lanes with x >= a
// and `b - 1` for lanes with x < b).
// NOTE(review): operand order follows the Andes intrinsic reference; please
// confirm on target hardware.
inline int32x2_t vclip(int32x2_t x, int32x2_t a, int32x2_t b)
{
    const long belowUpper   = (long)(x < b);
    const long atLeastLower = (long)(x >= a);

    // x where x < b, otherwise b - 1
    const long upperClamped = (long)__nds__bpick((long)x, (long)(b - 1), belowUpper);
    // upperClamped where x >= a, otherwise a
    return (int32x2_t)__nds__bpick(upperClamped, (long)a, atLeastLower);
}
|
||||
|
||||
// saturate
|
||||
|
||||
template<typename _Tp> static inline _Tp saturate_cast(int v) { return _Tp(v); }
|
||||
template<typename _Tp> static inline _Tp saturate_cast(float v) { return _Tp(v); }
|
||||
template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); }
|
||||
|
||||
template<> inline uchar saturate_cast<uchar>(int v) { return __nds__uclip32(v, 8); }
|
||||
template<> inline uchar saturate_cast<uchar>(float v) { return saturate_cast<uchar>((int)lrintf(v)); }
|
||||
template<> inline uchar saturate_cast<uchar>(double v) { return saturate_cast<uchar>((int)lrint(v)); }
|
||||
|
||||
template<> inline char saturate_cast<char>(int v) { return __nds__sclip32(v, 7); }
|
||||
template<> inline char saturate_cast<char>(float v) { return saturate_cast<char>((int)lrintf(v)); }
|
||||
template<> inline char saturate_cast<char>(double v) { return saturate_cast<char>((int)lrint(v)); }
|
||||
|
||||
template<> inline ushort saturate_cast<ushort>(int v) { return __nds__uclip32(v, 16); }
|
||||
template<> inline ushort saturate_cast<ushort>(float v) { return saturate_cast<ushort>((int)lrintf(v)); }
|
||||
template<> inline ushort saturate_cast<ushort>(double v) { return saturate_cast<ushort>((int)lrint(v)); }
|
||||
|
||||
template<> inline short saturate_cast<short>(int v) { return __nds__sclip32(v, 15); }
|
||||
template<> inline short saturate_cast<short>(float v) { return saturate_cast<short>((int)lrintf(v)); }
|
||||
template<> inline short saturate_cast<short>(double v) { return saturate_cast<short>((int)lrint(v)); }
|
||||
|
||||
template<> inline int saturate_cast<int>(float v) { return (int)lrintf(v); }
|
||||
template<> inline int saturate_cast<int>(double v) { return (int)lrint(v); }
|
||||
|
||||
// align
|
||||
|
||||
// Rounds v up to the next multiple of n. The mask trick `& -n` is only
// meaningful when n is a power of two.
inline long align(size_t v, int n)
{
    const size_t bumped = v + n - 1;   // push past the next boundary unless already on one
    return bumped & -n;                // clear the low bits
}
|
||||
|
||||
} // namespace ndsrvp
|
||||
|
||||
} // namespace cv
|
||||
|
||||
#endif
|
2
3rdparty/ndsrvp/src/integral.cpp
vendored
2
3rdparty/ndsrvp/src/integral.cpp
vendored
@ -3,6 +3,8 @@
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "ndsrvp_hal.hpp"
|
||||
#include "opencv2/imgproc/hal/interface.h"
|
||||
#include "cvutils.hpp"
|
||||
|
||||
namespace cv {
|
||||
|
||||
|
188
3rdparty/ndsrvp/src/remap.cpp
vendored
Normal file
188
3rdparty/ndsrvp/src/remap.cpp
vendored
Normal file
@ -0,0 +1,188 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "ndsrvp_hal.hpp"
|
||||
#include "opencv2/imgproc/hal/interface.h"
|
||||
#include "cvutils.hpp"
|
||||
|
||||
namespace cv {
|
||||
|
||||
namespace ndsrvp {
|
||||
|
||||
int remap32f(int src_type, const uchar* src_data, size_t src_step, int src_width, int src_height,
|
||||
uchar* dst_data, size_t dst_step, int dst_width, int dst_height, float* mapx, size_t mapx_step,
|
||||
float* mapy, size_t mapy_step, int interpolation, int border_type, const double border_value[4])
|
||||
{
|
||||
const bool isRelative = ((interpolation & CV_HAL_WARP_RELATIVE_MAP) != 0);
|
||||
interpolation &= ~CV_HAL_WARP_RELATIVE_MAP;
|
||||
|
||||
if( interpolation == CV_HAL_INTER_AREA )
|
||||
interpolation = CV_HAL_INTER_LINEAR;
|
||||
|
||||
if( interpolation != CV_HAL_INTER_NEAREST )
|
||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||
|
||||
// only CV_8U
|
||||
if( (src_type & CV_MAT_DEPTH_MASK) != CV_8U )
|
||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||
|
||||
int cn = CV_MAT_CN(src_type);
|
||||
|
||||
src_step /= sizeof(uchar);
|
||||
dst_step /= sizeof(uchar);
|
||||
|
||||
// mapping CV_32FC1
|
||||
mapx_step /= sizeof(float);
|
||||
mapy_step /= sizeof(float);
|
||||
|
||||
// border
|
||||
uchar border_const[CV_CN_MAX];
|
||||
for( int k = 0; k < CV_CN_MAX; k++ )
|
||||
border_const[k] = saturate_cast<uchar>(border_value[k & 3]);
|
||||
|
||||
// divide into blocks
|
||||
const int BLOCK_SIZE = 1024;
|
||||
int x, y, x1, y1;
|
||||
std::array<short, BLOCK_SIZE * BLOCK_SIZE * 2> aXY;
|
||||
short* XY = aXY.data();
|
||||
size_t XY_step = BLOCK_SIZE * 2;
|
||||
|
||||
// vectorize
|
||||
const int32x2_t src_wh = {src_width, src_height};
|
||||
const int32x2_t arr_index = {cn, (int)src_step};
|
||||
|
||||
for (y = 0; y < dst_height; y += BLOCK_SIZE)
|
||||
{
|
||||
int dy = std::min(BLOCK_SIZE, dst_height - y);
|
||||
for (x = 0; x < dst_width; x += BLOCK_SIZE)
|
||||
{
|
||||
const int off_y = isRelative ? y : 0;
|
||||
const int off_x = isRelative ? x : 0;
|
||||
const int32x2_t voff = {off_x, off_y};
|
||||
|
||||
int dx = std::min(BLOCK_SIZE, dst_width - x);
|
||||
// prepare mapping data XY
|
||||
for (y1 = 0; y1 < dy; y1++)
|
||||
{
|
||||
short* rXY = XY + y1 * XY_step;
|
||||
const float* sX = mapx + (y + y1) * mapx_step + x;
|
||||
const float* sY = mapy + (y + y1) * mapy_step + x;
|
||||
for (x1 = 0; x1 < dx; x1++)
|
||||
{
|
||||
rXY[x1 * 2] = saturate_cast<short>(sX[x1]);
|
||||
rXY[x1 * 2 + 1] = saturate_cast<short>(sY[x1]);
|
||||
}
|
||||
}
|
||||
|
||||
// precalulate offset
|
||||
if(isRelative)
|
||||
{
|
||||
int16x8_t voff_x;
|
||||
int16x8_t voff_y = {0, 0, 1, 0, 2, 0, 3, 0};
|
||||
int16x8_t vones_x = {4, 0, 4, 0, 4, 0, 4, 0};
|
||||
int16x8_t vones_y = {0, 1, 0, 1, 0, 1, 0, 1};
|
||||
for(y1 = 0; y1 < BLOCK_SIZE; y1++, voff_y += vones_y)
|
||||
{
|
||||
int16x8_t* vrXY = (int16x8_t*)(XY + y1 * XY_step);
|
||||
for(x1 = 0, voff_x = voff_y; x1 < BLOCK_SIZE; x1 += 4, vrXY++, voff_x += vones_x)
|
||||
{
|
||||
*vrXY += voff_x;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// process the block
|
||||
for( y1 = 0; y1 < dy; y1++ )
|
||||
{
|
||||
uchar* dst_row = dst_data + (y + y1) * dst_step + x * cn;
|
||||
const short* rXY = XY + y1 * XY_step;
|
||||
if( cn == 1 )
|
||||
{
|
||||
for( x1 = 0; x1 < dx; x1++ )
|
||||
{
|
||||
int32x2_t vsxy = (int32x2_t){rXY[x1 * 2], rXY[x1 * 2 + 1]} + voff;
|
||||
if( (long)((uint32x2_t)vsxy < (uint32x2_t)src_wh) == -1 )
|
||||
dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)];
|
||||
else
|
||||
{
|
||||
if( border_type == CV_HAL_BORDER_REPLICATE )
|
||||
{
|
||||
vsxy = vclip(vsxy, (int32x2_t){0, 0}, src_wh);
|
||||
dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)];
|
||||
}
|
||||
else if( border_type == CV_HAL_BORDER_CONSTANT )
|
||||
dst_row[x1] = border_const[0];
|
||||
else if( border_type != CV_HAL_BORDER_TRANSPARENT )
|
||||
{
|
||||
vsxy[0] = borderInterpolate(vsxy[0], src_width, border_type);
|
||||
vsxy[1] = borderInterpolate(vsxy[1], src_height, border_type);
|
||||
dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
uchar* dst_ptr = dst_row;
|
||||
for(x1 = 0; x1 < dx; x1++, dst_ptr += cn )
|
||||
{
|
||||
int32x2_t vsxy = (int32x2_t){rXY[x1 * 2], rXY[x1 * 2 + 1]} + voff;
|
||||
const uchar *src_ptr;
|
||||
if( (long)((uint32x2_t)vsxy < (uint32x2_t)src_wh) == -1 )
|
||||
{
|
||||
if( cn == 3 )
|
||||
{
|
||||
src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index);
|
||||
dst_ptr[0] = src_ptr[0]; dst_ptr[1] = src_ptr[1]; dst_ptr[2] = src_ptr[2];
|
||||
// performance loss, commented out
|
||||
// *(unsigned*)dst_ptr = __nds__bpick(*(unsigned*)dst_ptr, *(unsigned*)src_ptr, 0xFF000000);
|
||||
}
|
||||
else if( cn == 4 )
|
||||
{
|
||||
src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index);
|
||||
*(uint8x4_t*)dst_ptr = *(uint8x4_t*)src_ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index);
|
||||
int k = cn;
|
||||
for(; k >= 8; k -= 8, dst_ptr += 8, src_ptr += 8)
|
||||
*(uint8x8_t*)dst_ptr = *(uint8x8_t*)src_ptr;
|
||||
while( k-- )
|
||||
dst_ptr[k] = src_ptr[k];
|
||||
}
|
||||
}
|
||||
else if( border_type != CV_HAL_BORDER_TRANSPARENT )
|
||||
{
|
||||
if( border_type == CV_HAL_BORDER_REPLICATE )
|
||||
{
|
||||
vsxy = vclip(vsxy, (int32x2_t){0, 0}, src_wh);
|
||||
src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index);
|
||||
}
|
||||
else if( border_type == CV_HAL_BORDER_CONSTANT )
|
||||
src_ptr = &border_const[0];
|
||||
else
|
||||
{
|
||||
vsxy[0] = borderInterpolate(vsxy[0], src_width, border_type);
|
||||
vsxy[1] = borderInterpolate(vsxy[1], src_height, border_type);
|
||||
src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index);
|
||||
}
|
||||
int k = cn;
|
||||
for(; k >= 8; k -= 8, dst_ptr += 8, src_ptr += 8)
|
||||
*(uint8x8_t*)dst_ptr = *(uint8x8_t*)src_ptr;
|
||||
while( k-- )
|
||||
dst_ptr[k] = src_ptr[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return CV_HAL_ERROR_OK;
|
||||
}
|
||||
|
||||
} // namespace ndsrvp
|
||||
|
||||
} // namespace cv
|
147
3rdparty/ndsrvp/src/threshold.cpp
vendored
147
3rdparty/ndsrvp/src/threshold.cpp
vendored
@ -4,65 +4,44 @@
|
||||
|
||||
#include "ndsrvp_hal.hpp"
|
||||
#include "opencv2/imgproc/hal/interface.h"
|
||||
#include "cvutils.hpp"
|
||||
|
||||
namespace cv {
|
||||
|
||||
namespace ndsrvp {
|
||||
|
||||
template <typename type, typename vtype>
|
||||
class operators_threshold_t {
|
||||
public:
|
||||
virtual ~operators_threshold_t() {};
|
||||
virtual inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
|
||||
{
|
||||
(void)src;
|
||||
(void)thresh;
|
||||
(void)maxval;
|
||||
CV_Error(cv::Error::StsBadArg, "");
|
||||
return vtype();
|
||||
}
|
||||
virtual inline type scalar(const type& src, const type& thresh, const type& maxval)
|
||||
{
|
||||
(void)src;
|
||||
(void)thresh;
|
||||
(void)maxval;
|
||||
CV_Error(cv::Error::StsBadArg, "");
|
||||
return type();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename type, typename vtype>
|
||||
class opThreshBinary : public operators_threshold_t<type, vtype> {
|
||||
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override
|
||||
struct opThreshBinary_t {
|
||||
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
|
||||
{
|
||||
return (vtype)__nds__bpick((long)maxval, (long)0, (long)(src > thresh));
|
||||
}
|
||||
inline type scalar(const type& src, const type& thresh, const type& maxval) override
|
||||
inline type scalar(const type& src, const type& thresh, const type& maxval)
|
||||
{
|
||||
return src > thresh ? maxval : 0;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename type, typename vtype>
|
||||
class opThreshBinaryInv : public operators_threshold_t<type, vtype> {
|
||||
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override
|
||||
struct opThreshBinaryInv_t {
|
||||
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
|
||||
{
|
||||
return (vtype)__nds__bpick((long)0, (long)maxval, (long)(src > thresh));
|
||||
}
|
||||
inline type scalar(const type& src, const type& thresh, const type& maxval) override
|
||||
inline type scalar(const type& src, const type& thresh, const type& maxval)
|
||||
{
|
||||
return src > thresh ? 0 : maxval;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename type, typename vtype>
|
||||
class opThreshTrunc : public operators_threshold_t<type, vtype> {
|
||||
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override
|
||||
struct opThreshTrunc_t {
|
||||
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
|
||||
{
|
||||
(void)maxval;
|
||||
return (vtype)__nds__bpick((long)thresh, (long)src, (long)(src > thresh));
|
||||
}
|
||||
inline type scalar(const type& src, const type& thresh, const type& maxval) override
|
||||
inline type scalar(const type& src, const type& thresh, const type& maxval)
|
||||
{
|
||||
(void)maxval;
|
||||
return src > thresh ? thresh : src;
|
||||
@ -70,13 +49,13 @@ class opThreshTrunc : public operators_threshold_t<type, vtype> {
|
||||
};
|
||||
|
||||
template <typename type, typename vtype>
|
||||
class opThreshToZero : public operators_threshold_t<type, vtype> {
|
||||
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override
|
||||
struct opThreshToZero_t {
|
||||
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
|
||||
{
|
||||
(void)maxval;
|
||||
return (vtype)__nds__bpick((long)src, (long)0, (long)(src > thresh));
|
||||
}
|
||||
inline type scalar(const type& src, const type& thresh, const type& maxval) override
|
||||
inline type scalar(const type& src, const type& thresh, const type& maxval)
|
||||
{
|
||||
(void)maxval;
|
||||
return src > thresh ? src : 0;
|
||||
@ -84,29 +63,36 @@ class opThreshToZero : public operators_threshold_t<type, vtype> {
|
||||
};
|
||||
|
||||
template <typename type, typename vtype>
|
||||
class opThreshToZeroInv : public operators_threshold_t<type, vtype> {
|
||||
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override
|
||||
struct opThreshToZeroInv_t {
|
||||
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
|
||||
{
|
||||
(void)maxval;
|
||||
return (vtype)__nds__bpick((long)0, (long)src, (long)(src > thresh));
|
||||
}
|
||||
inline type scalar(const type& src, const type& thresh, const type& maxval) override
|
||||
inline type scalar(const type& src, const type& thresh, const type& maxval)
|
||||
{
|
||||
(void)maxval;
|
||||
return src > thresh ? 0 : src;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename type, typename vtype, int nlane>
|
||||
static void threshold_op(const type* src_data, size_t src_step,
|
||||
type* dst_data, size_t dst_step,
|
||||
template <typename type, typename vtype, int nlane,
|
||||
template <typename ttype, typename vttype> typename opThresh_t>
|
||||
static inline void threshold_op(const uchar* src, size_t src_step,
|
||||
uchar* dst, size_t dst_step,
|
||||
int width, int height, int cn,
|
||||
type thresh, type maxval, int thtype)
|
||||
double thresh_d, double maxval_d)
|
||||
{
|
||||
int i, j;
|
||||
width *= cn;
|
||||
|
||||
type* src_data = (type*)src;
|
||||
type* dst_data = (type*)dst;
|
||||
src_step /= sizeof(type);
|
||||
dst_step /= sizeof(type);
|
||||
|
||||
type thresh = saturate_cast<type>(thresh_d);
|
||||
type maxval = saturate_cast<type>(maxval_d);
|
||||
vtype vthresh;
|
||||
vtype vmaxval;
|
||||
for (i = 0; i < nlane; i++) {
|
||||
@ -114,62 +100,63 @@ static void threshold_op(const type* src_data, size_t src_step,
|
||||
vmaxval[i] = maxval;
|
||||
}
|
||||
|
||||
operators_threshold_t<type, vtype>* op;
|
||||
switch (thtype) {
|
||||
case CV_HAL_THRESH_BINARY:
|
||||
op = new opThreshBinary<type, vtype>();
|
||||
break;
|
||||
case CV_HAL_THRESH_BINARY_INV:
|
||||
op = new opThreshBinaryInv<type, vtype>();
|
||||
break;
|
||||
case CV_HAL_THRESH_TRUNC:
|
||||
op = new opThreshTrunc<type, vtype>();
|
||||
break;
|
||||
case CV_HAL_THRESH_TOZERO:
|
||||
op = new opThreshToZero<type, vtype>();
|
||||
break;
|
||||
case CV_HAL_THRESH_TOZERO_INV:
|
||||
op = new opThreshToZeroInv<type, vtype>();
|
||||
break;
|
||||
default:
|
||||
CV_Error(cv::Error::StsBadArg, "");
|
||||
return;
|
||||
}
|
||||
opThresh_t<type, vtype> opThresh;
|
||||
|
||||
for (i = 0; i < height; i++, src_data += src_step, dst_data += dst_step) {
|
||||
for (j = 0; j <= width - nlane; j += nlane) {
|
||||
vtype vs = *(vtype*)(src_data + j);
|
||||
*(vtype*)(dst_data + j) = op->vector(vs, vthresh, vmaxval);
|
||||
*(vtype*)(dst_data + j) = opThresh.vector(*(vtype*)(src_data + j), vthresh, vmaxval);
|
||||
}
|
||||
for (; j < width; j++) {
|
||||
dst_data[j] = op->scalar(src_data[j], thresh, maxval);
|
||||
dst_data[j] = opThresh.scalar(src_data[j], thresh, maxval);
|
||||
}
|
||||
}
|
||||
|
||||
delete op;
|
||||
return;
|
||||
}
|
||||
|
||||
typedef void (*ThreshFunc)(const uchar* src_data, size_t src_step,
|
||||
uchar* dst_data, size_t dst_step,
|
||||
int width, int height, int cn,
|
||||
double thresh, double maxval);
|
||||
|
||||
int threshold(const uchar* src_data, size_t src_step,
|
||||
uchar* dst_data, size_t dst_step,
|
||||
int width, int height, int depth, int cn,
|
||||
double thresh, double maxValue, int thresholdType)
|
||||
{
|
||||
if (width <= 255 && height <= 255) // slower at small size
|
||||
static ThreshFunc thfuncs[4][5] =
|
||||
{
|
||||
{
|
||||
threshold_op<uchar, uint8x8_t, 8, opThreshBinary_t>,
|
||||
threshold_op<uchar, uint8x8_t, 8, opThreshBinaryInv_t>,
|
||||
threshold_op<uchar, uint8x8_t, 8, opThreshTrunc_t>,
|
||||
threshold_op<uchar, uint8x8_t, 8, opThreshToZero_t>,
|
||||
threshold_op<uchar, uint8x8_t, 8, opThreshToZeroInv_t> },
|
||||
{
|
||||
threshold_op<char, int8x8_t, 8, opThreshBinary_t>,
|
||||
threshold_op<char, int8x8_t, 8, opThreshBinaryInv_t>,
|
||||
threshold_op<char, int8x8_t, 8, opThreshTrunc_t>,
|
||||
threshold_op<char, int8x8_t, 8, opThreshToZero_t>,
|
||||
threshold_op<char, int8x8_t, 8, opThreshToZeroInv_t> },
|
||||
{
|
||||
threshold_op<ushort, uint16x4_t, 4, opThreshBinary_t>,
|
||||
threshold_op<ushort, uint16x4_t, 4, opThreshBinaryInv_t>,
|
||||
threshold_op<ushort, uint16x4_t, 4, opThreshTrunc_t>,
|
||||
threshold_op<ushort, uint16x4_t, 4, opThreshToZero_t>,
|
||||
threshold_op<ushort, uint16x4_t, 4, opThreshToZeroInv_t> },
|
||||
{
|
||||
threshold_op<short, int16x4_t, 4, opThreshBinary_t>,
|
||||
threshold_op<short, int16x4_t, 4, opThreshBinaryInv_t>,
|
||||
threshold_op<short, int16x4_t, 4, opThreshTrunc_t>,
|
||||
threshold_op<short, int16x4_t, 4, opThreshToZero_t>,
|
||||
threshold_op<short, int16x4_t, 4, opThreshToZeroInv_t> }
|
||||
};
|
||||
|
||||
if(depth < 0 || depth > 3 || thresholdType < 0 || thresholdType > 4 || (width < 256 && height < 256))
|
||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||
if (depth == CV_8U) {
|
||||
threshold_op<uchar, uint8x8_t, 8>((uchar*)src_data, src_step, (uchar*)dst_data, dst_step, width, height, cn, (uchar)thresh, (uchar)maxValue, thresholdType);
|
||||
return CV_HAL_ERROR_OK;
|
||||
} else if (depth == CV_16S) {
|
||||
threshold_op<short, int16x4_t, 4>((short*)src_data, src_step, (short*)dst_data, dst_step, width, height, cn, (short)thresh, (short)maxValue, thresholdType);
|
||||
return CV_HAL_ERROR_OK;
|
||||
} else if (depth == CV_16U) {
|
||||
threshold_op<ushort, uint16x4_t, 4>((ushort*)src_data, src_step, (ushort*)dst_data, dst_step, width, height, cn, (ushort)thresh, (ushort)maxValue, thresholdType);
|
||||
return CV_HAL_ERROR_OK;
|
||||
} else {
|
||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||
|
||||
thfuncs[depth][thresholdType](src_data, src_step, dst_data, dst_step, width, height, cn, thresh, maxValue);
|
||||
return CV_HAL_ERROR_OK;
|
||||
}
|
||||
|
||||
} // namespace ndsrvp
|
||||
|
182
3rdparty/ndsrvp/src/warpAffine.cpp
vendored
182
3rdparty/ndsrvp/src/warpAffine.cpp
vendored
@ -3,148 +3,68 @@
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "ndsrvp_hal.hpp"
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgproc/hal/interface.h"
|
||||
#include "cvutils.hpp"
|
||||
|
||||
namespace cv {
|
||||
|
||||
namespace ndsrvp {
|
||||
|
||||
class WarpAffineInvoker : public ParallelLoopBody {
|
||||
public:
|
||||
WarpAffineInvoker(const Mat& _src, Mat& _dst, int _interpolation, int _borderType,
|
||||
const Scalar& _borderValue, int* _adelta, int* _bdelta, const double* _M)
|
||||
: ParallelLoopBody()
|
||||
, src(_src)
|
||||
, dst(_dst)
|
||||
, interpolation(_interpolation)
|
||||
, borderType(_borderType)
|
||||
, borderValue(_borderValue)
|
||||
, adelta(_adelta)
|
||||
, bdelta(_bdelta)
|
||||
, M(_M)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void operator()(const Range& range) const CV_OVERRIDE
|
||||
{
|
||||
const int BLOCK_SZ = 64;
|
||||
AutoBuffer<short, 0> __XY(BLOCK_SZ * BLOCK_SZ * 2), __A(BLOCK_SZ * BLOCK_SZ);
|
||||
short *XY = __XY.data(), *A = __A.data();
|
||||
const int AB_BITS = MAX(10, (int)INTER_BITS);
|
||||
const int AB_SCALE = 1 << AB_BITS;
|
||||
int round_delta = interpolation == CV_HAL_INTER_NEAREST ? AB_SCALE / 2 : AB_SCALE / INTER_TAB_SIZE / 2, x, y, x1, y1;
|
||||
|
||||
int bh0 = std::min(BLOCK_SZ / 2, dst.rows);
|
||||
int bw0 = std::min(BLOCK_SZ * BLOCK_SZ / bh0, dst.cols);
|
||||
bh0 = std::min(BLOCK_SZ * BLOCK_SZ / bw0, dst.rows);
|
||||
|
||||
for (y = range.start; y < range.end; y += bh0) {
|
||||
for (x = 0; x < dst.cols; x += bw0) {
|
||||
int bw = std::min(bw0, dst.cols - x);
|
||||
int bh = std::min(bh0, range.end - y);
|
||||
|
||||
Mat _XY(bh, bw, CV_16SC2, XY);
|
||||
Mat dpart(dst, Rect(x, y, bw, bh));
|
||||
|
||||
for (y1 = 0; y1 < bh; y1++) {
|
||||
short* xy = XY + y1 * bw * 2;
|
||||
int X0 = saturate_cast<int>((M[1] * (y + y1) + M[2]) * AB_SCALE) + round_delta;
|
||||
int Y0 = saturate_cast<int>((M[4] * (y + y1) + M[5]) * AB_SCALE) + round_delta;
|
||||
|
||||
if (interpolation == CV_HAL_INTER_NEAREST) {
|
||||
x1 = 0;
|
||||
|
||||
for (; x1 < bw; x1 += 2) {
|
||||
int32x2_t vX = { X0 + adelta[x + x1], X0 + adelta[x + x1 + 1] };
|
||||
int32x2_t vY = { Y0 + bdelta[x + x1], Y0 + bdelta[x + x1 + 1] };
|
||||
|
||||
vX = __nds__v_sclip32(__nds__v_sra32(vX, AB_BITS), 15);
|
||||
vY = __nds__v_sclip32(__nds__v_sra32(vY, AB_BITS), 15);
|
||||
|
||||
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX);
|
||||
}
|
||||
|
||||
for (; x1 < bw; x1++) {
|
||||
int X = (X0 + adelta[x + x1]) >> AB_BITS;
|
||||
int Y = (Y0 + bdelta[x + x1]) >> AB_BITS;
|
||||
xy[x1 * 2] = saturate_cast<short>(X);
|
||||
xy[x1 * 2 + 1] = saturate_cast<short>(Y);
|
||||
}
|
||||
} else {
|
||||
short* alpha = A + y1 * bw;
|
||||
x1 = 0;
|
||||
|
||||
const int INTER_MASK = INTER_TAB_SIZE - 1;
|
||||
const uint32x2_t vmask = { INTER_MASK, INTER_MASK };
|
||||
for (; x1 < bw; x1 += 2) {
|
||||
int32x2_t vX = { X0 + adelta[x + x1], X0 + adelta[x + x1 + 1] };
|
||||
int32x2_t vY = { Y0 + bdelta[x + x1], Y0 + bdelta[x + x1 + 1] };
|
||||
vX = __nds__v_sra32(vX, (AB_BITS - INTER_BITS));
|
||||
vY = __nds__v_sra32(vY, (AB_BITS - INTER_BITS));
|
||||
|
||||
int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15);
|
||||
int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15);
|
||||
|
||||
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx);
|
||||
|
||||
uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask));
|
||||
*(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) };
|
||||
}
|
||||
|
||||
for (; x1 < bw; x1++) {
|
||||
int X = (X0 + adelta[x + x1]) >> (AB_BITS - INTER_BITS);
|
||||
int Y = (Y0 + bdelta[x + x1]) >> (AB_BITS - INTER_BITS);
|
||||
xy[x1 * 2] = saturate_cast<short>(X >> INTER_BITS);
|
||||
xy[x1 * 2 + 1] = saturate_cast<short>(Y >> INTER_BITS);
|
||||
alpha[x1] = (short)((Y & (INTER_TAB_SIZE - 1)) * INTER_TAB_SIZE + (X & (INTER_TAB_SIZE - 1)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (interpolation == CV_HAL_INTER_NEAREST)
|
||||
remap(src, dpart, _XY, Mat(), interpolation, borderType, borderValue);
|
||||
else {
|
||||
Mat _matA(bh, bw, CV_16U, A);
|
||||
remap(src, dpart, _XY, _matA, interpolation, borderType, borderValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Mat src;
|
||||
Mat dst;
|
||||
int interpolation, borderType;
|
||||
Scalar borderValue;
|
||||
int *adelta, *bdelta;
|
||||
const double* M;
|
||||
};
|
||||
|
||||
int warpAffine(int src_type,
|
||||
const uchar* src_data, size_t src_step, int src_width, int src_height,
|
||||
uchar* dst_data, size_t dst_step, int dst_width, int dst_height,
|
||||
const double M[6], int interpolation, int borderType, const double borderValue[4])
|
||||
int warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw)
|
||||
{
|
||||
Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
|
||||
Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
|
||||
|
||||
int x;
|
||||
AutoBuffer<int> _abdelta(dst.cols * 2);
|
||||
int *adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
|
||||
const int AB_BITS = MAX(10, (int)INTER_BITS);
|
||||
const int AB_SCALE = 1 << AB_BITS;
|
||||
int x1 = 0;
|
||||
|
||||
for (x = 0; x < dst.cols; x++) {
|
||||
adelta[x] = saturate_cast<int>(M[0] * x * AB_SCALE);
|
||||
bdelta[x] = saturate_cast<int>(M[3] * x * AB_SCALE);
|
||||
for (; x1 < bw; x1 += 2) {
|
||||
int32x2_t vX = { X0 + adelta[x1], X0 + adelta[x1 + 1] };
|
||||
int32x2_t vY = { Y0 + bdelta[x1], Y0 + bdelta[x1 + 1] };
|
||||
|
||||
vX = __nds__v_sclip32(__nds__v_sra32(vX, AB_BITS), 15);
|
||||
vY = __nds__v_sclip32(__nds__v_sra32(vY, AB_BITS), 15);
|
||||
|
||||
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX);
|
||||
}
|
||||
|
||||
for (; x1 < bw; x1++) {
|
||||
int X = X0 + adelta[x1];
|
||||
int Y = Y0 + bdelta[x1];
|
||||
xy[x1 * 2] = saturate_cast<short>(X);
|
||||
xy[x1 * 2 + 1] = saturate_cast<short>(Y);
|
||||
}
|
||||
|
||||
return CV_HAL_ERROR_OK;
|
||||
}
|
||||
|
||||
int warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw)
|
||||
{
|
||||
const int AB_BITS = MAX(10, (int)INTER_BITS);
|
||||
int x1 = 0;
|
||||
|
||||
const int INTER_MASK = INTER_TAB_SIZE - 1;
|
||||
const uint32x2_t vmask = { INTER_MASK, INTER_MASK };
|
||||
for (; x1 < bw; x1 += 2) {
|
||||
int32x2_t vX = { X0 + adelta[x1], X0 + adelta[x1 + 1] };
|
||||
int32x2_t vY = { Y0 + bdelta[x1], Y0 + bdelta[x1 + 1] };
|
||||
vX = __nds__v_sra32(vX, (AB_BITS - INTER_BITS));
|
||||
vY = __nds__v_sra32(vY, (AB_BITS - INTER_BITS));
|
||||
|
||||
int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15);
|
||||
int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15);
|
||||
|
||||
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx);
|
||||
|
||||
uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask));
|
||||
*(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) };
|
||||
}
|
||||
|
||||
for (; x1 < bw; x1++) {
|
||||
int X = X0 + adelta[x1];
|
||||
int Y = Y0 + bdelta[x1];
|
||||
xy[x1 * 2] = saturate_cast<short>(X >> INTER_BITS);
|
||||
xy[x1 * 2 + 1] = saturate_cast<short>(Y >> INTER_BITS);
|
||||
alpha[x1] = (short)((Y & INTER_MASK) * INTER_TAB_SIZE + (X & INTER_MASK));
|
||||
}
|
||||
|
||||
Range range(0, dst.rows);
|
||||
WarpAffineInvoker invoker(src, dst, interpolation, borderType,
|
||||
Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]),
|
||||
adelta, bdelta, M);
|
||||
parallel_for_(range, invoker, dst.total() / (double)(1 << 16));
|
||||
return CV_HAL_ERROR_OK;
|
||||
}
|
||||
|
||||
|
216
3rdparty/ndsrvp/src/warpPerspective.cpp
vendored
216
3rdparty/ndsrvp/src/warpPerspective.cpp
vendored
@ -3,154 +3,90 @@
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "ndsrvp_hal.hpp"
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgproc/hal/interface.h"
|
||||
#include "cvutils.hpp"
|
||||
|
||||
namespace cv {
|
||||
|
||||
namespace ndsrvp {
|
||||
|
||||
class WarpPerspectiveInvoker : public ParallelLoopBody {
|
||||
public:
|
||||
WarpPerspectiveInvoker(const Mat& _src, Mat& _dst, const double* _M, int _interpolation,
|
||||
int _borderType, const Scalar& _borderValue)
|
||||
: ParallelLoopBody()
|
||||
, src(_src)
|
||||
, dst(_dst)
|
||||
, M(_M)
|
||||
, interpolation(_interpolation)
|
||||
, borderType(_borderType)
|
||||
, borderValue(_borderValue)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void operator()(const Range& range) const CV_OVERRIDE
|
||||
{
|
||||
const int BLOCK_SZ = 32;
|
||||
short XY[BLOCK_SZ * BLOCK_SZ * 2], A[BLOCK_SZ * BLOCK_SZ];
|
||||
int x, y, y1, width = dst.cols, height = dst.rows;
|
||||
|
||||
int bh0 = std::min(BLOCK_SZ / 2, height);
|
||||
int bw0 = std::min(BLOCK_SZ * BLOCK_SZ / bh0, width);
|
||||
bh0 = std::min(BLOCK_SZ * BLOCK_SZ / bw0, height);
|
||||
|
||||
for (y = range.start; y < range.end; y += bh0) {
|
||||
for (x = 0; x < width; x += bw0) {
|
||||
int bw = std::min(bw0, width - x);
|
||||
int bh = std::min(bh0, range.end - y); // height
|
||||
|
||||
Mat _XY(bh, bw, CV_16SC2, XY);
|
||||
Mat dpart(dst, Rect(x, y, bw, bh));
|
||||
|
||||
for (y1 = 0; y1 < bh; y1++) {
|
||||
short* xy = XY + y1 * bw * 2;
|
||||
double X0 = M[0] * x + M[1] * (y + y1) + M[2];
|
||||
double Y0 = M[3] * x + M[4] * (y + y1) + M[5];
|
||||
double W0 = M[6] * x + M[7] * (y + y1) + M[8];
|
||||
|
||||
if (interpolation == CV_HAL_INTER_NEAREST) {
|
||||
int x1 = 0;
|
||||
|
||||
for (; x1 < bw; x1 += 2) {
|
||||
double W1 = W0 + M[6] * x1, W2 = W1 + M[6];
|
||||
W1 = W1 ? 1. / W1 : 0;
|
||||
W2 = W2 ? 1. / W2 : 0;
|
||||
double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1));
|
||||
double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2));
|
||||
double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1));
|
||||
double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2));
|
||||
|
||||
int32x2_t vX = {saturate_cast<int>(fX1), saturate_cast<int>(fX2)};
|
||||
int32x2_t vY = {saturate_cast<int>(fY1), saturate_cast<int>(fY2)};
|
||||
|
||||
vX = __nds__v_sclip32(vX, 15);
|
||||
vY = __nds__v_sclip32(vY, 15);
|
||||
|
||||
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX);
|
||||
}
|
||||
|
||||
for (; x1 < bw; x1++) {
|
||||
double W = W0 + M[6] * x1;
|
||||
W = W ? 1. / W : 0;
|
||||
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W));
|
||||
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W));
|
||||
int X = saturate_cast<int>(fX);
|
||||
int Y = saturate_cast<int>(fY);
|
||||
|
||||
xy[x1 * 2] = saturate_cast<short>(X);
|
||||
xy[x1 * 2 + 1] = saturate_cast<short>(Y);
|
||||
}
|
||||
} else {
|
||||
short* alpha = A + y1 * bw;
|
||||
int x1 = 0;
|
||||
|
||||
const int INTER_MASK = INTER_TAB_SIZE - 1;
|
||||
const uint32x2_t vmask = { INTER_MASK, INTER_MASK };
|
||||
for (; x1 < bw; x1 += 2) {
|
||||
double W1 = W0 + M[6] * x1, W2 = W1 + M[6];
|
||||
W1 = W1 ? INTER_TAB_SIZE / W1 : 0;
|
||||
W2 = W2 ? INTER_TAB_SIZE / W2 : 0;
|
||||
double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1));
|
||||
double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2));
|
||||
double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1));
|
||||
double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2));
|
||||
|
||||
int32x2_t vX = {saturate_cast<int>(fX1), saturate_cast<int>(fX2)};
|
||||
int32x2_t vY = {saturate_cast<int>(fY1), saturate_cast<int>(fY2)};
|
||||
|
||||
int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15);
|
||||
int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15);
|
||||
|
||||
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx);
|
||||
|
||||
uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask));
|
||||
*(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) };
|
||||
}
|
||||
|
||||
for (; x1 < bw; x1++) {
|
||||
double W = W0 + M[6] * x1;
|
||||
W = W ? INTER_TAB_SIZE / W : 0;
|
||||
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W));
|
||||
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W));
|
||||
int X = saturate_cast<int>(fX);
|
||||
int Y = saturate_cast<int>(fY);
|
||||
|
||||
xy[x1 * 2] = saturate_cast<short>(X >> INTER_BITS);
|
||||
xy[x1 * 2 + 1] = saturate_cast<short>(Y >> INTER_BITS);
|
||||
alpha[x1] = (short)((Y & (INTER_TAB_SIZE - 1)) * INTER_TAB_SIZE + (X & (INTER_TAB_SIZE - 1)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (interpolation == CV_HAL_INTER_NEAREST)
|
||||
remap(src, dpart, _XY, Mat(), interpolation, borderType, borderValue);
|
||||
else {
|
||||
Mat _matA(bh, bw, CV_16U, A);
|
||||
remap(src, dpart, _XY, _matA, interpolation, borderType, borderValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Mat src;
|
||||
Mat dst;
|
||||
const double* M;
|
||||
int interpolation, borderType;
|
||||
Scalar borderValue;
|
||||
};
|
||||
|
||||
int warpPerspective(int src_type,
|
||||
const uchar* src_data, size_t src_step, int src_width, int src_height,
|
||||
uchar* dst_data, size_t dst_step, int dst_width, int dst_height,
|
||||
const double M[9], int interpolation, int borderType, const double borderValue[4])
|
||||
int warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw)
|
||||
{
|
||||
Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
|
||||
Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
|
||||
int x1 = 0;
|
||||
|
||||
for (; x1 < bw; x1 += 2) {
|
||||
double W1 = W0 + M[6] * x1, W2 = W1 + M[6];
|
||||
W1 = W1 ? 1. / W1 : 0;
|
||||
W2 = W2 ? 1. / W2 : 0;
|
||||
double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1));
|
||||
double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2));
|
||||
double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1));
|
||||
double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2));
|
||||
|
||||
int32x2_t vX = {saturate_cast<int>(fX1), saturate_cast<int>(fX2)};
|
||||
int32x2_t vY = {saturate_cast<int>(fY1), saturate_cast<int>(fY2)};
|
||||
|
||||
vX = __nds__v_sclip32(vX, 15);
|
||||
vY = __nds__v_sclip32(vY, 15);
|
||||
|
||||
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX);
|
||||
}
|
||||
|
||||
for (; x1 < bw; x1++) {
|
||||
double W = W0 + M[6] * x1;
|
||||
W = W ? 1. / W : 0;
|
||||
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W));
|
||||
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W));
|
||||
int X = saturate_cast<int>(fX);
|
||||
int Y = saturate_cast<int>(fY);
|
||||
|
||||
xy[x1 * 2] = saturate_cast<short>(X);
|
||||
xy[x1 * 2 + 1] = saturate_cast<short>(Y);
|
||||
}
|
||||
|
||||
return CV_HAL_ERROR_OK;
|
||||
}
|
||||
|
||||
int warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw)
|
||||
{
|
||||
int x1 = 0;
|
||||
|
||||
const int INTER_MASK = INTER_TAB_SIZE - 1;
|
||||
const uint32x2_t vmask = { INTER_MASK, INTER_MASK };
|
||||
for (; x1 < bw; x1 += 2) {
|
||||
double W1 = W0 + M[6] * x1, W2 = W1 + M[6];
|
||||
W1 = W1 ? INTER_TAB_SIZE / W1 : 0;
|
||||
W2 = W2 ? INTER_TAB_SIZE / W2 : 0;
|
||||
double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1));
|
||||
double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2));
|
||||
double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1));
|
||||
double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2));
|
||||
|
||||
int32x2_t vX = {saturate_cast<int>(fX1), saturate_cast<int>(fX2)};
|
||||
int32x2_t vY = {saturate_cast<int>(fY1), saturate_cast<int>(fY2)};
|
||||
|
||||
int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15);
|
||||
int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15);
|
||||
|
||||
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx);
|
||||
|
||||
uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask));
|
||||
*(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) };
|
||||
}
|
||||
|
||||
for (; x1 < bw; x1++) {
|
||||
double W = W0 + M[6] * x1;
|
||||
W = W ? INTER_TAB_SIZE / W : 0;
|
||||
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W));
|
||||
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W));
|
||||
int X = saturate_cast<int>(fX);
|
||||
int Y = saturate_cast<int>(fY);
|
||||
|
||||
xy[x1 * 2] = saturate_cast<short>(X >> INTER_BITS);
|
||||
xy[x1 * 2 + 1] = saturate_cast<short>(Y >> INTER_BITS);
|
||||
alpha[x1] = (short)((Y & INTER_MASK) * INTER_TAB_SIZE + (X & INTER_MASK));
|
||||
}
|
||||
|
||||
Range range(0, dst.rows);
|
||||
WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]));
|
||||
parallel_for_(range, invoker, dst.total() / (double)(1 << 16));
|
||||
return CV_HAL_ERROR_OK;
|
||||
}
|
||||
|
||||
|
@ -1040,7 +1040,7 @@ foreach(hal ${OpenCV_HAL})
|
||||
ocv_hal_register(NDSRVP_HAL_LIBRARIES NDSRVP_HAL_HEADERS NDSRVP_HAL_INCLUDE_DIRS)
|
||||
list(APPEND OpenCV_USED_HAL "ndsrvp (ver ${NDSRVP_HAL_VERSION})")
|
||||
else()
|
||||
message(STATUS "NDSRVP: Andes GNU Toolchain DSP extension is not open, disabling ndsrvp...")
|
||||
message(STATUS "NDSRVP: Andes GNU Toolchain DSP extension is not enabled, disabling ndsrvp...")
|
||||
endif()
|
||||
elseif(hal STREQUAL "halrvv")
|
||||
if(";${CPU_BASELINE_FINAL};" MATCHES ";RVV;")
|
||||
|
@ -108,11 +108,19 @@ CV_EXPORTS void warpAffine(int src_type,
|
||||
uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
|
||||
const double M[6], int interpolation, int borderType, const double borderValue[4]);
|
||||
|
||||
//! Computes one row of (x', y') map coordinates for a nearest-neighbour affine warp:
//! xy receives bw interleaved pairs built from X0 + adelta[i] and Y0 + bdelta[i].
CV_EXPORTS void warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw);
|
||||
|
||||
//! Computes one row of map coordinates for an interpolating affine warp: xy receives bw
//! interleaved integer (x', y') pairs and alpha the matching fractional table indices.
CV_EXPORTS void warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw);
|
||||
|
||||
CV_EXPORTS void warpPerspective(int src_type,
|
||||
const uchar * src_data, size_t src_step, int src_width, int src_height,
|
||||
uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
|
||||
const double M[9], int interpolation, int borderType, const double borderValue[4]);
|
||||
|
||||
//! Computes one row of (x', y') map coordinates for a nearest-neighbour perspective warp:
//! xy receives bw interleaved pairs; X0, Y0 and W0 are the homogeneous values at column 0.
CV_EXPORTS void warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw);
|
||||
|
||||
//! Computes one row of map coordinates for an interpolating perspective warp: xy receives bw
//! interleaved integer (x', y') pairs and alpha the matching fractional table indices.
CV_EXPORTS void warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw);
|
||||
|
||||
CV_EXPORTS void cvtBGRtoBGR(const uchar * src_data, size_t src_step,
|
||||
uchar * dst_data, size_t dst_step,
|
||||
int width, int height,
|
||||
|
@ -12,6 +12,12 @@
|
||||
#define CV_HAL_INTER_CUBIC 2
|
||||
#define CV_HAL_INTER_AREA 3
|
||||
#define CV_HAL_INTER_LANCZOS4 4
|
||||
#define CV_HAL_INTER_LINEAR_EXACT 5
|
||||
#define CV_HAL_INTER_NEAREST_EXACT 6
|
||||
#define CV_HAL_INTER_MAX 7
|
||||
#define CV_HAL_WARP_FILL_OUTLIERS 8
|
||||
#define CV_HAL_WARP_INVERSE_MAP 16
|
||||
#define CV_HAL_WARP_RELATIVE_MAP 32
|
||||
//! @}
|
||||
|
||||
//! @name Morphology operations
|
||||
|
@ -273,6 +273,29 @@ inline int hal_ni_resize(int src_type, const uchar *src_data, size_t src_step, i
|
||||
@sa cv::warpAffine, cv::hal::warpAffine
|
||||
*/
|
||||
inline int hal_ni_warpAffine(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[6], int interpolation, int borderType, const double borderValue[4]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||
/**
|
||||
@brief hal_warpAffineBlocklineNN computes the (x', y') coordinates for one row of a nearest-neighbor affine transformation
|
||||
@param adelta input M0 * x array
|
||||
@param bdelta input M3 * x array
|
||||
@param xy output (x', y') coordinates
|
||||
@param X0 input M1 * y + M2 value
|
||||
@param Y0 input M4 * y + M5 value
|
||||
@param bw length of the row
|
||||
@sa cv::warpAffineBlocklineNN, cv::hal::warpAffineBlocklineNN
|
||||
*/
|
||||
inline int hal_ni_warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||
/**
|
||||
@brief hal_warpAffineBlockline computes the (x', y') coordinates and interpolation indices for one row of an affine transformation
|
||||
@param adelta input M0 * x array
|
||||
@param bdelta input M3 * x array
|
||||
@param xy output (x', y') coordinates
|
||||
@param alpha output least significant bits of the (x', y') coordinates for interpolation
|
||||
@param X0 input M1 * y + M2 value
|
||||
@param Y0 input M4 * y + M5 value
|
||||
@param bw length of the row
|
||||
@sa cv::warpAffineBlockline, cv::hal::warpAffineBlockline
|
||||
*/
|
||||
inline int hal_ni_warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||
/**
|
||||
@brief hal_warpPerspective
|
||||
@param src_type source and destination image type
|
||||
@ -291,11 +314,38 @@ inline int hal_ni_warpAffine(int src_type, const uchar *src_data, size_t src_ste
|
||||
@sa cv::warpPerspective, cv::hal::warpPerspective
|
||||
*/
|
||||
inline int hal_ni_warpPerspective(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[9], int interpolation, int borderType, const double borderValue[4]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||
/**
|
||||
@brief hal_warpPerspectiveBlocklineNN computes the (x', y') coordinates for one row of a nearest-neighbor perspective transformation
|
||||
@param M 3x3 matrix with transform coefficients
|
||||
@param xy output (x', y') coordinates
|
||||
@param X0 input M0 * x0 + M1 * y + M2 value
|
||||
@param Y0 input M3 * x0 + M4 * y + M5 value
|
||||
@param W0 input M6 * x0 + M7 * y + M8 value
|
||||
@param bw length of the row
|
||||
@sa cv::warpPerspectiveBlocklineNN, cv::hal::warpPerspectiveBlocklineNN
|
||||
*/
|
||||
inline int hal_ni_warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||
/**
|
||||
@brief hal_warpPerspectiveBlockline computes the (x', y') coordinates and interpolation indices for one row of a perspective transformation
|
||||
@param M 3x3 matrix with transform coefficients
|
||||
@param xy output (x', y') coordinates
|
||||
@param alpha output least significant bits of the (x', y') coordinates for interpolation
|
||||
@param X0 input M0 * x0 + M1 * y + M2 value
|
||||
@param Y0 input M3 * x0 + M4 * y + M5 value
|
||||
@param W0 input M6 * x0 + M7 * y + M8 value
|
||||
@param bw length of the row
|
||||
@sa cv::warpPerspectiveBlockline, cv::hal::warpPerspectiveBlockline
|
||||
*/
|
||||
inline int hal_ni_warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||
|
||||
//! @cond IGNORED
|
||||
#define cv_hal_resize hal_ni_resize
|
||||
#define cv_hal_warpAffine hal_ni_warpAffine
|
||||
#define cv_hal_warpAffineBlocklineNN hal_ni_warpAffineBlocklineNN
|
||||
#define cv_hal_warpAffineBlockline hal_ni_warpAffineBlockline
|
||||
#define cv_hal_warpPerspective hal_ni_warpPerspective
|
||||
#define cv_hal_warpPerspectiveBlocklineNN hal_ni_warpPerspectiveBlocklineNN
|
||||
#define cv_hal_warpPerspectiveBlockline hal_ni_warpPerspectiveBlockline
|
||||
//! @endcond
|
||||
|
||||
/**
|
||||
|
@ -2268,16 +2268,7 @@ public:
|
||||
short *XY = __XY.data(), *A = __A.data();
|
||||
const int AB_BITS = MAX(10, (int)INTER_BITS);
|
||||
const int AB_SCALE = 1 << AB_BITS;
|
||||
int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1;
|
||||
#if CV_TRY_AVX2
|
||||
bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2;
|
||||
#endif
|
||||
#if CV_TRY_SSE4_1
|
||||
bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
|
||||
#endif
|
||||
#if CV_TRY_LASX
|
||||
bool useLASX = CV_CPU_HAS_SUPPORT_LASX;
|
||||
#endif
|
||||
int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, y1;
|
||||
|
||||
int bh0 = std::min(BLOCK_SZ/2, dst.rows);
|
||||
int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, dst.cols);
|
||||
@ -2300,84 +2291,9 @@ public:
|
||||
int Y0 = saturate_cast<int>((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta;
|
||||
|
||||
if( interpolation == INTER_NEAREST )
|
||||
{
|
||||
x1 = 0;
|
||||
#if CV_TRY_SSE4_1
|
||||
if( useSSE4_1 )
|
||||
opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta + x, bdelta + x, xy, X0, Y0, bw);
|
||||
else
|
||||
#endif
|
||||
{
|
||||
#if CV_SIMD128
|
||||
{
|
||||
v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0);
|
||||
int span = VTraits<v_uint16x8>::vlanes();
|
||||
for( ; x1 <= bw - span; x1 += span )
|
||||
{
|
||||
v_int16x8 v_dst[2];
|
||||
#define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset))),\
|
||||
v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset + 4))))
|
||||
v_dst[0] = CV_CONVERT_MAP(adelta, x+x1, v_X0);
|
||||
v_dst[1] = CV_CONVERT_MAP(bdelta, x+x1, v_Y0);
|
||||
#undef CV_CONVERT_MAP
|
||||
v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for( ; x1 < bw; x1++ )
|
||||
{
|
||||
int X = (X0 + adelta[x+x1]) >> AB_BITS;
|
||||
int Y = (Y0 + bdelta[x+x1]) >> AB_BITS;
|
||||
xy[x1*2] = saturate_cast<short>(X);
|
||||
xy[x1*2+1] = saturate_cast<short>(Y);
|
||||
}
|
||||
}
|
||||
}
|
||||
hal::warpAffineBlocklineNN(adelta + x, bdelta + x, xy, X0, Y0, bw);
|
||||
else
|
||||
{
|
||||
short* alpha = A + y1*bw;
|
||||
x1 = 0;
|
||||
#if CV_TRY_AVX2
|
||||
if ( useAVX2 )
|
||||
x1 = opt_AVX2::warpAffineBlockline(adelta + x, bdelta + x, xy, alpha, X0, Y0, bw);
|
||||
#endif
|
||||
#if CV_TRY_LASX
|
||||
if ( useLASX )
|
||||
x1 = opt_LASX::warpAffineBlockline(adelta + x, bdelta + x, xy, alpha, X0, Y0, bw);
|
||||
#endif
|
||||
#if CV_SIMD128
|
||||
{
|
||||
v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0);
|
||||
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
|
||||
int span = VTraits<v_float32x4>::vlanes();
|
||||
for( ; x1 <= bw - span * 2; x1 += span * 2 )
|
||||
{
|
||||
v_int32x4 v_X0 = v_shr<AB_BITS - INTER_BITS>(v_add(v__X0, v_load(this->adelta + x + x1)));
|
||||
v_int32x4 v_Y0 = v_shr<AB_BITS - INTER_BITS>(v_add(v__Y0, v_load(this->bdelta + x + x1)));
|
||||
v_int32x4 v_X1 = v_shr<AB_BITS - INTER_BITS>(v_add(v__X0, v_load(this->adelta + x + x1 + span)));
|
||||
v_int32x4 v_Y1 = v_shr<AB_BITS - INTER_BITS>(v_add(v__Y0, v_load(this->bdelta + x + x1 + span)));
|
||||
|
||||
v_int16x8 v_xy[2];
|
||||
v_xy[0] = v_pack(v_shr<INTER_BITS>(v_X0), v_shr<INTER_BITS>(v_X1));
|
||||
v_xy[1] = v_pack(v_shr<INTER_BITS>(v_Y0), v_shr<INTER_BITS>(v_Y1));
|
||||
v_store_interleave(xy + (x1 << 1), v_xy[0], v_xy[1]);
|
||||
|
||||
v_int32x4 v_alpha0 = v_or(v_shl<INTER_BITS>(v_and(v_Y0, v_mask)), v_and(v_X0, v_mask));
|
||||
v_int32x4 v_alpha1 = v_or(v_shl<INTER_BITS>(v_and(v_Y1, v_mask)), v_and(v_X1, v_mask));
|
||||
v_store(alpha + x1, v_pack(v_alpha0, v_alpha1));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for( ; x1 < bw; x1++ )
|
||||
{
|
||||
int X = (X0 + adelta[x+x1]) >> (AB_BITS - INTER_BITS);
|
||||
int Y = (Y0 + bdelta[x+x1]) >> (AB_BITS - INTER_BITS);
|
||||
xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
|
||||
xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
|
||||
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
|
||||
(X & (INTER_TAB_SIZE-1)));
|
||||
}
|
||||
}
|
||||
hal::warpAffineBlockline(adelta + x, bdelta + x, xy, A + y1*bw, X0, Y0, bw);
|
||||
}
|
||||
|
||||
if( interpolation == INTER_NEAREST )
|
||||
@ -2802,6 +2718,97 @@ void warpAffine(int src_type,
|
||||
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
|
||||
}
|
||||
|
||||
void warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw)
|
||||
{
|
||||
CALL_HAL(warpAffineBlocklineNN, cv_hal_warpAffineBlocklineNN, adelta, bdelta, xy, X0, Y0, bw);
|
||||
|
||||
const int AB_BITS = MAX(10, (int)INTER_BITS);
|
||||
int x1 = 0;
|
||||
#if CV_TRY_SSE4_1
|
||||
bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
|
||||
if( useSSE4_1 )
|
||||
opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta, bdelta, xy, X0, Y0, bw);
|
||||
else
|
||||
#endif
|
||||
{
|
||||
#if CV_SIMD128
|
||||
{
|
||||
v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0);
|
||||
int span = VTraits<v_uint16x8>::vlanes();
|
||||
for( ; x1 <= bw - span; x1 += span )
|
||||
{
|
||||
v_int16x8 v_dst[2];
|
||||
#define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset))),\
|
||||
v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset + 4))))
|
||||
v_dst[0] = CV_CONVERT_MAP(adelta, x1, v_X0);
|
||||
v_dst[1] = CV_CONVERT_MAP(bdelta, x1, v_Y0);
|
||||
#undef CV_CONVERT_MAP
|
||||
v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for( ; x1 < bw; x1++ )
|
||||
{
|
||||
int X = (X0 + adelta[x1]) >> AB_BITS;
|
||||
int Y = (Y0 + bdelta[x1]) >> AB_BITS;
|
||||
xy[x1*2] = saturate_cast<short>(X);
|
||||
xy[x1*2+1] = saturate_cast<short>(Y);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw)
|
||||
{
|
||||
CALL_HAL(warpAffineBlockline, cv_hal_warpAffineBlockline, adelta, bdelta, xy, alpha, X0, Y0, bw);
|
||||
|
||||
const int AB_BITS = MAX(10, (int)INTER_BITS);
|
||||
int x1 = 0;
|
||||
#if CV_TRY_AVX2
|
||||
bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2;
|
||||
if ( useAVX2 )
|
||||
x1 = opt_AVX2::warpAffineBlockline(adelta, bdelta, xy, alpha, X0, Y0, bw);
|
||||
#endif
|
||||
#if CV_TRY_LASX
|
||||
bool useLASX = CV_CPU_HAS_SUPPORT_LASX;
|
||||
if ( useLASX )
|
||||
x1 = opt_LASX::warpAffineBlockline(adelta, bdelta, xy, alpha, X0, Y0, bw);
|
||||
#endif
|
||||
{
|
||||
#if CV_SIMD128
|
||||
{
|
||||
v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0);
|
||||
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
|
||||
int span = VTraits<v_float32x4>::vlanes();
|
||||
for( ; x1 <= bw - span * 2; x1 += span * 2 )
|
||||
{
|
||||
v_int32x4 v_X0 = v_shr<AB_BITS - INTER_BITS>(v_add(v__X0, v_load(adelta + x1)));
|
||||
v_int32x4 v_Y0 = v_shr<AB_BITS - INTER_BITS>(v_add(v__Y0, v_load(bdelta + x1)));
|
||||
v_int32x4 v_X1 = v_shr<AB_BITS - INTER_BITS>(v_add(v__X0, v_load(adelta + x1 + span)));
|
||||
v_int32x4 v_Y1 = v_shr<AB_BITS - INTER_BITS>(v_add(v__Y0, v_load(bdelta + x1 + span)));
|
||||
|
||||
v_int16x8 v_xy[2];
|
||||
v_xy[0] = v_pack(v_shr<INTER_BITS>(v_X0), v_shr<INTER_BITS>(v_X1));
|
||||
v_xy[1] = v_pack(v_shr<INTER_BITS>(v_Y0), v_shr<INTER_BITS>(v_Y1));
|
||||
v_store_interleave(xy + (x1 << 1), v_xy[0], v_xy[1]);
|
||||
|
||||
v_int32x4 v_alpha0 = v_or(v_shl<INTER_BITS>(v_and(v_Y0, v_mask)), v_and(v_X0, v_mask));
|
||||
v_int32x4 v_alpha1 = v_or(v_shl<INTER_BITS>(v_and(v_Y1, v_mask)), v_and(v_X1, v_mask));
|
||||
v_store(alpha + x1, v_pack(v_alpha0, v_alpha1));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for( ; x1 < bw; x1++ )
|
||||
{
|
||||
int X = (X0 + adelta[x1]) >> (AB_BITS - INTER_BITS);
|
||||
int Y = (Y0 + bdelta[x1]) >> (AB_BITS - INTER_BITS);
|
||||
xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
|
||||
xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
|
||||
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
|
||||
(X & (INTER_TAB_SIZE-1)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // hal::
|
||||
} // cv::
|
||||
|
||||
@ -3204,12 +3211,6 @@ public:
|
||||
int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
|
||||
bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
|
||||
|
||||
#if CV_TRY_SSE4_1
|
||||
Ptr<opt_SSE4_1::WarpPerspectiveLine_SSE4> pwarp_impl_sse4;
|
||||
if(CV_CPU_HAS_SUPPORT_SSE4_1)
|
||||
pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M);
|
||||
#endif
|
||||
|
||||
for( y = range.start; y < range.end; y += bh0 )
|
||||
{
|
||||
for( x = 0; x < width; x += bw0 )
|
||||
@ -3228,57 +3229,9 @@ public:
|
||||
double W0 = M[6]*x + M[7]*(y + y1) + M[8];
|
||||
|
||||
if( interpolation == INTER_NEAREST )
|
||||
{
|
||||
#if CV_TRY_SSE4_1
|
||||
if (pwarp_impl_sse4)
|
||||
pwarp_impl_sse4->processNN(M, xy, X0, Y0, W0, bw);
|
||||
else
|
||||
#endif
|
||||
#if CV_SIMD128_64F
|
||||
WarpPerspectiveLine_ProcessNN_CV_SIMD(M, xy, X0, Y0, W0, bw);
|
||||
#else
|
||||
for( int x1 = 0; x1 < bw; x1++ )
|
||||
{
|
||||
double W = W0 + M[6]*x1;
|
||||
W = W ? 1./W : 0;
|
||||
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
|
||||
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
|
||||
int X = saturate_cast<int>(fX);
|
||||
int Y = saturate_cast<int>(fY);
|
||||
|
||||
xy[x1*2] = saturate_cast<short>(X);
|
||||
xy[x1*2+1] = saturate_cast<short>(Y);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
hal::warpPerspectiveBlocklineNN(M, xy, X0, Y0, W0, bw);
|
||||
else
|
||||
{
|
||||
short* alpha = A + y1*bw;
|
||||
|
||||
#if CV_TRY_SSE4_1
|
||||
if (pwarp_impl_sse4)
|
||||
pwarp_impl_sse4->process(M, xy, alpha, X0, Y0, W0, bw);
|
||||
else
|
||||
#endif
|
||||
#if CV_SIMD128_64F
|
||||
WarpPerspectiveLine_Process_CV_SIMD(M, xy, alpha, X0, Y0, W0, bw);
|
||||
#else
|
||||
for( int x1 = 0; x1 < bw; x1++ )
|
||||
{
|
||||
double W = W0 + M[6]*x1;
|
||||
W = W ? INTER_TAB_SIZE/W : 0;
|
||||
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
|
||||
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
|
||||
int X = saturate_cast<int>(fX);
|
||||
int Y = saturate_cast<int>(fY);
|
||||
|
||||
xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
|
||||
xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
|
||||
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
|
||||
(X & (INTER_TAB_SIZE-1)));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
hal::warpPerspectiveBlockline(M, xy, A + y1*bw, X0, Y0, W0, bw);
|
||||
}
|
||||
|
||||
if( interpolation == INTER_NEAREST )
|
||||
@ -3371,6 +3324,74 @@ void warpPerspective(int src_type,
|
||||
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
|
||||
}
|
||||
|
||||
void warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw)
|
||||
{
|
||||
CALL_HAL(warpPerspectiveBlocklineNN, cv_hal_warpPerspectiveBlocklineNN, M, xy, X0, Y0, W0, bw);
|
||||
|
||||
#if CV_TRY_SSE4_1
|
||||
Ptr<opt_SSE4_1::WarpPerspectiveLine_SSE4> pwarp_impl_sse4;
|
||||
if(CV_CPU_HAS_SUPPORT_SSE4_1)
|
||||
pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M);
|
||||
|
||||
if (pwarp_impl_sse4)
|
||||
pwarp_impl_sse4->processNN(M, xy, X0, Y0, W0, bw);
|
||||
else
|
||||
#endif
|
||||
{
|
||||
#if CV_SIMD128_64F
|
||||
WarpPerspectiveLine_ProcessNN_CV_SIMD(M, xy, X0, Y0, W0, bw);
|
||||
#else
|
||||
for( int x1 = 0; x1 < bw; x1++ )
|
||||
{
|
||||
double W = W0 + M[6]*x1;
|
||||
W = W ? 1./W : 0;
|
||||
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
|
||||
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
|
||||
int X = saturate_cast<int>(fX);
|
||||
int Y = saturate_cast<int>(fY);
|
||||
|
||||
xy[x1*2] = saturate_cast<short>(X);
|
||||
xy[x1*2+1] = saturate_cast<short>(Y);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw)
|
||||
{
|
||||
CALL_HAL(warpPerspectiveBlockline, cv_hal_warpPerspectiveBlockline, M, xy, alpha, X0, Y0, W0, bw);
|
||||
|
||||
#if CV_TRY_SSE4_1
|
||||
Ptr<opt_SSE4_1::WarpPerspectiveLine_SSE4> pwarp_impl_sse4;
|
||||
if(CV_CPU_HAS_SUPPORT_SSE4_1)
|
||||
pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M);
|
||||
|
||||
if (pwarp_impl_sse4)
|
||||
pwarp_impl_sse4->process(M, xy, alpha, X0, Y0, W0, bw);
|
||||
else
|
||||
#endif
|
||||
{
|
||||
#if CV_SIMD128_64F
|
||||
WarpPerspectiveLine_Process_CV_SIMD(M, xy, alpha, X0, Y0, W0, bw);
|
||||
#else
|
||||
for( int x1 = 0; x1 < bw; x1++ )
|
||||
{
|
||||
double W = W0 + M[6]*x1;
|
||||
W = W ? INTER_TAB_SIZE/W : 0;
|
||||
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
|
||||
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
|
||||
int X = saturate_cast<int>(fX);
|
||||
int Y = saturate_cast<int>(fY);
|
||||
|
||||
xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
|
||||
xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
|
||||
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
|
||||
(X & (INTER_TAB_SIZE-1)));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
} // hal::
|
||||
} // cv::
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user