mirror of
https://github.com/opencv/opencv.git
synced 2025-08-02 03:06:29 +08:00
Merge pull request #25506 from savuor:rv/hal_mul16
HAL mul8x8to16 added #25506

Fixes #25034

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
83e32c4d37
commit
69af621ef6
@ -585,9 +585,14 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
typedef int (*ExtendedTypeFunc)(const uchar* src1, size_t step1,
|
||||||
|
const uchar* src2, size_t step2,
|
||||||
|
uchar* dst, size_t step, int width, int height,
|
||||||
|
void*);
|
||||||
|
|
||||||
static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||||
InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false,
|
InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false,
|
||||||
void* usrdata=0, int oclop=-1 )
|
void* usrdata=0, int oclop=-1, ExtendedTypeFunc extendedFunc = nullptr )
|
||||||
{
|
{
|
||||||
const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
|
const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
|
||||||
_InputArray::KindFlag kind1 = psrc1->kind(), kind2 = psrc2->kind();
|
_InputArray::KindFlag kind1 = psrc1->kind(), kind2 = psrc2->kind();
|
||||||
@ -617,9 +622,13 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
|||||||
|
|
||||||
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
|
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
|
||||||
Size sz = getContinuousSize2D(src1, src2, dst, src1.channels());
|
Size sz = getContinuousSize2D(src1, src2, dst, src1.channels());
|
||||||
|
if (!extendedFunc || extendedFunc(src1.ptr(), src1.step, src2.ptr(), src2.step,
|
||||||
|
dst.ptr(), dst.step, sz.width, sz.height, usrdata) != 0)
|
||||||
|
{
|
||||||
BinaryFuncC func = tab[depth1];
|
BinaryFuncC func = tab[depth1];
|
||||||
CV_Assert(func);
|
CV_Assert(func);
|
||||||
func(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata);
|
func(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -750,14 +759,22 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
|||||||
_buf.allocate(bufesz*blocksize + 64);
|
_buf.allocate(bufesz*blocksize + 64);
|
||||||
buf = _buf.data();
|
buf = _buf.data();
|
||||||
if( cvtsrc1 )
|
if( cvtsrc1 )
|
||||||
|
{
|
||||||
buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
|
buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
|
||||||
|
}
|
||||||
if( cvtsrc2 )
|
if( cvtsrc2 )
|
||||||
|
{
|
||||||
buf2 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
|
buf2 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
|
||||||
|
}
|
||||||
wbuf = maskbuf = buf;
|
wbuf = maskbuf = buf;
|
||||||
if( cvtdst )
|
if( cvtdst )
|
||||||
|
{
|
||||||
buf = alignPtr(buf + blocksize*wsz, 16);
|
buf = alignPtr(buf + blocksize*wsz, 16);
|
||||||
|
}
|
||||||
if( haveMask )
|
if( haveMask )
|
||||||
|
{
|
||||||
maskbuf = buf;
|
maskbuf = buf;
|
||||||
|
}
|
||||||
|
|
||||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||||
{
|
{
|
||||||
@ -767,38 +784,44 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
|||||||
Size bszn(bsz*cn, 1);
|
Size bszn(bsz*cn, 1);
|
||||||
const uchar *sptr1 = ptrs[0], *sptr2 = ptrs[1];
|
const uchar *sptr1 = ptrs[0], *sptr2 = ptrs[1];
|
||||||
uchar* dptr = ptrs[2];
|
uchar* dptr = ptrs[2];
|
||||||
|
// try to perform operation with conversion in one call
|
||||||
|
// if fail, use converter functions
|
||||||
|
uchar* opconverted = haveMask ? maskbuf : dptr;
|
||||||
|
if (!extendedFunc || extendedFunc(sptr1, 1, sptr2, 1, opconverted, (!haveMask),
|
||||||
|
bszn.width, bszn.height, usrdata) != 0)
|
||||||
|
{
|
||||||
if( cvtsrc1 )
|
if( cvtsrc1 )
|
||||||
{
|
{
|
||||||
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
|
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
|
||||||
sptr1 = buf1;
|
sptr1 = buf1;
|
||||||
}
|
}
|
||||||
if( ptrs[0] == ptrs[1] )
|
if( ptrs[0] == ptrs[1] )
|
||||||
|
{
|
||||||
sptr2 = sptr1;
|
sptr2 = sptr1;
|
||||||
|
}
|
||||||
else if( cvtsrc2 )
|
else if( cvtsrc2 )
|
||||||
{
|
{
|
||||||
cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 );
|
cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 );
|
||||||
sptr2 = buf2;
|
sptr2 = buf2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !haveMask && !cvtdst )
|
uchar* fdst = (haveMask || cvtdst) ? wbuf : dptr;
|
||||||
func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
|
func(sptr1, 1, sptr2, 1, fdst, (!haveMask && !cvtdst), bszn.width, bszn.height, usrdata);
|
||||||
else
|
|
||||||
|
if (cvtdst)
|
||||||
{
|
{
|
||||||
func( sptr1, 1, sptr2, 1, wbuf, 0, bszn.width, bszn.height, usrdata );
|
uchar* cdst = haveMask ? maskbuf : dptr;
|
||||||
if( !haveMask )
|
cvtdst(wbuf, 1, 0, 1, cdst, 1, bszn, 0);
|
||||||
cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
|
}
|
||||||
else if( !cvtdst )
|
opconverted = cvtdst ? maskbuf : wbuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (haveMask)
|
||||||
{
|
{
|
||||||
copymask( wbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
|
copymask(opconverted, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz);
|
||||||
ptrs[3] += bsz;
|
ptrs[3] += bsz;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
|
|
||||||
copymask( maskbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
|
|
||||||
ptrs[3] += bsz;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz2; ptrs[2] += bsz*dsz;
|
ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz2; ptrs[2] += bsz*dsz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -814,13 +837,19 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
|||||||
_buf.allocate(bufesz*blocksize + 64);
|
_buf.allocate(bufesz*blocksize + 64);
|
||||||
buf = _buf.data();
|
buf = _buf.data();
|
||||||
if( cvtsrc1 )
|
if( cvtsrc1 )
|
||||||
|
{
|
||||||
buf1 = buf, buf = alignPtr(buf + blocksize * wsz, 16);
|
buf1 = buf, buf = alignPtr(buf + blocksize * wsz, 16);
|
||||||
|
}
|
||||||
buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16);
|
buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16);
|
||||||
wbuf = maskbuf = buf;
|
wbuf = maskbuf = buf;
|
||||||
if( cvtdst )
|
if( cvtdst )
|
||||||
|
{
|
||||||
buf = alignPtr(buf + blocksize * wsz, 16);
|
buf = alignPtr(buf + blocksize * wsz, 16);
|
||||||
|
}
|
||||||
if( haveMask )
|
if( haveMask )
|
||||||
|
{
|
||||||
maskbuf = buf;
|
maskbuf = buf;
|
||||||
|
}
|
||||||
|
|
||||||
convertAndUnrollScalar( src2, wtype, buf2, blocksize);
|
convertAndUnrollScalar( src2, wtype, buf2, blocksize);
|
||||||
|
|
||||||
@ -834,6 +863,17 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
|||||||
const uchar* sptr2 = buf2;
|
const uchar* sptr2 = buf2;
|
||||||
uchar* dptr = ptrs[1];
|
uchar* dptr = ptrs[1];
|
||||||
|
|
||||||
|
const uchar* extSptr1 = sptr1;
|
||||||
|
const uchar* extSptr2 = sptr2;
|
||||||
|
if( swapped12 )
|
||||||
|
std::swap(extSptr1, extSptr1);
|
||||||
|
|
||||||
|
// try to perform operation with conversion in one call
|
||||||
|
// if fail, use converter functions
|
||||||
|
uchar* opconverted = haveMask ? maskbuf : dptr;
|
||||||
|
if (!extendedFunc || extendedFunc(extSptr1, 1, extSptr2, 1, opconverted, 1,
|
||||||
|
bszn.width, bszn.height, usrdata) != 0)
|
||||||
|
{
|
||||||
if( cvtsrc1 )
|
if( cvtsrc1 )
|
||||||
{
|
{
|
||||||
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
|
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
|
||||||
@ -843,25 +883,23 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
|||||||
if( swapped12 )
|
if( swapped12 )
|
||||||
std::swap(sptr1, sptr2);
|
std::swap(sptr1, sptr2);
|
||||||
|
|
||||||
if( !haveMask && !cvtdst )
|
uchar* fdst = ( haveMask || cvtdst ) ? wbuf : dptr;
|
||||||
func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
|
func( sptr1, 1, sptr2, 1, fdst, 1, bszn.width, bszn.height, usrdata );
|
||||||
else
|
|
||||||
|
if (cvtdst)
|
||||||
{
|
{
|
||||||
func( sptr1, 1, sptr2, 1, wbuf, 1, bszn.width, bszn.height, usrdata );
|
uchar* cdst = haveMask ? maskbuf : dptr;
|
||||||
if( !haveMask )
|
cvtdst(wbuf, 1, 0, 1, cdst, 1, bszn, 0);
|
||||||
cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
|
}
|
||||||
else if( !cvtdst )
|
opconverted = cvtdst ? maskbuf : wbuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (haveMask)
|
||||||
{
|
{
|
||||||
copymask( wbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
|
copymask(opconverted, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz);
|
||||||
ptrs[2] += bsz;
|
ptrs[2] += bsz;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
|
|
||||||
copymask( maskbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
|
|
||||||
ptrs[2] += bsz;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz;
|
ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -949,6 +987,38 @@ void cv::copyTo(InputArray _src, OutputArray _dst, InputArray _mask)
|
|||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
|
|
||||||
|
static int mul8u16uWrapper(const uchar* src1, size_t step1,
|
||||||
|
const uchar* src2, size_t step2,
|
||||||
|
uchar* dst, size_t step, int width, int height,
|
||||||
|
void* usrdata)
|
||||||
|
{
|
||||||
|
double scale = *((double*)usrdata);
|
||||||
|
int res = cv_hal_mul8u16u(src1, step1, src2, step2, (ushort *)dst, step, width, height, scale);
|
||||||
|
if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
|
||||||
|
return res;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
CV_Error_(cv::Error::StsInternal, ("HAL implementation mul8u16u ==> " CVAUX_STR(cv_hal_mul8u16u)
|
||||||
|
" returned %d (0x%08x)", res, res));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int mul8s16sWrapper(const uchar* src1, size_t step1,
|
||||||
|
const uchar* src2, size_t step2,
|
||||||
|
uchar* dst, size_t step, int width, int height,
|
||||||
|
void* usrdata)
|
||||||
|
{
|
||||||
|
double scale = *((double*)usrdata);
|
||||||
|
int res = cv_hal_mul8s16s((schar *)src1, step1, (schar *)src2, step2, (short *)dst, step, width, height, scale);
|
||||||
|
if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
|
||||||
|
return res;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
CV_Error_(cv::Error::StsInternal, ("HAL implementation mul8s16s ==> " CVAUX_STR(cv_hal_mul8s16s)
|
||||||
|
" returned %d (0x%08x)", res, res));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static BinaryFuncC* getMulTab()
|
static BinaryFuncC* getMulTab()
|
||||||
{
|
{
|
||||||
static BinaryFuncC mulTab[CV_DEPTH_MAX] =
|
static BinaryFuncC mulTab[CV_DEPTH_MAX] =
|
||||||
@ -961,6 +1031,22 @@ static BinaryFuncC* getMulTab()
|
|||||||
return mulTab;
|
return mulTab;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Selects the fused multiply kernel for a (src1, src2, dst) depth triple.
// Returns nullptr when no fused kernel exists for the combination, in which
// case the caller uses only the regular per-depth multiply table.
static ExtendedTypeFunc getMulExtFunc(int src1Type, int src2Type, int dstType)
{
    if (src1Type == CV_8U && src2Type == CV_8U && dstType == CV_16U)
    {
        return mul8u16uWrapper;
    }
    // Both inputs must be signed 8-bit: mul8s16sWrapper reinterprets both
    // source buffers as schar, so an unsigned src1 would be misread.
    if (src1Type == CV_8S && src2Type == CV_8S && dstType == CV_16S)
    {
        return mul8s16sWrapper;
    }
    return nullptr;
}
|
||||||
|
|
||||||
static BinaryFuncC* getDivTab()
|
static BinaryFuncC* getDivTab()
|
||||||
{
|
{
|
||||||
static BinaryFuncC divTab[CV_DEPTH_MAX] =
|
static BinaryFuncC divTab[CV_DEPTH_MAX] =
|
||||||
@ -990,8 +1076,10 @@ void multiply(InputArray src1, InputArray src2,
|
|||||||
{
|
{
|
||||||
CV_INSTRUMENT_REGION();
|
CV_INSTRUMENT_REGION();
|
||||||
|
|
||||||
|
ExtendedTypeFunc mulExtFunc = getMulExtFunc(src1.depth(), src2.depth(), dtype < 0 ? dst.depth() : dtype);
|
||||||
arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(),
|
arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(),
|
||||||
true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? OCL_OP_MUL : OCL_OP_MUL_SCALE);
|
/* muldiv */ true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? OCL_OP_MUL : OCL_OP_MUL_SCALE,
|
||||||
|
/* extendedFunc */ mulExtFunc );
|
||||||
}
|
}
|
||||||
|
|
||||||
void divide(InputArray src1, InputArray src2,
|
void divide(InputArray src1, InputArray src2,
|
||||||
|
@ -324,6 +324,8 @@ inline int hal_ni_mul16s(const short *src1_data, size_t src1_step, const short *
|
|||||||
inline int hal_ni_mul32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
inline int hal_ni_mul32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
inline int hal_ni_mul32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
inline int hal_ni_mul32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
inline int hal_ni_mul64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, double *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
inline int hal_ni_mul64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, double *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
// Default stub for the uchar x uchar -> ushort scaled multiply HAL entry point: ignores every argument and reports CV_HAL_ERROR_NOT_IMPLEMENTED.
inline int hal_ni_mul8u16u(const uchar* src1_data, size_t src1_step, const uchar* src2_data, size_t src2_step, ushort* dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
// Default stub for the schar x schar -> short scaled multiply HAL entry point: ignores every argument and reports CV_HAL_ERROR_NOT_IMPLEMENTED.
inline int hal_ni_mul8s16s(const schar* src1_data, size_t src1_step, const schar* src2_data, size_t src2_step, short* dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
//! @}
|
//! @}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -378,6 +380,8 @@ inline int hal_ni_recip64f(const double *src_data, size_t src_step, double *dst_
|
|||||||
#define cv_hal_mul32s hal_ni_mul32s
|
#define cv_hal_mul32s hal_ni_mul32s
|
||||||
#define cv_hal_mul32f hal_ni_mul32f
|
#define cv_hal_mul32f hal_ni_mul32f
|
||||||
#define cv_hal_mul64f hal_ni_mul64f
|
#define cv_hal_mul64f hal_ni_mul64f
|
||||||
|
#define cv_hal_mul8u16u hal_ni_mul8u16u
|
||||||
|
#define cv_hal_mul8s16s hal_ni_mul8s16s
|
||||||
#define cv_hal_div8u hal_ni_div8u
|
#define cv_hal_div8u hal_ni_div8u
|
||||||
#define cv_hal_div8s hal_ni_div8s
|
#define cv_hal_div8s hal_ni_div8s
|
||||||
#define cv_hal_div16u hal_ni_div16u
|
#define cv_hal_div16u hal_ni_div16u
|
||||||
|
Loading…
Reference in New Issue
Block a user