From 67f07b16cbf7e556bc2d2861d8a586fe9e0f3fab Mon Sep 17 00:00:00 2001 From: Rostislav Vasilikhin Date: Wed, 13 Nov 2024 06:33:19 +0100 Subject: [PATCH] Merge pull request #25624 from savuor:rv/hal_addscalar HAL added for add(array, scalar) #25624 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/core/src/arithm.cpp | 106 ++++++++++++++++++++------- modules/core/src/hal_replacement.hpp | 16 ++++ 2 files changed, 96 insertions(+), 26 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 72d5806ebb..aea697e762 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -585,6 +585,10 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, #endif +typedef int (*ScalarFunc)(const uchar* src, size_t step_src, + uchar* dst, size_t step_dst, int width, int height, + void* scalar, bool scalarIsFirst); + typedef int (*ExtendedTypeFunc)(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, @@ -592,7 +596,8 @@ typedef int (*ExtendedTypeFunc)(const uchar* src1, size_t step1, static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false, - void* usrdata=0, int oclop=-1, ExtendedTypeFunc extendedFunc = nullptr ) + void* usrdata=0, int oclop=-1, ExtendedTypeFunc extendedFunc = nullptr, + ScalarFunc scalarFunc = nullptr) { const _InputArray *psrc1 = &_src1, *psrc2 = &_src2; _InputArray::KindFlag kind1 = psrc1->kind(), kind2 = psrc2->kind(); @@ -638,8 +643,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, (kind1 == _InputArray::MATX && (sz1 == Size(1,4) || sz1 == Size(1,1))) || (kind2 == _InputArray::MATX && (sz2 == Size(1,4) || sz2 == Size(1,1))) ) { - if ((type1 == CV_64F && (sz1.height == 1 || sz1.height == 4)) && - checkScalar(*psrc1, type2, kind1, kind2)) + if ((type1 == CV_64F && (sz1.height == 1 || sz1.height == 4)) && src1Scalar) { // src1 is a scalar; swap it with src2 swap(psrc1, psrc2); @@ -654,7 +658,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, if ( oclop == OCL_OP_DIV_SCALE ) oclop = OCL_OP_RDIV_SCALE; } - else if( !checkScalar(*psrc2, type1, kind2, kind1) ) + else if( !src2Scalar ) CV_Error( cv::Error::StsUnmatchedSizes, "The operation is neither 'array op array' " "(where arrays have the same size and the same number of channels), " @@ -866,32 +870,38 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, const uchar* extSptr1 = sptr1; const uchar* extSptr2 = sptr2; if( swapped12 ) - std::swap(extSptr1, extSptr1); + std::swap(extSptr1, extSptr2); - // try to perform operation with conversion in one call - // if fail, use converter functions + // try to perform operation in 1 call, fallback to classic way if fail uchar* opconverted = haveMask ? maskbuf : dptr; - if (!extendedFunc || extendedFunc(extSptr1, 1, extSptr2, 1, opconverted, 1, - bszn.width, bszn.height, usrdata) != 0) + if (!scalarFunc || src2.total() != 1 || + scalarFunc(extSptr1, 1, opconverted, 1, bszn.width, bszn.height, (void*)extSptr2, swapped12) != 0) { - if( cvtsrc1 ) + // try to perform operation with conversion in one call + // if fail, use converter functions + + if (!extendedFunc || extendedFunc(extSptr1, 1, extSptr2, 1, opconverted, 1, + bszn.width, bszn.height, usrdata) != 0) { - cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 ); - sptr1 = buf1; + if( cvtsrc1 ) + { + cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 ); + sptr1 = buf1; + } + + if( swapped12 ) + std::swap(sptr1, sptr2); + + uchar* fdst = ( haveMask || cvtdst ) ? wbuf : dptr; + func( sptr1, 1, sptr2, 1, fdst, 1, bszn.width, bszn.height, usrdata ); + + if (cvtdst) + { + uchar* cdst = haveMask ? maskbuf : dptr; + cvtdst(wbuf, 1, 0, 1, cdst, 1, bszn, 0); + } + opconverted = cvtdst ? maskbuf : wbuf; } - - if( swapped12 ) - std::swap(sptr1, sptr2); - - uchar* fdst = ( haveMask || cvtdst ) ? wbuf : dptr; - func( sptr1, 1, sptr2, 1, fdst, 1, bszn.width, bszn.height, usrdata ); - - if (cvtdst) - { - uchar* cdst = haveMask ? maskbuf : dptr; - cvtdst(wbuf, 1, 0, 1, cdst, 1, bszn, 0); - } - opconverted = cvtdst ? maskbuf : wbuf; } if (haveMask) @@ -920,6 +930,48 @@ static BinaryFuncC* getAddTab() return addTab; } +static int addScalar32f32fWrapper(const uchar* src, size_t step_src, uchar* dst, size_t step_dst, int width, int height, + void* scalar, bool /*scalarIsFirst*/) +{ + int res = cv_hal_addScalar32f32f((const float*)src, step_src, (float *)dst, step_dst, width, height, (const float*)scalar); + if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED) + return res; + else + { + CV_Error_(cv::Error::StsInternal, ("HAL implementation addScalar32f32f ==> " CVAUX_STR(cv_hal_addScalar32f32f) + " returned %d (0x%08x)", res, res)); + } +} + +static int addScalar16s16sWrapper(const uchar* src, size_t step_src, uchar* dst, size_t step_dst, int width, int height, + void* scalar, bool /*scalarIsFirst*/) +{ + int res = cv_hal_addScalar16s16s((const int16_t*)src, step_src, (int16_t *)dst, step_dst, width, height, (const int16_t*)scalar); + if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED) + return res; + else + { + CV_Error_(cv::Error::StsInternal, ("HAL implementation addScalar16s16s ==> " CVAUX_STR(cv_hal_addScalar16s16s) + " returned %d (0x%08x)", res, res)); + } +} + +static ScalarFunc getAddScalarFunc(int srcType, int dstType) +{ + if (srcType == CV_32F && dstType == CV_32F) + { + return addScalar32f32fWrapper; + } + else if (srcType == CV_16S && dstType == CV_16S) + { + return addScalar16s16sWrapper; + } + else + { + return nullptr; + } +} + static int sub8u32fWrapper(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ) { @@ -1004,7 +1056,9 @@ void cv::add( InputArray src1, InputArray src2, OutputArray dst, return; } - arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD ); + ScalarFunc scalarFunc = getAddScalarFunc(src1.depth(), dtype < 0 ? dst.depth() : dtype); + arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD, nullptr, + /* scalarFunc */ scalarFunc ); } void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst, diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp index 67f3ac7141..013ca97875 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp @@ -98,6 +98,20 @@ inline int hal_ni_sub64f(const double *src1_data, size_t src1_step, const double inline int hal_ni_sub8u32f(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } inline int hal_ni_sub8s32f(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +/** +Add scalar: _dst[i] = src[i] + scalar + +@param src_data source image data +@param src_step source image step +@param dst_data destination image data +@param dst_step destination image step +@param width width of the images +@param height height of the images +@param scalar_data pointer to scalar value +*/ +inline int hal_ni_addScalar32f32f(const float *src_data, size_t src_step, float *dst_data, size_t dst_step, int width, int height, const float* scalar_data) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addScalar16s16s(const int16_t *src_data, size_t src_step, int16_t *dst_data, size_t dst_step, int width, int height, const int16_t* scalar_data) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } //! @} /** @@ -192,6 +206,8 @@ inline int hal_ni_not8u(const uchar *src_data, size_t src_step, uchar *dst_data, #define cv_hal_sub64f hal_ni_sub64f #define cv_hal_sub8u32f hal_ni_sub8u32f #define cv_hal_sub8s32f hal_ni_sub8s32f +#define cv_hal_addScalar32f32f hal_ni_addScalar32f32f +#define cv_hal_addScalar16s16s hal_ni_addScalar16s16s #define cv_hal_max8u hal_ni_max8u #define cv_hal_max8s hal_ni_max8s #define cv_hal_max16u hal_ni_max16u