Merge pull request #26369 from WanliZhong:5x_fix_hfloat_vfunc

Fix hfloat conflicts of v_func in merging 4.x to 5.x #26369 This PR resolves the conflicts that arise when merging 4.x into 5.x https://github.com/opencv/opencv/pull/26358 1. Explicitly convert the numeric arguments passed to `v_setall_` to `hfloat`. 2. Loosen the threshold for the `v_sincos` test (related issue: https://github.com/opencv/opencv/issues/26362). 3. Remove the new but temporary API `template <> inline v_float16x8 v_setall_(float v) { return v_setall_f16((hfloat)v); }` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2025-08-06 14:36:36 +08:00 · 2024-10-27 00:54:13 +08:00 · 2024-10-27 00:54:13 +08:00 · 29e712ed93
commit 29e712ed93
parent 05e7988e9c
3 changed files with 29 additions and 33 deletions
--- a/modules/core/include/opencv2/core/hal/intrin_math.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_math.hpp
@ -38,19 +38,19 @@
 // Implementation is the same as float32 vector.
 template<typename _TpVec16F, typename _TpVec16S>
 inline _TpVec16F v_exp_default_16f(const _TpVec16F &x) {
-    const _TpVec16F _vexp_lo_f16 = v_setall_<_TpVec16F>(-10.7421875f);
-    const _TpVec16F _vexp_hi_f16 = v_setall_<_TpVec16F>(11.f);
-    const _TpVec16F _vexp_half_fp16 = v_setall_<_TpVec16F>(0.5f);
-    const _TpVec16F _vexp_one_fp16 = v_setall_<_TpVec16F>(1.f);
-    const _TpVec16F _vexp_LOG2EF_f16 = v_setall_<_TpVec16F>(1.44269504088896341f);
-    const _TpVec16F _vexp_C1_f16 = v_setall_<_TpVec16F>(-6.93359375E-1f);
-    const _TpVec16F _vexp_C2_f16 = v_setall_<_TpVec16F>(2.12194440E-4f);
-    const _TpVec16F _vexp_p0_f16 = v_setall_<_TpVec16F>(1.9875691500E-4f);
-    const _TpVec16F _vexp_p1_f16 = v_setall_<_TpVec16F>(1.3981999507E-3f);
-    const _TpVec16F _vexp_p2_f16 = v_setall_<_TpVec16F>(8.3334519073E-3f);
-    const _TpVec16F _vexp_p3_f16 = v_setall_<_TpVec16F>(4.1665795894E-2f);
-    const _TpVec16F _vexp_p4_f16 = v_setall_<_TpVec16F>(1.6666665459E-1f);
-    const _TpVec16F _vexp_p5_f16 = v_setall_<_TpVec16F>(5.0000001201E-1f);
+    const _TpVec16F _vexp_lo_f16 = v_setall_<_TpVec16F>(hfloat(-10.7421875f));
+    const _TpVec16F _vexp_hi_f16 = v_setall_<_TpVec16F>(hfloat(11.f));
+    const _TpVec16F _vexp_half_fp16 = v_setall_<_TpVec16F>(hfloat(0.5f));
+    const _TpVec16F _vexp_one_fp16 = v_setall_<_TpVec16F>(hfloat(1.f));
+    const _TpVec16F _vexp_LOG2EF_f16 = v_setall_<_TpVec16F>(hfloat(1.44269504088896341f));
+    const _TpVec16F _vexp_C1_f16 = v_setall_<_TpVec16F>(hfloat(-6.93359375E-1f));
+    const _TpVec16F _vexp_C2_f16 = v_setall_<_TpVec16F>(hfloat(2.12194440E-4f));
+    const _TpVec16F _vexp_p0_f16 = v_setall_<_TpVec16F>(hfloat(1.9875691500E-4f));
+    const _TpVec16F _vexp_p1_f16 = v_setall_<_TpVec16F>(hfloat(1.3981999507E-3f));
+    const _TpVec16F _vexp_p2_f16 = v_setall_<_TpVec16F>(hfloat(8.3334519073E-3f));
+    const _TpVec16F _vexp_p3_f16 = v_setall_<_TpVec16F>(hfloat(4.1665795894E-2f));
+    const _TpVec16F _vexp_p4_f16 = v_setall_<_TpVec16F>(hfloat(1.6666665459E-1f));
+    const _TpVec16F _vexp_p5_f16 = v_setall_<_TpVec16F>(hfloat(5.0000001201E-1f));

    _TpVec16F _vexp_, _vexp_x, _vexp_y, _vexp_xx;
    _TpVec16S _vexp_mm;
@ -192,19 +192,19 @@ inline _TpVec64F v_exp_default_64f(const _TpVec64F &x) {
 //! @{
 template<typename _TpVec16F, typename _TpVec16S>
 inline _TpVec16F v_log_default_16f(const _TpVec16F &x) {
-    const _TpVec16F _vlog_one_fp16 = v_setall_<_TpVec16F>(1.0f);
-    const _TpVec16F _vlog_SQRTHF_fp16 = v_setall_<_TpVec16F>(0.707106781186547524f);
-    const _TpVec16F _vlog_q1_fp16 = v_setall_<_TpVec16F>(-2.12194440E-4f);
-    const _TpVec16F _vlog_q2_fp16 = v_setall_<_TpVec16F>(0.693359375f);
-    const _TpVec16F _vlog_p0_fp16 = v_setall_<_TpVec16F>(7.0376836292E-2f);
-    const _TpVec16F _vlog_p1_fp16 = v_setall_<_TpVec16F>(-1.1514610310E-1f);
-    const _TpVec16F _vlog_p2_fp16 = v_setall_<_TpVec16F>(1.1676998740E-1f);
-    const _TpVec16F _vlog_p3_fp16 = v_setall_<_TpVec16F>(-1.2420140846E-1f);
-    const _TpVec16F _vlog_p4_fp16 = v_setall_<_TpVec16F>(1.4249322787E-1f);
-    const _TpVec16F _vlog_p5_fp16 = v_setall_<_TpVec16F>(-1.6668057665E-1f);
-    const _TpVec16F _vlog_p6_fp16 = v_setall_<_TpVec16F>(2.0000714765E-1f);
-    const _TpVec16F _vlog_p7_fp16 = v_setall_<_TpVec16F>(-2.4999993993E-1f);
-    const _TpVec16F _vlog_p8_fp16 = v_setall_<_TpVec16F>(3.3333331174E-1f);
+    const _TpVec16F _vlog_one_fp16 = v_setall_<_TpVec16F>(hfloat(1.0f));
+    const _TpVec16F _vlog_SQRTHF_fp16 = v_setall_<_TpVec16F>(hfloat(0.707106781186547524f));
+    const _TpVec16F _vlog_q1_fp16 = v_setall_<_TpVec16F>(hfloat(-2.12194440E-4f));
+    const _TpVec16F _vlog_q2_fp16 = v_setall_<_TpVec16F>(hfloat(0.693359375f));
+    const _TpVec16F _vlog_p0_fp16 = v_setall_<_TpVec16F>(hfloat(7.0376836292E-2f));
+    const _TpVec16F _vlog_p1_fp16 = v_setall_<_TpVec16F>(hfloat(-1.1514610310E-1f));
+    const _TpVec16F _vlog_p2_fp16 = v_setall_<_TpVec16F>(hfloat(1.1676998740E-1f));
+    const _TpVec16F _vlog_p3_fp16 = v_setall_<_TpVec16F>(hfloat(-1.2420140846E-1f));
+    const _TpVec16F _vlog_p4_fp16 = v_setall_<_TpVec16F>(hfloat(1.4249322787E-1f));
+    const _TpVec16F _vlog_p5_fp16 = v_setall_<_TpVec16F>(hfloat(-1.6668057665E-1f));
+    const _TpVec16F _vlog_p6_fp16 = v_setall_<_TpVec16F>(hfloat(2.0000714765E-1f));
+    const _TpVec16F _vlog_p7_fp16 = v_setall_<_TpVec16F>(hfloat(-2.4999993993E-1f));
+    const _TpVec16F _vlog_p8_fp16 = v_setall_<_TpVec16F>(hfloat(3.3333331174E-1f));

    _TpVec16F _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp;
    _TpVec16S _vlog_ux, _vlog_emm0;
@ -214,7 +214,7 @@ inline _TpVec16F v_log_default_16f(const _TpVec16F &x) {
    _vlog_emm0 = v_shr(_vlog_ux, 10);

    _vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s16);
-    _vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s16(v_setall_<_TpVec16F>(0.5f)));
+    _vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s16(v_setall_<_TpVec16F>(hfloat(0.5f))));
    _vlog_x = v_reinterpret_as_f16(_vlog_ux);

    _vlog_emm0 = v_sub(_vlog_emm0, v_setall_<_TpVec16S>((short)0xf));
@ -243,7 +243,7 @@ inline _TpVec16F v_log_default_16f(const _TpVec16F &x) {

    _vlog_y = v_fma(_vlog_e, _vlog_q1_fp16, _vlog_y);

-    _vlog_y = v_sub(_vlog_y, v_mul(_vlog_z, v_setall_<_TpVec16F>(0.5f)));
+    _vlog_y = v_sub(_vlog_y, v_mul(_vlog_z, v_setall_<_TpVec16F>(hfloat(0.5f))));

    _vlog_x = v_add(_vlog_x, _vlog_y);
    _vlog_x = v_fma(_vlog_e, _vlog_q2_fp16, _vlog_x);
--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@ -437,7 +437,6 @@ OPENCV_HAL_IMPL_NEON_INIT(uint64x2, uint64, uint64, u64)
 OPENCV_HAL_IMPL_NEON_INIT(int64x2, int64, int64, s64)
 #if CV_SIMD128_FP16
 OPENCV_HAL_IMPL_NEON_INIT(float16x8, hfloat, __fp16,  f16);
-template <> inline v_float16x8 v_setall_(float v) { return v_setall_f16((hfloat)v); }
 #define OPENCV_HAL_IMPL_NEON_INIT_FP16(_Tpv, suffix) \
 inline v_float16x8 v_reinterpret_as_f16(const v_##_Tpv& v) { return v_float16x8(vreinterpretq_f16_##suffix(v.val)); }
 OPENCV_HAL_IMPL_NEON_INIT_FP16(uint8x16, u8)
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@ -2162,14 +2162,11 @@ template<typename R> struct TheTest
        }
    }

-    // BUG: https://github.com/opencv/opencv/issues/26362
    TheTest &test_sincos_fp16() {
-#if 0 // CV_SIMD_FP16
        hfloat flt16_min;
        uint16_t flt16_min_hex = 0x0400;
        std::memcpy(&flt16_min, &flt16_min_hex, sizeof(hfloat));
-        __test_sincos((hfloat) 1e-3, flt16_min);
-#endif
+        __test_sincos((hfloat) 4e-3, flt16_min);
        return *this;
    }