core: Rename cv::float16_t to cv::hfloat (#25217)

* rename cv::float16_t to cv::fp16_t
* add typedef fp16_t float16_t
* remove zero(), bits() from fp16_t class
* fp16_t -> hfloat
* remove cv::float16_t::fromBits; add hfloatFromBits
* undo changes in conv_winograd_f63.simd.hpp and conv_block.simd.hpp
* undo some changes in dnn
2025-07-25 22:57:53 +08:00 · 2024-03-22 04:44:19 +08:00 · 2024-03-22 04:44:19 +08:00 · 3afe8ddaf8
commit 3afe8ddaf8
parent 3aefd4862c
31 changed files with 156 additions and 166 deletions
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@ -809,40 +809,20 @@ using std::uint64_t;
 namespace cv
 {

-class float16_t
+class hfloat
 {
 public:
 #if CV_FP16_TYPE

-    float16_t() : h(0) {}
-    explicit float16_t(float x) { h = (__fp16)x; }
+    hfloat() : h(0) {}
+    explicit hfloat(float x) { h = (__fp16)x; }
    operator float() const { return (float)h; }
-    static float16_t fromBits(ushort w)
-    {
-        Cv16suf u;
-        u.u = w;
-        float16_t result;
-        result.h = u.h;
-        return result;
-    }
-    static float16_t zero()
-    {
-        float16_t result;
-        result.h = (__fp16)0;
-        return result;
-    }
-    ushort bits() const
-    {
-        Cv16suf u;
-        u.h = h;
-        return u.u;
-    }
 protected:
    __fp16 h;

 #else
-    float16_t() : w(0) {}
-    explicit float16_t(float x)
+    hfloat() : w(0) {}
+    explicit hfloat(float x)
    {
    #if CV_FP16 && CV_AVX2
        __m128 v = _mm_load_ss(&x);
@ -893,25 +873,35 @@ protected:
    #endif
    }

-    static float16_t fromBits(ushort b)
-    {
-        float16_t result;
-        result.w = b;
-        return result;
-    }
-    static float16_t zero()
-    {
-        float16_t result;
-        result.w = (ushort)0;
-        return result;
-    }
-    ushort bits() const { return w; }
 protected:
    ushort w;

 #endif
 };

+inline hfloat hfloatFromBits(ushort w) {
+#if CV_FP16_TYPE
+    Cv16suf u;
+    u.u = w;
+    hfloat res(float(u.h));
+    return res;
+#else
+    Cv32suf out;
+
+    unsigned t = ((w & 0x7fff) << 13) + 0x38000000;
+    unsigned sign = (w & 0x8000) << 16;
+    unsigned e = w & 0x7c00;
+
+    out.u = t + (1 << 23);
+    out.u = (e >= 0x7c00 ? t + 0x38000000 :
+            e == 0 ? (static_cast<void>(out.f -= 6.103515625e-05f), out.u) : t) | sign;
+    hfloat res(out.f);
+    return res;
+#endif
+}
+
+typedef hfloat float16_t;
+
 }
 #endif

--- a/modules/core/include/opencv2/core/hal/hal.hpp
+++ b/modules/core/include/opencv2/core/hal/hal.hpp
@ -195,8 +195,8 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2,
 CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
 CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );

-CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len );
-CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len );
+CV_EXPORTS void cvt16f32f( const hfloat* src, float* dst, int len );
+CV_EXPORTS void cvt32f16f( const float* src, hfloat* dst, int len );

 CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len );
 CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len );
--- a/modules/core/include/opencv2/core/hal/intrin.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin.hpp
@ -708,7 +708,7 @@ namespace CV__SIMD_NAMESPACE {
    inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
-    inline v_float32 vx_load_expand(const float16_t * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_float32 vx_load_expand(const hfloat * ptr) { return VXPREFIX(_load_expand)(ptr); }
    //! @}

    //! @name Wide load with quad expansion
--- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp
@ -3137,7 +3137,7 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, u
 // FP16
 //

-inline v_float32x8 v256_load_expand(const float16_t* ptr)
+inline v_float32x8 v256_load_expand(const hfloat* ptr)
 {
 #if CV_FP16
    return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
@ -3149,7 +3149,7 @@ inline v_float32x8 v256_load_expand(const float16_t* ptr)
 #endif
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
+inline void v_pack_store(hfloat* ptr, const v_float32x8& a)
 {
 #if CV_FP16
    __m128i ah = _mm256_cvtps_ph(a.val, 0);
@ -3158,7 +3158,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
    float CV_DECL_ALIGNED(32) buf[8];
    v_store_aligned(buf, a);
    for (int i = 0; i < 8; i++)
-        ptr[i] = float16_t(buf[i]);
+        ptr[i] = hfloat(buf[i]);
 #endif
 }

--- a/modules/core/include/opencv2/core/hal/intrin_avx512.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_avx512.hpp
@ -506,12 +506,12 @@ inline v_float64x8 v_reinterpret_as_f64(const v_float32x16& a)
 { return v_float64x8(_mm512_castps_pd(a.val)); }

 // FP16
-inline v_float32x16 v512_load_expand(const float16_t* ptr)
+inline v_float32x16 v512_load_expand(const hfloat* ptr)
 {
    return v_float32x16(_mm512_cvtph_ps(_mm256_loadu_si256((const __m256i*)ptr)));
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x16& a)
+inline void v_pack_store(hfloat* ptr, const v_float32x16& a)
 {
    __m256i ah = _mm512_cvtps_ph(a.val, 0);
    _mm256_storeu_si256((__m256i*)ptr, ah);
--- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
@ -3251,7 +3251,7 @@ template<int n> inline v_reg<double, n/2> v_dotprod_expand_fast(const v_reg<int,
 ////// FP16 support ///////

 inline v_reg<float, simd128_width / sizeof(float)>
-v_load_expand(const float16_t* ptr)
+v_load_expand(const hfloat* ptr)
 {
    v_reg<float, simd128_width / sizeof(float)> v;
    for( int i = 0; i < v.nlanes; i++ )
@ -3262,7 +3262,7 @@ v_load_expand(const float16_t* ptr)
 }
 #if CV_SIMD256
 inline v_reg<float, simd256_width / sizeof(float)>
-v256_load_expand(const float16_t* ptr)
+v256_load_expand(const hfloat* ptr)
 {
    v_reg<float, simd256_width / sizeof(float)> v;
    for (int i = 0; i < v.nlanes; i++)
@ -3274,7 +3274,7 @@ v256_load_expand(const float16_t* ptr)
 #endif
 #if CV_SIMD512
 inline v_reg<float, simd512_width / sizeof(float)>
-v512_load_expand(const float16_t* ptr)
+v512_load_expand(const hfloat* ptr)
 {
    v_reg<float, simd512_width / sizeof(float)> v;
    for (int i = 0; i < v.nlanes; i++)
@ -3286,11 +3286,11 @@ v512_load_expand(const float16_t* ptr)
 #endif

 template<int n> inline void
-v_pack_store(float16_t* ptr, const v_reg<float, n>& v)
+v_pack_store(hfloat* ptr, const v_reg<float, n>& v)
 {
    for( int i = 0; i < v.nlanes; i++ )
    {
-        ptr[i] = float16_t(v.s[i]);
+        ptr[i] = hfloat(v.s[i]);
    }
 }

--- a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp
@ -2983,7 +2983,7 @@ OPENCV_HAL_IMPL_LASX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4,
 // FP16
 //

-inline v_float32x8 v256_load_expand(const float16_t* ptr)
+inline v_float32x8 v256_load_expand(const hfloat* ptr)
 {
 #if CV_FP16
    //1-load128, 2-permi, 3-cvt
@ -2996,7 +2996,7 @@ inline v_float32x8 v256_load_expand(const float16_t* ptr)
 #endif
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
+inline void v_pack_store(hfloat* ptr, const v_float32x8& a)
 {
 #if CV_FP16
    __m256i ah = __lasx_xvfcvt_h_s(a.val, a.val);
@ -3005,7 +3005,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
    float CV_DECL_ALIGNED(32) buf[8];
    v_store_aligned(buf, a);
    for (int i = 0; i < 8; i++)
-        ptr[i] = float16_t(buf[i]);
+        ptr[i] = hfloat(buf[i]);
 #endif
 }

--- a/modules/core/include/opencv2/core/hal/intrin_lsx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_lsx.hpp
@ -2498,7 +2498,7 @@ OPENCV_HAL_IMPL_LSX_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, u
 // FP16
 //

-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
 #if CV_FP16
    return v_float32x4(__lsx_vfcvtl_s_h((__m128)__lsx_vld(ptr, 0)));
@ -2510,7 +2510,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
 #endif
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& a)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& a)
 {
 #if CV_FP16
    __m128i res = (__m218i)__lsx_vfcvt_h_s(a.val, a.val);
@ -2519,7 +2519,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& a)
    float CV_DECL_ALIGNED(32) buf[4];
    v_store_aligned(buf, a);
    for (int i = 0; i < 4; i++)
-        ptr[i] = float16_t(buf[i]);
+        ptr[i] = hfloat(buf[i]);
 #endif
 }

--- a/modules/core/include/opencv2/core/hal/intrin_msa.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_msa.hpp
@ -1838,7 +1838,7 @@ inline v_float32x4 v_broadcast_element(const v_float32x4& a)

 ////// FP16 support ///////
 #if CV_FP16
-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
 #ifndef msa_ld1_f16
    v4f16 v = (v4f16)msa_ld1_s16((const short*)ptr);
@ -1848,7 +1848,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
    return v_float32x4(msa_cvt_f32_f16(v));
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
 {
    v4f16 hv = msa_cvt_f16_f32(v.val);

@ -1859,7 +1859,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
 #endif
 }
 #else
-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
    float buf[4];
    for( int i = 0; i < 4; i++ )
@ -1867,12 +1867,12 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
    return v_load(buf);
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
 {
    float buf[4];
    v_store(buf, v);
    for( int i = 0; i < 4; i++ )
-        ptr[i] = (float16_t)buf[i];
+        ptr[i] = (hfloat)buf[i];
 }
 #endif

--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@ -2605,7 +2605,7 @@ inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_flo

 ////// FP16 support ///////
 #if CV_FP16
-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
    float16x4_t v =
    #ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro
@ -2616,7 +2616,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
    return v_float32x4(vcvt_f32_f16(v));
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
 {
    float16x4_t hv = vcvt_f16_f32(v.val);

@ -2627,7 +2627,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
    #endif
 }
 #else
-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
    const int N = 4;
    float buf[N];
@ -2635,12 +2635,12 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
    return v_load(buf);
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
 {
    const int N = 4;
    float buf[N];
    v_store(buf, v);
-    for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]);
+    for( int i = 0; i < N; i++ ) ptr[i] = hfloat(buf[i]);
 }
 #endif

--- a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp
@ -2873,17 +2873,17 @@ inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
 ////// FP16 support ///////

 #if CV_FP16
-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
    return v_float32x4(vfwcvt_f_f_v_f32m1(vle16_v_f16mf2(ptr, 4), 4));
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
 {
    vse16_v_f16mf2(ptr, vfncvt_f_f_w_f16mf2(v, 4), 4);
 }
 #else
-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
    const int N = 4;
    float buf[N];
@ -2891,12 +2891,12 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
    return v_load(buf);
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
 {
    const int N = 4;
    float buf[N];
    v_store(buf, v);
-    for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]);
+    for( int i = 0; i < N; i++ ) ptr[i] = hfloat(buf[i]);
 }
 #endif

--- a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp
@ -2858,14 +2858,14 @@ inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b,
 #endif
 ////// FP16 support ///////
 #if __riscv_v == 7000
-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
    vfloat16m1_t v = vle16_v_f16m1((__fp16*)ptr, 4);
    vfloat32m2_t v32 = vfwcvt_f_f_v_f32m2(v, 4);
    return v_float32x4(vget_v_f32m2_f32m1(v32, 0));
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
 {
    vfloat32m2_t v32 = vundefined_f32m2();
    v32 = vset_v_f32m1_f32m2(v32, 0, v.val);
@ -2873,14 +2873,14 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
    vse16_v_f16m1((__fp16*)ptr, hv, 4);
 }
 #else
-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
    vfloat16mf2_t v = vle16_v_f16mf2((__fp16*)ptr, 4);
    vfloat32m1_t v32 = vfwcvt_f_f_v_f32m1(v, 4);
    return v_float32x4(v32);
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
 {
    //vfloat32m2_t v32 = vundefined_f32m2();
    //v32 = vset_f32m2(v32, 0, v.val);
--- a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
@ -1810,28 +1810,28 @@ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float64, vlmul_trunc_u8mf8)
 ////// FP16 support ///////

 #if defined(__riscv_zfh) && __riscv_zfh
-inline v_float32 v_load_expand(const float16_t* ptr)
+inline v_float32 v_load_expand(const hfloat* ptr)
 {
    return vfwcvt_f(vle16_v_f16mf2((_Float16*)ptr, VTraits<v_float32>::vlanes()) ,VTraits<v_float32>::vlanes());;
 }

-inline void v_pack_store(float16_t* ptr, const v_float32& v)
+inline void v_pack_store(hfloat* ptr, const v_float32& v)
 {
    vse16_v_f16mf2((_Float16*)ptr, vfncvt_f_f_w_f16mf2(v, VTraits<v_float32>::vlanes()), VTraits<v_float32>::vlanes());
 }
 #else
-inline v_float32 v_load_expand(const float16_t* ptr)
+inline v_float32 v_load_expand(const hfloat* ptr)
 {
    float buf[32];
    for( int i = 0; i < VTraits<v_float32>::vlanes(); i++ ) buf[i] = (float)ptr[i];
    return v_load(buf);
 }

-inline void v_pack_store(float16_t* ptr, const v_float32& v)
+inline void v_pack_store(hfloat* ptr, const v_float32& v)
 {
    float buf[32];
    v_store(buf, v);
-    for( int i = 0; i < VTraits<v_float32>::vlanes(); i++ ) ptr[i] = float16_t(buf[i]);
+    for( int i = 0; i < VTraits<v_float32>::vlanes(); i++ ) ptr[i] = hfloat(buf[i]);
 }
 #endif
 ////////////// Rounding //////////////
--- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp
@ -3407,7 +3407,7 @@ inline v_float32x4 v_broadcast_element(const v_float32x4& v)

 ////////////// FP16 support ///////////////////////////

-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
 #if CV_FP16
    return v_float32x4(_mm_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
@ -3427,7 +3427,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
 #endif
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
 {
 #if CV_FP16
    __m128i fp16_value = _mm_cvtps_ph(v.val, 0);
--- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp
@ -1361,7 +1361,7 @@ inline v_float32x4 v_pack_triplets(const v_float32x4& vec)

 /////// FP16 support ////////

-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
    vec_ushort8 vf16 = vec_ld_l8((const ushort*)ptr);
 #if CV_VSX3 && defined(vec_extract_fp_from_shorth)
@ -1388,7 +1388,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
 #endif
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
 {
 // fixme: Is there any builtin op or intrinsic that cover "xvcvsphp"?
 #if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
--- a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp
@ -2754,7 +2754,7 @@ inline v_float32x4 v_broadcast_element(const v_float32x4& a)

 ////////////// FP16 support ///////////////////////////

-inline v_float32x4 v_load_expand(const float16_t* ptr)
+inline v_float32x4 v_load_expand(const hfloat* ptr)
 {
    float a[4];
    for (int i = 0; i < 4; i++)
@ -2762,14 +2762,14 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
    return v_float32x4(wasm_v128_load(a));
 }

-inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
 {
    double v_[4];
    wasm_v128_store(v_, v.val);
-    ptr[0] = float16_t(v_[0]);
-    ptr[1] = float16_t(v_[1]);
-    ptr[2] = float16_t(v_[2]);
-    ptr[3] = float16_t(v_[3]);
+    ptr[0] = hfloat(v_[0]);
+    ptr[1] = hfloat(v_[1]);
+    ptr[2] = hfloat(v_[2]);
+    ptr[3] = hfloat(v_[3]);
 }

 inline void v_cleanup() {}
--- a/modules/core/include/opencv2/core/saturate.hpp
+++ b/modules/core/include/opencv2/core/saturate.hpp
@ -158,20 +158,20 @@ template<> inline uint64 saturate_cast<uint64>(int64 v)      { return (uint64)st
 template<> inline int64 saturate_cast<int64>(uint64 v)       { return (int64)std::min(v, (uint64)LLONG_MAX); }

 /** @overload */
-template<typename _Tp> static inline _Tp saturate_cast(float16_t v) { return saturate_cast<_Tp>((float)v); }
+template<typename _Tp> static inline _Tp saturate_cast(hfloat v) { return saturate_cast<_Tp>((float)v); }

 // in theory, we could use a LUT for 8u/8s->16f conversion,
 // but with hardware support for FP32->FP16 conversion the current approach is preferable
-template<> inline float16_t saturate_cast<float16_t>(uchar v)   { return float16_t((float)v); }
-template<> inline float16_t saturate_cast<float16_t>(schar v)   { return float16_t((float)v); }
-template<> inline float16_t saturate_cast<float16_t>(ushort v)  { return float16_t((float)v); }
-template<> inline float16_t saturate_cast<float16_t>(short v)   { return float16_t((float)v); }
-template<> inline float16_t saturate_cast<float16_t>(unsigned v){ return float16_t((float)v); }
-template<> inline float16_t saturate_cast<float16_t>(int v)     { return float16_t((float)v); }
-template<> inline float16_t saturate_cast<float16_t>(uint64 v)  { return float16_t((float)v); }
-template<> inline float16_t saturate_cast<float16_t>(int64 v)   { return float16_t((float)v); }
-template<> inline float16_t saturate_cast<float16_t>(float v)   { return float16_t(v); }
-template<> inline float16_t saturate_cast<float16_t>(double v)  { return float16_t((float)v); }
+template<> inline hfloat saturate_cast<hfloat>(uchar v)   { return hfloat((float)v); }
+template<> inline hfloat saturate_cast<hfloat>(schar v)   { return hfloat((float)v); }
+template<> inline hfloat saturate_cast<hfloat>(ushort v)  { return hfloat((float)v); }
+template<> inline hfloat saturate_cast<hfloat>(short v)   { return hfloat((float)v); }
+template<> inline hfloat saturate_cast<hfloat>(unsigned v){ return hfloat((float)v); }
+template<> inline hfloat saturate_cast<hfloat>(int v)     { return hfloat((float)v); }
+template<> inline hfloat saturate_cast<hfloat>(uint64 v)  { return hfloat((float)v); }
+template<> inline hfloat saturate_cast<hfloat>(int64 v)   { return hfloat((float)v); }
+template<> inline hfloat saturate_cast<hfloat>(float v)   { return hfloat(v); }
+template<> inline hfloat saturate_cast<hfloat>(double v)  { return hfloat((float)v); }

 //! @}

--- a/modules/core/include/opencv2/core/traits.hpp
+++ b/modules/core/include/opencv2/core/traits.hpp
@ -261,10 +261,10 @@ public:
         };
 };

-template<> class DataType<float16_t>
+template<> class DataType<hfloat>
 {
 public:
-    typedef float16_t   value_type;
+    typedef hfloat   value_type;
    typedef float       work_type;
    typedef value_type  channel_type;
    typedef value_type  vec_type;
@ -347,7 +347,7 @@ template<> class TypeDepth<CV_64F>
 template<> class TypeDepth<CV_16F>
 {
    enum { depth = CV_16F };
-    typedef float16_t value_type;
+    typedef hfloat value_type;
 };

 #endif
--- a/modules/core/src/convert.dispatch.cpp
+++ b/modules/core/src/convert.dispatch.cpp
@ -11,13 +11,13 @@
 namespace cv {

 namespace hal {
-void cvt16f32f(const float16_t* src, float* dst, int len)
+void cvt16f32f(const hfloat* src, float* dst, int len)
 {
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(cvt16f32f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
 }
-void cvt32f16f(const float* src, float16_t* dst, int len)
+void cvt32f16f(const float* src, hfloat* dst, int len)
 {
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(cvt32f16f, (src, dst, len),
--- a/modules/core/src/convert.hpp
+++ b/modules/core/src/convert.hpp
@ -31,7 +31,7 @@ static inline void vx_load_as(const int* ptr, v_float32& a)
 static inline void vx_load_as(const float* ptr, v_float32& a)
 { a = vx_load(ptr); }

-static inline void vx_load_as(const float16_t* ptr, v_float32& a)
+static inline void vx_load_as(const hfloat* ptr, v_float32& a)
 { a = vx_load_expand(ptr); }

 static inline void v_store_as(ushort* ptr, const v_float32& a)
@ -46,7 +46,7 @@ static inline void v_store_as(int* ptr, const v_float32& a)
 static inline void v_store_as(float* ptr, const v_float32& a)
 { v_store(ptr, a); }

-static inline void v_store_as(float16_t* ptr, const v_float32& a)
+static inline void v_store_as(hfloat* ptr, const v_float32& a)
 { v_pack_store(ptr, a); }

 static inline void vx_load_pair_as(const uchar* ptr, v_uint16& a, v_uint16& b)
@ -150,7 +150,7 @@ static inline void vx_load_pair_as(const int* ptr, v_float32& a, v_float32& b)
 static inline void vx_load_pair_as(const float* ptr, v_float32& a, v_float32& b)
 { a = vx_load(ptr); b = vx_load(ptr + VTraits<v_float32>::vlanes()); }

-static inline void vx_load_pair_as(const float16_t* ptr, v_float32& a, v_float32& b)
+static inline void vx_load_pair_as(const hfloat* ptr, v_float32& a, v_float32& b)
 {
    a = vx_load_expand(ptr);
    b = vx_load_expand(ptr + VTraits<v_float32>::vlanes());
@ -294,7 +294,7 @@ static inline void vx_load_pair_as(const double* ptr, v_float64& a, v_float64& b
    b = vx_load(ptr + VTraits<v_float64>::vlanes());
 }

-static inline void vx_load_pair_as(const float16_t* ptr, v_float64& a, v_float64& b)
+static inline void vx_load_pair_as(const hfloat* ptr, v_float64& a, v_float64& b)
 {
    v_float32 v0 = vx_load_expand(ptr);
    a = v_cvt_f64(v0);
@ -348,7 +348,7 @@ static inline void v_store_pair_as(float* ptr, const v_float64& a, const v_float
    v_store(ptr, v);
 }

-static inline void v_store_pair_as(float16_t* ptr, const v_float64& a, const v_float64& b)
+static inline void v_store_pair_as(hfloat* ptr, const v_float64& a, const v_float64& b)
 {
    v_float32 v = v_cvt_f32(a, b);
    v_pack_store(ptr, v);
--- a/modules/core/src/convert.simd.hpp
+++ b/modules/core/src/convert.simd.hpp
@ -14,8 +14,8 @@ namespace cv {
 namespace hal {
 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN

-void cvt16f32f(const float16_t* src, float* dst, int len);
-void cvt32f16f(const float* src, float16_t* dst, int len);
+void cvt16f32f(const hfloat* src, float* dst, int len);
+void cvt32f16f(const float* src, hfloat* dst, int len);
 void addRNGBias32f(float* arr, const float* scaleBiasPairs, int len);
 void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len);

@ -35,7 +35,7 @@ CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN

 BinaryFunc getConvertFunc(int sdepth, int ddepth);

-void cvt16f32f( const float16_t* src, float* dst, int len )
+void cvt16f32f( const hfloat* src, float* dst, int len )
 {
    CV_INSTRUMENT_REGION();
    int j = 0;
@ -56,7 +56,7 @@ void cvt16f32f( const float16_t* src, float* dst, int len )
        dst[j] = (float)src[j];
 }

-void cvt32f16f( const float* src, float16_t* dst, int len )
+void cvt32f16f( const float* src, hfloat* dst, int len )
 {
    CV_INSTRUMENT_REGION();
    int j = 0;
@ -74,7 +74,7 @@ void cvt32f16f( const float* src, float16_t* dst, int len )
    }
 #endif
    for( ; j < len; j++ )
-        dst[j] = float16_t(src[j]);
+        dst[j] = hfloat(src[j]);
 }

 void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len )
@ -188,7 +188,7 @@ DEF_CVT_FUNC(8u16s, cvt_,  uchar, short,    v_int16)
 DEF_CVT_FUNC(8u32s, cvt_,  uchar, int,      v_int32)
 DEF_CVT_FUNC(8u32f, cvt_,  uchar, float,    v_float32)
 DEF_CVT_FUNC(8u64f, cvt_,  uchar, double,   v_int32)
-DEF_CVT_FUNC(8u16f, cvt1_, uchar, float16_t, v_float32)
+DEF_CVT_FUNC(8u16f, cvt1_, uchar, hfloat, v_float32)

 ////////////////////// 8s -> ... ////////////////////////

@ -198,7 +198,7 @@ DEF_CVT_FUNC(8s16s, cvt_,  schar, short,    v_int16)
 DEF_CVT_FUNC(8s32s, cvt_,  schar, int,      v_int32)
 DEF_CVT_FUNC(8s32f, cvt_,  schar, float,    v_float32)
 DEF_CVT_FUNC(8s64f, cvt_,  schar, double,   v_int32)
-DEF_CVT_FUNC(8s16f, cvt1_, schar, float16_t, v_float32)
+DEF_CVT_FUNC(8s16f, cvt1_, schar, hfloat, v_float32)

 ////////////////////// 16u -> ... ////////////////////////

@ -208,7 +208,7 @@ DEF_CVT_FUNC(16u16s, cvt_, ushort, short,  v_int32)
 DEF_CVT_FUNC(16u32s, cvt_, ushort, int,    v_int32)
 DEF_CVT_FUNC(16u32f, cvt_, ushort, float,  v_float32)
 DEF_CVT_FUNC(16u64f, cvt_, ushort, double, v_int32)
-DEF_CVT_FUNC(16u16f, cvt1_,ushort, float16_t, v_float32)
+DEF_CVT_FUNC(16u16f, cvt1_,ushort, hfloat, v_float32)

 ////////////////////// 16s -> ... ////////////////////////

@ -218,7 +218,7 @@ DEF_CVT_FUNC(16s16u, cvt_, short, ushort, v_int32)
 DEF_CVT_FUNC(16s32s, cvt_, short, int,    v_int32)
 DEF_CVT_FUNC(16s32f, cvt_, short, float,  v_float32)
 DEF_CVT_FUNC(16s64f, cvt_, short, double, v_int32)
-DEF_CVT_FUNC(16s16f, cvt1_,short, float16_t, v_float32)
+DEF_CVT_FUNC(16s16f, cvt1_,short, hfloat, v_float32)

 ////////////////////// 32s -> ... ////////////////////////

@ -228,7 +228,7 @@ DEF_CVT_FUNC(32s16u, cvt_, int, ushort, v_int32)
 DEF_CVT_FUNC(32s16s, cvt_, int, short,  v_int32)
 DEF_CVT_FUNC(32s32f, cvt_, int, float,  v_float32)
 DEF_CVT_FUNC(32s64f, cvt_, int, double, v_int32)
-DEF_CVT_FUNC(32s16f, cvt1_,int, float16_t, v_float32)
+DEF_CVT_FUNC(32s16f, cvt1_,int, hfloat, v_float32)

 ////////////////////// 32f -> ... ////////////////////////

@ -238,7 +238,7 @@ DEF_CVT_FUNC(32f16u, cvt_, float, ushort, v_float32)
 DEF_CVT_FUNC(32f16s, cvt_, float, short,  v_float32)
 DEF_CVT_FUNC(32f32s, cvt_, float, int,    v_float32)
 DEF_CVT_FUNC(32f64f, cvt_, float, double, v_float32)
-DEF_CVT_FUNC(32f16f, cvt1_,float, float16_t, v_float32)
+DEF_CVT_FUNC(32f16f, cvt1_,float, hfloat, v_float32)

 ////////////////////// 64f -> ... ////////////////////////

@ -248,17 +248,17 @@ DEF_CVT_FUNC(64f16u, cvt_, double, ushort, v_int32)
 DEF_CVT_FUNC(64f16s, cvt_, double, short,  v_int32)
 DEF_CVT_FUNC(64f32s, cvt_, double, int,    v_int32)
 DEF_CVT_FUNC(64f32f, cvt_, double, float,  v_float32)
-DEF_CVT_FUNC(64f16f, cvt1_,double, float16_t, v_float32)
+DEF_CVT_FUNC(64f16f, cvt1_,double, hfloat, v_float32)

 ////////////////////// 16f -> ... ////////////////////////

-DEF_CVT_FUNC(16f8u,  cvt_,  float16_t, uchar,  v_float32)
-DEF_CVT_FUNC(16f8s,  cvt_,  float16_t, schar,  v_float32)
-DEF_CVT_FUNC(16f16u, cvt1_, float16_t, ushort, v_float32)
-DEF_CVT_FUNC(16f16s, cvt1_, float16_t, short,  v_float32)
-DEF_CVT_FUNC(16f32s, cvt1_, float16_t, int,    v_float32)
-DEF_CVT_FUNC(16f32f, cvt1_, float16_t, float,  v_float32)
-DEF_CVT_FUNC(16f64f, cvt1_, float16_t, double, v_float32)
+DEF_CVT_FUNC(16f8u,  cvt_,  hfloat, uchar,  v_float32)
+DEF_CVT_FUNC(16f8s,  cvt_,  hfloat, schar,  v_float32)
+DEF_CVT_FUNC(16f16u, cvt1_, hfloat, ushort, v_float32)
+DEF_CVT_FUNC(16f16s, cvt1_, hfloat, short,  v_float32)
+DEF_CVT_FUNC(16f32s, cvt1_, hfloat, int,    v_float32)
+DEF_CVT_FUNC(16f32f, cvt1_, hfloat, float,  v_float32)
+DEF_CVT_FUNC(16f64f, cvt1_, hfloat, double, v_float32)

 ///////////// "conversion" w/o conversion ///////////////

--- a/modules/core/src/convert_scale.simd.hpp
+++ b/modules/core/src/convert_scale.simd.hpp
@ -232,7 +232,7 @@ DEF_CVT_SCALE_FUNC(16s8u,  cvt_32f, short,  uchar, float)
 DEF_CVT_SCALE_FUNC(32s8u,  cvt_32f, int,    uchar, float)
 DEF_CVT_SCALE_FUNC(32f8u,  cvt_32f, float,  uchar, float)
 DEF_CVT_SCALE_FUNC(64f8u,  cvt_32f, double, uchar, float)
-DEF_CVT_SCALE_FUNC(16f8u,  cvt_32f, float16_t, uchar, float)
+DEF_CVT_SCALE_FUNC(16f8u,  cvt_32f, hfloat, uchar, float)

 DEF_CVT_SCALE_FUNC(8u8s,   cvt_32f, uchar,  schar, float)
 DEF_CVT_SCALE_FUNC(8s,     cvt_32f, schar,  schar, float)
@ -241,7 +241,7 @@ DEF_CVT_SCALE_FUNC(16s8s,  cvt_32f, short,  schar, float)
 DEF_CVT_SCALE_FUNC(32s8s,  cvt_32f, int,    schar, float)
 DEF_CVT_SCALE_FUNC(32f8s,  cvt_32f, float,  schar, float)
 DEF_CVT_SCALE_FUNC(64f8s,  cvt_32f, double, schar, float)
-DEF_CVT_SCALE_FUNC(16f8s,  cvt_32f, float16_t, schar, float)
+DEF_CVT_SCALE_FUNC(16f8s,  cvt_32f, hfloat, schar, float)

 DEF_CVT_SCALE_FUNC(8u16u,  cvt_32f, uchar,  ushort, float)
 DEF_CVT_SCALE_FUNC(8s16u,  cvt_32f, schar,  ushort, float)
@ -250,7 +250,7 @@ DEF_CVT_SCALE_FUNC(16s16u, cvt_32f, short,  ushort, float)
 DEF_CVT_SCALE_FUNC(32s16u, cvt_32f, int,    ushort, float)
 DEF_CVT_SCALE_FUNC(32f16u, cvt_32f, float,  ushort, float)
 DEF_CVT_SCALE_FUNC(64f16u, cvt_32f, double, ushort, float)
-DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, float16_t, ushort, float)
+DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, hfloat, ushort, float)

 DEF_CVT_SCALE_FUNC(8u16s,  cvt_32f, uchar,  short, float)
 DEF_CVT_SCALE_FUNC(8s16s,  cvt_32f, schar,  short, float)
@ -259,7 +259,7 @@ DEF_CVT_SCALE_FUNC(16s,    cvt_32f, short,  short, float)
 DEF_CVT_SCALE_FUNC(32s16s, cvt_32f, int,    short, float)
 DEF_CVT_SCALE_FUNC(32f16s, cvt_32f, float,  short, float)
 DEF_CVT_SCALE_FUNC(64f16s, cvt_32f, double, short, float)
-DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, float16_t, short, float)
+DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, hfloat, short, float)

 DEF_CVT_SCALE_FUNC(8u32s,  cvt_32f, uchar,  int, float)
 DEF_CVT_SCALE_FUNC(8s32s,  cvt_32f, schar,  int, float)
@ -268,7 +268,7 @@ DEF_CVT_SCALE_FUNC(16s32s, cvt_32f, short,  int, float)
 DEF_CVT_SCALE_FUNC(32s,    cvt_64f, int,    int, double)
 DEF_CVT_SCALE_FUNC(32f32s, cvt_32f, float,  int, float)
 DEF_CVT_SCALE_FUNC(64f32s, cvt_64f, double, int, double)
-DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, float16_t, int, float)
+DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, hfloat, int, float)

 DEF_CVT_SCALE_FUNC(8u32f,  cvt_32f, uchar,  float, float)
 DEF_CVT_SCALE_FUNC(8s32f,  cvt_32f, schar,  float, float)
@@ -277,7 +277,7 @@ DEF_CVT_SCALE_FUNC(16s32f, cvt_32f, short,  float, float)
 DEF_CVT_SCALE_FUNC(32s32f, cvt_32f, int,    float, float)
 DEF_CVT_SCALE_FUNC(32f,    cvt_32f, float,  float, float)
 DEF_CVT_SCALE_FUNC(64f32f, cvt_64f, double, float, double)
-DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, float16_t, float, float)
+DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, hfloat, float, float)

 DEF_CVT_SCALE_FUNC(8u64f,  cvt_64f, uchar,  double, double)
 DEF_CVT_SCALE_FUNC(8s64f,  cvt_64f, schar,  double, double)
@@ -286,16 +286,16 @@ DEF_CVT_SCALE_FUNC(16s64f, cvt_64f, short,  double, double)
 DEF_CVT_SCALE_FUNC(32s64f, cvt_64f, int,    double, double)
 DEF_CVT_SCALE_FUNC(32f64f, cvt_64f, float,  double, double)
 DEF_CVT_SCALE_FUNC(64f,    cvt_64f, double, double, double)
-DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, float16_t, double, double)
+DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, hfloat, double, double)

-DEF_CVT_SCALE_FUNC(8u16f,  cvt1_32f, uchar,  float16_t, float)
-DEF_CVT_SCALE_FUNC(8s16f,  cvt1_32f, schar,  float16_t, float)
-DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, float16_t, float)
-DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short,  float16_t, float)
-DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int,    float16_t, float)
-DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float,  float16_t, float)
-DEF_CVT_SCALE_FUNC(64f16f, cvt_64f,  double, float16_t, double)
-DEF_CVT_SCALE_FUNC(16f,    cvt1_32f, float16_t, float16_t, float)
+DEF_CVT_SCALE_FUNC(8u16f,  cvt1_32f, uchar,  hfloat, float)
+DEF_CVT_SCALE_FUNC(8s16f,  cvt1_32f, schar,  hfloat, float)
+DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, hfloat, float)
+DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short,  hfloat, float)
+DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int,    hfloat, float)
+DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float,  hfloat, float)
+DEF_CVT_SCALE_FUNC(64f16f, cvt_64f,  double, hfloat, double)
+DEF_CVT_SCALE_FUNC(16f,    cvt1_32f, hfloat, hfloat, float)

 BinaryFunc getCvtScaleAbsFunc(int depth)
 {
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@@ -93,7 +93,7 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
        scalarToRawData_<double>(s, (double*)_buf, cn, unroll_to);
        break;
    case CV_16F:
-        scalarToRawData_<float16_t>(s, (float16_t*)_buf, cn, unroll_to);
+        scalarToRawData_<hfloat>(s, (hfloat*)_buf, cn, unroll_to);
        break;
    default:
        CV_Error(cv::Error::StsUnsupportedFormat,"");
--- a/modules/core/src/norm.cpp
+++ b/modules/core/src/norm.cpp
@@ -752,7 +752,7 @@ double norm( InputArray _src, int normType, InputArray _mask )
            for (int j = 0; j < total; j += blockSize)
            {
                int bsz = std::min(total - j, blockSize);
-                hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
+                hal::cvt16f32f((const hfloat*)ptrs[0], data0, bsz * cn);
                func((uchar*)data0, ptrs[1], (uchar*)&result.f, bsz, cn);
                ptrs[0] += bsz*esz;
                if (ptrs[1])
@@ -1222,8 +1222,8 @@ double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask
            for (int j = 0; j < total; j += blockSize)
            {
                int bsz = std::min(total - j, blockSize);
-                hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
-                hal::cvt16f32f((const float16_t*)ptrs[1], data1, bsz * cn);
+                hal::cvt16f32f((const hfloat*)ptrs[0], data0, bsz * cn);
+                hal::cvt16f32f((const hfloat*)ptrs[1], data1, bsz * cn);
                func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.f, bsz, cn);
                ptrs[0] += bsz*esz;
                ptrs[1] += bsz*esz;
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -7210,7 +7210,7 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)

    typedef std::string (* func_t)(const Mat &);
    static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
-                                    kerToStr<int>, kerToStr<float>, kerToStr<double>, kerToStr<float16_t> };
+                                    kerToStr<int>, kerToStr<float>, kerToStr<double>, kerToStr<hfloat> };
    const func_t func = funcs[ddepth];
    CV_Assert(func != 0);

--- a/modules/core/src/out.cpp
+++ b/modules/core/src/out.cpp
@@ -77,7 +77,7 @@ namespace cv
        void valueToStr32s() { snprintf(buf, sizeof(buf), "%d", mtx.ptr<int>(row, col)[cn]); }
        void valueToStr32f() { snprintf(buf, sizeof(buf), floatFormat, mtx.ptr<float>(row, col)[cn]); }
        void valueToStr64f() { snprintf(buf, sizeof(buf), floatFormat, mtx.ptr<double>(row, col)[cn]); }
-        void valueToStr16f() { snprintf(buf, sizeof(buf), floatFormat, (float)mtx.ptr<float16_t>(row, col)[cn]); }
+        void valueToStr16f() { snprintf(buf, sizeof(buf), floatFormat, (float)mtx.ptr<hfloat>(row, col)[cn]); }
        void valueToStrOther() { buf[0] = 0; }

    public:
--- a/modules/core/src/persistence.cpp
+++ b/modules/core/src/persistence.cpp
@@ -270,7 +270,7 @@ int calcStructSize( const char* dt, int initial_size )
        case 'i': { elem_max_size = std::max( elem_max_size, sizeof(int   ) ); break; }
        case 'f': { elem_max_size = std::max( elem_max_size, sizeof(float ) ); break; }
        case 'd': { elem_max_size = std::max( elem_max_size, sizeof(double) ); break; }
-        case 'h': { elem_max_size = std::max(elem_max_size, sizeof(float16_t)); break; }
+        case 'h': { elem_max_size = std::max(elem_max_size, sizeof(hfloat)); break; }
        default:
            CV_Error_(Error::StsNotImplemented, ("Unknown type identifier: '%c' in '%s'", (char)(*type), dt));
        }
@@ -1129,8 +1129,8 @@ void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, s
                        data += sizeof(double);
                        break;
                    case CV_16F: /* reference */
-                        ptr = fs::floatToString(buf, sizeof(buf), (float) *(float16_t *) data, true, explicitZero);
-                        data += sizeof(float16_t);
+                        ptr = fs::floatToString(buf, sizeof(buf), (float) *(hfloat *) data, true, explicitZero);
+                        data += sizeof(hfloat);
                        break;
                    default:
                        CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported type");
@@ -1809,7 +1809,7 @@ char *FileStorage::Impl::parseBase64(char *ptr, int indent, FileNode &collection
                        node_type = FileNode::REAL;
                        break;
                    case CV_16F:
-                        fval = (float) float16_t::fromBits(base64decoder.getUInt16());
+                        fval = float(hfloatFromBits(base64decoder.getUInt16()));
                        node_type = FileNode::REAL;
                        break;
                    default:
@@ -2600,8 +2600,8 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si
                            data += sizeof(double);
                            break;
                        case CV_16F:
-                            *(float16_t*)data = float16_t((float)ival);
-                            data += sizeof(float16_t);
+                            *(hfloat*)data = hfloat((float)ival);
+                            data += sizeof(hfloat);
                            break;
                        default:
                            CV_Error( Error::StsUnsupportedFormat, "Unsupported type" );
@@ -2642,8 +2642,8 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si
                            data += sizeof(double);
                            break;
                        case CV_16F:
-                            *(float16_t*)data = float16_t((float)fval);
-                            data += sizeof(float16_t);
+                            *(hfloat*)data = hfloat((float)fval);
+                            data += sizeof(hfloat);
                            break;
                        default:
                            CV_Error( Error::StsUnsupportedFormat, "Unsupported type" );
--- a/modules/core/src/rand.cpp
+++ b/modules/core/src/rand.cpp
@@ -195,7 +195,7 @@ randf_64f( double* arr, int len, uint64* state, const Vec2d* p, void*, bool )
    hal::addRNGBias64f(arr, &p[0][0], len);
 }

-static void randf_16f( float16_t* arr, int len, uint64* state, const Vec2f* p, float* fbuf, bool )
+static void randf_16f( hfloat* arr, int len, uint64* state, const Vec2f* p, float* fbuf, bool )
 {
    uint64 temp = *state;
    for( int i = 0; i < len; i++ )
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@@ -1557,14 +1557,14 @@ template<typename R> struct TheTest
        AlignedData<v_float32> data_f32; data_f32.a.clear();
        AlignedData<v_uint16> out;

-        R r1 = vx_load_expand((const cv::float16_t*)data.a.d);
+        R r1 = vx_load_expand((const cv::hfloat*)data.a.d);
        R r2(r1);
        EXPECT_EQ(1.0f, v_get0(r1));
        v_store(data_f32.a.d, r2);
        EXPECT_EQ(-2.0f, data_f32.a.d[VTraits<R>::vlanes() - 1]);

        out.a.clear();
-        v_pack_store((cv::float16_t*)out.a.d, r2);
+        v_pack_store((cv::hfloat*)out.a.d, r2);
        for (int i = 0; i < VTraits<R>::vlanes(); ++i)
        {
            EXPECT_EQ(data.a[i], out.a[i]) << "i=" << i;
@@ -1588,7 +1588,7 @@ template<typename R> struct TheTest

        // check some initialization methods
        R r1 = data.u;
-        R r2 = vx_load_expand((const float16_t*)data.a.d);
+        R r2 = vx_load_expand((const hfloat*)data.a.d);
        R r3(r2);
        EXPECT_EQ(data.u[0], v_get0(r1));
        EXPECT_EQ(data.a[0], v_get0(r2));
--- a/modules/gapi/src/backends/ie/giebackend.cpp
+++ b/modules/gapi/src/backends/ie/giebackend.cpp
@@ -381,7 +381,7 @@ inline void copyFromIE(const IE::Blob::Ptr &blob, MatType &mat) {
        HANDLE(U8, uint8_t);
        HANDLE(FP32, float);
        HANDLE(I32, int);
-        HANDLE(FP16, cv::float16_t);
+        HANDLE(FP16, cv::hfloat);
 #undef HANDLE
        case IE::Precision::I64: {
            GAPI_LOG_WARNING(NULL, "INT64 isn't supported for cv::Mat. Conversion to INT32 is used.");
--- a/modules/gapi/test/common/gapi_tests_common.hpp
+++ b/modules/gapi/test/common/gapi_tests_common.hpp
@@ -370,7 +370,7 @@ public:
            initMatByPointsVectorRandU<Pt<double>>(sz_in);
            break;
        case CV_16F:
-            initMatByPointsVectorRandU<Pt<cv::float16_t>>(sz_in);
+            initMatByPointsVectorRandU<Pt<cv::hfloat>>(sz_in);
            break;
        default:
            GAPI_Error("Unsupported depth");