diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 03f65ad328..65f2f3d7db 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -809,40 +809,20 @@ using std::uint64_t; namespace cv { -class float16_t +class hfloat { public: #if CV_FP16_TYPE - float16_t() : h(0) {} - explicit float16_t(float x) { h = (__fp16)x; } + hfloat() : h(0) {} + explicit hfloat(float x) { h = (__fp16)x; } operator float() const { return (float)h; } - static float16_t fromBits(ushort w) - { - Cv16suf u; - u.u = w; - float16_t result; - result.h = u.h; - return result; - } - static float16_t zero() - { - float16_t result; - result.h = (__fp16)0; - return result; - } - ushort bits() const - { - Cv16suf u; - u.h = h; - return u.u; - } protected: __fp16 h; #else - float16_t() : w(0) {} - explicit float16_t(float x) + hfloat() : w(0) {} + explicit hfloat(float x) { #if CV_FP16 && CV_AVX2 __m128 v = _mm_load_ss(&x); @@ -893,25 +873,35 @@ protected: #endif } - static float16_t fromBits(ushort b) - { - float16_t result; - result.w = b; - return result; - } - static float16_t zero() - { - float16_t result; - result.w = (ushort)0; - return result; - } - ushort bits() const { return w; } protected: ushort w; #endif }; +inline hfloat hfloatFromBits(ushort w) { +#if CV_FP16_TYPE + Cv16suf u; + u.u = w; + hfloat res(float(u.h)); + return res; +#else + Cv32suf out; + + unsigned t = ((w & 0x7fff) << 13) + 0x38000000; + unsigned sign = (w & 0x8000) << 16; + unsigned e = w & 0x7c00; + + out.u = t + (1 << 23); + out.u = (e >= 0x7c00 ? t + 0x38000000 : + e == 0 ? (static_cast(out.f -= 6.103515625e-05f), out.u) : t) | sign; + hfloat res(out.f); + return res; +#endif +} + +typedef hfloat float16_t; + } #endif diff --git a/modules/core/include/opencv2/core/hal/hal.hpp b/modules/core/include/opencv2/core/hal/hal.hpp index 0d68078d98..24a6077101 100644 --- a/modules/core/include/opencv2/core/hal/hal.hpp +++ b/modules/core/include/opencv2/core/hal/hal.hpp @@ -195,8 +195,8 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); -CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len ); -CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len ); +CV_EXPORTS void cvt16f32f( const hfloat* src, float* dst, int len ); +CV_EXPORTS void cvt32f16f( const float* src, hfloat* dst, int len ); CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len ); CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len ); diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 7897fb503f..27beccd9ab 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -708,7 +708,7 @@ namespace CV__SIMD_NAMESPACE { inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); } inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); } inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); } - inline v_float32 vx_load_expand(const float16_t * ptr) { return VXPREFIX(_load_expand)(ptr); } + inline v_float32 vx_load_expand(const hfloat * ptr) { return VXPREFIX(_load_expand)(ptr); } //! @} //! @name Wide load with quad expansion diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 979b6163d8..eed609f80e 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -3137,7 +3137,7 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, u // FP16 // -inline v_float32x8 v256_load_expand(const float16_t* ptr) +inline v_float32x8 v256_load_expand(const hfloat* ptr) { #if CV_FP16 return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr))); @@ -3149,7 +3149,7 @@ inline v_float32x8 v256_load_expand(const float16_t* ptr) #endif } -inline void v_pack_store(float16_t* ptr, const v_float32x8& a) +inline void v_pack_store(hfloat* ptr, const v_float32x8& a) { #if CV_FP16 __m128i ah = _mm256_cvtps_ph(a.val, 0); @@ -3158,7 +3158,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x8& a) float CV_DECL_ALIGNED(32) buf[8]; v_store_aligned(buf, a); for (int i = 0; i < 8; i++) - ptr[i] = float16_t(buf[i]); + ptr[i] = hfloat(buf[i]); #endif } diff --git a/modules/core/include/opencv2/core/hal/intrin_avx512.hpp b/modules/core/include/opencv2/core/hal/intrin_avx512.hpp index d20d6dd1ff..e59b8d92eb 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx512.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx512.hpp @@ -506,12 +506,12 @@ inline v_float64x8 v_reinterpret_as_f64(const v_float32x16& a) { return v_float64x8(_mm512_castps_pd(a.val)); } // FP16 -inline v_float32x16 v512_load_expand(const float16_t* ptr) +inline v_float32x16 v512_load_expand(const hfloat* ptr) { return v_float32x16(_mm512_cvtph_ps(_mm256_loadu_si256((const __m256i*)ptr))); } -inline void v_pack_store(float16_t* ptr, const v_float32x16& a) +inline void v_pack_store(hfloat* ptr, const v_float32x16& a) { __m256i ah = _mm512_cvtps_ph(a.val, 0); _mm256_storeu_si256((__m256i*)ptr, ah); diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index e9a09d12ae..8619fec60c 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -3251,7 +3251,7 @@ template inline v_reg v_dotprod_expand_fast(const v_reg -v_load_expand(const float16_t* ptr) +v_load_expand(const hfloat* ptr) { v_reg v; for( int i = 0; i < v.nlanes; i++ ) @@ -3262,7 +3262,7 @@ v_load_expand(const float16_t* ptr) } #if CV_SIMD256 inline v_reg -v256_load_expand(const float16_t* ptr) +v256_load_expand(const hfloat* ptr) { v_reg v; for (int i = 0; i < v.nlanes; i++) @@ -3274,7 +3274,7 @@ v256_load_expand(const float16_t* ptr) #endif #if CV_SIMD512 inline v_reg -v512_load_expand(const float16_t* ptr) +v512_load_expand(const hfloat* ptr) { v_reg v; for (int i = 0; i < v.nlanes; i++) @@ -3286,11 +3286,11 @@ v512_load_expand(const float16_t* ptr) #endif template inline void -v_pack_store(float16_t* ptr, const v_reg& v) +v_pack_store(hfloat* ptr, const v_reg& v) { for( int i = 0; i < v.nlanes; i++ ) { - ptr[i] = float16_t(v.s[i]); + ptr[i] = hfloat(v.s[i]); } } diff --git a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp index 6546d6db7d..4a98dbf96e 100644 --- a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp @@ -2983,7 +2983,7 @@ OPENCV_HAL_IMPL_LASX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, // FP16 // -inline v_float32x8 v256_load_expand(const float16_t* ptr) +inline v_float32x8 v256_load_expand(const hfloat* ptr) { #if CV_FP16 //1-load128, 2-permi, 3-cvt @@ -2996,7 +2996,7 @@ inline v_float32x8 v256_load_expand(const float16_t* ptr) #endif } -inline void v_pack_store(float16_t* ptr, const v_float32x8& a) +inline void v_pack_store(hfloat* ptr, const v_float32x8& a) { #if CV_FP16 __m256i ah = __lasx_xvfcvt_h_s(a.val, a.val); @@ -3005,7 +3005,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x8& a) float CV_DECL_ALIGNED(32) buf[8]; v_store_aligned(buf, a); for (int i = 0; i < 8; i++) - ptr[i] = float16_t(buf[i]); + ptr[i] = hfloat(buf[i]); #endif } diff --git a/modules/core/include/opencv2/core/hal/intrin_lsx.hpp b/modules/core/include/opencv2/core/hal/intrin_lsx.hpp index ef83a2e466..6e3290426f 100644 --- a/modules/core/include/opencv2/core/hal/intrin_lsx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_lsx.hpp @@ -2498,7 +2498,7 @@ OPENCV_HAL_IMPL_LSX_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, u // FP16 // -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { #if CV_FP16 return v_float32x4(__lsx_vfcvtl_s_h((__m128)__lsx_vld(ptr, 0))); @@ -2510,7 +2510,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr) #endif } -inline void v_pack_store(float16_t* ptr, const v_float32x4& a) +inline void v_pack_store(hfloat* ptr, const v_float32x4& a) { #if CV_FP16 __m128i res = (__m218i)__lsx_vfcvt_h_s(a.val, a.val); @@ -2519,7 +2519,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& a) float CV_DECL_ALIGNED(32) buf[4]; v_store_aligned(buf, a); for (int i = 0; i < 4; i++) - ptr[i] = float16_t(buf[i]); + ptr[i] = hfloat(buf[i]); #endif } diff --git a/modules/core/include/opencv2/core/hal/intrin_msa.hpp b/modules/core/include/opencv2/core/hal/intrin_msa.hpp index c035fdad60..23d6ebd3d1 100644 --- a/modules/core/include/opencv2/core/hal/intrin_msa.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_msa.hpp @@ -1838,7 +1838,7 @@ inline v_float32x4 v_broadcast_element(const v_float32x4& a) ////// FP16 support /////// #if CV_FP16 -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { #ifndef msa_ld1_f16 v4f16 v = (v4f16)msa_ld1_s16((const short*)ptr); @@ -1848,7 +1848,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr) return v_float32x4(msa_cvt_f32_f16(v)); } -inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +inline void v_pack_store(hfloat* ptr, const v_float32x4& v) { v4f16 hv = msa_cvt_f16_f32(v.val); @@ -1859,7 +1859,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v) #endif } #else -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { float buf[4]; for( int i = 0; i < 4; i++ ) @@ -1867,12 +1867,12 @@ inline v_float32x4 v_load_expand(const float16_t* ptr) return v_load(buf); } -inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +inline void v_pack_store(hfloat* ptr, const v_float32x4& v) { float buf[4]; v_store(buf, v); for( int i = 0; i < 4; i++ ) - ptr[i] = (float16_t)buf[i]; + ptr[i] = (hfloat)buf[i]; } #endif diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index 85b62a58f0..6e843d68ea 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -2605,7 +2605,7 @@ inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_flo ////// FP16 support /////// #if CV_FP16 -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { float16x4_t v = #ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro @@ -2616,7 +2616,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr) return v_float32x4(vcvt_f32_f16(v)); } -inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +inline void v_pack_store(hfloat* ptr, const v_float32x4& v) { float16x4_t hv = vcvt_f16_f32(v.val); @@ -2627,7 +2627,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v) #endif } #else -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { const int N = 4; float buf[N]; @@ -2635,12 +2635,12 @@ inline v_float32x4 v_load_expand(const float16_t* ptr) return v_load(buf); } -inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +inline void v_pack_store(hfloat* ptr, const v_float32x4& v) { const int N = 4; float buf[N]; v_store(buf, v); - for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]); + for( int i = 0; i < N; i++ ) ptr[i] = hfloat(buf[i]); } #endif diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp index d70660249c..d446a05db5 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp @@ -2873,17 +2873,17 @@ inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } ////// FP16 support /////// #if CV_FP16 -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { return v_float32x4(vfwcvt_f_f_v_f32m1(vle16_v_f16mf2(ptr, 4), 4)); } -inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +inline void v_pack_store(hfloat* ptr, const v_float32x4& v) { vse16_v_f16mf2(ptr, vfncvt_f_f_w_f16mf2(v, 4), 4); } #else -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { const int N = 4; float buf[N]; @@ -2891,12 +2891,12 @@ inline v_float32x4 v_load_expand(const float16_t* ptr) return v_load(buf); } -inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +inline void v_pack_store(hfloat* ptr, const v_float32x4& v) { const int N = 4; float buf[N]; v_store(buf, v); - for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]); + for( int i = 0; i < N; i++ ) ptr[i] = hfloat(buf[i]); } #endif diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp index 9f6657760e..5681ae211d 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp @@ -2858,14 +2858,14 @@ inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, #endif ////// FP16 support /////// #if __riscv_v == 7000 -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { vfloat16m1_t v = vle16_v_f16m1((__fp16*)ptr, 4); vfloat32m2_t v32 = vfwcvt_f_f_v_f32m2(v, 4); return v_float32x4(vget_v_f32m2_f32m1(v32, 0)); } -inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +inline void v_pack_store(hfloat* ptr, const v_float32x4& v) { vfloat32m2_t v32 = vundefined_f32m2(); v32 = vset_v_f32m1_f32m2(v32, 0, v.val); @@ -2873,14 +2873,14 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v) vse16_v_f16m1((__fp16*)ptr, hv, 4); } #else -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { vfloat16mf2_t v = vle16_v_f16mf2((__fp16*)ptr, 4); vfloat32m1_t v32 = vfwcvt_f_f_v_f32m1(v, 4); return v_float32x4(v32); } -inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +inline void v_pack_store(hfloat* ptr, const v_float32x4& v) { //vfloat32m2_t v32 = vundefined_f32m2(); //v32 = vset_f32m2(v32, 0, v.val); diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp index 9724ea27b8..87531ede1e 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp @@ -1810,28 +1810,28 @@ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float64, vlmul_trunc_u8mf8) ////// FP16 support /////// #if defined(__riscv_zfh) && __riscv_zfh -inline v_float32 v_load_expand(const float16_t* ptr) +inline v_float32 v_load_expand(const hfloat* ptr) { return vfwcvt_f(vle16_v_f16mf2((_Float16*)ptr, VTraits::vlanes()) ,VTraits::vlanes());; } -inline void v_pack_store(float16_t* ptr, const v_float32& v) +inline void v_pack_store(hfloat* ptr, const v_float32& v) { vse16_v_f16mf2((_Float16*)ptr, vfncvt_f_f_w_f16mf2(v, VTraits::vlanes()), VTraits::vlanes()); } #else -inline v_float32 v_load_expand(const float16_t* ptr) +inline v_float32 v_load_expand(const hfloat* ptr) { float buf[32]; for( int i = 0; i < VTraits::vlanes(); i++ ) buf[i] = (float)ptr[i]; return v_load(buf); } -inline void v_pack_store(float16_t* ptr, const v_float32& v) +inline void v_pack_store(hfloat* ptr, const v_float32& v) { float buf[32]; v_store(buf, v); - for( int i = 0; i < VTraits::vlanes(); i++ ) ptr[i] = float16_t(buf[i]); + for( int i = 0; i < VTraits::vlanes(); i++ ) ptr[i] = hfloat(buf[i]); } #endif ////////////// Rounding ////////////// diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index 9d17f71666..68b5a67bbc 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -3407,7 +3407,7 @@ inline v_float32x4 v_broadcast_element(const v_float32x4& v) ////////////// FP16 support /////////////////////////// -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { #if CV_FP16 return v_float32x4(_mm_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr))); @@ -3427,7 +3427,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr) #endif } -inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +inline void v_pack_store(hfloat* ptr, const v_float32x4& v) { #if CV_FP16 __m128i fp16_value = _mm_cvtps_ph(v.val, 0); diff --git a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp index b198643cc6..e66563bede 100644 --- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp @@ -1361,7 +1361,7 @@ inline v_float32x4 v_pack_triplets(const v_float32x4& vec) /////// FP16 support //////// -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { vec_ushort8 vf16 = vec_ld_l8((const ushort*)ptr); #if CV_VSX3 && defined(vec_extract_fp_from_shorth) @@ -1388,7 +1388,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr) #endif } -inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +inline void v_pack_store(hfloat* ptr, const v_float32x4& v) { // fixme: Is there any builtin op or intrinsic that cover "xvcvsphp"? #if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) diff --git a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp index db3cb2a9ae..5d470d9419 100644 --- a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp @@ -2754,7 +2754,7 @@ inline v_float32x4 v_broadcast_element(const v_float32x4& a) ////////////// FP16 support /////////////////////////// -inline v_float32x4 v_load_expand(const float16_t* ptr) +inline v_float32x4 v_load_expand(const hfloat* ptr) { float a[4]; for (int i = 0; i < 4; i++) @@ -2762,14 +2762,14 @@ inline v_float32x4 v_load_expand(const float16_t* ptr) return v_float32x4(wasm_v128_load(a)); } -inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +inline void v_pack_store(hfloat* ptr, const v_float32x4& v) { double v_[4]; wasm_v128_store(v_, v.val); - ptr[0] = float16_t(v_[0]); - ptr[1] = float16_t(v_[1]); - ptr[2] = float16_t(v_[2]); - ptr[3] = float16_t(v_[3]); + ptr[0] = hfloat(v_[0]); + ptr[1] = hfloat(v_[1]); + ptr[2] = hfloat(v_[2]); + ptr[3] = hfloat(v_[3]); } inline void v_cleanup() {} diff --git a/modules/core/include/opencv2/core/saturate.hpp b/modules/core/include/opencv2/core/saturate.hpp index e0cc965ab6..18ffd1c7af 100644 --- a/modules/core/include/opencv2/core/saturate.hpp +++ b/modules/core/include/opencv2/core/saturate.hpp @@ -158,20 +158,20 @@ template<> inline uint64 saturate_cast(int64 v) { return (uint64)st template<> inline int64 saturate_cast(uint64 v) { return (int64)std::min(v, (uint64)LLONG_MAX); } /** @overload */ -template static inline _Tp saturate_cast(float16_t v) { return saturate_cast<_Tp>((float)v); } +template static inline _Tp saturate_cast(hfloat v) { return saturate_cast<_Tp>((float)v); } // in theory, we could use a LUT for 8u/8s->16f conversion, // but with hardware support for FP32->FP16 conversion the current approach is preferable -template<> inline float16_t saturate_cast(uchar v) { return float16_t((float)v); } -template<> inline float16_t saturate_cast(schar v) { return float16_t((float)v); } -template<> inline float16_t saturate_cast(ushort v) { return float16_t((float)v); } -template<> inline float16_t saturate_cast(short v) { return float16_t((float)v); } -template<> inline float16_t saturate_cast(unsigned v){ return float16_t((float)v); } -template<> inline float16_t saturate_cast(int v) { return float16_t((float)v); } -template<> inline float16_t saturate_cast(uint64 v) { return float16_t((float)v); } -template<> inline float16_t saturate_cast(int64 v) { return float16_t((float)v); } -template<> inline float16_t saturate_cast(float v) { return float16_t(v); } -template<> inline float16_t saturate_cast(double v) { return float16_t((float)v); } +template<> inline hfloat saturate_cast(uchar v) { return hfloat((float)v); } +template<> inline hfloat saturate_cast(schar v) { return hfloat((float)v); } +template<> inline hfloat saturate_cast(ushort v) { return hfloat((float)v); } +template<> inline hfloat saturate_cast(short v) { return hfloat((float)v); } +template<> inline hfloat saturate_cast(unsigned v){ return hfloat((float)v); } +template<> inline hfloat saturate_cast(int v) { return hfloat((float)v); } +template<> inline hfloat saturate_cast(uint64 v) { return hfloat((float)v); } +template<> inline hfloat saturate_cast(int64 v) { return hfloat((float)v); } +template<> inline hfloat saturate_cast(float v) { return hfloat(v); } +template<> inline hfloat saturate_cast(double v) { return hfloat((float)v); } //! @} diff --git a/modules/core/include/opencv2/core/traits.hpp b/modules/core/include/opencv2/core/traits.hpp index 52ab083ca4..522519389b 100644 --- a/modules/core/include/opencv2/core/traits.hpp +++ b/modules/core/include/opencv2/core/traits.hpp @@ -261,10 +261,10 @@ public: }; }; -template<> class DataType +template<> class DataType { public: - typedef float16_t value_type; + typedef hfloat value_type; typedef float work_type; typedef value_type channel_type; typedef value_type vec_type; @@ -347,7 +347,7 @@ template<> class TypeDepth template<> class TypeDepth { enum { depth = CV_16F }; - typedef float16_t value_type; + typedef hfloat value_type; }; #endif diff --git a/modules/core/src/convert.dispatch.cpp b/modules/core/src/convert.dispatch.cpp index 58ca43187a..2b4035285f 100644 --- a/modules/core/src/convert.dispatch.cpp +++ b/modules/core/src/convert.dispatch.cpp @@ -11,13 +11,13 @@ namespace cv { namespace hal { -void cvt16f32f(const float16_t* src, float* dst, int len) +void cvt16f32f(const hfloat* src, float* dst, int len) { CV_INSTRUMENT_REGION(); CV_CPU_DISPATCH(cvt16f32f, (src, dst, len), CV_CPU_DISPATCH_MODES_ALL); } -void cvt32f16f(const float* src, float16_t* dst, int len) +void cvt32f16f(const float* src, hfloat* dst, int len) { CV_INSTRUMENT_REGION(); CV_CPU_DISPATCH(cvt32f16f, (src, dst, len), diff --git a/modules/core/src/convert.hpp b/modules/core/src/convert.hpp index 65a998bd8f..177f236ee7 100644 --- a/modules/core/src/convert.hpp +++ b/modules/core/src/convert.hpp @@ -31,7 +31,7 @@ static inline void vx_load_as(const int* ptr, v_float32& a) static inline void vx_load_as(const float* ptr, v_float32& a) { a = vx_load(ptr); } -static inline void vx_load_as(const float16_t* ptr, v_float32& a) +static inline void vx_load_as(const hfloat* ptr, v_float32& a) { a = vx_load_expand(ptr); } static inline void v_store_as(ushort* ptr, const v_float32& a) @@ -46,7 +46,7 @@ static inline void v_store_as(int* ptr, const v_float32& a) static inline void v_store_as(float* ptr, const v_float32& a) { v_store(ptr, a); } -static inline void v_store_as(float16_t* ptr, const v_float32& a) +static inline void v_store_as(hfloat* ptr, const v_float32& a) { v_pack_store(ptr, a); } static inline void vx_load_pair_as(const uchar* ptr, v_uint16& a, v_uint16& b) @@ -150,7 +150,7 @@ static inline void vx_load_pair_as(const int* ptr, v_float32& a, v_float32& b) static inline void vx_load_pair_as(const float* ptr, v_float32& a, v_float32& b) { a = vx_load(ptr); b = vx_load(ptr + VTraits::vlanes()); } -static inline void vx_load_pair_as(const float16_t* ptr, v_float32& a, v_float32& b) +static inline void vx_load_pair_as(const hfloat* ptr, v_float32& a, v_float32& b) { a = vx_load_expand(ptr); b = vx_load_expand(ptr + VTraits::vlanes()); @@ -294,7 +294,7 @@ static inline void vx_load_pair_as(const double* ptr, v_float64& a, v_float64& b b = vx_load(ptr + VTraits::vlanes()); } -static inline void vx_load_pair_as(const float16_t* ptr, v_float64& a, v_float64& b) +static inline void vx_load_pair_as(const hfloat* ptr, v_float64& a, v_float64& b) { v_float32 v0 = vx_load_expand(ptr); a = v_cvt_f64(v0); @@ -348,7 +348,7 @@ static inline void v_store_pair_as(float* ptr, const v_float64& a, const v_float v_store(ptr, v); } -static inline void v_store_pair_as(float16_t* ptr, const v_float64& a, const v_float64& b) +static inline void v_store_pair_as(hfloat* ptr, const v_float64& a, const v_float64& b) { v_float32 v = v_cvt_f32(a, b); v_pack_store(ptr, v); diff --git a/modules/core/src/convert.simd.hpp b/modules/core/src/convert.simd.hpp index c126450a13..524c43a16e 100644 --- a/modules/core/src/convert.simd.hpp +++ b/modules/core/src/convert.simd.hpp @@ -14,8 +14,8 @@ namespace cv { namespace hal { CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN -void cvt16f32f(const float16_t* src, float* dst, int len); -void cvt32f16f(const float* src, float16_t* dst, int len); +void cvt16f32f(const hfloat* src, float* dst, int len); +void cvt32f16f(const float* src, hfloat* dst, int len); void addRNGBias32f(float* arr, const float* scaleBiasPairs, int len); void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len); @@ -35,7 +35,7 @@ CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN BinaryFunc getConvertFunc(int sdepth, int ddepth); -void cvt16f32f( const float16_t* src, float* dst, int len ) +void cvt16f32f( const hfloat* src, float* dst, int len ) { CV_INSTRUMENT_REGION(); int j = 0; @@ -56,7 +56,7 @@ void cvt16f32f( const float16_t* src, float* dst, int len ) dst[j] = (float)src[j]; } -void cvt32f16f( const float* src, float16_t* dst, int len ) +void cvt32f16f( const float* src, hfloat* dst, int len ) { CV_INSTRUMENT_REGION(); int j = 0; @@ -74,7 +74,7 @@ void cvt32f16f( const float* src, float16_t* dst, int len ) } #endif for( ; j < len; j++ ) - dst[j] = float16_t(src[j]); + dst[j] = hfloat(src[j]); } void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len ) @@ -188,7 +188,7 @@ DEF_CVT_FUNC(8u16s, cvt_, uchar, short, v_int16) DEF_CVT_FUNC(8u32s, cvt_, uchar, int, v_int32) DEF_CVT_FUNC(8u32f, cvt_, uchar, float, v_float32) DEF_CVT_FUNC(8u64f, cvt_, uchar, double, v_int32) -DEF_CVT_FUNC(8u16f, cvt1_, uchar, float16_t, v_float32) +DEF_CVT_FUNC(8u16f, cvt1_, uchar, hfloat, v_float32) ////////////////////// 8s -> ... //////////////////////// @@ -198,7 +198,7 @@ DEF_CVT_FUNC(8s16s, cvt_, schar, short, v_int16) DEF_CVT_FUNC(8s32s, cvt_, schar, int, v_int32) DEF_CVT_FUNC(8s32f, cvt_, schar, float, v_float32) DEF_CVT_FUNC(8s64f, cvt_, schar, double, v_int32) -DEF_CVT_FUNC(8s16f, cvt1_, schar, float16_t, v_float32) +DEF_CVT_FUNC(8s16f, cvt1_, schar, hfloat, v_float32) ////////////////////// 16u -> ... //////////////////////// @@ -208,7 +208,7 @@ DEF_CVT_FUNC(16u16s, cvt_, ushort, short, v_int32) DEF_CVT_FUNC(16u32s, cvt_, ushort, int, v_int32) DEF_CVT_FUNC(16u32f, cvt_, ushort, float, v_float32) DEF_CVT_FUNC(16u64f, cvt_, ushort, double, v_int32) -DEF_CVT_FUNC(16u16f, cvt1_,ushort, float16_t, v_float32) +DEF_CVT_FUNC(16u16f, cvt1_,ushort, hfloat, v_float32) ////////////////////// 16s -> ... //////////////////////// @@ -218,7 +218,7 @@ DEF_CVT_FUNC(16s16u, cvt_, short, ushort, v_int32) DEF_CVT_FUNC(16s32s, cvt_, short, int, v_int32) DEF_CVT_FUNC(16s32f, cvt_, short, float, v_float32) DEF_CVT_FUNC(16s64f, cvt_, short, double, v_int32) -DEF_CVT_FUNC(16s16f, cvt1_,short, float16_t, v_float32) +DEF_CVT_FUNC(16s16f, cvt1_,short, hfloat, v_float32) ////////////////////// 32s -> ... //////////////////////// @@ -228,7 +228,7 @@ DEF_CVT_FUNC(32s16u, cvt_, int, ushort, v_int32) DEF_CVT_FUNC(32s16s, cvt_, int, short, v_int32) DEF_CVT_FUNC(32s32f, cvt_, int, float, v_float32) DEF_CVT_FUNC(32s64f, cvt_, int, double, v_int32) -DEF_CVT_FUNC(32s16f, cvt1_,int, float16_t, v_float32) +DEF_CVT_FUNC(32s16f, cvt1_,int, hfloat, v_float32) ////////////////////// 32f -> ... //////////////////////// @@ -238,7 +238,7 @@ DEF_CVT_FUNC(32f16u, cvt_, float, ushort, v_float32) DEF_CVT_FUNC(32f16s, cvt_, float, short, v_float32) DEF_CVT_FUNC(32f32s, cvt_, float, int, v_float32) DEF_CVT_FUNC(32f64f, cvt_, float, double, v_float32) -DEF_CVT_FUNC(32f16f, cvt1_,float, float16_t, v_float32) +DEF_CVT_FUNC(32f16f, cvt1_,float, hfloat, v_float32) ////////////////////// 64f -> ... //////////////////////// @@ -248,17 +248,17 @@ DEF_CVT_FUNC(64f16u, cvt_, double, ushort, v_int32) DEF_CVT_FUNC(64f16s, cvt_, double, short, v_int32) DEF_CVT_FUNC(64f32s, cvt_, double, int, v_int32) DEF_CVT_FUNC(64f32f, cvt_, double, float, v_float32) -DEF_CVT_FUNC(64f16f, cvt1_,double, float16_t, v_float32) +DEF_CVT_FUNC(64f16f, cvt1_,double, hfloat, v_float32) ////////////////////// 16f -> ... //////////////////////// -DEF_CVT_FUNC(16f8u, cvt_, float16_t, uchar, v_float32) -DEF_CVT_FUNC(16f8s, cvt_, float16_t, schar, v_float32) -DEF_CVT_FUNC(16f16u, cvt1_, float16_t, ushort, v_float32) -DEF_CVT_FUNC(16f16s, cvt1_, float16_t, short, v_float32) -DEF_CVT_FUNC(16f32s, cvt1_, float16_t, int, v_float32) -DEF_CVT_FUNC(16f32f, cvt1_, float16_t, float, v_float32) -DEF_CVT_FUNC(16f64f, cvt1_, float16_t, double, v_float32) +DEF_CVT_FUNC(16f8u, cvt_, hfloat, uchar, v_float32) +DEF_CVT_FUNC(16f8s, cvt_, hfloat, schar, v_float32) +DEF_CVT_FUNC(16f16u, cvt1_, hfloat, ushort, v_float32) +DEF_CVT_FUNC(16f16s, cvt1_, hfloat, short, v_float32) +DEF_CVT_FUNC(16f32s, cvt1_, hfloat, int, v_float32) +DEF_CVT_FUNC(16f32f, cvt1_, hfloat, float, v_float32) +DEF_CVT_FUNC(16f64f, cvt1_, hfloat, double, v_float32) ///////////// "conversion" w/o conversion /////////////// diff --git a/modules/core/src/convert_scale.simd.hpp b/modules/core/src/convert_scale.simd.hpp index c79a33f1b1..ef6aa343e2 100644 --- a/modules/core/src/convert_scale.simd.hpp +++ b/modules/core/src/convert_scale.simd.hpp @@ -232,7 +232,7 @@ DEF_CVT_SCALE_FUNC(16s8u, cvt_32f, short, uchar, float) DEF_CVT_SCALE_FUNC(32s8u, cvt_32f, int, uchar, float) DEF_CVT_SCALE_FUNC(32f8u, cvt_32f, float, uchar, float) DEF_CVT_SCALE_FUNC(64f8u, cvt_32f, double, uchar, float) -DEF_CVT_SCALE_FUNC(16f8u, cvt_32f, float16_t, uchar, float) +DEF_CVT_SCALE_FUNC(16f8u, cvt_32f, hfloat, uchar, float) DEF_CVT_SCALE_FUNC(8u8s, cvt_32f, uchar, schar, float) DEF_CVT_SCALE_FUNC(8s, cvt_32f, schar, schar, float) @@ -241,7 +241,7 @@ DEF_CVT_SCALE_FUNC(16s8s, cvt_32f, short, schar, float) DEF_CVT_SCALE_FUNC(32s8s, cvt_32f, int, schar, float) DEF_CVT_SCALE_FUNC(32f8s, cvt_32f, float, schar, float) DEF_CVT_SCALE_FUNC(64f8s, cvt_32f, double, schar, float) -DEF_CVT_SCALE_FUNC(16f8s, cvt_32f, float16_t, schar, float) +DEF_CVT_SCALE_FUNC(16f8s, cvt_32f, hfloat, schar, float) DEF_CVT_SCALE_FUNC(8u16u, cvt_32f, uchar, ushort, float) DEF_CVT_SCALE_FUNC(8s16u, cvt_32f, schar, ushort, float) @@ -250,7 +250,7 @@ DEF_CVT_SCALE_FUNC(16s16u, cvt_32f, short, ushort, float) DEF_CVT_SCALE_FUNC(32s16u, cvt_32f, int, ushort, float) DEF_CVT_SCALE_FUNC(32f16u, cvt_32f, float, ushort, float) DEF_CVT_SCALE_FUNC(64f16u, cvt_32f, double, ushort, float) -DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, float16_t, ushort, float) +DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, hfloat, ushort, float) DEF_CVT_SCALE_FUNC(8u16s, cvt_32f, uchar, short, float) DEF_CVT_SCALE_FUNC(8s16s, cvt_32f, schar, short, float) @@ -259,7 +259,7 @@ DEF_CVT_SCALE_FUNC(16s, cvt_32f, short, short, float) DEF_CVT_SCALE_FUNC(32s16s, cvt_32f, int, short, float) DEF_CVT_SCALE_FUNC(32f16s, cvt_32f, float, short, float) DEF_CVT_SCALE_FUNC(64f16s, cvt_32f, double, short, float) -DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, float16_t, short, float) +DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, hfloat, short, float) DEF_CVT_SCALE_FUNC(8u32s, cvt_32f, uchar, int, float) DEF_CVT_SCALE_FUNC(8s32s, cvt_32f, schar, int, float) @@ -268,7 +268,7 @@ DEF_CVT_SCALE_FUNC(16s32s, cvt_32f, short, int, float) DEF_CVT_SCALE_FUNC(32s, cvt_64f, int, int, double) DEF_CVT_SCALE_FUNC(32f32s, cvt_32f, float, int, float) DEF_CVT_SCALE_FUNC(64f32s, cvt_64f, double, int, double) -DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, float16_t, int, float) +DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, hfloat, int, float) DEF_CVT_SCALE_FUNC(8u32f, cvt_32f, uchar, float, float) DEF_CVT_SCALE_FUNC(8s32f, cvt_32f, schar, float, float) @@ -277,7 +277,7 @@ DEF_CVT_SCALE_FUNC(16s32f, cvt_32f, short, float, float) DEF_CVT_SCALE_FUNC(32s32f, cvt_32f, int, float, float) DEF_CVT_SCALE_FUNC(32f, cvt_32f, float, float, float) DEF_CVT_SCALE_FUNC(64f32f, cvt_64f, double, float, double) -DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, float16_t, float, float) +DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, hfloat, float, float) DEF_CVT_SCALE_FUNC(8u64f, cvt_64f, uchar, double, double) DEF_CVT_SCALE_FUNC(8s64f, cvt_64f, schar, double, double) @@ -286,16 +286,16 @@ DEF_CVT_SCALE_FUNC(16s64f, cvt_64f, short, double, double) DEF_CVT_SCALE_FUNC(32s64f, cvt_64f, int, double, double) DEF_CVT_SCALE_FUNC(32f64f, cvt_64f, float, double, double) DEF_CVT_SCALE_FUNC(64f, cvt_64f, double, double, double) -DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, float16_t, double, double) +DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, hfloat, double, double) -DEF_CVT_SCALE_FUNC(8u16f, cvt1_32f, uchar, float16_t, float) -DEF_CVT_SCALE_FUNC(8s16f, cvt1_32f, schar, float16_t, float) -DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, float16_t, float) -DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short, float16_t, float) -DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int, float16_t, float) -DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float, float16_t, float) -DEF_CVT_SCALE_FUNC(64f16f, cvt_64f, double, float16_t, double) -DEF_CVT_SCALE_FUNC(16f, cvt1_32f, float16_t, float16_t, float) +DEF_CVT_SCALE_FUNC(8u16f, cvt1_32f, uchar, hfloat, float) +DEF_CVT_SCALE_FUNC(8s16f, cvt1_32f, schar, hfloat, float) +DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, hfloat, float) +DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short, hfloat, float) +DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int, hfloat, float) +DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float, hfloat, float) +DEF_CVT_SCALE_FUNC(64f16f, cvt_64f, double, hfloat, double) +DEF_CVT_SCALE_FUNC(16f, cvt1_32f, hfloat, hfloat, float) BinaryFunc getCvtScaleAbsFunc(int depth) { diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index f4c98b49c0..5c8af185b5 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -93,7 +93,7 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to) scalarToRawData_(s, (double*)_buf, cn, unroll_to); break; case CV_16F: - scalarToRawData_(s, (float16_t*)_buf, cn, unroll_to); + scalarToRawData_(s, (hfloat*)_buf, cn, unroll_to); break; default: CV_Error(cv::Error::StsUnsupportedFormat,""); diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp index 31178dbcda..49d781fcb0 100644 --- a/modules/core/src/norm.cpp +++ b/modules/core/src/norm.cpp @@ -752,7 +752,7 @@ double norm( InputArray _src, int normType, InputArray _mask ) for (int j = 0; j < total; j += blockSize) { int bsz = std::min(total - j, blockSize); - hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn); + hal::cvt16f32f((const hfloat*)ptrs[0], data0, bsz * cn); func((uchar*)data0, ptrs[1], (uchar*)&result.f, bsz, cn); ptrs[0] += bsz*esz; if (ptrs[1]) @@ -1222,8 +1222,8 @@ double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask for (int j = 0; j < total; j += blockSize) { int bsz = std::min(total - j, blockSize); - hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn); - hal::cvt16f32f((const float16_t*)ptrs[1], data1, bsz * cn); + hal::cvt16f32f((const hfloat*)ptrs[0], data0, bsz * cn); + hal::cvt16f32f((const hfloat*)ptrs[1], data1, bsz * cn); func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.f, bsz, cn); ptrs[0] += bsz*esz; ptrs[1] += bsz*esz; diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 5eac178316..8d7d7faf44 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -7210,7 +7210,7 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name) typedef std::string (* func_t)(const Mat &); static const func_t funcs[] = { kerToStr, kerToStr, kerToStr, kerToStr, - kerToStr, kerToStr, kerToStr, kerToStr }; + kerToStr, kerToStr, kerToStr, kerToStr }; const func_t func = funcs[ddepth]; CV_Assert(func != 0); diff --git a/modules/core/src/out.cpp b/modules/core/src/out.cpp index 8a7d7e1636..342cc8a2bb 100644 --- a/modules/core/src/out.cpp +++ b/modules/core/src/out.cpp @@ -77,7 +77,7 @@ namespace cv void valueToStr32s() { snprintf(buf, sizeof(buf), "%d", mtx.ptr(row, col)[cn]); } void valueToStr32f() { snprintf(buf, sizeof(buf), floatFormat, mtx.ptr(row, col)[cn]); } void valueToStr64f() { snprintf(buf, sizeof(buf), floatFormat, mtx.ptr(row, col)[cn]); } - void valueToStr16f() { snprintf(buf, sizeof(buf), floatFormat, (float)mtx.ptr(row, col)[cn]); } + void valueToStr16f() { snprintf(buf, sizeof(buf), floatFormat, (float)mtx.ptr(row, col)[cn]); } void valueToStrOther() { buf[0] = 0; } public: diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index 6a71c1ff03..5f7f638397 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -270,7 +270,7 @@ int calcStructSize( const char* dt, int initial_size ) case 'i': { elem_max_size = std::max( elem_max_size, sizeof(int ) ); break; } case 'f': { elem_max_size = std::max( elem_max_size, sizeof(float ) ); break; } case 'd': { elem_max_size = std::max( elem_max_size, sizeof(double) ); break; } - case 'h': { elem_max_size = std::max(elem_max_size, sizeof(float16_t)); break; } + case 'h': { elem_max_size = std::max(elem_max_size, sizeof(hfloat)); break; } default: CV_Error_(Error::StsNotImplemented, ("Unknown type identifier: '%c' in '%s'", (char)(*type), dt)); } @@ -1129,8 +1129,8 @@ void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, s data += sizeof(double); break; case CV_16F: /* reference */ - ptr = fs::floatToString(buf, sizeof(buf), (float) *(float16_t *) data, true, explicitZero); - data += sizeof(float16_t); + ptr = fs::floatToString(buf, sizeof(buf), (float) *(hfloat *) data, true, explicitZero); + data += sizeof(hfloat); break; default: CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported type"); @@ -1809,7 +1809,7 @@ char *FileStorage::Impl::parseBase64(char *ptr, int indent, FileNode &collection node_type = FileNode::REAL; break; case CV_16F: - fval = (float) float16_t::fromBits(base64decoder.getUInt16()); + fval = float(hfloatFromBits(base64decoder.getUInt16())); node_type = FileNode::REAL; break; default: @@ -2600,8 +2600,8 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si data += sizeof(double); break; case CV_16F: - *(float16_t*)data = float16_t((float)ival); - data += sizeof(float16_t); + *(hfloat*)data = hfloat((float)ival); + data += sizeof(hfloat); break; default: CV_Error( Error::StsUnsupportedFormat, "Unsupported type" ); @@ -2642,8 +2642,8 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si data += sizeof(double); break; case CV_16F: - *(float16_t*)data = float16_t((float)fval); - data += sizeof(float16_t); + *(hfloat*)data = hfloat((float)fval); + data += sizeof(hfloat); break; default: CV_Error( Error::StsUnsupportedFormat, "Unsupported type" ); diff --git a/modules/core/src/rand.cpp b/modules/core/src/rand.cpp index e39acce529..a6301afe34 100644 --- a/modules/core/src/rand.cpp +++ b/modules/core/src/rand.cpp @@ -195,7 +195,7 @@ randf_64f( double* arr, int len, uint64* state, const Vec2d* p, void*, bool ) hal::addRNGBias64f(arr, &p[0][0], len); } -static void randf_16f( float16_t* arr, int len, uint64* state, const Vec2f* p, float* fbuf, bool ) +static void randf_16f( hfloat* arr, int len, uint64* state, const Vec2f* p, float* fbuf, bool ) { uint64 temp = *state; for( int i = 0; i < len; i++ ) diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index be4f7beac7..a8c565ec46 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1557,14 +1557,14 @@ template struct TheTest AlignedData data_f32; data_f32.a.clear(); AlignedData out; - R r1 = vx_load_expand((const cv::float16_t*)data.a.d); + R r1 = vx_load_expand((const cv::hfloat*)data.a.d); R r2(r1); EXPECT_EQ(1.0f, v_get0(r1)); v_store(data_f32.a.d, r2); EXPECT_EQ(-2.0f, data_f32.a.d[VTraits::vlanes() - 1]); out.a.clear(); - v_pack_store((cv::float16_t*)out.a.d, r2); + v_pack_store((cv::hfloat*)out.a.d, r2); for (int i = 0; i < VTraits::vlanes(); ++i) { EXPECT_EQ(data.a[i], out.a[i]) << "i=" << i; @@ -1588,7 +1588,7 @@ template struct TheTest // check some initialization methods R r1 = data.u; - R r2 = vx_load_expand((const float16_t*)data.a.d); + R r2 = vx_load_expand((const hfloat*)data.a.d); R r3(r2); EXPECT_EQ(data.u[0], v_get0(r1)); EXPECT_EQ(data.a[0], v_get0(r2)); diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 000021d898..4fc4fe9a8d 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -381,7 +381,7 @@ inline void copyFromIE(const IE::Blob::Ptr &blob, MatType &mat) { HANDLE(U8, uint8_t); HANDLE(FP32, float); HANDLE(I32, int); - HANDLE(FP16, cv::float16_t); + HANDLE(FP16, cv::hfloat); #undef HANDLE case IE::Precision::I64: { GAPI_LOG_WARNING(NULL, "INT64 isn't supported for cv::Mat. Conversion to INT32 is used."); diff --git a/modules/gapi/test/common/gapi_tests_common.hpp b/modules/gapi/test/common/gapi_tests_common.hpp index f84ee05f49..2b8ee25512 100644 --- a/modules/gapi/test/common/gapi_tests_common.hpp +++ b/modules/gapi/test/common/gapi_tests_common.hpp @@ -370,7 +370,7 @@ public: initMatByPointsVectorRandU>(sz_in); break; case CV_16F: - initMatByPointsVectorRandU>(sz_in); + initMatByPointsVectorRandU>(sz_in); break; default: GAPI_Error("Unsupported depth");