core: Rename cv::float16_t to cv::hfloat (#25217)

* rename cv::float16_t to cv::fp16_t

* add typedef fp16_t float16_t

* remove zero(), bits() from fp16_t class

* fp16_t -> hfloat

* remove cv::float16_t::fromBits; add hfloatFromBits

* undo changes in conv_winograd_f63.simd.hpp and conv_block.simd.hpp

* undo some changes in dnn
This commit is contained in:
Yuantao Feng 2024-03-22 04:44:19 +08:00 committed by GitHub
parent 3aefd4862c
commit 3afe8ddaf8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
31 changed files with 156 additions and 166 deletions

View File

@ -809,40 +809,20 @@ using std::uint64_t;
namespace cv
{
class float16_t
class hfloat
{
public:
#if CV_FP16_TYPE
float16_t() : h(0) {}
explicit float16_t(float x) { h = (__fp16)x; }
hfloat() : h(0) {}
explicit hfloat(float x) { h = (__fp16)x; }
operator float() const { return (float)h; }
static float16_t fromBits(ushort w)
{
Cv16suf u;
u.u = w;
float16_t result;
result.h = u.h;
return result;
}
static float16_t zero()
{
float16_t result;
result.h = (__fp16)0;
return result;
}
ushort bits() const
{
Cv16suf u;
u.h = h;
return u.u;
}
protected:
__fp16 h;
#else
float16_t() : w(0) {}
explicit float16_t(float x)
hfloat() : w(0) {}
explicit hfloat(float x)
{
#if CV_FP16 && CV_AVX2
__m128 v = _mm_load_ss(&x);
@ -893,25 +873,35 @@ protected:
#endif
}
static float16_t fromBits(ushort b)
{
float16_t result;
result.w = b;
return result;
}
static float16_t zero()
{
float16_t result;
result.w = (ushort)0;
return result;
}
ushort bits() const { return w; }
protected:
ushort w;
#endif
};
/** @brief Builds an hfloat from a raw 16-bit pattern.

The pattern is decoded to a float first and the result is then passed to the
explicit hfloat(float) constructor, because the storage member of hfloat is
protected and cannot be written from a free function.

@param w 16-bit pattern to decode.
@return hfloat constructed from the decoded float value.
*/
inline hfloat hfloatFromBits(ushort w) {
#if CV_FP16_TYPE
    // Reinterpret the 16 bits through the Cv16suf union, then widen to float.
    Cv16suf u;
    u.u = w;
    return hfloat(float(u.h));
#else
    // Software decode. Everything is done on unsigned values: with plain int
    // promotion, (w & 0x8000) << 16 would shift a signed int past INT_MAX.
    const unsigned bits = w;
    const unsigned t = ((bits & 0x7fffu) << 13) + 0x38000000u; // 0x38000000 == (127 - 15) << 23
    const unsigned sign = (bits & 0x8000u) << 16;
    const unsigned e = bits & 0x7c00u;

    Cv32suf out;
    if (e >= 0x7c00u)
    {
        // All exponent bits set: add the offset a second time.
        out.u = t + 0x38000000u;
    }
    else if (e == 0)
    {
        // Zero exponent field: bump the exponent by one, then subtract
        // 6.103515625e-05f (== 2^-14) in floating point.
        out.u = t + (1u << 23);
        out.f -= 6.103515625e-05f;
    }
    else
    {
        out.u = t;
    }
    out.u |= sign;
    return hfloat(out.f);
#endif
}
typedef hfloat float16_t;
}
#endif

View File

@ -195,8 +195,8 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2,
CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len );
CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len );
CV_EXPORTS void cvt16f32f( const hfloat* src, float* dst, int len );
CV_EXPORTS void cvt32f16f( const float* src, hfloat* dst, int len );
CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len );
CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len );

View File

@ -708,7 +708,7 @@ namespace CV__SIMD_NAMESPACE {
inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
// Half-float overload of vx_load_expand: forwards ptr to the function named by
// VXPREFIX(_load_expand) and returns its result as v_float32.
inline v_float32 vx_load_expand(const float16_t * ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_float32 vx_load_expand(const hfloat * ptr) { return VXPREFIX(_load_expand)(ptr); }
//! @}
//! @name Wide load with quad expansion

View File

@ -3137,7 +3137,7 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, u
// FP16
//
inline v_float32x8 v256_load_expand(const float16_t* ptr)
inline v_float32x8 v256_load_expand(const hfloat* ptr)
{
#if CV_FP16
return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
@ -3149,7 +3149,7 @@ inline v_float32x8 v256_load_expand(const float16_t* ptr)
#endif
}
inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
inline void v_pack_store(hfloat* ptr, const v_float32x8& a)
{
#if CV_FP16
__m128i ah = _mm256_cvtps_ph(a.val, 0);
@ -3158,7 +3158,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
float CV_DECL_ALIGNED(32) buf[8];
v_store_aligned(buf, a);
for (int i = 0; i < 8; i++)
ptr[i] = float16_t(buf[i]);
ptr[i] = hfloat(buf[i]);
#endif
}

View File

@ -506,12 +506,12 @@ inline v_float64x8 v_reinterpret_as_f64(const v_float32x16& a)
{ return v_float64x8(_mm512_castps_pd(a.val)); }
// FP16
// Loads 256 bits from ptr with an unaligned load (_mm256_loadu_si256) and
// converts them with _mm512_cvtph_ps, wrapping the result in v_float32x16.
// ptr is only reinterpreted as __m256i; no alignment is required by the load used.
inline v_float32x16 v512_load_expand(const float16_t* ptr)
inline v_float32x16 v512_load_expand(const hfloat* ptr)
{
return v_float32x16(_mm512_cvtph_ps(_mm256_loadu_si256((const __m256i*)ptr)));
}
inline void v_pack_store(float16_t* ptr, const v_float32x16& a)
inline void v_pack_store(hfloat* ptr, const v_float32x16& a)
{
__m256i ah = _mm512_cvtps_ph(a.val, 0);
_mm256_storeu_si256((__m256i*)ptr, ah);

View File

@ -3251,7 +3251,7 @@ template<int n> inline v_reg<double, n/2> v_dotprod_expand_fast(const v_reg<int,
////// FP16 support ///////
inline v_reg<float, simd128_width / sizeof(float)>
v_load_expand(const float16_t* ptr)
v_load_expand(const hfloat* ptr)
{
v_reg<float, simd128_width / sizeof(float)> v;
for( int i = 0; i < v.nlanes; i++ )
@ -3262,7 +3262,7 @@ v_load_expand(const float16_t* ptr)
}
#if CV_SIMD256
inline v_reg<float, simd256_width / sizeof(float)>
v256_load_expand(const float16_t* ptr)
v256_load_expand(const hfloat* ptr)
{
v_reg<float, simd256_width / sizeof(float)> v;
for (int i = 0; i < v.nlanes; i++)
@ -3274,7 +3274,7 @@ v256_load_expand(const float16_t* ptr)
#endif
#if CV_SIMD512
inline v_reg<float, simd512_width / sizeof(float)>
v512_load_expand(const float16_t* ptr)
v512_load_expand(const hfloat* ptr)
{
v_reg<float, simd512_width / sizeof(float)> v;
for (int i = 0; i < v.nlanes; i++)
@ -3286,11 +3286,11 @@ v512_load_expand(const float16_t* ptr)
#endif
// Generic v_reg<float, n> store to half precision: each of the v.nlanes
// elements of v.s is converted through the explicit half-float constructor
// and written to ptr[i], one element at a time.
template<int n> inline void
v_pack_store(float16_t* ptr, const v_reg<float, n>& v)
v_pack_store(hfloat* ptr, const v_reg<float, n>& v)
{
for( int i = 0; i < v.nlanes; i++ )
{
ptr[i] = float16_t(v.s[i]);
ptr[i] = hfloat(v.s[i]);
}
}

View File

@ -2983,7 +2983,7 @@ OPENCV_HAL_IMPL_LASX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4,
// FP16
//
inline v_float32x8 v256_load_expand(const float16_t* ptr)
inline v_float32x8 v256_load_expand(const hfloat* ptr)
{
#if CV_FP16
//1-load128, 2-permi, 3-cvt
@ -2996,7 +2996,7 @@ inline v_float32x8 v256_load_expand(const float16_t* ptr)
#endif
}
inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
inline void v_pack_store(hfloat* ptr, const v_float32x8& a)
{
#if CV_FP16
__m256i ah = __lasx_xvfcvt_h_s(a.val, a.val);
@ -3005,7 +3005,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
float CV_DECL_ALIGNED(32) buf[8];
v_store_aligned(buf, a);
for (int i = 0; i < 8; i++)
ptr[i] = float16_t(buf[i]);
ptr[i] = hfloat(buf[i]);
#endif
}

View File

@ -2498,7 +2498,7 @@ OPENCV_HAL_IMPL_LSX_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, u
// FP16
//
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
#if CV_FP16
return v_float32x4(__lsx_vfcvtl_s_h((__m128)__lsx_vld(ptr, 0)));
@ -2510,7 +2510,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
#endif
}
inline void v_pack_store(float16_t* ptr, const v_float32x4& a)
inline void v_pack_store(hfloat* ptr, const v_float32x4& a)
{
#if CV_FP16
__m128i res = (__m218i)__lsx_vfcvt_h_s(a.val, a.val);
@ -2519,7 +2519,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& a)
float CV_DECL_ALIGNED(32) buf[4];
v_store_aligned(buf, a);
for (int i = 0; i < 4; i++)
ptr[i] = float16_t(buf[i]);
ptr[i] = hfloat(buf[i]);
#endif
}

View File

@ -1838,7 +1838,7 @@ inline v_float32x4 v_broadcast_element(const v_float32x4& a)
////// FP16 support ///////
#if CV_FP16
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
#ifndef msa_ld1_f16
v4f16 v = (v4f16)msa_ld1_s16((const short*)ptr);
@ -1848,7 +1848,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
return v_float32x4(msa_cvt_f32_f16(v));
}
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
{
v4f16 hv = msa_cvt_f16_f32(v.val);
@ -1859,7 +1859,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
#endif
}
#else
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
float buf[4];
for( int i = 0; i < 4; i++ )
@ -1867,12 +1867,12 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
return v_load(buf);
}
// Fallback used in the #else branch of CV_FP16: spills the four float lanes of v
// to a local buffer with v_store, then converts each one to half precision
// with a scalar cast and writes it to ptr[0..3].
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
{
float buf[4];
v_store(buf, v);
for( int i = 0; i < 4; i++ )
ptr[i] = (float16_t)buf[i];
ptr[i] = (hfloat)buf[i];
}
#endif

View File

@ -2605,7 +2605,7 @@ inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_flo
////// FP16 support ///////
#if CV_FP16
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
float16x4_t v =
#ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro
@ -2616,7 +2616,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
return v_float32x4(vcvt_f32_f16(v));
}
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
{
float16x4_t hv = vcvt_f16_f32(v.val);
@ -2627,7 +2627,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
#endif
}
#else
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
const int N = 4;
float buf[N];
@ -2635,12 +2635,12 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
return v_load(buf);
}
// Fallback used in the #else branch of CV_FP16: stores the N (= 4) float lanes
// of v into a local buffer, then converts each element to half precision
// with the explicit constructor and writes it to ptr[0..N-1].
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
{
const int N = 4;
float buf[N];
v_store(buf, v);
for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]);
for( int i = 0; i < N; i++ ) ptr[i] = hfloat(buf[i]);
}
#endif

View File

@ -2873,17 +2873,17 @@ inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
////// FP16 support ///////
#if CV_FP16
// CV_FP16 path: loads 4 half-precision elements with vle16_v_f16mf2 and widens
// them with vfwcvt_f_f_v_f32m1 (vector length 4 in both calls), returning the
// result wrapped in v_float32x4.
// NOTE(review): ptr is handed to vle16_v_f16mf2 without a cast to the
// intrinsic's element pointer type — confirm this compiles for hfloat*.
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
return v_float32x4(vfwcvt_f_f_v_f32m1(vle16_v_f16mf2(ptr, 4), 4));
}
// CV_FP16 path: narrows v with vfncvt_f_f_w_f16mf2 and stores 4 half-precision
// elements to ptr with vse16_v_f16mf2 (vector length 4 in both calls).
// NOTE(review): v itself (not a raw vector member) and an uncast ptr are passed
// to the intrinsics — presumably relies on implicit conversions; confirm.
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
{
vse16_v_f16mf2(ptr, vfncvt_f_f_w_f16mf2(v, 4), 4);
}
#else
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
const int N = 4;
float buf[N];
@ -2891,12 +2891,12 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
return v_load(buf);
}
// Fallback used in the #else branch of CV_FP16: stores the N (= 4) float lanes
// of v into a local buffer, then converts each element to half precision
// with the explicit constructor and writes it to ptr[0..N-1].
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
{
const int N = 4;
float buf[N];
v_store(buf, v);
for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]);
for( int i = 0; i < N; i++ ) ptr[i] = hfloat(buf[i]);
}
#endif

View File

@ -2858,14 +2858,14 @@ inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b,
#endif
////// FP16 support ///////
#if __riscv_v == 7000
// Variant compiled when __riscv_v == 7000: loads 4 half-precision elements
// (ptr reinterpreted as __fp16*) into an f16m1 register, widens them to an
// f32m2 register, and returns part 0 of that register (vget_v_f32m2_f32m1)
// wrapped in v_float32x4.
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
vfloat16m1_t v = vle16_v_f16m1((__fp16*)ptr, 4);
vfloat32m2_t v32 = vfwcvt_f_f_v_f32m2(v, 4);
return v_float32x4(vget_v_f32m2_f32m1(v32, 0));
}
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
{
vfloat32m2_t v32 = vundefined_f32m2();
v32 = vset_v_f32m1_f32m2(v32, 0, v.val);
@ -2873,14 +2873,14 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
vse16_v_f16m1((__fp16*)ptr, hv, 4);
}
#else
// Variant compiled when __riscv_v != 7000: loads 4 half-precision elements
// (ptr reinterpreted as __fp16*) into an f16mf2 register and widens them
// directly to an f32m1 register, which is wrapped in v_float32x4.
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
vfloat16mf2_t v = vle16_v_f16mf2((__fp16*)ptr, 4);
vfloat32m1_t v32 = vfwcvt_f_f_v_f32m1(v, 4);
return v_float32x4(v32);
}
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
{
//vfloat32m2_t v32 = vundefined_f32m2();
//v32 = vset_f32m2(v32, 0, v.val);

View File

@ -1810,28 +1810,28 @@ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float64, vlmul_trunc_u8mf8)
////// FP16 support ///////
#if defined(__riscv_zfh) && __riscv_zfh
// Path compiled when __riscv_zfh is defined and non-zero: loads
// VTraits<v_float32>::vlanes() half-precision elements (ptr reinterpreted as
// _Float16*) and widens them with vfwcvt_f using the same element count.
// The trailing ";;" is an empty statement and has no effect.
inline v_float32 v_load_expand(const float16_t* ptr)
inline v_float32 v_load_expand(const hfloat* ptr)
{
return vfwcvt_f(vle16_v_f16mf2((_Float16*)ptr, VTraits<v_float32>::vlanes()) ,VTraits<v_float32>::vlanes());;
}
// Path compiled when __riscv_zfh is defined and non-zero: narrows v with
// vfncvt_f_f_w_f16mf2 and stores VTraits<v_float32>::vlanes() half-precision
// elements to ptr (reinterpreted as _Float16*).
inline void v_pack_store(float16_t* ptr, const v_float32& v)
inline void v_pack_store(hfloat* ptr, const v_float32& v)
{
vse16_v_f16mf2((_Float16*)ptr, vfncvt_f_f_w_f16mf2(v, VTraits<v_float32>::vlanes()), VTraits<v_float32>::vlanes());
}
#else
// Fallback without zfh: converts VTraits<v_float32>::vlanes() half-precision
// elements to float one by one into a local buffer, then loads that buffer
// as a vector with v_load.
// NOTE(review): buf holds 32 floats; this assumes vlanes() never exceeds 32 —
// confirm against the maximum vector length supported by this backend.
inline v_float32 v_load_expand(const float16_t* ptr)
inline v_float32 v_load_expand(const hfloat* ptr)
{
float buf[32];
for( int i = 0; i < VTraits<v_float32>::vlanes(); i++ ) buf[i] = (float)ptr[i];
return v_load(buf);
}
// Fallback without zfh: stores v into a local float buffer with v_store, then
// converts VTraits<v_float32>::vlanes() elements to half precision one by one
// and writes them to ptr.
// NOTE(review): buf holds 32 floats; this assumes v_store writes at most 32
// elements, i.e. vlanes() <= 32 — confirm for this backend.
inline void v_pack_store(float16_t* ptr, const v_float32& v)
inline void v_pack_store(hfloat* ptr, const v_float32& v)
{
float buf[32];
v_store(buf, v);
for( int i = 0; i < VTraits<v_float32>::vlanes(); i++ ) ptr[i] = float16_t(buf[i]);
for( int i = 0; i < VTraits<v_float32>::vlanes(); i++ ) ptr[i] = hfloat(buf[i]);
}
#endif
////////////// Rounding //////////////

View File

@ -3407,7 +3407,7 @@ inline v_float32x4 v_broadcast_element(const v_float32x4& v)
////////////// FP16 support ///////////////////////////
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
#if CV_FP16
return v_float32x4(_mm_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
@ -3427,7 +3427,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
#endif
}
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
{
#if CV_FP16
__m128i fp16_value = _mm_cvtps_ph(v.val, 0);

View File

@ -1361,7 +1361,7 @@ inline v_float32x4 v_pack_triplets(const v_float32x4& vec)
/////// FP16 support ////////
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
vec_ushort8 vf16 = vec_ld_l8((const ushort*)ptr);
#if CV_VSX3 && defined(vec_extract_fp_from_shorth)
@ -1388,7 +1388,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
#endif
}
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
{
// fixme: Is there any builtin op or intrinsic that cover "xvcvsphp"?
#if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)

View File

@ -2754,7 +2754,7 @@ inline v_float32x4 v_broadcast_element(const v_float32x4& a)
////////////// FP16 support ///////////////////////////
inline v_float32x4 v_load_expand(const float16_t* ptr)
inline v_float32x4 v_load_expand(const hfloat* ptr)
{
float a[4];
for (int i = 0; i < 4; i++)
@ -2762,14 +2762,14 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
return v_float32x4(wasm_v128_load(a));
}
// Converts the four float lanes of v to half precision and writes them to
// ptr[0..3].
// The lanes are spilled with wasm_v128_store, which writes the 128-bit vector
// as-is, so the scratch buffer must have float elements. With a double buffer
// each element read back would span two adjacent float lanes and the last two
// elements would never be written.
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_pack_store(hfloat* ptr, const v_float32x4& v)
{
float v_[4];
wasm_v128_store(v_, v.val);
ptr[0] = float16_t(v_[0]);
ptr[1] = float16_t(v_[1]);
ptr[2] = float16_t(v_[2]);
ptr[3] = float16_t(v_[3]);
ptr[0] = hfloat(v_[0]);
ptr[1] = hfloat(v_[1]);
ptr[2] = hfloat(v_[2]);
ptr[3] = hfloat(v_[3]);
}
inline void v_cleanup() {}

View File

@ -158,20 +158,20 @@ template<> inline uint64 saturate_cast<uint64>(int64 v) { return (uint64)st
template<> inline int64 saturate_cast<int64>(uint64 v) { return (int64)std::min(v, (uint64)LLONG_MAX); }
/** @overload */
// Half-float support for saturate_cast.
// The generic overload converts a half-float argument to float and defers to
// saturate_cast<_Tp>(float). The specializations below produce a half-float
// from each source type by casting the value to float (float itself is passed
// through unchanged) and invoking the explicit half-float constructor; no
// range check is performed in these one-liners themselves.
template<typename _Tp> static inline _Tp saturate_cast(float16_t v) { return saturate_cast<_Tp>((float)v); }
template<typename _Tp> static inline _Tp saturate_cast(hfloat v) { return saturate_cast<_Tp>((float)v); }
// in theory, we could use a LUT for 8u/8s->16f conversion,
// but with hardware support for FP32->FP16 conversion the current approach is preferable
template<> inline float16_t saturate_cast<float16_t>(uchar v) { return float16_t((float)v); }
template<> inline float16_t saturate_cast<float16_t>(schar v) { return float16_t((float)v); }
template<> inline float16_t saturate_cast<float16_t>(ushort v) { return float16_t((float)v); }
template<> inline float16_t saturate_cast<float16_t>(short v) { return float16_t((float)v); }
template<> inline float16_t saturate_cast<float16_t>(unsigned v){ return float16_t((float)v); }
template<> inline float16_t saturate_cast<float16_t>(int v) { return float16_t((float)v); }
template<> inline float16_t saturate_cast<float16_t>(uint64 v) { return float16_t((float)v); }
template<> inline float16_t saturate_cast<float16_t>(int64 v) { return float16_t((float)v); }
template<> inline float16_t saturate_cast<float16_t>(float v) { return float16_t(v); }
template<> inline float16_t saturate_cast<float16_t>(double v) { return float16_t((float)v); }
template<> inline hfloat saturate_cast<hfloat>(uchar v) { return hfloat((float)v); }
template<> inline hfloat saturate_cast<hfloat>(schar v) { return hfloat((float)v); }
template<> inline hfloat saturate_cast<hfloat>(ushort v) { return hfloat((float)v); }
template<> inline hfloat saturate_cast<hfloat>(short v) { return hfloat((float)v); }
template<> inline hfloat saturate_cast<hfloat>(unsigned v){ return hfloat((float)v); }
template<> inline hfloat saturate_cast<hfloat>(int v) { return hfloat((float)v); }
template<> inline hfloat saturate_cast<hfloat>(uint64 v) { return hfloat((float)v); }
template<> inline hfloat saturate_cast<hfloat>(int64 v) { return hfloat((float)v); }
template<> inline hfloat saturate_cast<hfloat>(float v) { return hfloat(v); }
template<> inline hfloat saturate_cast<hfloat>(double v) { return hfloat((float)v); }
//! @}

View File

@ -261,10 +261,10 @@ public:
};
};
template<> class DataType<float16_t>
template<> class DataType<hfloat>
{
public:
typedef float16_t value_type;
typedef hfloat value_type;
typedef float work_type;
typedef value_type channel_type;
typedef value_type vec_type;
@ -347,7 +347,7 @@ template<> class TypeDepth<CV_64F>
// TypeDepth specialization for CV_16F: records the depth constant and the
// half-float element type as value_type.
// NOTE(review): there is no access specifier, so both members have the
// default (private) access of a class — confirm this is intended.
template<> class TypeDepth<CV_16F>
{
enum { depth = CV_16F };
typedef float16_t value_type;
typedef hfloat value_type;
};
#endif

View File

@ -11,13 +11,13 @@
namespace cv {
namespace hal {
// cv::hal entry point for half -> float conversion of len elements.
// It only opens an instrumentation region and hands (src, dst, len) to
// CV_CPU_DISPATCH with CV_CPU_DISPATCH_MODES_ALL; the conversion itself lives
// in the dispatched cvt16f32f implementation, which is not part of this function.
void cvt16f32f(const float16_t* src, float* dst, int len)
void cvt16f32f(const hfloat* src, float* dst, int len)
{
CV_INSTRUMENT_REGION();
CV_CPU_DISPATCH(cvt16f32f, (src, dst, len),
CV_CPU_DISPATCH_MODES_ALL);
}
void cvt32f16f(const float* src, float16_t* dst, int len)
void cvt32f16f(const float* src, hfloat* dst, int len)
{
CV_INSTRUMENT_REGION();
CV_CPU_DISPATCH(cvt32f16f, (src, dst, len),

View File

@ -31,7 +31,7 @@ static inline void vx_load_as(const int* ptr, v_float32& a)
static inline void vx_load_as(const float* ptr, v_float32& a)
{ a = vx_load(ptr); }
// Half-float overload of vx_load_as: fills a with the result of
// vx_load_expand(ptr), i.e. the half-precision input widened to v_float32.
static inline void vx_load_as(const float16_t* ptr, v_float32& a)
static inline void vx_load_as(const hfloat* ptr, v_float32& a)
{ a = vx_load_expand(ptr); }
static inline void v_store_as(ushort* ptr, const v_float32& a)
@ -46,7 +46,7 @@ static inline void v_store_as(int* ptr, const v_float32& a)
static inline void v_store_as(float* ptr, const v_float32& a)
{ v_store(ptr, a); }
// Half-float overload of v_store_as: delegates to v_pack_store(ptr, a) to write
// the float vector a to the half-precision destination.
static inline void v_store_as(float16_t* ptr, const v_float32& a)
static inline void v_store_as(hfloat* ptr, const v_float32& a)
{ v_pack_store(ptr, a); }
static inline void vx_load_pair_as(const uchar* ptr, v_uint16& a, v_uint16& b)
@ -150,7 +150,7 @@ static inline void vx_load_pair_as(const int* ptr, v_float32& a, v_float32& b)
static inline void vx_load_pair_as(const float* ptr, v_float32& a, v_float32& b)
{ a = vx_load(ptr); b = vx_load(ptr + VTraits<v_float32>::vlanes()); }
static inline void vx_load_pair_as(const float16_t* ptr, v_float32& a, v_float32& b)
static inline void vx_load_pair_as(const hfloat* ptr, v_float32& a, v_float32& b)
{
a = vx_load_expand(ptr);
b = vx_load_expand(ptr + VTraits<v_float32>::vlanes());
@ -294,7 +294,7 @@ static inline void vx_load_pair_as(const double* ptr, v_float64& a, v_float64& b
b = vx_load(ptr + VTraits<v_float64>::vlanes());
}
static inline void vx_load_pair_as(const float16_t* ptr, v_float64& a, v_float64& b)
static inline void vx_load_pair_as(const hfloat* ptr, v_float64& a, v_float64& b)
{
v_float32 v0 = vx_load_expand(ptr);
a = v_cvt_f64(v0);
@ -348,7 +348,7 @@ static inline void v_store_pair_as(float* ptr, const v_float64& a, const v_float
v_store(ptr, v);
}
static inline void v_store_pair_as(float16_t* ptr, const v_float64& a, const v_float64& b)
static inline void v_store_pair_as(hfloat* ptr, const v_float64& a, const v_float64& b)
{
v_float32 v = v_cvt_f32(a, b);
v_pack_store(ptr, v);

View File

@ -14,8 +14,8 @@ namespace cv {
namespace hal {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
void cvt16f32f(const float16_t* src, float* dst, int len);
void cvt32f16f(const float* src, float16_t* dst, int len);
void cvt16f32f(const hfloat* src, float* dst, int len);
void cvt32f16f(const float* src, hfloat* dst, int len);
void addRNGBias32f(float* arr, const float* scaleBiasPairs, int len);
void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len);
@ -35,7 +35,7 @@ CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
BinaryFunc getConvertFunc(int sdepth, int ddepth);
void cvt16f32f( const float16_t* src, float* dst, int len )
void cvt16f32f( const hfloat* src, float* dst, int len )
{
CV_INSTRUMENT_REGION();
int j = 0;
@ -56,7 +56,7 @@ void cvt16f32f( const float16_t* src, float* dst, int len )
dst[j] = (float)src[j];
}
void cvt32f16f( const float* src, float16_t* dst, int len )
void cvt32f16f( const float* src, hfloat* dst, int len )
{
CV_INSTRUMENT_REGION();
int j = 0;
@ -74,7 +74,7 @@ void cvt32f16f( const float* src, float16_t* dst, int len )
}
#endif
for( ; j < len; j++ )
dst[j] = float16_t(src[j]);
dst[j] = hfloat(src[j]);
}
void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len )
@ -188,7 +188,7 @@ DEF_CVT_FUNC(8u16s, cvt_, uchar, short, v_int16)
DEF_CVT_FUNC(8u32s, cvt_, uchar, int, v_int32)
DEF_CVT_FUNC(8u32f, cvt_, uchar, float, v_float32)
DEF_CVT_FUNC(8u64f, cvt_, uchar, double, v_int32)
DEF_CVT_FUNC(8u16f, cvt1_, uchar, float16_t, v_float32)
DEF_CVT_FUNC(8u16f, cvt1_, uchar, hfloat, v_float32)
////////////////////// 8s -> ... ////////////////////////
@ -198,7 +198,7 @@ DEF_CVT_FUNC(8s16s, cvt_, schar, short, v_int16)
DEF_CVT_FUNC(8s32s, cvt_, schar, int, v_int32)
DEF_CVT_FUNC(8s32f, cvt_, schar, float, v_float32)
DEF_CVT_FUNC(8s64f, cvt_, schar, double, v_int32)
DEF_CVT_FUNC(8s16f, cvt1_, schar, float16_t, v_float32)
DEF_CVT_FUNC(8s16f, cvt1_, schar, hfloat, v_float32)
////////////////////// 16u -> ... ////////////////////////
@ -208,7 +208,7 @@ DEF_CVT_FUNC(16u16s, cvt_, ushort, short, v_int32)
DEF_CVT_FUNC(16u32s, cvt_, ushort, int, v_int32)
DEF_CVT_FUNC(16u32f, cvt_, ushort, float, v_float32)
DEF_CVT_FUNC(16u64f, cvt_, ushort, double, v_int32)
DEF_CVT_FUNC(16u16f, cvt1_,ushort, float16_t, v_float32)
DEF_CVT_FUNC(16u16f, cvt1_,ushort, hfloat, v_float32)
////////////////////// 16s -> ... ////////////////////////
@ -218,7 +218,7 @@ DEF_CVT_FUNC(16s16u, cvt_, short, ushort, v_int32)
DEF_CVT_FUNC(16s32s, cvt_, short, int, v_int32)
DEF_CVT_FUNC(16s32f, cvt_, short, float, v_float32)
DEF_CVT_FUNC(16s64f, cvt_, short, double, v_int32)
DEF_CVT_FUNC(16s16f, cvt1_,short, float16_t, v_float32)
DEF_CVT_FUNC(16s16f, cvt1_,short, hfloat, v_float32)
////////////////////// 32s -> ... ////////////////////////
@ -228,7 +228,7 @@ DEF_CVT_FUNC(32s16u, cvt_, int, ushort, v_int32)
DEF_CVT_FUNC(32s16s, cvt_, int, short, v_int32)
DEF_CVT_FUNC(32s32f, cvt_, int, float, v_float32)
DEF_CVT_FUNC(32s64f, cvt_, int, double, v_int32)
DEF_CVT_FUNC(32s16f, cvt1_,int, float16_t, v_float32)
DEF_CVT_FUNC(32s16f, cvt1_,int, hfloat, v_float32)
////////////////////// 32f -> ... ////////////////////////
@ -238,7 +238,7 @@ DEF_CVT_FUNC(32f16u, cvt_, float, ushort, v_float32)
DEF_CVT_FUNC(32f16s, cvt_, float, short, v_float32)
DEF_CVT_FUNC(32f32s, cvt_, float, int, v_float32)
DEF_CVT_FUNC(32f64f, cvt_, float, double, v_float32)
DEF_CVT_FUNC(32f16f, cvt1_,float, float16_t, v_float32)
DEF_CVT_FUNC(32f16f, cvt1_,float, hfloat, v_float32)
////////////////////// 64f -> ... ////////////////////////
@ -248,17 +248,17 @@ DEF_CVT_FUNC(64f16u, cvt_, double, ushort, v_int32)
DEF_CVT_FUNC(64f16s, cvt_, double, short, v_int32)
DEF_CVT_FUNC(64f32s, cvt_, double, int, v_int32)
DEF_CVT_FUNC(64f32f, cvt_, double, float, v_float32)
DEF_CVT_FUNC(64f16f, cvt1_,double, float16_t, v_float32)
DEF_CVT_FUNC(64f16f, cvt1_,double, hfloat, v_float32)
////////////////////// 16f -> ... ////////////////////////
DEF_CVT_FUNC(16f8u, cvt_, float16_t, uchar, v_float32)
DEF_CVT_FUNC(16f8s, cvt_, float16_t, schar, v_float32)
DEF_CVT_FUNC(16f16u, cvt1_, float16_t, ushort, v_float32)
DEF_CVT_FUNC(16f16s, cvt1_, float16_t, short, v_float32)
DEF_CVT_FUNC(16f32s, cvt1_, float16_t, int, v_float32)
DEF_CVT_FUNC(16f32f, cvt1_, float16_t, float, v_float32)
DEF_CVT_FUNC(16f64f, cvt1_, float16_t, double, v_float32)
DEF_CVT_FUNC(16f8u, cvt_, hfloat, uchar, v_float32)
DEF_CVT_FUNC(16f8s, cvt_, hfloat, schar, v_float32)
DEF_CVT_FUNC(16f16u, cvt1_, hfloat, ushort, v_float32)
DEF_CVT_FUNC(16f16s, cvt1_, hfloat, short, v_float32)
DEF_CVT_FUNC(16f32s, cvt1_, hfloat, int, v_float32)
DEF_CVT_FUNC(16f32f, cvt1_, hfloat, float, v_float32)
DEF_CVT_FUNC(16f64f, cvt1_, hfloat, double, v_float32)
///////////// "conversion" w/o conversion ///////////////

View File

@ -232,7 +232,7 @@ DEF_CVT_SCALE_FUNC(16s8u, cvt_32f, short, uchar, float)
DEF_CVT_SCALE_FUNC(32s8u, cvt_32f, int, uchar, float)
DEF_CVT_SCALE_FUNC(32f8u, cvt_32f, float, uchar, float)
DEF_CVT_SCALE_FUNC(64f8u, cvt_32f, double, uchar, float)
DEF_CVT_SCALE_FUNC(16f8u, cvt_32f, float16_t, uchar, float)
DEF_CVT_SCALE_FUNC(16f8u, cvt_32f, hfloat, uchar, float)
DEF_CVT_SCALE_FUNC(8u8s, cvt_32f, uchar, schar, float)
DEF_CVT_SCALE_FUNC(8s, cvt_32f, schar, schar, float)
@ -241,7 +241,7 @@ DEF_CVT_SCALE_FUNC(16s8s, cvt_32f, short, schar, float)
DEF_CVT_SCALE_FUNC(32s8s, cvt_32f, int, schar, float)
DEF_CVT_SCALE_FUNC(32f8s, cvt_32f, float, schar, float)
DEF_CVT_SCALE_FUNC(64f8s, cvt_32f, double, schar, float)
DEF_CVT_SCALE_FUNC(16f8s, cvt_32f, float16_t, schar, float)
DEF_CVT_SCALE_FUNC(16f8s, cvt_32f, hfloat, schar, float)
DEF_CVT_SCALE_FUNC(8u16u, cvt_32f, uchar, ushort, float)
DEF_CVT_SCALE_FUNC(8s16u, cvt_32f, schar, ushort, float)
@ -250,7 +250,7 @@ DEF_CVT_SCALE_FUNC(16s16u, cvt_32f, short, ushort, float)
DEF_CVT_SCALE_FUNC(32s16u, cvt_32f, int, ushort, float)
DEF_CVT_SCALE_FUNC(32f16u, cvt_32f, float, ushort, float)
DEF_CVT_SCALE_FUNC(64f16u, cvt_32f, double, ushort, float)
DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, float16_t, ushort, float)
DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, hfloat, ushort, float)
DEF_CVT_SCALE_FUNC(8u16s, cvt_32f, uchar, short, float)
DEF_CVT_SCALE_FUNC(8s16s, cvt_32f, schar, short, float)
@ -259,7 +259,7 @@ DEF_CVT_SCALE_FUNC(16s, cvt_32f, short, short, float)
DEF_CVT_SCALE_FUNC(32s16s, cvt_32f, int, short, float)
DEF_CVT_SCALE_FUNC(32f16s, cvt_32f, float, short, float)
DEF_CVT_SCALE_FUNC(64f16s, cvt_32f, double, short, float)
DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, float16_t, short, float)
DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, hfloat, short, float)
DEF_CVT_SCALE_FUNC(8u32s, cvt_32f, uchar, int, float)
DEF_CVT_SCALE_FUNC(8s32s, cvt_32f, schar, int, float)
@ -268,7 +268,7 @@ DEF_CVT_SCALE_FUNC(16s32s, cvt_32f, short, int, float)
DEF_CVT_SCALE_FUNC(32s, cvt_64f, int, int, double)
DEF_CVT_SCALE_FUNC(32f32s, cvt_32f, float, int, float)
DEF_CVT_SCALE_FUNC(64f32s, cvt_64f, double, int, double)
DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, float16_t, int, float)
DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, hfloat, int, float)
DEF_CVT_SCALE_FUNC(8u32f, cvt_32f, uchar, float, float)
DEF_CVT_SCALE_FUNC(8s32f, cvt_32f, schar, float, float)
@ -277,7 +277,7 @@ DEF_CVT_SCALE_FUNC(16s32f, cvt_32f, short, float, float)
DEF_CVT_SCALE_FUNC(32s32f, cvt_32f, int, float, float)
DEF_CVT_SCALE_FUNC(32f, cvt_32f, float, float, float)
DEF_CVT_SCALE_FUNC(64f32f, cvt_64f, double, float, double)
DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, float16_t, float, float)
DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, hfloat, float, float)
DEF_CVT_SCALE_FUNC(8u64f, cvt_64f, uchar, double, double)
DEF_CVT_SCALE_FUNC(8s64f, cvt_64f, schar, double, double)
@ -286,16 +286,16 @@ DEF_CVT_SCALE_FUNC(16s64f, cvt_64f, short, double, double)
DEF_CVT_SCALE_FUNC(32s64f, cvt_64f, int, double, double)
DEF_CVT_SCALE_FUNC(32f64f, cvt_64f, float, double, double)
DEF_CVT_SCALE_FUNC(64f, cvt_64f, double, double, double)
DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, float16_t, double, double)
DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, hfloat, double, double)
DEF_CVT_SCALE_FUNC(8u16f, cvt1_32f, uchar, float16_t, float)
DEF_CVT_SCALE_FUNC(8s16f, cvt1_32f, schar, float16_t, float)
DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, float16_t, float)
DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short, float16_t, float)
DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int, float16_t, float)
DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float, float16_t, float)
DEF_CVT_SCALE_FUNC(64f16f, cvt_64f, double, float16_t, double)
DEF_CVT_SCALE_FUNC(16f, cvt1_32f, float16_t, float16_t, float)
DEF_CVT_SCALE_FUNC(8u16f, cvt1_32f, uchar, hfloat, float)
DEF_CVT_SCALE_FUNC(8s16f, cvt1_32f, schar, hfloat, float)
DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, hfloat, float)
DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short, hfloat, float)
DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int, hfloat, float)
DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float, hfloat, float)
DEF_CVT_SCALE_FUNC(64f16f, cvt_64f, double, hfloat, double)
DEF_CVT_SCALE_FUNC(16f, cvt1_32f, hfloat, hfloat, float)
BinaryFunc getCvtScaleAbsFunc(int depth)
{

View File

@ -93,7 +93,7 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
scalarToRawData_<double>(s, (double*)_buf, cn, unroll_to);
break;
case CV_16F:
scalarToRawData_<float16_t>(s, (float16_t*)_buf, cn, unroll_to);
scalarToRawData_<hfloat>(s, (hfloat*)_buf, cn, unroll_to);
break;
default:
CV_Error(cv::Error::StsUnsupportedFormat,"");

View File

@ -752,7 +752,7 @@ double norm( InputArray _src, int normType, InputArray _mask )
for (int j = 0; j < total; j += blockSize)
{
int bsz = std::min(total - j, blockSize);
hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
hal::cvt16f32f((const hfloat*)ptrs[0], data0, bsz * cn);
func((uchar*)data0, ptrs[1], (uchar*)&result.f, bsz, cn);
ptrs[0] += bsz*esz;
if (ptrs[1])
@ -1222,8 +1222,8 @@ double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask
for (int j = 0; j < total; j += blockSize)
{
int bsz = std::min(total - j, blockSize);
hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
hal::cvt16f32f((const float16_t*)ptrs[1], data1, bsz * cn);
hal::cvt16f32f((const hfloat*)ptrs[0], data0, bsz * cn);
hal::cvt16f32f((const hfloat*)ptrs[1], data1, bsz * cn);
func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.f, bsz, cn);
ptrs[0] += bsz*esz;
ptrs[1] += bsz*esz;

View File

@ -7210,7 +7210,7 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
typedef std::string (* func_t)(const Mat &);
static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
kerToStr<int>, kerToStr<float>, kerToStr<double>, kerToStr<float16_t> };
kerToStr<int>, kerToStr<float>, kerToStr<double>, kerToStr<hfloat> };
const func_t func = funcs[ddepth];
CV_Assert(func != 0);

View File

@ -77,7 +77,7 @@ namespace cv
void valueToStr32s() { snprintf(buf, sizeof(buf), "%d", mtx.ptr<int>(row, col)[cn]); }
void valueToStr32f() { snprintf(buf, sizeof(buf), floatFormat, mtx.ptr<float>(row, col)[cn]); }
void valueToStr64f() { snprintf(buf, sizeof(buf), floatFormat, mtx.ptr<double>(row, col)[cn]); }
void valueToStr16f() { snprintf(buf, sizeof(buf), floatFormat, (float)mtx.ptr<float16_t>(row, col)[cn]); }
// Formats the half-precision (hfloat) element at (row, col), channel cn, of mtx into buf.
// The element is explicitly converted to float first so it can be passed through the variadic snprintf with floatFormat.
void valueToStr16f() { snprintf(buf, sizeof(buf), floatFormat, (float)mtx.ptr<hfloat>(row, col)[cn]); }
// Fallback formatter: writes a terminator to buf[0] and reads nothing from mtx
// (presumably leaving buf an empty string for depths without a dedicated formatter — confirm against the dispatch code).
void valueToStrOther() { buf[0] = 0; }
public:

View File

@ -270,7 +270,7 @@ int calcStructSize( const char* dt, int initial_size )
case 'i': { elem_max_size = std::max( elem_max_size, sizeof(int ) ); break; }
case 'f': { elem_max_size = std::max( elem_max_size, sizeof(float ) ); break; }
case 'd': { elem_max_size = std::max( elem_max_size, sizeof(double) ); break; }
case 'h': { elem_max_size = std::max(elem_max_size, sizeof(float16_t)); break; }
case 'h': { elem_max_size = std::max(elem_max_size, sizeof(hfloat)); break; }
default:
CV_Error_(Error::StsNotImplemented, ("Unknown type identifier: '%c' in '%s'", (char)(*type), dt));
}
@ -1129,8 +1129,8 @@ void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, s
data += sizeof(double);
break;
case CV_16F: /* reference */
ptr = fs::floatToString(buf, sizeof(buf), (float) *(float16_t *) data, true, explicitZero);
data += sizeof(float16_t);
ptr = fs::floatToString(buf, sizeof(buf), (float) *(hfloat *) data, true, explicitZero);
data += sizeof(hfloat);
break;
default:
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported type");
@ -1809,7 +1809,7 @@ char *FileStorage::Impl::parseBase64(char *ptr, int indent, FileNode &collection
node_type = FileNode::REAL;
break;
case CV_16F:
fval = (float) float16_t::fromBits(base64decoder.getUInt16());
fval = float(hfloatFromBits(base64decoder.getUInt16()));
node_type = FileNode::REAL;
break;
default:
@ -2600,8 +2600,8 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si
data += sizeof(double);
break;
case CV_16F:
*(float16_t*)data = float16_t((float)ival);
data += sizeof(float16_t);
*(hfloat*)data = hfloat((float)ival);
data += sizeof(hfloat);
break;
default:
CV_Error( Error::StsUnsupportedFormat, "Unsupported type" );
@ -2642,8 +2642,8 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si
data += sizeof(double);
break;
case CV_16F:
*(float16_t*)data = float16_t((float)fval);
data += sizeof(float16_t);
*(hfloat*)data = hfloat((float)fval);
data += sizeof(hfloat);
break;
default:
CV_Error( Error::StsUnsupportedFormat, "Unsupported type" );

View File

@ -195,7 +195,7 @@ randf_64f( double* arr, int len, uint64* state, const Vec2d* p, void*, bool )
hal::addRNGBias64f(arr, &p[0][0], len);
}
static void randf_16f( float16_t* arr, int len, uint64* state, const Vec2f* p, float* fbuf, bool )
static void randf_16f( hfloat* arr, int len, uint64* state, const Vec2f* p, float* fbuf, bool )
{
uint64 temp = *state;
for( int i = 0; i < len; i++ )

View File

@ -1557,14 +1557,14 @@ template<typename R> struct TheTest
AlignedData<v_float32> data_f32; data_f32.a.clear();
AlignedData<v_uint16> out;
R r1 = vx_load_expand((const cv::float16_t*)data.a.d);
R r1 = vx_load_expand((const cv::hfloat*)data.a.d);
R r2(r1);
EXPECT_EQ(1.0f, v_get0(r1));
v_store(data_f32.a.d, r2);
EXPECT_EQ(-2.0f, data_f32.a.d[VTraits<R>::vlanes() - 1]);
out.a.clear();
v_pack_store((cv::float16_t*)out.a.d, r2);
v_pack_store((cv::hfloat*)out.a.d, r2);
for (int i = 0; i < VTraits<R>::vlanes(); ++i)
{
EXPECT_EQ(data.a[i], out.a[i]) << "i=" << i;
@ -1588,7 +1588,7 @@ template<typename R> struct TheTest
// check some initialization methods
R r1 = data.u;
R r2 = vx_load_expand((const float16_t*)data.a.d);
R r2 = vx_load_expand((const hfloat*)data.a.d);
R r3(r2);
EXPECT_EQ(data.u[0], v_get0(r1));
EXPECT_EQ(data.a[0], v_get0(r2));

View File

@ -381,7 +381,7 @@ inline void copyFromIE(const IE::Blob::Ptr &blob, MatType &mat) {
HANDLE(U8, uint8_t);
HANDLE(FP32, float);
HANDLE(I32, int);
HANDLE(FP16, cv::float16_t);
HANDLE(FP16, cv::hfloat);
#undef HANDLE
case IE::Precision::I64: {
GAPI_LOG_WARNING(NULL, "INT64 isn't supported for cv::Mat. Conversion to INT32 is used.");

View File

@ -370,7 +370,7 @@ public:
initMatByPointsVectorRandU<Pt<double>>(sz_in);
break;
case CV_16F:
initMatByPointsVectorRandU<Pt<cv::float16_t>>(sz_in);
initMatByPointsVectorRandU<Pt<cv::hfloat>>(sz_in);
break;
default:
GAPI_Error("Unsupported depth");