mirror of
https://github.com/opencv/opencv.git
synced 2024-12-05 09:49:12 +08:00
173 lines
4.2 KiB
C++
173 lines
4.2 KiB
C++
|
|
||
|
namespace
|
||
|
{
|
||
|
// Forward declaration: converts a half-precision value, passed as its raw
// 16-bit pattern in a short, to float. Defined further down in this namespace.
float convertFp16SW(short fp16);
|
||
|
// Forward declaration: converts a float to half precision and returns the raw
// 16-bit pattern as a short. Defined further down in this namespace.
short convertFp16SW(float fp32);
|
||
|
|
||
|
#if !CV_FP16_TYPE
|
||
|
// const numbers for floating points format
|
||
|
// Shift distance between the fp16 and fp32 significand fields: the fp16
// significand is shifted left by this amount when widening, and the fp32
// significand is shifted right by it when narrowing.
const unsigned int kShiftSignificand = 13;
|
||
|
// Mask of the low 10 bits; used to keep only the fp16 significand field
// (dropping the normalised leading bit) and to test for an all-ones significand.
const unsigned int kMaskFp16Significand = 0x3ff;
|
||
|
// Bias subtracted from the stored fp16 exponent field to get the unbiased
// exponent, and added back when an fp16 exponent field is written.
const unsigned int kBiasFp16Exponent = 15;
|
||
|
// Bias subtracted from the stored fp32 exponent field to get the unbiased
// exponent, and added back when an fp32 exponent field is written.
const unsigned int kBiasFp32Exponent = 127;
|
||
|
#endif
|
||
|
|
||
|
#if CV_FP16_TYPE
|
||
|
float convertFp16SW(short fp16)
|
||
|
{
|
||
|
// Fp16 -> Fp32
|
||
|
Cv16suf a;
|
||
|
a.i = fp16;
|
||
|
return (float)a.h;
|
||
|
}
|
||
|
#else
|
||
|
float convertFp16SW(short fp16)
|
||
|
{
|
||
|
// Fp16 -> Fp32
|
||
|
Cv16suf b;
|
||
|
b.i = fp16;
|
||
|
int exponent = b.fmt.exponent - kBiasFp16Exponent;
|
||
|
int significand = b.fmt.significand;
|
||
|
|
||
|
Cv32suf a;
|
||
|
a.i = 0;
|
||
|
a.fmt.sign = b.fmt.sign; // sign bit
|
||
|
if( exponent == 16 )
|
||
|
{
|
||
|
// Inf or NaN
|
||
|
a.i = a.i | 0x7F800000;
|
||
|
if( significand != 0 )
|
||
|
{
|
||
|
// NaN
|
||
|
#if defined(__x86_64__) || defined(_M_X64)
|
||
|
// 64bit
|
||
|
a.i = a.i | 0x7FC00000;
|
||
|
#endif
|
||
|
a.fmt.significand = a.fmt.significand | (significand << kShiftSignificand);
|
||
|
}
|
||
|
return a.f;
|
||
|
}
|
||
|
else if ( exponent == -(int)kBiasFp16Exponent )
|
||
|
{
|
||
|
// subnormal in Fp16
|
||
|
if( significand == 0 )
|
||
|
{
|
||
|
// zero
|
||
|
return a.f;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
int shift = -1;
|
||
|
while( ( significand & 0x400 ) == 0 )
|
||
|
{
|
||
|
significand = significand << 1;
|
||
|
shift++;
|
||
|
}
|
||
|
significand = significand & kMaskFp16Significand;
|
||
|
exponent -= shift;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
a.fmt.exponent = (exponent+kBiasFp32Exponent);
|
||
|
a.fmt.significand = significand << kShiftSignificand;
|
||
|
return a.f;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#if CV_FP16_TYPE
|
||
|
short convertFp16SW(float fp32)
|
||
|
{
|
||
|
// Fp32 -> Fp16
|
||
|
Cv16suf a;
|
||
|
a.h = (__fp16)fp32;
|
||
|
return a.i;
|
||
|
}
|
||
|
#else
|
||
|
short convertFp16SW(float fp32)
|
||
|
{
|
||
|
// Fp32 -> Fp16
|
||
|
Cv32suf a;
|
||
|
a.f = fp32;
|
||
|
int exponent = a.fmt.exponent - kBiasFp32Exponent;
|
||
|
int significand = a.fmt.significand;
|
||
|
|
||
|
Cv16suf result;
|
||
|
result.i = 0;
|
||
|
unsigned int absolute = a.i & 0x7fffffff;
|
||
|
if( 0x477ff000 <= absolute )
|
||
|
{
|
||
|
// Inf in Fp16
|
||
|
result.i = result.i | 0x7C00;
|
||
|
if( exponent == 128 && significand != 0 )
|
||
|
{
|
||
|
// NaN
|
||
|
result.i = (short)( result.i | 0x200 | ( significand >> kShiftSignificand ) );
|
||
|
}
|
||
|
}
|
||
|
else if ( absolute < 0x33000001 )
|
||
|
{
|
||
|
// too small for fp16
|
||
|
result.i = 0;
|
||
|
}
|
||
|
else if ( absolute < 0x387fe000 )
|
||
|
{
|
||
|
// subnormal in Fp16
|
||
|
int fp16Significand = significand | 0x800000;
|
||
|
int bitShift = (-exponent) - 1;
|
||
|
fp16Significand = fp16Significand >> bitShift;
|
||
|
|
||
|
// special cases to round up
|
||
|
bitShift = exponent + 24;
|
||
|
int threshold = ( ( 0x400000 >> bitShift ) | ( ( ( significand & ( 0x800000 >> bitShift ) ) >> ( 126 - a.fmt.exponent ) ) ^ 1 ) );
|
||
|
if( absolute == 0x33c00000 )
|
||
|
{
|
||
|
result.i = 2;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if( threshold <= ( significand & ( 0xffffff >> ( exponent + 25 ) ) ) )
|
||
|
{
|
||
|
fp16Significand++;
|
||
|
}
|
||
|
result.i = (short)fp16Significand;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// usual situation
|
||
|
// exponent
|
||
|
result.fmt.exponent = ( exponent + kBiasFp16Exponent );
|
||
|
|
||
|
// significand;
|
||
|
short fp16Significand = (short)(significand >> kShiftSignificand);
|
||
|
result.fmt.significand = fp16Significand;
|
||
|
|
||
|
// special cases to round up
|
||
|
short lsb10bitsFp32 = (significand & 0x1fff);
|
||
|
short threshold = 0x1000 + ( ( fp16Significand & 0x1 ) ? 0 : 1 );
|
||
|
if( threshold <= lsb10bitsFp32 )
|
||
|
{
|
||
|
result.i++;
|
||
|
}
|
||
|
else if ( fp16Significand == kMaskFp16Significand && exponent == -15)
|
||
|
{
|
||
|
result.i++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// sign bit
|
||
|
result.fmt.sign = a.fmt.sign;
|
||
|
return result.i;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
}
|
||
|
|
||
|
namespace cv
|
||
|
{
|
||
|
namespace opt_FP16
|
||
|
{
|
||
|
// Declaration only; the definition is not visible in this part of the file.
// NOTE(review): judging by the name and signature this presumably converts a
// `size` region of float data at `src` (row stride `sstep`) to fp16 bit
// patterns stored as short at `dst` (row stride `dstep`) — confirm the stride
// units against the implementation.
void cvtScaleHalf_SIMD32f16f( const float* src, size_t sstep, short* dst, size_t dstep, cv::Size size );
|
||
|
// Declaration only; the definition is not visible in this part of the file.
// NOTE(review): judging by the name and signature this presumably converts a
// `size` region of fp16 bit patterns stored as short at `src` (row stride
// `sstep`) to float at `dst` (row stride `dstep`) — confirm the stride units
// against the implementation.
void cvtScaleHalf_SIMD16f32f( const short* src, size_t sstep, float* dst, size_t dstep, cv::Size size );
|
||
|
}
|
||
|
}
|