opencv/modules/core/src/convert.hpp


namespace
{
// Forward declarations of the scalar half <-> single precision helpers.
// Both overloads are defined further down in this header, each in two
// variants selected by CV_FP16_TYPE (native __fp16 cast vs. software bit
// manipulation). The `short` argument/result carries the raw 16-bit pattern.
float convertFp16SW(short fp16);
short convertFp16SW(float fp32);

#if !CV_FP16_TYPE
// Floating-point format constants used only by the software conversion
// routines below (compiled when CV_FP16_TYPE is not set).
// kShiftSignificand:    shift applied when moving a significand between the
//                       fp16 field and the fp32 field (left for widening,
//                       right for narrowing).
// kMaskFp16Significand: mask covering the 10 low bits kept as the fp16
//                       significand.
// kBiasFp16Exponent /
// kBiasFp32Exponent:    values subtracted from / added to the stored exponent
//                       field to convert to / from the unbiased exponent.
const unsigned int kShiftSignificand    = 13;
const unsigned int kMaskFp16Significand = 0x3ff;
const unsigned int kBiasFp16Exponent    = 15;
const unsigned int kBiasFp32Exponent    = 127;
#endif

#if CV_FP16_TYPE
inline float convertFp16SW(short fp16)
{
    // Fp16 -> Fp32
    Cv16suf a;
    a.i = fp16;
    return (float)a.h;
}
#else
inline float convertFp16SW(short fp16)
{
    // Fp16 -> Fp32
    Cv16suf b;
    b.i = fp16;
    int exponent    = b.fmt.exponent - kBiasFp16Exponent;
    int significand = b.fmt.significand;

    Cv32suf a;
    a.i = 0;
    a.fmt.sign = b.fmt.sign; // sign bit
    if( exponent == 16 )
    {
        // Inf or NaN
        a.i = a.i | 0x7F800000;
        if( significand != 0 )
        {
            // NaN
#if defined(__x86_64__) || defined(_M_X64)
            // 64bit
            a.i = a.i | 0x7FC00000;
#endif
            a.fmt.significand = a.fmt.significand | (significand << kShiftSignificand);
        }
        return a.f;
    }
    else if ( exponent == -(int)kBiasFp16Exponent )
    {
        // subnormal in Fp16
        if( significand == 0 )
        {
            // zero
            return a.f;
        }
        else
        {
            int shift = -1;
            while( ( significand & 0x400 ) == 0 )
            {
                significand = significand << 1;
                shift++;
            }
            significand = significand & kMaskFp16Significand;
            exponent -= shift;
        }
    }

    a.fmt.exponent = (exponent+kBiasFp32Exponent);
    a.fmt.significand = significand << kShiftSignificand;
    return a.f;
}
#endif

#if CV_FP16_TYPE
inline short convertFp16SW(float fp32)
{
    // Fp32 -> Fp16
    Cv16suf a;
    a.h = (__fp16)fp32;
    return a.i;
}
#else
inline short convertFp16SW(float fp32)
{
    // Fp32 -> Fp16 (software variant, used when CV_FP16_TYPE is not set).
    // Narrows a single-precision value to a raw half-precision bit pattern,
    // rounding to nearest with ties to even. The magnitude is classified by
    // comparing the sign-stripped fp32 bit pattern against fixed thresholds;
    // the sign bit is copied over at the very end for every path.
    Cv32suf a;
    a.f = fp32;
    int exponent    = a.fmt.exponent - kBiasFp32Exponent;
    int significand = a.fmt.significand;

    Cv16suf result;
    result.i = 0;
    unsigned int absolute = a.i & 0x7fffffff;
    if( 0x477ff000 <= absolute )
    {
        // 0x477ff000 is 65520.0f, the first magnitude that rounds past the
        // largest finite fp16 value: everything from here up becomes Inf ...
        result.i = result.i | 0x7C00;
        if( exponent == 128 && significand != 0 )
        {
            // ... except NaN, which keeps its upper payload bits. OR-ing in
            // 0x200 guarantees the fp16 significand is non-zero, so the
            // result cannot collapse into Inf.
            result.i = (short)( result.i | 0x200 | ( significand >> kShiftSignificand ) );
        }
    }
    else if ( absolute < 0x33000001 )
    {
        // Magnitude <= 2^-25 (0x33000000): at most half of the smallest fp16
        // subnormal, so it rounds to zero.
        result.i = 0;
    }
    else if ( absolute < 0x387fe000 )
    {
        // Result is an fp16 subnormal. Here exponent lies in [-25, -15], so
        // bitShift lies in [14, 24] and every shift below is well defined.
        // (The previous formulation shifted by `exponent + 24`, which is -1
        // for exponent == -25, i.e. an undefined negative shift count.)
        const int mantissa = significand | 0x800000;   // restore implicit leading one
        const int bitShift = (-exponent) - 1;           // scales the value to units of 2^-24
        int fp16Significand = mantissa >> bitShift;

        // Round to nearest, ties to even, using the bits shifted out.
        const int remainder = mantissa & ( ( 1 << bitShift ) - 1 );
        const int halfway   = 1 << ( bitShift - 1 );
        if( halfway < remainder || ( halfway == remainder && ( fp16Significand & 0x1 ) ) )
        {
            fp16Significand++;
        }
        result.i = (short)fp16Significand;
    }
    else
    {
        // usual situation
        // exponent
        result.fmt.exponent = ( exponent + kBiasFp16Exponent );

        // significand: keep the top 10 of the 23 fp32 significand bits
        short fp16Significand = (short)(significand >> kShiftSignificand);
        result.fmt.significand = fp16Significand;

        // Round on the 13 discarded bits. 0x1000 is exactly half; when the
        // kept LSB is even the threshold is bumped to 0x1001 so that an
        // exact tie stays put (ties to even). The increment is applied to the
        // whole pattern so a significand overflow carries into the exponent.
        short discardedBits = (significand & 0x1fff);
        short threshold = 0x1000 + ( ( fp16Significand & 0x1 ) ? 0 : 1 );
        if( threshold <= discardedBits )
        {
            result.i++;
        }
        else if ( fp16Significand == kMaskFp16Significand && exponent == -15)
        {
            // Inputs in [0x387fe000, 0x38800000) reach this branch with a
            // stored exponent field of 0 and significand 0x3ff; all of them
            // round up to the smallest fp16 normal (0x0400).
            result.i++;
        }
    }

    // sign bit
    result.fmt.sign = a.fmt.sign;
    return result.i;
}
#endif

}

namespace cv
{
namespace opt_FP16
{
// Declarations only; the definitions are not part of this header.
// Each takes a source pointer with its step, a destination pointer with its
// step, and a cv::Size.
// NOTE(review): judging by the names and pointer types, the first converts
// float -> half (raw bits stored as short) and the second half -> float, and
// they are presumably the FP16-optimised SIMD counterparts of the scalar
// helpers above. The units of sstep/dstep (bytes vs. elements) cannot be
// determined from here -- confirm against the implementation.
void cvtScaleHalf_SIMD32f16f( const float* src, size_t sstep, short* dst, size_t dstep, cv::Size size );
void cvtScaleHalf_SIMD16f32f( const short* src, size_t sstep, float* dst, size_t dstep, cv::Size size );
}
}
update convertFp16 using CV_CPU_CALL_FP16 * avoid link error (move the implementation of software version to header) * make getConvertFuncFp16 local (move from precomp.hpp to convert.hpp) * fix error on 32bit x86 2017-06-06 21:26:51 +08:00
			`namespace`
			`{`
			`float convertFp16SW(short fp16);`
			`short convertFp16SW(float fp32);`

			`#if !CV_FP16_TYPE`
			`// const numbers for floating points format`
			`const unsigned int kShiftSignificand = 13;`
			`const unsigned int kMaskFp16Significand = 0x3ff;`
			`const unsigned int kBiasFp16Exponent = 15;`
			`const unsigned int kBiasFp32Exponent = 127;`
			`#endif`

			`#if CV_FP16_TYPE`
android: make optional "cpufeatures", build fixes for NDK r15 2017-06-13 00:10:13 +08:00			`inline float convertFp16SW(short fp16)`
update convertFp16 using CV_CPU_CALL_FP16 * avoid link error (move the implementation of software version to header) * make getConvertFuncFp16 local (move from precomp.hpp to convert.hpp) * fix error on 32bit x86 2017-06-06 21:26:51 +08:00			`{`
			`// Fp16 -> Fp32`
			`Cv16suf a;`
			`a.i = fp16;`
			`return (float)a.h;`
			`}`
			`#else`
android: make optional "cpufeatures", build fixes for NDK r15 2017-06-13 00:10:13 +08:00			`inline float convertFp16SW(short fp16)`
update convertFp16 using CV_CPU_CALL_FP16 * avoid link error (move the implementation of software version to header) * make getConvertFuncFp16 local (move from precomp.hpp to convert.hpp) * fix error on 32bit x86 2017-06-06 21:26:51 +08:00			`{`
			`// Fp16 -> Fp32`
			`Cv16suf b;`
			`b.i = fp16;`
			`int exponent = b.fmt.exponent - kBiasFp16Exponent;`
			`int significand = b.fmt.significand;`

			`Cv32suf a;`
			`a.i = 0;`
			`a.fmt.sign = b.fmt.sign; // sign bit`
			`if( exponent == 16 )`
			`{`
			`// Inf or NaN`
			`a.i = a.i \| 0x7F800000;`
			`if( significand != 0 )`
			`{`
			`// NaN`
			`#if defined(__x86_64__) \|\| defined(_M_X64)`
			`// 64bit`
			`a.i = a.i \| 0x7FC00000;`
			`#endif`
			`a.fmt.significand = a.fmt.significand \| (significand << kShiftSignificand);`
			`}`
			`return a.f;`
			`}`
			`else if ( exponent == -(int)kBiasFp16Exponent )`
			`{`
			`// subnormal in Fp16`
			`if( significand == 0 )`
			`{`
			`// zero`
			`return a.f;`
			`}`
			`else`
			`{`
			`int shift = -1;`
			`while( ( significand & 0x400 ) == 0 )`
			`{`
			`significand = significand << 1;`
			`shift++;`
			`}`
			`significand = significand & kMaskFp16Significand;`
			`exponent -= shift;`
			`}`
			`}`

			`a.fmt.exponent = (exponent+kBiasFp32Exponent);`
			`a.fmt.significand = significand << kShiftSignificand;`
			`return a.f;`
			`}`
			`#endif`

			`#if CV_FP16_TYPE`
android: make optional "cpufeatures", build fixes for NDK r15 2017-06-13 00:10:13 +08:00			`inline short convertFp16SW(float fp32)`
update convertFp16 using CV_CPU_CALL_FP16 * avoid link error (move the implementation of software version to header) * make getConvertFuncFp16 local (move from precomp.hpp to convert.hpp) * fix error on 32bit x86 2017-06-06 21:26:51 +08:00			`{`
			`// Fp32 -> Fp16`
			`Cv16suf a;`
			`a.h = (__fp16)fp32;`
			`return a.i;`
			`}`
			`#else`
android: make optional "cpufeatures", build fixes for NDK r15 2017-06-13 00:10:13 +08:00			`inline short convertFp16SW(float fp32)`
update convertFp16 using CV_CPU_CALL_FP16 * avoid link error (move the implementation of software version to header) * make getConvertFuncFp16 local (move from precomp.hpp to convert.hpp) * fix error on 32bit x86 2017-06-06 21:26:51 +08:00			`{`
			`// Fp32 -> Fp16`
			`Cv32suf a;`
			`a.f = fp32;`
			`int exponent = a.fmt.exponent - kBiasFp32Exponent;`
			`int significand = a.fmt.significand;`

			`Cv16suf result;`
			`result.i = 0;`
			`unsigned int absolute = a.i & 0x7fffffff;`
			`if( 0x477ff000 <= absolute )`
			`{`
			`// Inf in Fp16`
			`result.i = result.i \| 0x7C00;`
			`if( exponent == 128 && significand != 0 )`
			`{`
			`// NaN`
			`result.i = (short)( result.i \| 0x200 \| ( significand >> kShiftSignificand ) );`
			`}`
			`}`
			`else if ( absolute < 0x33000001 )`
			`{`
			`// too small for fp16`
			`result.i = 0;`
			`}`
			`else if ( absolute < 0x387fe000 )`
			`{`
			`// subnormal in Fp16`
			`int fp16Significand = significand \| 0x800000;`
			`int bitShift = (-exponent) - 1;`
			`fp16Significand = fp16Significand >> bitShift;`

			`// special cases to round up`
			`bitShift = exponent + 24;`
			`int threshold = ( ( 0x400000 >> bitShift ) \| ( ( ( significand & ( 0x800000 >> bitShift ) ) >> ( 126 - a.fmt.exponent ) ) ^ 1 ) );`
			`if( absolute == 0x33c00000 )`
			`{`
			`result.i = 2;`
			`}`
			`else`
			`{`
			`if( threshold <= ( significand & ( 0xffffff >> ( exponent + 25 ) ) ) )`
			`{`
			`fp16Significand++;`
			`}`
			`result.i = (short)fp16Significand;`
			`}`
			`}`
			`else`
			`{`
			`// usual situation`
			`// exponent`
			`result.fmt.exponent = ( exponent + kBiasFp16Exponent );`

			`// significand;`
			`short fp16Significand = (short)(significand >> kShiftSignificand);`
			`result.fmt.significand = fp16Significand;`

			`// special cases to round up`
			`short lsb10bitsFp32 = (significand & 0x1fff);`
			`short threshold = 0x1000 + ( ( fp16Significand & 0x1 ) ? 0 : 1 );`
			`if( threshold <= lsb10bitsFp32 )`
			`{`
			`result.i++;`
			`}`
			`else if ( fp16Significand == kMaskFp16Significand && exponent == -15)`
			`{`
			`result.i++;`
			`}`
			`}`

			`// sign bit`
			`result.fmt.sign = a.fmt.sign;`
			`return result.i;`
			`}`
			`#endif`

			`}`

			`namespace cv`
			`{`
			`namespace opt_FP16`
			`{`
			`void cvtScaleHalf_SIMD32f16f( const float* src, size_t sstep, short* dst, size_t dstep, cv::Size size );`
			`void cvtScaleHalf_SIMD16f32f( const short* src, size_t sstep, float* dst, size_t dstep, cv::Size size );`
			`}`
			`}`