opencv/modules/core/src/softfloat.cpp

4758 lines
186 KiB
C++
Raw Normal View History

2017-06-15 02:20:10 +08:00
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This file is based on files from package issued with the following license:
SoftFloat integrated (#8668) * everything is put into softfloat.cpp and softfloat.hpp * WIP: try to integrate softfloat into OpenCV * extra functions removed * softfloat made stateless * CV_EXPORTS added * operators fixed * exp added, log: WIP * log32 fixed * shorter names; a lot of TODOs * log64 rewritten * cbrt32 added * minors, refactoring * "inline" -> "CV_INLINE" * cast to bool warnings fixed * several warnings fixed * fixed warning about unsigned unary minus * fixed warnings on type cast * inline -> CV_INLINE * special cases processing added (NaNs, Infs, etc.) * constants for NaN and Inf added * more macros and helper functions added * added (or fixed) tests for pow32, pow64, cbrt32 * exp-like functions fixed * minor changes * fixed random number generation for tests * tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation * minor warning fix * pow(f, i) 32/64: special cases handling added * unused functions removed * refactoring is in progress (not compiling) * CV_inline added * unions {uint_t, float_t} removed * tests compilation fixed * static const members -> static methods returning const * reinterpret_cast * warning fixed * const-ness fixed * all FP calculations (even compile-time) are done in SoftFloat + minor fixes * pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed * CV_INLINE -> inline * internal constants moved to .cpp file * toInt_minMag() methods merged into toInt() methods * macros moved to .cpp file * refactoring: types renamed to softfloat and softdouble; explicit constructors, etc. * toFloat(), toDouble() -> operator float(), operator double() * removed f32/f64 prefixes from functions names * toType() methods removed, round() and trunc() functions added * minor change * minors * MSVC: warnings fixed * added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>() * typo fixed * type cast fixed
2017-05-29 22:07:25 +08:00
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3c, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include "precomp.hpp"
#include "opencv2/core/softfloat.hpp"
SoftFloat integrated (#8668) * everything is put into softfloat.cpp and softfloat.hpp * WIP: try to integrate softfloat into OpenCV * extra functions removed * softfloat made stateless * CV_EXPORTS added * operators fixed * exp added, log: WIP * log32 fixed * shorter names; a lot of TODOs * log64 rewritten * cbrt32 added * minors, refactoring * "inline" -> "CV_INLINE" * cast to bool warnings fixed * several warnings fixed * fixed warning about unsigned unary minus * fixed warnings on type cast * inline -> CV_INLINE * special cases processing added (NaNs, Infs, etc.) * constants for NaN and Inf added * more macros and helper functions added * added (or fixed) tests for pow32, pow64, cbrt32 * exp-like functions fixed * minor changes * fixed random number generation for tests * tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation * minor warning fix * pow(f, i) 32/64: special cases handling added * unused functions removed * refactoring is in progress (not compiling) * CV_inline added * unions {uint_t, float_t} removed * tests compilation fixed * static const members -> static methods returning const * reinterpret_cast * warning fixed * const-ness fixed * all FP calculations (even compile-time) are done in SoftFloat + minor fixes * pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed * CV_INLINE -> inline * internal constants moved to .cpp file * toInt_minMag() methods merged into toInt() methods * macros moved to .cpp file * refactoring: types renamed to softfloat and softdouble; explicit constructors, etc. * toFloat(), toDouble() -> operator float(), operator double() * removed f32/f64 prefixes from functions names * toType() methods removed, round() and trunc() functions added * minor change * minors * MSVC: warnings fixed * added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>() * typo fixed * type cast fixed
2017-05-29 22:07:25 +08:00
namespace cv
{
/*----------------------------------------------------------------------------
| Software floating-point underflow tininess-detection mode.
*----------------------------------------------------------------------------*/
enum {
tininess_beforeRounding = 0,
tininess_afterRounding = 1
};
//fixed to make softfloat code stateless
const uint_fast8_t globalDetectTininess = tininess_afterRounding;
/*----------------------------------------------------------------------------
| Software floating-point exception flags.
*----------------------------------------------------------------------------*/
enum {
flag_inexact = 1,
flag_underflow = 2,
flag_overflow = 4,
flag_infinite = 8,
flag_invalid = 16
};
// Disabled to make softfloat code stateless
// This function may be changed in the future for better error handling
inline void raiseFlags( uint_fast8_t /* flags */)
{
//exceptionFlags |= flags;
}
/*----------------------------------------------------------------------------
| Software floating-point rounding mode.
*----------------------------------------------------------------------------*/
enum {
round_near_even = 0,
round_minMag = 1,
round_min = 2,
round_max = 3,
round_near_maxMag = 4,
round_odd = 5
};
//fixed to make softfloat code stateless
const uint_fast8_t globalRoundingMode = round_near_even;
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
#define signF32UI( a ) (((uint32_t) (a)>>31) != 0)
#define expF32UI( a ) ((int_fast16_t) ((a)>>23) & 0xFF)
#define fracF32UI( a ) ((a) & 0x007FFFFF)
#define packToF32UI( sign, exp, sig ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<23) + (sig))
#define isNaNF32UI( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF))
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
#define signF64UI( a ) (((uint64_t) (a)>>63) != 0)
#define expF64UI( a ) ((int_fast16_t) ((a)>>52) & 0x7FF)
#define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))
#define packToF64UI( sign, exp, sig ) ((uint64_t) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<52) + (sig)))
#define isNaNF64UI( a ) (((~(a) & UINT64_C( 0x7FF0000000000000 )) == 0) && ((a) & UINT64_C( 0x000FFFFFFFFFFFFF )))
/*----------------------------------------------------------------------------
| Types used to pass 32-bit and 64-bit floating-point
| arguments and results to/from functions. These types must be exactly
| 32 bits and 64 bits in size, respectively. Where a
| platform has "native" support for IEEE-Standard floating-point formats,
| the types below may, if desired, be defined as aliases for the native types
| (typically 'float' and 'double').
*----------------------------------------------------------------------------*/
typedef softfloat float32_t;
typedef softdouble float64_t;
/*----------------------------------------------------------------------------
| Integer-to-floating-point conversion routines.
*----------------------------------------------------------------------------*/
float32_t ui32_to_f32( uint32_t );
float64_t ui32_to_f64( uint32_t );
float32_t ui64_to_f32( uint64_t );
float64_t ui64_to_f64( uint64_t );
float32_t i32_to_f32( int32_t );
float64_t i32_to_f64( int32_t );
float32_t i64_to_f32( int64_t );
float64_t i64_to_f64( int64_t );
/*----------------------------------------------------------------------------
| 32-bit (single-precision) floating-point operations.
*----------------------------------------------------------------------------*/
uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool );
uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool );
int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool );
int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool );
uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool );
uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );
int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
int_fast64_t f32_to_i64_r_minMag( float32_t, bool );
float64_t f32_to_f64( float32_t );
float32_t f32_roundToInt( float32_t, uint_fast8_t, bool );
float32_t f32_add( float32_t, float32_t );
float32_t f32_sub( float32_t, float32_t );
float32_t f32_mul( float32_t, float32_t );
float32_t f32_mulAdd( float32_t, float32_t, float32_t );
float32_t f32_div( float32_t, float32_t );
float32_t f32_rem( float32_t, float32_t );
float32_t f32_sqrt( float32_t );
bool f32_eq( float32_t, float32_t );
bool f32_le( float32_t, float32_t );
bool f32_lt( float32_t, float32_t );
bool f32_eq_signaling( float32_t, float32_t );
bool f32_le_quiet( float32_t, float32_t );
bool f32_lt_quiet( float32_t, float32_t );
bool f32_isSignalingNaN( float32_t );
/*----------------------------------------------------------------------------
| 64-bit (double-precision) floating-point operations.
*----------------------------------------------------------------------------*/
uint_fast32_t f64_to_ui32( float64_t, uint_fast8_t, bool );
uint_fast64_t f64_to_ui64( float64_t, uint_fast8_t, bool );
int_fast32_t f64_to_i32( float64_t, uint_fast8_t, bool );
int_fast64_t f64_to_i64( float64_t, uint_fast8_t, bool );
uint_fast32_t f64_to_ui32_r_minMag( float64_t, bool );
uint_fast64_t f64_to_ui64_r_minMag( float64_t, bool );
int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
int_fast64_t f64_to_i64_r_minMag( float64_t, bool );
float32_t f64_to_f32( float64_t );
float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
float64_t f64_add( float64_t, float64_t );
float64_t f64_sub( float64_t, float64_t );
float64_t f64_mul( float64_t, float64_t );
float64_t f64_mulAdd( float64_t, float64_t, float64_t );
float64_t f64_div( float64_t, float64_t );
float64_t f64_rem( float64_t, float64_t );
float64_t f64_sqrt( float64_t );
bool f64_eq( float64_t, float64_t );
bool f64_le( float64_t, float64_t );
bool f64_lt( float64_t, float64_t );
bool f64_eq_signaling( float64_t, float64_t );
bool f64_le_quiet( float64_t, float64_t );
bool f64_lt_quiet( float64_t, float64_t );
bool f64_isSignalingNaN( float64_t );
/*----------------------------------------------------------------------------
| Ported from OpenCV and added for usability
*----------------------------------------------------------------------------*/
float32_t f32_powi( float32_t x, int y);
float64_t f64_powi( float64_t x, int y);
float32_t f32_exp( float32_t x);
float64_t f64_exp(float64_t x);
float32_t f32_log(float32_t x);
float64_t f64_log(float64_t x);
float32_t f32_cbrt(float32_t x);
float32_t f32_pow( float32_t x, float32_t y);
float64_t f64_pow( float64_t x, float64_t y);
/*----------------------------------------------------------------------------
| softfloat and softdouble methods and members
*----------------------------------------------------------------------------*/
softfloat::softfloat( const uint32_t a ) { *this = ui32_to_f32(a); }
softfloat::softfloat( const uint64_t a ) { *this = ui64_to_f32(a); }
softfloat::softfloat( const int32_t a ) { *this = i32_to_f32(a); }
softfloat::softfloat( const int64_t a ) { *this = i64_to_f32(a); }
softfloat::operator softdouble() const { return f32_to_f64(*this); }
softfloat softfloat::operator + (const softfloat& a) const { return f32_add(*this, a); }
softfloat softfloat::operator - (const softfloat& a) const { return f32_sub(*this, a); }
softfloat softfloat::operator * (const softfloat& a) const { return f32_mul(*this, a); }
softfloat softfloat::operator / (const softfloat& a) const { return f32_div(*this, a); }
softfloat softfloat::operator % (const softfloat& a) const { return f32_rem(*this, a); }
bool softfloat::operator == ( const softfloat& a ) const { return f32_eq(*this, a); }
bool softfloat::operator != ( const softfloat& a ) const { return !f32_eq(*this, a); }
bool softfloat::operator > ( const softfloat& a ) const { return f32_lt(a, *this); }
bool softfloat::operator >= ( const softfloat& a ) const { return f32_le(a, *this); }
bool softfloat::operator < ( const softfloat& a ) const { return f32_lt(*this, a); }
bool softfloat::operator <= ( const softfloat& a ) const { return f32_le(*this, a); }
softdouble::softdouble( const uint32_t a ) { *this = ui32_to_f64(a); }
softdouble::softdouble( const uint64_t a ) { *this = ui64_to_f64(a); }
softdouble::softdouble( const int32_t a ) { *this = i32_to_f64(a); }
softdouble::softdouble( const int64_t a ) { *this = i64_to_f64(a); }
}
int cvTrunc(const cv::softfloat& a) { return cv::f32_to_i32_r_minMag(a, false); }
int cvRound(const cv::softfloat& a) { return cv::f32_to_i32(a, cv::round_near_even, false); }
int cvFloor(const cv::softfloat& a) { return cv::f32_to_i32(a, cv::round_min, false); }
int cvCeil (const cv::softfloat& a) { return cv::f32_to_i32(a, cv::round_max, false); }
int cvTrunc(const cv::softdouble& a) { return cv::f64_to_i32_r_minMag(a, false); }
int cvRound(const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_near_even, false); }
int cvFloor(const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_min, false); }
int cvCeil (const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_max, false); }
namespace cv
{
softdouble::operator softfloat() const { return f64_to_f32(*this); }
softdouble softdouble::operator + (const softdouble& a) const { return f64_add(*this, a); }
softdouble softdouble::operator - (const softdouble& a) const { return f64_sub(*this, a); }
softdouble softdouble::operator * (const softdouble& a) const { return f64_mul(*this, a); }
softdouble softdouble::operator / (const softdouble& a) const { return f64_div(*this, a); }
softdouble softdouble::operator % (const softdouble& a) const { return f64_rem(*this, a); }
bool softdouble::operator == (const softdouble& a) const { return f64_eq(*this, a); }
bool softdouble::operator != (const softdouble& a) const { return !f64_eq(*this, a); }
bool softdouble::operator > (const softdouble& a) const { return f64_lt(a, *this); }
bool softdouble::operator >= (const softdouble& a) const { return f64_le(a, *this); }
bool softdouble::operator < (const softdouble& a) const { return f64_lt(*this, a); }
bool softdouble::operator <= (const softdouble& a) const { return f64_le(*this, a); }
/*----------------------------------------------------------------------------
| Overloads for math functions
*----------------------------------------------------------------------------*/
softfloat mulAdd( const softfloat& a, const softfloat& b, const softfloat & c) { return f32_mulAdd(a, b, c); }
softdouble mulAdd( const softdouble& a, const softdouble& b, const softdouble& c) { return f64_mulAdd(a, b, c); }
softfloat sqrt( const softfloat& a ) { return f32_sqrt(a); }
softdouble sqrt( const softdouble& a ) { return f64_sqrt(a); }
softfloat exp( const softfloat& a) { return f32_exp(a); }
softdouble exp( const softdouble& a) { return f64_exp(a); }
softfloat log( const softfloat& a ) { return f32_log(a); }
softdouble log( const softdouble& a ) { return f64_log(a); }
softfloat pow( const softfloat& a, const softfloat& b) { return f32_pow(a, b); }
softdouble pow( const softdouble& a, const softdouble& b) { return f64_pow(a, b); }
softfloat cbrt(const softfloat& a) { return f32_cbrt(a); }
/*----------------------------------------------------------------------------
| The values to return on conversions to 32-bit integer formats that raise an
| invalid exception.
*----------------------------------------------------------------------------*/
#define ui32_fromPosOverflow 0xFFFFFFFF
#define ui32_fromNegOverflow 0
#define ui32_fromNaN 0xFFFFFFFF
#define i32_fromPosOverflow 0x7FFFFFFF
#define i32_fromNegOverflow (-0x7FFFFFFF - 1)
#define i32_fromNaN 0x7FFFFFFF
/*----------------------------------------------------------------------------
| The values to return on conversions to 64-bit integer formats that raise an
| invalid exception.
*----------------------------------------------------------------------------*/
#define ui64_fromPosOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
#define ui64_fromNegOverflow 0
#define ui64_fromNaN UINT64_C( 0xFFFFFFFFFFFFFFFF )
#define i64_fromPosOverflow UINT64_C( 0x7FFFFFFFFFFFFFFF )
//fixed unsigned unary minus: -x == ~x + 1
//#define i64_fromNegOverflow (-UINT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
#define i64_fromNegOverflow (~UINT64_C( 0x7FFFFFFFFFFFFFFF ) + 1 - 1)
#define i64_fromNaN UINT64_C( 0x7FFFFFFFFFFFFFFF )
/*----------------------------------------------------------------------------
| "Common NaN" structure, used to transfer NaN representations from one format
| to another.
*----------------------------------------------------------------------------*/
struct commonNaN {
bool sign;
#ifndef WORDS_BIGENDIAN
uint64_t v0, v64;
#else
uint64_t v64, v0;
#endif
};
/*----------------------------------------------------------------------------
| The bit pattern for a default generated 32-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF32UI 0xFFC00000
/*----------------------------------------------------------------------------
| Returns true when 32-bit unsigned integer `uiA' has the bit pattern of a
| 32-bit floating-point signaling NaN.
| Note: This macro evaluates its argument more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNF32UI( uiA ) ((((uiA) & 0x7FC00000) == 0x7F800000) && ((uiA) & 0x003FFFFF))
/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a 32-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
static void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr );
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a 32-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
static uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );
/*----------------------------------------------------------------------------
| Interpreting `uiA' and `uiB' as the bit patterns of two 32-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
static uint_fast32_t softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB );
/*----------------------------------------------------------------------------
| The bit pattern for a default generated 64-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF64UI UINT64_C( 0xFFF8000000000000 )
/*----------------------------------------------------------------------------
| Returns true when 64-bit unsigned integer `uiA' has the bit pattern of a
| 64-bit floating-point signaling NaN.
| Note: This macro evaluates its argument more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNF64UI( uiA ) \
((((uiA) & UINT64_C( 0x7FF8000000000000 )) == UINT64_C( 0x7FF0000000000000 )) && \
((uiA) & UINT64_C( 0x0007FFFFFFFFFFFF )))
/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a 64-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
static void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr );
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a 64-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
static uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr );
/*----------------------------------------------------------------------------
| Interpreting `uiA' and `uiB' as the bit patterns of two 64-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
static uint_fast64_t softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
#ifndef WORDS_BIGENDIAN
struct uint128 { uint64_t v0, v64; };
struct uint64_extra { uint64_t extra, v; };
struct uint128_extra { uint64_t extra; struct uint128 v; };
#else
struct uint128 { uint64_t v64, v0; };
struct uint64_extra { uint64_t v, extra; };
struct uint128_extra { struct uint128 v; uint64_t extra; };
#endif
/*----------------------------------------------------------------------------
| These macros are used to isolate the differences in word order between big-
| endian and little-endian platforms.
*----------------------------------------------------------------------------*/
#ifndef WORDS_BIGENDIAN
#define wordIncr 1
#define indexWord( total, n ) (n)
#define indexWordHi( total ) ((total) - 1)
#define indexWordLo( total ) 0
#define indexMultiword( total, m, n ) (n)
#define indexMultiwordHi( total, n ) ((total) - (n))
#define indexMultiwordLo( total, n ) 0
#define indexMultiwordHiBut( total, n ) (n)
#define indexMultiwordLoBut( total, n ) 0
#define INIT_UINTM4( v3, v2, v1, v0 ) { v0, v1, v2, v3 }
#else
#define wordIncr -1
#define indexWord( total, n ) ((total) - 1 - (n))
#define indexWordHi( total ) 0
#define indexWordLo( total ) ((total) - 1)
#define indexMultiword( total, m, n ) ((total) - 1 - (m))
#define indexMultiwordHi( total, n ) 0
#define indexMultiwordLo( total, n ) ((total) - (n))
#define indexMultiwordHiBut( total, n ) 0
#define indexMultiwordLoBut( total, n ) (n)
#define INIT_UINTM4( v3, v2, v1, v0 ) { v3, v2, v1, v0 }
#endif
enum {
softfloat_mulAdd_subC = 1,
softfloat_mulAdd_subProd = 2
};
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
static uint_fast32_t softfloat_roundToUI32( bool, uint_fast64_t, uint_fast8_t, bool );
static uint_fast64_t softfloat_roundToUI64( bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool );
static int_fast32_t softfloat_roundToI32( bool, uint_fast64_t, uint_fast8_t, bool );
static int_fast64_t softfloat_roundToI64( bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
struct exp16_sig32 { int_fast16_t exp; uint_fast32_t sig; };
static struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t );
static float32_t softfloat_roundPackToF32( bool, int_fast16_t, uint_fast32_t );
static float32_t softfloat_normRoundPackToF32( bool, int_fast16_t, uint_fast32_t );
static float32_t softfloat_addMagsF32( uint_fast32_t, uint_fast32_t );
static float32_t softfloat_subMagsF32( uint_fast32_t, uint_fast32_t );
static float32_t softfloat_mulAddF32(uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast8_t );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };
static struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t );
static float64_t softfloat_roundPackToF64( bool, int_fast16_t, uint_fast64_t );
static float64_t softfloat_normRoundPackToF64( bool, int_fast16_t, uint_fast64_t );
static float64_t softfloat_addMagsF64( uint_fast64_t, uint_fast64_t, bool );
static float64_t softfloat_subMagsF64( uint_fast64_t, uint_fast64_t, bool );
static float64_t softfloat_mulAddF64( uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast8_t );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
| Shifts 'a' right by the number of bits given in 'dist', which must be in
| the range 1 to 63. If any nonzero bits are shifted off, they are "jammed"
| into the least-significant bit of the shifted value by setting the least-
| significant bit to 1. This shifted-and-jammed value is returned.
*----------------------------------------------------------------------------*/
inline uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist )
{ return a>>dist | ((a & (((uint_fast64_t) 1<<dist) - 1)) != 0); }
/*----------------------------------------------------------------------------
| Shifts 'a' right by the number of bits given in 'dist', which must not
| be zero. If any nonzero bits are shifted off, they are "jammed" into the
| least-significant bit of the shifted value by setting the least-significant
| bit to 1. This shifted-and-jammed value is returned.
| The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
| greater than 32, the result will be either 0 or 1, depending on whether 'a'
| is zero or nonzero.
*----------------------------------------------------------------------------*/
inline uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist )
{
//fixed unsigned unary minus: -x == ~x + 1
return (dist < 31) ? a>>dist | ((uint32_t) (a<<((~dist + 1) & 31)) != 0) : (a != 0);
}
/*----------------------------------------------------------------------------
| Shifts 'a' right by the number of bits given in 'dist', which must not
| be zero. If any nonzero bits are shifted off, they are "jammed" into the
| least-significant bit of the shifted value by setting the least-significant
| bit to 1. This shifted-and-jammed value is returned.
| The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
| greater than 64, the result will be either 0 or 1, depending on whether 'a'
| is zero or nonzero.
*----------------------------------------------------------------------------*/
inline uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist )
{
//fixed unsigned unary minus: -x == ~x + 1
return (dist < 63) ? a>>dist | ((uint64_t) (a<<((~dist + 1) & 63)) != 0) : (a != 0);
}
/*----------------------------------------------------------------------------
| A constant table that translates an 8-bit unsigned integer (the array index)
| into the number of leading 0 bits before the most-significant 1 of that
| integer. For integer zero (index 0), the corresponding table element is 8.
*----------------------------------------------------------------------------*/
static const uint_least8_t softfloat_countLeadingZeros8[256] = {
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| 'a'. If 'a' is zero, 32 is returned.
*----------------------------------------------------------------------------*/
inline uint_fast8_t softfloat_countLeadingZeros32( uint32_t a )
{
uint_fast8_t count = 0;
if ( a < 0x10000 ) {
count = 16;
a <<= 16;
}
if ( a < 0x1000000 ) {
count += 8;
a <<= 8;
}
count += softfloat_countLeadingZeros8[a>>24];
return count;
}
/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| 'a'. If 'a' is zero, 64 is returned.
*----------------------------------------------------------------------------*/
static uint_fast8_t softfloat_countLeadingZeros64( uint64_t a );
/*----------------------------------------------------------------------------
| Returns an approximation to the reciprocal of the number represented by 'a',
| where 'a' is interpreted as an unsigned fixed-point number with one integer
| bit and 31 fraction bits. The 'a' input must be "normalized", meaning that
| its most-significant bit (bit 31) must be 1. Thus, if A is the value of
| the fixed-point interpretation of 'a', then 1 <= A < 2. The returned value
| is interpreted as a pure unsigned fraction, having no integer bits and 32
| fraction bits. The approximation returned is never greater than the true
| reciprocal 1/A, and it differs from the true reciprocal by at most 2.006 ulp
| (units in the last place).
*----------------------------------------------------------------------------*/
#define softfloat_approxRecip32_1( a ) ((uint32_t) (UINT64_C( 0x7FFFFFFFFFFFFFFF ) / (uint32_t) (a)))
/*----------------------------------------------------------------------------
| Returns an approximation to the reciprocal of the square root of the number
| represented by 'a', where 'a' is interpreted as an unsigned fixed-point
| number either with one integer bit and 31 fraction bits or with two integer
| bits and 30 fraction bits. The format of 'a' is determined by 'oddExpA',
| which must be either 0 or 1. If 'oddExpA' is 1, 'a' is interpreted as
| having one integer bit, and if 'oddExpA' is 0, 'a' is interpreted as having
| two integer bits. The 'a' input must be "normalized", meaning that its
| most-significant bit (bit 31) must be 1. Thus, if A is the value of the
| fixed-point interpretation of 'a', it follows that 1 <= A < 2 when 'oddExpA'
| is 1, and 2 <= A < 4 when 'oddExpA' is 0.
| The returned value is interpreted as a pure unsigned fraction, having
| no integer bits and 32 fraction bits. The approximation returned is never
| greater than the true reciprocal 1/sqrt(A), and it differs from the true
| reciprocal by at most 2.06 ulp (units in the last place). The approximation
| returned is also always within the range 0.5 to 1; thus, the most-
| significant bit of the result is always set.
*----------------------------------------------------------------------------*/
static uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a );
static const uint16_t softfloat_approxRecipSqrt_1k0s[16] = {
0xB4C9, 0xFFAB, 0xAA7D, 0xF11C, 0xA1C5, 0xE4C7, 0x9A43, 0xDA29,
0x93B5, 0xD0E5, 0x8DED, 0xC8B7, 0x88C6, 0xC16D, 0x8424, 0xBAE1
};
static const uint16_t softfloat_approxRecipSqrt_1k1s[16] = {
0xA5A5, 0xEA42, 0x8C21, 0xC62D, 0x788F, 0xAA7F, 0x6928, 0x94B6,
0x5CC7, 0x8335, 0x52A6, 0x74E2, 0x4A3E, 0x68FE, 0x432B, 0x5EFD
};
/*----------------------------------------------------------------------------
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' left by the
| number of bits given in 'dist', which must be in the range 1 to 63.
*----------------------------------------------------------------------------*/
inline struct uint128 softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
struct uint128 z;
z.v64 = a64<<dist | a0>>(-dist & 63);
z.v0 = a0<<dist;
return z;
}
/*----------------------------------------------------------------------------
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' right by the
| number of bits given in 'dist', which must be in the range 1 to 63. If any
| nonzero bits are shifted off, they are "jammed" into the least-significant
| bit of the shifted value by setting the least-significant bit to 1. This
| shifted-and-jammed value is returned.
*----------------------------------------------------------------------------*/
inline struct uint128 softfloat_shortShiftRightJam128(uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
uint_fast8_t negDist = -dist;
struct uint128 z;
z.v64 = a64>>dist;
z.v0 =
a64<<(negDist & 63) | a0>>dist
| ((uint64_t) (a0<<(negDist & 63)) != 0);
return z;
}
/*----------------------------------------------------------------------------
| Shifts the 128 bits formed by concatenating 'a' and 'extra' right by 64
| _plus_ the number of bits given in 'dist', which must not be zero. This
| shifted value is at most 64 nonzero bits and is returned in the 'v' field
| of the 'struct uint64_extra' result. The 64-bit 'extra' field of the result
| contains a value formed as follows from the bits that were shifted off: The
| _last_ bit shifted off is the most-significant bit of the 'extra' field, and
| the other 63 bits of the 'extra' field are all zero if and only if _all_but_
| _the_last_ bits shifted off were all zero.
| (This function makes more sense if 'a' and 'extra' are considered to form
| an unsigned fixed-point number with binary point between 'a' and 'extra'.
| This fixed-point value is shifted right by the number of bits given in
| 'dist', and the integer part of this shifted value is returned in the 'v'
| field of the result. The fractional part of the shifted value is modified
| as described above and returned in the 'extra' field of the result.)
*----------------------------------------------------------------------------*/
inline struct uint64_extra softfloat_shiftRightJam64Extra(uint64_t a, uint64_t extra, uint_fast32_t dist )
{
struct uint64_extra z;
if ( dist < 64 ) {
z.v = a>>dist;
//fixed unsigned unary minus: -x == ~x + 1
z.extra = a<<((~dist + 1) & 63);
} else {
z.v = 0;
z.extra = (dist == 64) ? a : (a != 0);
}
z.extra |= (extra != 0);
return z;
}
/*----------------------------------------------------------------------------
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' right by the
| number of bits given in 'dist', which must not be zero. If any nonzero bits
| are shifted off, they are "jammed" into the least-significant bit of the
| shifted value by setting the least-significant bit to 1. This shifted-and-
| jammed value is returned.
| The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
| greater than 128, the result will be either 0 or 1, depending on whether the
| original 128 bits are all zeros.
*----------------------------------------------------------------------------*/
static struct uint128 softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist );
/*----------------------------------------------------------------------------
| Returns the sum of the 128-bit integer formed by concatenating 'a64' and
| 'a0' and the 128-bit integer formed by concatenating 'b64' and 'b0'. The
| addition is modulo 2^128, so any carry out is lost.
*----------------------------------------------------------------------------*/
inline struct uint128 softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
struct uint128 z;
z.v0 = a0 + b0;
z.v64 = a64 + b64 + (z.v0 < a0);
return z;
}
/*----------------------------------------------------------------------------
| Returns the difference of the 128-bit integer formed by concatenating 'a64'
| and 'a0' and the 128-bit integer formed by concatenating 'b64' and 'b0'.
| The subtraction is modulo 2^128, so any borrow out (carry out) is lost.
*----------------------------------------------------------------------------*/
inline struct uint128 softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
struct uint128 z;
z.v0 = a0 - b0;
z.v64 = a64 - b64;
z.v64 -= (a0 < b0);
return z;
}
/*----------------------------------------------------------------------------
| Returns the 128-bit product of 'a' and 'b'.
*----------------------------------------------------------------------------*/
static struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
float32_t f32_add( float32_t a, float32_t b )
{
uint_fast32_t uiA = a.v;
uint_fast32_t uiB = b.v;
if ( signF32UI( uiA ^ uiB ) ) {
return softfloat_subMagsF32( uiA, uiB );
} else {
return softfloat_addMagsF32( uiA, uiB );
}
}
float32_t f32_div( float32_t a, float32_t b )
{
uint_fast32_t uiA;
bool signA;
int_fast16_t expA;
uint_fast32_t sigA;
uint_fast32_t uiB;
bool signB;
int_fast16_t expB;
uint_fast32_t sigB;
bool signZ;
struct exp16_sig32 normExpSig;
int_fast16_t expZ;
uint_fast64_t sig64A;
uint_fast32_t sigZ;
uint_fast32_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF32UI( uiA );
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
uiB = b.v;
signB = signF32UI( uiB );
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
signZ = signA ^ signB;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA ) goto propagateNaN;
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
goto invalid;
}
goto infinity;
}
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
goto zero;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expB ) {
if ( ! sigB ) {
if ( ! (expA | sigA) ) goto invalid;
raiseFlags( flag_infinite );
goto infinity;
}
normExpSig = softfloat_normSubnormalF32Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
if ( ! expA ) {
if ( ! sigA ) goto zero;
normExpSig = softfloat_normSubnormalF32Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = expA - expB + 0x7E;
sigA |= 0x00800000;
sigB |= 0x00800000;
if ( sigA < sigB ) {
--expZ;
sig64A = (uint_fast64_t) sigA<<31;
} else {
sig64A = (uint_fast64_t) sigA<<30;
}
sigZ = (uint_fast32_t)(sig64A / sigB); // fixed warning on type cast
if ( ! (sigZ & 0x3F) ) sigZ |= ((uint_fast64_t) sigB * sigZ != sig64A);
return softfloat_roundPackToF32( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infinity:
uiZ = packToF32UI( signZ, 0xFF, 0 );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zero:
uiZ = packToF32UI( signZ, 0, 0 );
uiZ:
return float32_t::fromRaw(uiZ);
}
bool f32_eq( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
uiA = a.v;
uiB = b.v;
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
if (
softfloat_isSigNaNF32UI( uiA ) || softfloat_isSigNaNF32UI( uiB )
) {
raiseFlags( flag_invalid );
}
return false;
}
return (uiA == uiB) || ! (uint32_t) ((uiA | uiB)<<1);
}
bool f32_eq_signaling( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
uiA = a.v;
uiB = b.v;
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
raiseFlags( flag_invalid );
return false;
}
return (uiA == uiB) || ! (uint32_t) ((uiA | uiB)<<1);
}
bool f32_isSignalingNaN( float32_t a )
{
return softfloat_isSigNaNF32UI( a.v );
}
bool f32_le( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
bool signA, signB;
uiA = a.v;
uiB = b.v;
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
raiseFlags( flag_invalid );
return false;
}
signA = signF32UI( uiA );
signB = signF32UI( uiB );
return
(signA != signB) ? signA || ! (uint32_t) ((uiA | uiB)<<1)
: (uiA == uiB) || (signA ^ (uiA < uiB));
}
bool f32_le_quiet( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
bool signA, signB;
uiA = a.v;
uiB = b.v;
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
if (
softfloat_isSigNaNF32UI( uiA ) || softfloat_isSigNaNF32UI( uiB )
) {
raiseFlags( flag_invalid );
}
return false;
}
signA = signF32UI( uiA );
signB = signF32UI( uiB );
return
(signA != signB) ? signA || ! (uint32_t) ((uiA | uiB)<<1)
: (uiA == uiB) || (signA ^ (uiA < uiB));
}
bool f32_lt( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
bool signA, signB;
uiA = a.v;
uiB = b.v;
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
raiseFlags( flag_invalid );
return false;
}
signA = signF32UI( uiA );
signB = signF32UI( uiB );
return
(signA != signB) ? signA && ((uint32_t) ((uiA | uiB)<<1) != 0)
: (uiA != uiB) && (signA ^ (uiA < uiB));
}
bool f32_lt_quiet( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
bool signA, signB;
uiA = a.v;
uiB = b.v;
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
if (
softfloat_isSigNaNF32UI( uiA ) || softfloat_isSigNaNF32UI( uiB )
) {
raiseFlags( flag_invalid );
}
return false;
}
signA = signF32UI( uiA );
signB = signF32UI( uiB );
return
(signA != signB) ? signA && ((uint32_t) ((uiA | uiB)<<1) != 0)
: (uiA != uiB) && (signA ^ (uiA < uiB));
}
float32_t f32_mulAdd( float32_t a, float32_t b, float32_t c )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
uint_fast32_t uiC;
uiA = a.v;
uiB = b.v;
uiC = c.v;
return softfloat_mulAddF32( uiA, uiB, uiC, 0 );
}
float32_t f32_mul( float32_t a, float32_t b )
{
uint_fast32_t uiA;
bool signA;
int_fast16_t expA;
uint_fast32_t sigA;
uint_fast32_t uiB;
bool signB;
int_fast16_t expB;
uint_fast32_t sigB;
bool signZ;
uint_fast32_t magBits;
struct exp16_sig32 normExpSig;
int_fast16_t expZ;
uint_fast32_t sigZ, uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF32UI( uiA );
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
uiB = b.v;
signB = signF32UI( uiB );
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
signZ = signA ^ signB;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA || ((expB == 0xFF) && sigB) ) goto propagateNaN;
magBits = expB | sigB;
goto infArg;
}
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
magBits = expA | sigA;
goto infArg;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) goto zero;
normExpSig = softfloat_normSubnormalF32Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
if ( ! expB ) {
if ( ! sigB ) goto zero;
normExpSig = softfloat_normSubnormalF32Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = expA + expB - 0x7F;
sigA = (sigA | 0x00800000)<<7;
sigB = (sigB | 0x00800000)<<8;
sigZ = (uint_fast32_t)softfloat_shortShiftRightJam64( (uint_fast64_t) sigA * sigB, 32 ); //fixed warning on type cast
if ( sigZ < 0x40000000 ) {
--expZ;
sigZ <<= 1;
}
return softfloat_roundPackToF32( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infArg:
if ( ! magBits ) {
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
} else {
uiZ = packToF32UI( signZ, 0xFF, 0 );
}
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zero:
uiZ = packToF32UI( signZ, 0, 0 );
uiZ:
return float32_t::fromRaw(uiZ);
}
float32_t f32_rem( float32_t a, float32_t b )
{
uint_fast32_t uiA;
bool signA;
int_fast16_t expA;
uint_fast32_t sigA;
uint_fast32_t uiB;
int_fast16_t expB;
uint_fast32_t sigB;
struct exp16_sig32 normExpSig;
uint32_t rem;
int_fast16_t expDiff;
uint32_t q, recip32, altRem, meanRem;
bool signRem;
uint_fast32_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF32UI( uiA );
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
uiB = b.v;
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA || ((expB == 0xFF) && sigB) ) goto propagateNaN;
goto invalid;
}
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
return a;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expB ) {
if ( ! sigB ) goto invalid;
normExpSig = softfloat_normSubnormalF32Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
if ( ! expA ) {
if ( ! sigA ) return a;
normExpSig = softfloat_normSubnormalF32Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
rem = sigA | 0x00800000;
sigB |= 0x00800000;
expDiff = expA - expB;
if ( expDiff < 1 ) {
if ( expDiff < -1 ) return a;
sigB <<= 6;
if ( expDiff ) {
rem <<= 5;
q = 0;
} else {
rem <<= 6;
q = (sigB <= rem);
if ( q ) rem -= sigB;
}
} else {
recip32 = softfloat_approxRecip32_1( sigB<<8 );
/*--------------------------------------------------------------------
| Changing the shift of `rem' here requires also changing the initial
| subtraction from `expDiff'.
*--------------------------------------------------------------------*/
rem <<= 7;
expDiff -= 31;
/*--------------------------------------------------------------------
| The scale of `sigB' affects how many bits are obtained during each
| cycle of the loop. Currently this is 29 bits per loop iteration,
| which is believed to be the maximum possible.
*--------------------------------------------------------------------*/
sigB <<= 6;
for (;;) {
q = (rem * (uint_fast64_t) recip32)>>32;
if ( expDiff < 0 ) break;
//fixed unsigned unary minus: -x == ~x + 1
rem = ~(q * (uint32_t) sigB) + 1;
expDiff -= 29;
}
/*--------------------------------------------------------------------
| (`expDiff' cannot be less than -30 here.)
*--------------------------------------------------------------------*/
q >>= ~expDiff & 31;
rem = (rem<<(expDiff + 30)) - q * (uint32_t) sigB;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
do {
altRem = rem;
++q;
rem -= sigB;
} while ( ! (rem & 0x80000000) );
meanRem = rem + altRem;
if ( (meanRem & 0x80000000) || (! meanRem && (q & 1)) ) rem = altRem;
signRem = signA;
if ( 0x80000000 <= rem ) {
signRem = ! signRem;
//fixed unsigned unary minus: -x == ~x + 1
rem = ~rem + 1;
}
return softfloat_normRoundPackToF32( signRem, expB, rem );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
goto uiZ;
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
uiZ:
return float32_t::fromRaw(uiZ);
}
float32_t f32_roundToInt( float32_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast32_t uiA;
int_fast16_t exp;
uint_fast32_t uiZ, lastBitMask, roundBitsMask;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp <= 0x7E ) {
if ( ! (uint32_t) (uiA<<1) ) return a;
if ( exact ) raiseFlags(flag_inexact);
uiZ = uiA & packToF32UI( 1, 0, 0 );
switch ( roundingMode ) {
case round_near_even:
if ( ! fracF32UI( uiA ) ) break;
case round_near_maxMag:
if ( exp == 0x7E ) uiZ |= packToF32UI( 0, 0x7F, 0 );
break;
case round_min:
if ( uiZ ) uiZ = packToF32UI( 1, 0x7F, 0 );
break;
case round_max:
if ( ! uiZ ) uiZ = packToF32UI( 0, 0x7F, 0 );
break;
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( 0x96 <= exp ) {
if ( (exp == 0xFF) && fracF32UI( uiA ) ) {
uiZ = softfloat_propagateNaNF32UI( uiA, 0 );
goto uiZ;
}
return a;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiZ = uiA;
lastBitMask = (uint_fast32_t) 1<<(0x96 - exp);
roundBitsMask = lastBitMask - 1;
if ( roundingMode == round_near_maxMag ) {
uiZ += lastBitMask>>1;
} else if ( roundingMode == round_near_even ) {
uiZ += lastBitMask>>1;
if ( ! (uiZ & roundBitsMask) ) uiZ &= ~lastBitMask;
} else if (
roundingMode
== (signF32UI( uiZ ) ? round_min : round_max)
) {
uiZ += roundBitsMask;
}
uiZ &= ~roundBitsMask;
if ( exact && (uiZ != uiA) ) {
raiseFlags(flag_inexact);
}
uiZ:
return float32_t::fromRaw(uiZ);
}
float32_t f32_sqrt( float32_t a )
{
uint_fast32_t uiA;
bool signA;
int_fast16_t expA;
uint_fast32_t sigA, uiZ;
struct exp16_sig32 normExpSig;
int_fast16_t expZ;
uint_fast32_t sigZ, shiftedSigZ;
uint32_t negRem;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF32UI( uiA );
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA ) {
uiZ = softfloat_propagateNaNF32UI( uiA, 0 );
goto uiZ;
}
if ( ! signA ) return a;
goto invalid;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( signA ) {
if ( ! (expA | sigA) ) return a;
goto invalid;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) return a;
normExpSig = softfloat_normSubnormalF32Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = ((expA - 0x7F)>>1) + 0x7E;
expA &= 1;
sigA = (sigA | 0x00800000)<<8;
sigZ =
((uint_fast64_t) sigA * softfloat_approxRecipSqrt32_1( expA, sigA ))
>>32;
if ( expA ) sigZ >>= 1;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sigZ += 2;
if ( (sigZ & 0x3F) < 2 ) {
shiftedSigZ = sigZ>>2;
negRem = shiftedSigZ * shiftedSigZ;
sigZ &= ~3;
if ( negRem & 0x80000000 ) {
sigZ |= 1;
} else {
if ( negRem ) --sigZ;
}
}
return softfloat_roundPackToF32( 0, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
uiZ:
return float32_t::fromRaw(uiZ);
}
float32_t f32_sub( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
uiA = a.v;
uiB = b.v;
if ( signF32UI( uiA ^ uiB ) ) {
return softfloat_addMagsF32( uiA, uiB );
} else {
return softfloat_subMagsF32( uiA, uiB );
}
}
float64_t f32_to_f64( float32_t a )
{
uint_fast32_t uiA;
bool sign;
int_fast16_t exp;
uint_fast32_t frac;
struct commonNaN commonNaN;
uint_fast64_t uiZ;
struct exp16_sig32 normExpSig;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF32UI( uiA );
exp = expF32UI( uiA );
frac = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp == 0xFF ) {
if ( frac ) {
softfloat_f32UIToCommonNaN( uiA, &commonNaN );
uiZ = softfloat_commonNaNToF64UI( &commonNaN );
} else {
uiZ = packToF64UI( sign, 0x7FF, 0 );
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! exp ) {
if ( ! frac ) {
uiZ = packToF64UI( sign, 0, 0 );
goto uiZ;
}
normExpSig = softfloat_normSubnormalF32Sig( frac );
exp = normExpSig.exp - 1;
frac = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiZ = packToF64UI( sign, exp + 0x380, (uint_fast64_t) frac<<29 );
uiZ:
return float64_t::fromRaw(uiZ);
}
int_fast32_t f32_to_i32( float32_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast32_t uiA;
bool sign;
int_fast16_t exp;
uint_fast32_t sig;
uint_fast64_t sig64;
int_fast16_t shiftDist;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF32UI( uiA );
exp = expF32UI( uiA );
sig = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
if ( (exp == 0xFF) && sig ) {
#if (i32_fromNaN == i32_fromPosOverflow)
sign = 0;
#elif (i32_fromNaN == i32_fromNegOverflow)
sign = 1;
#else
raiseFlags( flag_invalid );
return i32_fromNaN;
#endif
}
#endif
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp ) sig |= 0x00800000;
sig64 = (uint_fast64_t) sig<<32;
shiftDist = 0xAA - exp;
if ( 0 < shiftDist ) sig64 = softfloat_shiftRightJam64( sig64, shiftDist );
return softfloat_roundToI32( sign, sig64, roundingMode, exact );
}
int_fast32_t f32_to_i32_r_minMag( float32_t a, bool exact )
{
uint_fast32_t uiA;
int_fast16_t exp;
uint_fast32_t sig;
int_fast16_t shiftDist;
bool sign;
int_fast32_t absZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF32UI( uiA );
sig = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0x9E - exp;
if ( 32 <= shiftDist ) {
if ( exact && (exp | sig) ) {
raiseFlags(flag_inexact);
}
return 0;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sign = signF32UI( uiA );
if ( shiftDist <= 0 ) {
if ( uiA == packToF32UI( 1, 0x9E, 0 ) ) return -0x7FFFFFFF - 1;
raiseFlags( flag_invalid );
return
(exp == 0xFF) && sig ? i32_fromNaN
: sign ? i32_fromNegOverflow : i32_fromPosOverflow;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig = (sig | 0x00800000)<<8;
absZ = sig>>shiftDist;
if ( exact && ((uint_fast32_t) absZ<<shiftDist != sig) ) {
raiseFlags(flag_inexact);
}
return sign ? -absZ : absZ;
}
int_fast64_t f32_to_i64( float32_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast32_t uiA;
bool sign;
int_fast16_t exp;
uint_fast32_t sig;
int_fast16_t shiftDist;
uint_fast64_t sig64, extra;
struct uint64_extra sig64Extra;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF32UI( uiA );
exp = expF32UI( uiA );
sig = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0xBE - exp;
if ( shiftDist < 0 ) {
raiseFlags( flag_invalid );
return
(exp == 0xFF) && sig ? i64_fromNaN
: sign ? i64_fromNegOverflow : i64_fromPosOverflow;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp ) sig |= 0x00800000;
sig64 = (uint_fast64_t) sig<<40;
extra = 0;
if ( shiftDist ) {
sig64Extra = softfloat_shiftRightJam64Extra( sig64, 0, shiftDist );
sig64 = sig64Extra.v;
extra = sig64Extra.extra;
}
return softfloat_roundToI64( sign, sig64, extra, roundingMode, exact );
}
int_fast64_t f32_to_i64_r_minMag( float32_t a, bool exact )
{
uint_fast32_t uiA;
int_fast16_t exp;
uint_fast32_t sig;
int_fast16_t shiftDist;
bool sign;
uint_fast64_t sig64;
int_fast64_t absZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF32UI( uiA );
sig = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0xBE - exp;
if ( 64 <= shiftDist ) {
if ( exact && (exp | sig) ) {
raiseFlags(flag_inexact);
}
return 0;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sign = signF32UI( uiA );
if ( shiftDist <= 0 ) {
if ( uiA == packToF32UI( 1, 0xBE, 0 ) ) {
return -INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;
}
raiseFlags( flag_invalid );
return
(exp == 0xFF) && sig ? i64_fromNaN
: sign ? i64_fromNegOverflow : i64_fromPosOverflow;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig |= 0x00800000;
sig64 = (uint_fast64_t) sig<<40;
absZ = sig64>>shiftDist;
shiftDist = 40 - shiftDist;
if ( exact && (shiftDist < 0) && (uint32_t) (sig<<(shiftDist & 31)) ) {
raiseFlags(flag_inexact);
}
return sign ? -absZ : absZ;
}
uint_fast32_t f32_to_ui32( float32_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast32_t uiA;
bool sign;
int_fast16_t exp;
uint_fast32_t sig;
uint_fast64_t sig64;
int_fast16_t shiftDist;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF32UI( uiA );
exp = expF32UI( uiA );
sig = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
#if (ui32_fromNaN != ui32_fromPosOverflow) || (ui32_fromNaN != ui32_fromNegOverflow)
if ( (exp == 0xFF) && sig ) {
#if (ui32_fromNaN == ui32_fromPosOverflow)
sign = 0;
#elif (ui32_fromNaN == ui32_fromNegOverflow)
sign = 1;
#else
raiseFlags( flag_invalid );
return ui32_fromNaN;
#endif
}
#endif
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp ) sig |= 0x00800000;
sig64 = (uint_fast64_t) sig<<32;
shiftDist = 0xAA - exp;
if ( 0 < shiftDist ) sig64 = softfloat_shiftRightJam64( sig64, shiftDist );
return softfloat_roundToUI32( sign, sig64, roundingMode, exact );
}
uint_fast32_t f32_to_ui32_r_minMag( float32_t a, bool exact )
{
uint_fast32_t uiA;
int_fast16_t exp;
uint_fast32_t sig;
int_fast16_t shiftDist;
bool sign;
uint_fast32_t z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF32UI( uiA );
sig = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0x9E - exp;
if ( 32 <= shiftDist ) {
if ( exact && (exp | sig) ) {
raiseFlags(flag_inexact);
}
return 0;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sign = signF32UI( uiA );
if ( sign || (shiftDist < 0) ) {
raiseFlags( flag_invalid );
return
(exp == 0xFF) && sig ? ui32_fromNaN
: sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig = (sig | 0x00800000)<<8;
z = sig>>shiftDist;
if ( exact && (z<<shiftDist != sig) ) {
raiseFlags(flag_inexact);
}
return z;
}
uint_fast64_t f32_to_ui64( float32_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast32_t uiA;
bool sign;
int_fast16_t exp;
uint_fast32_t sig;
int_fast16_t shiftDist;
uint_fast64_t sig64, extra;
struct uint64_extra sig64Extra;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF32UI( uiA );
exp = expF32UI( uiA );
sig = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0xBE - exp;
if ( shiftDist < 0 ) {
raiseFlags( flag_invalid );
return
(exp == 0xFF) && sig ? ui64_fromNaN
: sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp ) sig |= 0x00800000;
sig64 = (uint_fast64_t) sig<<40;
extra = 0;
if ( shiftDist ) {
sig64Extra = softfloat_shiftRightJam64Extra( sig64, 0, shiftDist );
sig64 = sig64Extra.v;
extra = sig64Extra.extra;
}
return softfloat_roundToUI64( sign, sig64, extra, roundingMode, exact );
}
uint_fast64_t f32_to_ui64_r_minMag( float32_t a, bool exact )
{
uint_fast32_t uiA;
int_fast16_t exp;
uint_fast32_t sig;
int_fast16_t shiftDist;
bool sign;
uint_fast64_t sig64, z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF32UI( uiA );
sig = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0xBE - exp;
if ( 64 <= shiftDist ) {
if ( exact && (exp | sig) ) {
raiseFlags(flag_inexact);
}
return 0;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sign = signF32UI( uiA );
if ( sign || (shiftDist < 0) ) {
raiseFlags( flag_invalid );
return
(exp == 0xFF) && sig ? ui64_fromNaN
: sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig |= 0x00800000;
sig64 = (uint_fast64_t) sig<<40;
z = sig64>>shiftDist;
shiftDist = 40 - shiftDist;
if ( exact && (shiftDist < 0) && (uint32_t) (sig<<(shiftDist & 31)) ) {
raiseFlags(flag_inexact);
}
return z;
}
float64_t f64_add( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
uint_fast64_t uiB;
bool signB;
uiA = a.v;
signA = signF64UI( uiA );
uiB = b.v;
signB = signF64UI( uiB );
if ( signA == signB ) {
return softfloat_addMagsF64( uiA, uiB, signA );
} else {
return softfloat_subMagsF64( uiA, uiB, signA );
}
}
float64_t f64_div( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
int_fast16_t expA;
uint_fast64_t sigA;
uint_fast64_t uiB;
bool signB;
int_fast16_t expB;
uint_fast64_t sigB;
bool signZ;
struct exp16_sig64 normExpSig;
int_fast16_t expZ;
uint32_t recip32, sig32Z, doubleTerm;
uint_fast64_t rem;
uint32_t q;
uint_fast64_t sigZ;
uint_fast64_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF64UI( uiA );
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
uiB = b.v;
signB = signF64UI( uiB );
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
signZ = signA ^ signB;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA ) goto propagateNaN;
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
goto invalid;
}
goto infinity;
}
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
goto zero;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expB ) {
if ( ! sigB ) {
if ( ! (expA | sigA) ) goto invalid;
raiseFlags( flag_infinite );
goto infinity;
}
normExpSig = softfloat_normSubnormalF64Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
if ( ! expA ) {
if ( ! sigA ) goto zero;
normExpSig = softfloat_normSubnormalF64Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = expA - expB + 0x3FE;
sigA |= UINT64_C( 0x0010000000000000 );
sigB |= UINT64_C( 0x0010000000000000 );
if ( sigA < sigB ) {
--expZ;
sigA <<= 11;
} else {
sigA <<= 10;
}
sigB <<= 11;
recip32 = softfloat_approxRecip32_1( sigB>>32 ) - 2;
sig32Z = ((uint32_t) (sigA>>32) * (uint_fast64_t) recip32)>>32;
doubleTerm = sig32Z<<1;
rem =
((sigA - (uint_fast64_t) doubleTerm * (uint32_t) (sigB>>32))<<28)
- (uint_fast64_t) doubleTerm * ((uint32_t) sigB>>4);
q = (((uint32_t) (rem>>32) * (uint_fast64_t) recip32)>>32) + 4;
sigZ = ((uint_fast64_t) sig32Z<<32) + ((uint_fast64_t) q<<4);
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( (sigZ & 0x1FF) < 4<<4 ) {
q &= ~7;
sigZ &= ~(uint_fast64_t) 0x7F;
doubleTerm = q<<1;
rem =
((rem - (uint_fast64_t) doubleTerm * (uint32_t) (sigB>>32))<<28)
- (uint_fast64_t) doubleTerm * ((uint32_t) sigB>>4);
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
sigZ -= 1<<7;
} else {
if ( rem ) sigZ |= 1;
}
}
return softfloat_roundPackToF64( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infinity:
uiZ = packToF64UI( signZ, 0x7FF, 0 );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zero:
uiZ = packToF64UI( signZ, 0, 0 );
uiZ:
return float64_t::fromRaw(uiZ);
}
bool f64_eq( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
uiA = a.v;
uiB = b.v;
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
if (
softfloat_isSigNaNF64UI( uiA ) || softfloat_isSigNaNF64UI( uiB )
) {
raiseFlags( flag_invalid );
}
return false;
}
return (uiA == uiB) || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ));
}
bool f64_eq_signaling( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
uiA = a.v;
uiB = b.v;
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
raiseFlags( flag_invalid );
return false;
}
return (uiA == uiB) || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ));
}
bool f64_isSignalingNaN( float64_t a )
{
return softfloat_isSigNaNF64UI( a.v );
}
bool f64_le( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
bool signA, signB;
uiA = a.v;
uiB = b.v;
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
raiseFlags( flag_invalid );
return false;
}
signA = signF64UI( uiA );
signB = signF64UI( uiB );
return
(signA != signB)
? signA || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
: (uiA == uiB) || (signA ^ (uiA < uiB));
}
bool f64_le_quiet( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
bool signA, signB;
uiA = a.v;
uiB = b.v;
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
if (
softfloat_isSigNaNF64UI( uiA ) || softfloat_isSigNaNF64UI( uiB )
) {
raiseFlags( flag_invalid );
}
return false;
}
signA = signF64UI( uiA );
signB = signF64UI( uiB );
return
(signA != signB)
? signA || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
: (uiA == uiB) || (signA ^ (uiA < uiB));
}
bool f64_lt( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
bool signA, signB;
uiA = a.v;
uiB = b.v;
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
raiseFlags( flag_invalid );
return false;
}
signA = signF64UI( uiA );
signB = signF64UI( uiB );
return
(signA != signB)
? signA && ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
: (uiA != uiB) && (signA ^ (uiA < uiB));
}
bool f64_lt_quiet( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
bool signA, signB;
uiA = a.v;
uiB = b.v;
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
if (
softfloat_isSigNaNF64UI( uiA ) || softfloat_isSigNaNF64UI( uiB )
) {
raiseFlags( flag_invalid );
}
return false;
}
signA = signF64UI( uiA );
signB = signF64UI( uiB );
return
(signA != signB)
? signA && ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
: (uiA != uiB) && (signA ^ (uiA < uiB));
}
float64_t f64_mulAdd( float64_t a, float64_t b, float64_t c )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
uint_fast64_t uiC;
uiA = a.v;
uiB = b.v;
uiC = c.v;
return softfloat_mulAddF64( uiA, uiB, uiC, 0 );
}
float64_t f64_mul( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
int_fast16_t expA;
uint_fast64_t sigA;
uint_fast64_t uiB;
bool signB;
int_fast16_t expB;
uint_fast64_t sigB;
bool signZ;
uint_fast64_t magBits;
struct exp16_sig64 normExpSig;
int_fast16_t expZ;
struct uint128 sig128Z;
uint_fast64_t sigZ, uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF64UI( uiA );
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
uiB = b.v;
signB = signF64UI( uiB );
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
signZ = signA ^ signB;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA || ((expB == 0x7FF) && sigB) ) goto propagateNaN;
magBits = expB | sigB;
goto infArg;
}
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
magBits = expA | sigA;
goto infArg;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) goto zero;
normExpSig = softfloat_normSubnormalF64Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
if ( ! expB ) {
if ( ! sigB ) goto zero;
normExpSig = softfloat_normSubnormalF64Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = expA + expB - 0x3FF;
sigA = (sigA | UINT64_C( 0x0010000000000000 ))<<10;
sigB = (sigB | UINT64_C( 0x0010000000000000 ))<<11;
sig128Z = softfloat_mul64To128( sigA, sigB );
sigZ = sig128Z.v64 | (sig128Z.v0 != 0);
if ( sigZ < UINT64_C( 0x4000000000000000 ) ) {
--expZ;
sigZ <<= 1;
}
return softfloat_roundPackToF64( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infArg:
if ( ! magBits ) {
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
} else {
uiZ = packToF64UI( signZ, 0x7FF, 0 );
}
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zero:
uiZ = packToF64UI( signZ, 0, 0 );
uiZ:
return float64_t::fromRaw(uiZ);
}
float64_t f64_rem( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
int_fast16_t expA;
uint_fast64_t sigA;
uint_fast64_t uiB;
int_fast16_t expB;
uint_fast64_t sigB;
struct exp16_sig64 normExpSig;
uint64_t rem;
int_fast16_t expDiff;
uint32_t q, recip32;
uint_fast64_t q64;
uint64_t altRem, meanRem;
bool signRem;
uint_fast64_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF64UI( uiA );
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
uiB = b.v;
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA || ((expB == 0x7FF) && sigB) ) goto propagateNaN;
goto invalid;
}
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
return a;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA < expB - 1 ) return a;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expB ) {
if ( ! sigB ) goto invalid;
normExpSig = softfloat_normSubnormalF64Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
if ( ! expA ) {
if ( ! sigA ) return a;
normExpSig = softfloat_normSubnormalF64Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
rem = sigA | UINT64_C( 0x0010000000000000 );
sigB |= UINT64_C( 0x0010000000000000 );
expDiff = expA - expB;
if ( expDiff < 1 ) {
if ( expDiff < -1 ) return a;
sigB <<= 9;
if ( expDiff ) {
rem <<= 8;
q = 0;
} else {
rem <<= 9;
q = (sigB <= rem);
if ( q ) rem -= sigB;
}
} else {
recip32 = softfloat_approxRecip32_1( sigB>>21 );
/*--------------------------------------------------------------------
| Changing the shift of `rem' here requires also changing the initial
| subtraction from `expDiff'.
*--------------------------------------------------------------------*/
rem <<= 9;
expDiff -= 30;
/*--------------------------------------------------------------------
| The scale of `sigB' affects how many bits are obtained during each
| cycle of the loop. Currently this is 29 bits per loop iteration,
| the maximum possible.
*--------------------------------------------------------------------*/
sigB <<= 9;
for (;;) {
q64 = (uint32_t) (rem>>32) * (uint_fast64_t) recip32;
if ( expDiff < 0 ) break;
q = (q64 + 0x80000000)>>32;
rem <<= 29;
rem -= q * (uint64_t) sigB;
if ( rem & UINT64_C( 0x8000000000000000 ) ) rem += sigB;
expDiff -= 29;
}
/*--------------------------------------------------------------------
| (`expDiff' cannot be less than -29 here.)
*--------------------------------------------------------------------*/
q = (uint32_t) (q64>>32)>>(~expDiff & 31);
rem = (rem<<(expDiff + 30)) - q * (uint64_t) sigB;
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
altRem = rem + sigB;
goto selectRem;
}
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
do {
altRem = rem;
++q;
rem -= sigB;
} while ( ! (rem & UINT64_C( 0x8000000000000000 )) );
selectRem:
meanRem = rem + altRem;
if (
(meanRem & UINT64_C( 0x8000000000000000 )) || (! meanRem && (q & 1))
) {
rem = altRem;
}
signRem = signA;
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
signRem = ! signRem;
//fixed unsigned unary minus: -x == ~x + 1
rem = ~rem + 1;
}
return softfloat_normRoundPackToF64( signRem, expB, rem );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
goto uiZ;
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
uiZ:
return float64_t::fromRaw(uiZ);
}
float64_t f64_roundToInt( float64_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast64_t uiA;
int_fast16_t exp;
uint_fast64_t uiZ, lastBitMask, roundBitsMask;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp <= 0x3FE ) {
if ( ! (uiA & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) return a;
if ( exact ) raiseFlags(flag_inexact);
uiZ = uiA & packToF64UI( 1, 0, 0 );
switch ( roundingMode ) {
case round_near_even:
if ( ! fracF64UI( uiA ) ) break;
case round_near_maxMag:
if ( exp == 0x3FE ) uiZ |= packToF64UI( 0, 0x3FF, 0 );
break;
case round_min:
if ( uiZ ) uiZ = packToF64UI( 1, 0x3FF, 0 );
break;
case round_max:
if ( ! uiZ ) uiZ = packToF64UI( 0, 0x3FF, 0 );
break;
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( 0x433 <= exp ) {
if ( (exp == 0x7FF) && fracF64UI( uiA ) ) {
uiZ = softfloat_propagateNaNF64UI( uiA, 0 );
goto uiZ;
}
return a;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiZ = uiA;
lastBitMask = (uint_fast64_t) 1<<(0x433 - exp);
roundBitsMask = lastBitMask - 1;
if ( roundingMode == round_near_maxMag ) {
uiZ += lastBitMask>>1;
} else if ( roundingMode == round_near_even ) {
uiZ += lastBitMask>>1;
if ( ! (uiZ & roundBitsMask) ) uiZ &= ~lastBitMask;
} else if (
roundingMode
== (signF64UI( uiZ ) ? round_min : round_max)
) {
uiZ += roundBitsMask;
}
uiZ &= ~roundBitsMask;
if ( exact && (uiZ != uiA) ) {
raiseFlags(flag_inexact);
}
uiZ:
return float64_t::fromRaw(uiZ);
}
float64_t f64_sqrt( float64_t a )
{
uint_fast64_t uiA;
bool signA;
int_fast16_t expA;
uint_fast64_t sigA, uiZ;
struct exp16_sig64 normExpSig;
int_fast16_t expZ;
uint32_t sig32A, recipSqrt32, sig32Z;
uint_fast64_t rem;
uint32_t q;
uint_fast64_t sigZ, shiftedSigZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF64UI( uiA );
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA ) {
uiZ = softfloat_propagateNaNF64UI( uiA, 0 );
goto uiZ;
}
if ( ! signA ) return a;
goto invalid;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( signA ) {
if ( ! (expA | sigA) ) return a;
goto invalid;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) return a;
normExpSig = softfloat_normSubnormalF64Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
| (`sig32Z' is guaranteed to be a lower bound on the square root of
| `sig32A', which makes `sig32Z' also a lower bound on the square root of
| `sigA'.)
*------------------------------------------------------------------------*/
expZ = ((expA - 0x3FF)>>1) + 0x3FE;
expA &= 1;
sigA |= UINT64_C( 0x0010000000000000 );
sig32A = (uint32_t)(sigA>>21); //fixed warning on type cast
recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
sig32Z = ((uint_fast64_t) sig32A * recipSqrt32)>>32;
if ( expA ) {
sigA <<= 8;
sig32Z >>= 1;
} else {
sigA <<= 9;
}
rem = sigA - (uint_fast64_t) sig32Z * sig32Z;
q = ((uint32_t) (rem>>2) * (uint_fast64_t) recipSqrt32)>>32;
sigZ = ((uint_fast64_t) sig32Z<<32 | 1<<5) + ((uint_fast64_t) q<<3);
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( (sigZ & 0x1FF) < 1<<5 ) {
sigZ &= ~(uint_fast64_t) 0x3F;
shiftedSigZ = sigZ>>6;
rem = (sigA<<52) - shiftedSigZ * shiftedSigZ;
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
--sigZ;
} else {
if ( rem ) sigZ |= 1;
}
}
return softfloat_roundPackToF64( 0, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
uiZ:
return float64_t::fromRaw(uiZ);
}
float64_t f64_sub( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
uint_fast64_t uiB;
bool signB;
uiA = a.v;
signA = signF64UI( uiA );
uiB = b.v;
signB = signF64UI( uiB );
if ( signA == signB ) {
return softfloat_subMagsF64( uiA, uiB, signA );
} else {
return softfloat_addMagsF64( uiA, uiB, signA );
}
}
float32_t f64_to_f32( float64_t a )
{
uint_fast64_t uiA;
bool sign;
int_fast16_t exp;
uint_fast64_t frac;
struct commonNaN commonNaN;
uint_fast32_t uiZ, frac32;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF64UI( uiA );
exp = expF64UI( uiA );
frac = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp == 0x7FF ) {
if ( frac ) {
softfloat_f64UIToCommonNaN( uiA, &commonNaN );
uiZ = softfloat_commonNaNToF32UI( &commonNaN );
} else {
uiZ = packToF32UI( sign, 0xFF, 0 );
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
frac32 = (uint_fast32_t)softfloat_shortShiftRightJam64( frac, 22 ); //fixed warning on type cast
if ( ! (exp | frac32) ) {
uiZ = packToF32UI( sign, 0, 0 );
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
return softfloat_roundPackToF32( sign, exp - 0x381, frac32 | 0x40000000 );
uiZ:
return float32_t::fromRaw(uiZ);
}
int_fast32_t f64_to_i32( float64_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast64_t uiA;
bool sign;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF64UI( uiA );
exp = expF64UI( uiA );
sig = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
if ( (exp == 0x7FF) && sig ) {
#if (i32_fromNaN == i32_fromPosOverflow)
sign = 0;
#elif (i32_fromNaN == i32_fromNegOverflow)
sign = 1;
#else
raiseFlags( flag_invalid );
return i32_fromNaN;
#endif
}
#endif
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
shiftDist = 0x427 - exp;
if ( 0 < shiftDist ) sig = softfloat_shiftRightJam64( sig, shiftDist );
return softfloat_roundToI32( sign, sig, roundingMode, exact );
}
int_fast32_t f64_to_i32_r_minMag( float64_t a, bool exact )
{
uint_fast64_t uiA;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
bool sign;
int_fast32_t absZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF64UI( uiA );
sig = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0x433 - exp;
if ( 53 <= shiftDist ) {
if ( exact && (exp | sig) ) {
raiseFlags(flag_inexact);
}
return 0;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sign = signF64UI( uiA );
if ( shiftDist < 22 ) {
if (
sign && (exp == 0x41E) && (sig < UINT64_C( 0x0000000000200000 ))
) {
if ( exact && sig ) {
raiseFlags(flag_inexact);
}
return -0x7FFFFFFF - 1;
}
raiseFlags( flag_invalid );
return
(exp == 0x7FF) && sig ? i32_fromNaN
: sign ? i32_fromNegOverflow : i32_fromPosOverflow;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig |= UINT64_C( 0x0010000000000000 );
absZ = (int_fast32_t)(sig>>shiftDist); //fixed warning on type cast
if ( exact && ((uint_fast64_t) (uint_fast32_t) absZ<<shiftDist != sig) ) {
raiseFlags(flag_inexact);
}
return sign ? -absZ : absZ;
}
int_fast64_t f64_to_i64( float64_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast64_t uiA;
bool sign;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
struct uint64_extra sigExtra;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF64UI( uiA );
exp = expF64UI( uiA );
sig = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
shiftDist = 0x433 - exp;
if ( shiftDist <= 0 ) {
if ( shiftDist < -11 ) goto invalid;
sigExtra.v = sig<<-shiftDist;
sigExtra.extra = 0;
} else {
sigExtra = softfloat_shiftRightJam64Extra( sig, 0, shiftDist );
}
return
softfloat_roundToI64(
sign, sigExtra.v, sigExtra.extra, roundingMode, exact );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
return
(exp == 0x7FF) && fracF64UI( uiA ) ? i64_fromNaN
: sign ? i64_fromNegOverflow : i64_fromPosOverflow;
}
int_fast64_t f64_to_i64_r_minMag( float64_t a, bool exact )
{
uint_fast64_t uiA;
bool sign;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
int_fast64_t absZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF64UI( uiA );
exp = expF64UI( uiA );
sig = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0x433 - exp;
if ( shiftDist <= 0 ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( shiftDist < -10 ) {
if ( uiA == packToF64UI( 1, 0x43E, 0 ) ) {
return -INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;
}
raiseFlags( flag_invalid );
return
(exp == 0x7FF) && sig ? i64_fromNaN
: sign ? i64_fromNegOverflow : i64_fromPosOverflow;
}
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
sig |= UINT64_C( 0x0010000000000000 );
absZ = sig<<-shiftDist;
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( 53 <= shiftDist ) {
if ( exact && (exp | sig) ) {
raiseFlags(flag_inexact);
}
return 0;
}
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
sig |= UINT64_C( 0x0010000000000000 );
absZ = sig>>shiftDist;
if ( exact && (absZ<<shiftDist != (int_fast64_t)sig) ) {
raiseFlags(flag_inexact);
}
}
return sign ? -absZ : absZ;
}
uint_fast32_t f64_to_ui32( float64_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast64_t uiA;
bool sign;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF64UI( uiA );
exp = expF64UI( uiA );
sig = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
#if (ui32_fromNaN != ui32_fromPosOverflow) || (ui32_fromNaN != ui32_fromNegOverflow)
if ( (exp == 0x7FF) && sig ) {
#if (ui32_fromNaN == ui32_fromPosOverflow)
sign = 0;
#elif (ui32_fromNaN == ui32_fromNegOverflow)
sign = 1;
#else
raiseFlags( flag_invalid );
return ui32_fromNaN;
#endif
}
#endif
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
shiftDist = 0x427 - exp;
if ( 0 < shiftDist ) sig = softfloat_shiftRightJam64( sig, shiftDist );
return softfloat_roundToUI32( sign, sig, roundingMode, exact );
}
uint_fast32_t f64_to_ui32_r_minMag( float64_t a, bool exact )
{
uint_fast64_t uiA;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
bool sign;
uint_fast32_t z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF64UI( uiA );
sig = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0x433 - exp;
if ( 53 <= shiftDist ) {
if ( exact && (exp | sig) ) {
raiseFlags(flag_inexact);
}
return 0;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sign = signF64UI( uiA );
if ( sign || (shiftDist < 21) ) {
raiseFlags( flag_invalid );
return
(exp == 0x7FF) && sig ? ui32_fromNaN
: sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig |= UINT64_C( 0x0010000000000000 );
z = (uint_fast32_t)(sig>>shiftDist); //fixed warning on type cast
if ( exact && ((uint_fast64_t) z<<shiftDist != sig) ) {
raiseFlags(flag_inexact);
}
return z;
}
uint_fast64_t f64_to_ui64( float64_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast64_t uiA;
bool sign;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
struct uint64_extra sigExtra;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF64UI( uiA );
exp = expF64UI( uiA );
sig = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
shiftDist = 0x433 - exp;
if ( shiftDist <= 0 ) {
if ( shiftDist < -11 ) goto invalid;
sigExtra.v = sig<<-shiftDist;
sigExtra.extra = 0;
} else {
sigExtra = softfloat_shiftRightJam64Extra( sig, 0, shiftDist );
}
return
softfloat_roundToUI64(
sign, sigExtra.v, sigExtra.extra, roundingMode, exact );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
return
(exp == 0x7FF) && fracF64UI( uiA ) ? ui64_fromNaN
: sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
}
uint_fast64_t f64_to_ui64_r_minMag( float64_t a, bool exact )
{
uint_fast64_t uiA;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
bool sign;
uint_fast64_t z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF64UI( uiA );
sig = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0x433 - exp;
if ( 53 <= shiftDist ) {
if ( exact && (exp | sig) ) {
raiseFlags(flag_inexact);
}
return 0;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sign = signF64UI( uiA );
if ( sign ) goto invalid;
if ( shiftDist <= 0 ) {
if ( shiftDist < -11 ) goto invalid;
z = (sig | UINT64_C( 0x0010000000000000 ))<<-shiftDist;
} else {
sig |= UINT64_C( 0x0010000000000000 );
z = sig>>shiftDist;
if ( exact && (uint64_t) (sig<<(-shiftDist & 63)) ) {
raiseFlags(flag_inexact);
}
}
return z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
return
(exp == 0x7FF) && sig ? ui64_fromNaN
: sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
}
float32_t i32_to_f32( int32_t a )
{
bool sign;
uint_fast32_t absA;
sign = (a < 0);
if ( ! (a & 0x7FFFFFFF) ) {
return float32_t::fromRaw(sign ? packToF32UI( 1, 0x9E, 0 ) : 0);
}
//fixed unsigned unary minus: -x == ~x + 1
absA = sign ? (~(uint_fast32_t) a + 1) : (uint_fast32_t) a;
return softfloat_normRoundPackToF32( sign, 0x9C, absA );
}
float64_t i32_to_f64( int32_t a )
{
uint_fast64_t uiZ;
bool sign;
uint_fast32_t absA;
int_fast8_t shiftDist;
if ( ! a ) {
uiZ = 0;
} else {
sign = (a < 0);
//fixed unsigned unary minus: -x == ~x + 1
absA = sign ? (~(uint_fast32_t) a + 1) : (uint_fast32_t) a;
shiftDist = softfloat_countLeadingZeros32( absA ) + 21;
uiZ =
packToF64UI(
sign, 0x432 - shiftDist, (uint_fast64_t) absA<<shiftDist );
}
return float64_t::fromRaw(uiZ);
}
float32_t i64_to_f32( int64_t a )
{
bool sign;
uint_fast64_t absA;
int_fast8_t shiftDist;
uint_fast32_t sig;
sign = (a < 0);
//fixed unsigned unary minus: -x == ~x + 1
absA = sign ? (~(uint_fast64_t) a + 1) : (uint_fast64_t) a;
shiftDist = softfloat_countLeadingZeros64( absA ) - 40;
if ( 0 <= shiftDist ) {
return float32_t::fromRaw(a ? packToF32UI(sign, 0x95 - shiftDist, (uint_fast32_t) absA<<shiftDist ) : 0);
} else {
shiftDist += 7;
sig =
(shiftDist < 0)
? (uint_fast32_t) softfloat_shortShiftRightJam64( absA, -shiftDist ) //fixed warning on type cast
: (uint_fast32_t) absA<<shiftDist;
return softfloat_roundPackToF32( sign, 0x9C - shiftDist, sig );
}
}
float64_t i64_to_f64( int64_t a )
{
bool sign;
uint_fast64_t absA;
sign = (a < 0);
if ( ! (a & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) {
return float64_t::fromRaw(sign ? packToF64UI( 1, 0x43E, 0 ) : 0);
}
//fixed unsigned unary minus: -x == ~x + 1
absA = sign ? (~(uint_fast64_t) a + 1) : (uint_fast64_t) a;
return softfloat_normRoundPackToF64( sign, 0x43C, absA );
}
float32_t softfloat_addMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
{
int_fast16_t expA;
uint_fast32_t sigA;
int_fast16_t expB;
uint_fast32_t sigB;
int_fast16_t expDiff;
uint_fast32_t uiZ;
bool signZ;
int_fast16_t expZ;
uint_fast32_t sigZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expDiff = expA - expB;
if ( ! expDiff ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( ! expA ) {
uiZ = uiA + sigB;
goto uiZ;
}
if ( expA == 0xFF ) {
if ( sigA | sigB ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
signZ = signF32UI( uiA );
expZ = expA;
sigZ = 0x01000000 + sigA + sigB;
if ( ! (sigZ & 1) && (expZ < 0xFE) ) {
uiZ = packToF32UI( signZ, expZ, sigZ>>1 );
goto uiZ;
}
sigZ <<= 6;
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
signZ = signF32UI( uiA );
sigA <<= 6;
sigB <<= 6;
if ( expDiff < 0 ) {
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
uiZ = packToF32UI( signZ, 0xFF, 0 );
goto uiZ;
}
expZ = expB;
sigA += expA ? 0x20000000 : sigA;
sigA = softfloat_shiftRightJam32( sigA, -expDiff );
} else {
if ( expA == 0xFF ) {
if ( sigA ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
expZ = expA;
sigB += expB ? 0x20000000 : sigB;
sigB = softfloat_shiftRightJam32( sigB, expDiff );
}
sigZ = 0x20000000 + sigA + sigB;
if ( sigZ < 0x40000000 ) {
--expZ;
sigZ <<= 1;
}
}
return softfloat_roundPackToF32( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
uiZ:
return float32_t::fromRaw(uiZ);
}
float64_t
softfloat_addMagsF64( uint_fast64_t uiA, uint_fast64_t uiB, bool signZ )
{
int_fast16_t expA;
uint_fast64_t sigA;
int_fast16_t expB;
uint_fast64_t sigB;
int_fast16_t expDiff;
uint_fast64_t uiZ;
int_fast16_t expZ;
uint_fast64_t sigZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expDiff = expA - expB;
if ( ! expDiff ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( ! expA ) {
uiZ = uiA + sigB;
goto uiZ;
}
if ( expA == 0x7FF ) {
if ( sigA | sigB ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
expZ = expA;
sigZ = UINT64_C( 0x0020000000000000 ) + sigA + sigB;
sigZ <<= 9;
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
sigA <<= 9;
sigB <<= 9;
if ( expDiff < 0 ) {
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
uiZ = packToF64UI( signZ, 0x7FF, 0 );
goto uiZ;
}
expZ = expB;
if ( expA ) {
sigA += UINT64_C( 0x2000000000000000 );
} else {
sigA <<= 1;
}
sigA = softfloat_shiftRightJam64( sigA, -expDiff );
} else {
if ( expA == 0x7FF ) {
if ( sigA ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
expZ = expA;
if ( expB ) {
sigB += UINT64_C( 0x2000000000000000 );
} else {
sigB <<= 1;
}
sigB = softfloat_shiftRightJam64( sigB, expDiff );
}
sigZ = UINT64_C( 0x2000000000000000 ) + sigA + sigB;
if ( sigZ < UINT64_C( 0x4000000000000000 ) ) {
--expZ;
sigZ <<= 1;
}
}
return softfloat_roundPackToF64( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
uiZ:
return float64_t::fromRaw(uiZ);
}
uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a )
{
int index;
uint16_t eps, r0;
uint_fast32_t ESqrR0;
uint32_t sigma0;
uint_fast32_t r;
uint32_t sqrSigma0;
index = (a>>27 & 0xE) + oddExpA;
eps = (uint16_t) (a>>12);
r0 = softfloat_approxRecipSqrt_1k0s[index]
- ((softfloat_approxRecipSqrt_1k1s[index] * (uint_fast32_t) eps)
>>20);
ESqrR0 = (uint_fast32_t) r0 * r0;
if ( ! oddExpA ) ESqrR0 <<= 1;
sigma0 = ~(uint_fast32_t) (((uint32_t) ESqrR0 * (uint_fast64_t) a)>>23);
r = (uint_fast32_t)(((uint_fast32_t) r0<<16) + ((r0 * (uint_fast64_t) sigma0)>>25)); //fixed warning on type cast
sqrSigma0 = ((uint_fast64_t) sigma0 * sigma0)>>32;
r += ((uint32_t) ((r>>1) + (r>>3) - ((uint_fast32_t) r0<<14))
* (uint_fast64_t) sqrSigma0)
>>48;
if ( ! (r & 0x80000000) ) r = 0x80000000;
return r;
}
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a 32-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr )
{
return (uint_fast32_t) aPtr->sign<<31 | 0x7FC00000 | aPtr->v64>>41;
}
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a 64-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr )
{
return
(uint_fast64_t) aPtr->sign<<63 | UINT64_C( 0x7FF8000000000000 )
| aPtr->v64>>12;
}
uint_fast8_t softfloat_countLeadingZeros64( uint64_t a )
{
uint_fast8_t count;
uint32_t a32;
count = 0;
a32 = a>>32;
if ( ! a32 ) {
count = 32;
a32 = (uint32_t) a; //fixed warning on type cast
}
/*------------------------------------------------------------------------
| From here, result is current count + count leading zeros of `a32'.
*------------------------------------------------------------------------*/
if ( a32 < 0x10000 ) {
count += 16;
a32 <<= 16;
}
if ( a32 < 0x1000000 ) {
count += 8;
a32 <<= 8;
}
count += softfloat_countLeadingZeros8[a32>>24];
return count;
}
/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a 32-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr )
{
if ( softfloat_isSigNaNF32UI( uiA ) ) {
raiseFlags( flag_invalid );
}
zPtr->sign = (uiA>>31) != 0;
zPtr->v64 = (uint_fast64_t) uiA<<41;
zPtr->v0 = 0;
}
/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a 64-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr )
{
if ( softfloat_isSigNaNF64UI( uiA ) ) {
raiseFlags( flag_invalid );
}
zPtr->sign = (uiA>>63) != 0;
zPtr->v64 = uiA<<12;
zPtr->v0 = 0;
}
struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b )
{
uint32_t a32, a0, b32, b0;
struct uint128 z;
uint64_t mid1, mid;
a32 = a>>32;
a0 = (uint32_t)a; //fixed warning on type cast
b32 = b>>32;
b0 = (uint32_t) b; //fixed warning on type cast
z.v0 = (uint_fast64_t) a0 * b0;
mid1 = (uint_fast64_t) a32 * b0;
mid = mid1 + (uint_fast64_t) a0 * b32;
z.v64 = (uint_fast64_t) a32 * b32;
z.v64 += (uint_fast64_t) (mid < mid1)<<32 | mid>>32;
mid <<= 32;
z.v0 += mid;
z.v64 += (z.v0 < mid);
return z;
}
float32_t
softfloat_mulAddF32(
uint_fast32_t uiA, uint_fast32_t uiB, uint_fast32_t uiC, uint_fast8_t op )
{
bool signA;
int_fast16_t expA;
uint_fast32_t sigA;
bool signB;
int_fast16_t expB;
uint_fast32_t sigB;
bool signC;
int_fast16_t expC;
uint_fast32_t sigC;
bool signProd;
uint_fast32_t magBits, uiZ;
struct exp16_sig32 normExpSig;
int_fast16_t expProd;
uint_fast64_t sigProd;
bool signZ;
int_fast16_t expZ;
uint_fast32_t sigZ;
int_fast16_t expDiff;
uint_fast64_t sig64Z, sig64C;
int_fast8_t shiftDist;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
signA = signF32UI( uiA );
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
signB = signF32UI( uiB );
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
signC = signF32UI( uiC ) ^ (op == softfloat_mulAdd_subC);
expC = expF32UI( uiC );
sigC = fracF32UI( uiC );
signProd = signA ^ signB ^ (op == softfloat_mulAdd_subProd);
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA || ((expB == 0xFF) && sigB) ) goto propagateNaN_ABC;
magBits = expB | sigB;
goto infProdArg;
}
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN_ABC;
magBits = expA | sigA;
goto infProdArg;
}
if ( expC == 0xFF ) {
if ( sigC ) {
uiZ = 0;
goto propagateNaN_ZC;
}
uiZ = uiC;
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) goto zeroProd;
normExpSig = softfloat_normSubnormalF32Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
if ( ! expB ) {
if ( ! sigB ) goto zeroProd;
normExpSig = softfloat_normSubnormalF32Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expProd = expA + expB - 0x7E;
sigA = (sigA | 0x00800000)<<7;
sigB = (sigB | 0x00800000)<<7;
sigProd = (uint_fast64_t) sigA * sigB;
if ( sigProd < UINT64_C( 0x2000000000000000 ) ) {
--expProd;
sigProd <<= 1;
}
signZ = signProd;
if ( ! expC ) {
if ( ! sigC ) {
expZ = expProd - 1;
sigZ = (uint_fast32_t) softfloat_shortShiftRightJam64( sigProd, 31 ); //fixed warning on type cast
goto roundPack;
}
normExpSig = softfloat_normSubnormalF32Sig( sigC );
expC = normExpSig.exp;
sigC = normExpSig.sig;
}
sigC = (sigC | 0x00800000)<<6;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expDiff = expProd - expC;
if ( signProd == signC ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( expDiff <= 0 ) {
expZ = expC;
sigZ = sigC + (uint_fast32_t) softfloat_shiftRightJam64( sigProd, 32 - expDiff ); //fixed warning on type cast
} else {
expZ = expProd;
sig64Z =
sigProd
+ softfloat_shiftRightJam64(
(uint_fast64_t) sigC<<32, expDiff );
sigZ = (uint_fast32_t) softfloat_shortShiftRightJam64( sig64Z, 32 ); //fixed warning on type cast
}
if ( sigZ < 0x40000000 ) {
--expZ;
sigZ <<= 1;
}
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
sig64C = (uint_fast64_t) sigC<<32;
if ( expDiff < 0 ) {
signZ = signC;
expZ = expC;
sig64Z = sig64C - softfloat_shiftRightJam64( sigProd, -expDiff );
} else if ( ! expDiff ) {
expZ = expProd;
sig64Z = sigProd - sig64C;
if ( ! sig64Z ) goto completeCancellation;
if ( sig64Z & UINT64_C( 0x8000000000000000 ) ) {
signZ = ! signZ;
//fixed unsigned unary minus: -x == ~x + 1
sig64Z = ~sig64Z + 1;
}
} else {
expZ = expProd;
sig64Z = sigProd - softfloat_shiftRightJam64( sig64C, expDiff );
}
shiftDist = softfloat_countLeadingZeros64( sig64Z ) - 1;
expZ -= shiftDist;
shiftDist -= 32;
if ( shiftDist < 0 ) {
sigZ = (uint_fast32_t) softfloat_shortShiftRightJam64( sig64Z, -shiftDist ); //fixed warning on type cast
} else {
sigZ = (uint_fast32_t) sig64Z<<shiftDist;
}
}
roundPack:
return softfloat_roundPackToF32( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN_ABC:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
goto propagateNaN_ZC;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infProdArg:
if ( magBits ) {
uiZ = packToF32UI( signProd, 0xFF, 0 );
if ( expC != 0xFF ) goto uiZ;
if ( sigC ) goto propagateNaN_ZC;
if ( signProd == signC ) goto uiZ;
}
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
propagateNaN_ZC:
uiZ = softfloat_propagateNaNF32UI( uiZ, uiC );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zeroProd:
uiZ = uiC;
if ( ! (expC | sigC) && (signProd != signC) ) {
completeCancellation:
uiZ =
packToF32UI((globalRoundingMode == round_min), 0, 0 );
}
uiZ:
return float32_t::fromRaw(uiZ);
}
float64_t
softfloat_mulAddF64(
uint_fast64_t uiA, uint_fast64_t uiB, uint_fast64_t uiC, uint_fast8_t op )
{
bool signA;
int_fast16_t expA;
uint_fast64_t sigA;
bool signB;
int_fast16_t expB;
uint_fast64_t sigB;
bool signC;
int_fast16_t expC;
uint_fast64_t sigC;
bool signZ;
uint_fast64_t magBits, uiZ;
struct exp16_sig64 normExpSig;
int_fast16_t expZ;
struct uint128 sig128Z;
uint_fast64_t sigZ;
int_fast16_t expDiff;
struct uint128 sig128C;
int_fast8_t shiftDist;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
signA = signF64UI( uiA );
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
signB = signF64UI( uiB );
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
signC = signF64UI( uiC ) ^ (op == softfloat_mulAdd_subC);
expC = expF64UI( uiC );
sigC = fracF64UI( uiC );
signZ = signA ^ signB ^ (op == softfloat_mulAdd_subProd);
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA || ((expB == 0x7FF) && sigB) ) goto propagateNaN_ABC;
magBits = expB | sigB;
goto infProdArg;
}
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN_ABC;
magBits = expA | sigA;
goto infProdArg;
}
if ( expC == 0x7FF ) {
if ( sigC ) {
uiZ = 0;
goto propagateNaN_ZC;
}
uiZ = uiC;
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) goto zeroProd;
normExpSig = softfloat_normSubnormalF64Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
if ( ! expB ) {
if ( ! sigB ) goto zeroProd;
normExpSig = softfloat_normSubnormalF64Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = expA + expB - 0x3FE;
sigA = (sigA | UINT64_C( 0x0010000000000000 ))<<10;
sigB = (sigB | UINT64_C( 0x0010000000000000 ))<<10;
sig128Z = softfloat_mul64To128( sigA, sigB );
if ( sig128Z.v64 < UINT64_C( 0x2000000000000000 ) ) {
--expZ;
sig128Z =
softfloat_add128(
sig128Z.v64, sig128Z.v0, sig128Z.v64, sig128Z.v0 );
}
if ( ! expC ) {
if ( ! sigC ) {
--expZ;
sigZ = sig128Z.v64<<1 | (sig128Z.v0 != 0);
goto roundPack;
}
normExpSig = softfloat_normSubnormalF64Sig( sigC );
expC = normExpSig.exp;
sigC = normExpSig.sig;
}
sigC = (sigC | UINT64_C( 0x0010000000000000 ))<<9;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
//fixed initialization
sig128C.v0 = sig128C.v64 = 0;
expDiff = expZ - expC;
if ( expDiff < 0 ) {
expZ = expC;
if ( (signZ == signC) || (expDiff < -1) ) {
sig128Z.v64 = softfloat_shiftRightJam64( sig128Z.v64, -expDiff );
} else {
sig128Z =
softfloat_shortShiftRightJam128( sig128Z.v64, sig128Z.v0, 1 );
}
} else if ( expDiff ) {
sig128C = softfloat_shiftRightJam128( sigC, 0, expDiff );
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( signZ == signC ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( expDiff <= 0 ) {
sigZ = (sigC + sig128Z.v64) | (sig128Z.v0 != 0);
} else {
sig128Z =
softfloat_add128(
sig128Z.v64, sig128Z.v0, sig128C.v64, sig128C.v0 );
sigZ = sig128Z.v64 | (sig128Z.v0 != 0);
}
if ( sigZ < UINT64_C( 0x4000000000000000 ) ) {
--expZ;
sigZ <<= 1;
}
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( expDiff < 0 ) {
signZ = signC;
sig128Z = softfloat_sub128( sigC, 0, sig128Z.v64, sig128Z.v0 );
} else if ( ! expDiff ) {
sig128Z.v64 = sig128Z.v64 - sigC;
if ( ! (sig128Z.v64 | sig128Z.v0) ) goto completeCancellation;
if ( sig128Z.v64 & UINT64_C( 0x8000000000000000 ) ) {
signZ = ! signZ;
sig128Z = softfloat_sub128( 0, 0, sig128Z.v64, sig128Z.v0 );
}
} else {
sig128Z =
softfloat_sub128(
sig128Z.v64, sig128Z.v0, sig128C.v64, sig128C.v0 );
}
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( ! sig128Z.v64 ) {
expZ -= 64;
sig128Z.v64 = sig128Z.v0;
sig128Z.v0 = 0;
}
shiftDist = softfloat_countLeadingZeros64( sig128Z.v64 ) - 1;
expZ -= shiftDist;
if ( shiftDist < 0 ) {
sigZ = softfloat_shortShiftRightJam64( sig128Z.v64, -shiftDist );
} else {
sig128Z =
softfloat_shortShiftLeft128(
sig128Z.v64, sig128Z.v0, shiftDist );
sigZ = sig128Z.v64;
}
sigZ |= (sig128Z.v0 != 0);
}
roundPack:
return softfloat_roundPackToF64( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN_ABC:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
goto propagateNaN_ZC;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infProdArg:
if ( magBits ) {
uiZ = packToF64UI( signZ, 0x7FF, 0 );
if ( expC != 0x7FF ) goto uiZ;
if ( sigC ) goto propagateNaN_ZC;
if ( signZ == signC ) goto uiZ;
}
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
propagateNaN_ZC:
uiZ = softfloat_propagateNaNF64UI( uiZ, uiC );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zeroProd:
uiZ = uiC;
if ( ! (expC | sigC) && (signZ != signC) ) {
completeCancellation:
uiZ =
packToF64UI((globalRoundingMode == round_min), 0, 0 );
}
uiZ:
return float64_t::fromRaw(uiZ);
}
float32_t
softfloat_normRoundPackToF32( bool sign, int_fast16_t exp, uint_fast32_t sig )
{
int_fast8_t shiftDist;
shiftDist = softfloat_countLeadingZeros32( sig ) - 1;
exp -= shiftDist;
if ( (7 <= shiftDist) && ((unsigned int) exp < 0xFD) ) {
return float32_t::fromRaw(packToF32UI( sign, sig ? exp : 0, sig<<(shiftDist - 7) ));
} else {
return softfloat_roundPackToF32( sign, exp, sig<<shiftDist );
}
}
float64_t
softfloat_normRoundPackToF64( bool sign, int_fast16_t exp, uint_fast64_t sig )
{
int_fast8_t shiftDist;
shiftDist = softfloat_countLeadingZeros64( sig ) - 1;
exp -= shiftDist;
if ( (10 <= shiftDist) && ((unsigned int) exp < 0x7FD) ) {
return float64_t::fromRaw(packToF64UI( sign, sig ? exp : 0, sig<<(shiftDist - 10) ));
} else {
return softfloat_roundPackToF64( sign, exp, sig<<shiftDist );
}
}
struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t sig )
{
int_fast8_t shiftDist;
struct exp16_sig32 z;
shiftDist = softfloat_countLeadingZeros32( sig ) - 8;
z.exp = 1 - shiftDist;
z.sig = sig<<shiftDist;
return z;
}
struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t sig )
{
int_fast8_t shiftDist;
struct exp16_sig64 z;
shiftDist = softfloat_countLeadingZeros64( sig ) - 11;
z.exp = 1 - shiftDist;
z.sig = sig<<shiftDist;
return z;
}
/*----------------------------------------------------------------------------
| Interpreting `uiA' and `uiB' as the bit patterns of two 32-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
uint_fast32_t
softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB )
{
bool isSigNaNA;
isSigNaNA = softfloat_isSigNaNF32UI( uiA );
if ( isSigNaNA || softfloat_isSigNaNF32UI( uiB ) ) {
raiseFlags( flag_invalid );
if ( isSigNaNA ) return uiA | 0x00400000;
}
return (isNaNF32UI( uiA ) ? uiA : uiB) | 0x00400000;
}
/*----------------------------------------------------------------------------
| Interpreting `uiA' and `uiB' as the bit patterns of two 64-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
uint_fast64_t
softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB )
{
bool isSigNaNA;
isSigNaNA = softfloat_isSigNaNF64UI( uiA );
if ( isSigNaNA || softfloat_isSigNaNF64UI( uiB ) ) {
raiseFlags( flag_invalid );
if ( isSigNaNA ) return uiA | UINT64_C( 0x0008000000000000 );
}
return (isNaNF64UI( uiA ) ? uiA : uiB) | UINT64_C( 0x0008000000000000 );
}
float32_t
softfloat_roundPackToF32( bool sign, int_fast16_t exp, uint_fast32_t sig )
{
uint_fast8_t roundingMode;
bool roundNearEven;
uint_fast8_t roundIncrement, roundBits;
bool isTiny;
uint_fast32_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundingMode = globalRoundingMode;
roundNearEven = (roundingMode == round_near_even);
roundIncrement = 0x40;
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
roundIncrement =
(roundingMode
== (sign ? round_min : round_max))
? 0x7F
: 0;
}
roundBits = sig & 0x7F;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( 0xFD <= (unsigned int) exp ) {
if ( exp < 0 ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
isTiny =
(globalDetectTininess == tininess_beforeRounding)
|| (exp < -1) || (sig + roundIncrement < 0x80000000);
sig = softfloat_shiftRightJam32( sig, -exp );
exp = 0;
roundBits = sig & 0x7F;
if ( isTiny && roundBits ) {
raiseFlags( flag_underflow );
}
} else if ( (0xFD < exp) || (0x80000000 <= sig + roundIncrement) ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
raiseFlags(
flag_overflow | flag_inexact );
uiZ = packToF32UI( sign, 0xFF, 0 ) - ! roundIncrement;
goto uiZ;
}
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig = (sig + roundIncrement)>>7;
if ( roundBits ) {
raiseFlags(flag_inexact);
if ( roundingMode == round_odd ) {
sig |= 1;
goto packReturn;
}
}
sig &= ~(uint_fast32_t) (! (roundBits ^ 0x40) & roundNearEven);
if ( ! sig ) exp = 0;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
packReturn:
uiZ = packToF32UI( sign, exp, sig );
uiZ:
return float32_t::fromRaw(uiZ);
}
float64_t
softfloat_roundPackToF64( bool sign, int_fast16_t exp, uint_fast64_t sig )
{
uint_fast8_t roundingMode;
bool roundNearEven;
uint_fast16_t roundIncrement, roundBits;
bool isTiny;
uint_fast64_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundingMode = globalRoundingMode;
roundNearEven = (roundingMode == round_near_even);
roundIncrement = 0x200;
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
roundIncrement =
(roundingMode
== (sign ? round_min : round_max))
? 0x3FF
: 0;
}
roundBits = sig & 0x3FF;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( 0x7FD <= (uint16_t) exp ) {
if ( exp < 0 ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
isTiny =
(globalDetectTininess == tininess_beforeRounding)
|| (exp < -1)
|| (sig + roundIncrement < UINT64_C( 0x8000000000000000 ));
sig = softfloat_shiftRightJam64( sig, -exp );
exp = 0;
roundBits = sig & 0x3FF;
if ( isTiny && roundBits ) {
raiseFlags( flag_underflow );
}
} else if (
(0x7FD < exp)
|| (UINT64_C( 0x8000000000000000 ) <= sig + roundIncrement)
) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
raiseFlags(
flag_overflow | flag_inexact );
uiZ = packToF64UI( sign, 0x7FF, 0 ) - ! roundIncrement;
goto uiZ;
}
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig = (sig + roundIncrement)>>10;
if ( roundBits ) {
raiseFlags(flag_inexact);
if ( roundingMode == round_odd ) {
sig |= 1;
goto packReturn;
}
}
sig &= ~(uint_fast64_t) (! (roundBits ^ 0x200) & roundNearEven);
if ( ! sig ) exp = 0;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
packReturn:
uiZ = packToF64UI( sign, exp, sig );
uiZ:
return float64_t::fromRaw(uiZ);
}
int_fast32_t
softfloat_roundToI32(
bool sign, uint_fast64_t sig, uint_fast8_t roundingMode, bool exact )
{
bool roundNearEven;
uint_fast16_t roundIncrement, roundBits;
uint_fast32_t sig32;
union { uint32_t ui; int32_t i; } uZ;
int_fast32_t z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundNearEven = (roundingMode == round_near_even);
roundIncrement = 0x800;
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
roundIncrement =
(roundingMode
== (sign ? round_min : round_max))
? 0xFFF
: 0;
}
roundBits = sig & 0xFFF;
sig += roundIncrement;
if ( sig & UINT64_C( 0xFFFFF00000000000 ) ) goto invalid;
sig32 = (uint_fast32_t)(sig>>12); //fixed warning on type cast
sig32 &= ~(uint_fast32_t) (! (roundBits ^ 0x800) & roundNearEven);
//fixed unsigned unary minus: -x == ~x + 1
uZ.ui = sign ? (~sig32 + 1) : sig32;
z = uZ.i;
if ( z && ((z < 0) ^ sign) ) goto invalid;
if ( exact && roundBits ) {
raiseFlags(flag_inexact);
}
return z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
return sign ? i32_fromNegOverflow : i32_fromPosOverflow;
}
int_fast64_t
softfloat_roundToI64(
bool sign,
uint_fast64_t sig,
uint_fast64_t sigExtra,
uint_fast8_t roundingMode,
bool exact
)
{
bool roundNearEven, doIncrement;
union { uint64_t ui; int64_t i; } uZ;
int_fast64_t z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundNearEven = (roundingMode == round_near_even);
doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
doIncrement =
(roundingMode
== (sign ? round_min : round_max))
&& sigExtra;
}
if ( doIncrement ) {
++sig;
if ( ! sig ) goto invalid;
sig &=
~(uint_fast64_t)
(! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
& roundNearEven);
}
//fixed unsigned unary minus: -x == ~x + 1
uZ.ui = sign ? (~sig + 1) : sig;
z = uZ.i;
if ( z && ((z < 0) ^ sign) ) goto invalid;
if ( exact && sigExtra ) {
raiseFlags(flag_inexact);
}
return z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
return sign ? i64_fromNegOverflow : i64_fromPosOverflow;
}
uint_fast32_t
softfloat_roundToUI32(
bool sign, uint_fast64_t sig, uint_fast8_t roundingMode, bool exact )
{
bool roundNearEven;
uint_fast16_t roundIncrement, roundBits;
uint_fast32_t z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundNearEven = (roundingMode == round_near_even);
roundIncrement = 0x800;
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
roundIncrement =
(roundingMode
== (sign ? round_min : round_max))
? 0xFFF
: 0;
}
roundBits = sig & 0xFFF;
sig += roundIncrement;
if ( sig & UINT64_C( 0xFFFFF00000000000 ) ) goto invalid;
z = (uint_fast32_t)(sig>>12); //fixed warning on type cast
z &= ~(uint_fast32_t) (! (roundBits ^ 0x800) & roundNearEven);
if ( sign && z ) goto invalid;
if ( exact && roundBits ) {
raiseFlags(flag_inexact);
}
return z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
return sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
}
uint_fast64_t
softfloat_roundToUI64(
bool sign,
uint_fast64_t sig,
uint_fast64_t sigExtra,
uint_fast8_t roundingMode,
bool exact
)
{
bool roundNearEven, doIncrement;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundNearEven = (roundingMode == round_near_even);
doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
doIncrement =
(roundingMode
== (sign ? round_min : round_max))
&& sigExtra;
}
if ( doIncrement ) {
++sig;
if ( ! sig ) goto invalid;
sig &=
~(uint_fast64_t)
(! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
& roundNearEven);
}
if ( sign && sig ) goto invalid;
if ( exact && sigExtra ) {
raiseFlags(flag_inexact);
}
return sig;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
return sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
}
struct uint128
softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
{
uint_fast8_t u8NegDist;
struct uint128 z;
if ( dist < 64 ) {
//fixed unsigned unary minus: -x == ~x + 1 , fixed type cast
u8NegDist = (uint_fast8_t)(~dist + 1);
z.v64 = a64>>dist;
z.v0 =
a64<<(u8NegDist & 63) | a0>>dist
| ((uint64_t) (a0<<(u8NegDist & 63)) != 0);
} else {
z.v64 = 0;
z.v0 =
(dist < 127)
? a64>>(dist & 63)
| (((a64 & (((uint_fast64_t) 1<<(dist & 63)) - 1)) | a0)
!= 0)
: ((a64 | a0) != 0);
}
return z;
}
float32_t softfloat_subMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
{
int_fast16_t expA;
uint_fast32_t sigA;
int_fast16_t expB;
uint_fast32_t sigB;
int_fast16_t expDiff;
uint_fast32_t uiZ;
int_fast32_t sigDiff;
bool signZ;
int_fast8_t shiftDist;
int_fast16_t expZ;
uint_fast32_t sigX, sigY;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expDiff = expA - expB;
if ( ! expDiff ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA | sigB ) goto propagateNaN;
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
goto uiZ;
}
sigDiff = sigA - sigB;
if ( ! sigDiff ) {
uiZ =
packToF32UI(
(globalRoundingMode == round_min), 0, 0 );
goto uiZ;
}
if ( expA ) --expA;
signZ = signF32UI( uiA );
if ( sigDiff < 0 ) {
signZ = ! signZ;
sigDiff = -sigDiff;
}
shiftDist = softfloat_countLeadingZeros32( sigDiff ) - 8;
expZ = expA - shiftDist;
if ( expZ < 0 ) {
shiftDist = (int_fast8_t)expA; //fixed type cast
expZ = 0;
}
uiZ = packToF32UI( signZ, expZ, sigDiff<<shiftDist );
goto uiZ;
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
signZ = signF32UI( uiA );
sigA <<= 7;
sigB <<= 7;
if ( expDiff < 0 ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
signZ = ! signZ;
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
uiZ = packToF32UI( signZ, 0xFF, 0 );
goto uiZ;
}
expZ = expB - 1;
sigX = sigB | 0x40000000;
sigY = sigA + (expA ? 0x40000000 : sigA);
expDiff = -expDiff;
} else {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
expZ = expA - 1;
sigX = sigA | 0x40000000;
sigY = sigB + (expB ? 0x40000000 : sigB);
}
return
softfloat_normRoundPackToF32(
signZ, expZ, sigX - softfloat_shiftRightJam32( sigY, expDiff )
);
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
uiZ:
return float32_t::fromRaw(uiZ);
}
float64_t
softfloat_subMagsF64( uint_fast64_t uiA, uint_fast64_t uiB, bool signZ )
{
int_fast16_t expA;
uint_fast64_t sigA;
int_fast16_t expB;
uint_fast64_t sigB;
int_fast16_t expDiff;
uint_fast64_t uiZ;
int_fast64_t sigDiff;
int_fast8_t shiftDist;
int_fast16_t expZ;
uint_fast64_t sigZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expDiff = expA - expB;
if ( ! expDiff ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA | sigB ) goto propagateNaN;
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
goto uiZ;
}
sigDiff = sigA - sigB;
if ( ! sigDiff ) {
uiZ =
packToF64UI(
(globalRoundingMode == round_min), 0, 0 );
goto uiZ;
}
if ( expA ) --expA;
if ( sigDiff < 0 ) {
signZ = ! signZ;
sigDiff = -sigDiff;
}
shiftDist = softfloat_countLeadingZeros64( sigDiff ) - 11;
expZ = expA - shiftDist;
if ( expZ < 0 ) {
shiftDist = (int_fast8_t)expA; //fixed type cast
expZ = 0;
}
uiZ = packToF64UI( signZ, expZ, sigDiff<<shiftDist );
goto uiZ;
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
sigA <<= 10;
sigB <<= 10;
if ( expDiff < 0 ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
signZ = ! signZ;
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
uiZ = packToF64UI( signZ, 0x7FF, 0 );
goto uiZ;
}
sigA += expA ? UINT64_C( 0x4000000000000000 ) : sigA;
sigA = softfloat_shiftRightJam64( sigA, -expDiff );
sigB |= UINT64_C( 0x4000000000000000 );
expZ = expB;
sigZ = sigB - sigA;
} else {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
sigB += expB ? UINT64_C( 0x4000000000000000 ) : sigB;
sigB = softfloat_shiftRightJam64( sigB, expDiff );
sigA |= UINT64_C( 0x4000000000000000 );
expZ = expA;
sigZ = sigA - sigB;
}
return softfloat_normRoundPackToF64( signZ, expZ - 1, sigZ );
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
uiZ:
return float64_t::fromRaw(uiZ);
}
float32_t ui32_to_f32( uint32_t a )
{
if ( ! a ) {
return float32_t::fromRaw(0);
}
if ( a & 0x80000000 ) {
return softfloat_roundPackToF32( 0, 0x9D, a>>1 | (a & 1) );
} else {
return softfloat_normRoundPackToF32( 0, 0x9C, a );
}
}
float64_t ui32_to_f64( uint32_t a )
{
uint_fast64_t uiZ;
int_fast8_t shiftDist;
if ( ! a ) {
uiZ = 0;
} else {
shiftDist = softfloat_countLeadingZeros32( a ) + 21;
uiZ =
packToF64UI( 0, 0x432 - shiftDist, (uint_fast64_t) a<<shiftDist );
}
return float64_t::fromRaw(uiZ);
}
float32_t ui64_to_f32( uint64_t a )
{
int_fast8_t shiftDist;
uint_fast32_t sig;
shiftDist = softfloat_countLeadingZeros64( a ) - 40;
if ( 0 <= shiftDist ) {
return float32_t::fromRaw(a ? packToF32UI(0, 0x95 - shiftDist, (uint_fast32_t) a<<shiftDist ) : 0);
} else {
shiftDist += 7;
sig =
(shiftDist < 0) ? (uint_fast32_t) softfloat_shortShiftRightJam64( a, -shiftDist ) //fixed warning on type cast
: (uint_fast32_t) a<<shiftDist;
return softfloat_roundPackToF32( 0, 0x9C - shiftDist, sig );
}
}
float64_t ui64_to_f64( uint64_t a )
{
if ( ! a ) {
return float64_t::fromRaw(0);
}
if ( a & UINT64_C( 0x8000000000000000 ) ) {
return
softfloat_roundPackToF64(
0, 0x43D, softfloat_shortShiftRightJam64( a, 1 ) );
} else {
return softfloat_normRoundPackToF64( 0, 0x43C, a );
}
}
/*----------------------------------------------------------------------------
| Ported from OpenCV.
*----------------------------------------------------------------------------*/
////////////////////////////////////// EXP /////////////////////////////////////
#define EXPTAB_SCALE 6
#define EXPTAB_MASK ((1 << EXPTAB_SCALE) - 1)
static const softdouble EXPPOLY_32F_A0(.9670371139572337719125840413672004409288e-2);
static const double expTab[] = {
1.0,
1.0108892860517004600204097905619,
1.0218971486541166782344801347833,
1.0330248790212284225001082839705,
1.0442737824274138403219664787399,
1.0556451783605571588083413251529,
1.0671404006768236181695211209928,
1.0787607977571197937406800374385,
1.0905077326652576592070106557607,
1.1023825833078409435564142094256,
1.1143867425958925363088129569196,
1.126521618608241899794798643787 ,
1.1387886347566916537038302838415,
1.151189229952982705817759635202 ,
1.1637248587775775138135735990922,
1.1763969916502812762846457284838,
1.1892071150027210667174999705605,
1.2021567314527031420963969574978,
1.2152473599804688781165202513388,
1.2284805361068700056940089577928,
1.2418578120734840485936774687266,
1.2553807570246910895793906574423,
1.2690509571917332225544190810323,
1.2828700160787782807266697810215,
1.2968395546510096659337541177925,
1.3109612115247643419229917863308,
1.3252366431597412946295370954987,
1.3396675240533030053600306697244,
1.3542555469368927282980147401407,
1.3690024229745906119296011329822,
1.3839098819638319548726595272652,
1.3989796725383111402095281367152,
1.4142135623730950488016887242097,
1.4296133383919700112350657782751,
1.4451808069770466200370062414717,
1.4609177941806469886513028903106,
1.476826145939499311386907480374 ,
1.4929077282912648492006435314867,
1.5091644275934227397660195510332,
1.5255981507445383068512536895169,
1.5422108254079408236122918620907,
1.5590044002378369670337280894749,
1.5759808451078864864552701601819,
1.5931421513422668979372486431191,
1.6104903319492543081795206673574,
1.628027421857347766848218522014 ,
1.6457554781539648445187567247258,
1.6636765803267364350463364569764,
1.6817928305074290860622509524664,
1.7001063537185234695013625734975,
1.7186192981224779156293443764563,
1.7373338352737062489942020818722,
1.7562521603732994831121606193753,
1.7753764925265212525505592001993,
1.7947090750031071864277032421278,
1.8142521755003987562498346003623,
1.8340080864093424634870831895883,
1.8539791250833855683924530703377,
1.8741676341102999013299989499544,
1.8945759815869656413402186534269,
1.9152065613971472938726112702958,
1.9360617934922944505980559045667,
1.9571441241754002690183222516269,
1.9784560263879509682582499181312,
};
// 1 / ln(2) == 1.4426950408889634073599246810019
static const float64_t exp_prescale = float64_t(1.4426950408889634073599246810019) * float64_t(1 << EXPTAB_SCALE);
static const float64_t exp_postscale = float64_t::one()/float64_t(1 << EXPTAB_SCALE);
static const float64_t exp_max_val(3000*(1 << EXPTAB_SCALE)); // log10(DBL_MAX) < 3000
float32_t f32_exp( float32_t x)
{
//special cases
if(x.isNaN()) return float32_t::nan();
if(x.isInf()) return (x == float32_t::inf()) ? x : float32_t::zero();
static const float64_t
A4 = float64_t(1.000000000000002438532970795181890933776) / EXPPOLY_32F_A0,
A3 = float64_t(.6931471805521448196800669615864773144641) / EXPPOLY_32F_A0,
A2 = float64_t(.2402265109513301490103372422686535526573) / EXPPOLY_32F_A0,
A1 = float64_t(.5550339366753125211915322047004666939128e-1) / EXPPOLY_32F_A0;
float64_t x0;
if(expF32UI(x.v) > 127 + 10)
x0 = signF32UI(x.v) ? -exp_max_val : exp_max_val;
else
x0 = f32_to_f64(x) * exp_prescale;
int val0 = f64_to_i32(x0, round_near_even, false);
int t = (val0 >> EXPTAB_SCALE) + 1023;
t = t < 0 ? 0 : (t > 2047 ? 2047 : t);
float64_t buf; buf.v = packToF64UI(0, t, 0);
x0 = (x0 - f64_roundToInt(x0, round_near_even, false)) * exp_postscale;
return (buf * EXPPOLY_32F_A0 * float64_t(expTab[val0 & EXPTAB_MASK]) * ((((x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4));
}
float64_t f64_exp(float64_t x)
{
//special cases
if(x.isNaN()) return float64_t::nan();
if(x.isInf()) return (x == float64_t::inf()) ? x : float64_t::zero();
static const float64_t
A5 = float64_t(.99999999999999999998285227504999) / EXPPOLY_32F_A0,
A4 = float64_t(.69314718055994546743029643825322) / EXPPOLY_32F_A0,
A3 = float64_t(.24022650695886477918181338054308) / EXPPOLY_32F_A0,
A2 = float64_t(.55504108793649567998466049042729e-1) / EXPPOLY_32F_A0,
A1 = float64_t(.96180973140732918010002372686186e-2) / EXPPOLY_32F_A0,
A0 = float64_t(.13369713757180123244806654839424e-2) / EXPPOLY_32F_A0;
float64_t x0;
if(expF64UI(x.v) > 1023 + 10)
x0 = signF64UI(x.v) ? -exp_max_val : exp_max_val;
else
x0 = x * exp_prescale;
int val0 = cvRound(x0);
int t = (val0 >> EXPTAB_SCALE) + 1023;
t = t < 0 ? 0 : (t > 2047 ? 2047 : t);
float64_t buf; buf.v = packToF64UI(0, t, 0);
x0 = (x0 - f64_roundToInt(x0, round_near_even, false)) * exp_postscale;
return buf * EXPPOLY_32F_A0 * float64_t(expTab[val0 & EXPTAB_MASK]) * (((((A0*x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4)*x0 + A5);
}
#undef EXPTAB_SCALE
#undef EXPTAB_MASK
#undef EXPPOLY_32F_A0
/////////////////////////////////////////// LOG ///////////////////////////////////////
#define LOGTAB_SCALE 8
static const double CV_DECL_ALIGNED(16) icvLogTab[] = {
0.0000000000000000000000000000000000000000, 1.000000000000000000000000000000000000000,
.00389864041565732288852075271279318258166, .9961089494163424124513618677042801556420,
.00778214044205494809292034119607706088573, .9922480620155038759689922480620155038760,
.01165061721997527263705585198749759001657, .9884169884169884169884169884169884169884,
.01550418653596525274396267235488267033361, .9846153846153846153846153846153846153846,
.01934296284313093139406447562578250654042, .9808429118773946360153256704980842911877,
.02316705928153437593630670221500622574241, .9770992366412213740458015267175572519084,
.02697658769820207233514075539915211265906, .9733840304182509505703422053231939163498,
.03077165866675368732785500469617545604706, .9696969696969696969696969696969696969697,
.03455238150665972812758397481047722976656, .9660377358490566037735849056603773584906,
.03831886430213659461285757856785494368522, .9624060150375939849624060150375939849624,
.04207121392068705056921373852674150839447, .9588014981273408239700374531835205992509,
.04580953603129420126371940114040626212953, .9552238805970149253731343283582089552239,
.04953393512227662748292900118940451648088, .9516728624535315985130111524163568773234,
.05324451451881227759255210685296333394944, .9481481481481481481481481481481481481481,
.05694137640013842427411105973078520037234, .9446494464944649446494464944649446494465,
.06062462181643483993820353816772694699466, .9411764705882352941176470588235294117647,
.06429435070539725460836422143984236754475, .9377289377289377289377289377289377289377,
.06795066190850773679699159401934593915938, .9343065693430656934306569343065693430657,
.07159365318700880442825962290953611955044, .9309090909090909090909090909090909090909,
.07522342123758751775142172846244648098944, .9275362318840579710144927536231884057971,
.07884006170777602129362549021607264876369, .9241877256317689530685920577617328519856,
.08244366921107458556772229485432035289706, .9208633093525179856115107913669064748201,
.08603433734180314373940490213499288074675, .9175627240143369175627240143369175627240,
.08961215868968712416897659522874164395031, .9142857142857142857142857142857142857143,
.09317722485418328259854092721070628613231, .9110320284697508896797153024911032028470,
.09672962645855109897752299730200320482256, .9078014184397163120567375886524822695035,
.10026945316367513738597949668474029749630, .9045936395759717314487632508833922261484,
.10379679368164355934833764649738441221420, .9014084507042253521126760563380281690141,
.10731173578908805021914218968959175981580, .8982456140350877192982456140350877192982,
.11081436634029011301105782649756292812530, .8951048951048951048951048951048951048951,
.11430477128005862852422325204315711744130, .8919860627177700348432055749128919860627,
.11778303565638344185817487641543266363440, .8888888888888888888888888888888888888889,
.12124924363286967987640707633545389398930, .8858131487889273356401384083044982698962,
.12470347850095722663787967121606925502420, .8827586206896551724137931034482758620690,
.12814582269193003360996385708858724683530, .8797250859106529209621993127147766323024,
.13157635778871926146571524895989568904040, .8767123287671232876712328767123287671233,
.13499516453750481925766280255629681050780, .8737201365187713310580204778156996587031,
.13840232285911913123754857224412262439730, .8707482993197278911564625850340136054422,
.14179791186025733629172407290752744302150, .8677966101694915254237288135593220338983,
.14518200984449788903951628071808954700830, .8648648648648648648648648648648648648649,
.14855469432313711530824207329715136438610, .8619528619528619528619528619528619528620,
.15191604202584196858794030049466527998450, .8590604026845637583892617449664429530201,
.15526612891112392955683674244937719777230, .8561872909698996655518394648829431438127,
.15860503017663857283636730244325008243330, .8533333333333333333333333333333333333333,
.16193282026931324346641360989451641216880, .8504983388704318936877076411960132890365,
.16524957289530714521497145597095368430010, .8476821192052980132450331125827814569536,
.16855536102980664403538924034364754334090, .8448844884488448844884488448844884488449,
.17185025692665920060697715143760433420540, .8421052631578947368421052631578947368421,
.17513433212784912385018287750426679849630, .8393442622950819672131147540983606557377,
.17840765747281828179637841458315961062910, .8366013071895424836601307189542483660131,
.18167030310763465639212199675966985523700, .8338762214983713355048859934853420195440,
.18492233849401198964024217730184318497780, .8311688311688311688311688311688311688312,
.18816383241818296356839823602058459073300, .8284789644012944983818770226537216828479,
.19139485299962943898322009772527962923050, .8258064516129032258064516129032258064516,
.19461546769967164038916962454095482826240, .8231511254019292604501607717041800643087,
.19782574332991986754137769821682013571260, .8205128205128205128205128205128205128205,
.20102574606059073203390141770796617493040, .8178913738019169329073482428115015974441,
.20421554142869088876999228432396193966280, .8152866242038216560509554140127388535032,
.20739519434607056602715147164417430758480, .8126984126984126984126984126984126984127,
.21056476910734961416338251183333341032260, .8101265822784810126582278481012658227848,
.21372432939771812687723695489694364368910, .8075709779179810725552050473186119873817,
.21687393830061435506806333251006435602900, .8050314465408805031446540880503144654088,
.22001365830528207823135744547471404075630, .8025078369905956112852664576802507836991,
.22314355131420973710199007200571941211830, .8000000000000000000000000000000000000000,
.22626367865045338145790765338460914790630, .7975077881619937694704049844236760124611,
.22937410106484582006380890106811420992010, .7950310559006211180124223602484472049689,
.23247487874309405442296849741978803649550, .7925696594427244582043343653250773993808,
.23556607131276688371634975283086532726890, .7901234567901234567901234567901234567901,
.23864773785017498464178231643018079921600, .7876923076923076923076923076923076923077,
.24171993688714515924331749374687206000090, .7852760736196319018404907975460122699387,
.24478272641769091566565919038112042471760, .7828746177370030581039755351681957186544,
.24783616390458124145723672882013488560910, .7804878048780487804878048780487804878049,
.25088030628580937353433455427875742316250, .7781155015197568389057750759878419452888,
.25391520998096339667426946107298135757450, .7757575757575757575757575757575757575758,
.25694093089750041913887912414793390780680, .7734138972809667673716012084592145015106,
.25995752443692604627401010475296061486000, .7710843373493975903614457831325301204819,
.26296504550088134477547896494797896593800, .7687687687687687687687687687687687687688,
.26596354849713793599974565040611196309330, .7664670658682634730538922155688622754491,
.26895308734550393836570947314612567424780, .7641791044776119402985074626865671641791,
.27193371548364175804834985683555714786050, .7619047619047619047619047619047619047619,
.27490548587279922676529508862586226314300, .7596439169139465875370919881305637982196,
.27786845100345625159121709657483734190480, .7573964497041420118343195266272189349112,
.28082266290088775395616949026589281857030, .7551622418879056047197640117994100294985,
.28376817313064456316240580235898960381750, .7529411764705882352941176470588235294118,
.28670503280395426282112225635501090437180, .7507331378299120234604105571847507331378,
.28963329258304265634293983566749375313530, .7485380116959064327485380116959064327485,
.29255300268637740579436012922087684273730, .7463556851311953352769679300291545189504,
.29546421289383584252163927885703742504130, .7441860465116279069767441860465116279070,
.29836697255179722709783618483925238251680, .7420289855072463768115942028985507246377,
.30126133057816173455023545102449133992200, .7398843930635838150289017341040462427746,
.30414733546729666446850615102448500692850, .7377521613832853025936599423631123919308,
.30702503529491181888388950937951449304830, .7356321839080459770114942528735632183908,
.30989447772286465854207904158101882785550, .7335243553008595988538681948424068767908,
.31275571000389684739317885942000430077330, .7314285714285714285714285714285714285714,
.31560877898630329552176476681779604405180, .7293447293447293447293447293447293447293,
.31845373111853458869546784626436419785030, .7272727272727272727272727272727272727273,
.32129061245373424782201254856772720813750, .7252124645892351274787535410764872521246,
.32411946865421192853773391107097268104550, .7231638418079096045197740112994350282486,
.32694034499585328257253991068864706903700, .7211267605633802816901408450704225352113,
.32975328637246797969240219572384376078850, .7191011235955056179775280898876404494382,
.33255833730007655635318997155991382896900, .7170868347338935574229691876750700280112,
.33535554192113781191153520921943709254280, .7150837988826815642458100558659217877095,
.33814494400871636381467055798566434532400, .7130919220055710306406685236768802228412,
.34092658697059319283795275623560883104800, .7111111111111111111111111111111111111111,
.34370051385331840121395430287520866841080, .7091412742382271468144044321329639889197,
.34646676734620857063262633346312213689100, .7071823204419889502762430939226519337017,
.34922538978528827602332285096053965389730, .7052341597796143250688705234159779614325,
.35197642315717814209818925519357435405250, .7032967032967032967032967032967032967033,
.35471990910292899856770532096561510115850, .7013698630136986301369863013698630136986,
.35745588892180374385176833129662554711100, .6994535519125683060109289617486338797814,
.36018440357500774995358483465679455548530, .6975476839237057220708446866485013623978,
.36290549368936841911903457003063522279280, .6956521739130434782608695652173913043478,
.36561919956096466943762379742111079394830, .6937669376693766937669376693766937669377,
.36832556115870762614150635272380895912650, .6918918918918918918918918918918918918919,
.37102461812787262962487488948681857436900, .6900269541778975741239892183288409703504,
.37371640979358405898480555151763837784530, .6881720430107526881720430107526881720430,
.37640097516425302659470730759494472295050, .6863270777479892761394101876675603217158,
.37907835293496944251145919224654790014030, .6844919786096256684491978609625668449198,
.38174858149084833769393299007788300514230, .6826666666666666666666666666666666666667,
.38441169891033200034513583887019194662580, .6808510638297872340425531914893617021277,
.38706774296844825844488013899535872042180, .6790450928381962864721485411140583554377,
.38971675114002518602873692543653305619950, .6772486772486772486772486772486772486772,
.39235876060286384303665840889152605086580, .6754617414248021108179419525065963060686,
.39499380824086893770896722344332374632350, .6736842105263157894736842105263157894737,
.39762193064713846624158577469643205404280, .6719160104986876640419947506561679790026,
.40024316412701266276741307592601515352730, .6701570680628272251308900523560209424084,
.40285754470108348090917615991202183067800, .6684073107049608355091383812010443864230,
.40546510810816432934799991016916465014230, .6666666666666666666666666666666666666667,
.40806588980822172674223224930756259709600, .6649350649350649350649350649350649350649,
.41065992498526837639616360320360399782650, .6632124352331606217616580310880829015544,
.41324724855021932601317757871584035456180, .6614987080103359173126614987080103359173,
.41582789514371093497757669865677598863850, .6597938144329896907216494845360824742268,
.41840189913888381489925905043492093682300, .6580976863753213367609254498714652956298,
.42096929464412963239894338585145305842150, .6564102564102564102564102564102564102564,
.42353011550580327293502591601281892508280, .6547314578005115089514066496163682864450,
.42608439531090003260516141381231136620050, .6530612244897959183673469387755102040816,
.42863216738969872610098832410585600882780, .6513994910941475826972010178117048346056,
.43117346481837132143866142541810404509300, .6497461928934010152284263959390862944162,
.43370832042155937902094819946796633303180, .6481012658227848101265822784810126582278,
.43623676677491801667585491486534010618930, .6464646464646464646464646464646464646465,
.43875883620762790027214350629947148263450, .6448362720403022670025188916876574307305,
.44127456080487520440058801796112675219780, .6432160804020100502512562814070351758794,
.44378397241030093089975139264424797147500, .6416040100250626566416040100250626566416,
.44628710262841947420398014401143882423650, .6400000000000000000000000000000000000000,
.44878398282700665555822183705458883196130, .6384039900249376558603491271820448877805,
.45127464413945855836729492693848442286250, .6368159203980099502487562189054726368159,
.45375911746712049854579618113348260521900, .6352357320099255583126550868486352357320,
.45623743348158757315857769754074979573500, .6336633663366336633663366336633663366337,
.45870962262697662081833982483658473938700, .6320987654320987654320987654320987654321,
.46117571512217014895185229761409573256980, .6305418719211822660098522167487684729064,
.46363574096303250549055974261136725544930, .6289926289926289926289926289926289926290,
.46608972992459918316399125615134835243230, .6274509803921568627450980392156862745098,
.46853771156323925639597405279346276074650, .6259168704156479217603911980440097799511,
.47097971521879100631480241645476780831830, .6243902439024390243902439024390243902439,
.47341577001667212165614273544633761048330, .6228710462287104622871046228710462287105,
.47584590486996386493601107758877333253630, .6213592233009708737864077669902912621359,
.47827014848147025860569669930555392056700, .6198547215496368038740920096852300242131,
.48068852934575190261057286988943815231330, .6183574879227053140096618357487922705314,
.48310107575113581113157579238759353756900, .6168674698795180722891566265060240963855,
.48550781578170076890899053978500887751580, .6153846153846153846153846153846153846154,
.48790877731923892879351001283794175833480, .6139088729016786570743405275779376498801,
.49030398804519381705802061333088204264650, .6124401913875598086124401913875598086124,
.49269347544257524607047571407747454941280, .6109785202863961813842482100238663484487,
.49507726679785146739476431321236304938800, .6095238095238095238095238095238095238095,
.49745538920281889838648226032091770321130, .6080760095011876484560570071258907363420,
.49982786955644931126130359189119189977650, .6066350710900473933649289099526066350711,
.50219473456671548383667413872899487614650, .6052009456264775413711583924349881796690,
.50455601075239520092452494282042607665050, .6037735849056603773584905660377358490566,
.50691172444485432801997148999362252652650, .6023529411764705882352941176470588235294,
.50926190178980790257412536448100581765150, .6009389671361502347417840375586854460094,
.51160656874906207391973111953120678663250, .5995316159250585480093676814988290398126,
.51394575110223428282552049495279788970950, .5981308411214953271028037383177570093458,
.51627947444845445623684554448118433356300, .5967365967365967365967365967365967365967,
.51860776420804555186805373523384332656850, .5953488372093023255813953488372093023256,
.52093064562418522900344441950437612831600, .5939675174013921113689095127610208816705,
.52324814376454775732838697877014055848100, .5925925925925925925925925925925925925926,
.52556028352292727401362526507000438869000, .5912240184757505773672055427251732101617,
.52786708962084227803046587723656557500350, .5898617511520737327188940092165898617512,
.53016858660912158374145519701414741575700, .5885057471264367816091954022988505747126,
.53246479886947173376654518506256863474850, .5871559633027522935779816513761467889908,
.53475575061602764748158733709715306758900, .5858123569794050343249427917620137299771,
.53704146589688361856929077475797384977350, .5844748858447488584474885844748858447489,
.53932196859560876944783558428753167390800, .5831435079726651480637813211845102505695,
.54159728243274429804188230264117009937750, .5818181818181818181818181818181818181818,
.54386743096728351609669971367111429572100, .5804988662131519274376417233560090702948,
.54613243759813556721383065450936555862450, .5791855203619909502262443438914027149321,
.54839232556557315767520321969641372561450, .5778781038374717832957110609480812641084,
.55064711795266219063194057525834068655950, .5765765765765765765765765765765765765766,
.55289683768667763352766542084282264113450, .5752808988764044943820224719101123595506,
.55514150754050151093110798683483153581600, .5739910313901345291479820627802690582960,
.55738115013400635344709144192165695130850, .5727069351230425055928411633109619686801,
.55961578793542265941596269840374588966350, .5714285714285714285714285714285714285714,
.56184544326269181269140062795486301183700, .5701559020044543429844097995545657015590,
.56407013828480290218436721261241473257550, .5688888888888888888888888888888888888889,
.56628989502311577464155334382667206227800, .5676274944567627494456762749445676274945,
.56850473535266865532378233183408156037350, .5663716814159292035398230088495575221239,
.57071468100347144680739575051120482385150, .5651214128035320088300220750551876379691,
.57291975356178548306473885531886480748650, .5638766519823788546255506607929515418502,
.57511997447138785144460371157038025558000, .5626373626373626373626373626373626373626,
.57731536503482350219940144597785547375700, .5614035087719298245614035087719298245614,
.57950594641464214795689713355386629700650, .5601750547045951859956236323851203501094,
.58169173963462239562716149521293118596100, .5589519650655021834061135371179039301310,
.58387276558098266665552955601015128195300, .5577342047930283224400871459694989106754,
.58604904500357812846544902640744112432000, .5565217391304347826086956521739130434783,
.58822059851708596855957011939608491957200, .5553145336225596529284164859002169197397,
.59038744660217634674381770309992134571100, .5541125541125541125541125541125541125541,
.59254960960667157898740242671919986605650, .5529157667386609071274298056155507559395,
.59470710774669277576265358220553025603300, .5517241379310344827586206896551724137931,
.59685996110779382384237123915227130055450, .5505376344086021505376344086021505376344,
.59900818964608337768851242799428291618800, .5493562231759656652360515021459227467811,
.60115181318933474940990890900138765573500, .5481798715203426124197002141327623126338,
.60329085143808425240052883964381180703650, .5470085470085470085470085470085470085470,
.60542532396671688843525771517306566238400, .5458422174840085287846481876332622601279,
.60755525022454170969155029524699784815300, .5446808510638297872340425531914893617021,
.60968064953685519036241657886421307921400, .5435244161358811040339702760084925690021,
.61180154110599282990534675263916142284850, .5423728813559322033898305084745762711864,
.61391794401237043121710712512140162289150, .5412262156448202959830866807610993657505,
.61602987721551394351138242200249806046500, .5400843881856540084388185654008438818565,
.61813735955507864705538167982012964785100, .5389473684210526315789473684210526315789,
.62024040975185745772080281312810257077200, .5378151260504201680672268907563025210084,
.62233904640877868441606324267922900617100, .5366876310272536687631027253668763102725,
.62443328801189346144440150965237990021700, .5355648535564853556485355648535564853556,
.62652315293135274476554741340805776417250, .5344467640918580375782881002087682672234,
.62860865942237409420556559780379757285100, .5333333333333333333333333333333333333333,
.63068982562619868570408243613201193511500, .5322245322245322245322245322245322245322,
.63276666957103777644277897707070223987100, .5311203319502074688796680497925311203320,
.63483920917301017716738442686619237065300, .5300207039337474120082815734989648033126,
.63690746223706917739093569252872839570050, .5289256198347107438016528925619834710744,
.63897144645792069983514238629140891134750, .5278350515463917525773195876288659793814,
.64103117942093124081992527862894348800200, .5267489711934156378600823045267489711934,
.64308667860302726193566513757104985415950, .5256673511293634496919917864476386036961,
.64513796137358470073053240412264131009600, .5245901639344262295081967213114754098361,
.64718504499530948859131740391603671014300, .5235173824130879345603271983640081799591,
.64922794662510974195157587018911726772800, .5224489795918367346938775510204081632653,
.65126668331495807251485530287027359008800, .5213849287169042769857433808553971486762,
.65330127201274557080523663898929953575150, .5203252032520325203252032520325203252033,
.65533172956312757406749369692988693714150, .5192697768762677484787018255578093306288,
.65735807270835999727154330685152672231200, .5182186234817813765182186234817813765182,
.65938031808912778153342060249997302889800, .5171717171717171717171717171717171717172,
.66139848224536490484126716182800009846700, .5161290322580645161290322580645161290323,
.66341258161706617713093692145776003599150, .5150905432595573440643863179074446680080,
.66542263254509037562201001492212526500250, .5140562248995983935742971887550200803213,
.66742865127195616370414654738851822912700, .5130260521042084168336673346693386773547,
.66943065394262923906154583164607174694550, .5120000000000000000000000000000000000000,
.67142865660530226534774556057527661323550, .5109780439121756487025948103792415169661,
.67342267521216669923234121597488410770900, .5099601593625498007968127490039840637450,
.67541272562017662384192817626171745359900, .5089463220675944333996023856858846918489,
.67739882359180603188519853574689477682100, .5079365079365079365079365079365079365079,
.67938098479579733801614338517538271844400, .5069306930693069306930693069306930693069,
.68135922480790300781450241629499942064300, .5059288537549407114624505928853754940711,
.68333355911162063645036823800182901322850, .5049309664694280078895463510848126232742,
.68530400309891936760919861626462079584600, .5039370078740157480314960629921259842520,
.68727057207096020619019327568821609020250, .5029469548133595284872298624754420432220,
.68923328123880889251040571252815425395950, .5019607843137254901960784313725490196078,
.69314718055994530941723212145818, 5.0e-01,
};
static const float64_t ln_2(0.69314718055994530941723212145818);
float32_t f32_log(float32_t x)
{
//special cases
if(x.isNaN() || x < float32_t::zero()) return float32_t::nan();
if(x == float32_t::zero()) return -float32_t::inf();
//first 8 bits of mantissa
int h0 = (x.v >> (23 - LOGTAB_SCALE)) & ((1 << LOGTAB_SCALE) - 1);
//buf == 0.00000000_the_rest_mantissa_bits
float64_t buf; buf.v = packToF64UI(0, 1023, ((uint64_t)x.v << 29) & ((1LL << (52 - LOGTAB_SCALE)) - 1));
buf -= float64_t::one();
float64_t tab0(icvLogTab[2*h0]);
float64_t tab1(icvLogTab[2*h0+1]);
float64_t x0 = buf * tab1;
//if last elements of icvLogTab
if(h0 == 255) x0 += float64_t(-1./512);
float64_t y0 = ln_2 * float64_t(expF32UI(x.v) - 127) + tab0 + x0*x0*x0/float64_t(3) - x0*x0/float64_t(2) + x0;
return y0;
}
float64_t f64_log(float64_t x)
{
//special cases
if(x.isNaN() || x < float64_t::zero()) return float64_t::nan();
if(x == float64_t::zero()) return -float64_t::inf();
static const float64_t
A7(1.0),
A6(-0.5),
A5(0.333333333333333314829616256247390992939472198486328125),
A4(-0.25),
A3(0.2),
A2(-0.1666666666666666574148081281236954964697360992431640625),
A1(0.1428571428571428769682682968777953647077083587646484375),
A0(-0.125);
//first 8 bits of mantissa
int h0 = (x.v >> (52 - LOGTAB_SCALE)) & ((1 << LOGTAB_SCALE) - 1);
//buf == 0.00000000_the_rest_mantissa_bits
float64_t buf; buf.v = packToF64UI(0, 1023, x.v & ((1LL << (52 - LOGTAB_SCALE)) - 1));
buf -= float64_t::one();
float64_t tab0(icvLogTab[2*h0]);
float64_t tab1(icvLogTab[2*h0 + 1]);
float64_t x0 = buf * tab1;
//if last elements of icvLogTab
if(h0 == 255) x0 += float64_t(-1./512);
float64_t xq = x0*x0;
return ln_2 * float64_t( expF64UI(x.v) - 1023) + tab0 + (((A0*xq + A2)*xq + A4)*xq + A6)*xq +
(((A1*xq + A3)*xq + A5)*xq + A7)*x0;
}
/* ************************************************************************** *\
Fast cube root by Ken Turkowski
(http://www.worldserver.com/turk/computergraphics/papers.html)
\* ************************************************************************** */
float32_t f32_cbrt(float32_t x)
{
//special cases
if(x.isNaN()) return float32_t::nan();
if(x.isInf()) return x;
int s = signF32UI(x.v);
int ex = expF32UI(x.v) - 127;
int shx = ex % 3;
shx -= shx >= 0 ? 3 : 0;
ex = (ex - shx) / 3 - 1; /* exponent of cube root */
float64_t fr; fr.v = packToF64UI(0, shx + 1023, ((uint64_t)fracF32UI(x.v)) << 29);
/* 0.125 <= fr < 1.0 */
/* Use quartic rational polynomial with error < 2^(-24) */
const float64_t A1(45.2548339756803022511987494);
const float64_t A2(192.2798368355061050458134625);
const float64_t A3(119.1654824285581628956914143);
const float64_t A4(13.43250139086239872172837314);
const float64_t A5(0.1636161226585754240958355063);
const float64_t A6(14.80884093219134573786480845);
const float64_t A7(151.9714051044435648658557668);
const float64_t A8(168.5254414101568283957668343);
const float64_t A9(33.9905941350215598754191872);
const float64_t A10(1.0);
fr = ((((A1 * fr + A2) * fr + A3) * fr + A4) * fr + A5)/
((((A6 * fr + A7) * fr + A8) * fr + A9) * fr + A10);
/* fr *= 2^ex * sign */
// checks for "+0" and "-0", reset sign bit
float32_t y; y.v = ((x.v & ((1u << 31) - 1)) != 0) ? packToF32UI(s, ex+127, (uint32_t)(fracF64UI(fr.v) >> 29)) : 0;
return y;
}
/// POW functions ///
float32_t f32_pow( float32_t x, float32_t y)
{
const float32_t zero = float32_t::zero(), one = float32_t::one(), inf = float32_t::inf(), nan = float32_t::nan();
bool xinf = x.isInf(), yinf = y.isInf(), xnan = x.isNaN(), ynan = y.isNaN();
float32_t ax = abs(x);
bool useInf = (y > zero) == (ax > one);
float32_t v;
//special cases
if(ynan) v = nan;
else if(yinf) v = (ax == one || xnan) ? nan : (useInf ? inf : zero);
else if(y == zero) v = one;
else if(y == one ) v = x;
else //here y is ok
{
if(xnan) v = nan;
else if(xinf) v = (y < zero) ? zero : inf;
else if(y == f32_roundToInt(y, round_near_even, false)) v = f32_powi(x, f32_to_i32(y, round_near_even, false));
else if(x < zero) v = nan;
// (0 ** 0) == 1
else if(x == zero) v = (y < zero) ? inf : (y == zero ? one : zero);
// here x and y are ok
else v = f32_exp(y * f32_log(x));
}
return v;
}
float64_t f64_pow( float64_t x, float64_t y)
{
const float64_t zero = float64_t::zero(), one = float64_t::one(), inf = float64_t::inf(), nan = float64_t::nan();
bool xinf = x.isInf(), yinf = y.isInf(), xnan = x.isNaN(), ynan = y.isNaN();
float64_t ax = abs(x);
bool useInf = (y > zero) == (ax > one);
float64_t v;
//special cases
if(ynan) v = nan;
else if(yinf) v = (ax == one || xnan) ? nan : (useInf ? inf : zero);
else if(y == zero) v = one;
else if(y == one ) v = x;
else //here y is ok
{
if(xnan) v = nan;
else if(xinf) v = (y < zero) ? zero : inf;
else if(y == f64_roundToInt(y, round_near_even, false)) v = f64_powi(x, f64_to_i32(y, round_near_even, false));
else if(x < zero) v = nan;
// (0 ** 0) == 1
else if(x == zero) v = (y < zero) ? inf : (y == zero ? one : zero);
// here x and y are ok
else v = f64_exp(y * f64_log(x));
}
return v;
}
// These functions are for internal use only
float32_t f32_powi( float32_t x, int y)
{
float32_t v;
//special case: (0 ** 0) == 1
if(x == float32_t::zero())
v = (y < 0) ? float32_t::inf() : (y == 0 ? float32_t::one() : float32_t::zero());
// here x and y are ok
else
{
float32_t a = float32_t::one(), b = x;
int p = std::abs(y);
if( y < 0 )
b = float32_t::one()/b;
while( p > 1 )
{
if( p & 1 )
a *= b;
b *= b;
p >>= 1;
}
v = a * b;
}
return v;
}
float64_t f64_powi( float64_t x, int y)
{
float64_t v;
//special case: (0 ** 0) == 1
if(x == float64_t::zero())
v = (y < 0) ? float64_t::inf() : (y == 0 ? float64_t::one() : float64_t::zero());
// here x and y are ok
else
{
float64_t a = float64_t::one(), b = x;
int p = std::abs(y);
if( y < 0 )
b = float64_t::one()/b;
while( p > 1 )
{
if( p & 1 )
a *= b;
b *= b;
p >>= 1;
}
v = a * b;
}
return v;
}
}