opencv/modules/core/src/softfloat.cpp
2019-09-27 16:29:50 +03:00

4332 lines
166 KiB
C++

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This file is based on files from packages softfloat and fdlibm
// issued with the following licenses:
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3c, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
// FDLIBM licenses:
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* ====================================================
* Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "precomp.hpp"
#include "opencv2/core/softfloat.hpp"
namespace cv
{
/*----------------------------------------------------------------------------
| Software floating-point underflow tininess-detection mode.
*----------------------------------------------------------------------------*/
enum {
tininess_beforeRounding = 0,
tininess_afterRounding = 1
};
//fixed to make softfloat code stateless
static const uint_fast8_t globalDetectTininess = tininess_afterRounding;
/*----------------------------------------------------------------------------
| Software floating-point exception flags.
*----------------------------------------------------------------------------*/
enum {
flag_inexact = 1,
flag_underflow = 2,
flag_overflow = 4,
flag_infinite = 8,
flag_invalid = 16
};
// Disabled to make softfloat code stateless
// This function may be changed in the future for better error handling
static inline void raiseFlags( uint_fast8_t /* flags */)
{
//exceptionFlags |= flags;
}
/*----------------------------------------------------------------------------
| Software floating-point rounding mode.
*----------------------------------------------------------------------------*/
enum {
round_near_even = 0, // round to nearest, with ties to even
round_minMag = 1, // round to minimum magnitude (toward zero)
round_min = 2, // round to minimum (down)
round_max = 3, // round to maximum (up)
round_near_maxMag = 4, // round to nearest, with ties to maximum magnitude (away from zero)
round_odd = 5 // round to odd (jamming)
};
/* What is round_odd (from SoftFloat manual):
* If supported, mode round_odd first rounds a floating-point result to minimum magnitude,
* the same as round_minMag, and then, if the result is inexact, the least-significant bit
* of the result is set to 1. This rounding mode is also known as jamming.
*/
//fixed to make softfloat code stateless
static const uint_fast8_t globalRoundingMode = round_near_even;
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
#define signF32UI( a ) (((uint32_t) (a)>>31) != 0)
#define expF32UI( a ) ((int_fast16_t) ((a)>>23) & 0xFF)
#define fracF32UI( a ) ((a) & 0x007FFFFF)
#define packToF32UI( sign, exp, sig ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<23) + (sig))
#define isNaNF32UI( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF))
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
#define signF64UI( a ) (((uint64_t) (a)>>63) != 0)
#define expF64UI( a ) ((int_fast16_t) ((a)>>52) & 0x7FF)
#define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))
#define packToF64UI( sign, exp, sig ) ((uint64_t) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<52) + (sig)))
#define isNaNF64UI( a ) (((~(a) & UINT64_C( 0x7FF0000000000000 )) == 0) && ((a) & UINT64_C( 0x000FFFFFFFFFFFFF )))
/*----------------------------------------------------------------------------
| Types used to pass 32-bit and 64-bit floating-point
| arguments and results to/from functions. These types must be exactly
| 32 bits and 64 bits in size, respectively. Where a
| platform has "native" support for IEEE-Standard floating-point formats,
| the types below may, if desired, be defined as aliases for the native types
| (typically 'float' and 'double').
*----------------------------------------------------------------------------*/
typedef softfloat float32_t;
typedef softdouble float64_t;
/*----------------------------------------------------------------------------
| Integer-to-floating-point conversion routines.
*----------------------------------------------------------------------------*/
static float32_t ui32_to_f32( uint32_t );
static float64_t ui32_to_f64( uint32_t );
static float32_t ui64_to_f32( uint64_t );
static float64_t ui64_to_f64( uint64_t );
static float32_t i32_to_f32( int32_t );
static float64_t i32_to_f64( int32_t );
static float32_t i64_to_f32( int64_t );
static float64_t i64_to_f64( int64_t );
/*----------------------------------------------------------------------------
| 32-bit (single-precision) floating-point operations.
*----------------------------------------------------------------------------*/
static int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool );
static int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
static float64_t f32_to_f64( float32_t );
static float32_t f32_roundToInt( float32_t, uint_fast8_t, bool );
static float32_t f32_add( float32_t, float32_t );
static float32_t f32_sub( float32_t, float32_t );
static float32_t f32_mul( float32_t, float32_t );
static float32_t f32_mulAdd( float32_t, float32_t, float32_t );
static float32_t f32_div( float32_t, float32_t );
static float32_t f32_rem( float32_t, float32_t );
static float32_t f32_sqrt( float32_t );
static bool f32_eq( float32_t, float32_t );
static bool f32_le( float32_t, float32_t );
static bool f32_lt( float32_t, float32_t );
/*----------------------------------------------------------------------------
| 64-bit (double-precision) floating-point operations.
*----------------------------------------------------------------------------*/
static int_fast32_t f64_to_i32( float64_t, uint_fast8_t, bool );
static int_fast64_t f64_to_i64( float64_t, uint_fast8_t, bool );
static int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
static float32_t f64_to_f32( float64_t );
static float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
static float64_t f64_add( float64_t, float64_t );
static float64_t f64_sub( float64_t, float64_t );
static float64_t f64_mul( float64_t, float64_t );
static float64_t f64_mulAdd( float64_t, float64_t, float64_t );
static float64_t f64_div( float64_t, float64_t );
static float64_t f64_rem( float64_t, float64_t );
static float64_t f64_sqrt( float64_t );
static bool f64_eq( float64_t, float64_t );
static bool f64_le( float64_t, float64_t );
static bool f64_lt( float64_t, float64_t );
/*----------------------------------------------------------------------------
| Ported from OpenCV and fdlibm and added for usability
*----------------------------------------------------------------------------*/
static float32_t f32_powi( float32_t x, int y);
static float64_t f64_powi( float64_t x, int y);
static float64_t f64_sin_kernel(float64_t x);
static float64_t f64_cos_kernel(float64_t x);
static void f64_sincos_reduce(const float64_t& x, float64_t& y, int& n);
static float32_t f32_exp( float32_t x);
static float64_t f64_exp(float64_t x);
static float32_t f32_log(float32_t x);
static float64_t f64_log(float64_t x);
static float32_t f32_cbrt(float32_t x);
static float32_t f32_pow( float32_t x, float32_t y);
static float64_t f64_pow( float64_t x, float64_t y);
static float64_t f64_sin( float64_t x );
static float64_t f64_cos( float64_t x );
/*----------------------------------------------------------------------------
| softfloat and softdouble methods and members
*----------------------------------------------------------------------------*/
softfloat::softfloat( const uint32_t a ) { *this = ui32_to_f32(a); }
softfloat::softfloat( const uint64_t a ) { *this = ui64_to_f32(a); }
softfloat::softfloat( const int32_t a ) { *this = i32_to_f32(a); }
softfloat::softfloat( const int64_t a ) { *this = i64_to_f32(a); }
softfloat::operator softdouble() const { return f32_to_f64(*this); }
softfloat softfloat::operator + (const softfloat& a) const { return f32_add(*this, a); }
softfloat softfloat::operator - (const softfloat& a) const { return f32_sub(*this, a); }
softfloat softfloat::operator * (const softfloat& a) const { return f32_mul(*this, a); }
softfloat softfloat::operator / (const softfloat& a) const { return f32_div(*this, a); }
softfloat softfloat::operator % (const softfloat& a) const { return f32_rem(*this, a); }
bool softfloat::operator == ( const softfloat& a ) const { return f32_eq(*this, a); }
bool softfloat::operator != ( const softfloat& a ) const { return !f32_eq(*this, a); }
bool softfloat::operator > ( const softfloat& a ) const { return f32_lt(a, *this); }
bool softfloat::operator >= ( const softfloat& a ) const { return f32_le(a, *this); }
bool softfloat::operator < ( const softfloat& a ) const { return f32_lt(*this, a); }
bool softfloat::operator <= ( const softfloat& a ) const { return f32_le(*this, a); }
softdouble::softdouble( const uint32_t a ) { *this = ui32_to_f64(a); }
softdouble::softdouble( const uint64_t a ) { *this = ui64_to_f64(a); }
softdouble::softdouble( const int32_t a ) { *this = i32_to_f64(a); }
softdouble::softdouble( const int64_t a ) { *this = i64_to_f64(a); }
}
int cvTrunc(const cv::softfloat& a) { return cv::f32_to_i32_r_minMag(a, false); }
int cvRound(const cv::softfloat& a) { return cv::f32_to_i32(a, cv::round_near_even, false); }
int cvFloor(const cv::softfloat& a) { return cv::f32_to_i32(a, cv::round_min, false); }
int cvCeil (const cv::softfloat& a) { return cv::f32_to_i32(a, cv::round_max, false); }
int cvTrunc(const cv::softdouble& a) { return cv::f64_to_i32_r_minMag(a, false); }
int cvRound(const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_near_even, false); }
int cvFloor(const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_min, false); }
int cvCeil (const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_max, false); }
int64_t cvRound64(const cv::softdouble& a) { return cv::f64_to_i64(a, cv::round_near_even, false); }
namespace cv
{
softdouble::operator softfloat() const { return f64_to_f32(*this); }
softdouble softdouble::operator + (const softdouble& a) const { return f64_add(*this, a); }
softdouble softdouble::operator - (const softdouble& a) const { return f64_sub(*this, a); }
softdouble softdouble::operator * (const softdouble& a) const { return f64_mul(*this, a); }
softdouble softdouble::operator / (const softdouble& a) const { return f64_div(*this, a); }
softdouble softdouble::operator % (const softdouble& a) const { return f64_rem(*this, a); }
bool softdouble::operator == (const softdouble& a) const { return f64_eq(*this, a); }
bool softdouble::operator != (const softdouble& a) const { return !f64_eq(*this, a); }
bool softdouble::operator > (const softdouble& a) const { return f64_lt(a, *this); }
bool softdouble::operator >= (const softdouble& a) const { return f64_le(a, *this); }
bool softdouble::operator < (const softdouble& a) const { return f64_lt(*this, a); }
bool softdouble::operator <= (const softdouble& a) const { return f64_le(*this, a); }
/*----------------------------------------------------------------------------
| Overloads for math functions
*----------------------------------------------------------------------------*/
softfloat mulAdd( const softfloat& a, const softfloat& b, const softfloat & c) { return f32_mulAdd(a, b, c); }
softdouble mulAdd( const softdouble& a, const softdouble& b, const softdouble& c) { return f64_mulAdd(a, b, c); }
softfloat sqrt( const softfloat& a ) { return f32_sqrt(a); }
softdouble sqrt( const softdouble& a ) { return f64_sqrt(a); }
softfloat exp( const softfloat& a) { return f32_exp(a); }
softdouble exp( const softdouble& a) { return f64_exp(a); }
softfloat log( const softfloat& a ) { return f32_log(a); }
softdouble log( const softdouble& a ) { return f64_log(a); }
softfloat pow( const softfloat& a, const softfloat& b) { return f32_pow(a, b); }
softdouble pow( const softdouble& a, const softdouble& b) { return f64_pow(a, b); }
softfloat cbrt(const softfloat& a) { return f32_cbrt(a); }
softdouble sin(const softdouble& a) { return f64_sin(a); }
softdouble cos(const softdouble& a) { return f64_cos(a); }
/*----------------------------------------------------------------------------
| The values to return on conversions to 32-bit integer formats that raise an
| invalid exception.
*----------------------------------------------------------------------------*/
#define ui32_fromPosOverflow 0xFFFFFFFF
#define ui32_fromNegOverflow 0
#define ui32_fromNaN 0xFFFFFFFF
#define i32_fromPosOverflow 0x7FFFFFFF
#define i32_fromNegOverflow (-0x7FFFFFFF - 1)
#define i32_fromNaN 0x7FFFFFFF
/*----------------------------------------------------------------------------
| The values to return on conversions to 64-bit integer formats that raise an
| invalid exception.
*----------------------------------------------------------------------------*/
#define ui64_fromPosOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
#define ui64_fromNegOverflow 0
#define ui64_fromNaN UINT64_C( 0xFFFFFFFFFFFFFFFF )
#define i64_fromPosOverflow UINT64_C( 0x7FFFFFFFFFFFFFFF )
//fixed unsigned unary minus: -x == ~x + 1
//#define i64_fromNegOverflow (-UINT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
#define i64_fromNegOverflow (~UINT64_C( 0x7FFFFFFFFFFFFFFF ) + 1 - 1)
#define i64_fromNaN UINT64_C( 0x7FFFFFFFFFFFFFFF )
/*----------------------------------------------------------------------------
| "Common NaN" structure, used to transfer NaN representations from one format
| to another.
*----------------------------------------------------------------------------*/
struct commonNaN {
bool sign;
#ifndef WORDS_BIGENDIAN
uint64_t v0, v64;
#else
uint64_t v64, v0;
#endif
};
/*----------------------------------------------------------------------------
| The bit pattern for a default generated 32-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF32UI 0xFFC00000
/*----------------------------------------------------------------------------
| Returns true when 32-bit unsigned integer `uiA' has the bit pattern of a
| 32-bit floating-point signaling NaN.
| Note: This macro evaluates its argument more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNF32UI( uiA ) ((((uiA) & 0x7FC00000) == 0x7F800000) && ((uiA) & 0x003FFFFF))
/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a 32-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
static void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr );
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a 32-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
static uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );
/*----------------------------------------------------------------------------
| Interpreting `uiA' and `uiB' as the bit patterns of two 32-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
static uint_fast32_t softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB );
/*----------------------------------------------------------------------------
| The bit pattern for a default generated 64-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF64UI UINT64_C( 0xFFF8000000000000 )
/*----------------------------------------------------------------------------
| Returns true when 64-bit unsigned integer `uiA' has the bit pattern of a
| 64-bit floating-point signaling NaN.
| Note: This macro evaluates its argument more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNF64UI( uiA ) \
((((uiA) & UINT64_C( 0x7FF8000000000000 )) == UINT64_C( 0x7FF0000000000000 )) && \
((uiA) & UINT64_C( 0x0007FFFFFFFFFFFF )))
/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a 64-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
static void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr );
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a 64-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
static uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr );
/*----------------------------------------------------------------------------
| Interpreting `uiA' and `uiB' as the bit patterns of two 64-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
static uint_fast64_t softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
#ifndef WORDS_BIGENDIAN
struct uint128 { uint64_t v0, v64; };
struct uint64_extra { uint64_t extra, v; };
struct uint128_extra { uint64_t extra; struct uint128 v; };
#else
struct uint128 { uint64_t v64, v0; };
struct uint64_extra { uint64_t v, extra; };
struct uint128_extra { struct uint128 v; uint64_t extra; };
#endif
/*----------------------------------------------------------------------------
| These macros are used to isolate the differences in word order between big-
| endian and little-endian platforms.
*----------------------------------------------------------------------------*/
#ifndef WORDS_BIGENDIAN
#define wordIncr 1
#define indexWord( total, n ) (n)
#define indexWordHi( total ) ((total) - 1)
#define indexWordLo( total ) 0
#define indexMultiword( total, m, n ) (n)
#define indexMultiwordHi( total, n ) ((total) - (n))
#define indexMultiwordLo( total, n ) 0
#define indexMultiwordHiBut( total, n ) (n)
#define indexMultiwordLoBut( total, n ) 0
#define INIT_UINTM4( v3, v2, v1, v0 ) { v0, v1, v2, v3 }
#else
#define wordIncr -1
#define indexWord( total, n ) ((total) - 1 - (n))
#define indexWordHi( total ) 0
#define indexWordLo( total ) ((total) - 1)
#define indexMultiword( total, m, n ) ((total) - 1 - (m))
#define indexMultiwordHi( total, n ) 0
#define indexMultiwordLo( total, n ) ((total) - (n))
#define indexMultiwordHiBut( total, n ) 0
#define indexMultiwordLoBut( total, n ) (n)
#define INIT_UINTM4( v3, v2, v1, v0 ) { v3, v2, v1, v0 }
#endif
enum {
softfloat_mulAdd_subC = 1,
softfloat_mulAdd_subProd = 2
};
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
static int_fast32_t softfloat_roundToI32( bool, uint_fast64_t, uint_fast8_t, bool );
struct exp16_sig32 { int_fast16_t exp; uint_fast32_t sig; };
static struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t );
static float32_t softfloat_roundPackToF32( bool, int_fast16_t, uint_fast32_t );
static float32_t softfloat_normRoundPackToF32( bool, int_fast16_t, uint_fast32_t );
static float32_t softfloat_addMagsF32( uint_fast32_t, uint_fast32_t );
static float32_t softfloat_subMagsF32( uint_fast32_t, uint_fast32_t );
static float32_t softfloat_mulAddF32(uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast8_t );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
static int_fast64_t softfloat_roundToI64( bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool);
struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };
static struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t );
static float64_t softfloat_roundPackToF64( bool, int_fast16_t, uint_fast64_t );
static float64_t softfloat_normRoundPackToF64( bool, int_fast16_t, uint_fast64_t );
static float64_t softfloat_addMagsF64( uint_fast64_t, uint_fast64_t, bool );
static float64_t softfloat_subMagsF64( uint_fast64_t, uint_fast64_t, bool );
static float64_t softfloat_mulAddF64( uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast8_t );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
| Shifts 'a' right by the number of bits given in 'dist', which must be in
| the range 1 to 63. If any nonzero bits are shifted off, they are "jammed"
| into the least-significant bit of the shifted value by setting the least-
| significant bit to 1. This shifted-and-jammed value is returned.
*----------------------------------------------------------------------------*/
static inline uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist )
{ return a>>dist | ((a & (((uint_fast64_t) 1<<dist) - 1)) != 0); }
/*----------------------------------------------------------------------------
| Shifts 'a' right by the number of bits given in 'dist', which must not
| be zero. If any nonzero bits are shifted off, they are "jammed" into the
| least-significant bit of the shifted value by setting the least-significant
| bit to 1. This shifted-and-jammed value is returned.
| The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
| greater than 32, the result will be either 0 or 1, depending on whether 'a'
| is zero or nonzero.
*----------------------------------------------------------------------------*/
static inline uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist )
{
//fixed unsigned unary minus: -x == ~x + 1
return (dist < 31) ? a>>dist | ((uint32_t) (a<<((~dist + 1) & 31)) != 0) : (a != 0);
}
/*----------------------------------------------------------------------------
| Shifts 'a' right by the number of bits given in 'dist', which must not
| be zero. If any nonzero bits are shifted off, they are "jammed" into the
| least-significant bit of the shifted value by setting the least-significant
| bit to 1. This shifted-and-jammed value is returned.
| The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
| greater than 64, the result will be either 0 or 1, depending on whether 'a'
| is zero or nonzero.
*----------------------------------------------------------------------------*/
static inline uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist )
{
//fixed unsigned unary minus: -x == ~x + 1
return (dist < 63) ? a>>dist | ((uint64_t) (a<<((~dist + 1) & 63)) != 0) : (a != 0);
}
/*----------------------------------------------------------------------------
| A constant table that translates an 8-bit unsigned integer (the array index)
| into the number of leading 0 bits before the most-significant 1 of that
| integer. For integer zero (index 0), the corresponding table element is 8.
*----------------------------------------------------------------------------*/
static const uint_least8_t softfloat_countLeadingZeros8[256] = {
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| 'a'. If 'a' is zero, 32 is returned.
*----------------------------------------------------------------------------*/
static inline uint_fast8_t softfloat_countLeadingZeros32( uint32_t a )
{
uint_fast8_t count = 0;
if ( a < 0x10000 ) {
count = 16;
a <<= 16;
}
if ( a < 0x1000000 ) {
count += 8;
a <<= 8;
}
count += softfloat_countLeadingZeros8[a>>24];
return count;
}
/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| 'a'. If 'a' is zero, 64 is returned.
*----------------------------------------------------------------------------*/
static uint_fast8_t softfloat_countLeadingZeros64( uint64_t a );
/*----------------------------------------------------------------------------
| Returns an approximation to the reciprocal of the number represented by 'a',
| where 'a' is interpreted as an unsigned fixed-point number with one integer
| bit and 31 fraction bits. The 'a' input must be "normalized", meaning that
| its most-significant bit (bit 31) must be 1. Thus, if A is the value of
| the fixed-point interpretation of 'a', then 1 <= A < 2. The returned value
| is interpreted as a pure unsigned fraction, having no integer bits and 32
| fraction bits. The approximation returned is never greater than the true
| reciprocal 1/A, and it differs from the true reciprocal by at most 2.006 ulp
| (units in the last place).
*----------------------------------------------------------------------------*/
#define softfloat_approxRecip32_1( a ) ((uint32_t) (UINT64_C( 0x7FFFFFFFFFFFFFFF ) / (uint32_t) (a)))
/*----------------------------------------------------------------------------
| Returns an approximation to the reciprocal of the square root of the number
| represented by 'a', where 'a' is interpreted as an unsigned fixed-point
| number either with one integer bit and 31 fraction bits or with two integer
| bits and 30 fraction bits. The format of 'a' is determined by 'oddExpA',
| which must be either 0 or 1. If 'oddExpA' is 1, 'a' is interpreted as
| having one integer bit, and if 'oddExpA' is 0, 'a' is interpreted as having
| two integer bits. The 'a' input must be "normalized", meaning that its
| most-significant bit (bit 31) must be 1. Thus, if A is the value of the
| fixed-point interpretation of 'a', it follows that 1 <= A < 2 when 'oddExpA'
| is 1, and 2 <= A < 4 when 'oddExpA' is 0.
| The returned value is interpreted as a pure unsigned fraction, having
| no integer bits and 32 fraction bits. The approximation returned is never
| greater than the true reciprocal 1/sqrt(A), and it differs from the true
| reciprocal by at most 2.06 ulp (units in the last place). The approximation
| returned is also always within the range 0.5 to 1; thus, the most-
| significant bit of the result is always set.
*----------------------------------------------------------------------------*/
static uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a );
static const uint16_t softfloat_approxRecipSqrt_1k0s[16] = {
0xB4C9, 0xFFAB, 0xAA7D, 0xF11C, 0xA1C5, 0xE4C7, 0x9A43, 0xDA29,
0x93B5, 0xD0E5, 0x8DED, 0xC8B7, 0x88C6, 0xC16D, 0x8424, 0xBAE1
};
static const uint16_t softfloat_approxRecipSqrt_1k1s[16] = {
0xA5A5, 0xEA42, 0x8C21, 0xC62D, 0x788F, 0xAA7F, 0x6928, 0x94B6,
0x5CC7, 0x8335, 0x52A6, 0x74E2, 0x4A3E, 0x68FE, 0x432B, 0x5EFD
};
/*----------------------------------------------------------------------------
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' left by the
| number of bits given in 'dist', which must be in the range 1 to 63.
*----------------------------------------------------------------------------*/
static inline struct uint128 softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
struct uint128 z;
z.v64 = a64<<dist | a0>>(-dist & 63);
z.v0 = a0<<dist;
return z;
}
/*----------------------------------------------------------------------------
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' right by the
| number of bits given in 'dist', which must be in the range 1 to 63. If any
| nonzero bits are shifted off, they are "jammed" into the least-significant
| bit of the shifted value by setting the least-significant bit to 1. This
| shifted-and-jammed value is returned.
*----------------------------------------------------------------------------*/
static inline struct uint128 softfloat_shortShiftRightJam128(uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
uint_fast8_t negDist = -dist;
struct uint128 z;
z.v64 = a64>>dist;
z.v0 =
a64<<(negDist & 63) | a0>>dist
| ((uint64_t) (a0<<(negDist & 63)) != 0);
return z;
}
/*----------------------------------------------------------------------------
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' right by the
| number of bits given in 'dist', which must not be zero. If any nonzero bits
| are shifted off, they are "jammed" into the least-significant bit of the
| shifted value by setting the least-significant bit to 1. This shifted-and-
| jammed value is returned.
| The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
| greater than 128, the result will be either 0 or 1, depending on whether the
| original 128 bits are all zeros.
*----------------------------------------------------------------------------*/
static struct uint128 softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist );
/*----------------------------------------------------------------------------
| Returns the sum of the 128-bit integer formed by concatenating 'a64' and
| 'a0' and the 128-bit integer formed by concatenating 'b64' and 'b0'. The
| addition is modulo 2^128, so any carry out is lost.
*----------------------------------------------------------------------------*/
static inline struct uint128 softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
struct uint128 z;
z.v0 = a0 + b0;
z.v64 = a64 + b64 + (z.v0 < a0);
return z;
}
/*----------------------------------------------------------------------------
| Returns the difference of the 128-bit integer formed by concatenating 'a64'
| and 'a0' and the 128-bit integer formed by concatenating 'b64' and 'b0'.
| The subtraction is modulo 2^128, so any borrow out (carry out) is lost.
*----------------------------------------------------------------------------*/
static inline struct uint128 softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
struct uint128 z;
z.v0 = a0 - b0;
z.v64 = a64 - b64;
z.v64 -= (a0 < b0);
return z;
}
/*----------------------------------------------------------------------------
| Returns the 128-bit product of 'a' and 'b'.
*----------------------------------------------------------------------------*/
static struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
static float32_t f32_add( float32_t a, float32_t b )
{
uint_fast32_t uiA = a.v;
uint_fast32_t uiB = b.v;
if ( signF32UI( uiA ^ uiB ) ) {
return softfloat_subMagsF32( uiA, uiB );
} else {
return softfloat_addMagsF32( uiA, uiB );
}
}
static float32_t f32_div( float32_t a, float32_t b )
{
uint_fast32_t uiA;
bool signA;
int_fast16_t expA;
uint_fast32_t sigA;
uint_fast32_t uiB;
bool signB;
int_fast16_t expB;
uint_fast32_t sigB;
bool signZ;
struct exp16_sig32 normExpSig;
int_fast16_t expZ;
uint_fast64_t sig64A;
uint_fast32_t sigZ;
uint_fast32_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF32UI( uiA );
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
uiB = b.v;
signB = signF32UI( uiB );
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
signZ = signA ^ signB;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA ) goto propagateNaN;
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
goto invalid;
}
goto infinity;
}
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
goto zero;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expB ) {
if ( ! sigB ) {
if ( ! (expA | sigA) ) goto invalid;
raiseFlags( flag_infinite );
goto infinity;
}
normExpSig = softfloat_normSubnormalF32Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
if ( ! expA ) {
if ( ! sigA ) goto zero;
normExpSig = softfloat_normSubnormalF32Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = expA - expB + 0x7E;
sigA |= 0x00800000;
sigB |= 0x00800000;
if ( sigA < sigB ) {
--expZ;
sig64A = (uint_fast64_t) sigA<<31;
} else {
sig64A = (uint_fast64_t) sigA<<30;
}
sigZ = (uint_fast32_t)(sig64A / sigB); // fixed warning on type cast
if ( ! (sigZ & 0x3F) ) sigZ |= ((uint_fast64_t) sigB * sigZ != sig64A);
return softfloat_roundPackToF32( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infinity:
uiZ = packToF32UI( signZ, 0xFF, 0 );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zero:
uiZ = packToF32UI( signZ, 0, 0 );
uiZ:
return float32_t::fromRaw(uiZ);
}
static bool f32_eq( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
uiA = a.v;
uiB = b.v;
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) )
{
if (softfloat_isSigNaNF32UI( uiA ) || softfloat_isSigNaNF32UI( uiB ) )
raiseFlags( flag_invalid );
return false;
}
return (uiA == uiB) || ! (uint32_t) ((uiA | uiB)<<1);
}
static bool f32_le( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
bool signA, signB;
uiA = a.v;
uiB = b.v;
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) )
{
raiseFlags( flag_invalid );
return false;
}
signA = signF32UI( uiA );
signB = signF32UI( uiB );
return (signA != signB) ? signA || ! (uint32_t) ((uiA | uiB)<<1)
: (uiA == uiB) || (signA ^ (uiA < uiB));
}
static bool f32_lt( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
bool signA, signB;
uiA = a.v; uiB = b.v;
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) )
{
raiseFlags( flag_invalid );
return false;
}
signA = signF32UI( uiA );
signB = signF32UI( uiB );
return (signA != signB) ? signA && ((uint32_t) ((uiA | uiB)<<1) != 0)
: (uiA != uiB) && (signA ^ (uiA < uiB));
}
static float32_t f32_mulAdd( float32_t a, float32_t b, float32_t c )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
uint_fast32_t uiC;
uiA = a.v;
uiB = b.v;
uiC = c.v;
return softfloat_mulAddF32( uiA, uiB, uiC, 0 );
}
static float32_t f32_mul( float32_t a, float32_t b )
{
uint_fast32_t uiA;
bool signA;
int_fast16_t expA;
uint_fast32_t sigA;
uint_fast32_t uiB;
bool signB;
int_fast16_t expB;
uint_fast32_t sigB;
bool signZ;
uint_fast32_t magBits;
struct exp16_sig32 normExpSig;
int_fast16_t expZ;
uint_fast32_t sigZ, uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF32UI( uiA );
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
uiB = b.v;
signB = signF32UI( uiB );
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
signZ = signA ^ signB;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA || ((expB == 0xFF) && sigB) ) goto propagateNaN;
magBits = expB | sigB;
goto infArg;
}
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
magBits = expA | sigA;
goto infArg;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) goto zero;
normExpSig = softfloat_normSubnormalF32Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
if ( ! expB ) {
if ( ! sigB ) goto zero;
normExpSig = softfloat_normSubnormalF32Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = expA + expB - 0x7F;
sigA = (sigA | 0x00800000)<<7;
sigB = (sigB | 0x00800000)<<8;
sigZ = (uint_fast32_t)softfloat_shortShiftRightJam64( (uint_fast64_t) sigA * sigB, 32 ); //fixed warning on type cast
if ( sigZ < 0x40000000 ) {
--expZ;
sigZ <<= 1;
}
return softfloat_roundPackToF32( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infArg:
if ( ! magBits ) {
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
} else {
uiZ = packToF32UI( signZ, 0xFF, 0 );
}
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zero:
uiZ = packToF32UI( signZ, 0, 0 );
uiZ:
return float32_t::fromRaw(uiZ);
}
static float32_t f32_rem( float32_t a, float32_t b )
{
uint_fast32_t uiA;
bool signA;
int_fast16_t expA;
uint_fast32_t sigA;
uint_fast32_t uiB;
int_fast16_t expB;
uint_fast32_t sigB;
struct exp16_sig32 normExpSig;
uint32_t rem;
int_fast16_t expDiff;
uint32_t q, recip32, altRem, meanRem;
bool signRem;
uint_fast32_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF32UI( uiA );
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
uiB = b.v;
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA || ((expB == 0xFF) && sigB) ) goto propagateNaN;
goto invalid;
}
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
return a;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expB ) {
if ( ! sigB ) goto invalid;
normExpSig = softfloat_normSubnormalF32Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
if ( ! expA ) {
if ( ! sigA ) return a;
normExpSig = softfloat_normSubnormalF32Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
rem = sigA | 0x00800000;
sigB |= 0x00800000;
expDiff = expA - expB;
if ( expDiff < 1 ) {
if ( expDiff < -1 ) return a;
sigB <<= 6;
if ( expDiff ) {
rem <<= 5;
q = 0;
} else {
rem <<= 6;
q = (sigB <= rem);
if ( q ) rem -= sigB;
}
} else {
recip32 = softfloat_approxRecip32_1( sigB<<8 );
/*--------------------------------------------------------------------
| Changing the shift of `rem' here requires also changing the initial
| subtraction from `expDiff'.
*--------------------------------------------------------------------*/
rem <<= 7;
expDiff -= 31;
/*--------------------------------------------------------------------
| The scale of `sigB' affects how many bits are obtained during each
| cycle of the loop. Currently this is 29 bits per loop iteration,
| which is believed to be the maximum possible.
*--------------------------------------------------------------------*/
sigB <<= 6;
for (;;) {
q = (rem * (uint_fast64_t) recip32)>>32;
if ( expDiff < 0 ) break;
//fixed unsigned unary minus: -x == ~x + 1
rem = ~(q * (uint32_t) sigB) + 1;
expDiff -= 29;
}
/*--------------------------------------------------------------------
| (`expDiff' cannot be less than -30 here.)
*--------------------------------------------------------------------*/
q >>= ~expDiff & 31;
rem = (rem<<(expDiff + 30)) - q * (uint32_t) sigB;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
do {
altRem = rem;
++q;
rem -= sigB;
} while ( ! (rem & 0x80000000) );
meanRem = rem + altRem;
if ( (meanRem & 0x80000000) || (! meanRem && (q & 1)) ) rem = altRem;
signRem = signA;
if ( 0x80000000 <= rem ) {
signRem = ! signRem;
//fixed unsigned unary minus: -x == ~x + 1
rem = ~rem + 1;
}
return softfloat_normRoundPackToF32( signRem, expB, rem );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
goto uiZ;
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
uiZ:
return float32_t::fromRaw(uiZ);
}
static float32_t f32_roundToInt( float32_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast32_t uiA;
int_fast16_t exp;
uint_fast32_t uiZ, lastBitMask, roundBitsMask;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp <= 0x7E ) {
if ( ! (uint32_t) (uiA<<1) ) return a;
if ( exact ) raiseFlags(flag_inexact);
uiZ = uiA & packToF32UI( 1, 0, 0 );
switch ( roundingMode ) {
case round_near_even:
if ( ! fracF32UI( uiA ) ) break;
/* fallthrough */
case round_near_maxMag:
if ( exp == 0x7E ) uiZ |= packToF32UI( 0, 0x7F, 0 );
break;
case round_min:
if ( uiZ ) uiZ = packToF32UI( 1, 0x7F, 0 );
break;
case round_max:
if ( ! uiZ ) uiZ = packToF32UI( 0, 0x7F, 0 );
break;
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( 0x96 <= exp ) {
if ( (exp == 0xFF) && fracF32UI( uiA ) ) {
uiZ = softfloat_propagateNaNF32UI( uiA, 0 );
goto uiZ;
}
return a;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiZ = uiA;
lastBitMask = (uint_fast32_t) 1<<(0x96 - exp);
roundBitsMask = lastBitMask - 1;
if ( roundingMode == round_near_maxMag ) {
uiZ += lastBitMask>>1;
} else if ( roundingMode == round_near_even ) {
uiZ += lastBitMask>>1;
if ( ! (uiZ & roundBitsMask) ) uiZ &= ~lastBitMask;
} else if (
roundingMode
== (signF32UI( uiZ ) ? round_min : round_max)
) {
uiZ += roundBitsMask;
}
uiZ &= ~roundBitsMask;
if ( exact && (uiZ != uiA) ) {
raiseFlags(flag_inexact);
}
uiZ:
return float32_t::fromRaw(uiZ);
}
static float32_t f32_sqrt( float32_t a )
{
uint_fast32_t uiA;
bool signA;
int_fast16_t expA;
uint_fast32_t sigA, uiZ;
struct exp16_sig32 normExpSig;
int_fast16_t expZ;
uint_fast32_t sigZ, shiftedSigZ;
uint32_t negRem;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF32UI( uiA );
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA ) {
uiZ = softfloat_propagateNaNF32UI( uiA, 0 );
goto uiZ;
}
if ( ! signA ) return a;
goto invalid;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( signA ) {
if ( ! (expA | sigA) ) return a;
goto invalid;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) return a;
normExpSig = softfloat_normSubnormalF32Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = ((expA - 0x7F)>>1) + 0x7E;
expA &= 1;
sigA = (sigA | 0x00800000)<<8;
sigZ =
((uint_fast64_t) sigA * softfloat_approxRecipSqrt32_1( expA, sigA ))
>>32;
if ( expA ) sigZ >>= 1;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sigZ += 2;
if ( (sigZ & 0x3F) < 2 ) {
shiftedSigZ = sigZ>>2;
negRem = shiftedSigZ * shiftedSigZ;
sigZ &= ~3;
if ( negRem & 0x80000000 ) {
sigZ |= 1;
} else {
if ( negRem ) --sigZ;
}
}
return softfloat_roundPackToF32( 0, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
uiZ:
return float32_t::fromRaw(uiZ);
}
static float32_t f32_sub( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
uiA = a.v;
uiB = b.v;
if ( signF32UI( uiA ^ uiB ) ) {
return softfloat_addMagsF32( uiA, uiB );
} else {
return softfloat_subMagsF32( uiA, uiB );
}
}
static float64_t f32_to_f64( float32_t a )
{
uint_fast32_t uiA;
bool sign;
int_fast16_t exp;
uint_fast32_t frac;
struct commonNaN commonNaN;
uint_fast64_t uiZ;
struct exp16_sig32 normExpSig;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF32UI( uiA );
exp = expF32UI( uiA );
frac = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp == 0xFF ) {
if ( frac ) {
softfloat_f32UIToCommonNaN( uiA, &commonNaN );
uiZ = softfloat_commonNaNToF64UI( &commonNaN );
} else {
uiZ = packToF64UI( sign, 0x7FF, 0 );
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! exp ) {
if ( ! frac ) {
uiZ = packToF64UI( sign, 0, 0 );
goto uiZ;
}
normExpSig = softfloat_normSubnormalF32Sig( frac );
exp = normExpSig.exp - 1;
frac = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiZ = packToF64UI( sign, exp + 0x380, (uint_fast64_t) frac<<29 );
uiZ:
return float64_t::fromRaw(uiZ);
}
static int_fast32_t f32_to_i32( float32_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast32_t uiA;
bool sign;
int_fast16_t exp;
uint_fast32_t sig;
uint_fast64_t sig64;
int_fast16_t shiftDist;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF32UI( uiA );
exp = expF32UI( uiA );
sig = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
if ( (exp == 0xFF) && sig ) {
#if (i32_fromNaN == i32_fromPosOverflow)
sign = 0;
#elif (i32_fromNaN == i32_fromNegOverflow)
sign = 1;
#else
raiseFlags( flag_invalid );
return i32_fromNaN;
#endif
}
#endif
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp ) sig |= 0x00800000;
sig64 = (uint_fast64_t) sig<<32;
shiftDist = 0xAA - exp;
if ( 0 < shiftDist ) sig64 = softfloat_shiftRightJam64( sig64, shiftDist );
return softfloat_roundToI32( sign, sig64, roundingMode, exact );
}
static int_fast32_t f32_to_i32_r_minMag( float32_t a, bool exact )
{
uint_fast32_t uiA;
int_fast16_t exp;
uint_fast32_t sig;
int_fast16_t shiftDist;
bool sign;
int_fast32_t absZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF32UI( uiA );
sig = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0x9E - exp;
if ( 32 <= shiftDist ) {
if ( exact && (exp | sig) ) {
raiseFlags(flag_inexact);
}
return 0;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sign = signF32UI( uiA );
if ( shiftDist <= 0 ) {
if ( uiA == packToF32UI( 1, 0x9E, 0 ) ) return -0x7FFFFFFF - 1;
raiseFlags( flag_invalid );
return
(exp == 0xFF) && sig ? i32_fromNaN
: sign ? i32_fromNegOverflow : i32_fromPosOverflow;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig = (sig | 0x00800000)<<8;
absZ = sig>>shiftDist;
if ( exact && ((uint_fast32_t) absZ<<shiftDist != sig) ) {
raiseFlags(flag_inexact);
}
return sign ? -absZ : absZ;
}
static float64_t f64_add( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
uint_fast64_t uiB;
bool signB;
uiA = a.v;
signA = signF64UI( uiA );
uiB = b.v;
signB = signF64UI( uiB );
if ( signA == signB ) {
return softfloat_addMagsF64( uiA, uiB, signA );
} else {
return softfloat_subMagsF64( uiA, uiB, signA );
}
}
static float64_t f64_div( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
int_fast16_t expA;
uint_fast64_t sigA;
uint_fast64_t uiB;
bool signB;
int_fast16_t expB;
uint_fast64_t sigB;
bool signZ;
struct exp16_sig64 normExpSig;
int_fast16_t expZ;
uint32_t recip32, sig32Z, doubleTerm;
uint_fast64_t rem;
uint32_t q;
uint_fast64_t sigZ;
uint_fast64_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF64UI( uiA );
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
uiB = b.v;
signB = signF64UI( uiB );
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
signZ = signA ^ signB;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA ) goto propagateNaN;
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
goto invalid;
}
goto infinity;
}
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
goto zero;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expB ) {
if ( ! sigB ) {
if ( ! (expA | sigA) ) goto invalid;
raiseFlags( flag_infinite );
goto infinity;
}
normExpSig = softfloat_normSubnormalF64Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
if ( ! expA ) {
if ( ! sigA ) goto zero;
normExpSig = softfloat_normSubnormalF64Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = expA - expB + 0x3FE;
sigA |= UINT64_C( 0x0010000000000000 );
sigB |= UINT64_C( 0x0010000000000000 );
if ( sigA < sigB ) {
--expZ;
sigA <<= 11;
} else {
sigA <<= 10;
}
sigB <<= 11;
recip32 = softfloat_approxRecip32_1( sigB>>32 ) - 2;
sig32Z = ((uint32_t) (sigA>>32) * (uint_fast64_t) recip32)>>32;
doubleTerm = sig32Z<<1;
rem =
((sigA - (uint_fast64_t) doubleTerm * (uint32_t) (sigB>>32))<<28)
- (uint_fast64_t) doubleTerm * ((uint32_t) sigB>>4);
q = (((uint32_t) (rem>>32) * (uint_fast64_t) recip32)>>32) + 4;
sigZ = ((uint_fast64_t) sig32Z<<32) + ((uint_fast64_t) q<<4);
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( (sigZ & 0x1FF) < 4<<4 ) {
q &= ~7;
sigZ &= ~(uint_fast64_t) 0x7F;
doubleTerm = q<<1;
rem =
((rem - (uint_fast64_t) doubleTerm * (uint32_t) (sigB>>32))<<28)
- (uint_fast64_t) doubleTerm * ((uint32_t) sigB>>4);
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
sigZ -= 1<<7;
} else {
if ( rem ) sigZ |= 1;
}
}
return softfloat_roundPackToF64( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infinity:
uiZ = packToF64UI( signZ, 0x7FF, 0 );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zero:
uiZ = packToF64UI( signZ, 0, 0 );
uiZ:
return float64_t::fromRaw(uiZ);
}
static bool f64_eq( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
uiA = a.v;
uiB = b.v;
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) )
{
if ( softfloat_isSigNaNF64UI( uiA ) || softfloat_isSigNaNF64UI( uiB ) )
raiseFlags( flag_invalid );
return false;
}
return (uiA == uiB) || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ));
}
static bool f64_le( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
bool signA, signB;
uiA = a.v;
uiB = b.v;
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
raiseFlags( flag_invalid );
return false;
}
signA = signF64UI( uiA );
signB = signF64UI( uiB );
return (signA != signB) ? signA || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
: (uiA == uiB) || (signA ^ (uiA < uiB));
}
static bool f64_lt( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
bool signA, signB;
uiA = a.v;
uiB = b.v;
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
raiseFlags( flag_invalid );
return false;
}
signA = signF64UI( uiA );
signB = signF64UI( uiB );
return (signA != signB) ? signA && ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
: (uiA != uiB) && (signA ^ (uiA < uiB));
}
static float64_t f64_mulAdd( float64_t a, float64_t b, float64_t c )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
uint_fast64_t uiC;
uiA = a.v;
uiB = b.v;
uiC = c.v;
return softfloat_mulAddF64( uiA, uiB, uiC, 0 );
}
static float64_t f64_mul( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
int_fast16_t expA;
uint_fast64_t sigA;
uint_fast64_t uiB;
bool signB;
int_fast16_t expB;
uint_fast64_t sigB;
bool signZ;
uint_fast64_t magBits;
struct exp16_sig64 normExpSig;
int_fast16_t expZ;
struct uint128 sig128Z;
uint_fast64_t sigZ, uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF64UI( uiA );
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
uiB = b.v;
signB = signF64UI( uiB );
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
signZ = signA ^ signB;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA || ((expB == 0x7FF) && sigB) ) goto propagateNaN;
magBits = expB | sigB;
goto infArg;
}
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
magBits = expA | sigA;
goto infArg;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) goto zero;
normExpSig = softfloat_normSubnormalF64Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
if ( ! expB ) {
if ( ! sigB ) goto zero;
normExpSig = softfloat_normSubnormalF64Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = expA + expB - 0x3FF;
sigA = (sigA | UINT64_C( 0x0010000000000000 ))<<10;
sigB = (sigB | UINT64_C( 0x0010000000000000 ))<<11;
sig128Z = softfloat_mul64To128( sigA, sigB );
sigZ = sig128Z.v64 | (sig128Z.v0 != 0);
if ( sigZ < UINT64_C( 0x4000000000000000 ) ) {
--expZ;
sigZ <<= 1;
}
return softfloat_roundPackToF64( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infArg:
if ( ! magBits ) {
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
} else {
uiZ = packToF64UI( signZ, 0x7FF, 0 );
}
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zero:
uiZ = packToF64UI( signZ, 0, 0 );
uiZ:
return float64_t::fromRaw(uiZ);
}
static float64_t f64_rem( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
int_fast16_t expA;
uint_fast64_t sigA;
uint_fast64_t uiB;
int_fast16_t expB;
uint_fast64_t sigB;
struct exp16_sig64 normExpSig;
uint64_t rem;
int_fast16_t expDiff;
uint32_t q, recip32;
uint_fast64_t q64;
uint64_t altRem, meanRem;
bool signRem;
uint_fast64_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF64UI( uiA );
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
uiB = b.v;
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA || ((expB == 0x7FF) && sigB) ) goto propagateNaN;
goto invalid;
}
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
return a;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA < expB - 1 ) return a;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expB ) {
if ( ! sigB ) goto invalid;
normExpSig = softfloat_normSubnormalF64Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
if ( ! expA ) {
if ( ! sigA ) return a;
normExpSig = softfloat_normSubnormalF64Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
rem = sigA | UINT64_C( 0x0010000000000000 );
sigB |= UINT64_C( 0x0010000000000000 );
expDiff = expA - expB;
if ( expDiff < 1 ) {
if ( expDiff < -1 ) return a;
sigB <<= 9;
if ( expDiff ) {
rem <<= 8;
q = 0;
} else {
rem <<= 9;
q = (sigB <= rem);
if ( q ) rem -= sigB;
}
} else {
recip32 = softfloat_approxRecip32_1( sigB>>21 );
/*--------------------------------------------------------------------
| Changing the shift of `rem' here requires also changing the initial
| subtraction from `expDiff'.
*--------------------------------------------------------------------*/
rem <<= 9;
expDiff -= 30;
/*--------------------------------------------------------------------
| The scale of `sigB' affects how many bits are obtained during each
| cycle of the loop. Currently this is 29 bits per loop iteration,
| the maximum possible.
*--------------------------------------------------------------------*/
sigB <<= 9;
for (;;) {
q64 = (uint32_t) (rem>>32) * (uint_fast64_t) recip32;
if ( expDiff < 0 ) break;
q = (q64 + 0x80000000)>>32;
rem <<= 29;
rem -= q * (uint64_t) sigB;
if ( rem & UINT64_C( 0x8000000000000000 ) ) rem += sigB;
expDiff -= 29;
}
/*--------------------------------------------------------------------
| (`expDiff' cannot be less than -29 here.)
*--------------------------------------------------------------------*/
q = (uint32_t) (q64>>32)>>(~expDiff & 31);
rem = (rem<<(expDiff + 30)) - q * (uint64_t) sigB;
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
altRem = rem + sigB;
goto selectRem;
}
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
do {
altRem = rem;
++q;
rem -= sigB;
} while ( ! (rem & UINT64_C( 0x8000000000000000 )) );
selectRem:
meanRem = rem + altRem;
if (
(meanRem & UINT64_C( 0x8000000000000000 )) || (! meanRem && (q & 1))
) {
rem = altRem;
}
signRem = signA;
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
signRem = ! signRem;
//fixed unsigned unary minus: -x == ~x + 1
rem = ~rem + 1;
}
return softfloat_normRoundPackToF64( signRem, expB, rem );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
goto uiZ;
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
uiZ:
return float64_t::fromRaw(uiZ);
}
static float64_t f64_roundToInt( float64_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast64_t uiA;
int_fast16_t exp;
uint_fast64_t uiZ, lastBitMask, roundBitsMask;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp <= 0x3FE ) {
if ( ! (uiA & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) return a;
if ( exact ) raiseFlags(flag_inexact);
uiZ = uiA & packToF64UI( 1, 0, 0 );
switch ( roundingMode ) {
case round_near_even:
if ( ! fracF64UI( uiA ) ) break;
/* fallthrough */
case round_near_maxMag:
if ( exp == 0x3FE ) uiZ |= packToF64UI( 0, 0x3FF, 0 );
break;
case round_min:
if ( uiZ ) uiZ = packToF64UI( 1, 0x3FF, 0 );
break;
case round_max:
if ( ! uiZ ) uiZ = packToF64UI( 0, 0x3FF, 0 );
break;
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( 0x433 <= exp ) {
if ( (exp == 0x7FF) && fracF64UI( uiA ) ) {
uiZ = softfloat_propagateNaNF64UI( uiA, 0 );
goto uiZ;
}
return a;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiZ = uiA;
lastBitMask = (uint_fast64_t) 1<<(0x433 - exp);
roundBitsMask = lastBitMask - 1;
if ( roundingMode == round_near_maxMag ) {
uiZ += lastBitMask>>1;
} else if ( roundingMode == round_near_even ) {
uiZ += lastBitMask>>1;
if ( ! (uiZ & roundBitsMask) ) uiZ &= ~lastBitMask;
} else if (
roundingMode
== (signF64UI( uiZ ) ? round_min : round_max)
) {
uiZ += roundBitsMask;
}
uiZ &= ~roundBitsMask;
if ( exact && (uiZ != uiA) ) {
raiseFlags(flag_inexact);
}
uiZ:
return float64_t::fromRaw(uiZ);
}
static float64_t f64_sqrt( float64_t a )
{
uint_fast64_t uiA;
bool signA;
int_fast16_t expA;
uint_fast64_t sigA, uiZ;
struct exp16_sig64 normExpSig;
int_fast16_t expZ;
uint32_t sig32A, recipSqrt32, sig32Z;
uint_fast64_t rem;
uint32_t q;
uint_fast64_t sigZ, shiftedSigZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
signA = signF64UI( uiA );
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA ) {
uiZ = softfloat_propagateNaNF64UI( uiA, 0 );
goto uiZ;
}
if ( ! signA ) return a;
goto invalid;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( signA ) {
if ( ! (expA | sigA) ) return a;
goto invalid;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) return a;
normExpSig = softfloat_normSubnormalF64Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
/*------------------------------------------------------------------------
| (`sig32Z' is guaranteed to be a lower bound on the square root of
| `sig32A', which makes `sig32Z' also a lower bound on the square root of
| `sigA'.)
*------------------------------------------------------------------------*/
expZ = ((expA - 0x3FF)>>1) + 0x3FE;
expA &= 1;
sigA |= UINT64_C( 0x0010000000000000 );
sig32A = (uint32_t)(sigA>>21); //fixed warning on type cast
recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
sig32Z = ((uint_fast64_t) sig32A * recipSqrt32)>>32;
if ( expA ) {
sigA <<= 8;
sig32Z >>= 1;
} else {
sigA <<= 9;
}
rem = sigA - (uint_fast64_t) sig32Z * sig32Z;
q = ((uint32_t) (rem>>2) * (uint_fast64_t) recipSqrt32)>>32;
sigZ = ((uint_fast64_t) sig32Z<<32 | 1<<5) + ((uint_fast64_t) q<<3);
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( (sigZ & 0x1FF) < 0x22 ) {
sigZ &= ~(uint_fast64_t) 0x3F;
shiftedSigZ = sigZ>>6;
rem = (sigA<<52) - shiftedSigZ * shiftedSigZ;
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
--sigZ;
} else {
if ( rem ) sigZ |= 1;
}
}
return softfloat_roundPackToF64( 0, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
uiZ:
return float64_t::fromRaw(uiZ);
}
static float64_t f64_sub( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
uint_fast64_t uiB;
bool signB;
uiA = a.v;
signA = signF64UI( uiA );
uiB = b.v;
signB = signF64UI( uiB );
if ( signA == signB ) {
return softfloat_subMagsF64( uiA, uiB, signA );
} else {
return softfloat_addMagsF64( uiA, uiB, signA );
}
}
static float32_t f64_to_f32( float64_t a )
{
uint_fast64_t uiA;
bool sign;
int_fast16_t exp;
uint_fast64_t frac;
struct commonNaN commonNaN;
uint_fast32_t uiZ, frac32;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF64UI( uiA );
exp = expF64UI( uiA );
frac = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp == 0x7FF ) {
if ( frac ) {
softfloat_f64UIToCommonNaN( uiA, &commonNaN );
uiZ = softfloat_commonNaNToF32UI( &commonNaN );
} else {
uiZ = packToF32UI( sign, 0xFF, 0 );
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
frac32 = (uint_fast32_t)softfloat_shortShiftRightJam64( frac, 22 ); //fixed warning on type cast
if ( ! (exp | frac32) ) {
uiZ = packToF32UI( sign, 0, 0 );
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
return softfloat_roundPackToF32( sign, exp - 0x381, frac32 | 0x40000000 );
uiZ:
return float32_t::fromRaw(uiZ);
}
static int_fast32_t f64_to_i32( float64_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast64_t uiA;
bool sign;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF64UI( uiA );
exp = expF64UI( uiA );
sig = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
if ( (exp == 0x7FF) && sig ) {
#if (i32_fromNaN == i32_fromPosOverflow)
sign = 0;
#elif (i32_fromNaN == i32_fromNegOverflow)
sign = 1;
#else
raiseFlags( flag_invalid );
return i32_fromNaN;
#endif
}
#endif
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
shiftDist = 0x427 - exp;
if ( 0 < shiftDist ) sig = softfloat_shiftRightJam64( sig, shiftDist );
return softfloat_roundToI32( sign, sig, roundingMode, exact );
}
static int_fast64_t f64_to_i64(float64_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast64_t uiA;
bool sign;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF64UI(uiA);
exp = expF64UI(uiA);
sig = fracF64UI(uiA);
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
#if (i64_fromNaN != i64_fromPosOverflow) || (i64_fromNaN != i64_fromNegOverflow)
if ((exp == 0x7FF) && sig) {
#if (i64_fromNaN == i64_fromPosOverflow)
sign = 0;
#elif (i64_fromNaN == i64_fromNegOverflow)
sign = 1;
#else
raiseFlags(flag_invalid);
return i64_fromNaN;
#endif
}
#endif
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if (exp) sig |= UINT64_C(0x0010000000000000);
shiftDist = 0x433 - exp;
if (shiftDist <= 0) {
bool isValid = shiftDist >= -11;
if (isValid)
{
uint_fast64_t z = sig << -shiftDist;
if (0 == (z & UINT64_C(0x8000000000000000)))
{
return sign ? -(int_fast64_t)z : (int_fast64_t)z;
}
}
raiseFlags(flag_invalid);
return sign ? i64_fromNegOverflow : i64_fromPosOverflow;
}
else {
if (shiftDist < 64)
return
softfloat_roundToI64(
sign, sig >> shiftDist, sig << (-shiftDist & 63), roundingMode, exact);
else
return
softfloat_roundToI64(
sign, 0, (shiftDist == 64) ? sig : (sig != 0), roundingMode, exact);
}
}
static int_fast32_t f64_to_i32_r_minMag( float64_t a, bool exact )
{
uint_fast64_t uiA;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
bool sign;
int_fast32_t absZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
exp = expF64UI( uiA );
sig = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
shiftDist = 0x433 - exp;
if ( 53 <= shiftDist ) {
if ( exact && (exp | sig) ) {
raiseFlags(flag_inexact);
}
return 0;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sign = signF64UI( uiA );
if ( shiftDist < 22 ) {
if (
sign && (exp == 0x41E) && (sig < UINT64_C( 0x0000000000200000 ))
) {
if ( exact && sig ) {
raiseFlags(flag_inexact);
}
return -0x7FFFFFFF - 1;
}
raiseFlags( flag_invalid );
return
(exp == 0x7FF) && sig ? i32_fromNaN
: sign ? i32_fromNegOverflow : i32_fromPosOverflow;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig |= UINT64_C( 0x0010000000000000 );
absZ = (int_fast32_t)(sig>>shiftDist); //fixed warning on type cast
if ( exact && ((uint_fast64_t) (uint_fast32_t) absZ<<shiftDist != sig) ) {
raiseFlags(flag_inexact);
}
return sign ? -absZ : absZ;
}
static float32_t i32_to_f32( int32_t a )
{
bool sign;
uint_fast32_t absA;
sign = (a < 0);
if ( ! (a & 0x7FFFFFFF) ) {
return float32_t::fromRaw(sign ? packToF32UI( 1, 0x9E, 0 ) : 0);
}
//fixed unsigned unary minus: -x == ~x + 1
absA = sign ? (~(uint_fast32_t) a + 1) : (uint_fast32_t) a;
return softfloat_normRoundPackToF32( sign, 0x9C, absA );
}
static float64_t i32_to_f64( int32_t a )
{
uint_fast64_t uiZ;
bool sign;
uint_fast32_t absA;
int_fast8_t shiftDist;
if ( ! a ) {
uiZ = 0;
} else {
sign = (a < 0);
//fixed unsigned unary minus: -x == ~x + 1
absA = sign ? (~(uint_fast32_t) a + 1) : (uint_fast32_t) a;
shiftDist = softfloat_countLeadingZeros32( absA ) + 21;
uiZ =
packToF64UI(
sign, 0x432 - shiftDist, (uint_fast64_t) absA<<shiftDist );
}
return float64_t::fromRaw(uiZ);
}
static float32_t i64_to_f32( int64_t a )
{
bool sign;
uint_fast64_t absA;
int_fast8_t shiftDist;
uint_fast32_t sig;
sign = (a < 0);
//fixed unsigned unary minus: -x == ~x + 1
absA = sign ? (~(uint_fast64_t) a + 1) : (uint_fast64_t) a;
shiftDist = softfloat_countLeadingZeros64( absA ) - 40;
if ( 0 <= shiftDist ) {
return float32_t::fromRaw(a ? packToF32UI(sign, 0x95 - shiftDist, (uint_fast32_t) absA<<shiftDist ) : 0);
} else {
shiftDist += 7;
sig =
(shiftDist < 0)
? (uint_fast32_t) softfloat_shortShiftRightJam64( absA, -shiftDist ) //fixed warning on type cast
: (uint_fast32_t) absA<<shiftDist;
return softfloat_roundPackToF32( sign, 0x9C - shiftDist, sig );
}
}
static float64_t i64_to_f64( int64_t a )
{
bool sign;
uint_fast64_t absA;
sign = (a < 0);
if ( ! (a & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) {
return float64_t::fromRaw(sign ? packToF64UI( 1, 0x43E, 0 ) : 0);
}
//fixed unsigned unary minus: -x == ~x + 1
absA = sign ? (~(uint_fast64_t) a + 1) : (uint_fast64_t) a;
return softfloat_normRoundPackToF64( sign, 0x43C, absA );
}
static float32_t softfloat_addMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
{
int_fast16_t expA;
uint_fast32_t sigA;
int_fast16_t expB;
uint_fast32_t sigB;
int_fast16_t expDiff;
uint_fast32_t uiZ;
bool signZ;
int_fast16_t expZ;
uint_fast32_t sigZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expDiff = expA - expB;
if ( ! expDiff ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( ! expA ) {
uiZ = uiA + sigB;
goto uiZ;
}
if ( expA == 0xFF ) {
if ( sigA | sigB ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
signZ = signF32UI( uiA );
expZ = expA;
sigZ = 0x01000000 + sigA + sigB;
if ( ! (sigZ & 1) && (expZ < 0xFE) ) {
uiZ = packToF32UI( signZ, expZ, sigZ>>1 );
goto uiZ;
}
sigZ <<= 6;
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
signZ = signF32UI( uiA );
sigA <<= 6;
sigB <<= 6;
if ( expDiff < 0 ) {
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
uiZ = packToF32UI( signZ, 0xFF, 0 );
goto uiZ;
}
expZ = expB;
sigA += expA ? 0x20000000 : sigA;
sigA = softfloat_shiftRightJam32( sigA, -expDiff );
} else {
if ( expA == 0xFF ) {
if ( sigA ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
expZ = expA;
sigB += expB ? 0x20000000 : sigB;
sigB = softfloat_shiftRightJam32( sigB, expDiff );
}
sigZ = 0x20000000 + sigA + sigB;
if ( sigZ < 0x40000000 ) {
--expZ;
sigZ <<= 1;
}
}
return softfloat_roundPackToF32( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
uiZ:
return float32_t::fromRaw(uiZ);
}
static float64_t
softfloat_addMagsF64( uint_fast64_t uiA, uint_fast64_t uiB, bool signZ )
{
int_fast16_t expA;
uint_fast64_t sigA;
int_fast16_t expB;
uint_fast64_t sigB;
int_fast16_t expDiff;
uint_fast64_t uiZ;
int_fast16_t expZ;
uint_fast64_t sigZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expDiff = expA - expB;
if ( ! expDiff ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( ! expA ) {
uiZ = uiA + sigB;
goto uiZ;
}
if ( expA == 0x7FF ) {
if ( sigA | sigB ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
expZ = expA;
sigZ = UINT64_C( 0x0020000000000000 ) + sigA + sigB;
sigZ <<= 9;
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
sigA <<= 9;
sigB <<= 9;
if ( expDiff < 0 ) {
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
uiZ = packToF64UI( signZ, 0x7FF, 0 );
goto uiZ;
}
expZ = expB;
if ( expA ) {
sigA += UINT64_C( 0x2000000000000000 );
} else {
sigA <<= 1;
}
sigA = softfloat_shiftRightJam64( sigA, -expDiff );
} else {
if ( expA == 0x7FF ) {
if ( sigA ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
expZ = expA;
if ( expB ) {
sigB += UINT64_C( 0x2000000000000000 );
} else {
sigB <<= 1;
}
sigB = softfloat_shiftRightJam64( sigB, expDiff );
}
sigZ = UINT64_C( 0x2000000000000000 ) + sigA + sigB;
if ( sigZ < UINT64_C( 0x4000000000000000 ) ) {
--expZ;
sigZ <<= 1;
}
}
return softfloat_roundPackToF64( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
uiZ:
return float64_t::fromRaw(uiZ);
}
static uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a )
{
int index;
uint16_t eps, r0;
uint_fast32_t ESqrR0;
uint32_t sigma0;
uint_fast32_t r;
uint32_t sqrSigma0;
index = (a>>27 & 0xE) + oddExpA;
eps = (uint16_t) (a>>12);
r0 = softfloat_approxRecipSqrt_1k0s[index]
- ((softfloat_approxRecipSqrt_1k1s[index] * (uint_fast32_t) eps)
>>20);
ESqrR0 = (uint_fast32_t) r0 * r0;
if ( ! oddExpA ) ESqrR0 <<= 1;
sigma0 = ~(uint_fast32_t) (((uint32_t) ESqrR0 * (uint_fast64_t) a)>>23);
r = (uint_fast32_t)(((uint_fast32_t) r0<<16) + ((r0 * (uint_fast64_t) sigma0)>>25)); //fixed warning on type cast
sqrSigma0 = ((uint_fast64_t) sigma0 * sigma0)>>32;
r += ((uint32_t) ((r>>1) + (r>>3) - ((uint_fast32_t) r0<<14))
* (uint_fast64_t) sqrSigma0)
>>48;
if ( ! (r & 0x80000000) ) r = 0x80000000;
return r;
}
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a 32-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
static uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr )
{
return (uint_fast32_t) aPtr->sign<<31 | 0x7FC00000 | aPtr->v64>>41;
}
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a 64-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
static uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr )
{
return
(uint_fast64_t) aPtr->sign<<63 | UINT64_C( 0x7FF8000000000000 )
| aPtr->v64>>12;
}
static uint_fast8_t softfloat_countLeadingZeros64( uint64_t a )
{
uint_fast8_t count;
uint32_t a32;
count = 0;
a32 = a>>32;
if ( ! a32 ) {
count = 32;
a32 = (uint32_t) a; //fixed warning on type cast
}
/*------------------------------------------------------------------------
| From here, result is current count + count leading zeros of `a32'.
*------------------------------------------------------------------------*/
if ( a32 < 0x10000 ) {
count += 16;
a32 <<= 16;
}
if ( a32 < 0x1000000 ) {
count += 8;
a32 <<= 8;
}
count += softfloat_countLeadingZeros8[a32>>24];
return count;
}
/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a 32-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
static void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr )
{
if ( softfloat_isSigNaNF32UI( uiA ) ) {
raiseFlags( flag_invalid );
}
zPtr->sign = (uiA>>31) != 0;
zPtr->v64 = (uint_fast64_t) uiA<<41;
zPtr->v0 = 0;
}
/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a 64-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
static void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr )
{
if ( softfloat_isSigNaNF64UI( uiA ) ) {
raiseFlags( flag_invalid );
}
zPtr->sign = (uiA>>63) != 0;
zPtr->v64 = uiA<<12;
zPtr->v0 = 0;
}
static struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b )
{
uint32_t a32, a0, b32, b0;
struct uint128 z;
uint64_t mid1, mid;
a32 = a>>32;
a0 = (uint32_t)a; //fixed warning on type cast
b32 = b>>32;
b0 = (uint32_t) b; //fixed warning on type cast
z.v0 = (uint_fast64_t) a0 * b0;
mid1 = (uint_fast64_t) a32 * b0;
mid = mid1 + (uint_fast64_t) a0 * b32;
z.v64 = (uint_fast64_t) a32 * b32;
z.v64 += (uint_fast64_t) (mid < mid1)<<32 | mid>>32;
mid <<= 32;
z.v0 += mid;
z.v64 += (z.v0 < mid);
return z;
}
static float32_t
softfloat_mulAddF32(
uint_fast32_t uiA, uint_fast32_t uiB, uint_fast32_t uiC, uint_fast8_t op )
{
bool signA;
int_fast16_t expA;
uint_fast32_t sigA;
bool signB;
int_fast16_t expB;
uint_fast32_t sigB;
bool signC;
int_fast16_t expC;
uint_fast32_t sigC;
bool signProd;
uint_fast32_t magBits, uiZ;
struct exp16_sig32 normExpSig;
int_fast16_t expProd;
uint_fast64_t sigProd;
bool signZ;
int_fast16_t expZ;
uint_fast32_t sigZ;
int_fast16_t expDiff;
uint_fast64_t sig64Z, sig64C;
int_fast8_t shiftDist;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
signA = signF32UI( uiA );
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
signB = signF32UI( uiB );
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
signC = signF32UI( uiC ) ^ (op == softfloat_mulAdd_subC);
expC = expF32UI( uiC );
sigC = fracF32UI( uiC );
signProd = signA ^ signB ^ (op == softfloat_mulAdd_subProd);
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA || ((expB == 0xFF) && sigB) ) goto propagateNaN_ABC;
magBits = expB | sigB;
goto infProdArg;
}
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN_ABC;
magBits = expA | sigA;
goto infProdArg;
}
if ( expC == 0xFF ) {
if ( sigC ) {
uiZ = 0;
goto propagateNaN_ZC;
}
uiZ = uiC;
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) goto zeroProd;
normExpSig = softfloat_normSubnormalF32Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
if ( ! expB ) {
if ( ! sigB ) goto zeroProd;
normExpSig = softfloat_normSubnormalF32Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expProd = expA + expB - 0x7E;
sigA = (sigA | 0x00800000)<<7;
sigB = (sigB | 0x00800000)<<7;
sigProd = (uint_fast64_t) sigA * sigB;
if ( sigProd < UINT64_C( 0x2000000000000000 ) ) {
--expProd;
sigProd <<= 1;
}
signZ = signProd;
if ( ! expC ) {
if ( ! sigC ) {
expZ = expProd - 1;
sigZ = (uint_fast32_t) softfloat_shortShiftRightJam64( sigProd, 31 ); //fixed warning on type cast
goto roundPack;
}
normExpSig = softfloat_normSubnormalF32Sig( sigC );
expC = normExpSig.exp;
sigC = normExpSig.sig;
}
sigC = (sigC | 0x00800000)<<6;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expDiff = expProd - expC;
if ( signProd == signC ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( expDiff <= 0 ) {
expZ = expC;
sigZ = sigC + (uint_fast32_t) softfloat_shiftRightJam64( sigProd, 32 - expDiff ); //fixed warning on type cast
} else {
expZ = expProd;
sig64Z =
sigProd
+ softfloat_shiftRightJam64(
(uint_fast64_t) sigC<<32, expDiff );
sigZ = (uint_fast32_t) softfloat_shortShiftRightJam64( sig64Z, 32 ); //fixed warning on type cast
}
if ( sigZ < 0x40000000 ) {
--expZ;
sigZ <<= 1;
}
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
sig64C = (uint_fast64_t) sigC<<32;
if ( expDiff < 0 ) {
signZ = signC;
expZ = expC;
sig64Z = sig64C - softfloat_shiftRightJam64( sigProd, -expDiff );
} else if ( ! expDiff ) {
expZ = expProd;
sig64Z = sigProd - sig64C;
if ( ! sig64Z ) goto completeCancellation;
if ( sig64Z & UINT64_C( 0x8000000000000000 ) ) {
signZ = ! signZ;
//fixed unsigned unary minus: -x == ~x + 1
sig64Z = ~sig64Z + 1;
}
} else {
expZ = expProd;
sig64Z = sigProd - softfloat_shiftRightJam64( sig64C, expDiff );
}
shiftDist = softfloat_countLeadingZeros64( sig64Z ) - 1;
expZ -= shiftDist;
shiftDist -= 32;
if ( shiftDist < 0 ) {
sigZ = (uint_fast32_t) softfloat_shortShiftRightJam64( sig64Z, -shiftDist ); //fixed warning on type cast
} else {
sigZ = (uint_fast32_t) sig64Z<<shiftDist;
}
}
roundPack:
return softfloat_roundPackToF32( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN_ABC:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
goto propagateNaN_ZC;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infProdArg:
if ( magBits ) {
uiZ = packToF32UI( signProd, 0xFF, 0 );
if ( expC != 0xFF ) goto uiZ;
if ( sigC ) goto propagateNaN_ZC;
if ( signProd == signC ) goto uiZ;
}
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
propagateNaN_ZC:
uiZ = softfloat_propagateNaNF32UI( uiZ, uiC );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zeroProd:
uiZ = uiC;
if ( ! (expC | sigC) && (signProd != signC) ) {
completeCancellation:
uiZ =
packToF32UI((globalRoundingMode == round_min), 0, 0 );
}
uiZ:
return float32_t::fromRaw(uiZ);
}
static float64_t
softfloat_mulAddF64(
uint_fast64_t uiA, uint_fast64_t uiB, uint_fast64_t uiC, uint_fast8_t op )
{
bool signA;
int_fast16_t expA;
uint_fast64_t sigA;
bool signB;
int_fast16_t expB;
uint_fast64_t sigB;
bool signC;
int_fast16_t expC;
uint_fast64_t sigC;
bool signZ;
uint_fast64_t magBits, uiZ;
struct exp16_sig64 normExpSig;
int_fast16_t expZ;
struct uint128 sig128Z;
uint_fast64_t sigZ;
int_fast16_t expDiff;
struct uint128 sig128C;
int_fast8_t shiftDist;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
signA = signF64UI( uiA );
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
signB = signF64UI( uiB );
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
signC = signF64UI( uiC ) ^ (op == softfloat_mulAdd_subC);
expC = expF64UI( uiC );
sigC = fracF64UI( uiC );
signZ = signA ^ signB ^ (op == softfloat_mulAdd_subProd);
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA || ((expB == 0x7FF) && sigB) ) goto propagateNaN_ABC;
magBits = expB | sigB;
goto infProdArg;
}
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN_ABC;
magBits = expA | sigA;
goto infProdArg;
}
if ( expC == 0x7FF ) {
if ( sigC ) {
uiZ = 0;
goto propagateNaN_ZC;
}
uiZ = uiC;
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! expA ) {
if ( ! sigA ) goto zeroProd;
normExpSig = softfloat_normSubnormalF64Sig( sigA );
expA = normExpSig.exp;
sigA = normExpSig.sig;
}
if ( ! expB ) {
if ( ! sigB ) goto zeroProd;
normExpSig = softfloat_normSubnormalF64Sig( sigB );
expB = normExpSig.exp;
sigB = normExpSig.sig;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expZ = expA + expB - 0x3FE;
sigA = (sigA | UINT64_C( 0x0010000000000000 ))<<10;
sigB = (sigB | UINT64_C( 0x0010000000000000 ))<<10;
sig128Z = softfloat_mul64To128( sigA, sigB );
if ( sig128Z.v64 < UINT64_C( 0x2000000000000000 ) ) {
--expZ;
sig128Z =
softfloat_add128(
sig128Z.v64, sig128Z.v0, sig128Z.v64, sig128Z.v0 );
}
if ( ! expC ) {
if ( ! sigC ) {
--expZ;
sigZ = sig128Z.v64<<1 | (sig128Z.v0 != 0);
goto roundPack;
}
normExpSig = softfloat_normSubnormalF64Sig( sigC );
expC = normExpSig.exp;
sigC = normExpSig.sig;
}
sigC = (sigC | UINT64_C( 0x0010000000000000 ))<<9;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
//fixed initialization
sig128C.v0 = sig128C.v64 = 0;
expDiff = expZ - expC;
if ( expDiff < 0 ) {
expZ = expC;
if ( (signZ == signC) || (expDiff < -1) ) {
sig128Z.v64 = softfloat_shiftRightJam64( sig128Z.v64, -expDiff );
} else {
sig128Z =
softfloat_shortShiftRightJam128( sig128Z.v64, sig128Z.v0, 1 );
}
} else if ( expDiff ) {
sig128C = softfloat_shiftRightJam128( sigC, 0, expDiff );
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( signZ == signC ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( expDiff <= 0 ) {
sigZ = (sigC + sig128Z.v64) | (sig128Z.v0 != 0);
} else {
sig128Z =
softfloat_add128(
sig128Z.v64, sig128Z.v0, sig128C.v64, sig128C.v0 );
sigZ = sig128Z.v64 | (sig128Z.v0 != 0);
}
if ( sigZ < UINT64_C( 0x4000000000000000 ) ) {
--expZ;
sigZ <<= 1;
}
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( expDiff < 0 ) {
signZ = signC;
sig128Z = softfloat_sub128( sigC, 0, sig128Z.v64, sig128Z.v0 );
} else if ( ! expDiff ) {
sig128Z.v64 = sig128Z.v64 - sigC;
if ( ! (sig128Z.v64 | sig128Z.v0) ) goto completeCancellation;
if ( sig128Z.v64 & UINT64_C( 0x8000000000000000 ) ) {
signZ = ! signZ;
sig128Z = softfloat_sub128( 0, 0, sig128Z.v64, sig128Z.v0 );
}
} else {
sig128Z =
softfloat_sub128(
sig128Z.v64, sig128Z.v0, sig128C.v64, sig128C.v0 );
}
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( ! sig128Z.v64 ) {
expZ -= 64;
sig128Z.v64 = sig128Z.v0;
sig128Z.v0 = 0;
}
shiftDist = softfloat_countLeadingZeros64( sig128Z.v64 ) - 1;
expZ -= shiftDist;
if ( shiftDist < 0 ) {
sigZ = softfloat_shortShiftRightJam64( sig128Z.v64, -shiftDist );
} else {
sig128Z =
softfloat_shortShiftLeft128(
sig128Z.v64, sig128Z.v0, shiftDist );
sigZ = sig128Z.v64;
}
sigZ |= (sig128Z.v0 != 0);
}
roundPack:
return softfloat_roundPackToF64( signZ, expZ, sigZ );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN_ABC:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
goto propagateNaN_ZC;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
infProdArg:
if ( magBits ) {
uiZ = packToF64UI( signZ, 0x7FF, 0 );
if ( expC != 0x7FF ) goto uiZ;
if ( sigC ) goto propagateNaN_ZC;
if ( signZ == signC ) goto uiZ;
}
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
propagateNaN_ZC:
uiZ = softfloat_propagateNaNF64UI( uiZ, uiC );
goto uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
zeroProd:
uiZ = uiC;
if ( ! (expC | sigC) && (signZ != signC) ) {
completeCancellation:
uiZ =
packToF64UI((globalRoundingMode == round_min), 0, 0 );
}
uiZ:
return float64_t::fromRaw(uiZ);
}
static float32_t
softfloat_normRoundPackToF32( bool sign, int_fast16_t exp, uint_fast32_t sig )
{
int_fast8_t shiftDist;
shiftDist = softfloat_countLeadingZeros32( sig ) - 1;
exp -= shiftDist;
if ( (7 <= shiftDist) && ((unsigned int) exp < 0xFD) ) {
return float32_t::fromRaw(packToF32UI( sign, sig ? exp : 0, sig<<(shiftDist - 7) ));
} else {
return softfloat_roundPackToF32( sign, exp, sig<<shiftDist );
}
}
static float64_t
softfloat_normRoundPackToF64( bool sign, int_fast16_t exp, uint_fast64_t sig )
{
int_fast8_t shiftDist;
shiftDist = softfloat_countLeadingZeros64( sig ) - 1;
exp -= shiftDist;
if ( (10 <= shiftDist) && ((unsigned int) exp < 0x7FD) ) {
return float64_t::fromRaw(packToF64UI( sign, sig ? exp : 0, sig<<(shiftDist - 10) ));
} else {
return softfloat_roundPackToF64( sign, exp, sig<<shiftDist );
}
}
static struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t sig )
{
int_fast8_t shiftDist;
struct exp16_sig32 z;
shiftDist = softfloat_countLeadingZeros32( sig ) - 8;
z.exp = 1 - shiftDist;
z.sig = sig<<shiftDist;
return z;
}
static struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t sig )
{
int_fast8_t shiftDist;
struct exp16_sig64 z;
shiftDist = softfloat_countLeadingZeros64( sig ) - 11;
z.exp = 1 - shiftDist;
z.sig = sig<<shiftDist;
return z;
}
/*----------------------------------------------------------------------------
| Interpreting `uiA' and `uiB' as the bit patterns of two 32-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
static uint_fast32_t
softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB )
{
bool isSigNaNA;
isSigNaNA = softfloat_isSigNaNF32UI( uiA );
if ( isSigNaNA || softfloat_isSigNaNF32UI( uiB ) ) {
raiseFlags( flag_invalid );
if ( isSigNaNA ) return uiA | 0x00400000;
}
return (isNaNF32UI( uiA ) ? uiA : uiB) | 0x00400000;
}
/*----------------------------------------------------------------------------
| Interpreting `uiA' and `uiB' as the bit patterns of two 64-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
static uint_fast64_t
softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB )
{
bool isSigNaNA;
isSigNaNA = softfloat_isSigNaNF64UI( uiA );
if ( isSigNaNA || softfloat_isSigNaNF64UI( uiB ) ) {
raiseFlags( flag_invalid );
if ( isSigNaNA ) return uiA | UINT64_C( 0x0008000000000000 );
}
return (isNaNF64UI( uiA ) ? uiA : uiB) | UINT64_C( 0x0008000000000000 );
}
static float32_t
softfloat_roundPackToF32( bool sign, int_fast16_t exp, uint_fast32_t sig )
{
uint_fast8_t roundingMode;
bool roundNearEven;
uint_fast8_t roundIncrement, roundBits;
bool isTiny;
uint_fast32_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundingMode = globalRoundingMode;
roundNearEven = (roundingMode == round_near_even);
roundIncrement = 0x40;
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
roundIncrement =
(roundingMode
== (sign ? round_min : round_max))
? 0x7F
: 0;
}
roundBits = sig & 0x7F;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( 0xFD <= (unsigned int) exp ) {
if ( exp < 0 ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
isTiny =
(globalDetectTininess == tininess_beforeRounding)
|| (exp < -1) || (sig + roundIncrement < 0x80000000);
sig = softfloat_shiftRightJam32( sig, -exp );
exp = 0;
roundBits = sig & 0x7F;
if ( isTiny && roundBits ) {
raiseFlags( flag_underflow );
}
} else if ( (0xFD < exp) || (0x80000000 <= sig + roundIncrement) ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
raiseFlags(
flag_overflow | flag_inexact );
uiZ = packToF32UI( sign, 0xFF, 0 ) - ! roundIncrement;
goto uiZ;
}
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig = (sig + roundIncrement)>>7;
if ( roundBits ) {
raiseFlags(flag_inexact);
if ( roundingMode == round_odd ) {
sig |= 1;
goto packReturn;
}
}
sig &= ~(uint_fast32_t) (! (roundBits ^ 0x40) & roundNearEven);
if ( ! sig ) exp = 0;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
packReturn:
uiZ = packToF32UI( sign, exp, sig );
uiZ:
return float32_t::fromRaw(uiZ);
}
static float64_t
softfloat_roundPackToF64( bool sign, int_fast16_t exp, uint_fast64_t sig )
{
uint_fast8_t roundingMode;
bool roundNearEven;
uint_fast16_t roundIncrement, roundBits;
bool isTiny;
uint_fast64_t uiZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundingMode = globalRoundingMode;
roundNearEven = (roundingMode == round_near_even);
roundIncrement = 0x200;
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
roundIncrement =
(roundingMode
== (sign ? round_min : round_max))
? 0x3FF
: 0;
}
roundBits = sig & 0x3FF;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( 0x7FD <= (uint16_t) exp ) {
if ( exp < 0 ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
isTiny =
(globalDetectTininess == tininess_beforeRounding)
|| (exp < -1)
|| (sig + roundIncrement < UINT64_C( 0x8000000000000000 ));
sig = softfloat_shiftRightJam64( sig, -exp );
exp = 0;
roundBits = sig & 0x3FF;
if ( isTiny && roundBits ) {
raiseFlags( flag_underflow );
}
} else if (
(0x7FD < exp)
|| (UINT64_C( 0x8000000000000000 ) <= sig + roundIncrement)
) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
raiseFlags(
flag_overflow | flag_inexact );
uiZ = packToF64UI( sign, 0x7FF, 0 ) - ! roundIncrement;
goto uiZ;
}
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig = (sig + roundIncrement)>>10;
if ( roundBits ) {
raiseFlags(flag_inexact);
if ( roundingMode == round_odd ) {
sig |= 1;
goto packReturn;
}
}
sig &= ~(uint_fast64_t) (! (roundBits ^ 0x200) & roundNearEven);
if ( ! sig ) exp = 0;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
packReturn:
uiZ = packToF64UI( sign, exp, sig );
uiZ:
return float64_t::fromRaw(uiZ);
}
static int_fast32_t
softfloat_roundToI32(
bool sign, uint_fast64_t sig, uint_fast8_t roundingMode, bool exact )
{
bool roundNearEven;
uint_fast16_t roundIncrement, roundBits;
uint_fast32_t sig32;
union { uint32_t ui; int32_t i; } uZ;
int_fast32_t z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundNearEven = (roundingMode == round_near_even);
roundIncrement = 0x800;
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
roundIncrement =
(roundingMode
== (sign ? round_min : round_max))
? 0xFFF
: 0;
}
roundBits = sig & 0xFFF;
sig += roundIncrement;
if ( sig & UINT64_C( 0xFFFFF00000000000 ) ) goto invalid;
sig32 = (uint_fast32_t)(sig>>12); //fixed warning on type cast
sig32 &= ~(uint_fast32_t) (! (roundBits ^ 0x800) & roundNearEven);
//fixed unsigned unary minus: -x == ~x + 1
uZ.ui = sign ? (~sig32 + 1) : sig32;
z = uZ.i;
if ( z && ((z < 0) ^ sign) ) goto invalid;
if ( exact && roundBits ) {
raiseFlags(flag_inexact);
}
return z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags( flag_invalid );
return sign ? i32_fromNegOverflow : i32_fromPosOverflow;
}
static int_fast64_t
softfloat_roundToI64(
bool sign, uint_fast64_t sig, uint_fast64_t sigExtra, uint_fast8_t roundingMode, bool exact )
{
bool roundNearEven, doIncrement;
union { uint64_t ui; int64_t i; } uZ;
int_fast64_t z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundNearEven = (roundingMode == round_near_even);
doIncrement = (UINT64_C(0x8000000000000000) <= sigExtra);
if (!roundNearEven && (roundingMode != round_near_maxMag)) {
doIncrement =
(roundingMode
== (sign ? round_min : round_max))
&& sigExtra;
}
if (doIncrement) {
++sig;
if (!sig) goto invalid;
sig &=
~(uint_fast64_t)
(!(sigExtra & UINT64_C(0x7FFFFFFFFFFFFFFF))
& roundNearEven);
}
uZ.ui = sign ? (~sig + 1) : sig;
z = uZ.i;
if (z && ((z < 0) ^ sign)) goto invalid;
if (exact && sigExtra) {
raiseFlags(flag_inexact);
}
return z;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
invalid:
raiseFlags(flag_invalid);
return sign ? i64_fromNegOverflow : i64_fromPosOverflow;
}
static struct uint128
softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
{
uint_fast8_t u8NegDist;
struct uint128 z;
if ( dist < 64 ) {
//fixed unsigned unary minus: -x == ~x + 1 , fixed type cast
u8NegDist = (uint_fast8_t)(~dist + 1);
z.v64 = a64>>dist;
z.v0 =
a64<<(u8NegDist & 63) | a0>>dist
| ((uint64_t) (a0<<(u8NegDist & 63)) != 0);
} else {
z.v64 = 0;
z.v0 =
(dist < 127)
? a64>>(dist & 63)
| (((a64 & (((uint_fast64_t) 1<<(dist & 63)) - 1)) | a0)
!= 0)
: ((a64 | a0) != 0);
}
return z;
}
static float32_t softfloat_subMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
{
int_fast16_t expA;
uint_fast32_t sigA;
int_fast16_t expB;
uint_fast32_t sigB;
int_fast16_t expDiff;
uint_fast32_t uiZ;
int_fast32_t sigDiff;
bool signZ;
int_fast8_t shiftDist;
int_fast16_t expZ;
uint_fast32_t sigX, sigY;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expA = expF32UI( uiA );
sigA = fracF32UI( uiA );
expB = expF32UI( uiB );
sigB = fracF32UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expDiff = expA - expB;
if ( ! expDiff ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA | sigB ) goto propagateNaN;
raiseFlags( flag_invalid );
uiZ = defaultNaNF32UI;
goto uiZ;
}
sigDiff = sigA - sigB;
if ( ! sigDiff ) {
uiZ =
packToF32UI(
(globalRoundingMode == round_min), 0, 0 );
goto uiZ;
}
if ( expA ) --expA;
signZ = signF32UI( uiA );
if ( sigDiff < 0 ) {
signZ = ! signZ;
sigDiff = -sigDiff;
}
shiftDist = softfloat_countLeadingZeros32( sigDiff ) - 8;
expZ = expA - shiftDist;
if ( expZ < 0 ) {
shiftDist = (int_fast8_t)expA; //fixed type cast
expZ = 0;
}
uiZ = packToF32UI( signZ, expZ, sigDiff<<shiftDist );
goto uiZ;
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
signZ = signF32UI( uiA );
sigA <<= 7;
sigB <<= 7;
if ( expDiff < 0 ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
signZ = ! signZ;
if ( expB == 0xFF ) {
if ( sigB ) goto propagateNaN;
uiZ = packToF32UI( signZ, 0xFF, 0 );
goto uiZ;
}
expZ = expB - 1;
sigX = sigB | 0x40000000;
sigY = sigA + (expA ? 0x40000000 : sigA);
expDiff = -expDiff;
} else {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
if ( expA == 0xFF ) {
if ( sigA ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
expZ = expA - 1;
sigX = sigA | 0x40000000;
sigY = sigB + (expB ? 0x40000000 : sigB);
}
return
softfloat_normRoundPackToF32(
signZ, expZ, sigX - softfloat_shiftRightJam32( sigY, expDiff )
);
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
uiZ:
return float32_t::fromRaw(uiZ);
}
static float64_t
softfloat_subMagsF64( uint_fast64_t uiA, uint_fast64_t uiB, bool signZ )
{
int_fast16_t expA;
uint_fast64_t sigA;
int_fast16_t expB;
uint_fast64_t sigB;
int_fast16_t expDiff;
uint_fast64_t uiZ;
int_fast64_t sigDiff;
int_fast8_t shiftDist;
int_fast16_t expZ;
uint_fast64_t sigZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expA = expF64UI( uiA );
sigA = fracF64UI( uiA );
expB = expF64UI( uiB );
sigB = fracF64UI( uiB );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
expDiff = expA - expB;
if ( ! expDiff ) {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA | sigB ) goto propagateNaN;
raiseFlags( flag_invalid );
uiZ = defaultNaNF64UI;
goto uiZ;
}
sigDiff = sigA - sigB;
if ( ! sigDiff ) {
uiZ =
packToF64UI(
(globalRoundingMode == round_min), 0, 0 );
goto uiZ;
}
if ( expA ) --expA;
if ( sigDiff < 0 ) {
signZ = ! signZ;
sigDiff = -sigDiff;
}
shiftDist = softfloat_countLeadingZeros64( sigDiff ) - 11;
expZ = expA - shiftDist;
if ( expZ < 0 ) {
shiftDist = (int_fast8_t)expA; //fixed type cast
expZ = 0;
}
uiZ = packToF64UI( signZ, expZ, sigDiff<<shiftDist );
goto uiZ;
} else {
/*--------------------------------------------------------------------
*--------------------------------------------------------------------*/
sigA <<= 10;
sigB <<= 10;
if ( expDiff < 0 ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
signZ = ! signZ;
if ( expB == 0x7FF ) {
if ( sigB ) goto propagateNaN;
uiZ = packToF64UI( signZ, 0x7FF, 0 );
goto uiZ;
}
sigA += expA ? UINT64_C( 0x4000000000000000 ) : sigA;
sigA = softfloat_shiftRightJam64( sigA, -expDiff );
sigB |= UINT64_C( 0x4000000000000000 );
expZ = expB;
sigZ = sigB - sigA;
} else {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
if ( expA == 0x7FF ) {
if ( sigA ) goto propagateNaN;
uiZ = uiA;
goto uiZ;
}
sigB += expB ? UINT64_C( 0x4000000000000000 ) : sigB;
sigB = softfloat_shiftRightJam64( sigB, expDiff );
sigA |= UINT64_C( 0x4000000000000000 );
expZ = expA;
sigZ = sigA - sigB;
}
return softfloat_normRoundPackToF64( signZ, expZ - 1, sigZ );
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
propagateNaN:
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
uiZ:
return float64_t::fromRaw(uiZ);
}
static float32_t ui32_to_f32( uint32_t a )
{
if ( ! a ) {
return float32_t::fromRaw(0);
}
if ( a & 0x80000000 ) {
return softfloat_roundPackToF32( 0, 0x9D, a>>1 | (a & 1) );
} else {
return softfloat_normRoundPackToF32( 0, 0x9C, a );
}
}
static float64_t ui32_to_f64( uint32_t a )
{
uint_fast64_t uiZ;
int_fast8_t shiftDist;
if ( ! a ) {
uiZ = 0;
} else {
shiftDist = softfloat_countLeadingZeros32( a ) + 21;
uiZ =
packToF64UI( 0, 0x432 - shiftDist, (uint_fast64_t) a<<shiftDist );
}
return float64_t::fromRaw(uiZ);
}
static float32_t ui64_to_f32( uint64_t a )
{
int_fast8_t shiftDist;
uint_fast32_t sig;
shiftDist = softfloat_countLeadingZeros64( a ) - 40;
if ( 0 <= shiftDist ) {
return float32_t::fromRaw(a ? packToF32UI(0, 0x95 - shiftDist, (uint_fast32_t) a<<shiftDist ) : 0);
} else {
shiftDist += 7;
sig =
(shiftDist < 0) ? (uint_fast32_t) softfloat_shortShiftRightJam64( a, -shiftDist ) //fixed warning on type cast
: (uint_fast32_t) a<<shiftDist;
return softfloat_roundPackToF32( 0, 0x9C - shiftDist, sig );
}
}
static float64_t ui64_to_f64( uint64_t a )
{
if ( ! a ) {
return float64_t::fromRaw(0);
}
if ( a & UINT64_C( 0x8000000000000000 ) ) {
return
softfloat_roundPackToF64(
0, 0x43D, softfloat_shortShiftRightJam64( a, 1 ) );
} else {
return softfloat_normRoundPackToF64( 0, 0x43C, a );
}
}
/*----------------------------------------------------------------------------
| Ported from OpenCV.
*----------------------------------------------------------------------------*/
////////////////////////////////////// EXP /////////////////////////////////////
#define EXPTAB_SCALE 6
#define EXPTAB_MASK ((1 << EXPTAB_SCALE) - 1)
// .9670371139572337719125840413672004409288e-2
static const softdouble EXPPOLY_32F_A0 = float64_t::fromRaw(0x3f83ce0f3e46f431);
static const uint64_t expTab[] = {
0x3ff0000000000000, // 1.000000
0x3ff02c9a3e778061, // 1.010889
0x3ff059b0d3158574, // 1.021897
0x3ff0874518759bc8, // 1.033025
0x3ff0b5586cf9890f, // 1.044274
0x3ff0e3ec32d3d1a2, // 1.055645
0x3ff11301d0125b51, // 1.067140
0x3ff1429aaea92de0, // 1.078761
0x3ff172b83c7d517b, // 1.090508
0x3ff1a35beb6fcb75, // 1.102383
0x3ff1d4873168b9aa, // 1.114387
0x3ff2063b88628cd6, // 1.126522
0x3ff2387a6e756238, // 1.138789
0x3ff26b4565e27cdd, // 1.151189
0x3ff29e9df51fdee1, // 1.163725
0x3ff2d285a6e4030b, // 1.176397
0x3ff306fe0a31b715, // 1.189207
0x3ff33c08b26416ff, // 1.202157
0x3ff371a7373aa9cb, // 1.215247
0x3ff3a7db34e59ff7, // 1.228481
0x3ff3dea64c123422, // 1.241858
0x3ff4160a21f72e2a, // 1.255381
0x3ff44e086061892d, // 1.269051
0x3ff486a2b5c13cd0, // 1.282870
0x3ff4bfdad5362a27, // 1.296840
0x3ff4f9b2769d2ca7, // 1.310961
0x3ff5342b569d4f82, // 1.325237
0x3ff56f4736b527da, // 1.339668
0x3ff5ab07dd485429, // 1.354256
0x3ff5e76f15ad2148, // 1.369002
0x3ff6247eb03a5585, // 1.383910
0x3ff6623882552225, // 1.398980
0x3ff6a09e667f3bcd, // 1.414214
0x3ff6dfb23c651a2f, // 1.429613
0x3ff71f75e8ec5f74, // 1.445181
0x3ff75feb564267c9, // 1.460918
0x3ff7a11473eb0187, // 1.476826
0x3ff7e2f336cf4e62, // 1.492908
0x3ff82589994cce13, // 1.509164
0x3ff868d99b4492ed, // 1.525598
0x3ff8ace5422aa0db, // 1.542211
0x3ff8f1ae99157736, // 1.559004
0x3ff93737b0cdc5e5, // 1.575981
0x3ff97d829fde4e50, // 1.593142
0x3ff9c49182a3f090, // 1.610490
0x3ffa0c667b5de565, // 1.628027
0x3ffa5503b23e255d, // 1.645755
0x3ffa9e6b5579fdbf, // 1.663677
0x3ffae89f995ad3ad, // 1.681793
0x3ffb33a2b84f15fb, // 1.700106
0x3ffb7f76f2fb5e47, // 1.718619
0x3ffbcc1e904bc1d2, // 1.737334
0x3ffc199bdd85529c, // 1.756252
0x3ffc67f12e57d14b, // 1.775376
0x3ffcb720dcef9069, // 1.794709
0x3ffd072d4a07897c, // 1.814252
0x3ffd5818dcfba487, // 1.834008
0x3ffda9e603db3285, // 1.853979
0x3ffdfc97337b9b5f, // 1.874168
0x3ffe502ee78b3ff6, // 1.894576
0x3ffea4afa2a490da, // 1.915207
0x3ffefa1bee615a27, // 1.936062
0x3fff50765b6e4540, // 1.957144
0x3fffa7c1819e90d8, // 1.978456
};
// 1 / ln(2) * (1 << EXPTAB_SCALE) == 1.4426950408889634073599246810019 * (1 << EXPTAB_SCALE)
static const float64_t exp_prescale = float64_t::fromRaw(0x3ff71547652b82fe) * float64_t(1 << EXPTAB_SCALE);
static const float64_t exp_postscale = float64_t::one()/float64_t(1 << EXPTAB_SCALE);
static const float64_t exp_max_val(3000*(1 << EXPTAB_SCALE)); // log10(DBL_MAX) < 3000
static float32_t f32_exp( float32_t x)
{
//special cases
if(x.isNaN()) return float32_t::nan();
if(x.isInf()) return (x == float32_t::inf()) ? x : float32_t::zero();
static const float64_t
A4 = float64_t::one() / EXPPOLY_32F_A0,
A3 = float64_t::fromRaw(0x3fe62e42fef9277b) / EXPPOLY_32F_A0, // .6931471805521448196800669615864773144641 / EXPPOLY_32F_A0,
A2 = float64_t::fromRaw(0x3fcebfbe081585e7) / EXPPOLY_32F_A0, // .2402265109513301490103372422686535526573 / EXPPOLY_32F_A0,
A1 = float64_t::fromRaw(0x3fac6af0d93cf576) / EXPPOLY_32F_A0; // .5550339366753125211915322047004666939128e-1 / EXPPOLY_32F_A0;
float64_t x0;
if(expF32UI(x.v) > 127 + 10)
x0 = signF32UI(x.v) ? -exp_max_val : exp_max_val;
else
x0 = f32_to_f64(x) * exp_prescale;
int val0 = f64_to_i32(x0, round_near_even, false);
int t = (val0 >> EXPTAB_SCALE) + 1023;
t = t < 0 ? 0 : (t > 2047 ? 2047 : t);
float64_t buf; buf.v = packToF64UI(0, t, 0);
x0 = (x0 - f64_roundToInt(x0, round_near_even, false)) * exp_postscale;
return (buf * EXPPOLY_32F_A0 * float64_t::fromRaw(expTab[val0 & EXPTAB_MASK]) * ((((x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4));
}
static float64_t f64_exp(float64_t x)
{
//special cases
if(x.isNaN()) return float64_t::nan();
if(x.isInf()) return (x == float64_t::inf()) ? x : float64_t::zero();
static const float64_t
A5 = float64_t::one() / EXPPOLY_32F_A0,
A4 = float64_t::fromRaw(0x3fe62e42fefa39f1) / EXPPOLY_32F_A0, // .69314718055994546743029643825322 / EXPPOLY_32F_A0
A3 = float64_t::fromRaw(0x3fcebfbdff82a45a) / EXPPOLY_32F_A0, // .24022650695886477918181338054308 / EXPPOLY_32F_A0
A2 = float64_t::fromRaw(0x3fac6b08d81fec75) / EXPPOLY_32F_A0, // .55504108793649567998466049042729e-1 / EXPPOLY_32F_A0
A1 = float64_t::fromRaw(0x3f83b2a72b4f3cd3) / EXPPOLY_32F_A0, // .96180973140732918010002372686186e-2 / EXPPOLY_32F_A0
A0 = float64_t::fromRaw(0x3f55e7aa1566c2a4) / EXPPOLY_32F_A0; // .13369713757180123244806654839424e-2 / EXPPOLY_32F_A0
float64_t x0;
if(expF64UI(x.v) > 1023 + 10)
x0 = signF64UI(x.v) ? -exp_max_val : exp_max_val;
else
x0 = x * exp_prescale;
int val0 = cvRound(x0);
int t = (val0 >> EXPTAB_SCALE) + 1023;
t = t < 0 ? 0 : (t > 2047 ? 2047 : t);
float64_t buf; buf.v = packToF64UI(0, t, 0);
x0 = (x0 - f64_roundToInt(x0, round_near_even, false)) * exp_postscale;
return buf * EXPPOLY_32F_A0 * float64_t::fromRaw(expTab[val0 & EXPTAB_MASK]) * (((((A0*x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4)*x0 + A5);
}
#undef EXPTAB_SCALE
#undef EXPTAB_MASK
#undef EXPPOLY_32F_A0
/////////////////////////////////////////// LOG ///////////////////////////////////////
#define LOGTAB_SCALE 8
static const uint64_t CV_DECL_ALIGNED(16) icvLogTab[] = {
0, 0x3ff0000000000000, // 0.000000, 1.000000
0x3f6ff00aa2b10bc0, 0x3fefe01fe01fe020, // 0.003899, 0.996109
0x3f7fe02a6b106788, 0x3fefc07f01fc07f0, // 0.007782, 0.992248
0x3f87dc475f810a76, 0x3fefa11caa01fa12, // 0.011651, 0.988417
0x3f8fc0a8b0fc03e3, 0x3fef81f81f81f820, // 0.015504, 0.984615
0x3f93cea44346a574, 0x3fef6310aca0dbb5, // 0.019343, 0.980843
0x3f97b91b07d5b11a, 0x3fef44659e4a4271, // 0.023167, 0.977099
0x3f9b9fc027af9197, 0x3fef25f644230ab5, // 0.026977, 0.973384
0x3f9f829b0e783300, 0x3fef07c1f07c1f08, // 0.030772, 0.969697
0x3fa1b0d98923d97f, 0x3feee9c7f8458e02, // 0.034552, 0.966038
0x3fa39e87b9febd5f, 0x3feecc07b301ecc0, // 0.038319, 0.962406
0x3fa58a5bafc8e4d4, 0x3feeae807aba01eb, // 0.042071, 0.958801
0x3fa77458f632dcfc, 0x3fee9131abf0b767, // 0.045810, 0.955224
0x3fa95c830ec8e3eb, 0x3fee741aa59750e4, // 0.049534, 0.951673
0x3fab42dd711971be, 0x3fee573ac901e574, // 0.053245, 0.948148
0x3fad276b8adb0b52, 0x3fee3a9179dc1a73, // 0.056941, 0.944649
0x3faf0a30c01162a6, 0x3fee1e1e1e1e1e1e, // 0.060625, 0.941176
0x3fb075983598e471, 0x3fee01e01e01e01e, // 0.064294, 0.937729
0x3fb16536eea37ae0, 0x3fede5d6e3f8868a, // 0.067951, 0.934307
0x3fb253f62f0a1416, 0x3fedca01dca01dca, // 0.071594, 0.930909
0x3fb341d7961bd1d0, 0x3fedae6076b981db, // 0.075223, 0.927536
0x3fb42edcbea646f0, 0x3fed92f2231e7f8a, // 0.078840, 0.924188
0x3fb51b073f06183f, 0x3fed77b654b82c34, // 0.082444, 0.920863
0x3fb60658a93750c3, 0x3fed5cac807572b2, // 0.086034, 0.917563
0x3fb6f0d28ae56b4b, 0x3fed41d41d41d41d, // 0.089612, 0.914286
0x3fb7da766d7b12cc, 0x3fed272ca3fc5b1a, // 0.093177, 0.911032
0x3fb8c345d6319b20, 0x3fed0cb58f6ec074, // 0.096730, 0.907801
0x3fb9ab42462033ac, 0x3fecf26e5c44bfc6, // 0.100269, 0.904594
0x3fba926d3a4ad563, 0x3fecd85689039b0b, // 0.103797, 0.901408
0x3fbb78c82bb0eda1, 0x3fecbe6d9601cbe7, // 0.107312, 0.898246
0x3fbc5e548f5bc743, 0x3feca4b3055ee191, // 0.110814, 0.895105
0x3fbd4313d66cb35d, 0x3fec8b265afb8a42, // 0.114305, 0.891986
0x3fbe27076e2af2e5, 0x3fec71c71c71c71c, // 0.117783, 0.888889
0x3fbf0a30c01162a6, 0x3fec5894d10d4986, // 0.121249, 0.885813
0x3fbfec9131dbeaba, 0x3fec3f8f01c3f8f0, // 0.124703, 0.882759
0x3fc0671512ca596e, 0x3fec26b5392ea01c, // 0.128146, 0.879725
0x3fc0d77e7cd08e59, 0x3fec0e070381c0e0, // 0.131576, 0.876712
0x3fc14785846742ac, 0x3febf583ee868d8b, // 0.134995, 0.873720
0x3fc1b72ad52f67a0, 0x3febdd2b899406f7, // 0.138402, 0.870748
0x3fc2266f190a5acb, 0x3febc4fd65883e7b, // 0.141798, 0.867797
0x3fc29552f81ff523, 0x3febacf914c1bad0, // 0.145182, 0.864865
0x3fc303d718e47fd2, 0x3feb951e2b18ff23, // 0.148555, 0.861953
0x3fc371fc201e8f74, 0x3feb7d6c3dda338b, // 0.151916, 0.859060
0x3fc3dfc2b0ecc629, 0x3feb65e2e3beee05, // 0.155266, 0.856187
0x3fc44d2b6ccb7d1e, 0x3feb4e81b4e81b4f, // 0.158605, 0.853333
0x3fc4ba36f39a55e5, 0x3feb37484ad806ce, // 0.161933, 0.850498
0x3fc526e5e3a1b437, 0x3feb2036406c80d9, // 0.165250, 0.847682
0x3fc59338d9982085, 0x3feb094b31d922a4, // 0.168555, 0.844884
0x3fc5ff3070a793d3, 0x3feaf286bca1af28, // 0.171850, 0.842105
0x3fc66acd4272ad50, 0x3feadbe87f94905e, // 0.175134, 0.839344
0x3fc6d60fe719d21c, 0x3feac5701ac5701b, // 0.178408, 0.836601
0x3fc740f8f54037a4, 0x3feaaf1d2f87ebfd, // 0.181670, 0.833876
0x3fc7ab890210d909, 0x3fea98ef606a63be, // 0.184922, 0.831169
0x3fc815c0a14357ea, 0x3fea82e65130e159, // 0.188164, 0.828479
0x3fc87fa06520c910, 0x3fea6d01a6d01a6d, // 0.191395, 0.825806
0x3fc8e928de886d40, 0x3fea574107688a4a, // 0.194615, 0.823151
0x3fc9525a9cf456b4, 0x3fea41a41a41a41a, // 0.197826, 0.820513
0x3fc9bb362e7dfb83, 0x3fea2c2a87c51ca0, // 0.201026, 0.817891
0x3fca23bc1fe2b563, 0x3fea16d3f97a4b02, // 0.204216, 0.815287
0x3fca8becfc882f18, 0x3fea01a01a01a01a, // 0.207395, 0.812698
0x3fcaf3c94e80bff2, 0x3fe9ec8e951033d9, // 0.210565, 0.810127
0x3fcb5b519e8fb5a4, 0x3fe9d79f176b682d, // 0.213724, 0.807571
0x3fcbc286742d8cd6, 0x3fe9c2d14ee4a102, // 0.216874, 0.805031
0x3fcc2968558c18c0, 0x3fe9ae24ea5510da, // 0.220014, 0.802508
0x3fcc8ff7c79a9a21, 0x3fe999999999999a, // 0.223144, 0.800000
0x3fccf6354e09c5dc, 0x3fe9852f0d8ec0ff, // 0.226264, 0.797508
0x3fcd5c216b4fbb91, 0x3fe970e4f80cb872, // 0.229374, 0.795031
0x3fcdc1bca0abec7d, 0x3fe95cbb0be377ae, // 0.232475, 0.792570
0x3fce27076e2af2e5, 0x3fe948b0fcd6e9e0, // 0.235566, 0.790123
0x3fce8c0252aa5a5f, 0x3fe934c67f9b2ce6, // 0.238648, 0.787692
0x3fcef0adcbdc5936, 0x3fe920fb49d0e229, // 0.241720, 0.785276
0x3fcf550a564b7b37, 0x3fe90d4f120190d5, // 0.244783, 0.782875
0x3fcfb9186d5e3e2a, 0x3fe8f9c18f9c18fa, // 0.247836, 0.780488
0x3fd00e6c45ad501c, 0x3fe8e6527af1373f, // 0.250880, 0.778116
0x3fd0402594b4d040, 0x3fe8d3018d3018d3, // 0.253915, 0.775758
0x3fd071b85fcd590d, 0x3fe8bfce8062ff3a, // 0.256941, 0.773414
0x3fd0a324e27390e3, 0x3fe8acb90f6bf3aa, // 0.259958, 0.771084
0x3fd0d46b579ab74b, 0x3fe899c0f601899c, // 0.262965, 0.768769
0x3fd1058bf9ae4ad5, 0x3fe886e5f0abb04a, // 0.265964, 0.766467
0x3fd136870293a8b0, 0x3fe87427bcc092b9, // 0.268953, 0.764179
0x3fd1675cababa60e, 0x3fe8618618618618, // 0.271934, 0.761905
0x3fd1980d2dd4236f, 0x3fe84f00c2780614, // 0.274905, 0.759644
0x3fd1c898c16999fa, 0x3fe83c977ab2bedd, // 0.277868, 0.757396
0x3fd1f8ff9e48a2f2, 0x3fe82a4a0182a4a0, // 0.280823, 0.755162
0x3fd22941fbcf7965, 0x3fe8181818181818, // 0.283768, 0.752941
0x3fd2596010df7639, 0x3fe8060180601806, // 0.286705, 0.750733
0x3fd2895a13de86a3, 0x3fe7f405fd017f40, // 0.289633, 0.748538
0x3fd2b9303ab89d24, 0x3fe7e225515a4f1d, // 0.292553, 0.746356
0x3fd2e8e2bae11d30, 0x3fe7d05f417d05f4, // 0.295464, 0.744186
0x3fd31871c9544184, 0x3fe7beb3922e017c, // 0.298367, 0.742029
0x3fd347dd9a987d54, 0x3fe7ad2208e0ecc3, // 0.301261, 0.739884
0x3fd3772662bfd85a, 0x3fe79baa6bb6398b, // 0.304147, 0.737752
0x3fd3a64c556945e9, 0x3fe78a4c8178a4c8, // 0.307025, 0.735632
0x3fd3d54fa5c1f70f, 0x3fe77908119ac60d, // 0.309894, 0.733524
0x3fd404308686a7e3, 0x3fe767dce434a9b1, // 0.312756, 0.731429
0x3fd432ef2a04e813, 0x3fe756cac201756d, // 0.315609, 0.729345
0x3fd4618bc21c5ec2, 0x3fe745d1745d1746, // 0.318454, 0.727273
0x3fd49006804009d0, 0x3fe734f0c541fe8d, // 0.321291, 0.725212
0x3fd4be5f957778a0, 0x3fe724287f46debc, // 0.324119, 0.723164
0x3fd4ec9732600269, 0x3fe713786d9c7c09, // 0.326940, 0.721127
0x3fd51aad872df82d, 0x3fe702e05c0b8170, // 0.329753, 0.719101
0x3fd548a2c3add262, 0x3fe6f26016f26017, // 0.332558, 0.717087
0x3fd5767717455a6c, 0x3fe6e1f76b4337c7, // 0.335356, 0.715084
0x3fd5a42ab0f4cfe1, 0x3fe6d1a62681c861, // 0.338145, 0.713092
0x3fd5d1bdbf5809ca, 0x3fe6c16c16c16c17, // 0.340927, 0.711111
0x3fd5ff3070a793d3, 0x3fe6b1490aa31a3d, // 0.343701, 0.709141
0x3fd62c82f2b9c795, 0x3fe6a13cd1537290, // 0.346467, 0.707182
0x3fd659b57303e1f2, 0x3fe691473a88d0c0, // 0.349225, 0.705234
0x3fd686c81e9b14ae, 0x3fe6816816816817, // 0.351976, 0.703297
0x3fd6b3bb2235943d, 0x3fe6719f3601671a, // 0.354720, 0.701370
0x3fd6e08eaa2ba1e3, 0x3fe661ec6a5122f9, // 0.357456, 0.699454
0x3fd70d42e2789235, 0x3fe6524f853b4aa3, // 0.360184, 0.697548
0x3fd739d7f6bbd006, 0x3fe642c8590b2164, // 0.362905, 0.695652
0x3fd7664e1239dbce, 0x3fe63356b88ac0de, // 0.365619, 0.693767
0x3fd792a55fdd47a2, 0x3fe623fa77016240, // 0.368326, 0.691892
0x3fd7bede0a37afbf, 0x3fe614b36831ae94, // 0.371025, 0.690027
0x3fd7eaf83b82afc3, 0x3fe6058160581606, // 0.373716, 0.688172
0x3fd816f41da0d495, 0x3fe5f66434292dfc, // 0.376401, 0.686327
0x3fd842d1da1e8b17, 0x3fe5e75bb8d015e7, // 0.379078, 0.684492
0x3fd86e919a330ba0, 0x3fe5d867c3ece2a5, // 0.381749, 0.682667
0x3fd89a3386c1425a, 0x3fe5c9882b931057, // 0.384412, 0.680851
0x3fd8c5b7c858b48a, 0x3fe5babcc647fa91, // 0.387068, 0.679045
0x3fd8f11e873662c7, 0x3fe5ac056b015ac0, // 0.389717, 0.677249
0x3fd91c67eb45a83d, 0x3fe59d61f123ccaa, // 0.392359, 0.675462
0x3fd947941c2116fa, 0x3fe58ed2308158ed, // 0.394994, 0.673684
0x3fd972a341135158, 0x3fe5805601580560, // 0.397622, 0.671916
0x3fd99d958117e08a, 0x3fe571ed3c506b3a, // 0.400243, 0.670157
0x3fd9c86b02dc0862, 0x3fe56397ba7c52e2, // 0.402858, 0.668407
0x3fd9f323ecbf984b, 0x3fe5555555555555, // 0.405465, 0.666667
0x3fda1dc064d5b995, 0x3fe54725e6bb82fe, // 0.408066, 0.664935
0x3fda484090e5bb0a, 0x3fe5390948f40feb, // 0.410660, 0.663212
0x3fda72a4966bd9ea, 0x3fe52aff56a8054b, // 0.413247, 0.661499
0x3fda9cec9a9a0849, 0x3fe51d07eae2f815, // 0.415828, 0.659794
0x3fdac718c258b0e4, 0x3fe50f22e111c4c5, // 0.418402, 0.658098
0x3fdaf1293247786b, 0x3fe5015015015015, // 0.420969, 0.656410
0x3fdb1b1e0ebdfc5b, 0x3fe4f38f62dd4c9b, // 0.423530, 0.654731
0x3fdb44f77bcc8f62, 0x3fe4e5e0a72f0539, // 0.426084, 0.653061
0x3fdb6eb59d3cf35d, 0x3fe4d843bedc2c4c, // 0.428632, 0.651399
0x3fdb9858969310fb, 0x3fe4cab88725af6e, // 0.431173, 0.649746
0x3fdbc1e08b0dad0a, 0x3fe4bd3edda68fe1, // 0.433708, 0.648101
0x3fdbeb4d9da71b7b, 0x3fe4afd6a052bf5b, // 0.436237, 0.646465
0x3fdc149ff115f026, 0x3fe4a27fad76014a, // 0.438759, 0.644836
0x3fdc3dd7a7cdad4d, 0x3fe49539e3b2d067, // 0.441275, 0.643216
0x3fdc66f4e3ff6ff7, 0x3fe4880522014880, // 0.443784, 0.641604
0x3fdc8ff7c79a9a21, 0x3fe47ae147ae147b, // 0.446287, 0.640000
0x3fdcb8e0744d7ac9, 0x3fe46dce34596066, // 0.448784, 0.638404
0x3fdce1af0b85f3eb, 0x3fe460cbc7f5cf9a, // 0.451275, 0.636816
0x3fdd0a63ae721e64, 0x3fe453d9e2c776ca, // 0.453759, 0.635236
0x3fdd32fe7e00ebd5, 0x3fe446f86562d9fb, // 0.456237, 0.633663
0x3fdd5b7f9ae2c683, 0x3fe43a2730abee4d, // 0.458710, 0.632099
0x3fdd83e7258a2f3e, 0x3fe42d6625d51f87, // 0.461176, 0.630542
0x3fddac353e2c5954, 0x3fe420b5265e5951, // 0.463636, 0.628993
0x3fddd46a04c1c4a0, 0x3fe4141414141414, // 0.466090, 0.627451
0x3fddfc859906d5b5, 0x3fe40782d10e6566, // 0.468538, 0.625917
0x3fde24881a7c6c26, 0x3fe3fb013fb013fb, // 0.470980, 0.624390
0x3fde4c71a8687704, 0x3fe3ee8f42a5af07, // 0.473416, 0.622871
0x3fde744261d68787, 0x3fe3e22cbce4a902, // 0.475846, 0.621359
0x3fde9bfa659861f5, 0x3fe3d5d991aa75c6, // 0.478270, 0.619855
0x3fdec399d2468cc0, 0x3fe3c995a47babe7, // 0.480689, 0.618357
0x3fdeeb20c640ddf4, 0x3fe3bd60d9232955, // 0.483101, 0.616867
0x3fdf128f5faf06ec, 0x3fe3b13b13b13b14, // 0.485508, 0.615385
0x3fdf39e5bc811e5b, 0x3fe3a524387ac822, // 0.487909, 0.613909
0x3fdf6123fa7028ac, 0x3fe3991c2c187f63, // 0.490304, 0.612440
0x3fdf884a36fe9ec2, 0x3fe38d22d366088e, // 0.492693, 0.610979
0x3fdfaf588f78f31e, 0x3fe3813813813814, // 0.495077, 0.609524
0x3fdfd64f20f61571, 0x3fe3755bd1c945ee, // 0.497455, 0.608076
0x3fdffd2e0857f498, 0x3fe3698df3de0748, // 0.499828, 0.606635
0x3fe011fab125ff8a, 0x3fe35dce5f9f2af8, // 0.502195, 0.605201
0x3fe02552a5a5d0fe, 0x3fe3521cfb2b78c1, // 0.504556, 0.603774
0x3fe0389eefce633b, 0x3fe34679ace01346, // 0.506912, 0.602353
0x3fe04bdf9da926d2, 0x3fe33ae45b57bcb2, // 0.509262, 0.600939
0x3fe05f14bd26459c, 0x3fe32f5ced6a1dfa, // 0.511607, 0.599532
0x3fe0723e5c1cdf40, 0x3fe323e34a2b10bf, // 0.513946, 0.598131
0x3fe0855c884b450e, 0x3fe3187758e9ebb6, // 0.516279, 0.596737
0x3fe0986f4f573520, 0x3fe30d190130d190, // 0.518608, 0.595349
0x3fe0ab76bece14d1, 0x3fe301c82ac40260, // 0.520931, 0.593968
0x3fe0be72e4252a82, 0x3fe2f684bda12f68, // 0.523248, 0.592593
0x3fe0d163ccb9d6b7, 0x3fe2eb4ea1fed14b, // 0.525560, 0.591224
0x3fe0e44985d1cc8b, 0x3fe2e025c04b8097, // 0.527867, 0.589862
0x3fe0f7241c9b497d, 0x3fe2d50a012d50a0, // 0.530169, 0.588506
0x3fe109f39e2d4c96, 0x3fe2c9fb4d812ca0, // 0.532465, 0.587156
0x3fe11cb81787ccf8, 0x3fe2bef98e5a3711, // 0.534756, 0.585812
0x3fe12f719593efbc, 0x3fe2b404ad012b40, // 0.537041, 0.584475
0x3fe1422025243d44, 0x3fe2a91c92f3c105, // 0.539322, 0.583144
0x3fe154c3d2f4d5e9, 0x3fe29e4129e4129e, // 0.541597, 0.581818
0x3fe1675cababa60e, 0x3fe293725bb804a5, // 0.543867, 0.580499
0x3fe179eabbd899a0, 0x3fe288b01288b013, // 0.546132, 0.579186
0x3fe18c6e0ff5cf06, 0x3fe27dfa38a1ce4d, // 0.548392, 0.577878
0x3fe19ee6b467c96e, 0x3fe27350b8812735, // 0.550647, 0.576577
0x3fe1b154b57da29e, 0x3fe268b37cd60127, // 0.552897, 0.575281
0x3fe1c3b81f713c24, 0x3fe25e22708092f1, // 0.555142, 0.573991
0x3fe1d610fe677003, 0x3fe2539d7e9177b2, // 0.557381, 0.572707
0x3fe1e85f5e7040d0, 0x3fe2492492492492, // 0.559616, 0.571429
0x3fe1faa34b87094c, 0x3fe23eb79717605b, // 0.561845, 0.570156
0x3fe20cdcd192ab6d, 0x3fe23456789abcdf, // 0.564070, 0.568889
0x3fe21f0bfc65beeb, 0x3fe22a0122a0122a, // 0.566290, 0.567627
0x3fe23130d7bebf42, 0x3fe21fb78121fb78, // 0.568505, 0.566372
0x3fe2434b6f483933, 0x3fe21579804855e6, // 0.570715, 0.565121
0x3fe2555bce98f7cb, 0x3fe20b470c67c0d9, // 0.572920, 0.563877
0x3fe26762013430df, 0x3fe2012012012012, // 0.575120, 0.562637
0x3fe2795e1289b11a, 0x3fe1f7047dc11f70, // 0.577315, 0.561404
0x3fe28b500df60782, 0x3fe1ecf43c7fb84c, // 0.579506, 0.560175
0x3fe29d37fec2b08a, 0x3fe1e2ef3b3fb874, // 0.581692, 0.558952
0x3fe2af15f02640ad, 0x3fe1d8f5672e4abd, // 0.583873, 0.557734
0x3fe2c0e9ed448e8b, 0x3fe1cf06ada2811d, // 0.586049, 0.556522
0x3fe2d2b4012edc9d, 0x3fe1c522fc1ce059, // 0.588221, 0.555315
0x3fe2e47436e40268, 0x3fe1bb4a4046ed29, // 0.590387, 0.554113
0x3fe2f62a99509546, 0x3fe1b17c67f2bae3, // 0.592550, 0.552916
0x3fe307d7334f10be, 0x3fe1a7b9611a7b96, // 0.594707, 0.551724
0x3fe3197a0fa7fe6a, 0x3fe19e0119e0119e, // 0.596860, 0.550538
0x3fe32b1339121d71, 0x3fe19453808ca29c, // 0.599008, 0.549356
0x3fe33ca2ba328994, 0x3fe18ab083902bdb, // 0.601152, 0.548180
0x3fe34e289d9ce1d3, 0x3fe1811811811812, // 0.603291, 0.547009
0x3fe35fa4edd36ea0, 0x3fe1778a191bd684, // 0.605425, 0.545842
0x3fe37117b54747b5, 0x3fe16e0689427379, // 0.607555, 0.544681
0x3fe38280fe58797e, 0x3fe1648d50fc3201, // 0.609681, 0.543524
0x3fe393e0d3562a19, 0x3fe15b1e5f75270d, // 0.611802, 0.542373
0x3fe3a5373e7ebdf9, 0x3fe151b9a3fdd5c9, // 0.613918, 0.541226
0x3fe3b68449fffc22, 0x3fe1485f0e0acd3b, // 0.616030, 0.540084
0x3fe3c7c7fff73205, 0x3fe13f0e8d344724, // 0.618137, 0.538947
0x3fe3d9026a7156fa, 0x3fe135c81135c811, // 0.620240, 0.537815
0x3fe3ea33936b2f5b, 0x3fe12c8b89edc0ac, // 0.622339, 0.536688
0x3fe3fb5b84d16f42, 0x3fe12358e75d3033, // 0.624433, 0.535565
0x3fe40c7a4880dce9, 0x3fe11a3019a74826, // 0.626523, 0.534447
0x3fe41d8fe84672ae, 0x3fe1111111111111, // 0.628609, 0.533333
0x3fe42e9c6ddf80bf, 0x3fe107fbbe011080, // 0.630690, 0.532225
0x3fe43f9fe2f9ce67, 0x3fe0fef010fef011, // 0.632767, 0.531120
0x3fe4509a5133bb0a, 0x3fe0f5edfab325a2, // 0.634839, 0.530021
0x3fe4618bc21c5ec2, 0x3fe0ecf56be69c90, // 0.636907, 0.528926
0x3fe472743f33aaad, 0x3fe0e40655826011, // 0.638971, 0.527835
0x3fe48353d1ea88df, 0x3fe0db20a88f4696, // 0.641031, 0.526749
0x3fe4942a83a2fc07, 0x3fe0d24456359e3a, // 0.643087, 0.525667
0x3fe4a4f85db03ebb, 0x3fe0c9714fbcda3b, // 0.645138, 0.524590
0x3fe4b5bd6956e273, 0x3fe0c0a7868b4171, // 0.647185, 0.523517
0x3fe4c679afccee39, 0x3fe0b7e6ec259dc8, // 0.649228, 0.522449
0x3fe4d72d3a39fd00, 0x3fe0af2f722eecb5, // 0.651267, 0.521385
0x3fe4e7d811b75bb0, 0x3fe0a6810a6810a7, // 0.653301, 0.520325
0x3fe4f87a3f5026e8, 0x3fe09ddba6af8360, // 0.655332, 0.519270
0x3fe50913cc01686b, 0x3fe0953f39010954, // 0.657358, 0.518219
0x3fe519a4c0ba3446, 0x3fe08cabb37565e2, // 0.659380, 0.517172
0x3fe52a2d265bc5aa, 0x3fe0842108421084, // 0.661398, 0.516129
0x3fe53aad05b99b7c, 0x3fe07b9f29b8eae2, // 0.663413, 0.515091
0x3fe54b2467999497, 0x3fe073260a47f7c6, // 0.665423, 0.514056
0x3fe55b9354b40bcd, 0x3fe06ab59c7912fb, // 0.667429, 0.513026
0x3fe56bf9d5b3f399, 0x3fe0624dd2f1a9fc, // 0.669431, 0.512000
0x3fe57c57f336f190, 0x3fe059eea0727586, // 0.671429, 0.510978
0x3fe58cadb5cd7989, 0x3fe05197f7d73404, // 0.673423, 0.509960
0x3fe59cfb25fae87d, 0x3fe04949cc1664c5, // 0.675413, 0.508946
0x3fe5ad404c359f2c, 0x3fe0410410410410, // 0.677399, 0.507937
0x3fe5bd7d30e71c73, 0x3fe038c6b78247fc, // 0.679381, 0.506931
0x3fe5cdb1dc6c1764, 0x3fe03091b51f5e1a, // 0.681359, 0.505929
0x3fe5ddde57149923, 0x3fe02864fc7729e9, // 0.683334, 0.504931
0x3fe5ee02a9241675, 0x3fe0204081020408, // 0.685304, 0.503937
0x3fe5fe1edad18918, 0x3fe0182436517a37, // 0.687271, 0.502947
0x3fe60e32f44788d8, 0x3fe0101010101010, // 0.689233, 0.501961
0x3fe62e42fefa39ef, 0x3fe0000000000000, // 0.693147, 0.500000
};
// 0.69314718055994530941723212145818
static const float64_t ln_2 = float64_t::fromRaw(0x3fe62e42fefa39ef);
static float32_t f32_log(float32_t x)
{
//special cases
if(x.isNaN() || x < float32_t::zero()) return float32_t::nan();
if(x == float32_t::zero()) return -float32_t::inf();
//first 8 bits of mantissa
int h0 = (x.v >> (23 - LOGTAB_SCALE)) & ((1 << LOGTAB_SCALE) - 1);
//buf == 0.00000000_the_rest_mantissa_bits
float64_t buf; buf.v = packToF64UI(0, 1023, ((uint64_t)x.v << 29) & ((1LL << (52 - LOGTAB_SCALE)) - 1));
buf -= float64_t::one();
float64_t tab0 = float64_t::fromRaw(icvLogTab[2*h0]);
float64_t tab1 = float64_t::fromRaw(icvLogTab[2*h0+1]);
float64_t x0 = buf * tab1;
//if last elements of icvLogTab
if(h0 == 255) x0 += float64_t(-float64_t::one() / float64_t(512));
float64_t y0 = ln_2 * float64_t(expF32UI(x.v) - 127) + tab0 + x0*x0*x0/float64_t(3) - x0*x0/float64_t(2) + x0;
return y0;
}
static float64_t f64_log(float64_t x)
{
//special cases
if(x.isNaN() || x < float64_t::zero()) return float64_t::nan();
if(x == float64_t::zero()) return -float64_t::inf();
static const float64_t
A7(1),
A6(-float64_t::one() / float64_t(2)),
A5( float64_t::one() / float64_t(3)),
A4(-float64_t::one() / float64_t(4)),
A3( float64_t::one() / float64_t(5)),
A2(-float64_t::one() / float64_t(6)),
A1( float64_t::one() / float64_t(7)),
A0(-float64_t::one() / float64_t(8));
//first 8 bits of mantissa
int h0 = (x.v >> (52 - LOGTAB_SCALE)) & ((1 << LOGTAB_SCALE) - 1);
//buf == 0.00000000_the_rest_mantissa_bits
float64_t buf; buf.v = packToF64UI(0, 1023, x.v & ((1LL << (52 - LOGTAB_SCALE)) - 1));
buf -= float64_t::one();
float64_t tab0 = float64_t::fromRaw(icvLogTab[2*h0]);
float64_t tab1 = float64_t::fromRaw(icvLogTab[2*h0 + 1]);
float64_t x0 = buf * tab1;
//if last elements of icvLogTab
if(h0 == 255) x0 += float64_t(-float64_t::one()/float64_t(512));
float64_t xq = x0*x0;
return ln_2 * float64_t( expF64UI(x.v) - 1023) + tab0 + (((A0*xq + A2)*xq + A4)*xq + A6)*xq +
(((A1*xq + A3)*xq + A5)*xq + A7)*x0;
}
/* ************************************************************************** *\
Fast cube root by Ken Turkowski
(http://www.worldserver.com/turk/computergraphics/papers.html)
\* ************************************************************************** */
static float32_t f32_cbrt(float32_t x)
{
//special cases
if (x.isNaN()) return float32_t::nan();
if (x.isInf()) return x;
int s = signF32UI(x.v);
int ex = expF32UI(x.v) - 127;
int shx = ex % 3;
shx -= shx >= 0 ? 3 : 0;
ex = (ex - shx) / 3 - 1; /* exponent of cube root */
float64_t fr; fr.v = packToF64UI(0, shx + 1023, ((uint64_t)fracF32UI(x.v)) << 29);
/* 0.125 <= fr < 1.0 */
/* Use quartic rational polynomial with error < 2^(-24) */
const float64_t A1 = float64_t::fromRaw(0x4046a09e6653ba70); // 45.2548339756803022511987494
const float64_t A2 = float64_t::fromRaw(0x406808f46c6116e0); // 192.2798368355061050458134625
const float64_t A3 = float64_t::fromRaw(0x405dca97439cae14); // 119.1654824285581628956914143
const float64_t A4 = float64_t::fromRaw(0x402add70d2827500); // 13.43250139086239872172837314
const float64_t A5 = float64_t::fromRaw(0x3fc4f15f83f55d2d); // 0.1636161226585754240958355063
const float64_t A6 = float64_t::fromRaw(0x402d9e20660edb21); // 14.80884093219134573786480845
const float64_t A7 = float64_t::fromRaw(0x4062ff15c0285815); // 151.9714051044435648658557668
const float64_t A8 = float64_t::fromRaw(0x406510d06a8112ce); // 168.5254414101568283957668343
const float64_t A9 = float64_t::fromRaw(0x4040fecbc9e2c375); // 33.9905941350215598754191872
const float64_t A10 = float64_t::one();
fr = ((((A1 * fr + A2) * fr + A3) * fr + A4) * fr + A5)/
((((A6 * fr + A7) * fr + A8) * fr + A9) * fr + A10);
/* fr *= 2^ex * sign */
// checks for "+0" and "-0", reset sign bit
float32_t y; y.v = ((x.v & ((1u << 31) - 1)) != 0) ? packToF32UI(s, ex + 127, (uint32_t)(fracF64UI(fr.v) >> 29)) : 0;
return y;
}
/// POW functions ///
static float32_t f32_pow( float32_t x, float32_t y)
{
static const float32_t zero = float32_t::zero(), one = float32_t::one(), inf = float32_t::inf(), nan = float32_t::nan();
bool xinf = x.isInf(), yinf = y.isInf(), xnan = x.isNaN(), ynan = y.isNaN();
float32_t ax = abs(x);
bool useInf = (y > zero) == (ax > one);
float32_t v;
//special cases
if(ynan) v = nan;
else if(yinf) v = (ax == one || xnan) ? nan : (useInf ? inf : zero);
else if(y == zero) v = one;
else if(y == one ) v = x;
else //here y is ok
{
if(xnan) v = nan;
else if(xinf) v = (y < zero) ? zero : inf;
else if(y == f32_roundToInt(y, round_near_even, false)) v = f32_powi(x, f32_to_i32(y, round_near_even, false));
else if(x < zero) v = nan;
// (0 ** 0) == 1
else if(x == zero) v = (y < zero) ? inf : (y == zero ? one : zero);
// here x and y are ok
else v = f32_exp(y * f32_log(x));
}
return v;
}
static float64_t f64_pow( float64_t x, float64_t y)
{
static const float64_t zero = float64_t::zero(), one = float64_t::one(), inf = float64_t::inf(), nan = float64_t::nan();
bool xinf = x.isInf(), yinf = y.isInf(), xnan = x.isNaN(), ynan = y.isNaN();
float64_t ax = abs(x);
bool useInf = (y > zero) == (ax > one);
float64_t v;
//special cases
if(ynan) v = nan;
else if(yinf) v = (ax == one || xnan) ? nan : (useInf ? inf : zero);
else if(y == zero) v = one;
else if(y == one ) v = x;
else //here y is ok
{
if(xnan) v = nan;
else if(xinf) v = (y < zero) ? zero : inf;
else if(y == f64_roundToInt(y, round_near_even, false)) v = f64_powi(x, f64_to_i32(y, round_near_even, false));
else if(x < zero) v = nan;
// (0 ** 0) == 1
else if(x == zero) v = (y < zero) ? inf : (y == zero ? one : zero);
// here x and y are ok
else v = f64_exp(y * f64_log(x));
}
return v;
}
// These functions are for internal use only
static float32_t f32_powi( float32_t x, int y)
{
float32_t v;
//special case: (0 ** 0) == 1
if(x == float32_t::zero())
v = (y < 0) ? float32_t::inf() : (y == 0 ? float32_t::one() : float32_t::zero());
// here x and y are ok
else
{
float32_t a = float32_t::one(), b = x;
int p = std::abs(y);
if( y < 0 )
b = float32_t::one()/b;
while( p > 1 )
{
if( p & 1 )
a *= b;
b *= b;
p >>= 1;
}
v = a * b;
}
return v;
}
static float64_t f64_powi( float64_t x, int y)
{
float64_t v;
//special case: (0 ** 0) == 1
if(x == float64_t::zero())
v = (y < 0) ? float64_t::inf() : (y == 0 ? float64_t::one() : float64_t::zero());
// here x and y are ok
else
{
float64_t a = float64_t::one(), b = x;
int p = std::abs(y);
if( y < 0 )
b = float64_t::one()/b;
while( p > 1 )
{
if( p & 1 )
a *= b;
b *= b;
p >>= 1;
}
v = a * b;
}
return v;
}
/*
* sin and cos functions taken from fdlibm with original comments
* (edited where needed)
*/
static const float64_t pi2 = float64_t::pi().setExp(2);
static const float64_t piby2 = float64_t::pi().setExp(0);
static const float64_t piby4 = float64_t::pi().setExp(-1);
static const float64_t half = float64_t::one()/float64_t(2);
static const float64_t third = float64_t::one()/float64_t(3);
/* __kernel_sin( x, y, iy)
* N.B. from OpenCV side: y and iy removed, simplified to polynomial
*
* kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
* Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x.
* Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
*
* Algorithm
* 1. Since sin(-x) = -sin(x), we need only to consider positive x.
* 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
* 3. sin(x) is approximated by a polynomial of degree 13 on
* [0,pi/4]
* 3 13
* sin(x) ~ x + S1*x + ... + S6*x
* where
*
* |sin(x) 2 4 6 8 10 12 | -58
* |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
* | x |
*
* 4. sin(x+y) = sin(x) + sin'(x')*y
* ~ sin(x) + (1-x*x/2)*y
* For better accuracy, let
* 3 2 2 2 2
* r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
* then 3 2
* sin(x) = x + (S1*x + (x *(r-y/2)+y))
*/
static const float64_t
// -1/3! = -1/6
S1 = float64_t::fromRaw( 0xBFC5555555555549 ),
// 1/5! = 1/120
S2 = float64_t::fromRaw( 0x3F8111111110F8A6 ),
// -1/7! = -1/5040
S3 = float64_t::fromRaw( 0xBF2A01A019C161D5 ),
// 1/9! = 1/362880
S4 = float64_t::fromRaw( 0x3EC71DE357B1FE7D ),
// -1/11! = -1/39916800
S5 = float64_t::fromRaw( 0xBE5AE5E68A2B9CEB ),
// 1/13! = 1/6227020800
S6 = float64_t::fromRaw( 0x3DE5D93A5ACFD57C );
static float64_t f64_sin_kernel(float64_t x)
{
if(x.getExp() < -27)
{
if(x != x.zero()) raiseFlags(flag_inexact);
return x;
}
float64_t z = x*x;
return x*mulAdd(z, mulAdd(z, mulAdd(z, mulAdd(z, mulAdd(z, mulAdd(z,
S6, S5), S4), S3), S2), S1), x.one());
}
/*
* __kernel_cos( x, y )
* N.B. from OpenCV's side: y removed, simplified to one polynomial
*
* kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
* Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x.
*
* Algorithm
* 1. Since cos(-x) = cos(x), we need only to consider positive x.
* 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
* 3. cos(x) is approximated by a polynomial of degree 14 on
* [0,pi/4]
* 4 14
* cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
* where the remez error is
*
* | 2 4 6 8 10 12 14 | -58
* |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
* | |
*
* 4 6 8 10 12 14
* 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
* cos(x) = 1 - x*x/2 + r
* since cos(x+y) ~ cos(x) - sin(x)*y
* ~ cos(x) - x*y,
* a correction term is necessary in cos(x) and hence
* cos(x+y) = 1 - (x*x/2 - (r - x*y))
*
* N.B. The following part was removed since we have enough precision
*
* For better accuracy when x > 0.3, let qx = |x|/4 with
* the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
* Then
* cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
* Note that 1-qx and (x*x/2-qx) is EXACT here, and the
* magnitude of the latter is at least a quarter of x*x/2,
* thus, reducing the rounding error in the subtraction.
*/
static const float64_t
// 1/4! = 1/24
C1 = float64_t::fromRaw( 0x3FA555555555554C ),
// -1/6! = -1/720
C2 = float64_t::fromRaw( 0xBF56C16C16C15177 ),
// 1/8! = 1/40320
C3 = float64_t::fromRaw( 0x3EFA01A019CB1590 ),
// -1/10! = -1/3628800
C4 = float64_t::fromRaw( 0xBE927E4F809C52AD ),
// 1/12! = 1/479001600
C5 = float64_t::fromRaw( 0x3E21EE9EBDB4B1C4 ),
// -1/14! = -1/87178291200
C6 = float64_t::fromRaw( 0xBDA8FAE9BE8838D4 );
static float64_t f64_cos_kernel(float64_t x)
{
if(x.getExp() < -27)
{
if(x != x.zero()) raiseFlags(flag_inexact);
return x.one();
}
float64_t z = x*x;
return mulAdd(mulAdd(z, mulAdd(z, mulAdd(z, mulAdd(z, mulAdd(z, mulAdd(z,
C6, C5), C4), C3), C2), C1), -half), z, x.one());
}
static void f64_sincos_reduce(const float64_t& x, float64_t& y, int& n)
{
if(abs(x) < piby4)
{
n = 0, y = x;
}
else
{
/* argument reduction needed */
float64_t p = f64_rem(x, pi2);
float64_t v = p - float64_t::eps().setExp(-10);
if(abs(v) <= piby4)
{
n = 0; y = p;
}
else if(abs(v) <= (float64_t(3)*piby4))
{
n = (p > 0) ? 1 : 3;
y = (p > 0) ? p - piby2 : p + piby2;
if(p > 0) n = 1, y = p - piby2;
else n = 3, y = p + piby2;
}
else
{
n = 2;
y = (p > 0) ? p - p.pi() : p + p.pi();
}
}
}
/* sin(x)
* Return sine function of x.
*
* kernel function:
* __kernel_sin ... sine function on [-pi/4,pi/4]
* __kernel_cos ... cose function on [-pi/4,pi/4]
*
* Method.
* Let S,C and T denote the sin, cos and tan respectively on
* [-PI/4, +PI/4]. Reduce the argument x to y = x-k*pi/2
* in [-pi/4 , +pi/4], and let n = k mod 4.
* We have
*
* n sin(x) cos(x) tan(x)
* ----------------------------------------------------------
* 0 S C T
* 1 C -S -1/T
* 2 -S -C T
* 3 -C S -1/T
* ----------------------------------------------------------
*
* Special cases:
* Let trig be any of sin, cos, or tan.
* trig(+-INF) is NaN, with signals;
* trig(NaN) is that NaN;
*
* Accuracy:
* TRIG(x) returns trig(x) nearly rounded
*/
static float64_t f64_sin( float64_t x )
{
if(x.isInf() || x.isNaN()) return x.nan();
float64_t y; int n;
f64_sincos_reduce(x, y, n);
switch (n)
{
case 0: return f64_sin_kernel(y);
case 1: return f64_cos_kernel(y);
case 2: return -f64_sin_kernel(y);
default: return -f64_cos_kernel(y);
}
}
/* cos(x)
* Return cosine function of x.
*
* kernel function:
* __kernel_sin ... sine function on [-pi/4,pi/4]
* __kernel_cos ... cosine function on [-pi/4,pi/4]
*
* Method.
* Let S,C and T denote the sin, cos and tan respectively on
* [-PI/4, +PI/4]. Reduce the argument x to y = x-k*pi/2
* in [-pi/4 , +pi/4], and let n = k mod 4.
* We have
*
* n sin(x) cos(x) tan(x)
* ----------------------------------------------------------
* 0 S C T
* 1 C -S -1/T
* 2 -S -C T
* 3 -C S -1/T
* ----------------------------------------------------------
*
* Special cases:
* Let trig be any of sin, cos, or tan.
* trig(+-INF) is NaN, with signals;
* trig(NaN) is that NaN;
*
* Accuracy:
* TRIG(x) returns trig(x) nearly rounded
*/
static float64_t f64_cos( float64_t x )
{
if(x.isInf() || x.isNaN()) return x.nan();
float64_t y; int n;
f64_sincos_reduce(x, y, n);
switch (n)
{
case 0: return f64_cos_kernel(y);
case 1: return -f64_sin_kernel(y);
case 2: return -f64_cos_kernel(y);
default: return f64_sin_kernel(y);
}
}
}