2017-06-15 02:20:10 +08:00
|
|
|
// This file is part of OpenCV project.
|
|
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
|
|
// of this distribution and at http://opencv.org/license.html
|
|
|
|
|
|
|
|
// This file is based on files from package issued with the following license:
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
|
|
|
|
/*============================================================================
|
|
|
|
|
|
|
|
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
|
|
|
Package, Release 3c, by John R. Hauser.
|
|
|
|
|
|
|
|
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
|
|
|
|
University of California. All rights reserved.
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
|
|
|
|
1. Redistributions of source code must retain the above copyright notice,
|
|
|
|
this list of conditions, and the following disclaimer.
|
|
|
|
|
|
|
|
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
this list of conditions, and the following disclaimer in the documentation
|
|
|
|
and/or other materials provided with the distribution.
|
|
|
|
|
|
|
|
3. Neither the name of the University nor the names of its contributors may
|
|
|
|
be used to endorse or promote products derived from this software without
|
|
|
|
specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
|
|
|
|
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
|
|
|
|
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
|
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
|
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
|
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
|
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
|
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
|
|
|
=============================================================================*/
|
|
|
|
|
|
|
|
#include "precomp.hpp"
|
|
|
|
|
2017-06-08 04:24:58 +08:00
|
|
|
#include "opencv2/core/softfloat.hpp"
|
|
|
|
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
namespace cv
|
|
|
|
{
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Software floating-point underflow tininess-detection mode.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
enum {
|
|
|
|
tininess_beforeRounding = 0,
|
|
|
|
tininess_afterRounding = 1
|
|
|
|
};
|
|
|
|
//fixed to make softfloat code stateless
|
|
|
|
const uint_fast8_t globalDetectTininess = tininess_afterRounding;
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Software floating-point exception flags.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
enum {
|
|
|
|
flag_inexact = 1,
|
|
|
|
flag_underflow = 2,
|
|
|
|
flag_overflow = 4,
|
|
|
|
flag_infinite = 8,
|
|
|
|
flag_invalid = 16
|
|
|
|
};
|
|
|
|
|
|
|
|
// Disabled to make softfloat code stateless
|
|
|
|
// This function may be changed in the future for better error handling
|
|
|
|
inline void raiseFlags( uint_fast8_t /* flags */)
|
|
|
|
{
|
|
|
|
//exceptionFlags |= flags;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Software floating-point rounding mode.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
enum {
|
|
|
|
round_near_even = 0,
|
|
|
|
round_minMag = 1,
|
|
|
|
round_min = 2,
|
|
|
|
round_max = 3,
|
|
|
|
round_near_maxMag = 4,
|
|
|
|
round_odd = 5
|
|
|
|
};
|
|
|
|
|
|
|
|
//fixed to make softfloat code stateless
|
|
|
|
const uint_fast8_t globalRoundingMode = round_near_even;
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
#define signF32UI( a ) (((uint32_t) (a)>>31) != 0)
|
|
|
|
#define expF32UI( a ) ((int_fast16_t) ((a)>>23) & 0xFF)
|
|
|
|
#define fracF32UI( a ) ((a) & 0x007FFFFF)
|
|
|
|
#define packToF32UI( sign, exp, sig ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<23) + (sig))
|
|
|
|
|
|
|
|
#define isNaNF32UI( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF))
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
#define signF64UI( a ) (((uint64_t) (a)>>63) != 0)
|
|
|
|
#define expF64UI( a ) ((int_fast16_t) ((a)>>52) & 0x7FF)
|
|
|
|
#define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))
|
|
|
|
#define packToF64UI( sign, exp, sig ) ((uint64_t) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<52) + (sig)))
|
|
|
|
|
|
|
|
#define isNaNF64UI( a ) (((~(a) & UINT64_C( 0x7FF0000000000000 )) == 0) && ((a) & UINT64_C( 0x000FFFFFFFFFFFFF )))
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Types used to pass 32-bit and 64-bit floating-point
|
|
|
|
| arguments and results to/from functions. These types must be exactly
|
|
|
|
| 32 bits and 64 bits in size, respectively. Where a
|
|
|
|
| platform has "native" support for IEEE-Standard floating-point formats,
|
|
|
|
| the types below may, if desired, be defined as aliases for the native types
|
|
|
|
| (typically 'float' and 'double').
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
typedef softfloat float32_t;
|
|
|
|
typedef softdouble float64_t;
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Integer-to-floating-point conversion routines.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
float32_t ui32_to_f32( uint32_t );
|
|
|
|
float64_t ui32_to_f64( uint32_t );
|
|
|
|
float32_t ui64_to_f32( uint64_t );
|
|
|
|
float64_t ui64_to_f64( uint64_t );
|
|
|
|
float32_t i32_to_f32( int32_t );
|
|
|
|
float64_t i32_to_f64( int32_t );
|
|
|
|
float32_t i64_to_f32( int64_t );
|
|
|
|
float64_t i64_to_f64( int64_t );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| 32-bit (single-precision) floating-point operations.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool );
|
|
|
|
uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool );
|
|
|
|
int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool );
|
|
|
|
int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool );
|
|
|
|
uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool );
|
|
|
|
uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );
|
|
|
|
int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
|
|
|
|
int_fast64_t f32_to_i64_r_minMag( float32_t, bool );
|
|
|
|
float64_t f32_to_f64( float32_t );
|
|
|
|
float32_t f32_roundToInt( float32_t, uint_fast8_t, bool );
|
|
|
|
float32_t f32_add( float32_t, float32_t );
|
|
|
|
float32_t f32_sub( float32_t, float32_t );
|
|
|
|
float32_t f32_mul( float32_t, float32_t );
|
|
|
|
float32_t f32_mulAdd( float32_t, float32_t, float32_t );
|
|
|
|
float32_t f32_div( float32_t, float32_t );
|
|
|
|
float32_t f32_rem( float32_t, float32_t );
|
|
|
|
float32_t f32_sqrt( float32_t );
|
|
|
|
bool f32_eq( float32_t, float32_t );
|
|
|
|
bool f32_le( float32_t, float32_t );
|
|
|
|
bool f32_lt( float32_t, float32_t );
|
|
|
|
bool f32_eq_signaling( float32_t, float32_t );
|
|
|
|
bool f32_le_quiet( float32_t, float32_t );
|
|
|
|
bool f32_lt_quiet( float32_t, float32_t );
|
|
|
|
bool f32_isSignalingNaN( float32_t );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| 64-bit (double-precision) floating-point operations.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
uint_fast32_t f64_to_ui32( float64_t, uint_fast8_t, bool );
|
|
|
|
uint_fast64_t f64_to_ui64( float64_t, uint_fast8_t, bool );
|
|
|
|
int_fast32_t f64_to_i32( float64_t, uint_fast8_t, bool );
|
|
|
|
int_fast64_t f64_to_i64( float64_t, uint_fast8_t, bool );
|
|
|
|
uint_fast32_t f64_to_ui32_r_minMag( float64_t, bool );
|
|
|
|
uint_fast64_t f64_to_ui64_r_minMag( float64_t, bool );
|
|
|
|
int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
|
|
|
|
int_fast64_t f64_to_i64_r_minMag( float64_t, bool );
|
|
|
|
float32_t f64_to_f32( float64_t );
|
|
|
|
float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
|
|
|
|
float64_t f64_add( float64_t, float64_t );
|
|
|
|
float64_t f64_sub( float64_t, float64_t );
|
|
|
|
float64_t f64_mul( float64_t, float64_t );
|
|
|
|
float64_t f64_mulAdd( float64_t, float64_t, float64_t );
|
|
|
|
float64_t f64_div( float64_t, float64_t );
|
|
|
|
float64_t f64_rem( float64_t, float64_t );
|
|
|
|
float64_t f64_sqrt( float64_t );
|
|
|
|
bool f64_eq( float64_t, float64_t );
|
|
|
|
bool f64_le( float64_t, float64_t );
|
|
|
|
bool f64_lt( float64_t, float64_t );
|
|
|
|
bool f64_eq_signaling( float64_t, float64_t );
|
|
|
|
bool f64_le_quiet( float64_t, float64_t );
|
|
|
|
bool f64_lt_quiet( float64_t, float64_t );
|
|
|
|
bool f64_isSignalingNaN( float64_t );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Ported from OpenCV and added for usability
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
float32_t f32_powi( float32_t x, int y);
|
|
|
|
float64_t f64_powi( float64_t x, int y);
|
|
|
|
|
|
|
|
float32_t f32_exp( float32_t x);
|
|
|
|
float64_t f64_exp(float64_t x);
|
|
|
|
float32_t f32_log(float32_t x);
|
|
|
|
float64_t f64_log(float64_t x);
|
|
|
|
float32_t f32_cbrt(float32_t x);
|
|
|
|
float32_t f32_pow( float32_t x, float32_t y);
|
|
|
|
float64_t f64_pow( float64_t x, float64_t y);
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| softfloat and softdouble methods and members
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
softfloat::softfloat( const uint32_t a ) { *this = ui32_to_f32(a); }
|
|
|
|
softfloat::softfloat( const uint64_t a ) { *this = ui64_to_f32(a); }
|
|
|
|
softfloat::softfloat( const int32_t a ) { *this = i32_to_f32(a); }
|
|
|
|
softfloat::softfloat( const int64_t a ) { *this = i64_to_f32(a); }
|
|
|
|
|
|
|
|
softfloat::operator softdouble() const { return f32_to_f64(*this); }
|
|
|
|
|
|
|
|
softfloat softfloat::operator + (const softfloat& a) const { return f32_add(*this, a); }
|
|
|
|
softfloat softfloat::operator - (const softfloat& a) const { return f32_sub(*this, a); }
|
|
|
|
softfloat softfloat::operator * (const softfloat& a) const { return f32_mul(*this, a); }
|
|
|
|
softfloat softfloat::operator / (const softfloat& a) const { return f32_div(*this, a); }
|
|
|
|
softfloat softfloat::operator % (const softfloat& a) const { return f32_rem(*this, a); }
|
|
|
|
|
|
|
|
bool softfloat::operator == ( const softfloat& a ) const { return f32_eq(*this, a); }
|
|
|
|
bool softfloat::operator != ( const softfloat& a ) const { return !f32_eq(*this, a); }
|
|
|
|
bool softfloat::operator > ( const softfloat& a ) const { return f32_lt(a, *this); }
|
|
|
|
bool softfloat::operator >= ( const softfloat& a ) const { return f32_le(a, *this); }
|
|
|
|
bool softfloat::operator < ( const softfloat& a ) const { return f32_lt(*this, a); }
|
|
|
|
bool softfloat::operator <= ( const softfloat& a ) const { return f32_le(*this, a); }
|
|
|
|
|
|
|
|
softdouble::softdouble( const uint32_t a ) { *this = ui32_to_f64(a); }
|
|
|
|
softdouble::softdouble( const uint64_t a ) { *this = ui64_to_f64(a); }
|
|
|
|
softdouble::softdouble( const int32_t a ) { *this = i32_to_f64(a); }
|
|
|
|
softdouble::softdouble( const int64_t a ) { *this = i64_to_f64(a); }
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
int cvTrunc(const cv::softfloat& a) { return cv::f32_to_i32_r_minMag(a, false); }
|
|
|
|
int cvRound(const cv::softfloat& a) { return cv::f32_to_i32(a, cv::round_near_even, false); }
|
|
|
|
int cvFloor(const cv::softfloat& a) { return cv::f32_to_i32(a, cv::round_min, false); }
|
|
|
|
int cvCeil (const cv::softfloat& a) { return cv::f32_to_i32(a, cv::round_max, false); }
|
|
|
|
|
|
|
|
int cvTrunc(const cv::softdouble& a) { return cv::f64_to_i32_r_minMag(a, false); }
|
|
|
|
int cvRound(const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_near_even, false); }
|
|
|
|
int cvFloor(const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_min, false); }
|
|
|
|
int cvCeil (const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_max, false); }
|
|
|
|
|
|
|
|
namespace cv
|
|
|
|
{
|
|
|
|
softdouble::operator softfloat() const { return f64_to_f32(*this); }
|
|
|
|
|
|
|
|
softdouble softdouble::operator + (const softdouble& a) const { return f64_add(*this, a); }
|
|
|
|
softdouble softdouble::operator - (const softdouble& a) const { return f64_sub(*this, a); }
|
|
|
|
softdouble softdouble::operator * (const softdouble& a) const { return f64_mul(*this, a); }
|
|
|
|
softdouble softdouble::operator / (const softdouble& a) const { return f64_div(*this, a); }
|
|
|
|
softdouble softdouble::operator % (const softdouble& a) const { return f64_rem(*this, a); }
|
|
|
|
|
|
|
|
bool softdouble::operator == (const softdouble& a) const { return f64_eq(*this, a); }
|
|
|
|
bool softdouble::operator != (const softdouble& a) const { return !f64_eq(*this, a); }
|
|
|
|
bool softdouble::operator > (const softdouble& a) const { return f64_lt(a, *this); }
|
|
|
|
bool softdouble::operator >= (const softdouble& a) const { return f64_le(a, *this); }
|
|
|
|
bool softdouble::operator < (const softdouble& a) const { return f64_lt(*this, a); }
|
|
|
|
bool softdouble::operator <= (const softdouble& a) const { return f64_le(*this, a); }
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Overloads for math functions
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
softfloat mulAdd( const softfloat& a, const softfloat& b, const softfloat & c) { return f32_mulAdd(a, b, c); }
|
|
|
|
softdouble mulAdd( const softdouble& a, const softdouble& b, const softdouble& c) { return f64_mulAdd(a, b, c); }
|
|
|
|
|
|
|
|
softfloat sqrt( const softfloat& a ) { return f32_sqrt(a); }
|
|
|
|
softdouble sqrt( const softdouble& a ) { return f64_sqrt(a); }
|
|
|
|
|
|
|
|
softfloat exp( const softfloat& a) { return f32_exp(a); }
|
|
|
|
softdouble exp( const softdouble& a) { return f64_exp(a); }
|
|
|
|
|
|
|
|
softfloat log( const softfloat& a ) { return f32_log(a); }
|
|
|
|
softdouble log( const softdouble& a ) { return f64_log(a); }
|
|
|
|
|
|
|
|
softfloat pow( const softfloat& a, const softfloat& b) { return f32_pow(a, b); }
|
|
|
|
softdouble pow( const softdouble& a, const softdouble& b) { return f64_pow(a, b); }
|
|
|
|
|
|
|
|
softfloat cbrt(const softfloat& a) { return f32_cbrt(a); }
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| The values to return on conversions to 32-bit integer formats that raise an
|
|
|
|
| invalid exception.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
#define ui32_fromPosOverflow 0xFFFFFFFF
|
|
|
|
#define ui32_fromNegOverflow 0
|
|
|
|
#define ui32_fromNaN 0xFFFFFFFF
|
|
|
|
#define i32_fromPosOverflow 0x7FFFFFFF
|
|
|
|
#define i32_fromNegOverflow (-0x7FFFFFFF - 1)
|
|
|
|
#define i32_fromNaN 0x7FFFFFFF
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| The values to return on conversions to 64-bit integer formats that raise an
|
|
|
|
| invalid exception.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
#define ui64_fromPosOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
|
|
|
|
#define ui64_fromNegOverflow 0
|
|
|
|
#define ui64_fromNaN UINT64_C( 0xFFFFFFFFFFFFFFFF )
|
|
|
|
#define i64_fromPosOverflow UINT64_C( 0x7FFFFFFFFFFFFFFF )
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
//#define i64_fromNegOverflow (-UINT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
|
|
|
|
#define i64_fromNegOverflow (~UINT64_C( 0x7FFFFFFFFFFFFFFF ) + 1 - 1)
|
|
|
|
#define i64_fromNaN UINT64_C( 0x7FFFFFFFFFFFFFFF )
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| "Common NaN" structure, used to transfer NaN representations from one format
|
|
|
|
| to another.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
struct commonNaN {
|
|
|
|
bool sign;
|
|
|
|
#ifndef WORDS_BIGENDIAN
|
|
|
|
uint64_t v0, v64;
|
|
|
|
#else
|
|
|
|
uint64_t v64, v0;
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| The bit pattern for a default generated 32-bit floating-point NaN.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
#define defaultNaNF32UI 0xFFC00000
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Returns true when 32-bit unsigned integer `uiA' has the bit pattern of a
|
|
|
|
| 32-bit floating-point signaling NaN.
|
|
|
|
| Note: This macro evaluates its argument more than once.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
#define softfloat_isSigNaNF32UI( uiA ) ((((uiA) & 0x7FC00000) == 0x7F800000) && ((uiA) & 0x003FFFFF))
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Assuming `uiA' has the bit pattern of a 32-bit floating-point NaN, converts
|
|
|
|
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
|
|
|
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
|
|
|
|
| exception is raised.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Converts the common NaN pointed to by `aPtr' into a 32-bit floating-point
|
|
|
|
| NaN, and returns the bit pattern of this value as an unsigned integer.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Interpreting `uiA' and `uiB' as the bit patterns of two 32-bit floating-
|
|
|
|
| point values, at least one of which is a NaN, returns the bit pattern of
|
|
|
|
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
|
|
|
|
| signaling NaN, the invalid exception is raised.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static uint_fast32_t softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| The bit pattern for a default generated 64-bit floating-point NaN.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
#define defaultNaNF64UI UINT64_C( 0xFFF8000000000000 )
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Returns true when 64-bit unsigned integer `uiA' has the bit pattern of a
|
|
|
|
| 64-bit floating-point signaling NaN.
|
|
|
|
| Note: This macro evaluates its argument more than once.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
#define softfloat_isSigNaNF64UI( uiA ) \
|
|
|
|
((((uiA) & UINT64_C( 0x7FF8000000000000 )) == UINT64_C( 0x7FF0000000000000 )) && \
|
|
|
|
((uiA) & UINT64_C( 0x0007FFFFFFFFFFFF )))
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Assuming `uiA' has the bit pattern of a 64-bit floating-point NaN, converts
|
|
|
|
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
|
|
|
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
|
|
|
|
| exception is raised.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Converts the common NaN pointed to by `aPtr' into a 64-bit floating-point
|
|
|
|
| NaN, and returns the bit pattern of this value as an unsigned integer.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Interpreting `uiA' and `uiB' as the bit patterns of two 64-bit floating-
|
|
|
|
| point values, at least one of which is a NaN, returns the bit pattern of
|
|
|
|
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
|
|
|
|
| signaling NaN, the invalid exception is raised.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static uint_fast64_t softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
#ifndef WORDS_BIGENDIAN
|
|
|
|
struct uint128 { uint64_t v0, v64; };
|
|
|
|
struct uint64_extra { uint64_t extra, v; };
|
|
|
|
struct uint128_extra { uint64_t extra; struct uint128 v; };
|
|
|
|
#else
|
|
|
|
struct uint128 { uint64_t v64, v0; };
|
|
|
|
struct uint64_extra { uint64_t v, extra; };
|
|
|
|
struct uint128_extra { struct uint128 v; uint64_t extra; };
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| These macros are used to isolate the differences in word order between big-
|
|
|
|
| endian and little-endian platforms.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
#ifndef WORDS_BIGENDIAN
|
|
|
|
#define wordIncr 1
|
|
|
|
#define indexWord( total, n ) (n)
|
|
|
|
#define indexWordHi( total ) ((total) - 1)
|
|
|
|
#define indexWordLo( total ) 0
|
|
|
|
#define indexMultiword( total, m, n ) (n)
|
|
|
|
#define indexMultiwordHi( total, n ) ((total) - (n))
|
|
|
|
#define indexMultiwordLo( total, n ) 0
|
|
|
|
#define indexMultiwordHiBut( total, n ) (n)
|
|
|
|
#define indexMultiwordLoBut( total, n ) 0
|
|
|
|
#define INIT_UINTM4( v3, v2, v1, v0 ) { v0, v1, v2, v3 }
|
|
|
|
#else
|
|
|
|
#define wordIncr -1
|
|
|
|
#define indexWord( total, n ) ((total) - 1 - (n))
|
|
|
|
#define indexWordHi( total ) 0
|
|
|
|
#define indexWordLo( total ) ((total) - 1)
|
|
|
|
#define indexMultiword( total, m, n ) ((total) - 1 - (m))
|
|
|
|
#define indexMultiwordHi( total, n ) 0
|
|
|
|
#define indexMultiwordLo( total, n ) ((total) - (n))
|
|
|
|
#define indexMultiwordHiBut( total, n ) 0
|
|
|
|
#define indexMultiwordLoBut( total, n ) (n)
|
|
|
|
#define INIT_UINTM4( v3, v2, v1, v0 ) { v3, v2, v1, v0 }
|
|
|
|
#endif
|
|
|
|
|
|
|
|
enum {
|
|
|
|
softfloat_mulAdd_subC = 1,
|
|
|
|
softfloat_mulAdd_subProd = 2
|
|
|
|
};
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static uint_fast32_t softfloat_roundToUI32( bool, uint_fast64_t, uint_fast8_t, bool );
|
|
|
|
static uint_fast64_t softfloat_roundToUI64( bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool );
|
|
|
|
static int_fast32_t softfloat_roundToI32( bool, uint_fast64_t, uint_fast8_t, bool );
|
|
|
|
static int_fast64_t softfloat_roundToI64( bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
struct exp16_sig32 { int_fast16_t exp; uint_fast32_t sig; };
|
|
|
|
static struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t );
|
|
|
|
|
|
|
|
static float32_t softfloat_roundPackToF32( bool, int_fast16_t, uint_fast32_t );
|
|
|
|
static float32_t softfloat_normRoundPackToF32( bool, int_fast16_t, uint_fast32_t );
|
|
|
|
|
|
|
|
static float32_t softfloat_addMagsF32( uint_fast32_t, uint_fast32_t );
|
|
|
|
static float32_t softfloat_subMagsF32( uint_fast32_t, uint_fast32_t );
|
|
|
|
static float32_t softfloat_mulAddF32(uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast8_t );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };
|
|
|
|
static struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t );
|
|
|
|
|
|
|
|
static float64_t softfloat_roundPackToF64( bool, int_fast16_t, uint_fast64_t );
|
|
|
|
static float64_t softfloat_normRoundPackToF64( bool, int_fast16_t, uint_fast64_t );
|
|
|
|
|
|
|
|
static float64_t softfloat_addMagsF64( uint_fast64_t, uint_fast64_t, bool );
|
|
|
|
static float64_t softfloat_subMagsF64( uint_fast64_t, uint_fast64_t, bool );
|
|
|
|
static float64_t softfloat_mulAddF64( uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast8_t );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Shifts 'a' right by the number of bits given in 'dist', which must be in
|
|
|
|
| the range 1 to 63. If any nonzero bits are shifted off, they are "jammed"
|
|
|
|
| into the least-significant bit of the shifted value by setting the least-
|
|
|
|
| significant bit to 1. This shifted-and-jammed value is returned.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
inline uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist )
|
|
|
|
{ return a>>dist | ((a & (((uint_fast64_t) 1<<dist) - 1)) != 0); }
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Shifts 'a' right by the number of bits given in 'dist', which must not
|
|
|
|
| be zero. If any nonzero bits are shifted off, they are "jammed" into the
|
|
|
|
| least-significant bit of the shifted value by setting the least-significant
|
|
|
|
| bit to 1. This shifted-and-jammed value is returned.
|
|
|
|
| The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
|
|
|
|
| greater than 32, the result will be either 0 or 1, depending on whether 'a'
|
|
|
|
| is zero or nonzero.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
inline uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist )
|
|
|
|
{
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
return (dist < 31) ? a>>dist | ((uint32_t) (a<<((~dist + 1) & 31)) != 0) : (a != 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Shifts 'a' right by the number of bits given in 'dist', which must not
|
|
|
|
| be zero. If any nonzero bits are shifted off, they are "jammed" into the
|
|
|
|
| least-significant bit of the shifted value by setting the least-significant
|
|
|
|
| bit to 1. This shifted-and-jammed value is returned.
|
|
|
|
| The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
|
|
|
|
| greater than 64, the result will be either 0 or 1, depending on whether 'a'
|
|
|
|
| is zero or nonzero.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
inline uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist )
|
|
|
|
{
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
return (dist < 63) ? a>>dist | ((uint64_t) (a<<((~dist + 1) & 63)) != 0) : (a != 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| A constant table that translates an 8-bit unsigned integer (the array index)
|
|
|
|
| into the number of leading 0 bits before the most-significant 1 of that
|
|
|
|
| integer. For integer zero (index 0), the corresponding table element is 8.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static const uint_least8_t softfloat_countLeadingZeros8[256] = {
|
|
|
|
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
|
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
|
|
|
};
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Returns the number of leading 0 bits before the most-significant 1 bit of
|
|
|
|
| 'a'. If 'a' is zero, 32 is returned.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
inline uint_fast8_t softfloat_countLeadingZeros32( uint32_t a )
|
|
|
|
{
|
|
|
|
uint_fast8_t count = 0;
|
|
|
|
if ( a < 0x10000 ) {
|
|
|
|
count = 16;
|
|
|
|
a <<= 16;
|
|
|
|
}
|
|
|
|
if ( a < 0x1000000 ) {
|
|
|
|
count += 8;
|
|
|
|
a <<= 8;
|
|
|
|
}
|
|
|
|
count += softfloat_countLeadingZeros8[a>>24];
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Returns the number of leading 0 bits before the most-significant 1 bit of
|
|
|
|
| 'a'. If 'a' is zero, 64 is returned.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static uint_fast8_t softfloat_countLeadingZeros64( uint64_t a );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Returns an approximation to the reciprocal of the number represented by 'a',
|
|
|
|
| where 'a' is interpreted as an unsigned fixed-point number with one integer
|
|
|
|
| bit and 31 fraction bits. The 'a' input must be "normalized", meaning that
|
|
|
|
| its most-significant bit (bit 31) must be 1. Thus, if A is the value of
|
|
|
|
| the fixed-point interpretation of 'a', then 1 <= A < 2. The returned value
|
|
|
|
| is interpreted as a pure unsigned fraction, having no integer bits and 32
|
|
|
|
| fraction bits. The approximation returned is never greater than the true
|
|
|
|
| reciprocal 1/A, and it differs from the true reciprocal by at most 2.006 ulp
|
|
|
|
| (units in the last place).
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
#define softfloat_approxRecip32_1( a ) ((uint32_t) (UINT64_C( 0x7FFFFFFFFFFFFFFF ) / (uint32_t) (a)))
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Returns an approximation to the reciprocal of the square root of the number
|
|
|
|
| represented by 'a', where 'a' is interpreted as an unsigned fixed-point
|
|
|
|
| number either with one integer bit and 31 fraction bits or with two integer
|
|
|
|
| bits and 30 fraction bits. The format of 'a' is determined by 'oddExpA',
|
|
|
|
| which must be either 0 or 1. If 'oddExpA' is 1, 'a' is interpreted as
|
|
|
|
| having one integer bit, and if 'oddExpA' is 0, 'a' is interpreted as having
|
|
|
|
| two integer bits. The 'a' input must be "normalized", meaning that its
|
|
|
|
| most-significant bit (bit 31) must be 1. Thus, if A is the value of the
|
|
|
|
| fixed-point interpretation of 'a', it follows that 1 <= A < 2 when 'oddExpA'
|
|
|
|
| is 1, and 2 <= A < 4 when 'oddExpA' is 0.
|
|
|
|
| The returned value is interpreted as a pure unsigned fraction, having
|
|
|
|
| no integer bits and 32 fraction bits. The approximation returned is never
|
|
|
|
| greater than the true reciprocal 1/sqrt(A), and it differs from the true
|
|
|
|
| reciprocal by at most 2.06 ulp (units in the last place). The approximation
|
|
|
|
| returned is also always within the range 0.5 to 1; thus, the most-
|
|
|
|
| significant bit of the result is always set.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a );
|
|
|
|
|
|
|
|
static const uint16_t softfloat_approxRecipSqrt_1k0s[16] = {
|
|
|
|
0xB4C9, 0xFFAB, 0xAA7D, 0xF11C, 0xA1C5, 0xE4C7, 0x9A43, 0xDA29,
|
|
|
|
0x93B5, 0xD0E5, 0x8DED, 0xC8B7, 0x88C6, 0xC16D, 0x8424, 0xBAE1
|
|
|
|
};
|
|
|
|
static const uint16_t softfloat_approxRecipSqrt_1k1s[16] = {
|
|
|
|
0xA5A5, 0xEA42, 0x8C21, 0xC62D, 0x788F, 0xAA7F, 0x6928, 0x94B6,
|
|
|
|
0x5CC7, 0x8335, 0x52A6, 0x74E2, 0x4A3E, 0x68FE, 0x432B, 0x5EFD
|
|
|
|
};
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' left by the
|
|
|
|
| number of bits given in 'dist', which must be in the range 1 to 63.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
inline struct uint128 softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
|
|
|
|
{
|
|
|
|
struct uint128 z;
|
|
|
|
z.v64 = a64<<dist | a0>>(-dist & 63);
|
|
|
|
z.v0 = a0<<dist;
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' right by the
|
|
|
|
| number of bits given in 'dist', which must be in the range 1 to 63. If any
|
|
|
|
| nonzero bits are shifted off, they are "jammed" into the least-significant
|
|
|
|
| bit of the shifted value by setting the least-significant bit to 1. This
|
|
|
|
| shifted-and-jammed value is returned.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
inline struct uint128 softfloat_shortShiftRightJam128(uint64_t a64, uint64_t a0, uint_fast8_t dist )
|
|
|
|
{
|
|
|
|
uint_fast8_t negDist = -dist;
|
|
|
|
struct uint128 z;
|
|
|
|
z.v64 = a64>>dist;
|
|
|
|
z.v0 =
|
|
|
|
a64<<(negDist & 63) | a0>>dist
|
|
|
|
| ((uint64_t) (a0<<(negDist & 63)) != 0);
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Shifts the 128 bits formed by concatenating 'a' and 'extra' right by 64
|
|
|
|
| _plus_ the number of bits given in 'dist', which must not be zero. This
|
|
|
|
| shifted value is at most 64 nonzero bits and is returned in the 'v' field
|
|
|
|
| of the 'struct uint64_extra' result. The 64-bit 'extra' field of the result
|
|
|
|
| contains a value formed as follows from the bits that were shifted off: The
|
|
|
|
| _last_ bit shifted off is the most-significant bit of the 'extra' field, and
|
|
|
|
| the other 63 bits of the 'extra' field are all zero if and only if _all_but_
|
|
|
|
| _the_last_ bits shifted off were all zero.
|
|
|
|
| (This function makes more sense if 'a' and 'extra' are considered to form
|
|
|
|
| an unsigned fixed-point number with binary point between 'a' and 'extra'.
|
|
|
|
| This fixed-point value is shifted right by the number of bits given in
|
|
|
|
| 'dist', and the integer part of this shifted value is returned in the 'v'
|
|
|
|
| field of the result. The fractional part of the shifted value is modified
|
|
|
|
| as described above and returned in the 'extra' field of the result.)
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
inline struct uint64_extra softfloat_shiftRightJam64Extra(uint64_t a, uint64_t extra, uint_fast32_t dist )
|
|
|
|
{
|
|
|
|
struct uint64_extra z;
|
|
|
|
if ( dist < 64 ) {
|
|
|
|
z.v = a>>dist;
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
z.extra = a<<((~dist + 1) & 63);
|
|
|
|
} else {
|
|
|
|
z.v = 0;
|
|
|
|
z.extra = (dist == 64) ? a : (a != 0);
|
|
|
|
}
|
|
|
|
z.extra |= (extra != 0);
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' right by the
|
|
|
|
| number of bits given in 'dist', which must not be zero. If any nonzero bits
|
|
|
|
| are shifted off, they are "jammed" into the least-significant bit of the
|
|
|
|
| shifted value by setting the least-significant bit to 1. This shifted-and-
|
|
|
|
| jammed value is returned.
|
|
|
|
| The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
|
|
|
|
| greater than 128, the result will be either 0 or 1, depending on whether the
|
|
|
|
| original 128 bits are all zeros.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static struct uint128 softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Returns the sum of the 128-bit integer formed by concatenating 'a64' and
|
|
|
|
| 'a0' and the 128-bit integer formed by concatenating 'b64' and 'b0'. The
|
|
|
|
| addition is modulo 2^128, so any carry out is lost.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
inline struct uint128 softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
|
|
|
|
{
|
|
|
|
struct uint128 z;
|
|
|
|
z.v0 = a0 + b0;
|
|
|
|
z.v64 = a64 + b64 + (z.v0 < a0);
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Returns the difference of the 128-bit integer formed by concatenating 'a64'
|
|
|
|
| and 'a0' and the 128-bit integer formed by concatenating 'b64' and 'b0'.
|
|
|
|
| The subtraction is modulo 2^128, so any borrow out (carry out) is lost.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
inline struct uint128 softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
|
|
|
|
{
|
|
|
|
struct uint128 z;
|
|
|
|
z.v0 = a0 - b0;
|
|
|
|
z.v64 = a64 - b64;
|
|
|
|
z.v64 -= (a0 < b0);
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Returns the 128-bit product of 'a' and 'b'.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b );
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
float32_t f32_add( float32_t a, float32_t b )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA = a.v;
|
|
|
|
uint_fast32_t uiB = b.v;
|
|
|
|
|
|
|
|
if ( signF32UI( uiA ^ uiB ) ) {
|
|
|
|
return softfloat_subMagsF32( uiA, uiB );
|
|
|
|
} else {
|
|
|
|
return softfloat_addMagsF32( uiA, uiB );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t f32_div( float32_t a, float32_t b )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
bool signA;
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast32_t sigA;
|
|
|
|
uint_fast32_t uiB;
|
|
|
|
bool signB;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast32_t sigB;
|
|
|
|
bool signZ;
|
|
|
|
struct exp16_sig32 normExpSig;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
uint_fast64_t sig64A;
|
|
|
|
uint_fast32_t sigZ;
|
|
|
|
uint_fast32_t uiZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
signA = signF32UI( uiA );
|
|
|
|
expA = expF32UI( uiA );
|
|
|
|
sigA = fracF32UI( uiA );
|
|
|
|
uiB = b.v;
|
|
|
|
signB = signF32UI( uiB );
|
|
|
|
expB = expF32UI( uiB );
|
|
|
|
sigB = fracF32UI( uiB );
|
|
|
|
signZ = signA ^ signB;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0xFF ) {
|
|
|
|
if ( sigA ) goto propagateNaN;
|
|
|
|
if ( expB == 0xFF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
goto infinity;
|
|
|
|
}
|
|
|
|
if ( expB == 0xFF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
goto zero;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( ! expB ) {
|
|
|
|
if ( ! sigB ) {
|
|
|
|
if ( ! (expA | sigA) ) goto invalid;
|
|
|
|
raiseFlags( flag_infinite );
|
|
|
|
goto infinity;
|
|
|
|
}
|
|
|
|
normExpSig = softfloat_normSubnormalF32Sig( sigB );
|
|
|
|
expB = normExpSig.exp;
|
|
|
|
sigB = normExpSig.sig;
|
|
|
|
}
|
|
|
|
if ( ! expA ) {
|
|
|
|
if ( ! sigA ) goto zero;
|
|
|
|
normExpSig = softfloat_normSubnormalF32Sig( sigA );
|
|
|
|
expA = normExpSig.exp;
|
|
|
|
sigA = normExpSig.sig;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expZ = expA - expB + 0x7E;
|
|
|
|
sigA |= 0x00800000;
|
|
|
|
sigB |= 0x00800000;
|
|
|
|
if ( sigA < sigB ) {
|
|
|
|
--expZ;
|
|
|
|
sig64A = (uint_fast64_t) sigA<<31;
|
|
|
|
} else {
|
|
|
|
sig64A = (uint_fast64_t) sigA<<30;
|
|
|
|
}
|
|
|
|
sigZ = (uint_fast32_t)(sig64A / sigB); // fixed warning on type cast
|
|
|
|
if ( ! (sigZ & 0x3F) ) sigZ |= ((uint_fast64_t) sigB * sigZ != sig64A);
|
|
|
|
return softfloat_roundPackToF32( signZ, expZ, sigZ );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN:
|
|
|
|
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF32UI;
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
infinity:
|
|
|
|
uiZ = packToF32UI( signZ, 0xFF, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
zero:
|
|
|
|
uiZ = packToF32UI( signZ, 0, 0 );
|
|
|
|
uiZ:
|
|
|
|
return float32_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f32_eq( float32_t a, float32_t b )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
uint_fast32_t uiB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
|
|
|
|
if (
|
|
|
|
softfloat_isSigNaNF32UI( uiA ) || softfloat_isSigNaNF32UI( uiB )
|
|
|
|
) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return (uiA == uiB) || ! (uint32_t) ((uiA | uiB)<<1);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f32_eq_signaling( float32_t a, float32_t b )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
uint_fast32_t uiB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return (uiA == uiB) || ! (uint32_t) ((uiA | uiB)<<1);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f32_isSignalingNaN( float32_t a )
|
|
|
|
{
|
|
|
|
return softfloat_isSigNaNF32UI( a.v );
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f32_le( float32_t a, float32_t b )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
uint_fast32_t uiB;
|
|
|
|
bool signA, signB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
signA = signF32UI( uiA );
|
|
|
|
signB = signF32UI( uiB );
|
|
|
|
return
|
|
|
|
(signA != signB) ? signA || ! (uint32_t) ((uiA | uiB)<<1)
|
|
|
|
: (uiA == uiB) || (signA ^ (uiA < uiB));
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f32_le_quiet( float32_t a, float32_t b )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
uint_fast32_t uiB;
|
|
|
|
bool signA, signB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
|
|
|
|
if (
|
|
|
|
softfloat_isSigNaNF32UI( uiA ) || softfloat_isSigNaNF32UI( uiB )
|
|
|
|
) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
signA = signF32UI( uiA );
|
|
|
|
signB = signF32UI( uiB );
|
|
|
|
return
|
|
|
|
(signA != signB) ? signA || ! (uint32_t) ((uiA | uiB)<<1)
|
|
|
|
: (uiA == uiB) || (signA ^ (uiA < uiB));
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f32_lt( float32_t a, float32_t b )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
uint_fast32_t uiB;
|
|
|
|
bool signA, signB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
signA = signF32UI( uiA );
|
|
|
|
signB = signF32UI( uiB );
|
|
|
|
return
|
|
|
|
(signA != signB) ? signA && ((uint32_t) ((uiA | uiB)<<1) != 0)
|
|
|
|
: (uiA != uiB) && (signA ^ (uiA < uiB));
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f32_lt_quiet( float32_t a, float32_t b )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
uint_fast32_t uiB;
|
|
|
|
bool signA, signB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
|
|
|
|
if (
|
|
|
|
softfloat_isSigNaNF32UI( uiA ) || softfloat_isSigNaNF32UI( uiB )
|
|
|
|
) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
signA = signF32UI( uiA );
|
|
|
|
signB = signF32UI( uiB );
|
|
|
|
return
|
|
|
|
(signA != signB) ? signA && ((uint32_t) ((uiA | uiB)<<1) != 0)
|
|
|
|
: (uiA != uiB) && (signA ^ (uiA < uiB));
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t f32_mulAdd( float32_t a, float32_t b, float32_t c )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
uint_fast32_t uiB;
|
|
|
|
uint_fast32_t uiC;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
uiC = c.v;
|
|
|
|
return softfloat_mulAddF32( uiA, uiB, uiC, 0 );
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t f32_mul( float32_t a, float32_t b )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
bool signA;
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast32_t sigA;
|
|
|
|
uint_fast32_t uiB;
|
|
|
|
bool signB;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast32_t sigB;
|
|
|
|
bool signZ;
|
|
|
|
uint_fast32_t magBits;
|
|
|
|
struct exp16_sig32 normExpSig;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
uint_fast32_t sigZ, uiZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
signA = signF32UI( uiA );
|
|
|
|
expA = expF32UI( uiA );
|
|
|
|
sigA = fracF32UI( uiA );
|
|
|
|
uiB = b.v;
|
|
|
|
signB = signF32UI( uiB );
|
|
|
|
expB = expF32UI( uiB );
|
|
|
|
sigB = fracF32UI( uiB );
|
|
|
|
signZ = signA ^ signB;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0xFF ) {
|
|
|
|
if ( sigA || ((expB == 0xFF) && sigB) ) goto propagateNaN;
|
|
|
|
magBits = expB | sigB;
|
|
|
|
goto infArg;
|
|
|
|
}
|
|
|
|
if ( expB == 0xFF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
magBits = expA | sigA;
|
|
|
|
goto infArg;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( ! expA ) {
|
|
|
|
if ( ! sigA ) goto zero;
|
|
|
|
normExpSig = softfloat_normSubnormalF32Sig( sigA );
|
|
|
|
expA = normExpSig.exp;
|
|
|
|
sigA = normExpSig.sig;
|
|
|
|
}
|
|
|
|
if ( ! expB ) {
|
|
|
|
if ( ! sigB ) goto zero;
|
|
|
|
normExpSig = softfloat_normSubnormalF32Sig( sigB );
|
|
|
|
expB = normExpSig.exp;
|
|
|
|
sigB = normExpSig.sig;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expZ = expA + expB - 0x7F;
|
|
|
|
sigA = (sigA | 0x00800000)<<7;
|
|
|
|
sigB = (sigB | 0x00800000)<<8;
|
|
|
|
sigZ = (uint_fast32_t)softfloat_shortShiftRightJam64( (uint_fast64_t) sigA * sigB, 32 ); //fixed warning on type cast
|
|
|
|
if ( sigZ < 0x40000000 ) {
|
|
|
|
--expZ;
|
|
|
|
sigZ <<= 1;
|
|
|
|
}
|
|
|
|
return softfloat_roundPackToF32( signZ, expZ, sigZ );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN:
|
|
|
|
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
infArg:
|
|
|
|
if ( ! magBits ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF32UI;
|
|
|
|
} else {
|
|
|
|
uiZ = packToF32UI( signZ, 0xFF, 0 );
|
|
|
|
}
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
zero:
|
|
|
|
uiZ = packToF32UI( signZ, 0, 0 );
|
|
|
|
uiZ:
|
|
|
|
return float32_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t f32_rem( float32_t a, float32_t b )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
bool signA;
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast32_t sigA;
|
|
|
|
uint_fast32_t uiB;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast32_t sigB;
|
|
|
|
struct exp16_sig32 normExpSig;
|
|
|
|
uint32_t rem;
|
|
|
|
int_fast16_t expDiff;
|
|
|
|
uint32_t q, recip32, altRem, meanRem;
|
|
|
|
bool signRem;
|
|
|
|
uint_fast32_t uiZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
signA = signF32UI( uiA );
|
|
|
|
expA = expF32UI( uiA );
|
|
|
|
sigA = fracF32UI( uiA );
|
|
|
|
uiB = b.v;
|
|
|
|
expB = expF32UI( uiB );
|
|
|
|
sigB = fracF32UI( uiB );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0xFF ) {
|
|
|
|
if ( sigA || ((expB == 0xFF) && sigB) ) goto propagateNaN;
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
if ( expB == 0xFF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( ! expB ) {
|
|
|
|
if ( ! sigB ) goto invalid;
|
|
|
|
normExpSig = softfloat_normSubnormalF32Sig( sigB );
|
|
|
|
expB = normExpSig.exp;
|
|
|
|
sigB = normExpSig.sig;
|
|
|
|
}
|
|
|
|
if ( ! expA ) {
|
|
|
|
if ( ! sigA ) return a;
|
|
|
|
normExpSig = softfloat_normSubnormalF32Sig( sigA );
|
|
|
|
expA = normExpSig.exp;
|
|
|
|
sigA = normExpSig.sig;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
rem = sigA | 0x00800000;
|
|
|
|
sigB |= 0x00800000;
|
|
|
|
expDiff = expA - expB;
|
|
|
|
if ( expDiff < 1 ) {
|
|
|
|
if ( expDiff < -1 ) return a;
|
|
|
|
sigB <<= 6;
|
|
|
|
if ( expDiff ) {
|
|
|
|
rem <<= 5;
|
|
|
|
q = 0;
|
|
|
|
} else {
|
|
|
|
rem <<= 6;
|
|
|
|
q = (sigB <= rem);
|
|
|
|
if ( q ) rem -= sigB;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
recip32 = softfloat_approxRecip32_1( sigB<<8 );
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
| Changing the shift of `rem' here requires also changing the initial
|
|
|
|
| subtraction from `expDiff'.
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
rem <<= 7;
|
|
|
|
expDiff -= 31;
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
| The scale of `sigB' affects how many bits are obtained during each
|
|
|
|
| cycle of the loop. Currently this is 29 bits per loop iteration,
|
|
|
|
| which is believed to be the maximum possible.
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
sigB <<= 6;
|
|
|
|
for (;;) {
|
|
|
|
q = (rem * (uint_fast64_t) recip32)>>32;
|
|
|
|
if ( expDiff < 0 ) break;
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
rem = ~(q * (uint32_t) sigB) + 1;
|
|
|
|
expDiff -= 29;
|
|
|
|
}
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
| (`expDiff' cannot be less than -30 here.)
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
q >>= ~expDiff & 31;
|
|
|
|
rem = (rem<<(expDiff + 30)) - q * (uint32_t) sigB;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
do {
|
|
|
|
altRem = rem;
|
|
|
|
++q;
|
|
|
|
rem -= sigB;
|
|
|
|
} while ( ! (rem & 0x80000000) );
|
|
|
|
meanRem = rem + altRem;
|
|
|
|
if ( (meanRem & 0x80000000) || (! meanRem && (q & 1)) ) rem = altRem;
|
|
|
|
signRem = signA;
|
|
|
|
if ( 0x80000000 <= rem ) {
|
|
|
|
signRem = ! signRem;
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
rem = ~rem + 1;
|
|
|
|
}
|
|
|
|
return softfloat_normRoundPackToF32( signRem, expB, rem );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN:
|
|
|
|
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
|
|
|
|
goto uiZ;
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF32UI;
|
|
|
|
uiZ:
|
|
|
|
return float32_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t f32_roundToInt( float32_t a, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast32_t uiZ, lastBitMask, roundBitsMask;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
exp = expF32UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp <= 0x7E ) {
|
|
|
|
if ( ! (uint32_t) (uiA<<1) ) return a;
|
|
|
|
if ( exact ) raiseFlags(flag_inexact);
|
|
|
|
uiZ = uiA & packToF32UI( 1, 0, 0 );
|
|
|
|
switch ( roundingMode ) {
|
|
|
|
case round_near_even:
|
|
|
|
if ( ! fracF32UI( uiA ) ) break;
|
|
|
|
case round_near_maxMag:
|
|
|
|
if ( exp == 0x7E ) uiZ |= packToF32UI( 0, 0x7F, 0 );
|
|
|
|
break;
|
|
|
|
case round_min:
|
|
|
|
if ( uiZ ) uiZ = packToF32UI( 1, 0x7F, 0 );
|
|
|
|
break;
|
|
|
|
case round_max:
|
|
|
|
if ( ! uiZ ) uiZ = packToF32UI( 0, 0x7F, 0 );
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( 0x96 <= exp ) {
|
|
|
|
if ( (exp == 0xFF) && fracF32UI( uiA ) ) {
|
|
|
|
uiZ = softfloat_propagateNaNF32UI( uiA, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiZ = uiA;
|
|
|
|
lastBitMask = (uint_fast32_t) 1<<(0x96 - exp);
|
|
|
|
roundBitsMask = lastBitMask - 1;
|
|
|
|
if ( roundingMode == round_near_maxMag ) {
|
|
|
|
uiZ += lastBitMask>>1;
|
|
|
|
} else if ( roundingMode == round_near_even ) {
|
|
|
|
uiZ += lastBitMask>>1;
|
|
|
|
if ( ! (uiZ & roundBitsMask) ) uiZ &= ~lastBitMask;
|
|
|
|
} else if (
|
|
|
|
roundingMode
|
|
|
|
== (signF32UI( uiZ ) ? round_min : round_max)
|
|
|
|
) {
|
|
|
|
uiZ += roundBitsMask;
|
|
|
|
}
|
|
|
|
uiZ &= ~roundBitsMask;
|
|
|
|
if ( exact && (uiZ != uiA) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
uiZ:
|
|
|
|
return float32_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t f32_sqrt( float32_t a )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
bool signA;
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast32_t sigA, uiZ;
|
|
|
|
struct exp16_sig32 normExpSig;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
uint_fast32_t sigZ, shiftedSigZ;
|
|
|
|
uint32_t negRem;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
signA = signF32UI( uiA );
|
|
|
|
expA = expF32UI( uiA );
|
|
|
|
sigA = fracF32UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0xFF ) {
|
|
|
|
if ( sigA ) {
|
|
|
|
uiZ = softfloat_propagateNaNF32UI( uiA, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
if ( ! signA ) return a;
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( signA ) {
|
|
|
|
if ( ! (expA | sigA) ) return a;
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( ! expA ) {
|
|
|
|
if ( ! sigA ) return a;
|
|
|
|
normExpSig = softfloat_normSubnormalF32Sig( sigA );
|
|
|
|
expA = normExpSig.exp;
|
|
|
|
sigA = normExpSig.sig;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expZ = ((expA - 0x7F)>>1) + 0x7E;
|
|
|
|
expA &= 1;
|
|
|
|
sigA = (sigA | 0x00800000)<<8;
|
|
|
|
sigZ =
|
|
|
|
((uint_fast64_t) sigA * softfloat_approxRecipSqrt32_1( expA, sigA ))
|
|
|
|
>>32;
|
|
|
|
if ( expA ) sigZ >>= 1;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sigZ += 2;
|
|
|
|
if ( (sigZ & 0x3F) < 2 ) {
|
|
|
|
shiftedSigZ = sigZ>>2;
|
|
|
|
negRem = shiftedSigZ * shiftedSigZ;
|
|
|
|
sigZ &= ~3;
|
|
|
|
if ( negRem & 0x80000000 ) {
|
|
|
|
sigZ |= 1;
|
|
|
|
} else {
|
|
|
|
if ( negRem ) --sigZ;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return softfloat_roundPackToF32( 0, expZ, sigZ );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF32UI;
|
|
|
|
uiZ:
|
|
|
|
return float32_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t f32_sub( float32_t a, float32_t b )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
uint_fast32_t uiB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( signF32UI( uiA ^ uiB ) ) {
|
|
|
|
return softfloat_addMagsF32( uiA, uiB );
|
|
|
|
} else {
|
|
|
|
return softfloat_subMagsF32( uiA, uiB );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f32_to_f64( float32_t a )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
bool sign;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast32_t frac;
|
|
|
|
struct commonNaN commonNaN;
|
|
|
|
uint_fast64_t uiZ;
|
|
|
|
struct exp16_sig32 normExpSig;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
sign = signF32UI( uiA );
|
|
|
|
exp = expF32UI( uiA );
|
|
|
|
frac = fracF32UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp == 0xFF ) {
|
|
|
|
if ( frac ) {
|
|
|
|
softfloat_f32UIToCommonNaN( uiA, &commonNaN );
|
|
|
|
uiZ = softfloat_commonNaNToF64UI( &commonNaN );
|
|
|
|
} else {
|
|
|
|
uiZ = packToF64UI( sign, 0x7FF, 0 );
|
|
|
|
}
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( ! exp ) {
|
|
|
|
if ( ! frac ) {
|
|
|
|
uiZ = packToF64UI( sign, 0, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
normExpSig = softfloat_normSubnormalF32Sig( frac );
|
|
|
|
exp = normExpSig.exp - 1;
|
|
|
|
frac = normExpSig.sig;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiZ = packToF64UI( sign, exp + 0x380, (uint_fast64_t) frac<<29 );
|
|
|
|
uiZ:
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
int_fast32_t f32_to_i32( float32_t a, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
bool sign;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast32_t sig;
|
|
|
|
uint_fast64_t sig64;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
sign = signF32UI( uiA );
|
|
|
|
exp = expF32UI( uiA );
|
|
|
|
sig = fracF32UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
|
|
|
|
if ( (exp == 0xFF) && sig ) {
|
|
|
|
#if (i32_fromNaN == i32_fromPosOverflow)
|
|
|
|
sign = 0;
|
|
|
|
#elif (i32_fromNaN == i32_fromNegOverflow)
|
|
|
|
sign = 1;
|
|
|
|
#else
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return i32_fromNaN;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp ) sig |= 0x00800000;
|
|
|
|
sig64 = (uint_fast64_t) sig<<32;
|
|
|
|
shiftDist = 0xAA - exp;
|
|
|
|
if ( 0 < shiftDist ) sig64 = softfloat_shiftRightJam64( sig64, shiftDist );
|
|
|
|
return softfloat_roundToI32( sign, sig64, roundingMode, exact );
|
|
|
|
}
|
|
|
|
|
|
|
|
int_fast32_t f32_to_i32_r_minMag( float32_t a, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast32_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
bool sign;
|
|
|
|
int_fast32_t absZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
exp = expF32UI( uiA );
|
|
|
|
sig = fracF32UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
shiftDist = 0x9E - exp;
|
|
|
|
if ( 32 <= shiftDist ) {
|
|
|
|
if ( exact && (exp | sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sign = signF32UI( uiA );
|
|
|
|
if ( shiftDist <= 0 ) {
|
|
|
|
if ( uiA == packToF32UI( 1, 0x9E, 0 ) ) return -0x7FFFFFFF - 1;
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0xFF) && sig ? i32_fromNaN
|
|
|
|
: sign ? i32_fromNegOverflow : i32_fromPosOverflow;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sig = (sig | 0x00800000)<<8;
|
|
|
|
absZ = sig>>shiftDist;
|
|
|
|
if ( exact && ((uint_fast32_t) absZ<<shiftDist != sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return sign ? -absZ : absZ;
|
|
|
|
}
|
|
|
|
|
|
|
|
int_fast64_t f32_to_i64( float32_t a, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
bool sign;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast32_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
uint_fast64_t sig64, extra;
|
|
|
|
struct uint64_extra sig64Extra;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
sign = signF32UI( uiA );
|
|
|
|
exp = expF32UI( uiA );
|
|
|
|
sig = fracF32UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
shiftDist = 0xBE - exp;
|
|
|
|
if ( shiftDist < 0 ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0xFF) && sig ? i64_fromNaN
|
|
|
|
: sign ? i64_fromNegOverflow : i64_fromPosOverflow;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp ) sig |= 0x00800000;
|
|
|
|
sig64 = (uint_fast64_t) sig<<40;
|
|
|
|
extra = 0;
|
|
|
|
if ( shiftDist ) {
|
|
|
|
sig64Extra = softfloat_shiftRightJam64Extra( sig64, 0, shiftDist );
|
|
|
|
sig64 = sig64Extra.v;
|
|
|
|
extra = sig64Extra.extra;
|
|
|
|
}
|
|
|
|
return softfloat_roundToI64( sign, sig64, extra, roundingMode, exact );
|
|
|
|
}
|
|
|
|
|
|
|
|
int_fast64_t f32_to_i64_r_minMag( float32_t a, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast32_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
bool sign;
|
|
|
|
uint_fast64_t sig64;
|
|
|
|
int_fast64_t absZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
exp = expF32UI( uiA );
|
|
|
|
sig = fracF32UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
shiftDist = 0xBE - exp;
|
|
|
|
if ( 64 <= shiftDist ) {
|
|
|
|
if ( exact && (exp | sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sign = signF32UI( uiA );
|
|
|
|
if ( shiftDist <= 0 ) {
|
|
|
|
if ( uiA == packToF32UI( 1, 0xBE, 0 ) ) {
|
|
|
|
return -INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;
|
|
|
|
}
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0xFF) && sig ? i64_fromNaN
|
|
|
|
: sign ? i64_fromNegOverflow : i64_fromPosOverflow;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sig |= 0x00800000;
|
|
|
|
sig64 = (uint_fast64_t) sig<<40;
|
|
|
|
absZ = sig64>>shiftDist;
|
|
|
|
shiftDist = 40 - shiftDist;
|
|
|
|
if ( exact && (shiftDist < 0) && (uint32_t) (sig<<(shiftDist & 31)) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return sign ? -absZ : absZ;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint_fast32_t f32_to_ui32( float32_t a, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
bool sign;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast32_t sig;
|
|
|
|
uint_fast64_t sig64;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
sign = signF32UI( uiA );
|
|
|
|
exp = expF32UI( uiA );
|
|
|
|
sig = fracF32UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
#if (ui32_fromNaN != ui32_fromPosOverflow) || (ui32_fromNaN != ui32_fromNegOverflow)
|
|
|
|
if ( (exp == 0xFF) && sig ) {
|
|
|
|
#if (ui32_fromNaN == ui32_fromPosOverflow)
|
|
|
|
sign = 0;
|
|
|
|
#elif (ui32_fromNaN == ui32_fromNegOverflow)
|
|
|
|
sign = 1;
|
|
|
|
#else
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return ui32_fromNaN;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp ) sig |= 0x00800000;
|
|
|
|
sig64 = (uint_fast64_t) sig<<32;
|
|
|
|
shiftDist = 0xAA - exp;
|
|
|
|
if ( 0 < shiftDist ) sig64 = softfloat_shiftRightJam64( sig64, shiftDist );
|
|
|
|
return softfloat_roundToUI32( sign, sig64, roundingMode, exact );
|
|
|
|
}
|
|
|
|
|
|
|
|
uint_fast32_t f32_to_ui32_r_minMag( float32_t a, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast32_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
bool sign;
|
|
|
|
uint_fast32_t z;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
exp = expF32UI( uiA );
|
|
|
|
sig = fracF32UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
shiftDist = 0x9E - exp;
|
|
|
|
if ( 32 <= shiftDist ) {
|
|
|
|
if ( exact && (exp | sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sign = signF32UI( uiA );
|
|
|
|
if ( sign || (shiftDist < 0) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0xFF) && sig ? ui32_fromNaN
|
|
|
|
: sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sig = (sig | 0x00800000)<<8;
|
|
|
|
z = sig>>shiftDist;
|
|
|
|
if ( exact && (z<<shiftDist != sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint_fast64_t f32_to_ui64( float32_t a, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
bool sign;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast32_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
uint_fast64_t sig64, extra;
|
|
|
|
struct uint64_extra sig64Extra;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
sign = signF32UI( uiA );
|
|
|
|
exp = expF32UI( uiA );
|
|
|
|
sig = fracF32UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
shiftDist = 0xBE - exp;
|
|
|
|
if ( shiftDist < 0 ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0xFF) && sig ? ui64_fromNaN
|
|
|
|
: sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp ) sig |= 0x00800000;
|
|
|
|
sig64 = (uint_fast64_t) sig<<40;
|
|
|
|
extra = 0;
|
|
|
|
if ( shiftDist ) {
|
|
|
|
sig64Extra = softfloat_shiftRightJam64Extra( sig64, 0, shiftDist );
|
|
|
|
sig64 = sig64Extra.v;
|
|
|
|
extra = sig64Extra.extra;
|
|
|
|
}
|
|
|
|
return softfloat_roundToUI64( sign, sig64, extra, roundingMode, exact );
|
|
|
|
}
|
|
|
|
|
|
|
|
uint_fast64_t f32_to_ui64_r_minMag( float32_t a, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast32_t uiA;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast32_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
bool sign;
|
|
|
|
uint_fast64_t sig64, z;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
exp = expF32UI( uiA );
|
|
|
|
sig = fracF32UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
shiftDist = 0xBE - exp;
|
|
|
|
if ( 64 <= shiftDist ) {
|
|
|
|
if ( exact && (exp | sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sign = signF32UI( uiA );
|
|
|
|
if ( sign || (shiftDist < 0) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0xFF) && sig ? ui64_fromNaN
|
|
|
|
: sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sig |= 0x00800000;
|
|
|
|
sig64 = (uint_fast64_t) sig<<40;
|
|
|
|
z = sig64>>shiftDist;
|
|
|
|
shiftDist = 40 - shiftDist;
|
|
|
|
if ( exact && (shiftDist < 0) && (uint32_t) (sig<<(shiftDist & 31)) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_add( float64_t a, float64_t b )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool signA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
bool signB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
signA = signF64UI( uiA );
|
|
|
|
uiB = b.v;
|
|
|
|
signB = signF64UI( uiB );
|
|
|
|
if ( signA == signB ) {
|
|
|
|
return softfloat_addMagsF64( uiA, uiB, signA );
|
|
|
|
} else {
|
|
|
|
return softfloat_subMagsF64( uiA, uiB, signA );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_div( float64_t a, float64_t b )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool signA;
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast64_t sigA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
bool signB;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast64_t sigB;
|
|
|
|
bool signZ;
|
|
|
|
struct exp16_sig64 normExpSig;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
uint32_t recip32, sig32Z, doubleTerm;
|
|
|
|
uint_fast64_t rem;
|
|
|
|
uint32_t q;
|
|
|
|
uint_fast64_t sigZ;
|
|
|
|
uint_fast64_t uiZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
signA = signF64UI( uiA );
|
|
|
|
expA = expF64UI( uiA );
|
|
|
|
sigA = fracF64UI( uiA );
|
|
|
|
uiB = b.v;
|
|
|
|
signB = signF64UI( uiB );
|
|
|
|
expB = expF64UI( uiB );
|
|
|
|
sigB = fracF64UI( uiB );
|
|
|
|
signZ = signA ^ signB;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0x7FF ) {
|
|
|
|
if ( sigA ) goto propagateNaN;
|
|
|
|
if ( expB == 0x7FF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
goto infinity;
|
|
|
|
}
|
|
|
|
if ( expB == 0x7FF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
goto zero;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( ! expB ) {
|
|
|
|
if ( ! sigB ) {
|
|
|
|
if ( ! (expA | sigA) ) goto invalid;
|
|
|
|
raiseFlags( flag_infinite );
|
|
|
|
goto infinity;
|
|
|
|
}
|
|
|
|
normExpSig = softfloat_normSubnormalF64Sig( sigB );
|
|
|
|
expB = normExpSig.exp;
|
|
|
|
sigB = normExpSig.sig;
|
|
|
|
}
|
|
|
|
if ( ! expA ) {
|
|
|
|
if ( ! sigA ) goto zero;
|
|
|
|
normExpSig = softfloat_normSubnormalF64Sig( sigA );
|
|
|
|
expA = normExpSig.exp;
|
|
|
|
sigA = normExpSig.sig;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expZ = expA - expB + 0x3FE;
|
|
|
|
sigA |= UINT64_C( 0x0010000000000000 );
|
|
|
|
sigB |= UINT64_C( 0x0010000000000000 );
|
|
|
|
if ( sigA < sigB ) {
|
|
|
|
--expZ;
|
|
|
|
sigA <<= 11;
|
|
|
|
} else {
|
|
|
|
sigA <<= 10;
|
|
|
|
}
|
|
|
|
sigB <<= 11;
|
|
|
|
recip32 = softfloat_approxRecip32_1( sigB>>32 ) - 2;
|
|
|
|
sig32Z = ((uint32_t) (sigA>>32) * (uint_fast64_t) recip32)>>32;
|
|
|
|
doubleTerm = sig32Z<<1;
|
|
|
|
rem =
|
|
|
|
((sigA - (uint_fast64_t) doubleTerm * (uint32_t) (sigB>>32))<<28)
|
|
|
|
- (uint_fast64_t) doubleTerm * ((uint32_t) sigB>>4);
|
|
|
|
q = (((uint32_t) (rem>>32) * (uint_fast64_t) recip32)>>32) + 4;
|
|
|
|
sigZ = ((uint_fast64_t) sig32Z<<32) + ((uint_fast64_t) q<<4);
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( (sigZ & 0x1FF) < 4<<4 ) {
|
|
|
|
q &= ~7;
|
|
|
|
sigZ &= ~(uint_fast64_t) 0x7F;
|
|
|
|
doubleTerm = q<<1;
|
|
|
|
rem =
|
|
|
|
((rem - (uint_fast64_t) doubleTerm * (uint32_t) (sigB>>32))<<28)
|
|
|
|
- (uint_fast64_t) doubleTerm * ((uint32_t) sigB>>4);
|
|
|
|
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
|
|
|
|
sigZ -= 1<<7;
|
|
|
|
} else {
|
|
|
|
if ( rem ) sigZ |= 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return softfloat_roundPackToF64( signZ, expZ, sigZ );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN:
|
|
|
|
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF64UI;
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
infinity:
|
|
|
|
uiZ = packToF64UI( signZ, 0x7FF, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
zero:
|
|
|
|
uiZ = packToF64UI( signZ, 0, 0 );
|
|
|
|
uiZ:
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f64_eq( float64_t a, float64_t b )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
|
|
|
|
if (
|
|
|
|
softfloat_isSigNaNF64UI( uiA ) || softfloat_isSigNaNF64UI( uiB )
|
|
|
|
) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return (uiA == uiB) || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ));
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f64_eq_signaling( float64_t a, float64_t b )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return (uiA == uiB) || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ));
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f64_isSignalingNaN( float64_t a )
|
|
|
|
{
|
|
|
|
return softfloat_isSigNaNF64UI( a.v );
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f64_le( float64_t a, float64_t b )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
bool signA, signB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
signA = signF64UI( uiA );
|
|
|
|
signB = signF64UI( uiB );
|
|
|
|
return
|
|
|
|
(signA != signB)
|
|
|
|
? signA || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
|
|
|
|
: (uiA == uiB) || (signA ^ (uiA < uiB));
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f64_le_quiet( float64_t a, float64_t b )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
bool signA, signB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
|
|
|
|
if (
|
|
|
|
softfloat_isSigNaNF64UI( uiA ) || softfloat_isSigNaNF64UI( uiB )
|
|
|
|
) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
signA = signF64UI( uiA );
|
|
|
|
signB = signF64UI( uiB );
|
|
|
|
return
|
|
|
|
(signA != signB)
|
|
|
|
? signA || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
|
|
|
|
: (uiA == uiB) || (signA ^ (uiA < uiB));
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f64_lt( float64_t a, float64_t b )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
bool signA, signB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
signA = signF64UI( uiA );
|
|
|
|
signB = signF64UI( uiB );
|
|
|
|
return
|
|
|
|
(signA != signB)
|
|
|
|
? signA && ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
|
|
|
|
: (uiA != uiB) && (signA ^ (uiA < uiB));
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f64_lt_quiet( float64_t a, float64_t b )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
bool signA, signB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
|
|
|
|
if (
|
|
|
|
softfloat_isSigNaNF64UI( uiA ) || softfloat_isSigNaNF64UI( uiB )
|
|
|
|
) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
signA = signF64UI( uiA );
|
|
|
|
signB = signF64UI( uiB );
|
|
|
|
return
|
|
|
|
(signA != signB)
|
|
|
|
? signA && ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
|
|
|
|
: (uiA != uiB) && (signA ^ (uiA < uiB));
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_mulAdd( float64_t a, float64_t b, float64_t c )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
uint_fast64_t uiC;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
uiB = b.v;
|
|
|
|
uiC = c.v;
|
|
|
|
return softfloat_mulAddF64( uiA, uiB, uiC, 0 );
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_mul( float64_t a, float64_t b )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool signA;
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast64_t sigA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
bool signB;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast64_t sigB;
|
|
|
|
bool signZ;
|
|
|
|
uint_fast64_t magBits;
|
|
|
|
struct exp16_sig64 normExpSig;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
struct uint128 sig128Z;
|
|
|
|
uint_fast64_t sigZ, uiZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
signA = signF64UI( uiA );
|
|
|
|
expA = expF64UI( uiA );
|
|
|
|
sigA = fracF64UI( uiA );
|
|
|
|
uiB = b.v;
|
|
|
|
signB = signF64UI( uiB );
|
|
|
|
expB = expF64UI( uiB );
|
|
|
|
sigB = fracF64UI( uiB );
|
|
|
|
signZ = signA ^ signB;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0x7FF ) {
|
|
|
|
if ( sigA || ((expB == 0x7FF) && sigB) ) goto propagateNaN;
|
|
|
|
magBits = expB | sigB;
|
|
|
|
goto infArg;
|
|
|
|
}
|
|
|
|
if ( expB == 0x7FF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
magBits = expA | sigA;
|
|
|
|
goto infArg;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( ! expA ) {
|
|
|
|
if ( ! sigA ) goto zero;
|
|
|
|
normExpSig = softfloat_normSubnormalF64Sig( sigA );
|
|
|
|
expA = normExpSig.exp;
|
|
|
|
sigA = normExpSig.sig;
|
|
|
|
}
|
|
|
|
if ( ! expB ) {
|
|
|
|
if ( ! sigB ) goto zero;
|
|
|
|
normExpSig = softfloat_normSubnormalF64Sig( sigB );
|
|
|
|
expB = normExpSig.exp;
|
|
|
|
sigB = normExpSig.sig;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expZ = expA + expB - 0x3FF;
|
|
|
|
sigA = (sigA | UINT64_C( 0x0010000000000000 ))<<10;
|
|
|
|
sigB = (sigB | UINT64_C( 0x0010000000000000 ))<<11;
|
|
|
|
sig128Z = softfloat_mul64To128( sigA, sigB );
|
|
|
|
sigZ = sig128Z.v64 | (sig128Z.v0 != 0);
|
|
|
|
|
|
|
|
if ( sigZ < UINT64_C( 0x4000000000000000 ) ) {
|
|
|
|
--expZ;
|
|
|
|
sigZ <<= 1;
|
|
|
|
}
|
|
|
|
return softfloat_roundPackToF64( signZ, expZ, sigZ );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN:
|
|
|
|
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
infArg:
|
|
|
|
if ( ! magBits ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF64UI;
|
|
|
|
} else {
|
|
|
|
uiZ = packToF64UI( signZ, 0x7FF, 0 );
|
|
|
|
}
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
zero:
|
|
|
|
uiZ = packToF64UI( signZ, 0, 0 );
|
|
|
|
uiZ:
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_rem( float64_t a, float64_t b )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool signA;
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast64_t sigA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast64_t sigB;
|
|
|
|
struct exp16_sig64 normExpSig;
|
|
|
|
uint64_t rem;
|
|
|
|
int_fast16_t expDiff;
|
|
|
|
uint32_t q, recip32;
|
|
|
|
uint_fast64_t q64;
|
|
|
|
uint64_t altRem, meanRem;
|
|
|
|
bool signRem;
|
|
|
|
uint_fast64_t uiZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
signA = signF64UI( uiA );
|
|
|
|
expA = expF64UI( uiA );
|
|
|
|
sigA = fracF64UI( uiA );
|
|
|
|
uiB = b.v;
|
|
|
|
expB = expF64UI( uiB );
|
|
|
|
sigB = fracF64UI( uiB );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0x7FF ) {
|
|
|
|
if ( sigA || ((expB == 0x7FF) && sigB) ) goto propagateNaN;
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
if ( expB == 0x7FF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( expA < expB - 1 ) return a;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( ! expB ) {
|
|
|
|
if ( ! sigB ) goto invalid;
|
|
|
|
normExpSig = softfloat_normSubnormalF64Sig( sigB );
|
|
|
|
expB = normExpSig.exp;
|
|
|
|
sigB = normExpSig.sig;
|
|
|
|
}
|
|
|
|
if ( ! expA ) {
|
|
|
|
if ( ! sigA ) return a;
|
|
|
|
normExpSig = softfloat_normSubnormalF64Sig( sigA );
|
|
|
|
expA = normExpSig.exp;
|
|
|
|
sigA = normExpSig.sig;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
rem = sigA | UINT64_C( 0x0010000000000000 );
|
|
|
|
sigB |= UINT64_C( 0x0010000000000000 );
|
|
|
|
expDiff = expA - expB;
|
|
|
|
if ( expDiff < 1 ) {
|
|
|
|
if ( expDiff < -1 ) return a;
|
|
|
|
sigB <<= 9;
|
|
|
|
if ( expDiff ) {
|
|
|
|
rem <<= 8;
|
|
|
|
q = 0;
|
|
|
|
} else {
|
|
|
|
rem <<= 9;
|
|
|
|
q = (sigB <= rem);
|
|
|
|
if ( q ) rem -= sigB;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
recip32 = softfloat_approxRecip32_1( sigB>>21 );
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
| Changing the shift of `rem' here requires also changing the initial
|
|
|
|
| subtraction from `expDiff'.
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
rem <<= 9;
|
|
|
|
expDiff -= 30;
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
| The scale of `sigB' affects how many bits are obtained during each
|
|
|
|
| cycle of the loop. Currently this is 29 bits per loop iteration,
|
|
|
|
| the maximum possible.
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
sigB <<= 9;
|
|
|
|
for (;;) {
|
|
|
|
q64 = (uint32_t) (rem>>32) * (uint_fast64_t) recip32;
|
|
|
|
if ( expDiff < 0 ) break;
|
|
|
|
q = (q64 + 0x80000000)>>32;
|
|
|
|
rem <<= 29;
|
|
|
|
rem -= q * (uint64_t) sigB;
|
|
|
|
if ( rem & UINT64_C( 0x8000000000000000 ) ) rem += sigB;
|
|
|
|
expDiff -= 29;
|
|
|
|
}
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
| (`expDiff' cannot be less than -29 here.)
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
q = (uint32_t) (q64>>32)>>(~expDiff & 31);
|
|
|
|
rem = (rem<<(expDiff + 30)) - q * (uint64_t) sigB;
|
|
|
|
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
|
|
|
|
altRem = rem + sigB;
|
|
|
|
goto selectRem;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
do {
|
|
|
|
altRem = rem;
|
|
|
|
++q;
|
|
|
|
rem -= sigB;
|
|
|
|
} while ( ! (rem & UINT64_C( 0x8000000000000000 )) );
|
|
|
|
selectRem:
|
|
|
|
meanRem = rem + altRem;
|
|
|
|
if (
|
|
|
|
(meanRem & UINT64_C( 0x8000000000000000 )) || (! meanRem && (q & 1))
|
|
|
|
) {
|
|
|
|
rem = altRem;
|
|
|
|
}
|
|
|
|
signRem = signA;
|
|
|
|
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
|
|
|
|
signRem = ! signRem;
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
rem = ~rem + 1;
|
|
|
|
}
|
|
|
|
return softfloat_normRoundPackToF64( signRem, expB, rem );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN:
|
|
|
|
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
|
|
|
|
goto uiZ;
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF64UI;
|
|
|
|
uiZ:
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_roundToInt( float64_t a, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast64_t uiZ, lastBitMask, roundBitsMask;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
exp = expF64UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp <= 0x3FE ) {
|
|
|
|
if ( ! (uiA & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) return a;
|
|
|
|
if ( exact ) raiseFlags(flag_inexact);
|
|
|
|
uiZ = uiA & packToF64UI( 1, 0, 0 );
|
|
|
|
switch ( roundingMode ) {
|
|
|
|
case round_near_even:
|
|
|
|
if ( ! fracF64UI( uiA ) ) break;
|
|
|
|
case round_near_maxMag:
|
|
|
|
if ( exp == 0x3FE ) uiZ |= packToF64UI( 0, 0x3FF, 0 );
|
|
|
|
break;
|
|
|
|
case round_min:
|
|
|
|
if ( uiZ ) uiZ = packToF64UI( 1, 0x3FF, 0 );
|
|
|
|
break;
|
|
|
|
case round_max:
|
|
|
|
if ( ! uiZ ) uiZ = packToF64UI( 0, 0x3FF, 0 );
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( 0x433 <= exp ) {
|
|
|
|
if ( (exp == 0x7FF) && fracF64UI( uiA ) ) {
|
|
|
|
uiZ = softfloat_propagateNaNF64UI( uiA, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiZ = uiA;
|
|
|
|
lastBitMask = (uint_fast64_t) 1<<(0x433 - exp);
|
|
|
|
roundBitsMask = lastBitMask - 1;
|
|
|
|
if ( roundingMode == round_near_maxMag ) {
|
|
|
|
uiZ += lastBitMask>>1;
|
|
|
|
} else if ( roundingMode == round_near_even ) {
|
|
|
|
uiZ += lastBitMask>>1;
|
|
|
|
if ( ! (uiZ & roundBitsMask) ) uiZ &= ~lastBitMask;
|
|
|
|
} else if (
|
|
|
|
roundingMode
|
|
|
|
== (signF64UI( uiZ ) ? round_min : round_max)
|
|
|
|
) {
|
|
|
|
uiZ += roundBitsMask;
|
|
|
|
}
|
|
|
|
uiZ &= ~roundBitsMask;
|
|
|
|
if ( exact && (uiZ != uiA) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
uiZ:
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_sqrt( float64_t a )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool signA;
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast64_t sigA, uiZ;
|
|
|
|
struct exp16_sig64 normExpSig;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
uint32_t sig32A, recipSqrt32, sig32Z;
|
|
|
|
uint_fast64_t rem;
|
|
|
|
uint32_t q;
|
|
|
|
uint_fast64_t sigZ, shiftedSigZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
signA = signF64UI( uiA );
|
|
|
|
expA = expF64UI( uiA );
|
|
|
|
sigA = fracF64UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0x7FF ) {
|
|
|
|
if ( sigA ) {
|
|
|
|
uiZ = softfloat_propagateNaNF64UI( uiA, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
if ( ! signA ) return a;
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( signA ) {
|
|
|
|
if ( ! (expA | sigA) ) return a;
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( ! expA ) {
|
|
|
|
if ( ! sigA ) return a;
|
|
|
|
normExpSig = softfloat_normSubnormalF64Sig( sigA );
|
|
|
|
expA = normExpSig.exp;
|
|
|
|
sigA = normExpSig.sig;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
| (`sig32Z' is guaranteed to be a lower bound on the square root of
|
|
|
|
| `sig32A', which makes `sig32Z' also a lower bound on the square root of
|
|
|
|
| `sigA'.)
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expZ = ((expA - 0x3FF)>>1) + 0x3FE;
|
|
|
|
expA &= 1;
|
|
|
|
sigA |= UINT64_C( 0x0010000000000000 );
|
|
|
|
sig32A = (uint32_t)(sigA>>21); //fixed warning on type cast
|
|
|
|
recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
|
|
|
|
sig32Z = ((uint_fast64_t) sig32A * recipSqrt32)>>32;
|
|
|
|
if ( expA ) {
|
|
|
|
sigA <<= 8;
|
|
|
|
sig32Z >>= 1;
|
|
|
|
} else {
|
|
|
|
sigA <<= 9;
|
|
|
|
}
|
|
|
|
rem = sigA - (uint_fast64_t) sig32Z * sig32Z;
|
|
|
|
q = ((uint32_t) (rem>>2) * (uint_fast64_t) recipSqrt32)>>32;
|
|
|
|
sigZ = ((uint_fast64_t) sig32Z<<32 | 1<<5) + ((uint_fast64_t) q<<3);
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( (sigZ & 0x1FF) < 1<<5 ) {
|
|
|
|
sigZ &= ~(uint_fast64_t) 0x3F;
|
|
|
|
shiftedSigZ = sigZ>>6;
|
|
|
|
rem = (sigA<<52) - shiftedSigZ * shiftedSigZ;
|
|
|
|
if ( rem & UINT64_C( 0x8000000000000000 ) ) {
|
|
|
|
--sigZ;
|
|
|
|
} else {
|
|
|
|
if ( rem ) sigZ |= 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return softfloat_roundPackToF64( 0, expZ, sigZ );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF64UI;
|
|
|
|
uiZ:
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_sub( float64_t a, float64_t b )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool signA;
|
|
|
|
uint_fast64_t uiB;
|
|
|
|
bool signB;
|
|
|
|
|
|
|
|
uiA = a.v;
|
|
|
|
signA = signF64UI( uiA );
|
|
|
|
uiB = b.v;
|
|
|
|
signB = signF64UI( uiB );
|
|
|
|
|
|
|
|
if ( signA == signB ) {
|
|
|
|
return softfloat_subMagsF64( uiA, uiB, signA );
|
|
|
|
} else {
|
|
|
|
return softfloat_addMagsF64( uiA, uiB, signA );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t f64_to_f32( float64_t a )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool sign;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast64_t frac;
|
|
|
|
struct commonNaN commonNaN;
|
|
|
|
uint_fast32_t uiZ, frac32;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
sign = signF64UI( uiA );
|
|
|
|
exp = expF64UI( uiA );
|
|
|
|
frac = fracF64UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp == 0x7FF ) {
|
|
|
|
if ( frac ) {
|
|
|
|
softfloat_f64UIToCommonNaN( uiA, &commonNaN );
|
|
|
|
uiZ = softfloat_commonNaNToF32UI( &commonNaN );
|
|
|
|
} else {
|
|
|
|
uiZ = packToF32UI( sign, 0xFF, 0 );
|
|
|
|
}
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
frac32 = (uint_fast32_t)softfloat_shortShiftRightJam64( frac, 22 ); //fixed warning on type cast
|
|
|
|
if ( ! (exp | frac32) ) {
|
|
|
|
uiZ = packToF32UI( sign, 0, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
return softfloat_roundPackToF32( sign, exp - 0x381, frac32 | 0x40000000 );
|
|
|
|
uiZ:
|
|
|
|
return float32_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
int_fast32_t f64_to_i32( float64_t a, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool sign;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast64_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
sign = signF64UI( uiA );
|
|
|
|
exp = expF64UI( uiA );
|
|
|
|
sig = fracF64UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
|
|
|
|
if ( (exp == 0x7FF) && sig ) {
|
|
|
|
#if (i32_fromNaN == i32_fromPosOverflow)
|
|
|
|
sign = 0;
|
|
|
|
#elif (i32_fromNaN == i32_fromNegOverflow)
|
|
|
|
sign = 1;
|
|
|
|
#else
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return i32_fromNaN;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
|
|
|
|
shiftDist = 0x427 - exp;
|
|
|
|
if ( 0 < shiftDist ) sig = softfloat_shiftRightJam64( sig, shiftDist );
|
|
|
|
return softfloat_roundToI32( sign, sig, roundingMode, exact );
|
|
|
|
}
|
|
|
|
|
|
|
|
int_fast32_t f64_to_i32_r_minMag( float64_t a, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast64_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
bool sign;
|
|
|
|
int_fast32_t absZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
exp = expF64UI( uiA );
|
|
|
|
sig = fracF64UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
shiftDist = 0x433 - exp;
|
|
|
|
if ( 53 <= shiftDist ) {
|
|
|
|
if ( exact && (exp | sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sign = signF64UI( uiA );
|
|
|
|
if ( shiftDist < 22 ) {
|
|
|
|
if (
|
|
|
|
sign && (exp == 0x41E) && (sig < UINT64_C( 0x0000000000200000 ))
|
|
|
|
) {
|
|
|
|
if ( exact && sig ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return -0x7FFFFFFF - 1;
|
|
|
|
}
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0x7FF) && sig ? i32_fromNaN
|
|
|
|
: sign ? i32_fromNegOverflow : i32_fromPosOverflow;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sig |= UINT64_C( 0x0010000000000000 );
|
|
|
|
absZ = (int_fast32_t)(sig>>shiftDist); //fixed warning on type cast
|
|
|
|
if ( exact && ((uint_fast64_t) (uint_fast32_t) absZ<<shiftDist != sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return sign ? -absZ : absZ;
|
|
|
|
}
|
|
|
|
|
|
|
|
int_fast64_t f64_to_i64( float64_t a, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool sign;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast64_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
struct uint64_extra sigExtra;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
sign = signF64UI( uiA );
|
|
|
|
exp = expF64UI( uiA );
|
|
|
|
sig = fracF64UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
|
|
|
|
shiftDist = 0x433 - exp;
|
|
|
|
if ( shiftDist <= 0 ) {
|
|
|
|
if ( shiftDist < -11 ) goto invalid;
|
|
|
|
sigExtra.v = sig<<-shiftDist;
|
|
|
|
sigExtra.extra = 0;
|
|
|
|
} else {
|
|
|
|
sigExtra = softfloat_shiftRightJam64Extra( sig, 0, shiftDist );
|
|
|
|
}
|
|
|
|
return
|
|
|
|
softfloat_roundToI64(
|
|
|
|
sign, sigExtra.v, sigExtra.extra, roundingMode, exact );
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0x7FF) && fracF64UI( uiA ) ? i64_fromNaN
|
|
|
|
: sign ? i64_fromNegOverflow : i64_fromPosOverflow;
|
|
|
|
}
|
|
|
|
|
|
|
|
int_fast64_t f64_to_i64_r_minMag( float64_t a, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool sign;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast64_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
int_fast64_t absZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
sign = signF64UI( uiA );
|
|
|
|
exp = expF64UI( uiA );
|
|
|
|
sig = fracF64UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
shiftDist = 0x433 - exp;
|
|
|
|
if ( shiftDist <= 0 ) {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
if ( shiftDist < -10 ) {
|
|
|
|
if ( uiA == packToF64UI( 1, 0x43E, 0 ) ) {
|
|
|
|
return -INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;
|
|
|
|
}
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0x7FF) && sig ? i64_fromNaN
|
|
|
|
: sign ? i64_fromNegOverflow : i64_fromPosOverflow;
|
|
|
|
}
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
sig |= UINT64_C( 0x0010000000000000 );
|
|
|
|
absZ = sig<<-shiftDist;
|
|
|
|
} else {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
if ( 53 <= shiftDist ) {
|
|
|
|
if ( exact && (exp | sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
sig |= UINT64_C( 0x0010000000000000 );
|
|
|
|
absZ = sig>>shiftDist;
|
|
|
|
if ( exact && (absZ<<shiftDist != (int_fast64_t)sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return sign ? -absZ : absZ;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint_fast32_t f64_to_ui32( float64_t a, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool sign;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast64_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
sign = signF64UI( uiA );
|
|
|
|
exp = expF64UI( uiA );
|
|
|
|
sig = fracF64UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
#if (ui32_fromNaN != ui32_fromPosOverflow) || (ui32_fromNaN != ui32_fromNegOverflow)
|
|
|
|
if ( (exp == 0x7FF) && sig ) {
|
|
|
|
#if (ui32_fromNaN == ui32_fromPosOverflow)
|
|
|
|
sign = 0;
|
|
|
|
#elif (ui32_fromNaN == ui32_fromNegOverflow)
|
|
|
|
sign = 1;
|
|
|
|
#else
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return ui32_fromNaN;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
|
|
|
|
shiftDist = 0x427 - exp;
|
|
|
|
if ( 0 < shiftDist ) sig = softfloat_shiftRightJam64( sig, shiftDist );
|
|
|
|
return softfloat_roundToUI32( sign, sig, roundingMode, exact );
|
|
|
|
}
|
|
|
|
|
|
|
|
uint_fast32_t f64_to_ui32_r_minMag( float64_t a, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast64_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
bool sign;
|
|
|
|
uint_fast32_t z;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
exp = expF64UI( uiA );
|
|
|
|
sig = fracF64UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
shiftDist = 0x433 - exp;
|
|
|
|
if ( 53 <= shiftDist ) {
|
|
|
|
if ( exact && (exp | sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sign = signF64UI( uiA );
|
|
|
|
if ( sign || (shiftDist < 21) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0x7FF) && sig ? ui32_fromNaN
|
|
|
|
: sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sig |= UINT64_C( 0x0010000000000000 );
|
|
|
|
z = (uint_fast32_t)(sig>>shiftDist); //fixed warning on type cast
|
|
|
|
if ( exact && ((uint_fast64_t) z<<shiftDist != sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint_fast64_t f64_to_ui64( float64_t a, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
bool sign;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast64_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
struct uint64_extra sigExtra;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
sign = signF64UI( uiA );
|
|
|
|
exp = expF64UI( uiA );
|
|
|
|
sig = fracF64UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
|
|
|
|
shiftDist = 0x433 - exp;
|
|
|
|
if ( shiftDist <= 0 ) {
|
|
|
|
if ( shiftDist < -11 ) goto invalid;
|
|
|
|
sigExtra.v = sig<<-shiftDist;
|
|
|
|
sigExtra.extra = 0;
|
|
|
|
} else {
|
|
|
|
sigExtra = softfloat_shiftRightJam64Extra( sig, 0, shiftDist );
|
|
|
|
}
|
|
|
|
return
|
|
|
|
softfloat_roundToUI64(
|
|
|
|
sign, sigExtra.v, sigExtra.extra, roundingMode, exact );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0x7FF) && fracF64UI( uiA ) ? ui64_fromNaN
|
|
|
|
: sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint_fast64_t f64_to_ui64_r_minMag( float64_t a, bool exact )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiA;
|
|
|
|
int_fast16_t exp;
|
|
|
|
uint_fast64_t sig;
|
|
|
|
int_fast16_t shiftDist;
|
|
|
|
bool sign;
|
|
|
|
uint_fast64_t z;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
uiA = a.v;
|
|
|
|
exp = expF64UI( uiA );
|
|
|
|
sig = fracF64UI( uiA );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
shiftDist = 0x433 - exp;
|
|
|
|
if ( 53 <= shiftDist ) {
|
|
|
|
if ( exact && (exp | sig) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sign = signF64UI( uiA );
|
|
|
|
if ( sign ) goto invalid;
|
|
|
|
if ( shiftDist <= 0 ) {
|
|
|
|
if ( shiftDist < -11 ) goto invalid;
|
|
|
|
z = (sig | UINT64_C( 0x0010000000000000 ))<<-shiftDist;
|
|
|
|
} else {
|
|
|
|
sig |= UINT64_C( 0x0010000000000000 );
|
|
|
|
z = sig>>shiftDist;
|
|
|
|
if ( exact && (uint64_t) (sig<<(-shiftDist & 63)) ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return z;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return
|
|
|
|
(exp == 0x7FF) && sig ? ui64_fromNaN
|
|
|
|
: sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t i32_to_f32( int32_t a )
|
|
|
|
{
|
|
|
|
bool sign;
|
|
|
|
uint_fast32_t absA;
|
|
|
|
|
|
|
|
sign = (a < 0);
|
|
|
|
if ( ! (a & 0x7FFFFFFF) ) {
|
|
|
|
return float32_t::fromRaw(sign ? packToF32UI( 1, 0x9E, 0 ) : 0);
|
|
|
|
}
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
absA = sign ? (~(uint_fast32_t) a + 1) : (uint_fast32_t) a;
|
|
|
|
return softfloat_normRoundPackToF32( sign, 0x9C, absA );
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t i32_to_f64( int32_t a )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiZ;
|
|
|
|
bool sign;
|
|
|
|
uint_fast32_t absA;
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
|
|
|
|
if ( ! a ) {
|
|
|
|
uiZ = 0;
|
|
|
|
} else {
|
|
|
|
sign = (a < 0);
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
absA = sign ? (~(uint_fast32_t) a + 1) : (uint_fast32_t) a;
|
|
|
|
shiftDist = softfloat_countLeadingZeros32( absA ) + 21;
|
|
|
|
uiZ =
|
|
|
|
packToF64UI(
|
|
|
|
sign, 0x432 - shiftDist, (uint_fast64_t) absA<<shiftDist );
|
|
|
|
}
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t i64_to_f32( int64_t a )
|
|
|
|
{
|
|
|
|
bool sign;
|
|
|
|
uint_fast64_t absA;
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
uint_fast32_t sig;
|
|
|
|
|
|
|
|
sign = (a < 0);
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
absA = sign ? (~(uint_fast64_t) a + 1) : (uint_fast64_t) a;
|
|
|
|
shiftDist = softfloat_countLeadingZeros64( absA ) - 40;
|
|
|
|
if ( 0 <= shiftDist ) {
|
|
|
|
return float32_t::fromRaw(a ? packToF32UI(sign, 0x95 - shiftDist, (uint_fast32_t) absA<<shiftDist ) : 0);
|
|
|
|
} else {
|
|
|
|
shiftDist += 7;
|
|
|
|
sig =
|
|
|
|
(shiftDist < 0)
|
|
|
|
? (uint_fast32_t) softfloat_shortShiftRightJam64( absA, -shiftDist ) //fixed warning on type cast
|
|
|
|
: (uint_fast32_t) absA<<shiftDist;
|
|
|
|
return softfloat_roundPackToF32( sign, 0x9C - shiftDist, sig );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t i64_to_f64( int64_t a )
|
|
|
|
{
|
|
|
|
bool sign;
|
|
|
|
uint_fast64_t absA;
|
|
|
|
|
|
|
|
sign = (a < 0);
|
|
|
|
if ( ! (a & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) {
|
|
|
|
return float64_t::fromRaw(sign ? packToF64UI( 1, 0x43E, 0 ) : 0);
|
|
|
|
}
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
absA = sign ? (~(uint_fast64_t) a + 1) : (uint_fast64_t) a;
|
|
|
|
return softfloat_normRoundPackToF64( sign, 0x43C, absA );
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t softfloat_addMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
|
|
|
|
{
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast32_t sigA;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast32_t sigB;
|
|
|
|
int_fast16_t expDiff;
|
|
|
|
uint_fast32_t uiZ;
|
|
|
|
bool signZ;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
uint_fast32_t sigZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expA = expF32UI( uiA );
|
|
|
|
sigA = fracF32UI( uiA );
|
|
|
|
expB = expF32UI( uiB );
|
|
|
|
sigB = fracF32UI( uiB );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expDiff = expA - expB;
|
|
|
|
if ( ! expDiff ) {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
if ( ! expA ) {
|
|
|
|
uiZ = uiA + sigB;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
if ( expA == 0xFF ) {
|
|
|
|
if ( sigA | sigB ) goto propagateNaN;
|
|
|
|
uiZ = uiA;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
signZ = signF32UI( uiA );
|
|
|
|
expZ = expA;
|
|
|
|
sigZ = 0x01000000 + sigA + sigB;
|
|
|
|
if ( ! (sigZ & 1) && (expZ < 0xFE) ) {
|
|
|
|
uiZ = packToF32UI( signZ, expZ, sigZ>>1 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
sigZ <<= 6;
|
|
|
|
} else {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
signZ = signF32UI( uiA );
|
|
|
|
sigA <<= 6;
|
|
|
|
sigB <<= 6;
|
|
|
|
if ( expDiff < 0 ) {
|
|
|
|
if ( expB == 0xFF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
uiZ = packToF32UI( signZ, 0xFF, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
expZ = expB;
|
|
|
|
sigA += expA ? 0x20000000 : sigA;
|
|
|
|
sigA = softfloat_shiftRightJam32( sigA, -expDiff );
|
|
|
|
} else {
|
|
|
|
if ( expA == 0xFF ) {
|
|
|
|
if ( sigA ) goto propagateNaN;
|
|
|
|
uiZ = uiA;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
expZ = expA;
|
|
|
|
sigB += expB ? 0x20000000 : sigB;
|
|
|
|
sigB = softfloat_shiftRightJam32( sigB, expDiff );
|
|
|
|
}
|
|
|
|
sigZ = 0x20000000 + sigA + sigB;
|
|
|
|
if ( sigZ < 0x40000000 ) {
|
|
|
|
--expZ;
|
|
|
|
sigZ <<= 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return softfloat_roundPackToF32( signZ, expZ, sigZ );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN:
|
|
|
|
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
|
|
|
|
uiZ:
|
|
|
|
return float32_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t
|
|
|
|
softfloat_addMagsF64( uint_fast64_t uiA, uint_fast64_t uiB, bool signZ )
|
|
|
|
{
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast64_t sigA;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast64_t sigB;
|
|
|
|
int_fast16_t expDiff;
|
|
|
|
uint_fast64_t uiZ;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
uint_fast64_t sigZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expA = expF64UI( uiA );
|
|
|
|
sigA = fracF64UI( uiA );
|
|
|
|
expB = expF64UI( uiB );
|
|
|
|
sigB = fracF64UI( uiB );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expDiff = expA - expB;
|
|
|
|
if ( ! expDiff ) {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
if ( ! expA ) {
|
|
|
|
uiZ = uiA + sigB;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
if ( expA == 0x7FF ) {
|
|
|
|
if ( sigA | sigB ) goto propagateNaN;
|
|
|
|
uiZ = uiA;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
expZ = expA;
|
|
|
|
sigZ = UINT64_C( 0x0020000000000000 ) + sigA + sigB;
|
|
|
|
sigZ <<= 9;
|
|
|
|
} else {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
sigA <<= 9;
|
|
|
|
sigB <<= 9;
|
|
|
|
if ( expDiff < 0 ) {
|
|
|
|
if ( expB == 0x7FF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
uiZ = packToF64UI( signZ, 0x7FF, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
expZ = expB;
|
|
|
|
if ( expA ) {
|
|
|
|
sigA += UINT64_C( 0x2000000000000000 );
|
|
|
|
} else {
|
|
|
|
sigA <<= 1;
|
|
|
|
}
|
|
|
|
sigA = softfloat_shiftRightJam64( sigA, -expDiff );
|
|
|
|
} else {
|
|
|
|
if ( expA == 0x7FF ) {
|
|
|
|
if ( sigA ) goto propagateNaN;
|
|
|
|
uiZ = uiA;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
expZ = expA;
|
|
|
|
if ( expB ) {
|
|
|
|
sigB += UINT64_C( 0x2000000000000000 );
|
|
|
|
} else {
|
|
|
|
sigB <<= 1;
|
|
|
|
}
|
|
|
|
sigB = softfloat_shiftRightJam64( sigB, expDiff );
|
|
|
|
}
|
|
|
|
sigZ = UINT64_C( 0x2000000000000000 ) + sigA + sigB;
|
|
|
|
if ( sigZ < UINT64_C( 0x4000000000000000 ) ) {
|
|
|
|
--expZ;
|
|
|
|
sigZ <<= 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return softfloat_roundPackToF64( signZ, expZ, sigZ );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN:
|
|
|
|
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
|
|
|
|
uiZ:
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a )
|
|
|
|
{
|
|
|
|
int index;
|
|
|
|
uint16_t eps, r0;
|
|
|
|
uint_fast32_t ESqrR0;
|
|
|
|
uint32_t sigma0;
|
|
|
|
uint_fast32_t r;
|
|
|
|
uint32_t sqrSigma0;
|
|
|
|
|
|
|
|
index = (a>>27 & 0xE) + oddExpA;
|
|
|
|
eps = (uint16_t) (a>>12);
|
|
|
|
r0 = softfloat_approxRecipSqrt_1k0s[index]
|
|
|
|
- ((softfloat_approxRecipSqrt_1k1s[index] * (uint_fast32_t) eps)
|
|
|
|
>>20);
|
|
|
|
ESqrR0 = (uint_fast32_t) r0 * r0;
|
|
|
|
if ( ! oddExpA ) ESqrR0 <<= 1;
|
|
|
|
sigma0 = ~(uint_fast32_t) (((uint32_t) ESqrR0 * (uint_fast64_t) a)>>23);
|
|
|
|
r = (uint_fast32_t)(((uint_fast32_t) r0<<16) + ((r0 * (uint_fast64_t) sigma0)>>25)); //fixed warning on type cast
|
|
|
|
sqrSigma0 = ((uint_fast64_t) sigma0 * sigma0)>>32;
|
|
|
|
r += ((uint32_t) ((r>>1) + (r>>3) - ((uint_fast32_t) r0<<14))
|
|
|
|
* (uint_fast64_t) sqrSigma0)
|
|
|
|
>>48;
|
|
|
|
if ( ! (r & 0x80000000) ) r = 0x80000000;
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Converts the common NaN pointed to by `aPtr' into a 32-bit floating-point
|
|
|
|
| NaN, and returns the bit pattern of this value as an unsigned integer.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr )
|
|
|
|
{
|
|
|
|
return (uint_fast32_t) aPtr->sign<<31 | 0x7FC00000 | aPtr->v64>>41;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Converts the common NaN pointed to by `aPtr' into a 64-bit floating-point
|
|
|
|
| NaN, and returns the bit pattern of this value as an unsigned integer.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr )
|
|
|
|
{
|
|
|
|
return
|
|
|
|
(uint_fast64_t) aPtr->sign<<63 | UINT64_C( 0x7FF8000000000000 )
|
|
|
|
| aPtr->v64>>12;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint_fast8_t softfloat_countLeadingZeros64( uint64_t a )
|
|
|
|
{
|
|
|
|
uint_fast8_t count;
|
|
|
|
uint32_t a32;
|
|
|
|
|
|
|
|
count = 0;
|
|
|
|
a32 = a>>32;
|
|
|
|
if ( ! a32 ) {
|
|
|
|
count = 32;
|
|
|
|
a32 = (uint32_t) a; //fixed warning on type cast
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
| From here, result is current count + count leading zeros of `a32'.
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( a32 < 0x10000 ) {
|
|
|
|
count += 16;
|
|
|
|
a32 <<= 16;
|
|
|
|
}
|
|
|
|
if ( a32 < 0x1000000 ) {
|
|
|
|
count += 8;
|
|
|
|
a32 <<= 8;
|
|
|
|
}
|
|
|
|
count += softfloat_countLeadingZeros8[a32>>24];
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Assuming `uiA' has the bit pattern of a 32-bit floating-point NaN, converts
|
|
|
|
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
|
|
|
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
|
|
|
|
| exception is raised.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr )
|
|
|
|
{
|
|
|
|
if ( softfloat_isSigNaNF32UI( uiA ) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
}
|
|
|
|
zPtr->sign = (uiA>>31) != 0;
|
|
|
|
zPtr->v64 = (uint_fast64_t) uiA<<41;
|
|
|
|
zPtr->v0 = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Assuming `uiA' has the bit pattern of a 64-bit floating-point NaN, converts
|
|
|
|
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
|
|
|
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
|
|
|
|
| exception is raised.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr )
|
|
|
|
{
|
|
|
|
if ( softfloat_isSigNaNF64UI( uiA ) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
}
|
|
|
|
zPtr->sign = (uiA>>63) != 0;
|
|
|
|
zPtr->v64 = uiA<<12;
|
|
|
|
zPtr->v0 = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b )
|
|
|
|
{
|
|
|
|
uint32_t a32, a0, b32, b0;
|
|
|
|
struct uint128 z;
|
|
|
|
uint64_t mid1, mid;
|
|
|
|
|
|
|
|
a32 = a>>32;
|
|
|
|
a0 = (uint32_t)a; //fixed warning on type cast
|
|
|
|
b32 = b>>32;
|
|
|
|
b0 = (uint32_t) b; //fixed warning on type cast
|
|
|
|
z.v0 = (uint_fast64_t) a0 * b0;
|
|
|
|
mid1 = (uint_fast64_t) a32 * b0;
|
|
|
|
mid = mid1 + (uint_fast64_t) a0 * b32;
|
|
|
|
z.v64 = (uint_fast64_t) a32 * b32;
|
|
|
|
z.v64 += (uint_fast64_t) (mid < mid1)<<32 | mid>>32;
|
|
|
|
mid <<= 32;
|
|
|
|
z.v0 += mid;
|
|
|
|
z.v64 += (z.v0 < mid);
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t
|
|
|
|
softfloat_mulAddF32(
|
|
|
|
uint_fast32_t uiA, uint_fast32_t uiB, uint_fast32_t uiC, uint_fast8_t op )
|
|
|
|
{
|
|
|
|
bool signA;
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast32_t sigA;
|
|
|
|
bool signB;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast32_t sigB;
|
|
|
|
bool signC;
|
|
|
|
int_fast16_t expC;
|
|
|
|
uint_fast32_t sigC;
|
|
|
|
bool signProd;
|
|
|
|
uint_fast32_t magBits, uiZ;
|
|
|
|
struct exp16_sig32 normExpSig;
|
|
|
|
int_fast16_t expProd;
|
|
|
|
uint_fast64_t sigProd;
|
|
|
|
bool signZ;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
uint_fast32_t sigZ;
|
|
|
|
int_fast16_t expDiff;
|
|
|
|
uint_fast64_t sig64Z, sig64C;
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
signA = signF32UI( uiA );
|
|
|
|
expA = expF32UI( uiA );
|
|
|
|
sigA = fracF32UI( uiA );
|
|
|
|
signB = signF32UI( uiB );
|
|
|
|
expB = expF32UI( uiB );
|
|
|
|
sigB = fracF32UI( uiB );
|
|
|
|
signC = signF32UI( uiC ) ^ (op == softfloat_mulAdd_subC);
|
|
|
|
expC = expF32UI( uiC );
|
|
|
|
sigC = fracF32UI( uiC );
|
|
|
|
signProd = signA ^ signB ^ (op == softfloat_mulAdd_subProd);
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0xFF ) {
|
|
|
|
if ( sigA || ((expB == 0xFF) && sigB) ) goto propagateNaN_ABC;
|
|
|
|
magBits = expB | sigB;
|
|
|
|
goto infProdArg;
|
|
|
|
}
|
|
|
|
if ( expB == 0xFF ) {
|
|
|
|
if ( sigB ) goto propagateNaN_ABC;
|
|
|
|
magBits = expA | sigA;
|
|
|
|
goto infProdArg;
|
|
|
|
}
|
|
|
|
if ( expC == 0xFF ) {
|
|
|
|
if ( sigC ) {
|
|
|
|
uiZ = 0;
|
|
|
|
goto propagateNaN_ZC;
|
|
|
|
}
|
|
|
|
uiZ = uiC;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( ! expA ) {
|
|
|
|
if ( ! sigA ) goto zeroProd;
|
|
|
|
normExpSig = softfloat_normSubnormalF32Sig( sigA );
|
|
|
|
expA = normExpSig.exp;
|
|
|
|
sigA = normExpSig.sig;
|
|
|
|
}
|
|
|
|
if ( ! expB ) {
|
|
|
|
if ( ! sigB ) goto zeroProd;
|
|
|
|
normExpSig = softfloat_normSubnormalF32Sig( sigB );
|
|
|
|
expB = normExpSig.exp;
|
|
|
|
sigB = normExpSig.sig;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expProd = expA + expB - 0x7E;
|
|
|
|
sigA = (sigA | 0x00800000)<<7;
|
|
|
|
sigB = (sigB | 0x00800000)<<7;
|
|
|
|
sigProd = (uint_fast64_t) sigA * sigB;
|
|
|
|
if ( sigProd < UINT64_C( 0x2000000000000000 ) ) {
|
|
|
|
--expProd;
|
|
|
|
sigProd <<= 1;
|
|
|
|
}
|
|
|
|
signZ = signProd;
|
|
|
|
if ( ! expC ) {
|
|
|
|
if ( ! sigC ) {
|
|
|
|
expZ = expProd - 1;
|
|
|
|
sigZ = (uint_fast32_t) softfloat_shortShiftRightJam64( sigProd, 31 ); //fixed warning on type cast
|
|
|
|
goto roundPack;
|
|
|
|
}
|
|
|
|
normExpSig = softfloat_normSubnormalF32Sig( sigC );
|
|
|
|
expC = normExpSig.exp;
|
|
|
|
sigC = normExpSig.sig;
|
|
|
|
}
|
|
|
|
sigC = (sigC | 0x00800000)<<6;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expDiff = expProd - expC;
|
|
|
|
if ( signProd == signC ) {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
if ( expDiff <= 0 ) {
|
|
|
|
expZ = expC;
|
|
|
|
sigZ = sigC + (uint_fast32_t) softfloat_shiftRightJam64( sigProd, 32 - expDiff ); //fixed warning on type cast
|
|
|
|
} else {
|
|
|
|
expZ = expProd;
|
|
|
|
sig64Z =
|
|
|
|
sigProd
|
|
|
|
+ softfloat_shiftRightJam64(
|
|
|
|
(uint_fast64_t) sigC<<32, expDiff );
|
|
|
|
sigZ = (uint_fast32_t) softfloat_shortShiftRightJam64( sig64Z, 32 ); //fixed warning on type cast
|
|
|
|
}
|
|
|
|
if ( sigZ < 0x40000000 ) {
|
|
|
|
--expZ;
|
|
|
|
sigZ <<= 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
sig64C = (uint_fast64_t) sigC<<32;
|
|
|
|
if ( expDiff < 0 ) {
|
|
|
|
signZ = signC;
|
|
|
|
expZ = expC;
|
|
|
|
sig64Z = sig64C - softfloat_shiftRightJam64( sigProd, -expDiff );
|
|
|
|
} else if ( ! expDiff ) {
|
|
|
|
expZ = expProd;
|
|
|
|
sig64Z = sigProd - sig64C;
|
|
|
|
if ( ! sig64Z ) goto completeCancellation;
|
|
|
|
if ( sig64Z & UINT64_C( 0x8000000000000000 ) ) {
|
|
|
|
signZ = ! signZ;
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
sig64Z = ~sig64Z + 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
expZ = expProd;
|
|
|
|
sig64Z = sigProd - softfloat_shiftRightJam64( sig64C, expDiff );
|
|
|
|
}
|
|
|
|
shiftDist = softfloat_countLeadingZeros64( sig64Z ) - 1;
|
|
|
|
expZ -= shiftDist;
|
|
|
|
shiftDist -= 32;
|
|
|
|
if ( shiftDist < 0 ) {
|
|
|
|
sigZ = (uint_fast32_t) softfloat_shortShiftRightJam64( sig64Z, -shiftDist ); //fixed warning on type cast
|
|
|
|
} else {
|
|
|
|
sigZ = (uint_fast32_t) sig64Z<<shiftDist;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
roundPack:
|
|
|
|
return softfloat_roundPackToF32( signZ, expZ, sigZ );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN_ABC:
|
|
|
|
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
|
|
|
|
goto propagateNaN_ZC;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
infProdArg:
|
|
|
|
if ( magBits ) {
|
|
|
|
uiZ = packToF32UI( signProd, 0xFF, 0 );
|
|
|
|
if ( expC != 0xFF ) goto uiZ;
|
|
|
|
if ( sigC ) goto propagateNaN_ZC;
|
|
|
|
if ( signProd == signC ) goto uiZ;
|
|
|
|
}
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF32UI;
|
|
|
|
propagateNaN_ZC:
|
|
|
|
uiZ = softfloat_propagateNaNF32UI( uiZ, uiC );
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
zeroProd:
|
|
|
|
uiZ = uiC;
|
|
|
|
if ( ! (expC | sigC) && (signProd != signC) ) {
|
|
|
|
completeCancellation:
|
|
|
|
uiZ =
|
|
|
|
packToF32UI((globalRoundingMode == round_min), 0, 0 );
|
|
|
|
}
|
|
|
|
uiZ:
|
|
|
|
return float32_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t
|
|
|
|
softfloat_mulAddF64(
|
|
|
|
uint_fast64_t uiA, uint_fast64_t uiB, uint_fast64_t uiC, uint_fast8_t op )
|
|
|
|
{
|
|
|
|
bool signA;
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast64_t sigA;
|
|
|
|
bool signB;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast64_t sigB;
|
|
|
|
bool signC;
|
|
|
|
int_fast16_t expC;
|
|
|
|
uint_fast64_t sigC;
|
|
|
|
bool signZ;
|
|
|
|
uint_fast64_t magBits, uiZ;
|
|
|
|
struct exp16_sig64 normExpSig;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
struct uint128 sig128Z;
|
|
|
|
uint_fast64_t sigZ;
|
|
|
|
int_fast16_t expDiff;
|
|
|
|
struct uint128 sig128C;
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
signA = signF64UI( uiA );
|
|
|
|
expA = expF64UI( uiA );
|
|
|
|
sigA = fracF64UI( uiA );
|
|
|
|
signB = signF64UI( uiB );
|
|
|
|
expB = expF64UI( uiB );
|
|
|
|
sigB = fracF64UI( uiB );
|
|
|
|
signC = signF64UI( uiC ) ^ (op == softfloat_mulAdd_subC);
|
|
|
|
expC = expF64UI( uiC );
|
|
|
|
sigC = fracF64UI( uiC );
|
|
|
|
signZ = signA ^ signB ^ (op == softfloat_mulAdd_subProd);
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0x7FF ) {
|
|
|
|
if ( sigA || ((expB == 0x7FF) && sigB) ) goto propagateNaN_ABC;
|
|
|
|
magBits = expB | sigB;
|
|
|
|
goto infProdArg;
|
|
|
|
}
|
|
|
|
if ( expB == 0x7FF ) {
|
|
|
|
if ( sigB ) goto propagateNaN_ABC;
|
|
|
|
magBits = expA | sigA;
|
|
|
|
goto infProdArg;
|
|
|
|
}
|
|
|
|
if ( expC == 0x7FF ) {
|
|
|
|
if ( sigC ) {
|
|
|
|
uiZ = 0;
|
|
|
|
goto propagateNaN_ZC;
|
|
|
|
}
|
|
|
|
uiZ = uiC;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( ! expA ) {
|
|
|
|
if ( ! sigA ) goto zeroProd;
|
|
|
|
normExpSig = softfloat_normSubnormalF64Sig( sigA );
|
|
|
|
expA = normExpSig.exp;
|
|
|
|
sigA = normExpSig.sig;
|
|
|
|
}
|
|
|
|
if ( ! expB ) {
|
|
|
|
if ( ! sigB ) goto zeroProd;
|
|
|
|
normExpSig = softfloat_normSubnormalF64Sig( sigB );
|
|
|
|
expB = normExpSig.exp;
|
|
|
|
sigB = normExpSig.sig;
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expZ = expA + expB - 0x3FE;
|
|
|
|
sigA = (sigA | UINT64_C( 0x0010000000000000 ))<<10;
|
|
|
|
sigB = (sigB | UINT64_C( 0x0010000000000000 ))<<10;
|
|
|
|
sig128Z = softfloat_mul64To128( sigA, sigB );
|
|
|
|
if ( sig128Z.v64 < UINT64_C( 0x2000000000000000 ) ) {
|
|
|
|
--expZ;
|
|
|
|
sig128Z =
|
|
|
|
softfloat_add128(
|
|
|
|
sig128Z.v64, sig128Z.v0, sig128Z.v64, sig128Z.v0 );
|
|
|
|
}
|
|
|
|
if ( ! expC ) {
|
|
|
|
if ( ! sigC ) {
|
|
|
|
--expZ;
|
|
|
|
sigZ = sig128Z.v64<<1 | (sig128Z.v0 != 0);
|
|
|
|
goto roundPack;
|
|
|
|
}
|
|
|
|
normExpSig = softfloat_normSubnormalF64Sig( sigC );
|
|
|
|
expC = normExpSig.exp;
|
|
|
|
sigC = normExpSig.sig;
|
|
|
|
}
|
|
|
|
sigC = (sigC | UINT64_C( 0x0010000000000000 ))<<9;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
//fixed initialization
|
|
|
|
sig128C.v0 = sig128C.v64 = 0;
|
|
|
|
expDiff = expZ - expC;
|
|
|
|
if ( expDiff < 0 ) {
|
|
|
|
expZ = expC;
|
|
|
|
if ( (signZ == signC) || (expDiff < -1) ) {
|
|
|
|
sig128Z.v64 = softfloat_shiftRightJam64( sig128Z.v64, -expDiff );
|
|
|
|
} else {
|
|
|
|
sig128Z =
|
|
|
|
softfloat_shortShiftRightJam128( sig128Z.v64, sig128Z.v0, 1 );
|
|
|
|
}
|
|
|
|
} else if ( expDiff ) {
|
|
|
|
sig128C = softfloat_shiftRightJam128( sigC, 0, expDiff );
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( signZ == signC ) {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
if ( expDiff <= 0 ) {
|
|
|
|
sigZ = (sigC + sig128Z.v64) | (sig128Z.v0 != 0);
|
|
|
|
} else {
|
|
|
|
sig128Z =
|
|
|
|
softfloat_add128(
|
|
|
|
sig128Z.v64, sig128Z.v0, sig128C.v64, sig128C.v0 );
|
|
|
|
sigZ = sig128Z.v64 | (sig128Z.v0 != 0);
|
|
|
|
}
|
|
|
|
if ( sigZ < UINT64_C( 0x4000000000000000 ) ) {
|
|
|
|
--expZ;
|
|
|
|
sigZ <<= 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
if ( expDiff < 0 ) {
|
|
|
|
signZ = signC;
|
|
|
|
sig128Z = softfloat_sub128( sigC, 0, sig128Z.v64, sig128Z.v0 );
|
|
|
|
} else if ( ! expDiff ) {
|
|
|
|
sig128Z.v64 = sig128Z.v64 - sigC;
|
|
|
|
if ( ! (sig128Z.v64 | sig128Z.v0) ) goto completeCancellation;
|
|
|
|
if ( sig128Z.v64 & UINT64_C( 0x8000000000000000 ) ) {
|
|
|
|
signZ = ! signZ;
|
|
|
|
sig128Z = softfloat_sub128( 0, 0, sig128Z.v64, sig128Z.v0 );
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
sig128Z =
|
|
|
|
softfloat_sub128(
|
|
|
|
sig128Z.v64, sig128Z.v0, sig128C.v64, sig128C.v0 );
|
|
|
|
}
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
if ( ! sig128Z.v64 ) {
|
|
|
|
expZ -= 64;
|
|
|
|
sig128Z.v64 = sig128Z.v0;
|
|
|
|
sig128Z.v0 = 0;
|
|
|
|
}
|
|
|
|
shiftDist = softfloat_countLeadingZeros64( sig128Z.v64 ) - 1;
|
|
|
|
expZ -= shiftDist;
|
|
|
|
if ( shiftDist < 0 ) {
|
|
|
|
sigZ = softfloat_shortShiftRightJam64( sig128Z.v64, -shiftDist );
|
|
|
|
} else {
|
|
|
|
sig128Z =
|
|
|
|
softfloat_shortShiftLeft128(
|
|
|
|
sig128Z.v64, sig128Z.v0, shiftDist );
|
|
|
|
sigZ = sig128Z.v64;
|
|
|
|
}
|
|
|
|
sigZ |= (sig128Z.v0 != 0);
|
|
|
|
}
|
|
|
|
roundPack:
|
|
|
|
return softfloat_roundPackToF64( signZ, expZ, sigZ );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN_ABC:
|
|
|
|
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
|
|
|
|
goto propagateNaN_ZC;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
infProdArg:
|
|
|
|
if ( magBits ) {
|
|
|
|
uiZ = packToF64UI( signZ, 0x7FF, 0 );
|
|
|
|
if ( expC != 0x7FF ) goto uiZ;
|
|
|
|
if ( sigC ) goto propagateNaN_ZC;
|
|
|
|
if ( signZ == signC ) goto uiZ;
|
|
|
|
}
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF64UI;
|
|
|
|
propagateNaN_ZC:
|
|
|
|
uiZ = softfloat_propagateNaNF64UI( uiZ, uiC );
|
|
|
|
goto uiZ;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
zeroProd:
|
|
|
|
uiZ = uiC;
|
|
|
|
if ( ! (expC | sigC) && (signZ != signC) ) {
|
|
|
|
completeCancellation:
|
|
|
|
uiZ =
|
|
|
|
packToF64UI((globalRoundingMode == round_min), 0, 0 );
|
|
|
|
}
|
|
|
|
uiZ:
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t
|
|
|
|
softfloat_normRoundPackToF32( bool sign, int_fast16_t exp, uint_fast32_t sig )
|
|
|
|
{
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
|
|
|
|
shiftDist = softfloat_countLeadingZeros32( sig ) - 1;
|
|
|
|
exp -= shiftDist;
|
|
|
|
if ( (7 <= shiftDist) && ((unsigned int) exp < 0xFD) ) {
|
|
|
|
return float32_t::fromRaw(packToF32UI( sign, sig ? exp : 0, sig<<(shiftDist - 7) ));
|
|
|
|
} else {
|
|
|
|
return softfloat_roundPackToF32( sign, exp, sig<<shiftDist );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t
|
|
|
|
softfloat_normRoundPackToF64( bool sign, int_fast16_t exp, uint_fast64_t sig )
|
|
|
|
{
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
|
|
|
|
shiftDist = softfloat_countLeadingZeros64( sig ) - 1;
|
|
|
|
exp -= shiftDist;
|
|
|
|
if ( (10 <= shiftDist) && ((unsigned int) exp < 0x7FD) ) {
|
|
|
|
return float64_t::fromRaw(packToF64UI( sign, sig ? exp : 0, sig<<(shiftDist - 10) ));
|
|
|
|
} else {
|
|
|
|
return softfloat_roundPackToF64( sign, exp, sig<<shiftDist );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t sig )
|
|
|
|
{
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
struct exp16_sig32 z;
|
|
|
|
|
|
|
|
shiftDist = softfloat_countLeadingZeros32( sig ) - 8;
|
|
|
|
z.exp = 1 - shiftDist;
|
|
|
|
z.sig = sig<<shiftDist;
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t sig )
|
|
|
|
{
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
struct exp16_sig64 z;
|
|
|
|
|
|
|
|
shiftDist = softfloat_countLeadingZeros64( sig ) - 11;
|
|
|
|
z.exp = 1 - shiftDist;
|
|
|
|
z.sig = sig<<shiftDist;
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Interpreting `uiA' and `uiB' as the bit patterns of two 32-bit floating-
|
|
|
|
| point values, at least one of which is a NaN, returns the bit pattern of
|
|
|
|
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
|
|
|
|
| signaling NaN, the invalid exception is raised.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
uint_fast32_t
|
|
|
|
softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB )
|
|
|
|
{
|
|
|
|
bool isSigNaNA;
|
|
|
|
|
|
|
|
isSigNaNA = softfloat_isSigNaNF32UI( uiA );
|
|
|
|
if ( isSigNaNA || softfloat_isSigNaNF32UI( uiB ) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
if ( isSigNaNA ) return uiA | 0x00400000;
|
|
|
|
}
|
|
|
|
return (isNaNF32UI( uiA ) ? uiA : uiB) | 0x00400000;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Interpreting `uiA' and `uiB' as the bit patterns of two 64-bit floating-
|
|
|
|
| point values, at least one of which is a NaN, returns the bit pattern of
|
|
|
|
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
|
|
|
|
| signaling NaN, the invalid exception is raised.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
uint_fast64_t
|
|
|
|
softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB )
|
|
|
|
{
|
|
|
|
bool isSigNaNA;
|
|
|
|
|
|
|
|
isSigNaNA = softfloat_isSigNaNF64UI( uiA );
|
|
|
|
if ( isSigNaNA || softfloat_isSigNaNF64UI( uiB ) ) {
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
if ( isSigNaNA ) return uiA | UINT64_C( 0x0008000000000000 );
|
|
|
|
}
|
|
|
|
return (isNaNF64UI( uiA ) ? uiA : uiB) | UINT64_C( 0x0008000000000000 );
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t
|
|
|
|
softfloat_roundPackToF32( bool sign, int_fast16_t exp, uint_fast32_t sig )
|
|
|
|
{
|
|
|
|
uint_fast8_t roundingMode;
|
|
|
|
bool roundNearEven;
|
|
|
|
uint_fast8_t roundIncrement, roundBits;
|
|
|
|
bool isTiny;
|
|
|
|
uint_fast32_t uiZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
roundingMode = globalRoundingMode;
|
|
|
|
roundNearEven = (roundingMode == round_near_even);
|
|
|
|
roundIncrement = 0x40;
|
|
|
|
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
|
|
|
|
roundIncrement =
|
|
|
|
(roundingMode
|
|
|
|
== (sign ? round_min : round_max))
|
|
|
|
? 0x7F
|
|
|
|
: 0;
|
|
|
|
}
|
|
|
|
roundBits = sig & 0x7F;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( 0xFD <= (unsigned int) exp ) {
|
|
|
|
if ( exp < 0 ) {
|
|
|
|
/*----------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------*/
|
|
|
|
isTiny =
|
|
|
|
(globalDetectTininess == tininess_beforeRounding)
|
|
|
|
|| (exp < -1) || (sig + roundIncrement < 0x80000000);
|
|
|
|
sig = softfloat_shiftRightJam32( sig, -exp );
|
|
|
|
exp = 0;
|
|
|
|
roundBits = sig & 0x7F;
|
|
|
|
if ( isTiny && roundBits ) {
|
|
|
|
raiseFlags( flag_underflow );
|
|
|
|
}
|
|
|
|
} else if ( (0xFD < exp) || (0x80000000 <= sig + roundIncrement) ) {
|
|
|
|
/*----------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------*/
|
|
|
|
raiseFlags(
|
|
|
|
flag_overflow | flag_inexact );
|
|
|
|
uiZ = packToF32UI( sign, 0xFF, 0 ) - ! roundIncrement;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sig = (sig + roundIncrement)>>7;
|
|
|
|
if ( roundBits ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
if ( roundingMode == round_odd ) {
|
|
|
|
sig |= 1;
|
|
|
|
goto packReturn;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sig &= ~(uint_fast32_t) (! (roundBits ^ 0x40) & roundNearEven);
|
|
|
|
if ( ! sig ) exp = 0;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
packReturn:
|
|
|
|
uiZ = packToF32UI( sign, exp, sig );
|
|
|
|
uiZ:
|
|
|
|
return float32_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t
|
|
|
|
softfloat_roundPackToF64( bool sign, int_fast16_t exp, uint_fast64_t sig )
|
|
|
|
{
|
|
|
|
uint_fast8_t roundingMode;
|
|
|
|
bool roundNearEven;
|
|
|
|
uint_fast16_t roundIncrement, roundBits;
|
|
|
|
bool isTiny;
|
|
|
|
uint_fast64_t uiZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
roundingMode = globalRoundingMode;
|
|
|
|
roundNearEven = (roundingMode == round_near_even);
|
|
|
|
roundIncrement = 0x200;
|
|
|
|
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
|
|
|
|
roundIncrement =
|
|
|
|
(roundingMode
|
|
|
|
== (sign ? round_min : round_max))
|
|
|
|
? 0x3FF
|
|
|
|
: 0;
|
|
|
|
}
|
|
|
|
roundBits = sig & 0x3FF;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
if ( 0x7FD <= (uint16_t) exp ) {
|
|
|
|
if ( exp < 0 ) {
|
|
|
|
/*----------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------*/
|
|
|
|
isTiny =
|
|
|
|
(globalDetectTininess == tininess_beforeRounding)
|
|
|
|
|| (exp < -1)
|
|
|
|
|| (sig + roundIncrement < UINT64_C( 0x8000000000000000 ));
|
|
|
|
sig = softfloat_shiftRightJam64( sig, -exp );
|
|
|
|
exp = 0;
|
|
|
|
roundBits = sig & 0x3FF;
|
|
|
|
if ( isTiny && roundBits ) {
|
|
|
|
raiseFlags( flag_underflow );
|
|
|
|
}
|
|
|
|
} else if (
|
|
|
|
(0x7FD < exp)
|
|
|
|
|| (UINT64_C( 0x8000000000000000 ) <= sig + roundIncrement)
|
|
|
|
) {
|
|
|
|
/*----------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------*/
|
|
|
|
raiseFlags(
|
|
|
|
flag_overflow | flag_inexact );
|
|
|
|
uiZ = packToF64UI( sign, 0x7FF, 0 ) - ! roundIncrement;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
sig = (sig + roundIncrement)>>10;
|
|
|
|
if ( roundBits ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
if ( roundingMode == round_odd ) {
|
|
|
|
sig |= 1;
|
|
|
|
goto packReturn;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sig &= ~(uint_fast64_t) (! (roundBits ^ 0x200) & roundNearEven);
|
|
|
|
if ( ! sig ) exp = 0;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
packReturn:
|
|
|
|
uiZ = packToF64UI( sign, exp, sig );
|
|
|
|
uiZ:
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
int_fast32_t
|
|
|
|
softfloat_roundToI32(
|
|
|
|
bool sign, uint_fast64_t sig, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
bool roundNearEven;
|
|
|
|
uint_fast16_t roundIncrement, roundBits;
|
|
|
|
uint_fast32_t sig32;
|
|
|
|
union { uint32_t ui; int32_t i; } uZ;
|
|
|
|
int_fast32_t z;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
roundNearEven = (roundingMode == round_near_even);
|
|
|
|
roundIncrement = 0x800;
|
|
|
|
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
|
|
|
|
roundIncrement =
|
|
|
|
(roundingMode
|
|
|
|
== (sign ? round_min : round_max))
|
|
|
|
? 0xFFF
|
|
|
|
: 0;
|
|
|
|
}
|
|
|
|
roundBits = sig & 0xFFF;
|
|
|
|
sig += roundIncrement;
|
|
|
|
if ( sig & UINT64_C( 0xFFFFF00000000000 ) ) goto invalid;
|
|
|
|
sig32 = (uint_fast32_t)(sig>>12); //fixed warning on type cast
|
|
|
|
sig32 &= ~(uint_fast32_t) (! (roundBits ^ 0x800) & roundNearEven);
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
uZ.ui = sign ? (~sig32 + 1) : sig32;
|
|
|
|
z = uZ.i;
|
|
|
|
if ( z && ((z < 0) ^ sign) ) goto invalid;
|
|
|
|
if ( exact && roundBits ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return z;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return sign ? i32_fromNegOverflow : i32_fromPosOverflow;
|
|
|
|
}
|
|
|
|
|
|
|
|
int_fast64_t
|
|
|
|
softfloat_roundToI64(
|
|
|
|
bool sign,
|
|
|
|
uint_fast64_t sig,
|
|
|
|
uint_fast64_t sigExtra,
|
|
|
|
uint_fast8_t roundingMode,
|
|
|
|
bool exact
|
|
|
|
)
|
|
|
|
{
|
|
|
|
bool roundNearEven, doIncrement;
|
|
|
|
union { uint64_t ui; int64_t i; } uZ;
|
|
|
|
int_fast64_t z;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
roundNearEven = (roundingMode == round_near_even);
|
|
|
|
doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
|
|
|
|
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
|
|
|
|
doIncrement =
|
|
|
|
(roundingMode
|
|
|
|
== (sign ? round_min : round_max))
|
|
|
|
&& sigExtra;
|
|
|
|
}
|
|
|
|
if ( doIncrement ) {
|
|
|
|
++sig;
|
|
|
|
if ( ! sig ) goto invalid;
|
|
|
|
sig &=
|
|
|
|
~(uint_fast64_t)
|
|
|
|
(! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
|
|
|
|
& roundNearEven);
|
|
|
|
}
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1
|
|
|
|
uZ.ui = sign ? (~sig + 1) : sig;
|
|
|
|
z = uZ.i;
|
|
|
|
if ( z && ((z < 0) ^ sign) ) goto invalid;
|
|
|
|
if ( exact && sigExtra ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return z;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return sign ? i64_fromNegOverflow : i64_fromPosOverflow;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint_fast32_t
|
|
|
|
softfloat_roundToUI32(
|
|
|
|
bool sign, uint_fast64_t sig, uint_fast8_t roundingMode, bool exact )
|
|
|
|
{
|
|
|
|
bool roundNearEven;
|
|
|
|
uint_fast16_t roundIncrement, roundBits;
|
|
|
|
uint_fast32_t z;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
roundNearEven = (roundingMode == round_near_even);
|
|
|
|
roundIncrement = 0x800;
|
|
|
|
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
|
|
|
|
roundIncrement =
|
|
|
|
(roundingMode
|
|
|
|
== (sign ? round_min : round_max))
|
|
|
|
? 0xFFF
|
|
|
|
: 0;
|
|
|
|
}
|
|
|
|
roundBits = sig & 0xFFF;
|
|
|
|
sig += roundIncrement;
|
|
|
|
if ( sig & UINT64_C( 0xFFFFF00000000000 ) ) goto invalid;
|
|
|
|
z = (uint_fast32_t)(sig>>12); //fixed warning on type cast
|
|
|
|
z &= ~(uint_fast32_t) (! (roundBits ^ 0x800) & roundNearEven);
|
|
|
|
if ( sign && z ) goto invalid;
|
|
|
|
if ( exact && roundBits ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return z;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint_fast64_t
|
|
|
|
softfloat_roundToUI64(
|
|
|
|
bool sign,
|
|
|
|
uint_fast64_t sig,
|
|
|
|
uint_fast64_t sigExtra,
|
|
|
|
uint_fast8_t roundingMode,
|
|
|
|
bool exact
|
|
|
|
)
|
|
|
|
{
|
|
|
|
bool roundNearEven, doIncrement;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
roundNearEven = (roundingMode == round_near_even);
|
|
|
|
doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
|
|
|
|
if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
|
|
|
|
doIncrement =
|
|
|
|
(roundingMode
|
|
|
|
== (sign ? round_min : round_max))
|
|
|
|
&& sigExtra;
|
|
|
|
}
|
|
|
|
if ( doIncrement ) {
|
|
|
|
++sig;
|
|
|
|
if ( ! sig ) goto invalid;
|
|
|
|
sig &=
|
|
|
|
~(uint_fast64_t)
|
|
|
|
(! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
|
|
|
|
& roundNearEven);
|
|
|
|
}
|
|
|
|
if ( sign && sig ) goto invalid;
|
|
|
|
if ( exact && sigExtra ) {
|
|
|
|
raiseFlags(flag_inexact);
|
|
|
|
}
|
|
|
|
return sig;
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
invalid:
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
return sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct uint128
|
|
|
|
softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
|
|
|
|
{
|
|
|
|
uint_fast8_t u8NegDist;
|
|
|
|
struct uint128 z;
|
|
|
|
|
|
|
|
if ( dist < 64 ) {
|
|
|
|
//fixed unsigned unary minus: -x == ~x + 1 , fixed type cast
|
|
|
|
u8NegDist = (uint_fast8_t)(~dist + 1);
|
|
|
|
z.v64 = a64>>dist;
|
|
|
|
z.v0 =
|
|
|
|
a64<<(u8NegDist & 63) | a0>>dist
|
|
|
|
| ((uint64_t) (a0<<(u8NegDist & 63)) != 0);
|
|
|
|
} else {
|
|
|
|
z.v64 = 0;
|
|
|
|
z.v0 =
|
|
|
|
(dist < 127)
|
|
|
|
? a64>>(dist & 63)
|
|
|
|
| (((a64 & (((uint_fast64_t) 1<<(dist & 63)) - 1)) | a0)
|
|
|
|
!= 0)
|
|
|
|
: ((a64 | a0) != 0);
|
|
|
|
}
|
|
|
|
return z;
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t softfloat_subMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
|
|
|
|
{
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast32_t sigA;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast32_t sigB;
|
|
|
|
int_fast16_t expDiff;
|
|
|
|
uint_fast32_t uiZ;
|
|
|
|
int_fast32_t sigDiff;
|
|
|
|
bool signZ;
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
uint_fast32_t sigX, sigY;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expA = expF32UI( uiA );
|
|
|
|
sigA = fracF32UI( uiA );
|
|
|
|
expB = expF32UI( uiB );
|
|
|
|
sigB = fracF32UI( uiB );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expDiff = expA - expB;
|
|
|
|
if ( ! expDiff ) {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0xFF ) {
|
|
|
|
if ( sigA | sigB ) goto propagateNaN;
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF32UI;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
sigDiff = sigA - sigB;
|
|
|
|
if ( ! sigDiff ) {
|
|
|
|
uiZ =
|
|
|
|
packToF32UI(
|
|
|
|
(globalRoundingMode == round_min), 0, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
if ( expA ) --expA;
|
|
|
|
signZ = signF32UI( uiA );
|
|
|
|
if ( sigDiff < 0 ) {
|
|
|
|
signZ = ! signZ;
|
|
|
|
sigDiff = -sigDiff;
|
|
|
|
}
|
|
|
|
shiftDist = softfloat_countLeadingZeros32( sigDiff ) - 8;
|
|
|
|
expZ = expA - shiftDist;
|
|
|
|
if ( expZ < 0 ) {
|
|
|
|
shiftDist = (int_fast8_t)expA; //fixed type cast
|
|
|
|
expZ = 0;
|
|
|
|
}
|
|
|
|
uiZ = packToF32UI( signZ, expZ, sigDiff<<shiftDist );
|
|
|
|
goto uiZ;
|
|
|
|
} else {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
signZ = signF32UI( uiA );
|
|
|
|
sigA <<= 7;
|
|
|
|
sigB <<= 7;
|
|
|
|
if ( expDiff < 0 ) {
|
|
|
|
/*----------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------*/
|
|
|
|
signZ = ! signZ;
|
|
|
|
if ( expB == 0xFF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
uiZ = packToF32UI( signZ, 0xFF, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
expZ = expB - 1;
|
|
|
|
sigX = sigB | 0x40000000;
|
|
|
|
sigY = sigA + (expA ? 0x40000000 : sigA);
|
|
|
|
expDiff = -expDiff;
|
|
|
|
} else {
|
|
|
|
/*----------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------*/
|
|
|
|
if ( expA == 0xFF ) {
|
|
|
|
if ( sigA ) goto propagateNaN;
|
|
|
|
uiZ = uiA;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
expZ = expA - 1;
|
|
|
|
sigX = sigA | 0x40000000;
|
|
|
|
sigY = sigB + (expB ? 0x40000000 : sigB);
|
|
|
|
}
|
|
|
|
return
|
|
|
|
softfloat_normRoundPackToF32(
|
|
|
|
signZ, expZ, sigX - softfloat_shiftRightJam32( sigY, expDiff )
|
|
|
|
);
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN:
|
|
|
|
uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
|
|
|
|
uiZ:
|
|
|
|
return float32_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t
|
|
|
|
softfloat_subMagsF64( uint_fast64_t uiA, uint_fast64_t uiB, bool signZ )
|
|
|
|
{
|
|
|
|
int_fast16_t expA;
|
|
|
|
uint_fast64_t sigA;
|
|
|
|
int_fast16_t expB;
|
|
|
|
uint_fast64_t sigB;
|
|
|
|
int_fast16_t expDiff;
|
|
|
|
uint_fast64_t uiZ;
|
|
|
|
int_fast64_t sigDiff;
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
int_fast16_t expZ;
|
|
|
|
uint_fast64_t sigZ;
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expA = expF64UI( uiA );
|
|
|
|
sigA = fracF64UI( uiA );
|
|
|
|
expB = expF64UI( uiB );
|
|
|
|
sigB = fracF64UI( uiB );
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
expDiff = expA - expB;
|
|
|
|
if ( ! expDiff ) {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
if ( expA == 0x7FF ) {
|
|
|
|
if ( sigA | sigB ) goto propagateNaN;
|
|
|
|
raiseFlags( flag_invalid );
|
|
|
|
uiZ = defaultNaNF64UI;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
sigDiff = sigA - sigB;
|
|
|
|
if ( ! sigDiff ) {
|
|
|
|
uiZ =
|
|
|
|
packToF64UI(
|
|
|
|
(globalRoundingMode == round_min), 0, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
if ( expA ) --expA;
|
|
|
|
if ( sigDiff < 0 ) {
|
|
|
|
signZ = ! signZ;
|
|
|
|
sigDiff = -sigDiff;
|
|
|
|
}
|
|
|
|
shiftDist = softfloat_countLeadingZeros64( sigDiff ) - 11;
|
|
|
|
expZ = expA - shiftDist;
|
|
|
|
if ( expZ < 0 ) {
|
|
|
|
shiftDist = (int_fast8_t)expA; //fixed type cast
|
|
|
|
expZ = 0;
|
|
|
|
}
|
|
|
|
uiZ = packToF64UI( signZ, expZ, sigDiff<<shiftDist );
|
|
|
|
goto uiZ;
|
|
|
|
} else {
|
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
*--------------------------------------------------------------------*/
|
|
|
|
sigA <<= 10;
|
|
|
|
sigB <<= 10;
|
|
|
|
if ( expDiff < 0 ) {
|
|
|
|
/*----------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------*/
|
|
|
|
signZ = ! signZ;
|
|
|
|
if ( expB == 0x7FF ) {
|
|
|
|
if ( sigB ) goto propagateNaN;
|
|
|
|
uiZ = packToF64UI( signZ, 0x7FF, 0 );
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
sigA += expA ? UINT64_C( 0x4000000000000000 ) : sigA;
|
|
|
|
sigA = softfloat_shiftRightJam64( sigA, -expDiff );
|
|
|
|
sigB |= UINT64_C( 0x4000000000000000 );
|
|
|
|
expZ = expB;
|
|
|
|
sigZ = sigB - sigA;
|
|
|
|
} else {
|
|
|
|
/*----------------------------------------------------------------
|
|
|
|
*----------------------------------------------------------------*/
|
|
|
|
if ( expA == 0x7FF ) {
|
|
|
|
if ( sigA ) goto propagateNaN;
|
|
|
|
uiZ = uiA;
|
|
|
|
goto uiZ;
|
|
|
|
}
|
|
|
|
sigB += expB ? UINT64_C( 0x4000000000000000 ) : sigB;
|
|
|
|
sigB = softfloat_shiftRightJam64( sigB, expDiff );
|
|
|
|
sigA |= UINT64_C( 0x4000000000000000 );
|
|
|
|
expZ = expA;
|
|
|
|
sigZ = sigA - sigB;
|
|
|
|
}
|
|
|
|
return softfloat_normRoundPackToF64( signZ, expZ - 1, sigZ );
|
|
|
|
}
|
|
|
|
/*------------------------------------------------------------------------
|
|
|
|
*------------------------------------------------------------------------*/
|
|
|
|
propagateNaN:
|
|
|
|
uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
|
|
|
|
uiZ:
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t ui32_to_f32( uint32_t a )
|
|
|
|
{
|
|
|
|
if ( ! a ) {
|
|
|
|
return float32_t::fromRaw(0);
|
|
|
|
}
|
|
|
|
if ( a & 0x80000000 ) {
|
|
|
|
return softfloat_roundPackToF32( 0, 0x9D, a>>1 | (a & 1) );
|
|
|
|
} else {
|
|
|
|
return softfloat_normRoundPackToF32( 0, 0x9C, a );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t ui32_to_f64( uint32_t a )
|
|
|
|
{
|
|
|
|
uint_fast64_t uiZ;
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
|
|
|
|
if ( ! a ) {
|
|
|
|
uiZ = 0;
|
|
|
|
} else {
|
|
|
|
shiftDist = softfloat_countLeadingZeros32( a ) + 21;
|
|
|
|
uiZ =
|
|
|
|
packToF64UI( 0, 0x432 - shiftDist, (uint_fast64_t) a<<shiftDist );
|
|
|
|
}
|
|
|
|
return float64_t::fromRaw(uiZ);
|
|
|
|
}
|
|
|
|
|
|
|
|
float32_t ui64_to_f32( uint64_t a )
|
|
|
|
{
|
|
|
|
int_fast8_t shiftDist;
|
|
|
|
uint_fast32_t sig;
|
|
|
|
|
|
|
|
shiftDist = softfloat_countLeadingZeros64( a ) - 40;
|
|
|
|
if ( 0 <= shiftDist ) {
|
|
|
|
return float32_t::fromRaw(a ? packToF32UI(0, 0x95 - shiftDist, (uint_fast32_t) a<<shiftDist ) : 0);
|
|
|
|
} else {
|
|
|
|
shiftDist += 7;
|
|
|
|
sig =
|
|
|
|
(shiftDist < 0) ? (uint_fast32_t) softfloat_shortShiftRightJam64( a, -shiftDist ) //fixed warning on type cast
|
|
|
|
: (uint_fast32_t) a<<shiftDist;
|
|
|
|
return softfloat_roundPackToF32( 0, 0x9C - shiftDist, sig );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t ui64_to_f64( uint64_t a )
|
|
|
|
{
|
|
|
|
if ( ! a ) {
|
|
|
|
return float64_t::fromRaw(0);
|
|
|
|
}
|
|
|
|
if ( a & UINT64_C( 0x8000000000000000 ) ) {
|
|
|
|
return
|
|
|
|
softfloat_roundPackToF64(
|
|
|
|
0, 0x43D, softfloat_shortShiftRightJam64( a, 1 ) );
|
|
|
|
} else {
|
|
|
|
return softfloat_normRoundPackToF64( 0, 0x43C, a );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------
|
|
|
|
| Ported from OpenCV.
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
////////////////////////////////////// EXP /////////////////////////////////////
|
|
|
|
|
|
|
|
#define EXPTAB_SCALE 6
|
|
|
|
#define EXPTAB_MASK ((1 << EXPTAB_SCALE) - 1)
|
|
|
|
|
2017-06-15 20:10:41 +08:00
|
|
|
// .9670371139572337719125840413672004409288e-2
|
|
|
|
static const softdouble EXPPOLY_32F_A0 = float64_t::fromRaw(0x3f83ce0f3e46f431);
|
|
|
|
|
|
|
|
static const uint64_t expTab[] = {
|
|
|
|
0x3ff0000000000000, // 1.000000
|
|
|
|
0x3ff02c9a3e778061, // 1.010889
|
|
|
|
0x3ff059b0d3158574, // 1.021897
|
|
|
|
0x3ff0874518759bc8, // 1.033025
|
|
|
|
0x3ff0b5586cf9890f, // 1.044274
|
|
|
|
0x3ff0e3ec32d3d1a2, // 1.055645
|
|
|
|
0x3ff11301d0125b51, // 1.067140
|
|
|
|
0x3ff1429aaea92de0, // 1.078761
|
|
|
|
0x3ff172b83c7d517b, // 1.090508
|
|
|
|
0x3ff1a35beb6fcb75, // 1.102383
|
|
|
|
0x3ff1d4873168b9aa, // 1.114387
|
|
|
|
0x3ff2063b88628cd6, // 1.126522
|
|
|
|
0x3ff2387a6e756238, // 1.138789
|
|
|
|
0x3ff26b4565e27cdd, // 1.151189
|
|
|
|
0x3ff29e9df51fdee1, // 1.163725
|
|
|
|
0x3ff2d285a6e4030b, // 1.176397
|
|
|
|
0x3ff306fe0a31b715, // 1.189207
|
|
|
|
0x3ff33c08b26416ff, // 1.202157
|
|
|
|
0x3ff371a7373aa9cb, // 1.215247
|
|
|
|
0x3ff3a7db34e59ff7, // 1.228481
|
|
|
|
0x3ff3dea64c123422, // 1.241858
|
|
|
|
0x3ff4160a21f72e2a, // 1.255381
|
|
|
|
0x3ff44e086061892d, // 1.269051
|
|
|
|
0x3ff486a2b5c13cd0, // 1.282870
|
|
|
|
0x3ff4bfdad5362a27, // 1.296840
|
|
|
|
0x3ff4f9b2769d2ca7, // 1.310961
|
|
|
|
0x3ff5342b569d4f82, // 1.325237
|
|
|
|
0x3ff56f4736b527da, // 1.339668
|
|
|
|
0x3ff5ab07dd485429, // 1.354256
|
|
|
|
0x3ff5e76f15ad2148, // 1.369002
|
|
|
|
0x3ff6247eb03a5585, // 1.383910
|
|
|
|
0x3ff6623882552225, // 1.398980
|
|
|
|
0x3ff6a09e667f3bcd, // 1.414214
|
|
|
|
0x3ff6dfb23c651a2f, // 1.429613
|
|
|
|
0x3ff71f75e8ec5f74, // 1.445181
|
|
|
|
0x3ff75feb564267c9, // 1.460918
|
|
|
|
0x3ff7a11473eb0187, // 1.476826
|
|
|
|
0x3ff7e2f336cf4e62, // 1.492908
|
|
|
|
0x3ff82589994cce13, // 1.509164
|
|
|
|
0x3ff868d99b4492ed, // 1.525598
|
|
|
|
0x3ff8ace5422aa0db, // 1.542211
|
|
|
|
0x3ff8f1ae99157736, // 1.559004
|
|
|
|
0x3ff93737b0cdc5e5, // 1.575981
|
|
|
|
0x3ff97d829fde4e50, // 1.593142
|
|
|
|
0x3ff9c49182a3f090, // 1.610490
|
|
|
|
0x3ffa0c667b5de565, // 1.628027
|
|
|
|
0x3ffa5503b23e255d, // 1.645755
|
|
|
|
0x3ffa9e6b5579fdbf, // 1.663677
|
|
|
|
0x3ffae89f995ad3ad, // 1.681793
|
|
|
|
0x3ffb33a2b84f15fb, // 1.700106
|
|
|
|
0x3ffb7f76f2fb5e47, // 1.718619
|
|
|
|
0x3ffbcc1e904bc1d2, // 1.737334
|
|
|
|
0x3ffc199bdd85529c, // 1.756252
|
|
|
|
0x3ffc67f12e57d14b, // 1.775376
|
|
|
|
0x3ffcb720dcef9069, // 1.794709
|
|
|
|
0x3ffd072d4a07897c, // 1.814252
|
|
|
|
0x3ffd5818dcfba487, // 1.834008
|
|
|
|
0x3ffda9e603db3285, // 1.853979
|
|
|
|
0x3ffdfc97337b9b5f, // 1.874168
|
|
|
|
0x3ffe502ee78b3ff6, // 1.894576
|
|
|
|
0x3ffea4afa2a490da, // 1.915207
|
|
|
|
0x3ffefa1bee615a27, // 1.936062
|
|
|
|
0x3fff50765b6e4540, // 1.957144
|
|
|
|
0x3fffa7c1819e90d8, // 1.978456
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
};
|
|
|
|
|
2017-06-15 20:10:41 +08:00
|
|
|
// 1 / ln(2) * (1 << EXPTAB_SCALE) == 1.4426950408889634073599246810019 * (1 << EXPTAB_SCALE)
|
|
|
|
static const float64_t exp_prescale = float64_t::fromRaw(0x3ff71547652b82fe) * float64_t(1 << EXPTAB_SCALE);
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
static const float64_t exp_postscale = float64_t::one()/float64_t(1 << EXPTAB_SCALE);
|
|
|
|
static const float64_t exp_max_val(3000*(1 << EXPTAB_SCALE)); // log10(DBL_MAX) < 3000
|
|
|
|
|
|
|
|
float32_t f32_exp( float32_t x)
|
|
|
|
{
|
|
|
|
//special cases
|
|
|
|
if(x.isNaN()) return float32_t::nan();
|
|
|
|
if(x.isInf()) return (x == float32_t::inf()) ? x : float32_t::zero();
|
|
|
|
|
|
|
|
static const float64_t
|
2017-06-15 20:10:41 +08:00
|
|
|
A4 = float64_t::one() / EXPPOLY_32F_A0,
|
|
|
|
A3 = float64_t::fromRaw(0x3fe62e42fef9277b) / EXPPOLY_32F_A0, // .6931471805521448196800669615864773144641 / EXPPOLY_32F_A0,
|
|
|
|
A2 = float64_t::fromRaw(0x3fcebfbe081585e7) / EXPPOLY_32F_A0, // .2402265109513301490103372422686535526573 / EXPPOLY_32F_A0,
|
|
|
|
A1 = float64_t::fromRaw(0x3fac6af0d93cf576) / EXPPOLY_32F_A0; // .5550339366753125211915322047004666939128e-1 / EXPPOLY_32F_A0;
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
|
|
|
|
float64_t x0;
|
|
|
|
if(expF32UI(x.v) > 127 + 10)
|
|
|
|
x0 = signF32UI(x.v) ? -exp_max_val : exp_max_val;
|
|
|
|
else
|
|
|
|
x0 = f32_to_f64(x) * exp_prescale;
|
|
|
|
|
|
|
|
int val0 = f64_to_i32(x0, round_near_even, false);
|
|
|
|
int t = (val0 >> EXPTAB_SCALE) + 1023;
|
|
|
|
t = t < 0 ? 0 : (t > 2047 ? 2047 : t);
|
|
|
|
float64_t buf; buf.v = packToF64UI(0, t, 0);
|
|
|
|
|
|
|
|
x0 = (x0 - f64_roundToInt(x0, round_near_even, false)) * exp_postscale;
|
|
|
|
|
2017-06-15 20:10:41 +08:00
|
|
|
return (buf * EXPPOLY_32F_A0 * float64_t::fromRaw(expTab[val0 & EXPTAB_MASK]) * ((((x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4));
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_exp(float64_t x)
|
|
|
|
{
|
|
|
|
//special cases
|
|
|
|
if(x.isNaN()) return float64_t::nan();
|
|
|
|
if(x.isInf()) return (x == float64_t::inf()) ? x : float64_t::zero();
|
|
|
|
|
|
|
|
static const float64_t
|
2017-06-15 20:10:41 +08:00
|
|
|
A5 = float64_t::one() / EXPPOLY_32F_A0,
|
|
|
|
A4 = float64_t::fromRaw(0x3fe62e42fefa39f1) / EXPPOLY_32F_A0, // .69314718055994546743029643825322 / EXPPOLY_32F_A0
|
|
|
|
A3 = float64_t::fromRaw(0x3fcebfbdff82a45a) / EXPPOLY_32F_A0, // .24022650695886477918181338054308 / EXPPOLY_32F_A0
|
|
|
|
A2 = float64_t::fromRaw(0x3fac6b08d81fec75) / EXPPOLY_32F_A0, // .55504108793649567998466049042729e-1 / EXPPOLY_32F_A0
|
|
|
|
A1 = float64_t::fromRaw(0x3f83b2a72b4f3cd3) / EXPPOLY_32F_A0, // .96180973140732918010002372686186e-2 / EXPPOLY_32F_A0
|
|
|
|
A0 = float64_t::fromRaw(0x3f55e7aa1566c2a4) / EXPPOLY_32F_A0; // .13369713757180123244806654839424e-2 / EXPPOLY_32F_A0
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
|
|
|
|
float64_t x0;
|
|
|
|
if(expF64UI(x.v) > 1023 + 10)
|
|
|
|
x0 = signF64UI(x.v) ? -exp_max_val : exp_max_val;
|
|
|
|
else
|
|
|
|
x0 = x * exp_prescale;
|
|
|
|
|
|
|
|
int val0 = cvRound(x0);
|
|
|
|
int t = (val0 >> EXPTAB_SCALE) + 1023;
|
|
|
|
t = t < 0 ? 0 : (t > 2047 ? 2047 : t);
|
|
|
|
float64_t buf; buf.v = packToF64UI(0, t, 0);
|
|
|
|
|
|
|
|
x0 = (x0 - f64_roundToInt(x0, round_near_even, false)) * exp_postscale;
|
|
|
|
|
2017-06-15 20:10:41 +08:00
|
|
|
return buf * EXPPOLY_32F_A0 * float64_t::fromRaw(expTab[val0 & EXPTAB_MASK]) * (((((A0*x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4)*x0 + A5);
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#undef EXPTAB_SCALE
|
|
|
|
#undef EXPTAB_MASK
|
|
|
|
#undef EXPPOLY_32F_A0
|
|
|
|
|
|
|
|
/////////////////////////////////////////// LOG ///////////////////////////////////////
|
|
|
|
|
|
|
|
#define LOGTAB_SCALE 8
|
|
|
|
|
2017-06-15 20:10:41 +08:00
|
|
|
static const uint64_t CV_DECL_ALIGNED(16) icvLogTab[] = {
|
|
|
|
0, 0x3ff0000000000000, // 0.000000, 1.000000
|
|
|
|
0x3f6ff00aa2b10bc0, 0x3fefe01fe01fe020, // 0.003899, 0.996109
|
|
|
|
0x3f7fe02a6b106788, 0x3fefc07f01fc07f0, // 0.007782, 0.992248
|
|
|
|
0x3f87dc475f810a76, 0x3fefa11caa01fa12, // 0.011651, 0.988417
|
|
|
|
0x3f8fc0a8b0fc03e3, 0x3fef81f81f81f820, // 0.015504, 0.984615
|
|
|
|
0x3f93cea44346a574, 0x3fef6310aca0dbb5, // 0.019343, 0.980843
|
|
|
|
0x3f97b91b07d5b11a, 0x3fef44659e4a4271, // 0.023167, 0.977099
|
|
|
|
0x3f9b9fc027af9197, 0x3fef25f644230ab5, // 0.026977, 0.973384
|
|
|
|
0x3f9f829b0e783300, 0x3fef07c1f07c1f08, // 0.030772, 0.969697
|
|
|
|
0x3fa1b0d98923d97f, 0x3feee9c7f8458e02, // 0.034552, 0.966038
|
|
|
|
0x3fa39e87b9febd5f, 0x3feecc07b301ecc0, // 0.038319, 0.962406
|
|
|
|
0x3fa58a5bafc8e4d4, 0x3feeae807aba01eb, // 0.042071, 0.958801
|
|
|
|
0x3fa77458f632dcfc, 0x3fee9131abf0b767, // 0.045810, 0.955224
|
|
|
|
0x3fa95c830ec8e3eb, 0x3fee741aa59750e4, // 0.049534, 0.951673
|
|
|
|
0x3fab42dd711971be, 0x3fee573ac901e574, // 0.053245, 0.948148
|
|
|
|
0x3fad276b8adb0b52, 0x3fee3a9179dc1a73, // 0.056941, 0.944649
|
|
|
|
0x3faf0a30c01162a6, 0x3fee1e1e1e1e1e1e, // 0.060625, 0.941176
|
|
|
|
0x3fb075983598e471, 0x3fee01e01e01e01e, // 0.064294, 0.937729
|
|
|
|
0x3fb16536eea37ae0, 0x3fede5d6e3f8868a, // 0.067951, 0.934307
|
|
|
|
0x3fb253f62f0a1416, 0x3fedca01dca01dca, // 0.071594, 0.930909
|
|
|
|
0x3fb341d7961bd1d0, 0x3fedae6076b981db, // 0.075223, 0.927536
|
|
|
|
0x3fb42edcbea646f0, 0x3fed92f2231e7f8a, // 0.078840, 0.924188
|
|
|
|
0x3fb51b073f06183f, 0x3fed77b654b82c34, // 0.082444, 0.920863
|
|
|
|
0x3fb60658a93750c3, 0x3fed5cac807572b2, // 0.086034, 0.917563
|
|
|
|
0x3fb6f0d28ae56b4b, 0x3fed41d41d41d41d, // 0.089612, 0.914286
|
|
|
|
0x3fb7da766d7b12cc, 0x3fed272ca3fc5b1a, // 0.093177, 0.911032
|
|
|
|
0x3fb8c345d6319b20, 0x3fed0cb58f6ec074, // 0.096730, 0.907801
|
|
|
|
0x3fb9ab42462033ac, 0x3fecf26e5c44bfc6, // 0.100269, 0.904594
|
|
|
|
0x3fba926d3a4ad563, 0x3fecd85689039b0b, // 0.103797, 0.901408
|
|
|
|
0x3fbb78c82bb0eda1, 0x3fecbe6d9601cbe7, // 0.107312, 0.898246
|
|
|
|
0x3fbc5e548f5bc743, 0x3feca4b3055ee191, // 0.110814, 0.895105
|
|
|
|
0x3fbd4313d66cb35d, 0x3fec8b265afb8a42, // 0.114305, 0.891986
|
|
|
|
0x3fbe27076e2af2e5, 0x3fec71c71c71c71c, // 0.117783, 0.888889
|
|
|
|
0x3fbf0a30c01162a6, 0x3fec5894d10d4986, // 0.121249, 0.885813
|
|
|
|
0x3fbfec9131dbeaba, 0x3fec3f8f01c3f8f0, // 0.124703, 0.882759
|
|
|
|
0x3fc0671512ca596e, 0x3fec26b5392ea01c, // 0.128146, 0.879725
|
|
|
|
0x3fc0d77e7cd08e59, 0x3fec0e070381c0e0, // 0.131576, 0.876712
|
|
|
|
0x3fc14785846742ac, 0x3febf583ee868d8b, // 0.134995, 0.873720
|
|
|
|
0x3fc1b72ad52f67a0, 0x3febdd2b899406f7, // 0.138402, 0.870748
|
|
|
|
0x3fc2266f190a5acb, 0x3febc4fd65883e7b, // 0.141798, 0.867797
|
|
|
|
0x3fc29552f81ff523, 0x3febacf914c1bad0, // 0.145182, 0.864865
|
|
|
|
0x3fc303d718e47fd2, 0x3feb951e2b18ff23, // 0.148555, 0.861953
|
|
|
|
0x3fc371fc201e8f74, 0x3feb7d6c3dda338b, // 0.151916, 0.859060
|
|
|
|
0x3fc3dfc2b0ecc629, 0x3feb65e2e3beee05, // 0.155266, 0.856187
|
|
|
|
0x3fc44d2b6ccb7d1e, 0x3feb4e81b4e81b4f, // 0.158605, 0.853333
|
|
|
|
0x3fc4ba36f39a55e5, 0x3feb37484ad806ce, // 0.161933, 0.850498
|
|
|
|
0x3fc526e5e3a1b437, 0x3feb2036406c80d9, // 0.165250, 0.847682
|
|
|
|
0x3fc59338d9982085, 0x3feb094b31d922a4, // 0.168555, 0.844884
|
|
|
|
0x3fc5ff3070a793d3, 0x3feaf286bca1af28, // 0.171850, 0.842105
|
|
|
|
0x3fc66acd4272ad50, 0x3feadbe87f94905e, // 0.175134, 0.839344
|
|
|
|
0x3fc6d60fe719d21c, 0x3feac5701ac5701b, // 0.178408, 0.836601
|
|
|
|
0x3fc740f8f54037a4, 0x3feaaf1d2f87ebfd, // 0.181670, 0.833876
|
|
|
|
0x3fc7ab890210d909, 0x3fea98ef606a63be, // 0.184922, 0.831169
|
|
|
|
0x3fc815c0a14357ea, 0x3fea82e65130e159, // 0.188164, 0.828479
|
|
|
|
0x3fc87fa06520c910, 0x3fea6d01a6d01a6d, // 0.191395, 0.825806
|
|
|
|
0x3fc8e928de886d40, 0x3fea574107688a4a, // 0.194615, 0.823151
|
|
|
|
0x3fc9525a9cf456b4, 0x3fea41a41a41a41a, // 0.197826, 0.820513
|
|
|
|
0x3fc9bb362e7dfb83, 0x3fea2c2a87c51ca0, // 0.201026, 0.817891
|
|
|
|
0x3fca23bc1fe2b563, 0x3fea16d3f97a4b02, // 0.204216, 0.815287
|
|
|
|
0x3fca8becfc882f18, 0x3fea01a01a01a01a, // 0.207395, 0.812698
|
|
|
|
0x3fcaf3c94e80bff2, 0x3fe9ec8e951033d9, // 0.210565, 0.810127
|
|
|
|
0x3fcb5b519e8fb5a4, 0x3fe9d79f176b682d, // 0.213724, 0.807571
|
|
|
|
0x3fcbc286742d8cd6, 0x3fe9c2d14ee4a102, // 0.216874, 0.805031
|
|
|
|
0x3fcc2968558c18c0, 0x3fe9ae24ea5510da, // 0.220014, 0.802508
|
|
|
|
0x3fcc8ff7c79a9a21, 0x3fe999999999999a, // 0.223144, 0.800000
|
|
|
|
0x3fccf6354e09c5dc, 0x3fe9852f0d8ec0ff, // 0.226264, 0.797508
|
|
|
|
0x3fcd5c216b4fbb91, 0x3fe970e4f80cb872, // 0.229374, 0.795031
|
|
|
|
0x3fcdc1bca0abec7d, 0x3fe95cbb0be377ae, // 0.232475, 0.792570
|
|
|
|
0x3fce27076e2af2e5, 0x3fe948b0fcd6e9e0, // 0.235566, 0.790123
|
|
|
|
0x3fce8c0252aa5a5f, 0x3fe934c67f9b2ce6, // 0.238648, 0.787692
|
|
|
|
0x3fcef0adcbdc5936, 0x3fe920fb49d0e229, // 0.241720, 0.785276
|
|
|
|
0x3fcf550a564b7b37, 0x3fe90d4f120190d5, // 0.244783, 0.782875
|
|
|
|
0x3fcfb9186d5e3e2a, 0x3fe8f9c18f9c18fa, // 0.247836, 0.780488
|
|
|
|
0x3fd00e6c45ad501c, 0x3fe8e6527af1373f, // 0.250880, 0.778116
|
|
|
|
0x3fd0402594b4d040, 0x3fe8d3018d3018d3, // 0.253915, 0.775758
|
|
|
|
0x3fd071b85fcd590d, 0x3fe8bfce8062ff3a, // 0.256941, 0.773414
|
|
|
|
0x3fd0a324e27390e3, 0x3fe8acb90f6bf3aa, // 0.259958, 0.771084
|
|
|
|
0x3fd0d46b579ab74b, 0x3fe899c0f601899c, // 0.262965, 0.768769
|
|
|
|
0x3fd1058bf9ae4ad5, 0x3fe886e5f0abb04a, // 0.265964, 0.766467
|
|
|
|
0x3fd136870293a8b0, 0x3fe87427bcc092b9, // 0.268953, 0.764179
|
|
|
|
0x3fd1675cababa60e, 0x3fe8618618618618, // 0.271934, 0.761905
|
|
|
|
0x3fd1980d2dd4236f, 0x3fe84f00c2780614, // 0.274905, 0.759644
|
|
|
|
0x3fd1c898c16999fa, 0x3fe83c977ab2bedd, // 0.277868, 0.757396
|
|
|
|
0x3fd1f8ff9e48a2f2, 0x3fe82a4a0182a4a0, // 0.280823, 0.755162
|
|
|
|
0x3fd22941fbcf7965, 0x3fe8181818181818, // 0.283768, 0.752941
|
|
|
|
0x3fd2596010df7639, 0x3fe8060180601806, // 0.286705, 0.750733
|
|
|
|
0x3fd2895a13de86a3, 0x3fe7f405fd017f40, // 0.289633, 0.748538
|
|
|
|
0x3fd2b9303ab89d24, 0x3fe7e225515a4f1d, // 0.292553, 0.746356
|
|
|
|
0x3fd2e8e2bae11d30, 0x3fe7d05f417d05f4, // 0.295464, 0.744186
|
|
|
|
0x3fd31871c9544184, 0x3fe7beb3922e017c, // 0.298367, 0.742029
|
|
|
|
0x3fd347dd9a987d54, 0x3fe7ad2208e0ecc3, // 0.301261, 0.739884
|
|
|
|
0x3fd3772662bfd85a, 0x3fe79baa6bb6398b, // 0.304147, 0.737752
|
|
|
|
0x3fd3a64c556945e9, 0x3fe78a4c8178a4c8, // 0.307025, 0.735632
|
|
|
|
0x3fd3d54fa5c1f70f, 0x3fe77908119ac60d, // 0.309894, 0.733524
|
|
|
|
0x3fd404308686a7e3, 0x3fe767dce434a9b1, // 0.312756, 0.731429
|
|
|
|
0x3fd432ef2a04e813, 0x3fe756cac201756d, // 0.315609, 0.729345
|
|
|
|
0x3fd4618bc21c5ec2, 0x3fe745d1745d1746, // 0.318454, 0.727273
|
|
|
|
0x3fd49006804009d0, 0x3fe734f0c541fe8d, // 0.321291, 0.725212
|
|
|
|
0x3fd4be5f957778a0, 0x3fe724287f46debc, // 0.324119, 0.723164
|
|
|
|
0x3fd4ec9732600269, 0x3fe713786d9c7c09, // 0.326940, 0.721127
|
|
|
|
0x3fd51aad872df82d, 0x3fe702e05c0b8170, // 0.329753, 0.719101
|
|
|
|
0x3fd548a2c3add262, 0x3fe6f26016f26017, // 0.332558, 0.717087
|
|
|
|
0x3fd5767717455a6c, 0x3fe6e1f76b4337c7, // 0.335356, 0.715084
|
|
|
|
0x3fd5a42ab0f4cfe1, 0x3fe6d1a62681c861, // 0.338145, 0.713092
|
|
|
|
0x3fd5d1bdbf5809ca, 0x3fe6c16c16c16c17, // 0.340927, 0.711111
|
|
|
|
0x3fd5ff3070a793d3, 0x3fe6b1490aa31a3d, // 0.343701, 0.709141
|
|
|
|
0x3fd62c82f2b9c795, 0x3fe6a13cd1537290, // 0.346467, 0.707182
|
|
|
|
0x3fd659b57303e1f2, 0x3fe691473a88d0c0, // 0.349225, 0.705234
|
|
|
|
0x3fd686c81e9b14ae, 0x3fe6816816816817, // 0.351976, 0.703297
|
|
|
|
0x3fd6b3bb2235943d, 0x3fe6719f3601671a, // 0.354720, 0.701370
|
|
|
|
0x3fd6e08eaa2ba1e3, 0x3fe661ec6a5122f9, // 0.357456, 0.699454
|
|
|
|
0x3fd70d42e2789235, 0x3fe6524f853b4aa3, // 0.360184, 0.697548
|
|
|
|
0x3fd739d7f6bbd006, 0x3fe642c8590b2164, // 0.362905, 0.695652
|
|
|
|
0x3fd7664e1239dbce, 0x3fe63356b88ac0de, // 0.365619, 0.693767
|
|
|
|
0x3fd792a55fdd47a2, 0x3fe623fa77016240, // 0.368326, 0.691892
|
|
|
|
0x3fd7bede0a37afbf, 0x3fe614b36831ae94, // 0.371025, 0.690027
|
|
|
|
0x3fd7eaf83b82afc3, 0x3fe6058160581606, // 0.373716, 0.688172
|
|
|
|
0x3fd816f41da0d495, 0x3fe5f66434292dfc, // 0.376401, 0.686327
|
|
|
|
0x3fd842d1da1e8b17, 0x3fe5e75bb8d015e7, // 0.379078, 0.684492
|
|
|
|
0x3fd86e919a330ba0, 0x3fe5d867c3ece2a5, // 0.381749, 0.682667
|
|
|
|
0x3fd89a3386c1425a, 0x3fe5c9882b931057, // 0.384412, 0.680851
|
|
|
|
0x3fd8c5b7c858b48a, 0x3fe5babcc647fa91, // 0.387068, 0.679045
|
|
|
|
0x3fd8f11e873662c7, 0x3fe5ac056b015ac0, // 0.389717, 0.677249
|
|
|
|
0x3fd91c67eb45a83d, 0x3fe59d61f123ccaa, // 0.392359, 0.675462
|
|
|
|
0x3fd947941c2116fa, 0x3fe58ed2308158ed, // 0.394994, 0.673684
|
|
|
|
0x3fd972a341135158, 0x3fe5805601580560, // 0.397622, 0.671916
|
|
|
|
0x3fd99d958117e08a, 0x3fe571ed3c506b3a, // 0.400243, 0.670157
|
|
|
|
0x3fd9c86b02dc0862, 0x3fe56397ba7c52e2, // 0.402858, 0.668407
|
|
|
|
0x3fd9f323ecbf984b, 0x3fe5555555555555, // 0.405465, 0.666667
|
|
|
|
0x3fda1dc064d5b995, 0x3fe54725e6bb82fe, // 0.408066, 0.664935
|
|
|
|
0x3fda484090e5bb0a, 0x3fe5390948f40feb, // 0.410660, 0.663212
|
|
|
|
0x3fda72a4966bd9ea, 0x3fe52aff56a8054b, // 0.413247, 0.661499
|
|
|
|
0x3fda9cec9a9a0849, 0x3fe51d07eae2f815, // 0.415828, 0.659794
|
|
|
|
0x3fdac718c258b0e4, 0x3fe50f22e111c4c5, // 0.418402, 0.658098
|
|
|
|
0x3fdaf1293247786b, 0x3fe5015015015015, // 0.420969, 0.656410
|
|
|
|
0x3fdb1b1e0ebdfc5b, 0x3fe4f38f62dd4c9b, // 0.423530, 0.654731
|
|
|
|
0x3fdb44f77bcc8f62, 0x3fe4e5e0a72f0539, // 0.426084, 0.653061
|
|
|
|
0x3fdb6eb59d3cf35d, 0x3fe4d843bedc2c4c, // 0.428632, 0.651399
|
|
|
|
0x3fdb9858969310fb, 0x3fe4cab88725af6e, // 0.431173, 0.649746
|
|
|
|
0x3fdbc1e08b0dad0a, 0x3fe4bd3edda68fe1, // 0.433708, 0.648101
|
|
|
|
0x3fdbeb4d9da71b7b, 0x3fe4afd6a052bf5b, // 0.436237, 0.646465
|
|
|
|
0x3fdc149ff115f026, 0x3fe4a27fad76014a, // 0.438759, 0.644836
|
|
|
|
0x3fdc3dd7a7cdad4d, 0x3fe49539e3b2d067, // 0.441275, 0.643216
|
|
|
|
0x3fdc66f4e3ff6ff7, 0x3fe4880522014880, // 0.443784, 0.641604
|
|
|
|
0x3fdc8ff7c79a9a21, 0x3fe47ae147ae147b, // 0.446287, 0.640000
|
|
|
|
0x3fdcb8e0744d7ac9, 0x3fe46dce34596066, // 0.448784, 0.638404
|
|
|
|
0x3fdce1af0b85f3eb, 0x3fe460cbc7f5cf9a, // 0.451275, 0.636816
|
|
|
|
0x3fdd0a63ae721e64, 0x3fe453d9e2c776ca, // 0.453759, 0.635236
|
|
|
|
0x3fdd32fe7e00ebd5, 0x3fe446f86562d9fb, // 0.456237, 0.633663
|
|
|
|
0x3fdd5b7f9ae2c683, 0x3fe43a2730abee4d, // 0.458710, 0.632099
|
|
|
|
0x3fdd83e7258a2f3e, 0x3fe42d6625d51f87, // 0.461176, 0.630542
|
|
|
|
0x3fddac353e2c5954, 0x3fe420b5265e5951, // 0.463636, 0.628993
|
|
|
|
0x3fddd46a04c1c4a0, 0x3fe4141414141414, // 0.466090, 0.627451
|
|
|
|
0x3fddfc859906d5b5, 0x3fe40782d10e6566, // 0.468538, 0.625917
|
|
|
|
0x3fde24881a7c6c26, 0x3fe3fb013fb013fb, // 0.470980, 0.624390
|
|
|
|
0x3fde4c71a8687704, 0x3fe3ee8f42a5af07, // 0.473416, 0.622871
|
|
|
|
0x3fde744261d68787, 0x3fe3e22cbce4a902, // 0.475846, 0.621359
|
|
|
|
0x3fde9bfa659861f5, 0x3fe3d5d991aa75c6, // 0.478270, 0.619855
|
|
|
|
0x3fdec399d2468cc0, 0x3fe3c995a47babe7, // 0.480689, 0.618357
|
|
|
|
0x3fdeeb20c640ddf4, 0x3fe3bd60d9232955, // 0.483101, 0.616867
|
|
|
|
0x3fdf128f5faf06ec, 0x3fe3b13b13b13b14, // 0.485508, 0.615385
|
|
|
|
0x3fdf39e5bc811e5b, 0x3fe3a524387ac822, // 0.487909, 0.613909
|
|
|
|
0x3fdf6123fa7028ac, 0x3fe3991c2c187f63, // 0.490304, 0.612440
|
|
|
|
0x3fdf884a36fe9ec2, 0x3fe38d22d366088e, // 0.492693, 0.610979
|
|
|
|
0x3fdfaf588f78f31e, 0x3fe3813813813814, // 0.495077, 0.609524
|
|
|
|
0x3fdfd64f20f61571, 0x3fe3755bd1c945ee, // 0.497455, 0.608076
|
|
|
|
0x3fdffd2e0857f498, 0x3fe3698df3de0748, // 0.499828, 0.606635
|
|
|
|
0x3fe011fab125ff8a, 0x3fe35dce5f9f2af8, // 0.502195, 0.605201
|
|
|
|
0x3fe02552a5a5d0fe, 0x3fe3521cfb2b78c1, // 0.504556, 0.603774
|
|
|
|
0x3fe0389eefce633b, 0x3fe34679ace01346, // 0.506912, 0.602353
|
|
|
|
0x3fe04bdf9da926d2, 0x3fe33ae45b57bcb2, // 0.509262, 0.600939
|
|
|
|
0x3fe05f14bd26459c, 0x3fe32f5ced6a1dfa, // 0.511607, 0.599532
|
|
|
|
0x3fe0723e5c1cdf40, 0x3fe323e34a2b10bf, // 0.513946, 0.598131
|
|
|
|
0x3fe0855c884b450e, 0x3fe3187758e9ebb6, // 0.516279, 0.596737
|
|
|
|
0x3fe0986f4f573520, 0x3fe30d190130d190, // 0.518608, 0.595349
|
|
|
|
0x3fe0ab76bece14d1, 0x3fe301c82ac40260, // 0.520931, 0.593968
|
|
|
|
0x3fe0be72e4252a82, 0x3fe2f684bda12f68, // 0.523248, 0.592593
|
|
|
|
0x3fe0d163ccb9d6b7, 0x3fe2eb4ea1fed14b, // 0.525560, 0.591224
|
|
|
|
0x3fe0e44985d1cc8b, 0x3fe2e025c04b8097, // 0.527867, 0.589862
|
|
|
|
0x3fe0f7241c9b497d, 0x3fe2d50a012d50a0, // 0.530169, 0.588506
|
|
|
|
0x3fe109f39e2d4c96, 0x3fe2c9fb4d812ca0, // 0.532465, 0.587156
|
|
|
|
0x3fe11cb81787ccf8, 0x3fe2bef98e5a3711, // 0.534756, 0.585812
|
|
|
|
0x3fe12f719593efbc, 0x3fe2b404ad012b40, // 0.537041, 0.584475
|
|
|
|
0x3fe1422025243d44, 0x3fe2a91c92f3c105, // 0.539322, 0.583144
|
|
|
|
0x3fe154c3d2f4d5e9, 0x3fe29e4129e4129e, // 0.541597, 0.581818
|
|
|
|
0x3fe1675cababa60e, 0x3fe293725bb804a5, // 0.543867, 0.580499
|
|
|
|
0x3fe179eabbd899a0, 0x3fe288b01288b013, // 0.546132, 0.579186
|
|
|
|
0x3fe18c6e0ff5cf06, 0x3fe27dfa38a1ce4d, // 0.548392, 0.577878
|
|
|
|
0x3fe19ee6b467c96e, 0x3fe27350b8812735, // 0.550647, 0.576577
|
|
|
|
0x3fe1b154b57da29e, 0x3fe268b37cd60127, // 0.552897, 0.575281
|
|
|
|
0x3fe1c3b81f713c24, 0x3fe25e22708092f1, // 0.555142, 0.573991
|
|
|
|
0x3fe1d610fe677003, 0x3fe2539d7e9177b2, // 0.557381, 0.572707
|
|
|
|
0x3fe1e85f5e7040d0, 0x3fe2492492492492, // 0.559616, 0.571429
|
|
|
|
0x3fe1faa34b87094c, 0x3fe23eb79717605b, // 0.561845, 0.570156
|
|
|
|
0x3fe20cdcd192ab6d, 0x3fe23456789abcdf, // 0.564070, 0.568889
|
|
|
|
0x3fe21f0bfc65beeb, 0x3fe22a0122a0122a, // 0.566290, 0.567627
|
|
|
|
0x3fe23130d7bebf42, 0x3fe21fb78121fb78, // 0.568505, 0.566372
|
|
|
|
0x3fe2434b6f483933, 0x3fe21579804855e6, // 0.570715, 0.565121
|
|
|
|
0x3fe2555bce98f7cb, 0x3fe20b470c67c0d9, // 0.572920, 0.563877
|
|
|
|
0x3fe26762013430df, 0x3fe2012012012012, // 0.575120, 0.562637
|
|
|
|
0x3fe2795e1289b11a, 0x3fe1f7047dc11f70, // 0.577315, 0.561404
|
|
|
|
0x3fe28b500df60782, 0x3fe1ecf43c7fb84c, // 0.579506, 0.560175
|
|
|
|
0x3fe29d37fec2b08a, 0x3fe1e2ef3b3fb874, // 0.581692, 0.558952
|
|
|
|
0x3fe2af15f02640ad, 0x3fe1d8f5672e4abd, // 0.583873, 0.557734
|
|
|
|
0x3fe2c0e9ed448e8b, 0x3fe1cf06ada2811d, // 0.586049, 0.556522
|
|
|
|
0x3fe2d2b4012edc9d, 0x3fe1c522fc1ce059, // 0.588221, 0.555315
|
|
|
|
0x3fe2e47436e40268, 0x3fe1bb4a4046ed29, // 0.590387, 0.554113
|
|
|
|
0x3fe2f62a99509546, 0x3fe1b17c67f2bae3, // 0.592550, 0.552916
|
|
|
|
0x3fe307d7334f10be, 0x3fe1a7b9611a7b96, // 0.594707, 0.551724
|
|
|
|
0x3fe3197a0fa7fe6a, 0x3fe19e0119e0119e, // 0.596860, 0.550538
|
|
|
|
0x3fe32b1339121d71, 0x3fe19453808ca29c, // 0.599008, 0.549356
|
|
|
|
0x3fe33ca2ba328994, 0x3fe18ab083902bdb, // 0.601152, 0.548180
|
|
|
|
0x3fe34e289d9ce1d3, 0x3fe1811811811812, // 0.603291, 0.547009
|
|
|
|
0x3fe35fa4edd36ea0, 0x3fe1778a191bd684, // 0.605425, 0.545842
|
|
|
|
0x3fe37117b54747b5, 0x3fe16e0689427379, // 0.607555, 0.544681
|
|
|
|
0x3fe38280fe58797e, 0x3fe1648d50fc3201, // 0.609681, 0.543524
|
|
|
|
0x3fe393e0d3562a19, 0x3fe15b1e5f75270d, // 0.611802, 0.542373
|
|
|
|
0x3fe3a5373e7ebdf9, 0x3fe151b9a3fdd5c9, // 0.613918, 0.541226
|
|
|
|
0x3fe3b68449fffc22, 0x3fe1485f0e0acd3b, // 0.616030, 0.540084
|
|
|
|
0x3fe3c7c7fff73205, 0x3fe13f0e8d344724, // 0.618137, 0.538947
|
|
|
|
0x3fe3d9026a7156fa, 0x3fe135c81135c811, // 0.620240, 0.537815
|
|
|
|
0x3fe3ea33936b2f5b, 0x3fe12c8b89edc0ac, // 0.622339, 0.536688
|
|
|
|
0x3fe3fb5b84d16f42, 0x3fe12358e75d3033, // 0.624433, 0.535565
|
|
|
|
0x3fe40c7a4880dce9, 0x3fe11a3019a74826, // 0.626523, 0.534447
|
|
|
|
0x3fe41d8fe84672ae, 0x3fe1111111111111, // 0.628609, 0.533333
|
|
|
|
0x3fe42e9c6ddf80bf, 0x3fe107fbbe011080, // 0.630690, 0.532225
|
|
|
|
0x3fe43f9fe2f9ce67, 0x3fe0fef010fef011, // 0.632767, 0.531120
|
|
|
|
0x3fe4509a5133bb0a, 0x3fe0f5edfab325a2, // 0.634839, 0.530021
|
|
|
|
0x3fe4618bc21c5ec2, 0x3fe0ecf56be69c90, // 0.636907, 0.528926
|
|
|
|
0x3fe472743f33aaad, 0x3fe0e40655826011, // 0.638971, 0.527835
|
|
|
|
0x3fe48353d1ea88df, 0x3fe0db20a88f4696, // 0.641031, 0.526749
|
|
|
|
0x3fe4942a83a2fc07, 0x3fe0d24456359e3a, // 0.643087, 0.525667
|
|
|
|
0x3fe4a4f85db03ebb, 0x3fe0c9714fbcda3b, // 0.645138, 0.524590
|
|
|
|
0x3fe4b5bd6956e273, 0x3fe0c0a7868b4171, // 0.647185, 0.523517
|
|
|
|
0x3fe4c679afccee39, 0x3fe0b7e6ec259dc8, // 0.649228, 0.522449
|
|
|
|
0x3fe4d72d3a39fd00, 0x3fe0af2f722eecb5, // 0.651267, 0.521385
|
|
|
|
0x3fe4e7d811b75bb0, 0x3fe0a6810a6810a7, // 0.653301, 0.520325
|
|
|
|
0x3fe4f87a3f5026e8, 0x3fe09ddba6af8360, // 0.655332, 0.519270
|
|
|
|
0x3fe50913cc01686b, 0x3fe0953f39010954, // 0.657358, 0.518219
|
|
|
|
0x3fe519a4c0ba3446, 0x3fe08cabb37565e2, // 0.659380, 0.517172
|
|
|
|
0x3fe52a2d265bc5aa, 0x3fe0842108421084, // 0.661398, 0.516129
|
|
|
|
0x3fe53aad05b99b7c, 0x3fe07b9f29b8eae2, // 0.663413, 0.515091
|
|
|
|
0x3fe54b2467999497, 0x3fe073260a47f7c6, // 0.665423, 0.514056
|
|
|
|
0x3fe55b9354b40bcd, 0x3fe06ab59c7912fb, // 0.667429, 0.513026
|
|
|
|
0x3fe56bf9d5b3f399, 0x3fe0624dd2f1a9fc, // 0.669431, 0.512000
|
|
|
|
0x3fe57c57f336f190, 0x3fe059eea0727586, // 0.671429, 0.510978
|
|
|
|
0x3fe58cadb5cd7989, 0x3fe05197f7d73404, // 0.673423, 0.509960
|
|
|
|
0x3fe59cfb25fae87d, 0x3fe04949cc1664c5, // 0.675413, 0.508946
|
|
|
|
0x3fe5ad404c359f2c, 0x3fe0410410410410, // 0.677399, 0.507937
|
|
|
|
0x3fe5bd7d30e71c73, 0x3fe038c6b78247fc, // 0.679381, 0.506931
|
|
|
|
0x3fe5cdb1dc6c1764, 0x3fe03091b51f5e1a, // 0.681359, 0.505929
|
|
|
|
0x3fe5ddde57149923, 0x3fe02864fc7729e9, // 0.683334, 0.504931
|
|
|
|
0x3fe5ee02a9241675, 0x3fe0204081020408, // 0.685304, 0.503937
|
|
|
|
0x3fe5fe1edad18918, 0x3fe0182436517a37, // 0.687271, 0.502947
|
|
|
|
0x3fe60e32f44788d8, 0x3fe0101010101010, // 0.689233, 0.501961
|
|
|
|
0x3fe62e42fefa39ef, 0x3fe0000000000000, // 0.693147, 0.500000
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
};
|
|
|
|
|
2017-06-15 20:10:41 +08:00
|
|
|
// 0.69314718055994530941723212145818
|
|
|
|
static const float64_t ln_2 = float64_t::fromRaw(0x3fe62e42fefa39ef);
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
|
|
|
|
float32_t f32_log(float32_t x)
|
|
|
|
{
|
|
|
|
//special cases
|
|
|
|
if(x.isNaN() || x < float32_t::zero()) return float32_t::nan();
|
|
|
|
if(x == float32_t::zero()) return -float32_t::inf();
|
|
|
|
|
|
|
|
//first 8 bits of mantissa
|
|
|
|
int h0 = (x.v >> (23 - LOGTAB_SCALE)) & ((1 << LOGTAB_SCALE) - 1);
|
|
|
|
//buf == 0.00000000_the_rest_mantissa_bits
|
|
|
|
float64_t buf; buf.v = packToF64UI(0, 1023, ((uint64_t)x.v << 29) & ((1LL << (52 - LOGTAB_SCALE)) - 1));
|
|
|
|
buf -= float64_t::one();
|
|
|
|
|
2017-06-15 20:10:41 +08:00
|
|
|
float64_t tab0 = float64_t::fromRaw(icvLogTab[2*h0]);
|
|
|
|
float64_t tab1 = float64_t::fromRaw(icvLogTab[2*h0+1]);
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
|
|
|
|
float64_t x0 = buf * tab1;
|
|
|
|
//if last elements of icvLogTab
|
2017-06-15 20:10:41 +08:00
|
|
|
if(h0 == 255) x0 += float64_t(-float64_t::one() / float64_t(512));
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
|
|
|
|
float64_t y0 = ln_2 * float64_t(expF32UI(x.v) - 127) + tab0 + x0*x0*x0/float64_t(3) - x0*x0/float64_t(2) + x0;
|
|
|
|
|
|
|
|
return y0;
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_log(float64_t x)
|
|
|
|
{
|
|
|
|
//special cases
|
|
|
|
if(x.isNaN() || x < float64_t::zero()) return float64_t::nan();
|
|
|
|
if(x == float64_t::zero()) return -float64_t::inf();
|
|
|
|
|
|
|
|
static const float64_t
|
2017-06-15 20:10:41 +08:00
|
|
|
A7(1),
|
|
|
|
A6(-float64_t::one() / float64_t(2)),
|
|
|
|
A5( float64_t::one() / float64_t(3)),
|
|
|
|
A4(-float64_t::one() / float64_t(4)),
|
|
|
|
A3( float64_t::one() / float64_t(5)),
|
|
|
|
A2(-float64_t::one() / float64_t(6)),
|
|
|
|
A1( float64_t::one() / float64_t(7)),
|
|
|
|
A0(-float64_t::one() / float64_t(8));
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
|
|
|
|
//first 8 bits of mantissa
|
|
|
|
int h0 = (x.v >> (52 - LOGTAB_SCALE)) & ((1 << LOGTAB_SCALE) - 1);
|
|
|
|
//buf == 0.00000000_the_rest_mantissa_bits
|
|
|
|
float64_t buf; buf.v = packToF64UI(0, 1023, x.v & ((1LL << (52 - LOGTAB_SCALE)) - 1));
|
|
|
|
buf -= float64_t::one();
|
|
|
|
|
2017-06-15 20:10:41 +08:00
|
|
|
float64_t tab0 = float64_t::fromRaw(icvLogTab[2*h0]);
|
|
|
|
float64_t tab1 = float64_t::fromRaw(icvLogTab[2*h0 + 1]);
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
|
|
|
|
float64_t x0 = buf * tab1;
|
|
|
|
//if last elements of icvLogTab
|
2017-06-15 20:10:41 +08:00
|
|
|
if(h0 == 255) x0 += float64_t(-float64_t::one()/float64_t(512));
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
float64_t xq = x0*x0;
|
|
|
|
|
|
|
|
return ln_2 * float64_t( expF64UI(x.v) - 1023) + tab0 + (((A0*xq + A2)*xq + A4)*xq + A6)*xq +
|
|
|
|
(((A1*xq + A3)*xq + A5)*xq + A7)*x0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ************************************************************************** *\
|
|
|
|
Fast cube root by Ken Turkowski
|
|
|
|
(http://www.worldserver.com/turk/computergraphics/papers.html)
|
|
|
|
\* ************************************************************************** */
|
|
|
|
float32_t f32_cbrt(float32_t x)
|
|
|
|
{
|
|
|
|
//special cases
|
2017-06-15 20:10:41 +08:00
|
|
|
if (x.isNaN()) return float32_t::nan();
|
|
|
|
if (x.isInf()) return x;
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
|
|
|
|
int s = signF32UI(x.v);
|
|
|
|
int ex = expF32UI(x.v) - 127;
|
|
|
|
int shx = ex % 3;
|
|
|
|
shx -= shx >= 0 ? 3 : 0;
|
|
|
|
ex = (ex - shx) / 3 - 1; /* exponent of cube root */
|
|
|
|
float64_t fr; fr.v = packToF64UI(0, shx + 1023, ((uint64_t)fracF32UI(x.v)) << 29);
|
|
|
|
|
|
|
|
/* 0.125 <= fr < 1.0 */
|
|
|
|
/* Use quartic rational polynomial with error < 2^(-24) */
|
2017-06-15 20:10:41 +08:00
|
|
|
const float64_t A1 = float64_t::fromRaw(0x4046a09e6653ba70); // 45.2548339756803022511987494
|
|
|
|
const float64_t A2 = float64_t::fromRaw(0x406808f46c6116e0); // 192.2798368355061050458134625
|
|
|
|
const float64_t A3 = float64_t::fromRaw(0x405dca97439cae14); // 119.1654824285581628956914143
|
|
|
|
const float64_t A4 = float64_t::fromRaw(0x402add70d2827500); // 13.43250139086239872172837314
|
|
|
|
const float64_t A5 = float64_t::fromRaw(0x3fc4f15f83f55d2d); // 0.1636161226585754240958355063
|
|
|
|
const float64_t A6 = float64_t::fromRaw(0x402d9e20660edb21); // 14.80884093219134573786480845
|
|
|
|
const float64_t A7 = float64_t::fromRaw(0x4062ff15c0285815); // 151.9714051044435648658557668
|
|
|
|
const float64_t A8 = float64_t::fromRaw(0x406510d06a8112ce); // 168.5254414101568283957668343
|
|
|
|
const float64_t A9 = float64_t::fromRaw(0x4040fecbc9e2c375); // 33.9905941350215598754191872
|
|
|
|
const float64_t A10 = float64_t::one();
|
|
|
|
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
fr = ((((A1 * fr + A2) * fr + A3) * fr + A4) * fr + A5)/
|
|
|
|
((((A6 * fr + A7) * fr + A8) * fr + A9) * fr + A10);
|
|
|
|
/* fr *= 2^ex * sign */
|
|
|
|
|
|
|
|
// checks for "+0" and "-0", reset sign bit
|
2017-06-15 20:10:41 +08:00
|
|
|
float32_t y; y.v = ((x.v & ((1u << 31) - 1)) != 0) ? packToF32UI(s, ex + 127, (uint32_t)(fracF64UI(fr.v) >> 29)) : 0;
|
SoftFloat integrated (#8668)
* everything is put into softfloat.cpp and softfloat.hpp
* WIP: try to integrate softfloat into OpenCV
* extra functions removed
* softfloat made stateless
* CV_EXPORTS added
* operators fixed
* exp added, log: WIP
* log32 fixed
* shorter names; a lot of TODOs
* log64 rewritten
* cbrt32 added
* minors, refactoring
* "inline" -> "CV_INLINE"
* cast to bool warnings fixed
* several warnings fixed
* fixed warning about unsigned unary minus
* fixed warnings on type cast
* inline -> CV_INLINE
* special cases processing added (NaNs, Infs, etc.)
* constants for NaN and Inf added
* more macros and helper functions added
* added (or fixed) tests for pow32, pow64, cbrt32
* exp-like functions fixed
* minor changes
* fixed random number generation for tests
* tests for exp32 and exp64: values are compared to SoftFloat-based naive implementation
* minor warning fix
* pow(f, i) 32/64: special cases handling added
* unused functions removed
* refactoring is in progress (not compiling)
* CV_inline added
* unions {uint_t, float_t} removed
* tests compilation fixed
* static const members -> static methods returning const
* reinterpret_cast
* warning fixed
* const-ness fixed
* all FP calculations (even compile-time) are done in SoftFloat + minor fixes
* pow(f, i) removed from interface (can cause incorrect cast) to internals of pow(f, f), tests fixed
* CV_INLINE -> inline
* internal constants moved to .cpp file
* toInt_minMag() methods merged into toInt() methods
* macros moved to .cpp file
* refactoring: types renamed to softfloat and softdouble; explicit constructors, etc.
* toFloat(), toDouble() -> operator float(), operator double()
* removed f32/f64 prefixes from functions names
* toType() methods removed, round() and trunc() functions added
* minor change
* minors
* MSVC: warnings fixed
* added int cvRound(), cvFloor, cvCeil, cvTrunc, saturate_cast<T>()
* typo fixed
* type cast fixed
2017-05-29 22:07:25 +08:00
|
|
|
return y;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// POW functions ///
|
|
|
|
|
|
|
|
float32_t f32_pow( float32_t x, float32_t y)
|
|
|
|
{
|
|
|
|
const float32_t zero = float32_t::zero(), one = float32_t::one(), inf = float32_t::inf(), nan = float32_t::nan();
|
|
|
|
bool xinf = x.isInf(), yinf = y.isInf(), xnan = x.isNaN(), ynan = y.isNaN();
|
|
|
|
float32_t ax = abs(x);
|
|
|
|
bool useInf = (y > zero) == (ax > one);
|
|
|
|
float32_t v;
|
|
|
|
//special cases
|
|
|
|
if(ynan) v = nan;
|
|
|
|
else if(yinf) v = (ax == one || xnan) ? nan : (useInf ? inf : zero);
|
|
|
|
else if(y == zero) v = one;
|
|
|
|
else if(y == one ) v = x;
|
|
|
|
else //here y is ok
|
|
|
|
{
|
|
|
|
if(xnan) v = nan;
|
|
|
|
else if(xinf) v = (y < zero) ? zero : inf;
|
|
|
|
else if(y == f32_roundToInt(y, round_near_even, false)) v = f32_powi(x, f32_to_i32(y, round_near_even, false));
|
|
|
|
else if(x < zero) v = nan;
|
|
|
|
// (0 ** 0) == 1
|
|
|
|
else if(x == zero) v = (y < zero) ? inf : (y == zero ? one : zero);
|
|
|
|
// here x and y are ok
|
|
|
|
else v = f32_exp(y * f32_log(x));
|
|
|
|
}
|
|
|
|
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_pow( float64_t x, float64_t y)
|
|
|
|
{
|
|
|
|
const float64_t zero = float64_t::zero(), one = float64_t::one(), inf = float64_t::inf(), nan = float64_t::nan();
|
|
|
|
bool xinf = x.isInf(), yinf = y.isInf(), xnan = x.isNaN(), ynan = y.isNaN();
|
|
|
|
float64_t ax = abs(x);
|
|
|
|
bool useInf = (y > zero) == (ax > one);
|
|
|
|
float64_t v;
|
|
|
|
//special cases
|
|
|
|
if(ynan) v = nan;
|
|
|
|
else if(yinf) v = (ax == one || xnan) ? nan : (useInf ? inf : zero);
|
|
|
|
else if(y == zero) v = one;
|
|
|
|
else if(y == one ) v = x;
|
|
|
|
else //here y is ok
|
|
|
|
{
|
|
|
|
if(xnan) v = nan;
|
|
|
|
else if(xinf) v = (y < zero) ? zero : inf;
|
|
|
|
else if(y == f64_roundToInt(y, round_near_even, false)) v = f64_powi(x, f64_to_i32(y, round_near_even, false));
|
|
|
|
else if(x < zero) v = nan;
|
|
|
|
// (0 ** 0) == 1
|
|
|
|
else if(x == zero) v = (y < zero) ? inf : (y == zero ? one : zero);
|
|
|
|
// here x and y are ok
|
|
|
|
else v = f64_exp(y * f64_log(x));
|
|
|
|
}
|
|
|
|
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
|
|
|
// These functions are for internal use only
|
|
|
|
|
|
|
|
float32_t f32_powi( float32_t x, int y)
|
|
|
|
{
|
|
|
|
float32_t v;
|
|
|
|
//special case: (0 ** 0) == 1
|
|
|
|
if(x == float32_t::zero())
|
|
|
|
v = (y < 0) ? float32_t::inf() : (y == 0 ? float32_t::one() : float32_t::zero());
|
|
|
|
// here x and y are ok
|
|
|
|
else
|
|
|
|
{
|
|
|
|
float32_t a = float32_t::one(), b = x;
|
|
|
|
int p = std::abs(y);
|
|
|
|
if( y < 0 )
|
|
|
|
b = float32_t::one()/b;
|
|
|
|
while( p > 1 )
|
|
|
|
{
|
|
|
|
if( p & 1 )
|
|
|
|
a *= b;
|
|
|
|
b *= b;
|
|
|
|
p >>= 1;
|
|
|
|
}
|
|
|
|
v = a * b;
|
|
|
|
}
|
|
|
|
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
|
|
|
float64_t f64_powi( float64_t x, int y)
|
|
|
|
{
|
|
|
|
float64_t v;
|
|
|
|
//special case: (0 ** 0) == 1
|
|
|
|
if(x == float64_t::zero())
|
|
|
|
v = (y < 0) ? float64_t::inf() : (y == 0 ? float64_t::one() : float64_t::zero());
|
|
|
|
// here x and y are ok
|
|
|
|
else
|
|
|
|
{
|
|
|
|
float64_t a = float64_t::one(), b = x;
|
|
|
|
int p = std::abs(y);
|
|
|
|
if( y < 0 )
|
|
|
|
b = float64_t::one()/b;
|
|
|
|
while( p > 1 )
|
|
|
|
{
|
|
|
|
if( p & 1 )
|
|
|
|
a *= b;
|
|
|
|
b *= b;
|
|
|
|
p >>= 1;
|
|
|
|
}
|
|
|
|
v = a * b;
|
|
|
|
}
|
|
|
|
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|