diff options
Diffstat (limited to 'fpu')
-rw-r--r-- | fpu/softfloat.c | 1055 |
1 files changed, 769 insertions, 286 deletions
diff --git a/fpu/softfloat.c b/fpu/softfloat.c index dbda61bc8e..e0ea599769 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -42,6 +42,9 @@ these four paragraphs for those parts of this code that are retained. #include "fpu/softfloat.h" +/* We only need stdlib for abort() */ +#include <stdlib.h> + /*---------------------------------------------------------------------------- | Primitive arithmetic functions, including multi-word arithmetic, and | division and square root approximations. (Can be specialized to target if @@ -59,21 +62,6 @@ these four paragraphs for those parts of this code that are retained. *----------------------------------------------------------------------------*/ #include "softfloat-specialize.h" -void set_float_rounding_mode(int val STATUS_PARAM) -{ - STATUS(float_rounding_mode) = val; -} - -void set_float_exception_flags(int val STATUS_PARAM) -{ - STATUS(float_exception_flags) = val; -} - -void set_floatx80_rounding_precision(int val STATUS_PARAM) -{ - STATUS(floatx80_rounding_precision) = val; -} - /*---------------------------------------------------------------------------- | Returns the fraction bits of the half-precision floating-point value `a'. *----------------------------------------------------------------------------*/ @@ -121,20 +109,22 @@ static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM) roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x40; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x7F; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x40; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x7f; + break; + case float_round_down: + roundIncrement = zSign ? 0x7f : 0; + break; + default: + abort(); } roundBits = absZ & 0x7F; absZ = ( absZ + roundIncrement )>>7; @@ -170,19 +160,22 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (int64_t) absZ1 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && absZ1; - } - else { - increment = ( roundingMode == float_round_up ) && absZ1; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t) absZ1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && absZ1; + break; + case float_round_down: + increment = zSign && absZ1; + break; + default: + abort(); } if ( increment ) { ++absZ0; @@ -204,6 +197,61 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU } /*---------------------------------------------------------------------------- +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and +| `absZ1', with binary point between bits 63 and 64 (between the input words), +| and returns the properly rounded 64-bit unsigned integer corresponding to the +| input. Ordinarily, the fixed-point input is simply rounded to an integer, +| with the inexact exception raised if the input cannot be represented exactly +| as an integer. However, if the fixed-point input is too large, the invalid +| exception is raised and the largest unsigned integer is returned. +*----------------------------------------------------------------------------*/ + +static int64 roundAndPackUint64(flag zSign, uint64_t absZ0, + uint64_t absZ1 STATUS_PARAM) +{ + int8 roundingMode; + flag roundNearestEven, increment; + + roundingMode = STATUS(float_rounding_mode); + roundNearestEven = (roundingMode == float_round_nearest_even); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)absZ1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && absZ1; + break; + case float_round_down: + increment = zSign && absZ1; + break; + default: + abort(); + } + if (increment) { + ++absZ0; + if (absZ0 == 0) { + float_raise(float_flag_invalid STATUS_VAR); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven); + } + + if (zSign && absZ0) { + float_raise(float_flag_invalid STATUS_VAR); + return 0; + } + + if (absZ1) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + return absZ0; +} + +/*---------------------------------------------------------------------------- | Returns the fraction bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ @@ -319,20 +367,23 @@ static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x40; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x7F; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x40; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x7f; + break; + case float_round_down: + roundIncrement = zSign ? 0x7f : 0; + break; + default: + abort(); + break; } roundBits = zSig & 0x7F; if ( 0xFD <= (uint16_t) zExp ) { @@ -501,20 +552,22 @@ static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x200; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x3FF; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x200; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x3ff; + break; + case float_round_down: + roundIncrement = zSign ? 0x3ff : 0; + break; + default: + abort(); } roundBits = zSig & 0x3FF; if ( 0x7FD <= (uint16_t) zExp ) { @@ -684,19 +737,21 @@ static floatx80 goto precision80; } zSig0 |= ( zSig1 != 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = roundMask; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : roundMask; + break; + case float_round_down: + roundIncrement = zSign ? roundMask : 0; + break; + default: + abort(); } roundBits = zSig0 & roundMask; if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { @@ -743,19 +798,22 @@ static floatx80 if ( zSig0 == 0 ) zExp = 0; return packFloatx80( zSign, zExp, zSig0 ); precision80: - increment = ( (int64_t) zSig1 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } - else { - increment = ( roundingMode == float_round_up ) && zSig1; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); } if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { if ( ( 0x7FFE < zExp ) @@ -785,16 +843,22 @@ static floatx80 zExp = 0; if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR); if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact; - if ( roundNearestEven ) { - increment = ( (int64_t) zSig1 < 0 ); - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } - else { - increment = ( roundingMode == float_round_up ) && zSig1; - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); } if ( increment ) { ++zSig0; @@ -994,19 +1058,22 @@ static float128 roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (int64_t) zSig2 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig2; - } - else { - increment = ( roundingMode == float_round_up ) && zSig2; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig2 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig2; + break; + case float_round_down: + increment = zSign && zSig2; + break; + default: + abort(); } if ( 0x7FFD <= (uint32_t) zExp ) { if ( ( 0x7FFD < zExp ) @@ -1054,16 +1121,22 @@ static float128 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); zExp = 0; if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR); - if ( roundNearestEven ) { - increment = ( (int64_t) zSig2 < 0 ); - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig2; - } - else { - increment = ( roundingMode == float_round_up ) && zSig2; - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig2 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig2; + break; + case float_round_down: + increment = zSign && zSig2; + break; + default: + abort(); } } } @@ -1121,7 +1194,7 @@ static float128 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float32 int32_to_float32( int32 a STATUS_PARAM ) +float32 int32_to_float32(int32_t a STATUS_PARAM) { flag zSign; @@ -1138,7 +1211,7 @@ float32 int32_to_float32( int32 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 int32_to_float64( int32 a STATUS_PARAM ) +float64 int32_to_float64(int32_t a STATUS_PARAM) { flag zSign; uint32 absA; @@ -1161,7 +1234,7 @@ float64 int32_to_float64( int32 a STATUS_PARAM ) | Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 int32_to_floatx80( int32 a STATUS_PARAM ) +floatx80 int32_to_floatx80(int32_t a STATUS_PARAM) { flag zSign; uint32 absA; @@ -1183,7 +1256,7 @@ floatx80 int32_to_floatx80( int32 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 int32_to_float128( int32 a STATUS_PARAM ) +float128 int32_to_float128(int32_t a STATUS_PARAM) { flag zSign; uint32 absA; @@ -1205,7 +1278,7 @@ float128 int32_to_float128( int32 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float32 int64_to_float32( int64 a STATUS_PARAM ) +float32 int64_to_float32(int64_t a STATUS_PARAM) { flag zSign; uint64 absA; @@ -1231,7 +1304,7 @@ float32 int64_to_float32( int64 a STATUS_PARAM ) } -float32 uint64_to_float32( uint64 a STATUS_PARAM ) +float32 uint64_to_float32(uint64_t a STATUS_PARAM) { int8 shiftCount; @@ -1258,7 +1331,7 @@ float32 uint64_to_float32( uint64 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 int64_to_float64( int64 a STATUS_PARAM ) +float64 int64_to_float64(int64_t a STATUS_PARAM) { flag zSign; @@ -1271,7 +1344,7 @@ float64 int64_to_float64( int64 a STATUS_PARAM ) } -float64 uint64_to_float64(uint64 a STATUS_PARAM) +float64 uint64_to_float64(uint64_t a STATUS_PARAM) { int exp = 0x43C; @@ -1292,7 +1365,7 @@ float64 uint64_to_float64(uint64 a STATUS_PARAM) | Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 int64_to_floatx80( int64 a STATUS_PARAM ) +floatx80 int64_to_floatx80(int64_t a STATUS_PARAM) { flag zSign; uint64 absA; @@ -1312,7 +1385,7 @@ floatx80 int64_to_floatx80( int64 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 int64_to_float128( int64 a STATUS_PARAM ) +float128 int64_to_float128(int64_t a STATUS_PARAM) { flag zSign; uint64 absA; @@ -1339,7 +1412,7 @@ float128 int64_to_float128( int64 a STATUS_PARAM ) } -float128 uint64_to_float128(uint64 a STATUS_PARAM) +float128 uint64_to_float128(uint64_t a STATUS_PARAM) { if (a == 0) { return float128_zero; @@ -1509,6 +1582,52 @@ int64 float32_to_int64( float32 a STATUS_PARAM ) /*---------------------------------------------------------------------------- | Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| unsigned integer is returned. Otherwise, if the conversion overflows, the +| largest unsigned integer is returned. If the 'a' is negative, the result +| is rounded and zero is returned; values that do not round to zero will +| raise the inexact exception flag. +*----------------------------------------------------------------------------*/ + +uint64 float32_to_uint64(float32 a STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp, shiftCount; + uint32_t aSig; + uint64_t aSig64, aSigExtra; + a = float32_squash_input_denormal(a STATUS_VAR); + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + if ((aSign) && (aExp > 126)) { + float_raise(float_flag_invalid STATUS_VAR); + if (float32_is_any_nan(a)) { + return LIT64(0xFFFFFFFFFFFFFFFF); + } else { + return 0; + } + } + shiftCount = 0xBE - aExp; + if (aExp) { + aSig |= 0x00800000; + } + if (shiftCount < 0) { + float_raise(float_flag_invalid STATUS_VAR); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + + aSig64 = aSig; + aSig64 <<= 40; + shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra); + return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value | `a' to the 64-bit two's complement integer format. The conversion is | performed according to the IEC/IEEE Standard for Binary Floating-Point | Arithmetic, except that the conversion is always rounded toward zero. If @@ -1656,7 +1775,6 @@ float32 float32_round_to_int( float32 a STATUS_PARAM) flag aSign; int_fast16_t aExp; uint32_t lastBitMask, roundBitsMask; - int8 roundingMode; uint32_t z; a = float32_squash_input_denormal(a STATUS_VAR); @@ -1677,6 +1795,11 @@ float32 float32_round_to_int( float32 a STATUS_PARAM) return packFloat32( aSign, 0x7F, 0 ); } break; + case float_round_ties_away: + if (aExp == 0x7E) { + return packFloat32(aSign, 0x7F, 0); + } + break; case float_round_down: return make_float32(aSign ? 0xBF800000 : 0); case float_round_up: @@ -1688,15 +1811,30 @@ float32 float32_round_to_int( float32 a STATUS_PARAM) lastBitMask <<= 0x96 - aExp; roundBitsMask = lastBitMask - 1; z = float32_val(a); - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: z += lastBitMask>>1; - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) { + if ((z & roundBitsMask) == 0) { + z &= ~lastBitMask; + } + break; + case float_round_ties_away: + z += lastBitMask >> 1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat32Sign(make_float32(z))) { + z += roundBitsMask; + } + break; + case float_round_down: + if (extractFloat32Sign(make_float32(z))) { z += roundBitsMask; } + break; + default: + abort(); } z &= ~ roundBitsMask; if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact; @@ -3005,6 +3143,128 @@ static float16 packFloat16(flag zSign, int_fast16_t zExp, uint16_t zSig) (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig); } +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper half-precision floating- +| point value corresponding to the abstract input. Ordinarily, the abstract +| value is simply rounded and packed into the half-precision format, with +| the inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal half- +| precision floating-point number. +| The `ieee' flag indicates whether to use IEEE standard half precision, or +| ARM-style "alternative representation", which omits the NaN and Inf +| encodings in order to raise the maximum representable exponent by one. +| The input significand `zSig' has its binary point between bits 22 +| and 23, which is 13 bits to the left of the usual location. This shifted +| significand must be normalized or smaller. If `zSig' is not normalized, +| `zExp' must be 0; in that case, the result returned is a subnormal number, +| and it must not require rounding. In the usual case that `zSig' is +| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +| Note the slightly odd position of the binary point in zSig compared with the +| other roundAndPackFloat functions. This should probably be fixed if we +| need to implement more float16 routines than just conversion. +| The handling of underflow and overflow follows the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +static float32 roundAndPackFloat16(flag zSign, int_fast16_t zExp, + uint32_t zSig, flag ieee STATUS_PARAM) +{ + int maxexp = ieee ? 29 : 30; + uint32_t mask; + uint32_t increment; + bool rounding_bumps_exp; + bool is_tiny = false; + + /* Calculate the mask of bits of the mantissa which are not + * representable in half-precision and will be lost. + */ + if (zExp < 1) { + /* Will be denormal in halfprec */ + mask = 0x00ffffff; + if (zExp >= -11) { + mask >>= 11 + zExp; + } + } else { + /* Normal number in halfprec */ + mask = 0x00001fff; + } + + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: + increment = (mask + 1) >> 1; + if ((zSig & mask) == increment) { + increment = zSig & (increment << 1); + } + break; + case float_round_ties_away: + increment = (mask + 1) >> 1; + break; + case float_round_up: + increment = zSign ? 0 : mask; + break; + case float_round_down: + increment = zSign ? mask : 0; + break; + default: /* round_to_zero */ + increment = 0; + break; + } + + rounding_bumps_exp = (zSig + increment >= 0x01000000); + + if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) { + if (ieee) { + float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR); + return packFloat16(zSign, 0x1f, 0); + } else { + float_raise(float_flag_invalid STATUS_VAR); + return packFloat16(zSign, 0x1f, 0x3ff); + } + } + + if (zExp < 0) { + /* Note that flush-to-zero does not affect half-precision results */ + is_tiny = + (STATUS(float_detect_tininess) == float_tininess_before_rounding) + || (zExp < -1) + || (!rounding_bumps_exp); + } + if (zSig & mask) { + float_raise(float_flag_inexact STATUS_VAR); + if (is_tiny) { + float_raise(float_flag_underflow STATUS_VAR); + } + } + + zSig += increment; + if (rounding_bumps_exp) { + zSig >>= 1; + zExp++; + } + + if (zExp < -10) { + return packFloat16(zSign, 0, 0); + } + if (zExp < 0) { + zSig >>= -zExp; + zExp = 0; + } + return packFloat16(zSign, zExp, zSig >> 13); +} + +static void normalizeFloat16Subnormal(uint32_t aSig, int_fast16_t *zExpPtr, + uint32_t *zSigPtr) +{ + int8_t shiftCount = countLeadingZeros32(aSig) - 21; + *zSigPtr = aSig << shiftCount; + *zExpPtr = 1 - shiftCount; +} + /* Half precision floats come in two formats: standard IEEE and "ARM" format. The latter gains extra exponent range by omitting the NaN/Inf encodings. */ @@ -3025,15 +3285,12 @@ float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM) return packFloat32(aSign, 0xff, 0); } if (aExp == 0) { - int8 shiftCount; - if (aSig == 0) { return packFloat32(aSign, 0, 0); } - shiftCount = countLeadingZeros32( aSig ) - 21; - aSig = aSig << shiftCount; - aExp = -shiftCount; + normalizeFloat16Subnormal(aSig, &aExp, &aSig); + aExp--; } return packFloat32( aSign, aExp + 0x70, aSig << 13); } @@ -3043,9 +3300,7 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) flag aSign; int_fast16_t aExp; uint32_t aSig; - uint32_t mask; - uint32_t increment; - int8 roundingMode; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); @@ -3054,11 +3309,12 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) if ( aExp == 0xFF ) { if (aSig) { /* Input is a NaN */ - float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); if (!ieee) { + float_raise(float_flag_invalid STATUS_VAR); return packFloat16(aSign, 0, 0); } - return r; + return commonNaNToFloat16( + float32ToCommonNaN(a STATUS_VAR) STATUS_VAR); } /* Infinity */ if (!ieee) { @@ -3070,66 +3326,92 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) if (aExp == 0 && aSig == 0) { return packFloat16(aSign, 0, 0); } - /* Decimal point between bits 22 and 23. */ + /* Decimal point between bits 22 and 23. Note that we add the 1 bit + * even if the input is denormal; however this is harmless because + * the largest possible single-precision denormal is still smaller + * than the smallest representable half-precision denormal, and so we + * will end up ignoring aSig and returning via the "always return zero" + * codepath. + */ aSig |= 0x00800000; - aExp -= 0x7f; - if (aExp < -14) { - mask = 0x00ffffff; - if (aExp >= -24) { - mask >>= 25 + aExp; + aExp -= 0x71; + + return roundAndPackFloat16(aSign, aExp, aSig, ieee STATUS_VAR); +} + +float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp; + uint32_t aSig; + + aSign = extractFloat16Sign(a); + aExp = extractFloat16Exp(a); + aSig = extractFloat16Frac(a); + + if (aExp == 0x1f && ieee) { + if (aSig) { + return commonNaNToFloat64( + float16ToCommonNaN(a STATUS_VAR) STATUS_VAR); } - } else { - mask = 0x00001fff; + return packFloat64(aSign, 0x7ff, 0); } - if (aSig & mask) { - float_raise( float_flag_underflow STATUS_VAR ); - roundingMode = STATUS(float_rounding_mode); - switch (roundingMode) { - case float_round_nearest_even: - increment = (mask + 1) >> 1; - if ((aSig & mask) == increment) { - increment = aSig & (increment << 1); - } - break; - case float_round_up: - increment = aSign ? 0 : mask; - break; - case float_round_down: - increment = aSign ? mask : 0; - break; - default: /* round_to_zero */ - increment = 0; - break; - } - aSig += increment; - if (aSig >= 0x01000000) { - aSig >>= 1; - aExp++; + if (aExp == 0) { + if (aSig == 0) { + return packFloat64(aSign, 0, 0); } - } else if (aExp < -14 - && STATUS(float_detect_tininess) == float_tininess_before_rounding) { - float_raise( float_flag_underflow STATUS_VAR); + + normalizeFloat16Subnormal(aSig, &aExp, &aSig); + aExp--; } + return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42); +} - if (ieee) { - if (aExp > 15) { - float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); - return packFloat16(aSign, 0x1f, 0); +float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp; + uint64_t aSig; + uint32_t zSig; + + a = float64_squash_input_denormal(a STATUS_VAR); + + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + if (aExp == 0x7FF) { + if (aSig) { + /* Input is a NaN */ + if (!ieee) { + float_raise(float_flag_invalid STATUS_VAR); + return packFloat16(aSign, 0, 0); + } + return commonNaNToFloat16( + float64ToCommonNaN(a STATUS_VAR) STATUS_VAR); } - } else { - if (aExp > 16) { - float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR); + /* Infinity */ + if (!ieee) { + float_raise(float_flag_invalid STATUS_VAR); return packFloat16(aSign, 0x1f, 0x3ff); } + return packFloat16(aSign, 0x1f, 0); } - if (aExp < -24) { + shift64RightJamming(aSig, 29, &aSig); + zSig = aSig; + if (aExp == 0 && zSig == 0) { return packFloat16(aSign, 0, 0); } - if (aExp < -14) { - aSig >>= -14 - aExp; - aExp = -14; - } - return packFloat16(aSign, aExp + 14, aSig >> 13); + /* Decimal point between bits 22 and 23. Note that we add the 1 bit + * even if the input is denormal; however this is harmless because + * the largest possible single-precision denormal is still smaller + * than the smallest representable half-precision denormal, and so we + * will end up ignoring aSig and returning via the "always return zero" + * codepath. + */ + zSig |= 0x00800000; + aExp -= 0x3F1; + + return roundAndPackFloat16(aSign, aExp, zSig, ieee STATUS_VAR); } /*---------------------------------------------------------------------------- @@ -3206,7 +3488,6 @@ float64 float64_round_to_int( float64 a STATUS_PARAM ) flag aSign; int_fast16_t aExp; uint64_t lastBitMask, roundBitsMask; - int8 roundingMode; uint64_t z; a = float64_squash_input_denormal(a STATUS_VAR); @@ -3227,6 +3508,11 @@ float64 float64_round_to_int( float64 a STATUS_PARAM ) return packFloat64( aSign, 0x3FF, 0 ); } break; + case float_round_ties_away: + if (aExp == 0x3FE) { + return packFloat64(aSign, 0x3ff, 0); + } + break; case float_round_down: return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0); case float_round_up: @@ -3239,15 +3525,30 @@ float64 float64_round_to_int( float64 a STATUS_PARAM ) lastBitMask <<= 0x433 - aExp; roundBitsMask = lastBitMask - 1; z = float64_val(a); - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { - z += lastBitMask>>1; - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: + z += lastBitMask >> 1; + if ((z & roundBitsMask) == 0) { + z &= ~lastBitMask; + } + break; + case float_round_ties_away: + z += lastBitMask >> 1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat64Sign(make_float64(z))) { + z += roundBitsMask; + } + break; + case float_round_down: + if (extractFloat64Sign(make_float64(z))) { z += roundBitsMask; } + break; + default: + abort(); } z &= ~ roundBitsMask; if ( z != float64_val(a) ) @@ -4475,7 +4776,6 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM ) flag aSign; int32 aExp; uint64_t lastBitMask, roundBitsMask; - int8 roundingMode; floatx80 z; aExp = extractFloatx80Exp( a ); @@ -4500,6 +4800,11 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM ) packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); } break; + case float_round_ties_away: + if (aExp == 0x3FFE) { + return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000)); + } + break; case float_round_down: return aSign ? @@ -4516,15 +4821,30 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM ) lastBitMask <<= 0x403E - aExp; roundBitsMask = lastBitMask - 1; z = a; - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: z.low += lastBitMask>>1; - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { + if ((z.low & roundBitsMask) == 0) { + z.low &= ~lastBitMask; + } + break; + case float_round_ties_away: + z.low += lastBitMask >> 1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloatx80Sign(z)) { z.low += roundBitsMask; } + break; + case float_round_down: + if (extractFloatx80Sign(z)) { + z.low += roundBitsMask; + } + break; + default: + abort(); } z.low &= ~ roundBitsMask; if ( z.low == 0 ) { @@ -5550,7 +5870,6 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) flag aSign; int32 aExp; uint64_t lastBitMask, roundBitsMask; - int8 roundingMode; float128 z; aExp = extractFloat128Exp( a ); @@ -5567,8 +5886,8 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; roundBitsMask = lastBitMask - 1; z = a; - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: if ( lastBitMask ) { add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; @@ -5579,12 +5898,30 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1; } } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat128Sign( z ) - ^ ( roundingMode == float_round_up ) ) { - add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); + break; + case float_round_ties_away: + if (lastBitMask) { + add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low); + } else { + if ((int64_t) z.low < 0) { + ++z.high; + } + } + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat128Sign(z)) { + add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); + } + break; + case float_round_down: + if (extractFloat128Sign(z)) { + add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); } + break; + default: + abort(); } z.low &= ~ roundBitsMask; } @@ -5602,6 +5939,11 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) return packFloat128( aSign, 0x3FFF, 0, 0 ); } break; + case float_round_ties_away: + if (aExp == 0x3FFE) { + return packFloat128(aSign, 0x3FFF, 0, 0); + } + break; case float_round_down: return aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) @@ -5618,19 +5960,32 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) roundBitsMask = lastBitMask - 1; z.low = 0; z.high = a.high; - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: z.high += lastBitMask>>1; if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { z.high &= ~ lastBitMask; } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat128Sign( z ) - ^ ( roundingMode == float_round_up ) ) { + break; + case float_round_ties_away: + z.high += lastBitMask>>1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat128Sign(z)) { z.high |= ( a.low != 0 ); z.high += roundBitsMask; } + break; + case float_round_down: + if (extractFloat128Sign(z)) { + z.high |= (a.low != 0); + z.high += roundBitsMask; + } + break; + default: + abort(); } z.high &= ~ roundBitsMask; } @@ -6418,12 +6773,12 @@ int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM ) } /* misc functions */ -float32 uint32_to_float32( uint32 a STATUS_PARAM ) +float32 uint32_to_float32(uint32_t a STATUS_PARAM) { return int64_to_float32(a STATUS_VAR); } -float64 uint32_to_float64( uint32 a STATUS_PARAM ) +float64 uint32_to_float64(uint32_t a STATUS_PARAM) { return int64_to_float64(a STATUS_VAR); } @@ -6432,17 +6787,18 @@ uint32 float32_to_uint32( float32 a STATUS_PARAM ) { int64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); v = float32_to_int64(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffffffff) { res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } @@ -6450,17 +6806,58 @@ uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM ) { int64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); v = float32_to_int64_round_to_zero(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffffffff) { res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; + } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + +int_fast16_t float32_to_int16(float32 a STATUS_PARAM) +{ + int32_t v; + int_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float32_to_int32(a STATUS_VAR); + if (v < -0x8000) { + res = -0x8000; + } else if (v > 0x7fff) { + res = 0x7fff; + } else { + return v; } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + +uint_fast16_t float32_to_uint16(float32 a STATUS_PARAM) +{ + int32_t v; + uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float32_to_int32(a STATUS_VAR); + if (v < 0) { + res = 0; + } else if (v > 0xffff) { + res = 0xffff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } @@ -6468,53 +6865,92 @@ uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM) { int64_t v; uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); v = float32_to_int64_round_to_zero(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffff) { res = 0xffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } uint32 float64_to_uint32( float64 a STATUS_PARAM ) { - int64_t v; + uint64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); - v = float64_to_int64(a STATUS_VAR); - if (v < 0) { - res = 0; - float_raise( float_flag_invalid STATUS_VAR); - } else if (v > 0xffffffff) { + v = float64_to_uint64(a STATUS_VAR); + if (v > 0xffffffff) { res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM ) { - int64_t v; + uint64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); - v = float64_to_int64_round_to_zero(a STATUS_VAR); + v = float64_to_uint64_round_to_zero(a STATUS_VAR); + if (v > 0xffffffff) { + res = 0xffffffff; + } else { + return v; + } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + +int_fast16_t float64_to_int16(float64 a STATUS_PARAM) +{ + int64_t v; + int_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float64_to_int32(a STATUS_VAR); + if (v < -0x8000) { + res = -0x8000; + } else if (v > 0x7fff) { + res = 0x7fff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + +uint_fast16_t float64_to_uint16(float64 a STATUS_PARAM) +{ + int64_t v; + uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float64_to_int32(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); - } else if (v > 0xffffffff) { - res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); + } else if (v > 0xffff) { + res = 0xffff; } else { - res = v; + return v; } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } @@ -6522,41 +6958,75 @@ uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM) { int64_t v; uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); v = float64_to_int64_round_to_zero(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffff) { res = 0xffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } -/* FIXME: This looks broken. */ -uint64_t float64_to_uint64 (float64 a STATUS_PARAM) -{ - int64_t v; +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| positive integer is returned. If the conversion overflows, the +| largest unsigned integer is returned. If 'a' is negative, the value is +| rounded and zero is returned; negative values that do not round to zero +| will raise the inexact exception. +*----------------------------------------------------------------------------*/ - v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR)); - v += float64_val(a); - v = float64_to_int64(make_float64(v) STATUS_VAR); +uint64_t float64_to_uint64(float64 a STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp, shiftCount; + uint64_t aSig, aSigExtra; + a = float64_squash_input_denormal(a STATUS_VAR); - return v - INT64_MIN; + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + if (aSign && (aExp > 1022)) { + float_raise(float_flag_invalid STATUS_VAR); + if (float64_is_any_nan(a)) { + return LIT64(0xFFFFFFFFFFFFFFFF); + } else { + return 0; + } + } + if (aExp) { + aSig |= LIT64(0x0010000000000000); + } + shiftCount = 0x433 - aExp; + if (shiftCount <= 0) { + if (0x43E < aExp) { + float_raise(float_flag_invalid STATUS_VAR); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + aSigExtra = 0; + aSig <<= -shiftCount; + } else { + shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra); + } + return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR); } uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM) { - int64_t v; - - v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR)); - v += float64_val(a); - v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR); - - return v - INT64_MIN; + signed char current_rounding_mode = STATUS(float_rounding_mode); + set_float_rounding_mode(float_round_to_zero STATUS_VAR); + int64_t v = float64_to_uint64(a STATUS_VAR); + set_float_rounding_mode(current_rounding_mode STATUS_VAR); + return v; } #define COMPARE(s, nan_exp) \ @@ -6795,10 +7265,13 @@ float32 float32_scalbn( float32 a, int n STATUS_PARAM ) } return a; } - if ( aExp != 0 ) + if (aExp != 0) { aSig |= 0x00800000; - else if ( aSig == 0 ) + } else if (aSig == 0) { return a; + } else { + aExp++; + } if (n > 0x200) { n = 0x200; @@ -6828,10 +7301,13 @@ float64 float64_scalbn( float64 a, int n STATUS_PARAM ) } return a; } - if ( aExp != 0 ) + if (aExp != 0) { aSig |= LIT64( 0x0010000000000000 ); - else if ( aSig == 0 ) + } else if (aSig == 0) { return a; + } else { + aExp++; + } if (n > 0x1000) { n = 0x1000; @@ -6861,8 +7337,12 @@ floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM ) return a; } - if (aExp == 0 && aSig == 0) - return a; + if (aExp == 0) { + if (aSig == 0) { + return a; + } + aExp++; + } if (n > 0x10000) { n = 0x10000; @@ -6891,10 +7371,13 @@ float128 float128_scalbn( float128 a, int n STATUS_PARAM ) } return a; } - if ( aExp != 0 ) + if (aExp != 0) { aSig0 |= LIT64( 0x0001000000000000 ); - else if ( aSig0 == 0 && aSig1 == 0 ) + } else if (aSig0 == 0 && aSig1 == 0) { return a; + } else { + aExp++; + } if (n > 0x10000) { n = 0x10000; |