From 38970efafdfa8a992c177563c4f0d77062b88fd9 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jan 2014 11:47:21 +0000 Subject: softfloat: Fix exception flag handling for float32_to_float16() Our float32 to float16 conversion routine was generating the correct numerical answers, but not always setting the right set of exception flags. Fix this, mostly by rearranging the code to more closely resemble RoundAndPackFloat*, and in particular: * non-IEEE halfprec always raises Invalid for input NaNs * we need to check for the overflow case before underflow * we weren't getting the tininess-detected-after-rounding case correct (somewhat academic since only ARM uses halfprec and it is always tininess-detected-before-rounding) * non-IEEE halfprec overflow raises only Invalid, not Invalid + Inexact * we weren't setting Inexact when we should Also add some clarifying comments about what the code is doing. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 105 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 66 insertions(+), 39 deletions(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index dbda61bc8e..6a6b6567d7 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -3046,6 +3046,10 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) uint32_t mask; uint32_t increment; int8 roundingMode; + int maxexp = ieee ? 15 : 16; + bool rounding_bumps_exp; + bool is_tiny = false; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); @@ -3054,11 +3058,12 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) if ( aExp == 0xFF ) { if (aSig) { /* Input is a NaN */ - float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); if (!ieee) { + float_raise(float_flag_invalid STATUS_VAR); return packFloat16(aSign, 0, 0); } - return r; + return commonNaNToFloat16( + float32ToCommonNaN(a STATUS_VAR) STATUS_VAR); } /* Infinity */ if (!ieee) { @@ -3070,58 +3075,80 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) if (aExp == 0 && aSig == 0) { return packFloat16(aSign, 0, 0); } - /* Decimal point between bits 22 and 23. */ + /* Decimal point between bits 22 and 23. Note that we add the 1 bit + * even if the input is denormal; however this is harmless because + * the largest possible single-precision denormal is still smaller + * than the smallest representable half-precision denormal, and so we + * will end up ignoring aSig and returning via the "always return zero" + * codepath. + */ aSig |= 0x00800000; aExp -= 0x7f; + /* Calculate the mask of bits of the mantissa which are not + * representable in half-precision and will be lost. + */ if (aExp < -14) { + /* Will be denormal in halfprec */ mask = 0x00ffffff; if (aExp >= -24) { mask >>= 25 + aExp; } } else { + /* Normal number in halfprec */ mask = 0x00001fff; } - if (aSig & mask) { - float_raise( float_flag_underflow STATUS_VAR ); - roundingMode = STATUS(float_rounding_mode); - switch (roundingMode) { - case float_round_nearest_even: - increment = (mask + 1) >> 1; - if ((aSig & mask) == increment) { - increment = aSig & (increment << 1); - } - break; - case float_round_up: - increment = aSign ? 0 : mask; - break; - case float_round_down: - increment = aSign ? mask : 0; - break; - default: /* round_to_zero */ - increment = 0; - break; - } - aSig += increment; - if (aSig >= 0x01000000) { - aSig >>= 1; - aExp++; - } - } else if (aExp < -14 - && STATUS(float_detect_tininess) == float_tininess_before_rounding) { - float_raise( float_flag_underflow STATUS_VAR); - } - if (ieee) { - if (aExp > 15) { - float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); + roundingMode = STATUS(float_rounding_mode); + switch (roundingMode) { + case float_round_nearest_even: + increment = (mask + 1) >> 1; + if ((aSig & mask) == increment) { + increment = aSig & (increment << 1); + } + break; + case float_round_up: + increment = aSign ? 0 : mask; + break; + case float_round_down: + increment = aSign ? mask : 0; + break; + default: /* round_to_zero */ + increment = 0; + break; + } + + rounding_bumps_exp = (aSig + increment >= 0x01000000); + + if (aExp > maxexp || (aExp == maxexp && rounding_bumps_exp)) { + if (ieee) { + float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR); return packFloat16(aSign, 0x1f, 0); - } - } else { - if (aExp > 16) { - float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR); + } else { + float_raise(float_flag_invalid STATUS_VAR); return packFloat16(aSign, 0x1f, 0x3ff); } } + + if (aExp < -14) { + /* Note that flush-to-zero does not affect half-precision results */ + is_tiny = + (STATUS(float_detect_tininess) == float_tininess_before_rounding) + || (aExp < -15) + || (!rounding_bumps_exp); + } + if (aSig & mask) { + float_raise(float_flag_inexact STATUS_VAR); + if (is_tiny) { + float_raise(float_flag_underflow STATUS_VAR); + } + } + + aSig += increment; + if (rounding_bumps_exp) { + aSig >>= 1; + aExp++; + } + if (aExp < -24) { return packFloat16(aSign, 0, 0); } -- cgit v1.2.3-55-g7522 From f581bf5474d2319ca37484eb63208017ac96ce0a Mon Sep 17 00:00:00 2001 From: Will Newton Date: Tue, 7 Jan 2014 17:17:48 +0000 Subject: softfloat: Add float to 16bit integer conversions. ARMv8 requires support for converting 32 and 64bit floating point values to signed and unsigned 16bit integers. Signed-off-by: Will Newton [PMM: updated not to incorrectly set Inexact for Invalid inputs] Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++ include/fpu/softfloat.h | 4 +++ 2 files changed, 84 insertions(+) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 6a6b6567d7..dbaa32c7e3 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -6491,6 +6491,46 @@ uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM ) return res; } +int_fast16_t float32_to_int16(float32 a STATUS_PARAM) +{ + int32_t v; + int_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float32_to_int32(a STATUS_VAR); + if (v < -0x8000) { + res = -0x8000; + } else if (v > 0x7fff) { + res = 0x7fff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + +uint_fast16_t float32_to_uint16(float32 a STATUS_PARAM) +{ + int32_t v; + uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float32_to_int32(a STATUS_VAR); + if (v < 0) { + res = 0; + } else if (v > 0xffff) { + res = 0xffff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM) { int64_t v; @@ -6545,6 +6585,46 @@ uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM ) return res; } +int_fast16_t float64_to_int16(float64 a STATUS_PARAM) +{ + int64_t v; + int_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float64_to_int32(a STATUS_VAR); + if (v < -0x8000) { + res = -0x8000; + } else if (v > 0x7fff) { + res = 0x7fff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + +uint_fast16_t float64_to_uint16(float64 a STATUS_PARAM) +{ + int64_t v; + uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float64_to_int32(a STATUS_VAR); + if (v < 0) { + res = 0; + } else if (v > 0xffff) { + res = 0xffff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM) { int64_t v; diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index 2365274daa..a9b8cd9950 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -265,6 +265,8 @@ extern const float16 float16_default_nan; /*---------------------------------------------------------------------------- | Software IEC/IEEE single-precision conversion routines. *----------------------------------------------------------------------------*/ +int_fast16_t float32_to_int16(float32 STATUS_PARAM); +uint_fast16_t float32_to_uint16(float32 STATUS_PARAM); int_fast16_t float32_to_int16_round_to_zero(float32 STATUS_PARAM); uint_fast16_t float32_to_uint16_round_to_zero(float32 STATUS_PARAM); int32 float32_to_int32( float32 STATUS_PARAM ); @@ -371,6 +373,8 @@ extern const float32 float32_default_nan; /*---------------------------------------------------------------------------- | Software IEC/IEEE double-precision conversion routines. *----------------------------------------------------------------------------*/ +int_fast16_t float64_to_int16(float64 STATUS_PARAM); +uint_fast16_t float64_to_uint16(float64 STATUS_PARAM); int_fast16_t float64_to_int16_round_to_zero(float64 STATUS_PARAM); uint_fast16_t float64_to_uint16_round_to_zero(float64 STATUS_PARAM); int32 float64_to_int32( float64 STATUS_PARAM ); -- cgit v1.2.3-55-g7522 From c4850f9e1b9cef2ac35cd6e6f3d8d2147787740c Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 7 Jan 2014 17:17:49 +0000 Subject: softfloat: Make the int-to-float functions take exact-width types Currently the int-to-float functions take types which are specified as "at least X bits wide", rather than "exactly X bits wide". This is confusing and unhelpful since it means that the callers have to include an explicit cast to [u]intXX_t to ensure the correct behaviour. Fix them all to take the exactly-X-bits-wide types instead. Note that this doesn't change behaviour at all since at the moment we happen to define the 'int32' and 'uint32' types as exactly 32 bits wide, and the 'int64' and 'uint64' types as exactly 64 bits wide. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 26 +++++++++++++------------- include/fpu/softfloat.h | 26 +++++++++++++------------- 2 files changed, 26 insertions(+), 26 deletions(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index dbaa32c7e3..3170b882b9 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1121,7 +1121,7 @@ static float128 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float32 int32_to_float32( int32 a STATUS_PARAM ) +float32 int32_to_float32(int32_t a STATUS_PARAM) { flag zSign; @@ -1138,7 +1138,7 @@ float32 int32_to_float32( int32 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 int32_to_float64( int32 a STATUS_PARAM ) +float64 int32_to_float64(int32_t a STATUS_PARAM) { flag zSign; uint32 absA; @@ -1161,7 +1161,7 @@ float64 int32_to_float64( int32 a STATUS_PARAM ) | Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 int32_to_floatx80( int32 a STATUS_PARAM ) +floatx80 int32_to_floatx80(int32_t a STATUS_PARAM) { flag zSign; uint32 absA; @@ -1183,7 +1183,7 @@ floatx80 int32_to_floatx80( int32 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 int32_to_float128( int32 a STATUS_PARAM ) +float128 int32_to_float128(int32_t a STATUS_PARAM) { flag zSign; uint32 absA; @@ -1205,7 +1205,7 @@ float128 int32_to_float128( int32 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float32 int64_to_float32( int64 a STATUS_PARAM ) +float32 int64_to_float32(int64_t a STATUS_PARAM) { flag zSign; uint64 absA; @@ -1231,7 +1231,7 @@ float32 int64_to_float32( int64 a STATUS_PARAM ) } -float32 uint64_to_float32( uint64 a STATUS_PARAM ) +float32 uint64_to_float32(uint64_t a STATUS_PARAM) { int8 shiftCount; @@ -1258,7 +1258,7 @@ float32 uint64_to_float32( uint64 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 int64_to_float64( int64 a STATUS_PARAM ) +float64 int64_to_float64(int64_t a STATUS_PARAM) { flag zSign; @@ -1271,7 +1271,7 @@ float64 int64_to_float64( int64 a STATUS_PARAM ) } -float64 uint64_to_float64(uint64 a STATUS_PARAM) +float64 uint64_to_float64(uint64_t a STATUS_PARAM) { int exp = 0x43C; @@ -1292,7 +1292,7 @@ float64 uint64_to_float64(uint64 a STATUS_PARAM) | Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 int64_to_floatx80( int64 a STATUS_PARAM ) +floatx80 int64_to_floatx80(int64_t a STATUS_PARAM) { flag zSign; uint64 absA; @@ -1312,7 +1312,7 @@ floatx80 int64_to_floatx80( int64 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 int64_to_float128( int64 a STATUS_PARAM ) +float128 int64_to_float128(int64_t a STATUS_PARAM) { flag zSign; uint64 absA; @@ -1339,7 +1339,7 @@ float128 int64_to_float128( int64 a STATUS_PARAM ) } -float128 uint64_to_float128(uint64 a STATUS_PARAM) +float128 uint64_to_float128(uint64_t a STATUS_PARAM) { if (a == 0) { return float128_zero; @@ -6445,12 +6445,12 @@ int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM ) } /* misc functions */ -float32 uint32_to_float32( uint32 a STATUS_PARAM ) +float32 uint32_to_float32(uint32_t a STATUS_PARAM) { return int64_to_float32(a STATUS_VAR); } -float64 uint32_to_float64( uint32 a STATUS_PARAM ) +float64 uint32_to_float64(uint32_t a STATUS_PARAM) { return int64_to_float64(a STATUS_VAR); } diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index 926d849252..78b165670d 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -225,19 +225,19 @@ enum { /*---------------------------------------------------------------------------- | Software IEC/IEEE integer-to-floating-point conversion routines. *----------------------------------------------------------------------------*/ -float32 int32_to_float32( int32 STATUS_PARAM ); -float64 int32_to_float64( int32 STATUS_PARAM ); -float32 uint32_to_float32( uint32 STATUS_PARAM ); -float64 uint32_to_float64( uint32 STATUS_PARAM ); -floatx80 int32_to_floatx80( int32 STATUS_PARAM ); -float128 int32_to_float128( int32 STATUS_PARAM ); -float32 int64_to_float32( int64 STATUS_PARAM ); -float32 uint64_to_float32( uint64 STATUS_PARAM ); -float64 int64_to_float64( int64 STATUS_PARAM ); -float64 uint64_to_float64( uint64 STATUS_PARAM ); -floatx80 int64_to_floatx80( int64 STATUS_PARAM ); -float128 int64_to_float128( int64 STATUS_PARAM ); -float128 uint64_to_float128( uint64 STATUS_PARAM ); +float32 int32_to_float32(int32_t STATUS_PARAM); +float64 int32_to_float64(int32_t STATUS_PARAM); +float32 uint32_to_float32(uint32_t STATUS_PARAM); +float64 uint32_to_float64(uint32_t STATUS_PARAM); +floatx80 int32_to_floatx80(int32_t STATUS_PARAM); +float128 int32_to_float128(int32_t STATUS_PARAM); +float32 int64_to_float32(int64_t STATUS_PARAM); +float32 uint64_to_float32(uint64_t STATUS_PARAM); +float64 int64_to_float64(int64_t STATUS_PARAM); +float64 uint64_to_float64(uint64_t STATUS_PARAM); +floatx80 int64_to_floatx80(int64_t STATUS_PARAM); +float128 int64_to_float128(int64_t STATUS_PARAM); +float128 uint64_to_float128(uint64_t STATUS_PARAM); /* We provide the int16 versions for symmetry of API with float-to-int */ INLINE float32 int16_to_float32(int16_t v STATUS_PARAM) -- cgit v1.2.3-55-g7522 From fb3ea83aa53cead46465c74ddd1872babe9f4a3e Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Tue, 7 Jan 2014 17:17:49 +0000 Subject: softfloat: Fix float64_to_uint64 The comment preceding the float64_to_uint64 routine suggests that the implementation is broken. And this is, indeed, the case. This patch properly implements the conversion of a 64-bit floating point number to an unsigned, 64 bit integer. This contribution can be licensed under either the softfloat-2a or -2b license. Signed-off-by: Tom Musta Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 8 deletions(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 3170b882b9..2364513e0b 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -203,6 +203,56 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU } +/*---------------------------------------------------------------------------- +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and +| `absZ1', with binary point between bits 63 and 64 (between the input words), +| and returns the properly rounded 64-bit unsigned integer corresponding to the +| input. Ordinarily, the fixed-point input is simply rounded to an integer, +| with the inexact exception raised if the input cannot be represented exactly +| as an integer. However, if the fixed-point input is too large, the invalid +| exception is raised and the largest unsigned integer is returned. +*----------------------------------------------------------------------------*/ + +static int64 roundAndPackUint64(flag zSign, uint64_t absZ0, + uint64_t absZ1 STATUS_PARAM) +{ + int8 roundingMode; + flag roundNearestEven, increment; + + roundingMode = STATUS(float_rounding_mode); + roundNearestEven = (roundingMode == float_round_nearest_even); + increment = ((int64_t)absZ1 < 0); + if (!roundNearestEven) { + if (roundingMode == float_round_to_zero) { + increment = 0; + } else if (absZ1) { + if (zSign) { + increment = (roundingMode == float_round_down) && absZ1; + } else { + increment = (roundingMode == float_round_up) && absZ1; + } + } + } + if (increment) { + ++absZ0; + if (absZ0 == 0) { + float_raise(float_flag_invalid STATUS_VAR); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven); + } + + if (zSign && absZ0) { + float_raise(float_flag_invalid STATUS_VAR); + return 0; + } + + if (absZ1) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + return absZ0; +} + /*---------------------------------------------------------------------------- | Returns the fraction bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ @@ -6643,16 +6693,51 @@ uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM) return res; } -/* FIXME: This looks broken. */ -uint64_t float64_to_uint64 (float64 a STATUS_PARAM) -{ - int64_t v; +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| positive integer is returned. If the conversion overflows, the +| largest unsigned integer is returned. If 'a' is negative, the value is +| rounded and zero is returned; negative values that do not round to zero +| will raise the inexact exception. +*----------------------------------------------------------------------------*/ - v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR)); - v += float64_val(a); - v = float64_to_int64(make_float64(v) STATUS_VAR); +uint64_t float64_to_uint64(float64 a STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp, shiftCount; + uint64_t aSig, aSigExtra; + a = float64_squash_input_denormal(a STATUS_VAR); - return v - INT64_MIN; + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + if (aSign && (aExp > 1022)) { + float_raise(float_flag_invalid STATUS_VAR); + if (float64_is_any_nan(a)) { + return LIT64(0xFFFFFFFFFFFFFFFF); + } else { + return 0; + } + } + if (aExp) { + aSig |= LIT64(0x0010000000000000); + } + shiftCount = 0x433 - aExp; + if (shiftCount <= 0) { + if (0x43E < aExp) { + float_raise(float_flag_invalid STATUS_VAR); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + aSigExtra = 0; + aSig <<= -shiftCount; + } else { + shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra); + } + return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR); } uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM) -- cgit v1.2.3-55-g7522 From 34e1c27bc3094ffe484d9855e07ad104bddf579f Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 7 Jan 2014 17:17:49 +0000 Subject: softfloat: Only raise Invalid when conversions to int are out of range We implement a number of float-to-integer conversions using conversion to an integer type with a wider range and then a check against the narrower range we are actually converting to. If we find the result to be out of range we correctly raise the Invalid exception, but we must also suppress other exceptions which might have been raised by the conversion function we called. This won't throw away exceptions we should have preserved, because for the 'core' exception flags the IEEE spec mandates that the only valid combinations of exception that can be raised by a single operation are Inexact + Overflow and Inexact + Underflow. For the non-IEEE softfloat flag for input denormals, we can guarantee that that flag won't have been set for out of range float-to-int conversions because a squashed denormal by definition goes to plus or minus zero, which is always in range after conversion to integer zero. This bug has been fixed for some of the float-to-int conversion routines by previous patches; fix it for the remaining functions as well, so that they all restore the pre-conversion status flags prior to raising Invalid. Signed-off-by: Peter Maydell Reviewed-by: Aurelien Jarno Reviewed-by: Richard Henderson --- fpu/softfloat.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 2364513e0b..6312e0c66f 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -6509,17 +6509,18 @@ uint32 float32_to_uint32( float32 a STATUS_PARAM ) { int64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); v = float32_to_int64(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffffffff) { res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } @@ -6527,17 +6528,18 @@ uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM ) { int64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); v = float32_to_int64_round_to_zero(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffffffff) { res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } @@ -6585,17 +6587,18 @@ uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM) { int64_t v; uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); v = float32_to_int64_round_to_zero(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffff) { res = 0xffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } @@ -6679,17 +6682,18 @@ uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM) { int64_t v; uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); v = float64_to_int64_round_to_zero(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffff) { res = 0xffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } -- cgit v1.2.3-55-g7522 From 3c85c37f254bdfaea4e105b5014a287b5cf9514b Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 7 Jan 2014 17:17:50 +0000 Subject: softfloat: Fix factor 2 error for scalbn on denormal inputs If the input to float*_scalbn() is denormal then it represents a number 0.[mantissabits] * 2^(1-exponentbias) (and the actual exponent field is all zeroes). This means that when we convert it to our unpacked encoding the unpacked exponent must be one greater than for a normal number, which represents 1.[mantissabits] * 2^(e-exponentbias) for an exponent field e. This meant we were giving answers too small by a factor of 2 for all denormal inputs. Note that the float-to-int routines also have this behaviour of not adjusting the exponent for denormals; however there it is harmless because denormals will all convert to integer zero anyway. Signed-off-by: Peter Maydell Reviewed-by: Aurelien Jarno Reviewed-by: Richard Henderson --- fpu/softfloat.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 6312e0c66f..d2e909593f 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -6991,10 +6991,13 @@ float32 float32_scalbn( float32 a, int n STATUS_PARAM ) } return a; } - if ( aExp != 0 ) + if (aExp != 0) { aSig |= 0x00800000; - else if ( aSig == 0 ) + } else if (aSig == 0) { return a; + } else { + aExp++; + } if (n > 0x200) { n = 0x200; @@ -7024,10 +7027,13 @@ float64 float64_scalbn( float64 a, int n STATUS_PARAM ) } return a; } - if ( aExp != 0 ) + if (aExp != 0) { aSig |= LIT64( 0x0010000000000000 ); - else if ( aSig == 0 ) + } else if (aSig == 0) { return a; + } else { + aExp++; + } if (n > 0x1000) { n = 0x1000; @@ -7057,8 +7063,12 @@ floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM ) return a; } - if (aExp == 0 && aSig == 0) - return a; + if (aExp == 0) { + if (aSig == 0) { + return a; + } + aExp++; + } if (n > 0x10000) { n = 0x10000; @@ -7087,10 +7097,13 @@ float128 float128_scalbn( float128 a, int n STATUS_PARAM ) } return a; } - if ( aExp != 0 ) + if (aExp != 0) { aSig0 |= LIT64( 0x0001000000000000 ); - else if ( aSig0 == 0 && aSig1 == 0 ) + } else if (aSig0 == 0 && aSig1 == 0) { return a; + } else { + aExp++; + } if (n > 0x10000) { n = 0x10000; -- cgit v1.2.3-55-g7522 From 2f18bbf9844ad110e0e69ad22708f37cab1557f6 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Tue, 7 Jan 2014 17:17:50 +0000 Subject: softfloat: Add float32_to_uint64() This patch adds the float32_to_uint64() routine, which converts a 32-bit floating point number to an unsigned 64 bit number. This contribution can be licensed under either the softfloat-2a or -2b license. Signed-off-by: Tom Musta Reviewed-by: Peter Maydell [PMM: removed harmless but silly int64_t casts] Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/fpu/softfloat.h | 1 + 2 files changed, 47 insertions(+) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index d2e909593f..c2c1a5650b 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1557,6 +1557,52 @@ int64 float32_to_int64( float32 a STATUS_PARAM ) } +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| unsigned integer is returned. Otherwise, if the conversion overflows, the +| largest unsigned integer is returned. If the 'a' is negative, the result +| is rounded and zero is returned; values that do not round to zero will +| raise the inexact exception flag. +*----------------------------------------------------------------------------*/ + +uint64 float32_to_uint64(float32 a STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp, shiftCount; + uint32_t aSig; + uint64_t aSig64, aSigExtra; + a = float32_squash_input_denormal(a STATUS_VAR); + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + if ((aSign) && (aExp > 126)) { + float_raise(float_flag_invalid STATUS_VAR); + if (float32_is_any_nan(a)) { + return LIT64(0xFFFFFFFFFFFFFFFF); + } else { + return 0; + } + } + shiftCount = 0xBE - aExp; + if (aExp) { + aSig |= 0x00800000; + } + if (shiftCount < 0) { + float_raise(float_flag_invalid STATUS_VAR); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + + aSig64 = aSig; + aSig64 <<= 40; + shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra); + return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR); +} + /*---------------------------------------------------------------------------- | Returns the result of converting the single-precision floating-point value | `a' to the 64-bit two's complement integer format. The conversion is diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index 78b165670d..eb81c3b0cc 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -295,6 +295,7 @@ int32 float32_to_int32_round_to_zero( float32 STATUS_PARAM ); uint32 float32_to_uint32( float32 STATUS_PARAM ); uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM ); int64 float32_to_int64( float32 STATUS_PARAM ); +uint64 float32_to_uint64(float32 STATUS_PARAM); int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM ); float64 float32_to_float64( float32 STATUS_PARAM ); floatx80 float32_to_floatx80( float32 STATUS_PARAM ); -- cgit v1.2.3-55-g7522 From 0a87a3107dad97abff5b06558b0cce6832229086 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Tue, 7 Jan 2014 17:17:50 +0000 Subject: softfloat: Fix float64_to_uint64_round_to_zero The float64_to_uint64_round_to_zero routine is incorrect. For example, the following test pattern: 46697351FF4AEC29 / 0x1.97351ff4aec29p+103 currently produces 8000000000000000 instead of FFFFFFFFFFFFFFFF. This patch re-implements the routine to temporarily force the rounding mode and use the float64_to_uint64 routine. This contribution can be licensed under either the softfloat-2a or -2b license. Signed-off-by: Tom Musta Message-id: 1387397961-4894-4-git-send-email-tommusta@gmail.com Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index c2c1a5650b..f782c24ef2 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -6792,13 +6792,11 @@ uint64_t float64_to_uint64(float64 a STATUS_PARAM) uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM) { - int64_t v; - - v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR)); - v += float64_val(a); - v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR); - - return v - INT64_MIN; + signed char current_rounding_mode = STATUS(float_rounding_mode); + set_float_rounding_mode(float_round_to_zero STATUS_VAR); + int64_t v = float64_to_uint64(a STATUS_VAR); + set_float_rounding_mode(current_rounding_mode STATUS_VAR); + return v; } #define COMPARE(s, nan_exp) \ -- cgit v1.2.3-55-g7522 From 5e7f654fa152de246f2254707bacac31ba50c660 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Tue, 7 Jan 2014 17:17:51 +0000 Subject: softfloat: Fix float64_to_uint32 The float64_to_uint32 has several flaws: - for numbers between 2**32 and 2**64, the inexact exception flag may get incorrectly set. In this case, only the invalid flag should be set. test pattern: 425F81378DC0CD1F / 0x1.f81378dc0cd1fp+38 - for numbers between 2**63 and 2**64, incorrect results may be produced: test pattern: 43EAAF73F1F0B8BD / 0x1.aaf73f1f0b8bdp+63 This patch re-implements float64_to_uint32 to re-use the float64_to_uint64 routine (instead of float64_to_int64). For the saturation case, we ignore any flags which the conversion routine has set and raise only the invalid flag. This contribution can be licensed under either the softfloat-2a or -2b license. Signed-off-by: Tom Musta Message-id: 1387397961-4894-5-git-send-email-tommusta@gmail.com Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index f782c24ef2..e68a87b6da 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -6650,19 +6650,18 @@ uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM) uint32 float64_to_uint32( float64 a STATUS_PARAM ) { - int64_t v; + uint64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); - v = float64_to_int64(a STATUS_VAR); - if (v < 0) { - res = 0; - float_raise( float_flag_invalid STATUS_VAR); - } else if (v > 0xffffffff) { + v = float64_to_uint64(a STATUS_VAR); + if (v > 0xffffffff) { res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } -- cgit v1.2.3-55-g7522 From fd728f2f949273563f799640b863b4b94dc4c6da Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Tue, 7 Jan 2014 17:17:51 +0000 Subject: softfloat: Fix float64_to_uint32_round_to_zero The float64_to_uint32_round_to_zero routine is incorrect. For example, the following test pattern: 425F81378DC0CD1F / 0x1.f81378dc0cd1fp+38 will erroneously set the inexact flag. This patch re-implements the routine to use the float64_to_uint64_round_to_zero routine. If saturation occurs we ignore any flags set by the conversion function and raise only Invalid. This contribution can be licensed under either the softfloat-2a or -2b license. Signed-off-by: Tom Musta Message-id: 1387397961-4894-6-git-send-email-tommusta@gmail.com Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index e68a87b6da..3232ce2547 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -6667,19 +6667,18 @@ uint32 float64_to_uint32( float64 a STATUS_PARAM ) uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM ) { - int64_t v; + uint64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); - v = float64_to_int64_round_to_zero(a STATUS_VAR); - if (v < 0) { - res = 0; - float_raise( float_flag_invalid STATUS_VAR); - } else if (v > 0xffffffff) { + v = float64_to_uint64_round_to_zero(a STATUS_VAR); + if (v > 0xffffffff) { res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } -- cgit v1.2.3-55-g7522 From 879d096b37b1233fec334bc2c2f569ac3b8fec9a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 7 Jan 2014 17:19:11 +0000 Subject: softfloat: Provide complete set of accessors for fp state Tidy up the get/set accessors for the fp state to add missing ones and make them all inline in softfloat.h rather than some inline and some not. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 15 --------------- include/fpu/softfloat.h | 39 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 18 deletions(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 3232ce2547..4abcd36b15 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -59,21 +59,6 @@ these four paragraphs for those parts of this code that are retained. *----------------------------------------------------------------------------*/ #include "softfloat-specialize.h" -void set_float_rounding_mode(int val STATUS_PARAM) -{ - STATUS(float_rounding_mode) = val; -} - -void set_float_exception_flags(int val STATUS_PARAM) -{ - STATUS(float_exception_flags) = val; -} - -void set_floatx80_rounding_precision(int val STATUS_PARAM) -{ - STATUS(floatx80_rounding_precision) = val; -} - /*---------------------------------------------------------------------------- | Returns the fraction bits of the half-precision floating-point value `a'. *----------------------------------------------------------------------------*/ diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index eb81c3b0cc..a634a4ebff 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -180,12 +180,22 @@ typedef struct float_status { flag default_nan_mode; } float_status; -void set_float_rounding_mode(int val STATUS_PARAM); -void set_float_exception_flags(int val STATUS_PARAM); INLINE void set_float_detect_tininess(int val STATUS_PARAM) { STATUS(float_detect_tininess) = val; } +INLINE void set_float_rounding_mode(int val STATUS_PARAM) +{ + STATUS(float_rounding_mode) = val; +} +INLINE void set_float_exception_flags(int val STATUS_PARAM) +{ + STATUS(float_exception_flags) = val; +} +INLINE void set_floatx80_rounding_precision(int val STATUS_PARAM) +{ + STATUS(floatx80_rounding_precision) = val; +} INLINE void set_flush_to_zero(flag val STATUS_PARAM) { STATUS(flush_to_zero) = val; @@ -198,11 +208,34 @@ INLINE void set_default_nan_mode(flag val STATUS_PARAM) { STATUS(default_nan_mode) = val; } +INLINE int get_float_detect_tininess(float_status *status) +{ + return STATUS(float_detect_tininess); +} +INLINE int get_float_rounding_mode(float_status *status) +{ + return STATUS(float_rounding_mode); +} INLINE int get_float_exception_flags(float_status *status) { return STATUS(float_exception_flags); } -void set_floatx80_rounding_precision(int val STATUS_PARAM); +INLINE int get_floatx80_rounding_precision(float_status *status) +{ + return STATUS(floatx80_rounding_precision); +} +INLINE flag get_flush_to_zero(float_status *status) +{ + return STATUS(flush_to_zero); +} +INLINE flag get_flush_inputs_to_zero(float_status *status) +{ + return STATUS(flush_inputs_to_zero); +} +INLINE flag get_default_nan_mode(float_status *status) +{ + return STATUS(default_nan_mode); +} /*---------------------------------------------------------------------------- | Routine to raise any or all of the software IEC/IEEE floating-point -- cgit v1.2.3-55-g7522 From c4a1c5e7e2fae28ef3fde2aadf7ec6fed0a5a967 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 7 Jan 2014 17:19:11 +0000 Subject: softfloat: Factor out RoundAndPackFloat16 and NormalizeFloat16Subnormal In preparation for adding conversions between float16 and float64, factor out code currently done inline in the float16<=>float32 conversion functions into functions RoundAndPackFloat16 and NormalizeFloat16Subnormal along the lines of the existing versions for the other float types. Note that we change the handling of zExp from the inline code to match the API of the other RoundAndPackFloat functions; however we leave the positioning of the binary point between bits 22 and 23 rather than shifting it up to the high end of the word. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 209 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 125 insertions(+), 84 deletions(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 4abcd36b15..c63e011ae8 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -3086,6 +3086,127 @@ static float16 packFloat16(flag zSign, int_fast16_t zExp, uint16_t zSig) (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig); } +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper half-precision floating- +| point value corresponding to the abstract input. Ordinarily, the abstract +| value is simply rounded and packed into the half-precision format, with +| the inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal half- +| precision floating-point number. +| The `ieee' flag indicates whether to use IEEE standard half precision, or +| ARM-style "alternative representation", which omits the NaN and Inf +| encodings in order to raise the maximum representable exponent by one. +| The input significand `zSig' has its binary point between bits 22 +| and 23, which is 13 bits to the left of the usual location. This shifted +| significand must be normalized or smaller. If `zSig' is not normalized, +| `zExp' must be 0; in that case, the result returned is a subnormal number, +| and it must not require rounding. In the usual case that `zSig' is +| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +| Note the slightly odd position of the binary point in zSig compared with the +| other roundAndPackFloat functions. This should probably be fixed if we +| need to implement more float16 routines than just conversion. +| The handling of underflow and overflow follows the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +static float32 roundAndPackFloat16(flag zSign, int_fast16_t zExp, + uint32_t zSig, flag ieee STATUS_PARAM) +{ + int maxexp = ieee ? 29 : 30; + uint32_t mask; + uint32_t increment; + int8 roundingMode; + bool rounding_bumps_exp; + bool is_tiny = false; + + /* Calculate the mask of bits of the mantissa which are not + * representable in half-precision and will be lost. + */ + if (zExp < 1) { + /* Will be denormal in halfprec */ + mask = 0x00ffffff; + if (zExp >= -11) { + mask >>= 11 + zExp; + } + } else { + /* Normal number in halfprec */ + mask = 0x00001fff; + } + + roundingMode = STATUS(float_rounding_mode); + switch (roundingMode) { + case float_round_nearest_even: + increment = (mask + 1) >> 1; + if ((zSig & mask) == increment) { + increment = zSig & (increment << 1); + } + break; + case float_round_up: + increment = zSign ? 0 : mask; + break; + case float_round_down: + increment = zSign ? mask : 0; + break; + default: /* round_to_zero */ + increment = 0; + break; + } + + rounding_bumps_exp = (zSig + increment >= 0x01000000); + + if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) { + if (ieee) { + float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR); + return packFloat16(zSign, 0x1f, 0); + } else { + float_raise(float_flag_invalid STATUS_VAR); + return packFloat16(zSign, 0x1f, 0x3ff); + } + } + + if (zExp < 0) { + /* Note that flush-to-zero does not affect half-precision results */ + is_tiny = + (STATUS(float_detect_tininess) == float_tininess_before_rounding) + || (zExp < -1) + || (!rounding_bumps_exp); + } + if (zSig & mask) { + float_raise(float_flag_inexact STATUS_VAR); + if (is_tiny) { + float_raise(float_flag_underflow STATUS_VAR); + } + } + + zSig += increment; + if (rounding_bumps_exp) { + zSig >>= 1; + zExp++; + } + + if (zExp < -10) { + return packFloat16(zSign, 0, 0); + } + if (zExp < 0) { + zSig >>= -zExp; + zExp = 0; + } + return packFloat16(zSign, zExp, zSig >> 13); +} + +static void normalizeFloat16Subnormal(uint32_t aSig, int_fast16_t *zExpPtr, + uint32_t *zSigPtr) +{ + int8_t shiftCount = countLeadingZeros32(aSig) - 21; + *zSigPtr = aSig << shiftCount; + *zExpPtr = 1 - shiftCount; +} + /* Half precision floats come in two formats: standard IEEE and "ARM" format. The latter gains extra exponent range by omitting the NaN/Inf encodings. */ @@ -3106,15 +3227,12 @@ float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM) return packFloat32(aSign, 0xff, 0); } if (aExp == 0) { - int8 shiftCount; - if (aSig == 0) { return packFloat32(aSign, 0, 0); } - shiftCount = countLeadingZeros32( aSig ) - 21; - aSig = aSig << shiftCount; - aExp = -shiftCount; + normalizeFloat16Subnormal(aSig, &aExp, &aSig); + aExp--; } return packFloat32( aSign, aExp + 0x70, aSig << 13); } @@ -3124,12 +3242,6 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) flag aSign; int_fast16_t aExp; uint32_t aSig; - uint32_t mask; - uint32_t increment; - int8 roundingMode; - int maxexp = ieee ? 15 : 16; - bool rounding_bumps_exp; - bool is_tiny = false; a = float32_squash_input_denormal(a STATUS_VAR); @@ -3164,80 +3276,9 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) * codepath. */ aSig |= 0x00800000; - aExp -= 0x7f; - /* Calculate the mask of bits of the mantissa which are not - * representable in half-precision and will be lost. - */ - if (aExp < -14) { - /* Will be denormal in halfprec */ - mask = 0x00ffffff; - if (aExp >= -24) { - mask >>= 25 + aExp; - } - } else { - /* Normal number in halfprec */ - mask = 0x00001fff; - } + aExp -= 0x71; - roundingMode = STATUS(float_rounding_mode); - switch (roundingMode) { - case float_round_nearest_even: - increment = (mask + 1) >> 1; - if ((aSig & mask) == increment) { - increment = aSig & (increment << 1); - } - break; - case float_round_up: - increment = aSign ? 0 : mask; - break; - case float_round_down: - increment = aSign ? mask : 0; - break; - default: /* round_to_zero */ - increment = 0; - break; - } - - rounding_bumps_exp = (aSig + increment >= 0x01000000); - - if (aExp > maxexp || (aExp == maxexp && rounding_bumps_exp)) { - if (ieee) { - float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR); - return packFloat16(aSign, 0x1f, 0); - } else { - float_raise(float_flag_invalid STATUS_VAR); - return packFloat16(aSign, 0x1f, 0x3ff); - } - } - - if (aExp < -14) { - /* Note that flush-to-zero does not affect half-precision results */ - is_tiny = - (STATUS(float_detect_tininess) == float_tininess_before_rounding) - || (aExp < -15) - || (!rounding_bumps_exp); - } - if (aSig & mask) { - float_raise(float_flag_inexact STATUS_VAR); - if (is_tiny) { - float_raise(float_flag_underflow STATUS_VAR); - } - } - - aSig += increment; - if (rounding_bumps_exp) { - aSig >>= 1; - aExp++; - } - - if (aExp < -24) { - return packFloat16(aSign, 0, 0); - } - if (aExp < -14) { - aSig >>= -14 - aExp; - aExp = -14; - } - return packFloat16(aSign, aExp + 14, aSig >> 13); + return roundAndPackFloat16(aSign, aExp, aSig, ieee STATUS_VAR); } /*---------------------------------------------------------------------------- -- cgit v1.2.3-55-g7522 From 14c9a07eb9cae3d3bc1d39cc8815dd88337ce07a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 7 Jan 2014 17:19:12 +0000 Subject: softfloat: Add float16 <=> float64 conversion functions Add the conversion functions float16_to_float64() and float64_to_float16(), which will be needed for the ARM A64 instruction set. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++ include/fpu/softfloat.h | 2 ++ 2 files changed, 77 insertions(+) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index c63e011ae8..d4ca7cf7f1 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -3281,6 +3281,81 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) return roundAndPackFloat16(aSign, aExp, aSig, ieee STATUS_VAR); } +float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp; + uint32_t aSig; + + aSign = extractFloat16Sign(a); + aExp = extractFloat16Exp(a); + aSig = extractFloat16Frac(a); + + if (aExp == 0x1f && ieee) { + if (aSig) { + return commonNaNToFloat64( + float16ToCommonNaN(a STATUS_VAR) STATUS_VAR); + } + return packFloat64(aSign, 0x7ff, 0); + } + if (aExp == 0) { + if (aSig == 0) { + return packFloat64(aSign, 0, 0); + } + + normalizeFloat16Subnormal(aSig, &aExp, &aSig); + aExp--; + } + return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42); +} + +float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp; + uint64_t aSig; + uint32_t zSig; + + a = float64_squash_input_denormal(a STATUS_VAR); + + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + if (aExp == 0x7FF) { + if (aSig) { + /* Input is a NaN */ + if (!ieee) { + float_raise(float_flag_invalid STATUS_VAR); + return packFloat16(aSign, 0, 0); + } + return commonNaNToFloat16( + float64ToCommonNaN(a STATUS_VAR) STATUS_VAR); + } + /* Infinity */ + if (!ieee) { + float_raise(float_flag_invalid STATUS_VAR); + return packFloat16(aSign, 0x1f, 0x3ff); + } + return packFloat16(aSign, 0x1f, 0); + } + shift64RightJamming(aSig, 29, &aSig); + zSig = aSig; + if (aExp == 0 && zSig == 0) { + return packFloat16(aSign, 0, 0); + } + /* Decimal point between bits 22 and 23. Note that we add the 1 bit + * even if the input is denormal; however this is harmless because + * the largest possible single-precision denormal is still smaller + * than the smallest representable half-precision denormal, and so we + * will end up ignoring aSig and returning via the "always return zero" + * codepath. + */ + zSig |= 0x00800000; + aExp -= 0x3F1; + + return roundAndPackFloat16(aSign, aExp, zSig, ieee STATUS_VAR); +} + /*---------------------------------------------------------------------------- | Returns the result of converting the double-precision floating-point value | `a' to the extended double-precision floating-point format. The conversion diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index a634a4ebff..83d324aca8 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -298,6 +298,8 @@ INLINE float64 uint16_to_float64(uint16_t v STATUS_PARAM) *----------------------------------------------------------------------------*/ float16 float32_to_float16( float32, flag STATUS_PARAM ); float32 float16_to_float32( float16, flag STATUS_PARAM ); +float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM); +float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM); /*---------------------------------------------------------------------------- | Software half-precision operations. -- cgit v1.2.3-55-g7522 From dc355b764de890e1b5872d0b402dccfd9a9b5286 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 7 Jan 2014 17:19:12 +0000 Subject: softfloat: Refactor code handling various rounding modes Refactor the code in various functions which calculates rounding increments given the current rounding mode, so that instead of a set of nested if statements we have a simple switch statement. This will give us a clean place to add the case for the new tiesAway rounding mode. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 405 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 241 insertions(+), 164 deletions(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index d4ca7cf7f1..6c7a90a054 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -42,6 +42,9 @@ these four paragraphs for those parts of this code that are retained. #include "fpu/softfloat.h" +/* We only need stdlib for abort() */ +#include + /*---------------------------------------------------------------------------- | Primitive arithmetic functions, including multi-word arithmetic, and | division and square root approximations. (Can be specialized to target if @@ -106,20 +109,21 @@ static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM) roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x40; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x7F; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + roundIncrement = 0x40; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x7f; + break; + case float_round_down: + roundIncrement = zSign ? 0x7f : 0; + break; + default: + abort(); } roundBits = absZ & 0x7F; absZ = ( absZ + roundIncrement )>>7; @@ -155,19 +159,21 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (int64_t) absZ1 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && absZ1; - } - else { - increment = ( roundingMode == float_round_up ) && absZ1; - } - } + switch (roundingMode) { + case float_round_nearest_even: + increment = ((int64_t) absZ1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && absZ1; + break; + case float_round_down: + increment = zSign && absZ1; + break; + default: + abort(); } if ( increment ) { ++absZ0; @@ -206,17 +212,21 @@ static int64 roundAndPackUint64(flag zSign, uint64_t absZ0, roundingMode = STATUS(float_rounding_mode); roundNearestEven = (roundingMode == float_round_nearest_even); - increment = ((int64_t)absZ1 < 0); - if (!roundNearestEven) { - if (roundingMode == float_round_to_zero) { - increment = 0; - } else if (absZ1) { - if (zSign) { - increment = (roundingMode == float_round_down) && absZ1; - } else { - increment = (roundingMode == float_round_up) && absZ1; - } - } + switch (roundingMode) { + case float_round_nearest_even: + increment = ((int64_t)absZ1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && absZ1; + break; + case float_round_down: + increment = zSign && absZ1; + break; + default: + abort(); } if (increment) { ++absZ0; @@ -354,20 +364,22 @@ static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x40; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x7F; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + roundIncrement = 0x40; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x7f; + break; + case float_round_down: + roundIncrement = zSign ? 0x7f : 0; + break; + default: + abort(); + break; } roundBits = zSig & 0x7F; if ( 0xFD <= (uint16_t) zExp ) { @@ -536,20 +548,21 @@ static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x200; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x3FF; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + roundIncrement = 0x200; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x3ff; + break; + case float_round_down: + roundIncrement = zSign ? 0x3ff : 0; + break; + default: + abort(); } roundBits = zSig & 0x3FF; if ( 0x7FD <= (uint16_t) zExp ) { @@ -719,19 +732,20 @@ static floatx80 goto precision80; } zSig0 |= ( zSig1 != 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = roundMask; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : roundMask; + break; + case float_round_down: + roundIncrement = zSign ? roundMask : 0; + break; + default: + abort(); } roundBits = zSig0 & roundMask; if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { @@ -778,19 +792,21 @@ static floatx80 if ( zSig0 == 0 ) zExp = 0; return packFloatx80( zSign, zExp, zSig0 ); precision80: - increment = ( (int64_t) zSig1 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } - else { - increment = ( roundingMode == float_round_up ) && zSig1; - } - } + switch (roundingMode) { + case float_round_nearest_even: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); } if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { if ( ( 0x7FFE < zExp ) @@ -820,16 +836,21 @@ static floatx80 zExp = 0; if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR); if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact; - if ( roundNearestEven ) { - increment = ( (int64_t) zSig1 < 0 ); - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } - else { - increment = ( roundingMode == float_round_up ) && zSig1; - } + switch (roundingMode) { + case float_round_nearest_even: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); } if ( increment ) { ++zSig0; @@ -1029,19 +1050,21 @@ static float128 roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (int64_t) zSig2 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig2; - } - else { - increment = ( roundingMode == float_round_up ) && zSig2; - } - } + switch (roundingMode) { + case float_round_nearest_even: + increment = ((int64_t)zSig2 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig2; + break; + case float_round_down: + increment = zSign && zSig2; + break; + default: + abort(); } if ( 0x7FFD <= (uint32_t) zExp ) { if ( ( 0x7FFD < zExp ) @@ -1089,16 +1112,21 @@ static float128 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); zExp = 0; if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR); - if ( roundNearestEven ) { - increment = ( (int64_t) zSig2 < 0 ); - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig2; - } - else { - increment = ( roundingMode == float_round_up ) && zSig2; - } + switch (roundingMode) { + case float_round_nearest_even: + increment = ((int64_t)zSig2 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig2; + break; + case float_round_down: + increment = zSign && zSig2; + break; + default: + abort(); } } } @@ -1737,7 +1765,6 @@ float32 float32_round_to_int( float32 a STATUS_PARAM) flag aSign; int_fast16_t aExp; uint32_t lastBitMask, roundBitsMask; - int8 roundingMode; uint32_t z; a = float32_squash_input_denormal(a STATUS_VAR); @@ -1769,15 +1796,27 @@ float32 float32_round_to_int( float32 a STATUS_PARAM) lastBitMask <<= 0x96 - aExp; roundBitsMask = lastBitMask - 1; z = float32_val(a); - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: z += lastBitMask>>1; - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) { + if ((z & roundBitsMask) == 0) { + z &= ~lastBitMask; + } + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat32Sign(make_float32(z))) { + z += roundBitsMask; + } + break; + case float_round_down: + if (extractFloat32Sign(make_float32(z))) { z += roundBitsMask; } + break; + default: + abort(); } z &= ~ roundBitsMask; if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact; @@ -3120,7 +3159,6 @@ static float32 roundAndPackFloat16(flag zSign, int_fast16_t zExp, int maxexp = ieee ? 29 : 30; uint32_t mask; uint32_t increment; - int8 roundingMode; bool rounding_bumps_exp; bool is_tiny = false; @@ -3138,8 +3176,7 @@ static float32 roundAndPackFloat16(flag zSign, int_fast16_t zExp, mask = 0x00001fff; } - roundingMode = STATUS(float_rounding_mode); - switch (roundingMode) { + switch (STATUS(float_rounding_mode)) { case float_round_nearest_even: increment = (mask + 1) >> 1; if ((zSig & mask) == increment) { @@ -3430,7 +3467,6 @@ float64 float64_round_to_int( float64 a STATUS_PARAM ) flag aSign; int_fast16_t aExp; uint64_t lastBitMask, roundBitsMask; - int8 roundingMode; uint64_t z; a = float64_squash_input_denormal(a STATUS_VAR); @@ -3463,15 +3499,27 @@ float64 float64_round_to_int( float64 a STATUS_PARAM ) lastBitMask <<= 0x433 - aExp; roundBitsMask = lastBitMask - 1; z = float64_val(a); - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { - z += lastBitMask>>1; - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: + z += lastBitMask >> 1; + if ((z & roundBitsMask) == 0) { + z &= ~lastBitMask; + } + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat64Sign(make_float64(z))) { + z += roundBitsMask; + } + break; + case float_round_down: + if (extractFloat64Sign(make_float64(z))) { z += roundBitsMask; } + break; + default: + abort(); } z &= ~ roundBitsMask; if ( z != float64_val(a) ) @@ -4699,7 +4747,6 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM ) flag aSign; int32 aExp; uint64_t lastBitMask, roundBitsMask; - int8 roundingMode; floatx80 z; aExp = extractFloatx80Exp( a ); @@ -4740,15 +4787,27 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM ) lastBitMask <<= 0x403E - aExp; roundBitsMask = lastBitMask - 1; z = a; - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: z.low += lastBitMask>>1; - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { + if ((z.low & roundBitsMask) == 0) { + z.low &= ~lastBitMask; + } + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloatx80Sign(z)) { + z.low += roundBitsMask; + } + break; + case float_round_down: + if (extractFloatx80Sign(z)) { z.low += roundBitsMask; } + break; + default: + abort(); } z.low &= ~ roundBitsMask; if ( z.low == 0 ) { @@ -5774,7 +5833,6 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) flag aSign; int32 aExp; uint64_t lastBitMask, roundBitsMask; - int8 roundingMode; float128 z; aExp = extractFloat128Exp( a ); @@ -5791,8 +5849,8 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; roundBitsMask = lastBitMask - 1; z = a; - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: if ( lastBitMask ) { add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; @@ -5803,12 +5861,21 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1; } } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat128Sign( z ) - ^ ( roundingMode == float_round_up ) ) { - add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat128Sign(z)) { + add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); + } + break; + case float_round_down: + if (extractFloat128Sign(z)) { + add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); } + break; + default: + abort(); } z.low &= ~ roundBitsMask; } @@ -5842,19 +5909,29 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) roundBitsMask = lastBitMask - 1; z.low = 0; z.high = a.high; - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: z.high += lastBitMask>>1; if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { z.high &= ~ lastBitMask; } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat128Sign( z ) - ^ ( roundingMode == float_round_up ) ) { + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat128Sign(z)) { z.high |= ( a.low != 0 ); z.high += roundBitsMask; } + break; + case float_round_down: + if (extractFloat128Sign(z)) { + z.high |= (a.low != 0); + z.high += roundBitsMask; + } + break; + default: + abort(); } z.high &= ~ roundBitsMask; } -- cgit v1.2.3-55-g7522 From f9288a76f1819c2dbf8d2873aebab6aec0f461f5 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 7 Jan 2014 17:19:12 +0000 Subject: softfloat: Add support for ties-away rounding IEEE754-2008 specifies a new rounding mode: "roundTiesToAway: the floating-point number nearest to the infinitely precise result shall be delivered; if the two nearest floating-point numbers bracketing an unrepresentable infinitely precise result are equally near, the one with larger magnitude shall be delivered." Implement this new mode (it is needed for ARM). The general principle is that the required code is exactly like the ties-to-even code, except that we do not need to do the "in case of exact tie clear LSB to round-to-even", because the rounding operation naturally causes the exact tie to round up in magnitude. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- fpu/softfloat.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ include/fpu/softfloat.h | 3 ++- 2 files changed, 56 insertions(+), 1 deletion(-) (limited to 'fpu/softfloat.c') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 6c7a90a054..e0ea599769 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -111,6 +111,7 @@ static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM) roundNearestEven = ( roundingMode == float_round_nearest_even ); switch (roundingMode) { case float_round_nearest_even: + case float_round_ties_away: roundIncrement = 0x40; break; case float_round_to_zero: @@ -161,6 +162,7 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU roundNearestEven = ( roundingMode == float_round_nearest_even ); switch (roundingMode) { case float_round_nearest_even: + case float_round_ties_away: increment = ((int64_t) absZ1 < 0); break; case float_round_to_zero: @@ -214,6 +216,7 @@ static int64 roundAndPackUint64(flag zSign, uint64_t absZ0, roundNearestEven = (roundingMode == float_round_nearest_even); switch (roundingMode) { case float_round_nearest_even: + case float_round_ties_away: increment = ((int64_t)absZ1 < 0); break; case float_round_to_zero: @@ -366,6 +369,7 @@ static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig roundNearestEven = ( roundingMode == float_round_nearest_even ); switch (roundingMode) { case float_round_nearest_even: + case float_round_ties_away: roundIncrement = 0x40; break; case float_round_to_zero: @@ -550,6 +554,7 @@ static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig roundNearestEven = ( roundingMode == float_round_nearest_even ); switch (roundingMode) { case float_round_nearest_even: + case float_round_ties_away: roundIncrement = 0x200; break; case float_round_to_zero: @@ -734,6 +739,7 @@ static floatx80 zSig0 |= ( zSig1 != 0 ); switch (roundingMode) { case float_round_nearest_even: + case float_round_ties_away: break; case float_round_to_zero: roundIncrement = 0; @@ -794,6 +800,7 @@ static floatx80 precision80: switch (roundingMode) { case float_round_nearest_even: + case float_round_ties_away: increment = ((int64_t)zSig1 < 0); break; case float_round_to_zero: @@ -838,6 +845,7 @@ static floatx80 if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact; switch (roundingMode) { case float_round_nearest_even: + case float_round_ties_away: increment = ((int64_t)zSig1 < 0); break; case float_round_to_zero: @@ -1052,6 +1060,7 @@ static float128 roundNearestEven = ( roundingMode == float_round_nearest_even ); switch (roundingMode) { case float_round_nearest_even: + case float_round_ties_away: increment = ((int64_t)zSig2 < 0); break; case float_round_to_zero: @@ -1114,6 +1123,7 @@ static float128 if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR); switch (roundingMode) { case float_round_nearest_even: + case float_round_ties_away: increment = ((int64_t)zSig2 < 0); break; case float_round_to_zero: @@ -1785,6 +1795,11 @@ float32 float32_round_to_int( float32 a STATUS_PARAM) return packFloat32( aSign, 0x7F, 0 ); } break; + case float_round_ties_away: + if (aExp == 0x7E) { + return packFloat32(aSign, 0x7F, 0); + } + break; case float_round_down: return make_float32(aSign ? 0xBF800000 : 0); case float_round_up: @@ -1803,6 +1818,9 @@ float32 float32_round_to_int( float32 a STATUS_PARAM) z &= ~lastBitMask; } break; + case float_round_ties_away: + z += lastBitMask >> 1; + break; case float_round_to_zero: break; case float_round_up: @@ -3183,6 +3201,9 @@ static float32 roundAndPackFloat16(flag zSign, int_fast16_t zExp, increment = zSig & (increment << 1); } break; + case float_round_ties_away: + increment = (mask + 1) >> 1; + break; case float_round_up: increment = zSign ? 0 : mask; break; @@ -3487,6 +3508,11 @@ float64 float64_round_to_int( float64 a STATUS_PARAM ) return packFloat64( aSign, 0x3FF, 0 ); } break; + case float_round_ties_away: + if (aExp == 0x3FE) { + return packFloat64(aSign, 0x3ff, 0); + } + break; case float_round_down: return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0); case float_round_up: @@ -3506,6 +3532,9 @@ float64 float64_round_to_int( float64 a STATUS_PARAM ) z &= ~lastBitMask; } break; + case float_round_ties_away: + z += lastBitMask >> 1; + break; case float_round_to_zero: break; case float_round_up: @@ -4771,6 +4800,11 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM ) packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); } break; + case float_round_ties_away: + if (aExp == 0x3FFE) { + return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000)); + } + break; case float_round_down: return aSign ? @@ -4794,6 +4828,9 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM ) z.low &= ~lastBitMask; } break; + case float_round_ties_away: + z.low += lastBitMask >> 1; + break; case float_round_to_zero: break; case float_round_up: @@ -5862,6 +5899,15 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) } } break; + case float_round_ties_away: + if (lastBitMask) { + add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low); + } else { + if ((int64_t) z.low < 0) { + ++z.high; + } + } + break; case float_round_to_zero: break; case float_round_up: @@ -5893,6 +5939,11 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) return packFloat128( aSign, 0x3FFF, 0, 0 ); } break; + case float_round_ties_away: + if (aExp == 0x3FFE) { + return packFloat128(aSign, 0x3FFF, 0, 0); + } + break; case float_round_down: return aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) @@ -5916,6 +5967,9 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) z.high &= ~ lastBitMask; } break; + case float_round_ties_away: + z.high += lastBitMask>>1; + break; case float_round_to_zero: break; case float_round_up: diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index 83d324aca8..806ae13780 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -152,7 +152,8 @@ enum { float_round_nearest_even = 0, float_round_down = 1, float_round_up = 2, - float_round_to_zero = 3 + float_round_to_zero = 3, + float_round_ties_away = 4, }; /*---------------------------------------------------------------------------- -- cgit v1.2.3-55-g7522