From c953da8f0be5e026d1c9128660736d72294feb3e Mon Sep 17 00:00:00 2001 From: Emilio G. Cota Date: Wed, 12 Sep 2018 17:35:33 -0400 Subject: softfloat: remove float64_trunc_to_int It has not had users since f83311e476 ("target-m68k: use floatx80 internally", 2017-06-21). Note that no other bit-width has floatX_trunc_to_int. Reviewed-by: Alex Bennée Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- include/fpu/softfloat.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index cc1b58b029..8fd9f9bbae 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -535,7 +535,6 @@ float128 float64_to_float128(float64, float_status *status); | Software IEC/IEEE double-precision operations. *----------------------------------------------------------------------------*/ float64 float64_round_to_int(float64, float_status *status); -float64 float64_trunc_to_int(float64, float_status *status); float64 float64_add(float64, float64, float_status *status); float64 float64_sub(float64, float64, float_status *status); float64 float64_mul(float64, float64, float_status *status); -- cgit v1.2.3-55-g7522 From 0019d5c3a18c31604fb55f9cec3ceb13999c4866 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 28 Sep 2018 09:01:35 +0200 Subject: softfloat: Replace countLeadingZeros32/64 with clz32/64 Our minimum required compiler for compiling QEMU is GCC 4.1 these days, so we can drop the support for compilers which do not provide the __builtin_clz*() functions yet. Since the countLeadingZeros32/64 are then identical to the clz32/64 functions, and we do not have to sync the softloat 2 codebase with upstream anymore (softloat 3 is a complete rewrite) we can simply replace the functions with our QEMU versions. Suggested-by: Peter Maydell Acked-by: Alex Bennée Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Huth Message-Id: <1538118095-7003-1-git-send-email-thuth@redhat.com> Signed-off-by: Richard Henderson --- fpu/softfloat.c | 26 ++++++------- include/fpu/softfloat-macros.h | 87 ------------------------------------------ 2 files changed, 13 insertions(+), 100 deletions(-) (limited to 'include') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 9405f12a03..71da0f68bb 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -2683,7 +2683,7 @@ static void { int8_t shiftCount; - shiftCount = countLeadingZeros32( aSig ) - 8; + shiftCount = clz32(aSig) - 8; *zSigPtr = aSig<>( - shiftCount ); *zSig1Ptr = aSig1<<( shiftCount & 63 ); @@ -3308,7 +3308,7 @@ static void *zExpPtr = - shiftCount - 63; } else { - shiftCount = countLeadingZeros64( aSig0 ) - 15; + shiftCount = clz64(aSig0) - 15; shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); *zExpPtr = 1 - shiftCount; } @@ -3497,7 +3497,7 @@ static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp, zSig1 = 0; zExp -= 64; } - shiftCount = countLeadingZeros64( zSig0 ) - 15; + shiftCount = clz64(zSig0) - 15; if ( 0 <= shiftCount ) { zSig2 = 0; shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); @@ -3529,7 +3529,7 @@ floatx80 int32_to_floatx80(int32_t a, float_status *status) if ( a == 0 ) return packFloatx80( 0, 0, 0 ); zSign = ( a < 0 ); absA = zSign ? - a : a; - shiftCount = countLeadingZeros32( absA ) + 32; + shiftCount = clz32(absA) + 32; zSig = absA; return packFloatx80( zSign, 0x403E - shiftCount, zSig<= ((maj) << 16) + (min)) -#else -# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0 -#endif - - /*---------------------------------------------------------------------------- | Shifts `a' right by the number of bits given in `count'. If any nonzero | bits are shifted off, they are ``jammed'' into the least significant bit of @@ -712,82 +701,6 @@ static inline uint32_t estimateSqrt32(int aExp, uint32_t a) } -/*---------------------------------------------------------------------------- -| Returns the number of leading 0 bits before the most-significant 1 bit of -| `a'. If `a' is zero, 32 is returned. -*----------------------------------------------------------------------------*/ - -static inline int8_t countLeadingZeros32(uint32_t a) -{ -#if SOFTFLOAT_GNUC_PREREQ(3, 4) - if (a) { - return __builtin_clz(a); - } else { - return 32; - } -#else - static const int8_t countLeadingZerosHigh[] = { - 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - int8_t shiftCount; - - shiftCount = 0; - if ( a < 0x10000 ) { - shiftCount += 16; - a <<= 16; - } - if ( a < 0x1000000 ) { - shiftCount += 8; - a <<= 8; - } - shiftCount += countLeadingZerosHigh[ a>>24 ]; - return shiftCount; -#endif -} - -/*---------------------------------------------------------------------------- -| Returns the number of leading 0 bits before the most-significant 1 bit of -| `a'. If `a' is zero, 64 is returned. -*----------------------------------------------------------------------------*/ - -static inline int8_t countLeadingZeros64(uint64_t a) -{ -#if SOFTFLOAT_GNUC_PREREQ(3, 4) - if (a) { - return __builtin_clzll(a); - } else { - return 64; - } -#else - int8_t shiftCount; - - shiftCount = 0; - if ( a < ( (uint64_t) 1 )<<32 ) { - shiftCount += 32; - } - else { - a >>= 32; - } - shiftCount += countLeadingZeros32( a ); - return shiftCount; -#endif -} - /*---------------------------------------------------------------------------- | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' | is equal to the 128-bit value formed by concatenating `b0' and `b1'. -- cgit v1.2.3-55-g7522 From 5dfbc9e4903c0121140f2945f05df48cea72dd82 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 3 Oct 2018 09:35:51 -0500 Subject: softfloat: Fix division The __udiv_qrnnd primitive that we nicked from gmp requires its inputs to be normalized. We were not doing that. Because the inputs are nearly normalized already, finishing that is trivial. Replace div128to64 with a "proper" udiv_qrnnd, so that this remains a reusable primitive. Fixes: cf07323d494 Fixes: https://bugs.launchpad.net/qemu/+bug/1793119 Tested-by: Emilio G. Cota Tested-by: Alex Bennée Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- fpu/softfloat.c | 35 +++++++++++++++++++++++++++-------- include/fpu/softfloat-macros.h | 34 +++++++++++++++++++++++++--------- 2 files changed, 52 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 71da0f68bb..46ae206172 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1112,19 +1112,38 @@ static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s) bool sign = a.sign ^ b.sign; if (a.cls == float_class_normal && b.cls == float_class_normal) { - uint64_t temp_lo, temp_hi; + uint64_t n0, n1, q, r; int exp = a.exp - b.exp; + + /* + * We want a 2*N / N-bit division to produce exactly an N-bit + * result, so that we do not lose any precision and so that we + * do not have to renormalize afterward. If A.frac < B.frac, + * then division would produce an (N-1)-bit result; shift A left + * by one to produce the an N-bit result, and decrement the + * exponent to match. + * + * The udiv_qrnnd algorithm that we're using requires normalization, + * i.e. the msb of the denominator must be set. Since we know that + * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left + * by one (more), and the remainder must be shifted right by one. + */ if (a.frac < b.frac) { exp -= 1; - shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, - &temp_hi, &temp_lo); + shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0); } else { - shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT, - &temp_hi, &temp_lo); + shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0); } - /* LSB of quot is set if inexact which roundandpack will use - * to set flags. Yet again we re-use a for the result */ - a.frac = div128To64(temp_lo, temp_hi, b.frac); + q = udiv_qrnnd(&r, n1, n0, b.frac << 1); + + /* + * Set lsb if there is a remainder, to set inexact. + * As mentioned above, to find the actual value of the remainder we + * would need to shift right, but (1) we are only concerned about + * non-zero-ness, and (2) the remainder will always be even because + * both inputs to the division primitive are even. + */ + a.frac = q | (r != 0); a.sign = sign; a.exp = exp; return a; diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h index edc682139e..a1d99c730d 100644 --- a/include/fpu/softfloat-macros.h +++ b/include/fpu/softfloat-macros.h @@ -329,15 +329,30 @@ static inline void | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. *----------------------------------------------------------------------------*/ -static inline void - shortShift128Left( - uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) +static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count, + uint64_t *z0Ptr, uint64_t *z1Ptr) { + *z1Ptr = a1 << count; + *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63)); +} - *z1Ptr = a1<>( ( - count ) & 63 ) ); +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the +| number of bits given in `count'. Any bits shifted off are lost. The value +| of `count' may be greater than 64. The result is broken into two 64-bit +| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ +static inline void shift128Left(uint64_t a0, uint64_t a1, int count, + uint64_t *z0Ptr, uint64_t *z1Ptr) +{ + if (count < 64) { + *z1Ptr = a1 << count; + *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63)); + } else { + *z1Ptr = 0; + *z0Ptr = a1 << (count - 64); + } } /*---------------------------------------------------------------------------- @@ -619,7 +634,8 @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b) * * Licensed under the GPLv2/LGPLv3 */ -static inline uint64_t div128To64(uint64_t n0, uint64_t n1, uint64_t d) +static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, + uint64_t n0, uint64_t d) { uint64_t d0, d1, q0, q1, r1, r0, m; @@ -658,8 +674,8 @@ static inline uint64_t div128To64(uint64_t n0, uint64_t n1, uint64_t d) } r0 -= m; - /* Return remainder in LSB */ - return (q1 << 32) | q0 | (r0 != 0); + *r = r0; + return (q1 << 32) | q0; } /*---------------------------------------------------------------------------- -- cgit v1.2.3-55-g7522 From b299e88d4261b0af30190e74005ad930e04f3a11 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 3 Oct 2018 11:32:09 -0500 Subject: softfloat: Specialize udiv_qrnnd for x86_64 The ISA has a 128/64-bit division instruction. Tested-by: Emilio G. Cota Tested-by: Alex Bennée Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- include/fpu/softfloat-macros.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h index a1d99c730d..39eb08b4f1 100644 --- a/include/fpu/softfloat-macros.h +++ b/include/fpu/softfloat-macros.h @@ -637,6 +637,11 @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b) static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, uint64_t n0, uint64_t d) { +#if defined(__x86_64__) + uint64_t q; + asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d)); + return q; +#else uint64_t d0, d1, q0, q1, r1, r0, m; d0 = (uint32_t)d; @@ -676,6 +681,7 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, *r = r0; return (q1 << 32) | q0; +#endif } /*---------------------------------------------------------------------------- -- cgit v1.2.3-55-g7522 From 739df333dc8853ae6578492675a26a601d6be077 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 3 Oct 2018 11:34:25 -0500 Subject: softfloat: Specialize udiv_qrnnd for s390x The ISA has a 128/64-bit division instruction. Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- include/fpu/softfloat-macros.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h index 39eb08b4f1..eafc68932b 100644 --- a/include/fpu/softfloat-macros.h +++ b/include/fpu/softfloat-macros.h @@ -641,6 +641,12 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, uint64_t q; asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d)); return q; +#elif defined(__s390x__) + /* Need to use a TImode type to get an even register pair for DLGR. */ + unsigned __int128 n = (unsigned __int128)n1 << 64 | n0; + asm("dlgr %0, %1" : "+r"(n) : "r"(d)); + *r = n >> 64; + return n; #else uint64_t d0, d1, q0, q1, r1, r0, m; -- cgit v1.2.3-55-g7522 From 27ae5109a2ba8b6b679cce3e03e16570a34390a0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 3 Oct 2018 17:10:17 +0000 Subject: softfloat: Specialize udiv_qrnnd for ppc64 The ISA has a 128/64-bit division instruction, though it assumes the low 64-bits of the numerator are 0, and so requires a bit more fixup than a full 128-bit division insn. Reviewed-by: David Gibson Signed-off-by: Richard Henderson --- include/fpu/softfloat-macros.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h index eafc68932b..c86687fa5e 100644 --- a/include/fpu/softfloat-macros.h +++ b/include/fpu/softfloat-macros.h @@ -647,6 +647,22 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, asm("dlgr %0, %1" : "+r"(n) : "r"(d)); *r = n >> 64; return n; +#elif defined(_ARCH_PPC64) + /* From Power ISA 3.0B, programming note for divdeu. */ + uint64_t q1, q2, Q, r1, r2, R; + asm("divdeu %0,%2,%4; divdu %1,%3,%4" + : "=&r"(q1), "=r"(q2) + : "r"(n1), "r"(n0), "r"(d)); + r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */ + r2 = n0 - (q2 * d); + Q = q1 + q2; + R = r1 + r2; + if (R >= d || R < r2) { /* overflow implies R > d */ + Q += 1; + R -= d; + } + *r = R; + return Q; #else uint64_t d0, d1, q0, q1, r1, r0, m; -- cgit v1.2.3-55-g7522