diff options
author | Michael Brown | 2007-07-30 03:37:42 +0200 |
---|---|---|
committer | Michael Brown | 2007-07-30 03:43:43 +0200 |
commit | 4ce8d61a5cb8a0e3f68bff8400ba2b8246c9a58f (patch) | |
tree | 807be055e6f6edd1c8f85cf6c17635251f0c276a /src | |
parent | GCC's optimiser seems to screw up if this is left static... (diff) | |
download | ipxe-4ce8d61a5cb8a0e3f68bff8400ba2b8246c9a58f.tar.gz ipxe-4ce8d61a5cb8a0e3f68bff8400ba2b8246c9a58f.tar.xz ipxe-4ce8d61a5cb8a0e3f68bff8400ba2b8246c9a58f.zip |
Import various libgcc functions from syslinux.
Experimentation reveals that gcc ignores -mrtd for the implicit
arithmetic functions (e.g. __udivdi3), but not for the implicit
memcpy() and memset() functions. Mark the implicit arithmetic
functions with __attribute__((cdecl)) to compensate for this.
(Note: we cannot mark them with __cdecl, because we define __cdecl to
incorporate regparm(0) as well.)
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile | 1 | ||||
-rw-r--r-- | src/arch/i386/core/udivmod64.c | 336 | ||||
-rw-r--r-- | src/libgcc/__divdi3.c | 26 | ||||
-rw-r--r-- | src/libgcc/__moddi3.c | 26 | ||||
-rw-r--r-- | src/libgcc/__udivdi3.c | 10 | ||||
-rw-r--r-- | src/libgcc/__udivmoddi4.c | 32 | ||||
-rw-r--r-- | src/libgcc/__umoddi3.c | 13 | ||||
-rw-r--r-- | src/libgcc/libgcc.h | 26 | ||||
-rw-r--r-- | src/libgcc/memcpy.c (renamed from src/core/gcc_implicit.c) | 2 |
9 files changed, 134 insertions, 338 deletions
diff --git a/src/Makefile b/src/Makefile index a567b922..8e0c8bea 100644 --- a/src/Makefile +++ b/src/Makefile @@ -145,6 +145,7 @@ DEBUG_TARGETS += dbg%.o c s # SRCDIRS lists all directories containing source files. # +SRCDIRS += libgcc SRCDIRS += core SRCDIRS += proto SRCDIRS += net net/tcp net/udp diff --git a/src/arch/i386/core/udivmod64.c b/src/arch/i386/core/udivmod64.c deleted file mode 100644 index 6293c8e1..00000000 --- a/src/arch/i386/core/udivmod64.c +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/** @file - * - * 64-bit division - * - * The x86 CPU (386 upwards) has a divl instruction which will perform - * unsigned division of a 64-bit dividend by a 32-bit divisor. If the - * resulting quotient does not fit in 32 bits, then a CPU exception - * will occur. - * - * Unsigned integer division is expressed as solving - * - * x = d.q + r 0 <= q, 0 <= r < d - * - * given the dividend (x) and divisor (d), to find the quotient (q) - * and remainder (r). - * - * The x86 divl instruction will solve - * - * x = d.q + r 0 <= q, 0 <= r < d - * - * given x in the range 0 <= x < 2^64 and 1 <= d < 2^32, and causing a - * hardware exception if the resulting q >= 2^32. 
- * - * We can therefore use divl only if we can prove that the conditions - * - * 0 <= x < 2^64 - * 1 <= d < 2^32 - * q < 2^32 - * - * are satisfied. - * - * - * Case 1 : 1 <= d < 2^32 - * ====================== - * - * We express x as - * - * x = xh.2^32 + xl 0 <= xh < 2^32, 0 <= xl < 2^32 (1) - * - * i.e. split x into low and high dwords. We then solve - * - * xh = d.qh + r' 0 <= qh, 0 <= r' < d (2) - * - * which we can do using a divl instruction since - * - * 0 <= xh < 2^64 since 0 <= xh < 2^32 from (1) (3) - * - * and - * - * 1 <= d < 2^32 by definition of this Case (4) - * - * and - * - * d.qh = xh - r' from (2) - * d.qh <= xh since r' >= 0 from (2) - * qh <= xh since d >= 1 from (2) - * qh < 2^32 since xh < 2^32 from (1) (5) - * - * Having obtained qh and r', we then solve - * - * ( r'.2^32 + xl ) = d.ql + r 0 <= ql, 0 <= r < d (6) - * - * which we can do using another divl instruction since - * - * xl <= 2^32 - 1 from (1), so - * r'.2^32 + xl <= ( r' + 1 ).2^32 - 1 - * r'.2^32 + xl <= d.2^32 - 1 since r' < d from (2) - * r'.2^32 + xl < d.2^32 (7) - * r'.2^32 + xl < 2^64 since d < 2^32 from (4) (8) - * - * and - * - * 1 <= d < 2^32 by definition of this Case (9) - * - * and - * - * d.ql = ( r'.2^32 + xl ) - r from (6) - * d.ql <= r'.2^32 + xl since r >= 0 from (6) - * d.ql < d.2^32 from (7) - * ql < 2^32 since d >= 1 from (2) (10) - * - * This then gives us - * - * x = xh.2^32 + xl from (1) - * x = ( d.qh + r' ).2^32 + xl from (2) - * x = d.qh.2^32 + ( r'.2^32 + xl ) - * x = d.qh.2^32 + d.ql + r from (3) - * x = d.( qh.2^32 + ql ) + r (11) - * - * Letting - * - * q = qh.2^32 + ql (12) - * - * gives - * - * x = d.q + r from (11) and (12) - * - * which is the solution. 
- * - * - * This therefore gives us a two-step algorithm: - * - * xh = d.qh + r' 0 <= qh, 0 <= r' < d (2) - * ( r'.2^32 + xl ) = d.ql + r 0 <= ql, 0 <= r < d (6) - * - * which translates to - * - * %edx:%eax = 0:xh - * divl d - * qh = %eax - * r' = %edx - * - * %edx:%eax = r':xl - * divl d - * ql = %eax - * r = %edx - * - * Note that if - * - * xh < d - * - * (which is a fast dword comparison) then the first divl instruction - * can be omitted, since the answer will be - * - * qh = 0 - * r = xh - * - * - * Case 2 : 2^32 <= d < 2^64 - * ========================= - * - * We first express d as - * - * d = dh.2^k + dl 2^31 <= dh < 2^32, - * 0 <= dl < 2^k, 1 <= k <= 32 (1) - * - * i.e. find the highest bit set in d, subtract 32, and split d into - * dh and dl at that point. - * - * We then express x as - * - * x = xh.2^k + xl 0 <= xl < 2^k (2) - * - * giving - * - * xh.2^k = x - xl from (2) - * xh.2^k <= x since xl >= 0 from (1) - * xh.2^k < 2^64 since xh < 2^64 from (1) - * xh < 2^(64-k) (3) - * - * We then solve the division - * - * xh = dh.q' + r' 0 <= r' < dh (4) - * - * which we can do using a divl instruction since - * - * 0 <= xh < 2^64 since x < 2^64 and xh < x - * - * and - * - * 1 <= dh < 2^32 from (1) - * - * and - * - * dh.q' = xh - r' from (4) - * dh.q' <= xh since r' >= 0 from (4) - * dh.q' < 2^(64-k) from (3) (5) - * q'.2^31 <= dh.q' since dh >= 2^31 from (1) (6) - * q'.2^31 < 2^(64-k) from (5) and (6) - * q' < 2^(33-k) - * q' < 2^32 since k >= 1 from (1) (7) - * - * This gives us - * - * xh.2^k = dh.q'.2^k + r'.2^k from (4) - * x - xl = ( d - dl ).q' + r'.2^k from (1) and (2) - * x = d.q' + ( r'.2^k + xl ) - dl.q' (8) - * - * Now - * - * r'.2^k + xl < r'.2^k + 2^k since xl < 2^k from (2) - * r'.2^k + xl < ( r' + 1 ).2^k - * r'.2^k + xl < dh.2^k since r' < dh from (4) - * r'.2^k + xl < ( d - dl ) from (1) (9) - * - * - * (missing) - * - * - * This gives us two cases to consider: - * - * case (a): - * - * dl.q' <= ( r'.2^k + xl ) (15a) - * - * in which 
case - * - * x = d.q' + ( r'.2^k + xl - dl.q' ) - * - * is a direct solution to the division, since - * - * r'.2^k + xl < d from (9) - * ( r'.2^k + xl - dl.q' ) < d since dl >= 0 and q' >= 0 - * - * and - * - * 0 <= ( r'.2^k + xl - dl.q' ) from (15a) - * - * case (b): - * - * dl.q' > ( r'.2^k + xl ) (15b) - * - * Express - * - * x = d.(q'-1) + ( r'.2^k + xl ) + ( d - dl.q' ) - * - * - * (missing) - * - * - * special case: k = 32 cannot be handled with shifts - * - * (missing) - * - */ - -#include <stdint.h> -#include <assert.h> - -typedef uint64_t UDItype; - -struct uint64 { - uint32_t l; - uint32_t h; -}; - -static inline void udivmod64_lo ( const struct uint64 *x, - const struct uint64 *d, - struct uint64 *q, - struct uint64 *r ) { - uint32_t r_dash; - - q->h = 0; - r->h = 0; - r_dash = x->h; - - if ( x->h >= d->l ) { - __asm__ ( "divl %2" - : "=&a" ( q->h ), "=&d" ( r_dash ) - : "g" ( d->l ), "0" ( x->h ), "1" ( 0 ) ); - } - - __asm__ ( "divl %2" - : "=&a" ( q->l ), "=&d" ( r->l ) - : "g" ( d->l ), "0" ( x->l ), "1" ( r_dash ) ); -} - -void udivmod64 ( const struct uint64 *x, - const struct uint64 *d, - struct uint64 *q, - struct uint64 *r ) { - - if ( d->h == 0 ) { - udivmod64_lo ( x, d, q, r ); - } else { - assert ( 0 ); - while ( 1 ) {}; - } -} - -/** - * 64-bit division with remainder - * - * @v x Dividend - * @v d Divisor - * @ret r Remainder - * @ret q Quotient - */ -UDItype __udivmoddi4 ( UDItype x, UDItype d, UDItype *r ) { - UDItype q; - UDItype *_x = &x; - UDItype *_d = &d; - UDItype *_q = &q; - UDItype *_r = r; - - udivmod64 ( ( struct uint64 * ) _x, ( struct uint64 * ) _d, - ( struct uint64 * ) _q, ( struct uint64 * ) _r ); - - assert ( ( x == ( ( d * q ) + (*r) ) ) ); - assert ( (*r) < d ); - - return q; -} - -/** - * 64-bit division - * - * @v x Dividend - * @v d Divisor - * @ret q Quotient - */ -UDItype __udivdi3 ( UDItype x, UDItype d ) { - UDItype r; - return __udivmoddi4 ( x, d, &r ); -} - -/** - * 64-bit modulus - * - * @v x Dividend - * @v d 
Divisor - * @ret q Quotient - */ -UDItype __umoddi3 ( UDItype x, UDItype d ) { - UDItype r; - __udivmoddi4 ( x, d, &r ); - return r; -} diff --git a/src/libgcc/__divdi3.c b/src/libgcc/__divdi3.c new file mode 100644 index 00000000..36f0b37f --- /dev/null +++ b/src/libgcc/__divdi3.c @@ -0,0 +1,26 @@ +/* + * arch/i386/libgcc/__divdi3.c + */ + +#include "libgcc.h" + +LIBGCC int64_t __divdi3(int64_t num, int64_t den) +{ + int minus = 0; + int64_t v; + + if ( num < 0 ) { + num = -num; + minus = 1; + } + if ( den < 0 ) { + den = -den; + minus ^= 1; + } + + v = __udivmoddi4(num, den, NULL); + if ( minus ) + v = -v; + + return v; +} diff --git a/src/libgcc/__moddi3.c b/src/libgcc/__moddi3.c new file mode 100644 index 00000000..eb7784b7 --- /dev/null +++ b/src/libgcc/__moddi3.c @@ -0,0 +1,26 @@ +/* + * arch/i386/libgcc/__moddi3.c + */ + +#include "libgcc.h" + +LIBGCC int64_t __moddi3(int64_t num, int64_t den) +{ + int minus = 0; + int64_t v; + + if ( num < 0 ) { + num = -num; + minus = 1; + } + if ( den < 0 ) { + den = -den; + minus ^= 1; + } + + (void) __udivmoddi4(num, den, (uint64_t *)&v); + if ( minus ) + v = -v; + + return v; +} diff --git a/src/libgcc/__udivdi3.c b/src/libgcc/__udivdi3.c new file mode 100644 index 00000000..9ae0c3dc --- /dev/null +++ b/src/libgcc/__udivdi3.c @@ -0,0 +1,10 @@ +/* + * arch/i386/libgcc/__divdi3.c + */ + +#include "libgcc.h" + +LIBGCC uint64_t __udivdi3(uint64_t num, uint64_t den) +{ + return __udivmoddi4(num, den, NULL); +} diff --git a/src/libgcc/__udivmoddi4.c b/src/libgcc/__udivmoddi4.c new file mode 100644 index 00000000..59966edb --- /dev/null +++ b/src/libgcc/__udivmoddi4.c @@ -0,0 +1,32 @@ +#include "libgcc.h" + +LIBGCC uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *rem_p) +{ + uint64_t quot = 0, qbit = 1; + + if ( den == 0 ) { + return 1/((unsigned)den); /* Intentional divide by zero, without + triggering a compiler warning which + would abort the build */ + } + + /* Left-justify denominator and count shift */ + 
while ( (int64_t)den >= 0 ) { + den <<= 1; + qbit <<= 1; + } + + while ( qbit ) { + if ( den <= num ) { + num -= den; + quot += qbit; + } + den >>= 1; + qbit >>= 1; + } + + if ( rem_p ) + *rem_p = num; + + return quot; +} diff --git a/src/libgcc/__umoddi3.c b/src/libgcc/__umoddi3.c new file mode 100644 index 00000000..f6c76cb6 --- /dev/null +++ b/src/libgcc/__umoddi3.c @@ -0,0 +1,13 @@ +/* + * arch/i386/libgcc/__umoddi3.c + */ + +#include "libgcc.h" + +LIBGCC uint64_t __umoddi3(uint64_t num, uint64_t den) +{ + uint64_t v; + + (void) __udivmoddi4(num, den, &v); + return v; +} diff --git a/src/libgcc/libgcc.h b/src/libgcc/libgcc.h new file mode 100644 index 00000000..5b4a6244 --- /dev/null +++ b/src/libgcc/libgcc.h @@ -0,0 +1,26 @@ +#ifndef _LIBGCC_H +#define _LIBGCC_H + +#include <stdint.h> +#include <stddef.h> + +/* + * It seems as though gcc expects its implicit arithmetic functions to + * be cdecl, even if -mrtd is specified. This is somewhat + * inconsistent; for example, if -mregparm=3 is used then the implicit + * functions do become regparm(3). + * + * The implicit calls to memcpy() and memset() which gcc can generate + * do not seem to have this inconsistency; -mregparm and -mrtd affect + * them in the same way as any other function. + * + */ +#define LIBGCC __attribute__ (( cdecl )) + +extern LIBGCC uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *rem); +extern LIBGCC uint64_t __udivdi3(uint64_t num, uint64_t den); +extern LIBGCC uint64_t __umoddi3(uint64_t num, uint64_t den); +extern LIBGCC int64_t __divdi3(int64_t num, int64_t den); +extern LIBGCC int64_t __moddi3(int64_t num, int64_t den); + +#endif /* _LIBGCC_H */ diff --git a/src/core/gcc_implicit.c b/src/libgcc/memcpy.c index 8f217b6d..e98b7838 100644 --- a/src/core/gcc_implicit.c +++ b/src/libgcc/memcpy.c @@ -1,7 +1,5 @@ /** @file * - * gcc implicit functions - * * gcc sometimes likes to insert implicit calls to memcpy(). 
* Unfortunately, there doesn't seem to be any way to prevent it from * doing this, or to force it to use the optimised memcpy() as seen by |