From bb81a09e55eaf7e5f798468ab971469b6f66a259 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86: all cpu backtrace When a spinlock lockup occurs, arrange for the NMI code to emit an all-cpu backtrace, so we get to see which CPU is holding the lock, and where. Cc: Andi Kleen Cc: Ingo Molnar Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/linux/nmi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e16904e28c3a..acb4ed130247 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -15,9 +15,14 @@ * disables interrupts for a long time. This call is stateless. */ #ifdef ARCH_HAS_NMI_WATCHDOG +#include extern void touch_nmi_watchdog(void); #else # define touch_nmi_watchdog() touch_softlockup_watchdog() #endif +#ifndef trigger_all_cpu_backtrace +#define trigger_all_cpu_backtrace() do { } while (0) +#endif + #endif -- cgit v1.2.3-55-g7522 From 968de4f02621db35b8ae5239c8cfc6664fb872d8 Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Thu, 7 Dec 2006 02:14:04 +0100 Subject: [PATCH] i386: Relocatable kernel support This patch modifies the i386 kernel so that if CONFIG_RELOCATABLE is selected it will be able to be loaded at any 4K aligned address below 1G. The technique used is to compile the decompressor with -fPIC and modify it so the decompressor is fully relocatable. For the main kernel relocations are generated. Resulting in a kernel that is relocatable with no runtime overhead and no need to modify the source code. A reserved 32bit word in the parameters has been assigned to serve as a stack so we figure out where are running. Signed-off-by: Eric W. Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- arch/i386/Kconfig | 12 + arch/i386/Makefile | 4 +- arch/i386/boot/compressed/Makefile | 28 +- arch/i386/boot/compressed/head.S | 184 +++++++---- arch/i386/boot/compressed/misc.c | 261 ++++++++-------- arch/i386/boot/compressed/relocs.c | 563 ++++++++++++++++++++++++++++++++++ arch/i386/boot/compressed/vmlinux.lds | 43 +++ arch/i386/boot/compressed/vmlinux.scr | 3 +- arch/i386/boot/setup.S | 29 +- include/linux/screen_info.h | 3 +- 10 files changed, 920 insertions(+), 210 deletions(-) create mode 100644 arch/i386/boot/compressed/relocs.c create mode 100644 arch/i386/boot/compressed/vmlinux.lds (limited to 'include/linux') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 8ff1c6fb5aa1..d588ca874bb4 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -773,6 +773,18 @@ config CRASH_DUMP PHYSICAL_START. For more details see Documentation/kdump/kdump.txt +config RELOCATABLE + bool "Build a relocatable kernel" + help + This build a kernel image that retains relocation information + so it can be loaded someplace besides the default 1MB. + The relocations tend to the kernel binary about 10% larger, + but are discarded at runtime. + + One use is for the kexec on panic case where the recovery kernel + must live at a different physical address than the primary + kernel. + config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 0677908dfa06..d1aca52bf690 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -26,7 +26,9 @@ endif LDFLAGS := -m elf_i386 OBJCOPYFLAGS := -O binary -R .note -R .comment -S -LDFLAGS_vmlinux := +ifdef CONFIG_RELOCATABLE +LDFLAGS_vmlinux := --emit-relocs +endif CHECKFLAGS += -D__i386__ CFLAGS += -pipe -msoft-float diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile index 258ea95224f6..cc28da3a881e 100644 --- a/arch/i386/boot/compressed/Makefile +++ b/arch/i386/boot/compressed/Makefile @@ -4,22 +4,42 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o +targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o \ + vmlinux.bin.all vmlinux.relocs EXTRA_AFLAGS := -traditional -LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 +LDFLAGS_vmlinux := -T +CFLAGS_misc.o += -fPIC +hostprogs-y := relocs -$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE +$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE $(call if_changed,ld) @: $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(obj)/relocs $< > $@ +$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE + $(call if_changed,relocs) + +vmlinux.bin.all-y := $(obj)/vmlinux.bin +vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs +quiet_cmd_relocbin = BUILD $@ + cmd_relocbin = cat $(filter-out FORCE,$^) > $@ +$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE + $(call if_changed,relocbin) + +ifdef CONFIG_RELOCATABLE +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE + $(call if_changed,gzip) +else $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) +endif LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE +$(obj)/piggy.o: $(src)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE $(call if_changed,ld) diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S index 40a8de8270a9..e4dd7a6b9b0f 100644 --- a/arch/i386/boot/compressed/head.S +++ b/arch/i386/boot/compressed/head.S @@ -25,9 +25,11 @@ #include #include +#include +.section ".text.head" .globl startup_32 - + startup_32: cld cli @@ -36,93 +38,141 @@ startup_32: movl %eax,%es movl %eax,%fs movl %eax,%gs + movl %eax,%ss - lss stack_start,%esp - xorl %eax,%eax -1: incl %eax # check that A20 really IS enabled - movl %eax,0x000000 # loop forever if it isn't - cmpl %eax,0x100000 - je 1b +/* Calculate the delta between where we were compiled to run + * at and where we were actually loaded at. This can only be done + * with a short local call on x86. Nothing else will tell us what + * address we are running at. The reserved chunk of the real-mode + * data at 0x34-0x3f are used as the stack for this calculation. + * Only 4 bytes are needed. + */ + leal 0x40(%esi), %esp + call 1f +1: popl %ebp + subl $1b, %ebp + +/* Compute the delta between where we were compiled to run at + * and where the code will actually run at. + */ + /* Start with the delta to where the kernel will run at. If we are + * a relocatable kernel this is the delta to our load address otherwise + * this is the delta to CONFIG_PHYSICAL start. + */ +#ifdef CONFIG_RELOCATABLE + movl %ebp, %ebx +#else + movl $(CONFIG_PHYSICAL_START - startup_32), %ebx +#endif + + /* Replace the compressed data size with the uncompressed size */ + subl input_len(%ebp), %ebx + movl output_len(%ebp), %eax + addl %eax, %ebx + /* Add 8 bytes for every 32K input block */ + shrl $12, %eax + addl %eax, %ebx + /* Add 32K + 18 bytes of extra slack */ + addl $(32768 + 18), %ebx + /* Align on a 4K boundary */ + addl $4095, %ebx + andl $~4095, %ebx + +/* Copy the compressed kernel to the end of our buffer + * where decompression in place becomes safe. + */ + pushl %esi + leal _end(%ebp), %esi + leal _end(%ebx), %edi + movl $(_end - startup_32), %ecx + std + rep + movsb + cld + popl %esi + +/* Compute the kernel start address. + */ +#ifdef CONFIG_RELOCATABLE + leal startup_32(%ebp), %ebp +#else + movl $CONFIG_PHYSICAL_START, %ebp +#endif /* - * Initialize eflags. Some BIOS's leave bits like NT set. This would - * confuse the debugger if this code is traced. - * XXX - best to initialize before switching to protected mode. + * Jump to the relocated address. */ - pushl $0 - popfl + leal relocated(%ebx), %eax + jmp *%eax +.section ".text" +relocated: + /* * Clear BSS */ xorl %eax,%eax - movl $_edata,%edi - movl $_end,%ecx + leal _edata(%ebx),%edi + leal _end(%ebx), %ecx subl %edi,%ecx cld rep stosb + +/* + * Setup the stack for the decompressor + */ + leal stack_end(%ebx), %esp + /* * Do the decompression, and jump to the new kernel.. */ - subl $16,%esp # place for structure on the stack - movl %esp,%eax + movl output_len(%ebx), %eax + pushl %eax + pushl %ebp # output address + movl input_len(%ebx), %eax + pushl %eax # input_len + leal input_data(%ebx), %eax + pushl %eax # input_data + leal _end(%ebx), %eax + pushl %eax # end of the image as third argument pushl %esi # real mode pointer as second arg - pushl %eax # address of structure as first arg call decompress_kernel - orl %eax,%eax - jnz 3f - popl %esi # discard address - popl %esi # real mode pointer - xorl %ebx,%ebx - ljmp $(__BOOT_CS), $CONFIG_PHYSICAL_START + addl $20, %esp + popl %ecx + +#if CONFIG_RELOCATABLE +/* Find the address of the relocations. + */ + movl %ebp, %edi + addl %ecx, %edi + +/* Calculate the delta between where vmlinux was compiled to run + * and where it was actually loaded. + */ + movl %ebp, %ebx + subl $CONFIG_PHYSICAL_START, %ebx /* - * We come here, if we were loaded high. - * We need to move the move-in-place routine down to 0x1000 - * and then start it with the buffer addresses in registers, - * which we got from the stack. + * Process relocations. */ -3: - movl $move_routine_start,%esi - movl $0x1000,%edi - movl $move_routine_end,%ecx - subl %esi,%ecx - addl $3,%ecx - shrl $2,%ecx - cld - rep - movsl - - popl %esi # discard the address - popl %ebx # real mode pointer - popl %esi # low_buffer_start - popl %ecx # lcount - popl %edx # high_buffer_start - popl %eax # hcount - movl $CONFIG_PHYSICAL_START,%edi - cli # make sure we don't get interrupted - ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine + +1: subl $4, %edi + movl 0(%edi), %ecx + testl %ecx, %ecx + jz 2f + addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) + jmp 1b +2: +#endif /* - * Routine (template) for moving the decompressed kernel in place, - * if we were high loaded. This _must_ PIC-code ! + * Jump to the decompressed kernel. */ -move_routine_start: - movl %ecx,%ebp - shrl $2,%ecx - rep - movsl - movl %ebp,%ecx - andl $3,%ecx - rep - movsb - movl %edx,%esi - movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0 - addl $3,%ecx - shrl $2,%ecx - rep - movsl - movl %ebx,%esi # Restore setup pointer xorl %ebx,%ebx - ljmp $(__BOOT_CS), $CONFIG_PHYSICAL_START -move_routine_end: + jmp *%ebp + +.bss +.balign 4 +stack: + .fill 4096, 1, 0 +stack_end: diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index 20970ff44119..4eac24e95a10 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -13,6 +13,88 @@ #include #include #include +#include + +/* WARNING!! + * This code is compiled with -fPIC and it is relocated dynamically + * at run time, but no relocation processing is performed. + * This means that it is not safe to place pointers in static structures. + */ + +/* + * Getting to provable safe in place decompression is hard. + * Worst case behaviours need to be analized. + * Background information: + * + * The file layout is: + * magic[2] + * method[1] + * flags[1] + * timestamp[4] + * extraflags[1] + * os[1] + * compressed data blocks[N] + * crc[4] orig_len[4] + * + * resulting in 18 bytes of non compressed data overhead. + * + * Files divided into blocks + * 1 bit (last block flag) + * 2 bits (block type) + * + * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved. + * The smallest block type encoding is always used. + * + * stored: + * 32 bits length in bytes. + * + * fixed: + * magic fixed tree. + * symbols. + * + * dynamic: + * dynamic tree encoding. + * symbols. + * + * + * The buffer for decompression in place is the length of the + * uncompressed data, plus a small amount extra to keep the algorithm safe. + * The compressed data is placed at the end of the buffer. The output + * pointer is placed at the start of the buffer and the input pointer + * is placed where the compressed data starts. Problems will occur + * when the output pointer overruns the input pointer. + * + * The output pointer can only overrun the input pointer if the input + * pointer is moving faster than the output pointer. A condition only + * triggered by data whose compressed form is larger than the uncompressed + * form. + * + * The worst case at the block level is a growth of the compressed data + * of 5 bytes per 32767 bytes. + * + * The worst case internal to a compressed block is very hard to figure. + * The worst case can at least be boundined by having one bit that represents + * 32764 bytes and then all of the rest of the bytes representing the very + * very last byte. + * + * All of which is enough to compute an amount of extra data that is required + * to be safe. To avoid problems at the block level allocating 5 extra bytes + * per 32767 bytes of data is sufficient. To avoind problems internal to a block + * adding an extra 32767 bytes (the worst case uncompressed block size) is + * sufficient, to ensure that in the worst case the decompressed data for + * block will stop the byte before the compressed data for a block begins. + * To avoid problems with the compressed data's meta information an extra 18 + * bytes are needed. Leading to the formula: + * + * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. + * + * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. + * Adding 32768 instead of 32767 just makes for round numbers. + * Adding the decompressor_size is necessary as it musht live after all + * of the data as well. Last I measured the decompressor is about 14K. + * 10K of actuall data and 4K of bss. + * + */ /* * gzip declarations @@ -29,15 +111,20 @@ typedef unsigned char uch; typedef unsigned short ush; typedef unsigned long ulg; -#define WSIZE 0x8000 /* Window size must be at least 32k, */ - /* and a power of two */ +#define WSIZE 0x80000000 /* Window size must be at least 32k, + * and a power of two + * We don't actually have a window just + * a huge output buffer so I report + * a 2G windows size, as that should + * always be larger than our output buffer. + */ -static uch *inbuf; /* input buffer */ -static uch window[WSIZE]; /* Sliding window buffer */ +static uch *inbuf; /* input buffer */ +static uch *window; /* Sliding window buffer, (and final output buffer) */ -static unsigned insize = 0; /* valid bytes in inbuf */ -static unsigned inptr = 0; /* index of next byte to be processed in inbuf */ -static unsigned outcnt = 0; /* bytes in output buffer */ +static unsigned insize; /* valid bytes in inbuf */ +static unsigned inptr; /* index of next byte to be processed in inbuf */ +static unsigned outcnt; /* bytes in output buffer */ /* gzip flag byte */ #define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ @@ -88,8 +175,6 @@ extern unsigned char input_data[]; extern int input_len; static long bytes_out = 0; -static uch *output_data; -static unsigned long output_ptr = 0; static void *malloc(int size); static void free(void *where); @@ -99,17 +184,10 @@ static void *memcpy(void *dest, const void *src, unsigned n); static void putstr(const char *); -extern int end; -static long free_mem_ptr = (long)&end; -static long free_mem_end_ptr; +static unsigned long free_mem_ptr; +static unsigned long free_mem_end_ptr; -#define INPLACE_MOVE_ROUTINE 0x1000 -#define LOW_BUFFER_START 0x2000 -#define LOW_BUFFER_MAX 0x90000 #define HEAP_SIZE 0x3000 -static unsigned int low_buffer_end, low_buffer_size; -static int high_loaded =0; -static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/; static char *vidmem = (char *)0xb8000; static int vidport; @@ -150,7 +228,7 @@ static void gzip_mark(void **ptr) static void gzip_release(void **ptr) { - free_mem_ptr = (long) *ptr; + free_mem_ptr = (unsigned long) *ptr; } static void scroll(void) @@ -178,7 +256,7 @@ static void putstr(const char *s) y--; } } else { - vidmem [ ( x + cols * y ) * 2 ] = c; + vidmem [ ( x + cols * y ) * 2 ] = c; if ( ++x >= cols ) { x = 0; if ( ++y >= lines ) { @@ -223,58 +301,31 @@ static void* memcpy(void* dest, const void* src, unsigned n) */ static int fill_inbuf(void) { - if (insize != 0) { - error("ran out of input data"); - } - - inbuf = input_data; - insize = input_len; - inptr = 1; - return inbuf[0]; + error("ran out of input data"); + return 0; } /* =========================================================================== * Write the output window window[0..outcnt-1] and update crc and bytes_out. * (Used for the decompressed data only.) */ -static void flush_window_low(void) -{ - ulg c = crc; /* temporary variable */ - unsigned n; - uch *in, *out, ch; - - in = window; - out = &output_data[output_ptr]; - for (n = 0; n < outcnt; n++) { - ch = *out++ = *in++; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - output_ptr += (ulg)outcnt; - outcnt = 0; -} - -static void flush_window_high(void) -{ - ulg c = crc; /* temporary variable */ - unsigned n; - uch *in, ch; - in = window; - for (n = 0; n < outcnt; n++) { - ch = *output_data++ = *in++; - if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - outcnt = 0; -} - static void flush_window(void) { - if (high_loaded) flush_window_high(); - else flush_window_low(); + /* With my window equal to my output buffer + * I only need to compute the crc here. + */ + ulg c = crc; /* temporary variable */ + unsigned n; + uch *in, ch; + + in = window; + for (n = 0; n < outcnt; n++) { + ch = *in++; + c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); + } + crc = c; + bytes_out += (ulg)outcnt; + outcnt = 0; } static void error(char *x) @@ -286,66 +337,8 @@ static void error(char *x) while(1); /* Halt */ } -#define STACK_SIZE (4096) - -long user_stack [STACK_SIZE]; - -struct { - long * a; - short b; - } stack_start = { & user_stack [STACK_SIZE] , __BOOT_DS }; - -static void setup_normal_output_buffer(void) -{ -#ifdef STANDARD_MEMORY_BIOS_CALL - if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory"); -#else - if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); -#endif - output_data = (unsigned char *)CONFIG_PHYSICAL_START; /* Normally Points to 1M */ - free_mem_end_ptr = (long)real_mode; -} - -struct moveparams { - uch *low_buffer_start; int lcount; - uch *high_buffer_start; int hcount; -}; - -static void setup_output_buffer_if_we_run_high(struct moveparams *mv) -{ - high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE); -#ifdef STANDARD_MEMORY_BIOS_CALL - if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); -#else - if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory"); -#endif - mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START; - low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX - ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; - low_buffer_size = low_buffer_end - LOW_BUFFER_START; - high_loaded = 1; - free_mem_end_ptr = (long)high_buffer_start; - if ( (CONFIG_PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) { - high_buffer_start = (uch *)(CONFIG_PHYSICAL_START + low_buffer_size); - mv->hcount = 0; /* say: we need not to move high_buffer */ - } - else mv->hcount = -1; - mv->high_buffer_start = high_buffer_start; -} - -static void close_output_buffer_if_we_run_high(struct moveparams *mv) -{ - if (bytes_out > low_buffer_size) { - mv->lcount = low_buffer_size; - if (mv->hcount) - mv->hcount = bytes_out - low_buffer_size; - } else { - mv->lcount = bytes_out; - mv->hcount = 0; - } -} - -asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode) +asmlinkage void decompress_kernel(void *rmode, unsigned long end, + uch *input_data, unsigned long input_len, uch *output) { real_mode = rmode; @@ -360,13 +353,25 @@ asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode) lines = RM_SCREEN_INFO.orig_video_lines; cols = RM_SCREEN_INFO.orig_video_cols; - if (free_mem_ptr < 0x100000) setup_normal_output_buffer(); - else setup_output_buffer_if_we_run_high(mv); + window = output; /* Output buffer (Normally at 1M) */ + free_mem_ptr = end; /* Heap */ + free_mem_end_ptr = end + HEAP_SIZE; + inbuf = input_data; /* Input buffer */ + insize = input_len; + inptr = 0; + + if (((u32)output - CONFIG_PHYSICAL_START) & 0x3fffff) + error("Destination address not 4M aligned"); + if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff)) + error("Destination address too large"); +#ifndef CONFIG_RELOCATABLE + if ((u32)output != CONFIG_PHYSICAL_START) + error("Wrong destination address"); +#endif makecrc(); putstr("Uncompressing Linux... "); gunzip(); putstr("Ok, booting the kernel.\n"); - if (high_loaded) close_output_buffer_if_we_run_high(mv); - return high_loaded; + return; } diff --git a/arch/i386/boot/compressed/relocs.c b/arch/i386/boot/compressed/relocs.c new file mode 100644 index 000000000000..0551ceb21bed --- /dev/null +++ b/arch/i386/boot/compressed/relocs.c @@ -0,0 +1,563 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define USE_BSD +#include + +#define MAX_SHDRS 100 +static Elf32_Ehdr ehdr; +static Elf32_Shdr shdr[MAX_SHDRS]; +static Elf32_Sym *symtab[MAX_SHDRS]; +static Elf32_Rel *reltab[MAX_SHDRS]; +static char *strtab[MAX_SHDRS]; +static unsigned long reloc_count, reloc_idx; +static unsigned long *relocs; + +static void die(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static const char *sym_type(unsigned type) +{ + static const char *type_name[] = { +#define SYM_TYPE(X) [X] = #X + SYM_TYPE(STT_NOTYPE), + SYM_TYPE(STT_OBJECT), + SYM_TYPE(STT_FUNC), + SYM_TYPE(STT_SECTION), + SYM_TYPE(STT_FILE), + SYM_TYPE(STT_COMMON), + SYM_TYPE(STT_TLS), +#undef SYM_TYPE + }; + const char *name = "unknown sym type name"; + if (type < sizeof(type_name)/sizeof(type_name[0])) { + name = type_name[type]; + } + return name; +} + +static const char *sym_bind(unsigned bind) +{ + static const char *bind_name[] = { +#define SYM_BIND(X) [X] = #X + SYM_BIND(STB_LOCAL), + SYM_BIND(STB_GLOBAL), + SYM_BIND(STB_WEAK), +#undef SYM_BIND + }; + const char *name = "unknown sym bind name"; + if (bind < sizeof(bind_name)/sizeof(bind_name[0])) { + name = bind_name[bind]; + } + return name; +} + +static const char *sym_visibility(unsigned visibility) +{ + static const char *visibility_name[] = { +#define SYM_VISIBILITY(X) [X] = #X + SYM_VISIBILITY(STV_DEFAULT), + SYM_VISIBILITY(STV_INTERNAL), + SYM_VISIBILITY(STV_HIDDEN), + SYM_VISIBILITY(STV_PROTECTED), +#undef SYM_VISIBILITY + }; + const char *name = "unknown sym visibility name"; + if (visibility < sizeof(visibility_name)/sizeof(visibility_name[0])) { + name = visibility_name[visibility]; + } + return name; +} + +static const char *rel_type(unsigned type) +{ + static const char *type_name[] = { +#define REL_TYPE(X) [X] = #X + REL_TYPE(R_386_NONE), + REL_TYPE(R_386_32), + REL_TYPE(R_386_PC32), + REL_TYPE(R_386_GOT32), + REL_TYPE(R_386_PLT32), + REL_TYPE(R_386_COPY), + REL_TYPE(R_386_GLOB_DAT), + REL_TYPE(R_386_JMP_SLOT), + REL_TYPE(R_386_RELATIVE), + REL_TYPE(R_386_GOTOFF), + REL_TYPE(R_386_GOTPC), +#undef REL_TYPE + }; + const char *name = "unknown type rel type name"; + if (type < sizeof(type_name)/sizeof(type_name[0])) { + name = type_name[type]; + } + return name; +} + +static const char *sec_name(unsigned shndx) +{ + const char *sec_strtab; + const char *name; + sec_strtab = strtab[ehdr.e_shstrndx]; + name = ""; + if (shndx < ehdr.e_shnum) { + name = sec_strtab + shdr[shndx].sh_name; + } + else if (shndx == SHN_ABS) { + name = "ABSOLUTE"; + } + else if (shndx == SHN_COMMON) { + name = "COMMON"; + } + return name; +} + +static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) +{ + const char *name; + name = ""; + if (sym->st_name) { + name = sym_strtab + sym->st_name; + } + else { + name = sec_name(shdr[sym->st_shndx].sh_name); + } + return name; +} + + + +#if BYTE_ORDER == LITTLE_ENDIAN +#define le16_to_cpu(val) (val) +#define le32_to_cpu(val) (val) +#endif +#if BYTE_ORDER == BIG_ENDIAN +#define le16_to_cpu(val) bswap_16(val) +#define le32_to_cpu(val) bswap_32(val) +#endif + +static uint16_t elf16_to_cpu(uint16_t val) +{ + return le16_to_cpu(val); +} + +static uint32_t elf32_to_cpu(uint32_t val) +{ + return le32_to_cpu(val); +} + +static void read_ehdr(FILE *fp) +{ + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) { + die("Cannot read ELF header: %s\n", + strerror(errno)); + } + if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) { + die("No ELF magic\n"); + } + if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { + die("Not a 32 bit executable\n"); + } + if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { + die("Not a LSB ELF executable\n"); + } + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) { + die("Unknown ELF version\n"); + } + /* Convert the fields to native endian */ + ehdr.e_type = elf16_to_cpu(ehdr.e_type); + ehdr.e_machine = elf16_to_cpu(ehdr.e_machine); + ehdr.e_version = elf32_to_cpu(ehdr.e_version); + ehdr.e_entry = elf32_to_cpu(ehdr.e_entry); + ehdr.e_phoff = elf32_to_cpu(ehdr.e_phoff); + ehdr.e_shoff = elf32_to_cpu(ehdr.e_shoff); + ehdr.e_flags = elf32_to_cpu(ehdr.e_flags); + ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize); + ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize); + ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum); + ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize); + ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum); + ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx); + + if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) { + die("Unsupported ELF header type\n"); + } + if (ehdr.e_machine != EM_386) { + die("Not for x86\n"); + } + if (ehdr.e_version != EV_CURRENT) { + die("Unknown ELF version\n"); + } + if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) { + die("Bad Elf header size\n"); + } + if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) { + die("Bad program header entry\n"); + } + if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) { + die("Bad section header entry\n"); + } + if (ehdr.e_shstrndx >= ehdr.e_shnum) { + die("String table index out of bounds\n"); + } +} + +static void read_shdrs(FILE *fp) +{ + int i; + if (ehdr.e_shnum > MAX_SHDRS) { + die("%d section headers supported: %d\n", + ehdr.e_shnum, MAX_SHDRS); + } + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + ehdr.e_shoff, strerror(errno)); + } + if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) { + die("Cannot read ELF section headers: %s\n", + strerror(errno)); + } + for(i = 0; i < ehdr.e_shnum; i++) { + shdr[i].sh_name = elf32_to_cpu(shdr[i].sh_name); + shdr[i].sh_type = elf32_to_cpu(shdr[i].sh_type); + shdr[i].sh_flags = elf32_to_cpu(shdr[i].sh_flags); + shdr[i].sh_addr = elf32_to_cpu(shdr[i].sh_addr); + shdr[i].sh_offset = elf32_to_cpu(shdr[i].sh_offset); + shdr[i].sh_size = elf32_to_cpu(shdr[i].sh_size); + shdr[i].sh_link = elf32_to_cpu(shdr[i].sh_link); + shdr[i].sh_info = elf32_to_cpu(shdr[i].sh_info); + shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign); + shdr[i].sh_entsize = elf32_to_cpu(shdr[i].sh_entsize); + } + +} + +static void read_strtabs(FILE *fp) +{ + int i; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_STRTAB) { + continue; + } + strtab[i] = malloc(shdr[i].sh_size); + if (!strtab[i]) { + die("malloc of %d bytes for strtab failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + } +} + +static void read_symtabs(FILE *fp) +{ + int i,j; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_SYMTAB) { + continue; + } + symtab[i] = malloc(shdr[i].sh_size); + if (!symtab[i]) { + die("malloc of %d bytes for symtab failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) { + symtab[i][j].st_name = elf32_to_cpu(symtab[i][j].st_name); + symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value); + symtab[i][j].st_size = elf32_to_cpu(symtab[i][j].st_size); + symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx); + } + } +} + + +static void read_relocs(FILE *fp) +{ + int i,j; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_REL) { + continue; + } + reltab[i] = malloc(shdr[i].sh_size); + if (!reltab[i]) { + die("malloc of %d bytes for relocs failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset); + reltab[i][j].r_info = elf32_to_cpu(reltab[i][j].r_info); + } + } +} + + +static void print_absolute_symbols(void) +{ + int i; + printf("Absolute symbols\n"); + printf(" Num: Value Size Type Bind Visibility Name\n"); + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + int j; + if (shdr[i].sh_type != SHT_SYMTAB) { + continue; + } + sh_symtab = symtab[i]; + sym_strtab = strtab[shdr[i].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) { + Elf32_Sym *sym; + const char *name; + sym = &symtab[i][j]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + printf("%5d %08x %5d %10s %10s %12s %s\n", + j, sym->st_value, sym->st_size, + sym_type(ELF32_ST_TYPE(sym->st_info)), + sym_bind(ELF32_ST_BIND(sym->st_info)), + sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)), + name); + } + } + printf("\n"); +} + +static void print_absolute_relocs(void) +{ + int i; + printf("Absolute relocations\n"); + printf("Offset Info Type Sym.Value Sym.Name\n"); + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + unsigned sec_applies, sec_symtab; + int j; + if (shdr[i].sh_type != SHT_REL) { + continue; + } + sec_symtab = shdr[i].sh_link; + sec_applies = shdr[i].sh_info; + if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = symtab[sec_symtab]; + sym_strtab = strtab[shdr[sec_symtab].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + const char *name; + rel = &reltab[i][j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + printf("%08x %08x %10s %08x %s\n", + rel->r_offset, + rel->r_info, + rel_type(ELF32_R_TYPE(rel->r_info)), + sym->st_value, + name); + } + } + printf("\n"); +} + +static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) +{ + int i; + /* Walk through the relocations */ + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + unsigned sec_applies, sec_symtab; + int j; + if (shdr[i].sh_type != SHT_REL) { + continue; + } + sec_symtab = shdr[i].sh_link; + sec_applies = shdr[i].sh_info; + if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = symtab[sec_symtab]; + sym_strtab = strtab[shdr[sec_symtab].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + unsigned r_type; + rel = &reltab[i][j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + r_type = ELF32_R_TYPE(rel->r_info); + /* Don't visit relocations to absolute symbols */ + if (sym->st_shndx == SHN_ABS) { + continue; + } + if (r_type == R_386_PC32) { + /* PC relative relocations don't need to be adjusted */ + } + else if (r_type == R_386_32) { + /* Visit relocations that need to be adjusted */ + visit(rel, sym); + } + else { + die("Unsupported relocation type: %d\n", r_type); + } + } + } +} + +static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + reloc_count += 1; +} + +static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + /* Remember the address that needs to be adjusted. */ + relocs[reloc_idx++] = rel->r_offset; +} + +static int cmp_relocs(const void *va, const void *vb) +{ + const unsigned long *a, *b; + a = va; b = vb; + return (*a == *b)? 0 : (*a > *b)? 1 : -1; +} + +static void emit_relocs(int as_text) +{ + int i; + /* Count how many relocations I have and allocate space for them. */ + reloc_count = 0; + walk_relocs(count_reloc); + relocs = malloc(reloc_count * sizeof(relocs[0])); + if (!relocs) { + die("malloc of %d entries for relocs failed\n", + reloc_count); + } + /* Collect up the relocations */ + reloc_idx = 0; + walk_relocs(collect_reloc); + + /* Order the relocations for more efficient processing */ + qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); + + /* Print the relocations */ + if (as_text) { + /* Print the relocations in a form suitable that + * gas will like. + */ + printf(".section \".data.reloc\",\"a\"\n"); + printf(".balign 4\n"); + for(i = 0; i < reloc_count; i++) { + printf("\t .long 0x%08lx\n", relocs[i]); + } + printf("\n"); + } + else { + unsigned char buf[4]; + buf[0] = buf[1] = buf[2] = buf[3] = 0; + /* Print a stop */ + printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + /* Now print each relocation */ + for(i = 0; i < reloc_count; i++) { + buf[0] = (relocs[i] >> 0) & 0xff; + buf[1] = (relocs[i] >> 8) & 0xff; + buf[2] = (relocs[i] >> 16) & 0xff; + buf[3] = (relocs[i] >> 24) & 0xff; + printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + } + } +} + +static void usage(void) +{ + die("i386_reloc [--abs | --text] vmlinux\n"); +} + +int main(int argc, char **argv) +{ + int show_absolute; + int as_text; + const char *fname; + FILE *fp; + int i; + + show_absolute = 0; + as_text = 0; + fname = NULL; + for(i = 1; i < argc; i++) { + char *arg = argv[i]; + if (*arg == '-') { + if (strcmp(argv[1], "--abs") == 0) { + show_absolute = 1; + continue; + } + else if (strcmp(argv[1], "--text") == 0) { + as_text = 1; + continue; + } + } + else if (!fname) { + fname = arg; + continue; + } + usage(); + } + if (!fname) { + usage(); + } + fp = fopen(fname, "r"); + if (!fp) { + die("Cannot open %s: %s\n", + fname, strerror(errno)); + } + read_ehdr(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); + read_relocs(fp); + if (show_absolute) { + print_absolute_symbols(); + print_absolute_relocs(); + return 0; + } + emit_relocs(as_text); + return 0; +} diff --git a/arch/i386/boot/compressed/vmlinux.lds b/arch/i386/boot/compressed/vmlinux.lds new file mode 100644 index 000000000000..cc4854f6c6c1 --- /dev/null +++ b/arch/i386/boot/compressed/vmlinux.lds @@ -0,0 +1,43 @@ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(startup_32) +SECTIONS +{ + /* Be careful parts of head.S assume startup_32 is at + * address 0. + */ + . = 0 ; + .text.head : { + _head = . ; + *(.text.head) + _ehead = . ; + } + .data.compressed : { + *(.data.compressed) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + _end = . ; + } +} diff --git a/arch/i386/boot/compressed/vmlinux.scr b/arch/i386/boot/compressed/vmlinux.scr index 1ed9d791f863..707a88f7f29e 100644 --- a/arch/i386/boot/compressed/vmlinux.scr +++ b/arch/i386/boot/compressed/vmlinux.scr @@ -1,9 +1,10 @@ SECTIONS { - .data : { + .data.compressed : { input_len = .; LONG(input_data_end - input_data) input_data = .; *(.data) + output_len = . - 4; input_data_end = .; } } diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S index 3aec4538a113..9aa8b0518184 100644 --- a/arch/i386/boot/setup.S +++ b/arch/i386/boot/setup.S @@ -588,11 +588,6 @@ rmodeswtch_normal: call default_switch rmodeswtch_end: -# we get the code32 start address and modify the below 'jmpi' -# (loader may have changed it) - movl %cs:code32_start, %eax - movl %eax, %cs:code32 - # Now we move the system to its rightful place ... but we check if we have a # big-kernel. In that case we *must* not move it ... testb $LOADED_HIGH, %cs:loadflags @@ -788,11 +783,12 @@ a20_err_msg: a20_done: #endif /* CONFIG_X86_VOYAGER */ -# set up gdt and idt +# set up gdt and idt and 32bit start address lidt idt_48 # load idt with 0,0 xorl %eax, %eax # Compute gdt_base movw %ds, %ax # (Convert %ds:gdt to a linear ptr) shll $4, %eax + addl %eax, code32 addl $gdt, %eax movl %eax, (gdt_48+2) lgdt gdt_48 # load gdt with whatever is @@ -851,9 +847,26 @@ flush_instr: # Manual, Mixing 16-bit and 32-bit code, page 16-6) .byte 0x66, 0xea # prefix + jmpi-opcode -code32: .long 0x1000 # will be set to 0x100000 - # for big kernels +code32: .long startup_32 # will be set to %cs+startup_32 .word __BOOT_CS +.code32 +startup_32: + movl $(__BOOT_DS), %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %fs + movl %eax, %gs + movl %eax, %ss + + xorl %eax, %eax +1: incl %eax # check that A20 really IS enabled + movl %eax, 0x00000000 # loop forever if it isn't + cmpl %eax, 0x00100000 + je 1b + + # Jump to the 32bit entry point + jmpl *(code32_start - start + (DELTA_INITSEG << 4))(%esi) +.code16 # Here's a bunch of information about your current kernel.. kernel_version: .ascii UTS_RELEASE diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 2925e66a6732..b02308ee7667 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -42,7 +42,8 @@ struct screen_info { u16 pages; /* 0x32 */ u16 vesa_attributes; /* 0x34 */ u32 capabilities; /* 0x36 */ - /* 0x3a -- 0x3f reserved for future expansion */ + /* 0x3a -- 0x3b reserved for future expansion */ + /* 0x3c -- 0x3f micro stack for relocatable kernels */ }; extern struct screen_info screen_info; -- cgit v1.2.3-55-g7522 From 2fff0a48416af891dce38fd425246e337831e0bb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:05 +0100 Subject: [PATCH] Generic: Move __user cast into probe_kernel_address Caller of probe_kernel_address shouldn't need to know that pka is internally implemented with __get_user. So move the __user cast into pka. Signed-off-by: Andi Kleen --- include/linux/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index a48d7f11c7be..65a68da8bd5d 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -36,7 +36,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, long ret; \ \ inc_preempt_count(); \ - ret = __get_user(retval, addr); \ + ret = __get_user(retval, (__force typeof(*addr) __user *)addr);\ dec_preempt_count(); \ ret; \ }) -- cgit v1.2.3-55-g7522 From d7cd56111f30259e1b532a12e06f59f8e0a20355 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 7 Dec 2006 02:14:08 +0100 Subject: [PATCH] i386: cpu_detect extraction Both lhype and Xen want to call the core of the x86 cpu detect code before calling start_kernel. (extracted from larger patch) AK: folded in start_kernel header patch Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/cpu/common.c | 37 +++++++++++++++++++++---------------- include/asm-i386/processor.h | 3 +++ include/linux/start_kernel.h | 12 ++++++++++++ init/main.c | 1 + 4 files changed, 37 insertions(+), 16 deletions(-) create mode 100644 include/linux/start_kernel.h (limited to 'include/linux') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index cda41aef79ad..68bcb687019a 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -236,29 +236,14 @@ static int __cpuinit have_cpuid_p(void) return flag_is_changeable_p(X86_EFLAGS_ID); } -/* Do minimum CPU detection early. - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. - The others are not touched to avoid unwanted side effects. - - WARNING: this function is only called on the BP. Don't add code here - that is supposed to run on all CPUs. */ -static void __init early_cpu_detect(void) +void __init cpu_detect(struct cpuinfo_x86 *c) { - struct cpuinfo_x86 *c = &boot_cpu_data; - - c->x86_cache_alignment = 32; - - if (!have_cpuid_p()) - return; - /* Get vendor name */ cpuid(0x00000000, &c->cpuid_level, (int *)&c->x86_vendor_id[0], (int *)&c->x86_vendor_id[8], (int *)&c->x86_vendor_id[4]); - get_cpu_vendor(c, 1); - c->x86 = 4; if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; @@ -275,6 +260,26 @@ static void __init early_cpu_detect(void) } } +/* Do minimum CPU detection early. + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. + The others are not touched to avoid unwanted side effects. + + WARNING: this function is only called on the BP. Don't add code here + that is supposed to run on all CPUs. */ +static void __init early_cpu_detect(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + c->x86_cache_alignment = 32; + + if (!have_cpuid_p()) + return; + + cpu_detect(c); + + get_cpu_vendor(c, 1); +} + static void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 5f0418d0078c..a52d65440429 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -20,6 +20,7 @@ #include #include #include +#include /* flag for disabling the tsc */ extern int tsc_disable; @@ -112,6 +113,8 @@ extern struct cpuinfo_x86 cpu_data[]; extern int cpu_llc_id[NR_CPUS]; extern char ignore_fpu_irq; +void __init cpu_detect(struct cpuinfo_x86 *c); + extern void identify_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); diff --git a/include/linux/start_kernel.h b/include/linux/start_kernel.h new file mode 100644 index 000000000000..d3e5f2756545 --- /dev/null +++ b/include/linux/start_kernel.h @@ -0,0 +1,12 @@ +#ifndef _LINUX_START_KERNEL_H +#define _LINUX_START_KERNEL_H + +#include +#include + +/* Define the prototype for start_kernel here, rather than cluttering + up something else. */ + +extern asmlinkage void __init start_kernel(void); + +#endif /* _LINUX_START_KERNEL_H */ diff --git a/init/main.c b/init/main.c index 36f608a7cfba..985c9ed86050 100644 --- a/init/main.c +++ b/init/main.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-55-g7522 From 72486f1f8f0a2bc828b9d30cf4690cf2dd6807fc Mon Sep 17 00:00:00 2001 From: Siddha, Suresh B Date: Thu, 7 Dec 2006 02:14:10 +0100 Subject: [PATCH] i386: change the 'no_control' field to 'hotpluggable' in the struct cpu Change the 'no_control' field in the cpu struct to a more positive and better term 'hotpluggable'. And change(/cleanup) the logic accordingly. Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton --- arch/i386/kernel/topology.c | 4 ++-- arch/ia64/kernel/topology.c | 8 ++++---- arch/powerpc/kernel/sysfs.c | 8 ++++---- drivers/base/cpu.c | 6 +++--- include/linux/cpu.h | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index 844c08fdb225..79cf608e14ca 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -44,8 +44,8 @@ int arch_register_cpu(int num) * Also certain PCI quirks require not to enable hotplug control * for all CPU's. */ - if (!num || !enable_cpu_hotplug) - cpu_devices[num].cpu.no_control = 1; + if (num && enable_cpu_hotplug) + cpu_devices[num].cpu.hotpluggable = 1; return register_cpu(&cpu_devices[num].cpu, num); } diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 5629b45e89c6..687500ddb4b8 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -31,11 +31,11 @@ int arch_register_cpu(int num) { #if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) /* - * If CPEI cannot be re-targetted, and this is - * CPEI target, then dont create the control file + * If CPEI can be re-targetted or if this is not + * CPEI target, then it is hotpluggable */ - if (!can_cpei_retarget() && is_cpu_cpei_target(num)) - sysfs_cpus[num].cpu.no_control = 1; + if (can_cpei_retarget() || !is_cpu_cpei_target(num)) + sysfs_cpus[num].cpu.hotpluggable = 1; map_cpu_to_node(num, node_cpuid[num].nid); #endif diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 22123a0d5416..63ed265b7f09 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -239,7 +239,7 @@ static void unregister_cpu_online(unsigned int cpu) struct cpu *c = &per_cpu(cpu_devices, cpu); struct sys_device *s = &c->sysdev; - BUG_ON(c->no_control); + BUG_ON(!c->hotpluggable); if (!firmware_has_feature(FW_FEATURE_ISERIES) && cpu_has_feature(CPU_FTR_SMT)) @@ -424,10 +424,10 @@ static int __init topology_init(void) * CPU. For instance, the boot cpu might never be valid * for hotplugging. */ - if (!ppc_md.cpu_die) - c->no_control = 1; + if (ppc_md.cpu_die) + c->hotpluggable = 1; - if (cpu_online(cpu) || (c->no_control == 0)) { + if (cpu_online(cpu) || c->hotpluggable) { register_cpu(c, cpu); sysdev_create_file(&c->sysdev, &attr_physical_id); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 1f745f12f94e..7fd095efaebd 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -104,8 +104,8 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); /* * register_cpu - Setup a driverfs device for a CPU. - * @cpu - Callers can set the cpu->no_control field to 1, to indicate not to - * generate a control file in sysfs for this CPU. + * @cpu - cpu->hotpluggable field set to 1 will generate a control file in + * sysfs for this CPU. * @num - CPU number to use when creating the device. * * Initialize and register the CPU device. @@ -119,7 +119,7 @@ int __devinit register_cpu(struct cpu *cpu, int num) error = sysdev_register(&cpu->sysdev); - if (!error && !cpu->no_control) + if (!error && cpu->hotpluggable) register_cpu_control(cpu); if (!error) cpu_sys_devices[num] = &cpu->sysdev; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index f02d71bf6894..ad90340e7dba 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -27,7 +27,7 @@ struct cpu { int node_id; /* The node which contains the CPU */ - int no_control; /* Should the sysfs control file be created? */ + int hotpluggable; /* creates sysfs control file if hotpluggable */ struct sys_device sysdev; }; -- cgit v1.2.3-55-g7522 From e1cccf48b182dd743c3c83a4fdf8dc570a43b393 Mon Sep 17 00:00:00 2001 From: Artiom Myaskouvskey Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] i386: call efi_get_time during suspend Function efi_get_time called not only during init kernel phase but also during suspend (from get_cmos_time). When it is called from get_cmos_time the corresponding runtime service should be called in virtual and not in physical mode. Signed-off-by: Artiom Myaskouvskey Signed-off-by: Andi Kleen Cc: "Narayanan, Chandramouli" Cc: "Jiossy, Rami" Cc: "Satt, Shai" Cc: Andi Kleen Cc: Matt Domsch Signed-off-by: Andrew Morton --- arch/i386/kernel/efi.c | 17 ++++++++++++----- include/linux/efi.h | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index 8b40648d0ef0..b92c7f0a358a 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -194,17 +194,24 @@ inline int efi_set_rtc_mmss(unsigned long nowtime) return 0; } /* - * This should only be used during kernel init and before runtime - * services have been remapped, therefore, we'll need to call in physical - * mode. Note, this call isn't used later, so mark it __init. + * This is used during kernel init before runtime + * services have been remapped and also during suspend, therefore, + * we'll need to call both in physical and virtual modes. */ -inline unsigned long __init efi_get_time(void) +inline unsigned long efi_get_time(void) { efi_status_t status; efi_time_t eft; efi_time_cap_t cap; - status = phys_efi_get_time(&eft, &cap); + if (efi.get_time) { + /* if we are in virtual mode use remapped function */ + status = efi.get_time(&eft, &cap); + } else { + /* we are in physical mode */ + status = phys_efi_get_time(&eft, &cap); + } + if (status != EFI_SUCCESS) printk("Oops: efitime: can't read time status: 0x%lx\n",status); diff --git a/include/linux/efi.h b/include/linux/efi.h index 66d621dbcb6c..91ecf49fbf21 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -300,7 +300,7 @@ extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource); -extern unsigned long __init efi_get_time(void); +extern unsigned long efi_get_time(void); extern int __init efi_set_rtc_mmss(unsigned long nowtime); extern struct efi_memory_map memmap; -- cgit v1.2.3-55-g7522 From bf7e6a196318316e921f357557fca9d11d15f486 Mon Sep 17 00:00:00 2001 From: Artiom Myaskouvskey Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] i386: Preserve EFI run time regions with memmap parameter When using memmap kernel parameter in EFI boot we should also add to memory map memory regions of runtime services to enable their mapping later. AK: merged and cleaned up the patch Signed-off-by: Artiom Myaskouvskey Signed-off-by: Andi Kleen --- arch/i386/kernel/e820.c | 60 +++++++++++++++++++++++++++++++++++-------------- arch/i386/mm/init.c | 2 -- include/linux/efi.h | 1 + 3 files changed, 44 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index b704790f7969..2f7d0a92fd7c 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -742,29 +742,55 @@ void __init print_memory_map(char *who) } } -void __init limit_regions(unsigned long long size) +static __init __always_inline void efi_limit_regions(unsigned long long size) { unsigned long long current_addr = 0; + efi_memory_desc_t *md, *next_md; + void *p, *p1; + int i, j; + + j = 0; + p1 = memmap.map; + for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { + md = p; + next_md = p1; + current_addr = md->phys_addr + + PFN_PHYS(md->num_pages); + if (is_available_memory(md)) { + if (md->phys_addr >= size) continue; + memcpy(next_md, md, memmap.desc_size); + if (current_addr >= size) { + next_md->num_pages -= + PFN_UP(current_addr-size); + } + p1 += memmap.desc_size; + next_md = p1; + j++; + } else if ((md->attribute & EFI_MEMORY_RUNTIME) == + EFI_MEMORY_RUNTIME) { + /* In order to make runtime services + * available we have to include runtime + * memory regions in memory map */ + memcpy(next_md, md, memmap.desc_size); + p1 += memmap.desc_size; + next_md = p1; + j++; + } + } + memmap.nr_map = j; + memmap.map_end = memmap.map + + (memmap.nr_map * memmap.desc_size); +} + +void __init limit_regions(unsigned long long size) +{ + unsigned long long current_addr; int i; print_memory_map("limit_regions start"); if (efi_enabled) { - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map, i = 0; p < memmap.map_end; - p += memmap.desc_size, i++) { - md = p; - current_addr = md->phys_addr + (md->num_pages << 12); - if (md->type == EFI_CONVENTIONAL_MEMORY) { - if (current_addr >= size) { - md->num_pages -= - (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); - memmap.nr_map = i + 1; - return; - } - } - } + efi_limit_regions(size); + return; } for (i = 0; i < e820.nr_map; i++) { current_addr = e820.map[i].addr + e820.map[i].size; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 167416155ee4..f4dd048187ff 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -192,8 +192,6 @@ static inline int page_kills_ppro(unsigned long pagenr) return 0; } -extern int is_available_memory(efi_memory_desc_t *); - int page_is_ram(unsigned long pagenr) { int i; diff --git a/include/linux/efi.h b/include/linux/efi.h index 91ecf49fbf21..df1c91855f0e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -302,6 +302,7 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource); extern unsigned long efi_get_time(void); extern int __init efi_set_rtc_mmss(unsigned long nowtime); +extern int is_available_memory(efi_memory_desc_t * md); extern struct efi_memory_map memmap; /** -- cgit v1.2.3-55-g7522