diff options
Diffstat (limited to 'contrib/syslinux-4.02/memdisk/memdisk16.asm')
-rw-r--r-- | contrib/syslinux-4.02/memdisk/memdisk16.asm | 793 |
1 files changed, 793 insertions, 0 deletions
diff --git a/contrib/syslinux-4.02/memdisk/memdisk16.asm b/contrib/syslinux-4.02/memdisk/memdisk16.asm new file mode 100644 index 0000000..6bafae7 --- /dev/null +++ b/contrib/syslinux-4.02/memdisk/memdisk16.asm @@ -0,0 +1,793 @@ +;; -*- fundamental -*- +;; ----------------------------------------------------------------------- +;; +;; Copyright 1994-2008 H. Peter Anvin - All Rights Reserved +;; Copyright 2009 Intel Corporation; author: H. Peter Anvin +;; +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, Inc., 53 Temple Place Ste 330, +;; Boston MA 02111-1307, USA; either version 2 of the License, or +;; (at your option) any later version; incorporated herein by reference. +;; +;; ----------------------------------------------------------------------- + +;; +;; init16.asm +;; +;; Routine to initialize and to trampoline into 32-bit +;; protected memory. This code is derived from bcopy32.inc and +;; com32.inc in the main SYSLINUX distribution. 
;;

%include '../version.gen'

MY_CS           equ 0x0800              ; Real-mode segment this code relocates itself to
CS_BASE         equ (MY_CS << 4)        ; Linear address of that segment

; Bounce buffer in low memory, 0x1000 paragraphs (64K) above MY_CS
BOUNCE_SEG      equ (MY_CS+0x1000)

; Nonzero: call try_wbinvd (cache flush) before toggling A20
%define DO_WBINVD 0

;
; Declare all sections up front, each 16-byte aligned.  The 512-byte
; real-mode stack gets its own nobits section; stack_end is the
; address just past it and is used as the initial stack pointer.
;
                section .rodata align=16
                section .data   align=16
                section .bss    align=16
                section .stack  align=16 nobits
stack           resb 512
stack_end       equ $

;; -----------------------------------------------------------------------
;;  Kernel image header
;; -----------------------------------------------------------------------

                section .text           ; Must be first in image
                bits 16

;
; First 512 bytes of the image ("boot sector" area):
; 497 bytes of command line buffer, then 15 bytes of legacy fields
; ending in the 0xAA55 signature, so that _start lands at offset 0x200.
;
cmdline         times 497 db 0          ; The command line is copied here
setup_sects     db 0
root_flags      dw 0
syssize         dw 0
swap_dev        dw 0
ram_size        dw 0
vid_mode        dw 0
root_dev        dw 0
boot_flag       dw 0xAA55

;
; Entry point at offset 0x200, followed by the setup header proper.
; NOTE(review): the layout looks like the Linux kernel setup header
; ("HdrS", version 2.03) -- confirm against the boot protocol before
; moving or resizing any field.
;
_start:         jmp short start

                db "HdrS"               ; Header signature
                dw 0x0203               ; Header version number

realmode_swtch  dw 0, 0                 ; default_switch, SETUPSEG
start_sys_seg   dw 0x1000               ; obsolete
version_ptr     dw memdisk_version-0x200        ; version string ptr (relative to _start)
type_of_loader  db 0                    ; Filled in by boot loader
loadflags       db 1                    ; Please load high
setup_move_size dw 0                    ; Unused
code32_start    dd 0x100000             ; 32-bit start address
ramdisk_image   dd 0                    ; Loaded ramdisk image address
ramdisk_size    dd 0                    ; Size of loaded ramdisk
bootsect_kludge dw 0, 0
heap_end_ptr    dw 0
pad1            dw 0
cmd_line_ptr    dd 0                    ; Command line (linear address, 0 = none)
ramdisk_max     dd 0xffffffff           ; Highest allowed ramdisk address

;
; These fields aren't real setup fields, they're poked in by the
; 32-bit code.
;
b_esdi          dd 0                    ; ES:DI for boot sector invocation
b_edx           dd 0                    ; EDX for boot sector invocation
b_sssp          dd 0                    ; SS:SP on boot sector invocation
b_csip          dd 0                    ; CS:IP on boot sector invocation

                section .rodata
memdisk_version:
                db "MEMDISK ", VERSION_STR, " ", DATE, 0

;; -----------------------------------------------------------------------
;;  End kernel image header
;; -----------------------------------------------------------------------

;
; start: relocate the whole setup image to segment MY_CS.
;
; Copies (setup_sects + 1) 512-byte sectors from DS:0 to MY_CS:0, then
; points DS, SS and CS at MY_CS and sets up the stack at stack_end.
; Moving down reduces the risk of conflicts; the far jump makes CS
; agree with the other segment registers.
;
                section .text
                bits 16
start:
                mov ax, MY_CS
                mov es, ax                      ; ES = destination segment
                movzx cx, byte [setup_sects]
                inc cx                          ; One more for the boot sector
                shl cx, 7                       ; Sectors -> dwords (128 per sector)
                xor si, si
                xor di, di
                mov fs, si                      ; FS = 0
                cld
                rep movsd                       ; DS:0 -> MY_CS:0
                mov ds, ax
                mov ss, ax
                mov esp, stack_end
                jmp MY_CS:.relocated            ; Reload CS with MY_CS
.relocated:

;
; copy_cmdline: copy the boot loader's command line to offset 0 of our
; own segment (the cmdline buffer in the "boot sector" area).
;
; cmd_line_ptr is treated as a linear address and split into GS:SI.
; At most 496 bytes are copied, and a terminating NUL is always
; written, even when there is no command line.
;
copy_cmdline:
                xor di, di                      ; ES:DI = start of the cmdline buffer
                mov eax, [cmd_line_ptr]
                test eax, eax
                jz .terminate                   ; No command line supplied
                mov si, ax
                shr eax, 4                      ; Linear address -> segment
                and si, 0x000F                  ; Offset within that paragraph
                mov gs, ax
                mov cx, 496                     ; Leave room for the terminator
.next_byte:
                gs lodsb
                test al, al
                jz .terminate                   ; End of source string
                stosb
                loop .next_byte
.terminate:
                xor al, al
                stosb                           ; NUL-terminate the copy

;
; Hand over to the 32-bit code.
;
                sti
                call init32
;
; When init32 returns, the setup has been done and the new boot sector
; loaded; all that is left is to run that boot sector.
;
; The setup function will have poked the entry register values into
; b_esdi, b_edx, b_sssp and b_csip in the setup area.
;
                movzx edi, word [cs:b_esdi]
                mov es, word [cs:b_esdi+2]
                mov edx, [cs:b_edx]

                cli
                xor esi, esi                    ; No partition table involved
                mov ds, si                      ; Make all the segments consistent
                mov fs, si
                mov gs, si
                lss sp, [cs:b_sssp]
                movzx esp, sp                   ; Clear the upper half of ESP
                jmp far [cs:b_csip]

;
; We enter protected mode, set up a flat 32-bit environment, run rep movsd
; and then exit.  IMPORTANT: This code assumes cs == MY_CS.
;
; This code is probably excessively anal-retentive in its handling of
; segments, but this stuff is painful enough as it is without having to rely
; on everything happening "as it ought to."
;
DummyTSS        equ 0x580               ; Hopefully safe place in low memory

                section .data

;
; desc base, limit, flags
;
; Emits one 8-byte segment descriptor:
;   dword 0: limit[15:0]                 | base[15:0] << 16
;   dword 1: base[23:16] | flags << 8 (masked with 0xf0ff)
;            | limit[19:16]              | base[31:24]
;
%macro  desc 3
        dd (%2 & 0xffff) | ((%1 & 0xffff) << 16)
        dd (%1 & 0xff000000) | (%2 & 0xf0000) | ((%3 & 0xf0ff) << 8) | ((%1 & 0x00ff0000) >> 16)
%endmacro

;
; GDT used for the protected-mode trampoline.  The otherwise unused
; null descriptor (selector 0000) doubles as the 6-byte operand for
; LGDT: 16-bit limit followed by the 32-bit linear base.
;
                align 8, db 0
call32_gdt:     dw call32_gdt_size-1    ; GDT limit
.adj1:          dd call32_gdt+CS_BASE   ; GDT linear address
                dw 0                    ; Pad the entry to 8 bytes

                ; 0008: Dummy TSS to make Intel VT happy
                ; Should never be actually accessed...
                desc DummyTSS, 103, 0x8089

                ; 0010: 16-bit code segment, readable, dpl 0, base CS_BASE, 64K
                desc CS_BASE, 0xffff, 0x009b

                ; 0018: 16-bit data segment, read/write, dpl 0, base CS_BASE, 64K
                desc CS_BASE, 0xffff, 0x0093

                ; 0020: 32-bit code segment, readable, dpl 0, base 0, 4G
                desc 0, 0xfffff, 0xc09b

                ; 0028: 32-bit data segment, read/write, dpl 0, base 0, 4G
                desc 0, 0xfffff, 0xc093

call32_gdt_size:        equ $-call32_gdt

; Message printed by enable_a20 when every method has failed
err_a20:        db 'ERROR: A20 gate not responding!',13,10,0

                section .bss
                alignb 4
Return          resd 1                  ; Return value
SavedSP         resw 1                  ; Real-mode SP saved across PM excursions
A20Tries        resb 1                  ; Remaining enable_a20 attempts

                section .data
                align 4, db 0
Target          dd 0                    ; Target address
Target_Seg      dw 20h                  ; Target CS

A20Type         dw 0                    ; A20 method in use; 0 = unknown

                section .text
                bits 16
;
; Routines to enable and disable (yuck) A20.  These routines are gathered
; from tips from a couple of sources, including the Linux kernel and
; http://www.x86.org/.  The need for the delay to be as large as given here
; is indicated by Donnie Barnes of RedHat, the problematic system being an
; IBM ThinkPad 760EL.
;
; We typically toggle A20 twice for every 64K transferred.
;
%define io_delay        call _io_delay
%define IO_DELAY_PORT   80h             ; Invalid port (we hope!)
%define disable_wait    32              ; Number of a20_test polls to wait for a disable

; Values stored in A20Type; each is an index into A20List/A20DList
%define A20_DUNNO       0               ; A20 type unknown
%define A20_NONE        1               ; A20 always on?
%define A20_BIOS        2               ; A20 BIOS enable
%define A20_KBC         3               ; A20 through KBC
%define A20_FAST        4               ; A20 through port 92h

; Per-method entry points for enabling (A20List) and disabling (A20DList)
                align 2, db 0
A20List         dw a20_dunno, a20_none, a20_bios, a20_kbc, a20_fast
A20DList        dw a20d_dunno, a20d_none, a20d_bios, a20d_kbc, a20d_fast
a20_adjust_cnt  equ ($-A20List)/2

;
; slow_out: write AL to port DX, then fall into the I/O delay.
; _io_delay: short delay made of two writes of AL to IO_DELAY_PORT.
; Neither routine changes registers or flags.
;
slow_out:       out dx, al              ; Fall through

_io_delay:      out IO_DELAY_PORT, al
                out IO_DELAY_PORT, al
                ret

;
; enable_a20: turn the A20 gate on.
;
; Starts at the method recorded in A20Type and falls through the
; remaining ones in order: none needed, BIOS, keyboard controller,
; "fast" gate on port 92h.  A20Type is updated to the method being
; tried.  The whole sequence is retried up to 255 times; if A20 never
; responds, err_a20 is printed and the machine halts.
;
; All registers are preserved (pushad/popad).
;
enable_a20:
                pushad
                mov byte [A20Tries], 255        ; Number of attempts before giving up

try_enable_a20:

;
; Flush the caches
;
%if DO_WBINVD
                call try_wbinvd
%endif

;
; Dispatch on the recorded A20 type (unknown starts from the top)
;
                mov bp, [A20Type]
                add bp, bp                      ; Index -> word offset
.adj4:          jmp word [bp+A20List]

;
; First possibility: there is no A20 gate, or it is already open
;
a20_dunno:
a20_none:
                mov byte [A20Type], A20_NONE
                call a20_test
                jnz a20_done

;
; Second: ask the BIOS (INT 15h AX=2401h)
;
a20_bios:
                mov byte [A20Type], A20_BIOS
                mov ax, 2401h
                pushf                           ; Some BIOSes muck with IF
                int 15h
                popf

                call a20_test
                jnz a20_done

;
; Third: the keyboard controller A20 gate
;
a20_kbc:
                mov dl, 1                       ; Let empty_8042 exit early if A20 comes up
                call empty_8042
                jnz a20_done                    ; A20 live, no need to use KBC

                mov byte [A20Type], A20_KBC     ; Starting KBC command sequence

                mov al, 0D1h                    ; Command: write output port
                out 064h, al
                call empty_8042_uncond

                mov al, 0DFh                    ; Output port value: A20 on
                out 060h, al
                call empty_8042_uncond

                ; Apparently the UHCI spec assumes that A20 toggle
                ; ends with a null command (assumed to be for synchronization?)
                ; Put it here to see if it helps anything...
                mov al, 0FFh                    ; Null command
                out 064h, al
                call empty_8042_uncond

                ; Verify that A20 actually is enabled.  Do that by
                ; observing a word in low memory and the same word in
                ; the HMA until they are no longer coherent.  Note that
                ; we don't do the same check in the disable case, because
                ; we don't want to *require* A20 masking (SYSLINUX should
                ; work fine without it, if the BIOS does.)
.kbc_wait:      push cx
                xor cx, cx                      ; CX = 0: LOOP runs 65536 times
.kbc_poll:
                call a20_test
                jnz a20_done_pop
                loop .kbc_poll

                pop cx
;
; Running out of options here.  Final attempt: enable the "fast A20 gate"
;
a20_fast:
                mov byte [A20Type], A20_FAST    ; Record the method being tried
                in al, 092h
                or al, 02h                      ; Set the A20 bit
                and al, ~01h                    ; Don't accidentally reset the machine!
                out 092h, al

.fast_wait:     push cx
                xor cx, cx                      ; CX = 0: LOOP runs 65536 times
.fast_poll:
                call a20_test
                jnz a20_done_pop
                loop .fast_poll

                pop cx

;
; Oh bugger.  A20 is not responding.  Try frobbing it again; eventually give up
; and report failure to the user.
;

                dec byte [A20Tries]
                jnz try_enable_a20


                ; Out of attempts: print err_a20 one character at a
                ; time via INT 10h AH=0Eh, then halt
                mov si, err_a20
print_err:
                lodsb
                test al, al
                jz die                          ; NUL terminator reached
                mov bx, 7
                mov ah, 0xe
                int 10h
                jmp print_err


;
; die: halt forever, with interrupts enabled
;
die:
                sti
.halt:          hlt
                jmp short .halt

;
; A20 unmasked, proceed...
; a20_done_pop is the exit for the polling loops, which still have CX
; saved on the stack.
;
a20_done_pop:   pop cx
a20_done:       popad
                ret

;
; This routine tests if A20 is enabled (ZF = 0).  This routine
; must not destroy any register contents.
;

; This is the INT 1Fh vector, which on standard PCs is used by the
; BIOS when the screen is in graphics mode.  Even if it is, it points to
; data, not code, so it should be safe enough to fiddle with.
A20Test         equ (1Fh*4)

;
; a20_test: returns ZF = 0 if A20 is enabled, ZF = 1 if it is not.
;
; Compares the dword at 0000:A20Test with the dword at
; FFFF:A20Test+10h, which is the same location when address bit 20 is
; masked.  If they already differ, A20 is on.  Otherwise the low copy
; is incremented and re-compared up to 32 times, and the original
; value is put back afterwards.
;
; All registers are preserved; only the flags carry the result.
;
a20_test:
                push ds
                push es
                push cx
                push eax
                xor ax, ax
                mov ds, ax                      ; DS = 0
                dec ax
                mov es, ax                      ; ES = 0FFFFh
                mov cx, 32                      ; Maximum number of probes
                mov eax, [A20Test]
                cmp eax, [es:A20Test+10h]
                jne .exit                       ; Already different: A20 is on
                push eax                        ; Keep the original contents
.probe:
                inc eax
                mov [A20Test], eax
                io_delay
                cmp eax, [es:A20Test+10h]
                loopz .probe                    ; Retry while equal and CX != 0
                pop dword [A20Test]             ; Restore original value
.exit:
                pop eax
                pop cx
                pop es
                pop ds
                ret

;
; disable_a20: turn the A20 gate off again, using the method recorded
; in A20Type by enable_a20.  Unknown/none do nothing.  After issuing
; the disable, a20_test is polled up to disable_wait times, but a gate
; that stays on is not treated as an error.
;
; All registers are preserved (pushad/popad).
;
disable_a20:
                pushad
;
; Flush the caches
;
%if DO_WBINVD
                call try_wbinvd
%endif

                mov bp, [A20Type]
                add bp, bp                      ; Index -> word offset
.adj5:          jmp word [bp+A20DList]

;
; Disable through the BIOS (INT 15h AX=2400h)
;
a20d_bios:
                mov ax, 2400h
                pushf                           ; Some BIOSes muck with IF
                int 15h
                popf
                jmp short a20d_snooze

;
; Disable the "fast A20 gate"
;
a20d_fast:
                in al, 092h
                and al, ~03h                    ; Clear the A20 bit; keep the reset bit clear
                out 092h, al
                jmp short a20d_snooze

;
; Disable the keyboard controller A20 gate
;
a20d_kbc:
                call empty_8042_uncond

                mov al, 0D1h                    ; Command: write output port
                out 064h, al
                call empty_8042_uncond

                mov al, 0DDh                    ; Output port value: A20 off
                out 060h, al
                call empty_8042_uncond

                mov al, 0FFh                    ; Null command/synchronization
                out 064h, al
                call empty_8042_uncond

                ; Wait a bit for it to take effect
a20d_snooze:
                push cx
                mov cx, disable_wait
.wait:          call a20_test
                jz .settled                     ; A20 now reads as disabled
                loop .wait
.settled:       pop cx
a20d_dunno:
a20d_none:
                popad
                ret

;
; Routine to empty the 8042 KBC controller.  If dl != 0
; then we will test A20 in the loop and exit if A20 is
; suddenly enabled.
;
; empty_8042_uncond: as empty_8042, but with DL forced to 0 so the
;                    A20 state never causes an early exit.
;
; Returns: ZF = 0 if we left early because A20 turned out to be
;          enabled (only possible when DL != 0); ZF = 1 after a normal
;          drain (the delay calls do not alter flags).
; Clobbers AL (and DL for the _uncond entry).
;
empty_8042_uncond:
                xor dl,dl
empty_8042:
                call a20_test
                jz .poll                ; A20 still off: keep draining
                and dl,dl
                jnz .done               ; A20 on and early exit allowed
.poll:          io_delay
                in al, 064h             ; Status port
                test al,1
                jz .no_output
                io_delay
                in al, 060h             ; Read (and discard) pending output
                jmp short empty_8042
.no_output:
                test al,2
                jnz empty_8042          ; Input buffer still full: wait
                io_delay
.done:          ret

;
; Execute a WBINVD instruction if possible on this CPU
;
%if DO_WBINVD
try_wbinvd:
                wbinvd
                ret
%endif

                section .bss
                alignb 4
PMESP           resd 1                  ; Protected mode %esp

                section .idt nobits align=4096
                alignb 4096
pm_idt          resb 4096       ; Protected-mode IDT, followed by interrupt stubs

; Linear address the 32-bit program is entered at
pm_entry:       equ 0x100000

; 6-byte LIDT operand restoring the real-mode interrupt table
                section .rodata
                align 2, db 0
call32_rmidt:
                dw 0ffffh               ; Limit
                dd 0                    ; Address

;
; 6-byte LIDT/SIDT operand for the protected-mode IDT.  The limit is
; the offset of the last valid byte, i.e. table size minus one.
; call32_enter_rm overwrites this with SIDT each time it leaves
; protected mode.
;
                section .data
                align 2, db 0
call32_pmidt:
                dw 8*256-1              ; Limit (256 eight-byte gates)
                dd 0                    ; Address (entered later)

                section .text
;
; init32: main entry point.  Enters protected mode and starts the
; 32-bit program through call32_call_start.
;
init32:
                mov bx,call32_call_start        ; Where to go in PM

;
; call32_enter_pm: switch from real mode to flat 32-bit protected mode.
;
; In:  BX = entry point to continue at, relative to the real-mode CS.
; Out: jumps to that entry point in 32-bit mode with
;      EBP = linear base of the real-mode segment (CS << 4),
;      CS = 20h, DS = ES = SS = 28h, FS = GS = 0, ESP = [PMESP].
;      Interrupts are disabled and A20 is enabled.  The real-mode SP
;      is saved in SavedSP.
;
call32_enter_pm:
                mov ax,cs
                mov ds,ax
                movzx ebp,ax
                shl ebp,4               ; EBP <- CS_BASE
                movzx ebx,bx
                add ebx,ebp             ; entry point += CS_BASE
                cli
                mov [SavedSP],sp
                cld
                call enable_a20
                mov byte [call32_gdt+8+5],89h   ; Mark TSS unbusy
                o32 lgdt [call32_gdt]   ; Set up GDT
                o32 lidt [call32_pmidt] ; Set up IDT
                mov eax,cr0
                or al,1
                mov cr0,eax             ; Enter protected mode
                jmp 20h:strict dword .in_pm+CS_BASE
; Address of the 32-bit offset inside the far jump above; handed to
; the 32-bit code by call32_call_start
.pm_jmp         equ $-6


                bits 32
.in_pm:
                xor eax,eax             ; Available for future use...
                mov fs,eax
                mov gs,eax
                lldt ax

                mov al,28h              ; Set up data segments
                mov es,eax
                mov ds,eax
                mov ss,eax

                mov al,08h              ; Selector of the dummy TSS
                ltr ax

                mov esp,[ebp+PMESP]     ; Load protmode %esp if available
                jmp ebx                 ; Go to where we need to go

;
; This is invoked before first dispatch of the 32-bit code, in 32-bit mode
;
call32_call_start:
                ;
                ; Set up a temporary stack in the bounce buffer;
                ; start32.S will override this to point us to the real
                ; high-memory stack.
                ;
                mov esp, (BOUNCE_SEG << 4) + 0x10000

                ; Arguments for the 32-bit program, pushed last-first
                push dword call32_enter_rm.rm_jmp+CS_BASE
                push dword call32_enter_pm.pm_jmp+CS_BASE
                push dword stack_end            ; RM size
                push dword call32_gdt+CS_BASE
                push dword call32_handle_interrupt+CS_BASE
                push dword CS_BASE              ; Segment base
                push dword (BOUNCE_SEG << 4)    ; Bounce buffer address
                push dword call32_syscall+CS_BASE ; Syscall entry point

                ; pm_entry is a linear address; this code is assembled
                ; relative to CS_BASE, hence the adjustment
                call pm_entry-CS_BASE           ; Run the program...

                ; ... fall through to call32_exit ...

call32_exit:
                mov bx,call32_done      ; Return to command loop

;
; call32_enter_rm: switch from 32-bit protected mode back to real mode.
;
; In:  BX = real-mode offset to continue at.
; Out: jumps to CS:BX in real mode with DS = ES = FS = GS = SS = CS and
;      SP = [SavedSP].  Interrupts are disabled.  The protected-mode
;      ESP is saved in PMESP and the current IDT register in
;      call32_pmidt; EBP = run-time linear base of this code.
;
call32_enter_rm:
                ; Careful here... the PM code may have relocated the
                ; entire RM code, so we need to figure out exactly
                ; where we are executing from.  If the PM code has
                ; relocated us, it *will* have adjusted the GDT to
                ; match, though.
                call .here
.here:          pop ebp
                sub ebp,.here           ; EBP = actual linear base
                o32 sidt [ebp+call32_pmidt]
                cli
                cld
                mov [ebp+PMESP],esp     ; Save exit %esp
                xor esp,esp             ; Make sure the high bits are zero
                jmp 10h:.in_pm16        ; Return to 16-bit mode first

                bits 16
.in_pm16:
                mov ax,18h              ; Real-mode-like segment
                mov es,ax
                mov ds,ax
                mov ss,ax
                mov fs,ax
                mov gs,ax

                lidt [call32_rmidt]     ; Real-mode IDT (rm needs no GDT)
                mov eax,cr0
                and al,~1
                mov cr0,eax
                jmp MY_CS:.in_rm
; Address of the segment word inside the far jump above; handed to
; the 32-bit code by call32_call_start
.rm_jmp         equ $-2

.in_rm:                                 ; Back in real mode
                mov ax,cs
                mov ds,ax
                mov es,ax
                mov fs,ax
                mov gs,ax
                mov ss,ax
                mov sp,[SavedSP]        ; Restore stack
                jmp bx                  ; Go to wherever we need to go...
;
; call32_done: real-mode continuation selected by call32_exit once the
; 32-bit program has returned.  Disables A20, re-enables interrupts and
; returns on the restored real-mode stack (presumably to the
; instruction after "call init32" -- verify if the stack layout changes).
;
call32_done:
                call disable_a20
                sti
                ret

;
; 16-bit support code
;
                bits 16

;
; call32_int_rm: real-mode half of interrupt reflection.
;
; In:  EDX = segment:offset taken from the IVT entry to invoke.
; Builds an interrupt-style return frame (flags, CS, .resume) so that
; the handler's IRET lands on .resume, then far-returns into the handler.
; Afterwards goes back to protected mode at call32_int_resume.
;
call32_int_rm:
                pushf                           ; Frame: flags
                push cs                         ; Frame: return segment
                push word .resume               ; Frame: return offset
                push dword edx                  ; Handler segment:offset
                retf                            ; "Call" the handler
.resume:        ; The handler's IRET brings us back here
                mov bx, call32_int_resume
                jmp call32_enter_pm             ; Go back to PM

;
; call32_sys_rm: real-mode half of a system call.
;
; On entry the real-mode stack holds the 44-byte register block
; (GS, FS, ES, DS, the PUSHAD image, EFLAGS) followed by the target
; offset:segment and the return offset/segment/flags built by
; call32_syscall.  The registers are loaded, the target is invoked
; with RETF, and when it comes back to .return the registers are
; stored again in the same layout before re-entering protected mode
; at call32_sys_resume.
;
call32_sys_rm:
                pop gs
                pop fs
                pop es
                pop ds
                popad
                popfd
                retf                            ; Invoke routine
.return:
                pushfd
                pushad
                push ds
                push es
                push fs
                push gs
                mov bx, call32_sys_resume
                jmp call32_enter_pm

;
; 32-bit support code
;
                bits 32

;
; call32_handle_interrupt: entered when an interrupt arrives in
; protected mode.  Context-switches to real mode and invokes the
; corresponding real-mode interrupt routine.
;
; When this gets invoked, the registers are saved on the stack and
; AL contains the interrupt number (it indexes the interrupt vector
; table at linear address 0).
;
call32_handle_interrupt:
                movzx eax, al
                xor ebx, ebx                    ; Actually makes the code smaller
                mov edx, [ebx+eax*4]            ; EDX = segment:offset of the routine
                mov bx, call32_int_rm
                jmp call32_enter_rm             ; Go to real mode

;
; call32_int_resume: back in protected mode after the real-mode
; handler ran; restore the saved registers and end the interrupt.
;
call32_int_resume:
                popad
                iret

;
; Syscall invocation.  We manifest a structure on the real-mode stack,
; containing the call32sys_t structure from <call32.h> as well as
; the following entries (from low to high address):
; - Target offset
; - Target segment
; - Return offset
; - Return segment (== real mode cs)
; - Return flags
;
; Stack arguments as read below, after PUSHFD/PUSHAD:
;   [esp+10*4] interrupt number (low byte)
;   [esp+11*4] pointer to the source register block
;   [esp+12*4] pointer to the destination register block, or NULL
;
call32_syscall:
                pushfd                          ; Save IF among other things...
                pushad                          ; We only need to save some, but...
                cld
                call .base
.base:          pop ebp
                sub ebp, .base                  ; EBP = run-time linear base of this code

                movzx edi, word [ebp+SavedSP]
                sub edi, 54                     ; 44-byte block + 10 bytes of frame
                mov [ebp+SavedSP], di
                add edi, ebp                    ; Real-mode stack offset -> linear address

                mov esi, [esp+11*4]             ; Source regs
                xor ecx, ecx
                mov cl, 11                      ; 11 dwords = 44 bytes
                rep movsd

                movzx eax, byte [esp+10*4]      ; Interrupt number
                ; ecx == 0 here; adding it to the EA makes the
                ; encoding smaller
                mov eax, [ecx+eax*4]            ; Get IVT entry
                stosd                           ; Target offset:segment
                mov ax, call32_sys_rm.return    ; Return offset
                stosw
                mov eax, ebp
                shr eax, 4                      ; Return segment
                stosw
                mov eax, [edi-12]               ; Flags dword of the copied block
                and eax, 0x200cd7               ; Mask (potentially) unsafe flags
                mov [edi-12], eax               ; Primary flags entry
                stosw                           ; Return flags

                mov bx, call32_sys_rm
                jmp call32_enter_rm             ; Go to real mode

                ; On return, the 44-byte return structure is on the
                ; real-mode stack.  call32_enter_pm will leave ebp
                ; pointing to the real-mode base.
call32_sys_resume:
                movzx esi, word [ebp+SavedSP]
                mov edi, [esp+12*4]             ; Dest regs
                add esi, ebp                    ; Real-mode stack offset -> linear address
                test edi, edi                   ; NULL pointer?
                jnz .copy
.no_copy:       mov edi, esi                    ; Do a dummy copy-to-self
.copy:          xor ecx, ecx
                mov cl, 11                      ; 11 dwords = 44 bytes
                rep movsd                       ; Copy register block

                add word [ebp+SavedSP], 44      ; Remove from stack

                popad
                popfd
                ret                             ; Return to 32-bit program