summaryrefslogtreecommitdiffstats
path: root/contrib/syslinux-4.02/memdisk/memdisk16.asm
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/syslinux-4.02/memdisk/memdisk16.asm')
-rw-r--r--contrib/syslinux-4.02/memdisk/memdisk16.asm793
1 files changed, 793 insertions, 0 deletions
diff --git a/contrib/syslinux-4.02/memdisk/memdisk16.asm b/contrib/syslinux-4.02/memdisk/memdisk16.asm
new file mode 100644
index 0000000..6bafae7
--- /dev/null
+++ b/contrib/syslinux-4.02/memdisk/memdisk16.asm
@@ -0,0 +1,793 @@
+;; -*- fundamental -*-
+;; -----------------------------------------------------------------------
+;;
+;; Copyright 1994-2008 H. Peter Anvin - All Rights Reserved
+;; Copyright 2009 Intel Corporation; author: H. Peter Anvin
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+;; Boston MA 02111-1307, USA; either version 2 of the License, or
+;; (at your option) any later version; incorporated herein by reference.
+;;
+;; -----------------------------------------------------------------------
+
+;;
+;; memdisk16.asm (historically init16.asm)
+;;
+;; Routine to initialize and to trampoline into 32-bit
+;; protected memory. This code is derived from bcopy32.inc and
+;; com32.inc in the main SYSLINUX distribution.
+;;
+
+%include '../version.gen'
+
+MY_CS equ 0x0800 ; Segment address to use (start relocates the image here)
+CS_BASE equ (MY_CS << 4) ; Corresponding linear address (segment * 16)
+
+; Low memory bounce buffer; also hosts the temporary 32-bit stack (see call32_call_start)
+BOUNCE_SEG equ (MY_CS+0x1000) ; 0x1000 paragraphs = 64K above MY_CS
+
+%define DO_WBINVD 0 ; Nonzero: assemble try_wbinvd and call it from enable_a20/disable_a20
+
+ section .rodata align=16 ; Pre-declare the sections with 16-byte alignment
+ section .data align=16
+ section .bss align=16
+ section .stack align=16 nobits
+stack resb 512 ; 512 bytes of real-mode stack
+stack_end equ $ ; Top of stack: initial SP in start; also passed to the 32-bit code as "RM size"
+
+;; -----------------------------------------------------------------------
+;; Kernel image header
+;; -----------------------------------------------------------------------
+
+ section .text ; Must be first in image
+ bits 16
+
+cmdline times 497 db 0 ; We put the command line here (offsets 0..496; filled by copy_cmdline)
+setup_sects db 0 ; Setup sector count; start copies (setup_sects+1) sectors
+root_flags dw 0
+syssize dw 0
+swap_dev dw 0
+ram_size dw 0
+vid_mode dw 0
+root_dev dw 0
+boot_flag dw 0xAA55 ; Lands at offset 510 (497+1+6*2), ending the 512-byte "boot sector"
+
+_start: jmp short start ; Offset 0x200: skip over the header fields to the real code
+
+ db "HdrS" ; Header signature
+ dw 0x0203 ; Header version number
+
+realmode_swtch dw 0, 0 ; default_switch, SETUPSEG
+start_sys_seg dw 0x1000 ; obsolete
+version_ptr dw memdisk_version-0x200 ; version string ptr (offset relative to _start at 0x200)
+type_of_loader db 0 ; Filled in by boot loader
+loadflags db 1 ; Please load high
+setup_move_size dw 0 ; Unused
+code32_start dd 0x100000 ; 32-bit start address (same value as pm_entry)
+ramdisk_image dd 0 ; Loaded ramdisk image address
+ramdisk_size dd 0 ; Size of loaded ramdisk
+bootsect_kludge dw 0, 0
+heap_end_ptr dw 0
+pad1 dw 0
+cmd_line_ptr dd 0 ; Command line (linear address; 0 = none, see copy_cmdline)
+ramdisk_max dd 0xffffffff ; Highest allowed ramdisk address
+
+;
+; These fields aren't real setup fields, they're poked in by the
+; 32-bit code. They are consumed after init32 returns (see copy_cmdline).
+;
+b_esdi dd 0 ; ES:DI for boot sector invocation
+b_edx dd 0 ; EDX for boot sector invocation
+b_sssp dd 0 ; SS:SP on boot sector invocation
+b_csip dd 0 ; CS:IP on boot sector invocation
+
+ section .rodata
+memdisk_version: ; NUL-terminated banner; VERSION_STR and DATE come from ../version.gen
+ db "MEMDISK ", VERSION_STR, " ", DATE, 0
+
+;; -----------------------------------------------------------------------
+;; End kernel image header
+;; -----------------------------------------------------------------------
+
+;
+; Move ourselves down into memory to reduce the risk of conflicts;
+; then canonicalize CS to match the other segments.
+;
+ section .text
+ bits 16
+start:
+ mov ax,MY_CS
+ mov es,ax ; ES = destination segment of the self-copy
+ movzx cx,byte [setup_sects] ; CX = number of setup sectors
+ inc cx ; Add one for the boot sector
+ shl cx,7 ; Convert to dwords (512 bytes/sector = 128 dwords)
+ xor si,si ; Source offset 0 (DS as set by whoever jumped to _start)
+ xor di,di ; Destination offset 0
+ mov fs,si ; fs <- 0
+ cld ; Copy upward
+ rep movsd ; Copy CX dwords DS:SI -> ES:DI
+ mov ds,ax ; From here on use the relocated copy for data ...
+ mov ss,ax ; ... and for the stack
+ mov esp,stack_end ; SS:SP = MY_CS:stack_end
+ jmp MY_CS:.next ; Far jump so CS = MY_CS as well
+.next:
+
+;
+; Copy the command line, if there is one
+;
+copy_cmdline:
+ xor di,di ; Bottom of our own segment (= "boot sector")
+ mov eax,[cmd_line_ptr] ; Linear address of the command line, 0 if none
+ and eax,eax
+ jz .endcmd ; No command line
+ mov si,ax
+ shr eax,4 ; Convert to segment
+ and si,0x000F ; Starting offset only
+ mov gs,ax ; GS:SI -> source command line
+ mov cx,496 ; Max number of bytes (leaves room for the NUL in the 497-byte buffer)
+.copycmd:
+ gs lodsb ; AL = next source byte from GS:SI
+ and al,al
+ jz .endcmd ; Stop at the source terminator
+ stosb ; Store at ES:DI (cmdline buffer at offset 0)
+ loop .copycmd
+.endcmd:
+ xor al,al
+ stosb ; Always NUL-terminate, also on the no-command-line path
+
+;
+; Now jump to 32-bit code
+;
+ sti
+ call init32 ; Returns in real mode via call32_done
+;
+; When init32 returns, we have been set up, the new boot sector loaded,
+; and we should go and run the newly loaded boot sector.
+;
+; The setup function will have poked values into the setup area.
+;
+ movzx edi,word [cs:b_esdi] ; DI = low word of b_esdi, zero-extended into EDI
+ mov es,word [cs:b_esdi+2] ; ES = high word of b_esdi
+ mov edx,[cs:b_edx]
+
+ cli ; No interrupts while the stack is being switched
+ xor esi,esi ; No partition table involved
+ mov ds,si ; Make all the segments consistent
+ mov fs,si
+ mov gs,si
+ lss sp,[cs:b_sssp] ; Load SS:SP in one instruction
+ movzx esp,sp ; Clear the upper half of ESP
+ jmp far [cs:b_csip] ; Transfer to the CS:IP poked in by the 32-bit code
+
+;
+; We enter protected mode, set up a flat 32-bit environment, run rep movsd
+; and then exit. IMPORTANT: This code assumes cs == MY_CS.
+;
+; This code is probably excessively anal-retentive in its handling of
+; segments, but this stuff is painful enough as it is without having to rely
+; on everything happening "as it ought to."
+;
+DummyTSS equ 0x580 ; Hopefully safe place in low memory
+
+ section .data
+
+ ; desc base, limit, flags -- emits one 8-byte descriptor; flags = access byte (bits 0-7) + flag nibble (bits 12-15)
+%macro desc 3
+ dd (%2 & 0xffff) | ((%1 & 0xffff) << 16)
+ dd (%1 & 0xff000000) | (%2 & 0xf0000) | ((%3 & 0xf0ff) << 8) | ((%1 & 0x00ff0000) >> 16)
+%endmacro
+
+ align 8, db 0
+call32_gdt: dw call32_gdt_size-1 ; Null descriptor - contains GDT
+.adj1: dd call32_gdt+CS_BASE ; pointer for LGDT instruction
+ dw 0 ; Pads the pointer out to a full 8-byte descriptor slot
+
+ ; 0008: Dummy TSS to make Intel VT happy
+ ; Should never be actually accessed...
+ desc DummyTSS, 103, 0x8089
+
+ ; 0010: Code segment, use16, readable, dpl 0, base CS_BASE, 64K
+ desc CS_BASE, 0xffff, 0x009b
+
+ ; 0018: Data segment, use16, read/write, dpl 0, base CS_BASE, 64K
+ desc CS_BASE, 0xffff, 0x0093
+
+ ; 0020: Code segment, use32, readable, dpl 0, base 0, 4G
+ desc 0, 0xfffff, 0xc09b
+
+ ; 0028: Data segment, use32, read/write, dpl 0, base 0, 4G
+ desc 0, 0xfffff, 0xc093
+
+call32_gdt_size: equ $-call32_gdt ; Size in bytes of the table above
+
+err_a20: db 'ERROR: A20 gate not responding!',13,10,0
+
+ section .bss
+ alignb 4
+Return resd 1 ; Return value
+SavedSP resw 1 ; Place to save SP (real-mode SP across protected-mode excursions)
+A20Tries resb 1 ; Remaining enable_a20 retry count
+
+ section .data
+ align 4, db 0
+Target dd 0 ; Target address
+Target_Seg dw 20h ; Target CS
+
+A20Type dw 0 ; Default = unknown; index into A20List/A20DList
+
+ section .text
+ bits 16
+;
+; Routines to enable and disable (yuck) A20. These routines are gathered
+; from tips from a couple of sources, including the Linux kernel and
+; http://www.x86.org/. The need for the delay to be as large as given here
+; is indicated by Donnie Barnes of RedHat, the problematic system being an
+; IBM ThinkPad 760EL.
+;
+; We typically toggle A20 twice for every 64K transferred.
+;
+%define io_delay call _io_delay
+%define IO_DELAY_PORT 80h ; Invalid port (we hope!)
+%define disable_wait 32 ; How long to wait for a disable (a20_test polls in a20d_snooze)
+
+%define A20_DUNNO 0 ; A20 type unknown
+%define A20_NONE 1 ; A20 always on?
+%define A20_BIOS 2 ; A20 BIOS enable
+%define A20_KBC 3 ; A20 through KBC
+%define A20_FAST 4 ; A20 through port 92h
+
+ align 2, db 0
+A20List dw a20_dunno, a20_none, a20_bios, a20_kbc, a20_fast ; Enable handlers, indexed by A20Type
+A20DList dw a20d_dunno, a20d_none, a20d_bios, a20d_kbc, a20d_fast ; Disable handlers, same order
+a20_adjust_cnt equ ($-A20List)/2 ; Number of words in both tables together
+
+slow_out: out dx, al ; Fall through (port write followed by the I/O delay)
+
+_io_delay: out IO_DELAY_PORT,al ; Two dummy port writes serve as the delay
+ out IO_DELAY_PORT,al
+ ret
+
+enable_a20:
+ pushad ; Balanced by the popad at a20_done
+ mov byte [A20Tries],255 ; Times to try to make this work
+
+try_enable_a20:
+
+;
+; Flush the caches
+;
+%if DO_WBINVD
+ call try_wbinvd
+%endif
+
+;
+; If the A20 type is known, jump straight to type
+;
+ mov bp,[A20Type] ; 0..4, see the A20_* defines
+ add bp,bp ; Convert to word offset
+.adj4: jmp word [bp+A20List] ; Dispatch to a20_dunno/none/bios/kbc/fast
+
+;
+; First, see if we are on a system with no A20 gate
+;
+a20_dunno:
+a20_none:
+ mov byte [A20Type], A20_NONE ; Remember this method in case the test succeeds
+ call a20_test ; ZF = 0 if A20 is already enabled
+ jnz a20_done
+
+;
+; Next, try the BIOS (INT 15h AX=2401h)
+;
+a20_bios:
+ mov byte [A20Type], A20_BIOS
+ mov ax,2401h
+ pushf ; Some BIOSes muck with IF
+ int 15h
+ popf ; Restore the caller's flags, including IF
+
+ call a20_test
+ jnz a20_done ; Success; otherwise fall through to the KBC method
+
+;
+; Enable the keyboard controller A20 gate
+;
+a20_kbc:
+ mov dl, 1 ; Allow early exit
+ call empty_8042 ; Returns ZF = 0 if A20 was seen enabled while draining
+ jnz a20_done ; A20 live, no need to use KBC
+
+ mov byte [A20Type], A20_KBC ; Starting KBC command sequence
+
+ mov al,0D1h ; Write output port
+ out 064h, al
+ call empty_8042_uncond
+
+ mov al,0DFh ; A20 on
+ out 060h, al
+ call empty_8042_uncond
+
+ ; Apparently the UHCI spec assumes that A20 toggle
+ ; ends with a null command (assumed to be for synchronization?)
+ ; Put it here to see if it helps anything...
+ mov al,0FFh ; Null command
+ out 064h, al
+ call empty_8042_uncond
+
+ ; Verify that A20 actually is enabled. Do that by
+ ; observing a word in low memory and the same word in
+ ; the HMA until they are no longer coherent. Note that
+ ; we don't do the same check in the disable case, because
+ ; we don't want to *require* A20 masking (SYSLINUX should
+ ; work fine without it, if the BIOS does.)
+.kbc_wait: push cx ; Popped below, or at a20_done_pop on success
+ xor cx,cx ; CX = 0 makes LOOP run 65536 times
+.kbc_wait_loop:
+ call a20_test
+ jnz a20_done_pop ; A20 enabled
+ loop .kbc_wait_loop
+
+ pop cx ; Timed out; fall through to the fast-gate method
+;
+; Running out of options here. Final attempt: enable the "fast A20 gate"
+;
+a20_fast:
+ mov byte [A20Type], A20_FAST ; Haven't used the KBC yet
+ in al, 092h
+ or al,02h ; Set bit 1 of port 92h: the fast A20 gate
+ and al,~01h ; Don't accidentally reset the machine!
+ out 092h, al
+
+.fast_wait: push cx ; Popped below, or at a20_done_pop on success
+ xor cx,cx ; CX = 0 makes LOOP run 65536 times
+.fast_wait_loop:
+ call a20_test
+ jnz a20_done_pop ; A20 enabled
+ loop .fast_wait_loop
+
+ pop cx
+
+;
+; Oh bugger. A20 is not responding. Try frobbing it again; eventually give up
+; and report failure to the user.
+;
+
+ dec byte [A20Tries]
+ jnz try_enable_a20 ; Retry from the dispatch, now using the last recorded A20Type
+
+
+ ; Error message time (reached by fall-through once A20Tries hits zero)
+ mov si,err_a20
+print_err:
+ lodsb ; AL = next character of the NUL-terminated message at DS:SI
+ and al,al
+ jz die ; End of string
+ mov bx,7 ; BH = page 0, BL = attribute 7
+ mov ah,0xe ; INT 10h AH=0Eh: teletype output
+ int 10h
+ jmp print_err
+
+
+die:
+ sti ; Keep interrupts enabled while halted
+.hlt: hlt
+ jmp short .hlt ; Never returns
+
+;
+; A20 unmasked, proceed...
+;
+a20_done_pop: pop cx ; Entry used from the wait loops, which pushed CX
+a20_done: popad ; Matches the pushad in enable_a20
+ ret
+
+;
+; This routine tests if A20 is enabled (ZF = 0). This routine
+; must not destroy any register contents.
+;
+
+; This is the INT 1Fh vector, which in standard PCs is used by the
+; BIOS when the screen is in graphics mode. Even if it is, it points to
+; data, not code, so it should be safe enough to fiddle with.
+A20Test equ (1Fh*4) ; Offset 0x7C in segment 0
+
+a20_test:
+ push ds
+ push es
+ push cx
+ push eax
+ xor ax,ax
+ mov ds,ax ; DS == 0
+ dec ax
+ mov es,ax ; ES == 0FFFFh
+ mov cx,32 ; Loop count
+ mov eax,[A20Test] ; Low-memory copy
+ cmp eax,[es:A20Test+10h] ; FFFF:(A20Test+10h) is 1 MiB above it; aliases it while A20 is masked
+ jne .a20_done ; Different already: A20 enabled, return with ZF = 0
+ push eax ; Save the original vector contents
+.a20_wait:
+ inc eax
+ mov [A20Test],eax ; Change the low copy ...
+ io_delay
+ cmp eax,[es:A20Test+10h] ; ... and see whether the high alias follows
+ loopz .a20_wait ; Repeat while still equal and CX != 0; LOOPZ leaves ZF untouched
+ pop dword [A20Test] ; Restore original value
+.a20_done:
+ pop eax ; The pops do not alter flags, so ZF survives to the caller
+ pop cx
+ pop es
+ pop ds
+ ret
+
+disable_a20:
+ pushad ; Balanced by the popad at a20d_none
+;
+; Flush the caches
+;
+%if DO_WBINVD
+ call try_wbinvd
+%endif
+
+ mov bp,[A20Type] ; Method recorded by enable_a20
+ add bp,bp ; Convert to word offset
+.adj5: jmp word [bp+A20DList] ; Dispatch to the matching a20d_* handler
+
+a20d_bios:
+ mov ax,2400h ; INT 15h AX=2400h, counterpart of the 2401h call in a20_bios
+ pushf ; Some BIOSes muck with IF
+ int 15h
+ popf
+ jmp short a20d_snooze
+
+;
+; Disable the "fast A20 gate"
+;
+a20d_fast:
+ in al, 092h
+ and al,~03h ; Clear bit 1 (A20 gate) and bit 0 (reset)
+ out 092h, al
+ jmp short a20d_snooze
+
+;
+; Disable the keyboard controller A20 gate
+;
+a20d_kbc:
+ call empty_8042_uncond
+
+ mov al,0D1h
+ out 064h, al ; Write output port
+ call empty_8042_uncond
+
+ mov al,0DDh ; A20 off
+ out 060h, al
+ call empty_8042_uncond
+
+ mov al,0FFh ; Null command/synchronization
+ out 064h, al
+ call empty_8042_uncond
+
+ ; Wait a bit for it to take effect
+a20d_snooze:
+ push cx
+ mov cx, disable_wait ; Poll at most disable_wait times
+.delayloop: call a20_test
+ jz .disabled ; ZF = 1: A20 no longer seen as enabled
+ loop .delayloop
+.disabled: pop cx ; Reached on timeout too; a failed disable is not treated as an error
+a20d_dunno:
+a20d_none:
+ popad
+ ret
+
+; Returns ZF = 0 on the early exit (A20 seen enabled), ZF = 1 once the KBC is drained.
+; Routine to empty the 8042 KBC controller. If dl != 0
+; then we will test A20 in the loop and exit if A20 is
+; suddenly enabled.
+; empty_8042_uncond forces dl = 0, i.e. never exits early. Clobbers AL (and DL for _uncond).
+empty_8042_uncond:
+ xor dl,dl
+empty_8042:
+ call a20_test ; ZF = 0 if A20 is enabled
+ jz .a20_on ; Not enabled: keep draining (label name is misleading)
+ and dl,dl
+ jnz .done ; Enabled and early exit allowed: return with ZF = 0
+.a20_on: io_delay
+ in al, 064h ; Status port
+ test al,1 ; Bit 0: data waiting to be read from port 60h
+ jz .no_output
+ io_delay
+ in al, 060h ; Read input
+ jmp short empty_8042
+.no_output:
+ test al,2 ; Bit 1: controller has not yet consumed what we wrote
+ jnz empty_8042
+ io_delay ; OUT does not touch flags, so ZF = 1 from the test above is returned
+.done: ret
+
+;
+; Execute a WBINVD instruction if possible on this CPU
+; NOTE(review): no CPU check is visible here; assembled only when DO_WBINVD is nonzero.
+%if DO_WBINVD
+try_wbinvd:
+ wbinvd ; Write back and invalidate the caches
+ ret
+%endif
+
+ section .bss
+ alignb 4
+PMESP resd 1 ; Protected mode %esp (saved by call32_enter_rm, reloaded by call32_enter_pm)
+
+ section .idt nobits align=4096
+ alignb 4096
+pm_idt resb 4096 ; Protected-mode IDT, followed by interrupt stubs
+
+
+
+
+pm_entry: equ 0x100000 ; Linear entry point of the 32-bit code (same value as code32_start)
+
+ section .rodata
+ align 2, db 0
+call32_rmidt: ; IDT pointer loaded by call32_enter_rm before leaving protected mode
+ dw 0ffffh ; Limit
+ dd 0 ; Address (the real-mode IVT at linear 0)
+
+ section .data
+ align 2, db 0 ; Initialized section: pad with real zero bytes (ALIGNB pads with RESB, meant for nobits/.bss)
+call32_pmidt: ; 6-byte IDT pointer: loaded with LIDT in call32_enter_pm, saved with SIDT in call32_enter_rm
+ dw 8*256 ; Limit
+ dd 0 ; Address (entered later)
+
+ section .text
+;
+; This is the main entrypoint in this function
+;
+init32:
+ mov bx,call32_call_start ; Where to go in PM
+
+;
+; Enter protected mode. BX contains the entry point relative to the
+; real-mode CS.
+;
+call32_enter_pm:
+ mov ax,cs
+ mov ds,ax
+ movzx ebp,ax
+ shl ebp,4 ; EBP <- CS_BASE
+ movzx ebx,bx
+ add ebx,ebp ; entry point += CS_BASE
+ cli ; Interrupts stay off across the mode switch
+ mov [SavedSP],sp ; Real-mode SP, restored by call32_enter_rm
+ cld
+ call enable_a20
+ mov byte [call32_gdt+8+5],89h ; Mark TSS unbusy
+ o32 lgdt [call32_gdt] ; Set up GDT
+ o32 lidt [call32_pmidt] ; Set up IDT
+ mov eax,cr0
+ or al,1 ; Set CR0.PE
+ mov cr0,eax ; Enter protected mode
+ jmp 20h:strict dword .in_pm+CS_BASE ; Far jump into the flat 32-bit code segment
+.pm_jmp equ $-6 ; Address of that jump's 32-bit offset field; handed to the 32-bit code
+
+
+ bits 32
+.in_pm:
+ xor eax,eax ; Available for future use...
+ mov fs,eax ; FS = GS = null selector
+ mov gs,eax
+ lldt ax ; Null LDT
+
+ mov al,28h ; Set up data segments
+ mov es,eax
+ mov ds,eax
+ mov ss,eax
+
+ mov al,08h ; Selector 0008: the dummy TSS
+ ltr ax
+
+ mov esp,[ebp+PMESP] ; Load protmode %esp if available
+ jmp ebx ; Go to where we need to go
+
+;
+; This is invoked before first dispatch of the 32-bit code, in 32-bit mode
+;
+call32_call_start:
+ ;
+ ; Set up a temporary stack in the bounce buffer;
+ ; start32.S will override this to point us to the real
+ ; high-memory stack.
+ ;
+ mov esp, (BOUNCE_SEG << 4) + 0x10000 ; Top of the 64K above the bounce buffer base
+
+ push dword call32_enter_rm.rm_jmp+CS_BASE ; Address of the segment word in the return-to-RM far jump
+ push dword call32_enter_pm.pm_jmp+CS_BASE ; Address of the offset dword in the enter-PM far jump
+ push dword stack_end ; RM size
+ push dword call32_gdt+CS_BASE ; Linear address of the GDT
+ push dword call32_handle_interrupt+CS_BASE ; Interrupt handler entry point
+ push dword CS_BASE ; Segment base
+ push dword (BOUNCE_SEG << 4) ; Bounce buffer address
+ push dword call32_syscall+CS_BASE ; Syscall entry point
+
+ call pm_entry-CS_BASE ; Run the program... (-CS_BASE: this code is assembled CS_BASE below where it runs)
+
+ ; ... fall through to call32_exit ...
+
+call32_exit:
+ mov bx,call32_done ; Return to command loop
+
+call32_enter_rm:
+ ; Careful here... the PM code may have relocated the
+ ; entire RM code, so we need to figure out exactly
+ ; where we are executing from. If the PM code has
+ ; relocated us, it *will* have adjusted the GDT to
+ ; match, though.
+ call .here
+.here: pop ebp ; EBP = run-time linear address of .here
+ sub ebp,.here ; EBP = linear base of the real-mode segment
+ o32 sidt [ebp+call32_pmidt] ; Save the current PM IDT pointer for the next call32_enter_pm
+ cli
+ cld
+ mov [ebp+PMESP],esp ; Save exit %esp
+ xor esp,esp ; Make sure the high bits are zero
+ jmp 10h:.in_pm16 ; Return to 16-bit mode first
+
+ bits 16
+.in_pm16:
+ mov ax,18h ; Real-mode-like segment
+ mov es,ax
+ mov ds,ax
+ mov ss,ax
+ mov fs,ax
+ mov gs,ax
+
+ lidt [call32_rmidt] ; Real-mode IDT (rm needs no GDT)
+ mov eax,cr0
+ and al,~1 ; Clear CR0.PE
+ mov cr0,eax
+ jmp MY_CS:.in_rm ; Far jump reloads CS with a real-mode segment value
+.rm_jmp equ $-2 ; Address of that jump's segment word; handed to the 32-bit code
+
+.in_rm: ; Back in real mode
+ mov ax,cs
+ mov ds,ax
+ mov es,ax
+ mov fs,ax
+ mov gs,ax
+ mov ss,ax
+ mov sp,[SavedSP] ; Restore stack
+ jmp bx ; Go to wherever we need to go...
+
+call32_done: ; Real-mode landing point selected by call32_exit
+ call disable_a20
+ sti ; Interrupts were disabled for the mode switch
+ ret ; Returns to the caller of init32, on the stack restored from SavedSP
+
+;
+; 16-bit support code
+;
+ bits 16
+
+;
+; 16-bit interrupt-handling code
+;
+call32_int_rm: ; Entered in real mode with EDX = IVT entry (set by call32_handle_interrupt)
+ pushf ; Flags on stack
+ push cs ; Return segment
+ push word .cont ; Return address
+ push dword edx ; Segment:offset of IVT entry
+ retf ; Invoke IVT routine
+.cont: ; ... on resume ...
+ mov bx,call32_int_resume
+ jmp call32_enter_pm ; Go back to PM
+
+;
+; 16-bit system call handling code
+;
+call32_sys_rm: ; Entered in real mode with SP at the frame built by call32_syscall
+ pop gs ; Load the 44-byte register block: 4 segment registers ...
+ pop fs
+ pop es
+ pop ds
+ popad ; ... 8 general registers ...
+ popfd ; ... and EFLAGS
+ retf ; Invoke routine
+.return: ; The return offset/segment/flags left on the stack lead back here
+ pushfd ; Rebuild the 44-byte block in the same layout
+ pushad
+ push ds
+ push es
+ push fs
+ push gs
+ mov bx,call32_sys_resume
+ jmp call32_enter_pm ; It records the current SP in SavedSP before switching
+
+;
+; 32-bit support code
+;
+ bits 32
+
+;
+; This is invoked on getting an interrupt in protected mode. At
+; this point, we need to context-switch to real mode and invoke
+; the interrupt routine.
+;
+; When this gets invoked, the registers are saved on the stack and
+; AL contains the interrupt number (it is used as the IVT index below).
+;
+call32_handle_interrupt:
+ movzx eax,al
+ xor ebx,ebx ; Actually makes the code smaller
+ mov edx,[ebx+eax*4] ; Get the segment:offset of the routine
+ mov bx,call32_int_rm ; Real-mode continuation
+ jmp call32_enter_rm ; Go to real mode
+
+call32_int_resume: ; Back in protected mode after the real-mode handler ran
+ popad ; Restore the registers saved before the handler was entered
+ iret
+
+;
+; Syscall invocation. We manifest a structure on the real-mode stack,
+; containing the call32sys_t structure from <call32.h> as well as
+; the following entries (from low to high address):
+; - Target offset
+; - Target segment
+; - Return offset
+; - Return segment (== real mode cs)
+; - Return flags
+;
+call32_syscall:
+ pushfd ; Save IF among other things...
+ pushad ; We only need to save some, but...
+ cld
+ call .here
+.here: pop ebp ; EBP = run-time linear address of .here
+ sub ebp,.here ; EBP = linear base of the real-mode segment
+
+ movzx edi,word [ebp+SavedSP]
+ sub edi,54 ; Allocate 54 bytes (44 regs + 4 target + 2+2+2 return frame)
+ mov [ebp+SavedSP],di ; Real mode resumes with SP at this frame
+ add edi,ebp ; Create linear address
+
+ mov esi,[esp+11*4] ; Source regs
+ xor ecx,ecx
+ mov cl,11 ; 44 bytes to copy
+ rep movsd
+
+ movzx eax,byte [esp+10*4] ; Interrupt number
+ ; ecx == 0 here; adding it to the EA makes the
+ ; encoding smaller
+ mov eax,[ecx+eax*4] ; Get IVT entry
+ stosd ; Save in stack frame
+ mov ax,call32_sys_rm.return ; Return offset
+ stosw ; Save in stack frame
+ mov eax,ebp
+ shr eax,4 ; Return segment
+ stosw ; Save in stack frame
+ mov eax,[edi-12] ; Return flags
+ and eax,0x200cd7 ; Mask (potentially) unsafe flags
+ mov [edi-12],eax ; Primary flags entry
+ stosw ; Return flags
+
+ mov bx,call32_sys_rm ; Real-mode continuation
+ jmp call32_enter_rm ; Go to real mode
+
+ ; On return, the 44-byte return structure is on the
+ ; real-mode stack. call32_enter_pm will leave ebp
+ ; pointing to the real-mode base.
+call32_sys_resume:
+ movzx esi,word [ebp+SavedSP] ; Real-mode SP as recorded by call32_enter_pm
+ mov edi,[esp+12*4] ; Dest regs
+ add esi,ebp ; Create linear address
+ and edi,edi ; NULL pointer?
+ jnz .do_copy
+.no_copy: mov edi,esi ; Do a dummy copy-to-self
+.do_copy: xor ecx,ecx
+ mov cl,11 ; 44 bytes
+ rep movsd ; Copy register block
+
+ add word [ebp+SavedSP],44 ; Remove from stack
+
+ popad ; Undo the pushad/pushfd done in call32_syscall
+ popfd
+ ret ; Return to 32-bit program