diff options
Diffstat (limited to 'contrib/syslinux-4.02/core/bcopyxx.inc')
-rw-r--r-- | contrib/syslinux-4.02/core/bcopyxx.inc | 318 |
1 files changed, 318 insertions, 0 deletions
diff --git a/contrib/syslinux-4.02/core/bcopyxx.inc b/contrib/syslinux-4.02/core/bcopyxx.inc new file mode 100644 index 0000000..c669b7a --- /dev/null +++ b/contrib/syslinux-4.02/core/bcopyxx.inc @@ -0,0 +1,318 @@ +;; ----------------------------------------------------------------------- +;; +;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved +;; Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin +;; +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, Inc., 53 Temple Place Ste 330, +;; Boston MA 02111-1307, USA; either version 2 of the License, or +;; (at your option) any later version; incorporated herein by reference. +;; +;; ----------------------------------------------------------------------- + +;; +;; bcopy32xx.inc +;; + + +; +; 32-bit bcopy routine +; +; This is the actual 32-bit portion of the bcopy and shuffle and boot +; routines. ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the +; sole exception being the actual relocation code at the beginning of +; pm_shuffle. +; +; It also really needs to live all in a single segment, for the +; address calculations to actually work. +; + + bits 32 + section .bcopyxx.text + align 16 +; +; pm_bcopy: +; +; This is the protected-mode core of the "bcopy" routine. +; Try to do aligned transfers; if the src and dst are relatively +; misaligned, align the dst. +; +; ECX is guaranteed to not be zero on entry. +; +; Clobbers ESI, EDI, ECX. 
;
; Register usage, as implemented below:
;	ESI	-> source, or -1 to zero-fill the destination instead
;	EDI	-> destination
;	ECX	=  byte count
;	EAX, EBX and EDX are used as scratch, but are saved on entry
;	and restored on exit.
;
; The forward-copy and zero-fill paths never touch the direction flag,
; so they rely on DF being clear on entry (NOTE(review): confirm with
; the callers); the reverse-copy path sets DF and clears it again
; before returning.
;
; Alignment bookkeeping: EDX holds (current EDI) >> 1, so each SHR EDX,1
; drops the next address bit of the destination into CF.  Whenever the
; single-byte alignment step moves EDI, EDX is adjusted to match, so the
; following word-alignment test looks at bit 1 of the address EDI has
; *now*, not the one it had on entry.
;

pm_bcopy:
		push ebx
		push edx
		push eax

		cmp esi,-1		; src == -1 means "zero the destination"
		je .bzero

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment: byte step if EDI is odd
		mov edx,edi
		shr edx,1		; CF <- EDI bit 0, EDX = EDI >> 1
		jnc .faa1
		movsb
		dec ecx
		inc edx			; EDI went odd -> next even; keep EDX == EDI >> 1
.faa1:
		mov al,cl		; Low bits of the remaining count
		cmp ecx,2
		jb .f_tiny		; 0 or 1 bytes left

		; Word step if EDI is not yet dword aligned
		shr edx,1		; CF <- bit 1 of the current EDI
		jnc .faa2
		movsw
		sub ecx,2
.faa2:

		; Bulk transfer
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		rep movsd		; Do our business
		; At this point ecx == 0

		test al,2		; Trailing word?
		jz .fab2
		movsw
.fab2:
.f_tiny:
		test al,1		; Trailing byte?
		jz .fab1
		movsb
.fab1:
.done:
		pop eax
		pop edx
		pop ebx
		ret

.reverse:
		lea eax,[esi+ecx-1]	; Point to final byte
		cmp edi,eax
		ja .forward		; No overlap, do forward copy

		std			; Reverse copy
		lea edi,[edi+ecx-1]	; EDI -> last destination byte
		mov esi,eax		; ESI -> last source byte

		; Initial alignment.  Copying downwards, a dword transfer
		; is aligned when the last byte it covers sits at an
		; address == 3 (mod 4), so we want both low bits *set*.
		mov edx,edi
		shr edx,1		; CF <- EDI bit 0, EDX = EDI >> 1
		jc .raa1
		movsb
		dec ecx
		dec edx			; EDI went even -> previous odd; keep EDX == EDI >> 1
.raa1:

		; With DF set, MOVSW needs the pointers one byte below the
		; last byte to be transferred
		dec esi
		dec edi
		mov al,cl		; Low bits of the remaining count
		cmp ecx,2
		jb .r_tiny		; 0 or 1 bytes left
		shr edx,1		; CF <- bit 1 of the current last-byte address
		jc .raa2
		movsw
		sub ecx,2
.raa2:

		; Bulk copy; MOVSD needs the pointers a further two
		; bytes down
		sub esi,2
		sub edi,2
		mov al,cl		; Save low bits
		shr ecx,2
		rep movsd

		; Final alignment
.r_final:
		add esi,2		; Back to word positioning
		add edi,2
		test al,2		; Trailing word?
		jz .rab2
		movsw
.rab2:
.r_tiny:
		inc esi			; Back to byte positioning
		inc edi
		test al,1		; Trailing byte?
		jz .rab1
		movsb
.rab1:
		cld			; Restore forward direction
		jmp short .done

.bzero:
		xor eax,eax		; Fill value; the count's low bits live in BL here

		; Initial alignment: byte step if EDI is odd
		mov edx,edi
		shr edx,1		; CF <- EDI bit 0, EDX = EDI >> 1
		jnc .zaa1
		stosb
		dec ecx
		inc edx			; EDI went odd -> next even; keep EDX == EDI >> 1
.zaa1:

		mov bl,cl		; Low bits of the remaining count
		cmp ecx,2
		jb .z_tiny		; 0 or 1 bytes left
		shr edx,1		; CF <- bit 1 of the current EDI
		jnc .zaa2
		stosw
		sub ecx,2
.zaa2:

		; Bulk
		mov bl,cl		; Save low bits
		shr ecx,2
		rep stosd

		test bl,2		; Trailing word?
		jz .zab2
		stosw
.zab2:
.z_tiny:
		test bl,1		; Trailing byte?
		jz .zab1
		stosb
.zab1:
		jmp short .done

;
; shuffle_and_boot:
;
; This routine is used to shuffle memory around, followed by
; invoking an entry point somewhere in low memory.  This routine
; can clobber any memory outside the bcopy special area.
;
; IMPORTANT: This routine does not set up any registers.
; It is the responsibility of the caller to generate an appropriate entry
; stub; *especially* when going to real mode.
;
; Inputs:
;	ESI		-> Pointer to list of (dst, src, len) triples
;	EDI		-> Pointer to safe area for list + shuffler
;			   (must not overlap this code nor the RM stack)
;	ECX		-> Byte count of list area (for initial copy)
;
;     If src == -1: then the memory pointed to by (dst, len) is bzeroed;
;		    this is handled inside the bcopy routine.
;
;     If len == 0:  this marks the end of the list; dst indicates
;		    the entry point and src the mode
;		    (0 = 16-bit entry via pm_shuffle_16, nonzero = pm)
;
; This routine does not return: it ends by jumping to the entry point.
;
pm_shuffle:
		cli			; End interrupt service (for good)
		mov ebx,edi		; EBX <- descriptor list
		lea edx,[edi+ecx+15]	; EDX <- where to relocate our code to
		and edx,~15		; Align 16 to benefit the GDT
		call pm_bcopy		; Copy the list into the safe area
					; (EBX and EDX survive the call)

		; The symbols __bcopyxx_start, __bcopyxx_dwords and
		; __bcopyxx_end are defined outside this file
		; (presumably by the linker script).
		mov esi,__bcopyxx_start	; Absolute source address
		mov edi,edx		; Absolute target address
		sub edx,esi		; EDX <- address delta
		mov ecx,__bcopyxx_dwords
		lea eax,[edx+.safe]	; Resume point
		; Relocate this code
		rep movsd
		jmp eax			; Jump to safe location
.safe:
		; Everything from here on runs in the relocated copy, so
		; absolute addresses must have the delta in EDX added.

		; Give ourselves a safe stack
		lea esp,[edx+bcopyxx_stack+__bcopyxx_end]
		add edx,bcopy_gdt	; EDX <- new GDT
		mov [edx+2],edx		; GDT self-pointer
		lgdt [edx]		; Switch to local GDT

		; Now for the actual shuffling...
.loop:
		mov edi,[ebx]		; dst
		mov esi,[ebx+4]		; src
		mov ecx,[ebx+8]		; len
		add ebx,12		; Advance to the next descriptor
		jecxz .done		; len == 0: end of list
		call pm_bcopy
		jmp .loop
.done:
		; Here EDI = entry point, ESI = mode, ECX = 0
		lidt [edx+RM_IDT_ptr-bcopy_gdt]	; RM-like IDT
		push ecx		; == 0, for cleaning the flags register
		test esi,esi		; Mode 0 -> 16-bit entry point
		jz pm_shuffle_16
		popfd			; Clean the flags
		jmp edi			; Protected mode entry

		; We have a 16-bit entry point, so we need to return
		; to 16-bit mode.  Note: EDX already points to the GDT.
pm_shuffle_16:
		; Patch the 32-bit address in EDI into the base fields of
		; the 16-bit code and data descriptors, so that offset 0
		; in either segment is the entry point.
		mov eax,edi
		mov [edx+PM_DS16+2],ax		; Base bits 15:0
		mov [edx+PM_CS16+2],ax
		shr eax,16
		mov [edx+PM_DS16+4],al		; Base bits 23:16
		mov [edx+PM_CS16+4],al
		mov [edx+PM_DS16+7],ah		; Base bits 31:24
		mov [edx+PM_CS16+7],ah

		; EAX <- CR0 with bit 0 cleared.  It is not written back
		; to CR0 in this routine (presumably the entry stub at
		; the target does that -- confirm against the callers).
		mov eax,cr0
		and al,~1
		popfd				; Clean the flags (a zero was pushed by pm_shuffle)
		; No flag-changing instructions below...

		; Load the 16-bit data selector into every data segment
		; register; only the low 16 bits of EDX are used.
		mov dx,PM_DS16
		mov es,edx
		mov ds,edx
		mov gs,edx
		mov fs,edx
		mov ss,edx
		jmp PM_CS16:0			; Far jump into the 16-bit code segment

		section .bcopyxx.data

		alignz 16
; GDT descriptor entry: defines the label bcopy_gdt.<name> at the current
; position and PM_<name> as the matching selector (offset into the GDT)
%macro desc 1
bcopy_gdt.%1:
PM_%1		equ bcopy_gdt.%1-bcopy_gdt
%endmacro

bcopy_gdt:
		; 00h Null descriptor - reused to hold the operand for
		; the LGDT instruction (limit, then base)
		dw bcopy_gdt_size-1
		dd bcopy_gdt			; Rewritten at run time by pm_shuffle
		dw 0

		; TSS segment to keep Intel VT happy.  Intel VT is
		; unhappy about anything that doesn't smell like a
		; full-blown 32-bit OS.
	desc TSS
		dw 104-1			; 08h 32-bit task state segment
		dw DummyTSS
		dd 0x00008900			; present, dpl 0, 104 bytes @DummyTSS

	desc CS16
		dd 0x0000ffff			; 10h Code segment, use16, readable,
		dd 0x00009b00			; present, dpl 0, cover 64K
	desc DS16
		dd 0x0000ffff			; 18h Data segment, use16, read/write,
		dd 0x00009300			; present, dpl 0, cover 64K
	desc CS32
		dd 0x0000ffff			; 20h Code segment, use32, readable,
		dd 0x00cf9b00			; present, dpl 0, cover all 4G
	desc DS32
		dd 0x0000ffff			; 28h Data segment, use32, read/write,
		dd 0x00cf9300			; present, dpl 0, cover all 4G

bcopy_gdt_size:	equ $-bcopy_gdt
;
; Space for a dummy task state segment.  It should never be actually
; accessed, but just in case it is, point to a chunk of memory that
; has a chance to not be used for anything real...
;
DummyTSS	equ 0x580

		align 4
		; Operand for the LIDT in pm_shuffle
RM_IDT_ptr:	dw 0xffff			; Length (nonsense, but matches CPU)
		dd 0				; Offset

bcopyxx_stack	equ 128				; We want this much stack

		bits 16
		section .text16