Diffstat (limited to 'contrib/syslinux-4.02/core/bcopyxx.inc')
-rw-r--r--  contrib/syslinux-4.02/core/bcopyxx.inc  318
1 file changed, 318 insertions, 0 deletions
diff --git a/contrib/syslinux-4.02/core/bcopyxx.inc b/contrib/syslinux-4.02/core/bcopyxx.inc
new file mode 100644
index 0000000..c669b7a
--- /dev/null
+++ b/contrib/syslinux-4.02/core/bcopyxx.inc
@@ -0,0 +1,318 @@
+;; -----------------------------------------------------------------------
+;;
+;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
+;; Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, Inc., 59 Temple Place Ste 330,
+;; Boston MA 02111-1307, USA; either version 2 of the License, or
+;; (at your option) any later version; incorporated herein by reference.
+;;
+;; -----------------------------------------------------------------------
+
+;;
+;; bcopyxx.inc
+;;
+
+
+;
+; 32-bit bcopy routine
+;
+; This is the actual 32-bit portion of the bcopy and shuffle and boot
+; routines. ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
+; sole exception being the actual relocation code at the beginning of
+; pm_shuffle_boot.
+;
+; It also really needs to live all in a single segment, for the
+; address calculations to actually work.
+;
+
+ bits 32
+ section .bcopyxx.text
+ align 16
+;
+; pm_bcopy:
+;
+; This is the protected-mode core of the "bcopy" routine.
+; Try to do aligned transfers; if the src and dst are relatively
+; misaligned, align the dst.
+;
+; ECX is guaranteed to not be zero on entry.
+;
+; Clobbers ESI, EDI, ECX.
+;
+
+pm_bcopy:
+ push ebx
+ push edx
+ push eax
+
+ cmp esi,-1
+ je .bzero
+
+ cmp esi,edi ; If source < destination, we might
+ jb .reverse ; have to copy backwards
+
+.forward:
+ ; Initial alignment
+ mov edx,edi
+ shr edx,1
+ jnc .faa1
+ movsb
+ dec ecx
+.faa1:
+ mov al,cl
+ cmp ecx,2
+ jb .f_tiny
+
+ shr edx,1
+ jnc .faa2
+ movsw
+ sub ecx,2
+.faa2:
+
+ ; Bulk transfer
+ mov al,cl ; Save low bits
+ shr ecx,2 ; Convert to dwords
+ rep movsd ; Do our business
+ ; At this point ecx == 0
+
+ test al,2
+ jz .fab2
+ movsw
+.fab2:
+.f_tiny:
+ test al,1
+ jz .fab1
+ movsb
+.fab1:
+.done:
+ pop eax
+ pop edx
+ pop ebx
+ ret
+
+.reverse:
+ lea eax,[esi+ecx-1] ; Point to final byte
+ cmp edi,eax
+ ja .forward ; No overlap, do forward copy
+
+ std ; Reverse copy
+ lea edi,[edi+ecx-1]
+ mov esi,eax
+
+ ; Initial alignment
+ mov edx,edi
+ shr edx,1
+ jc .raa1
+ movsb
+ dec ecx
+.raa1:
+
+ dec esi
+ dec edi
+ mov al,cl
+ cmp ecx,2
+ jb .r_tiny
+ shr edx,1
+ jc .raa2
+ movsw
+ sub ecx,2
+.raa2:
+
+ ; Bulk copy
+ sub esi,2
+ sub edi,2
+ mov al,cl ; Save low bits
+ shr ecx,2
+ rep movsd
+
+ ; Final alignment
+.r_final:
+ add esi,2
+ add edi,2
+ test al,2
+ jz .rab2
+ movsw
+.rab2:
+.r_tiny:
+ inc esi
+ inc edi
+ test al,1
+ jz .rab1
+ movsb
+.rab1:
+ cld
+ jmp short .done
+
+.bzero:
+ xor eax,eax
+
+ ; Initial alignment
+ mov edx,edi
+ shr edx,1
+ jnc .zaa1
+ stosb
+ dec ecx
+.zaa1:
+
+ mov bl,cl
+ cmp ecx,2
+ jb .z_tiny
+ shr edx,1
+ jnc .zaa2
+ stosw
+ sub ecx,2
+.zaa2:
+
+ ; Bulk
+ mov bl,cl ; Save low bits
+ shr ecx,2
+ rep stosd
+
+ test bl,2
+ jz .zab2
+ stosw
+.zab2:
+.z_tiny:
+ test bl,1
+ jz .zab1
+ stosb
+.zab1:
+ jmp short .done
+
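; ----------------------------------------------------------------------
; Illustrative sketch (not part of the original file): one plausible way
; a caller could drive pm_bcopy.  The addresses and byte counts below are
; made-up examples; per the contract above, only ESI/EDI/ECX matter, ECX
; must be nonzero, and ESI == -1 requests a bzero instead of a copy.
bcopy_usage_sketch:
	mov esi,0x00400000	; example source linear address
	mov edi,0x00100000	; example destination linear address
	mov ecx,512		; byte count (must be nonzero)
	call pm_bcopy		; ordinary (possibly overlapping) copy
	mov esi,-1		; src == -1 selects the bzero path
	mov edi,0x00100000
	mov ecx,4096		; zero 4 KiB at the destination
	call pm_bcopy
	ret
; ----------------------------------------------------------------------
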
+;
+; shuffle_and_boot:
+;
+; This routine is used to shuffle memory around, followed by
+; invoking an entry point somewhere in low memory. This routine
+; can clobber any memory outside the bcopy special area.
+;
+; IMPORTANT: This routine does not set up any registers.
+; It is the responsibility of the caller to generate an appropriate entry
+; stub; *especially* when going to real mode.
+;
+; Inputs:
+; ESI -> Pointer to list of (dst, src, len) triples(*)
+; EDI -> Pointer to safe area for list + shuffler
+; (must not overlap this code nor the RM stack)
+; ECX -> Byte count of list area (for initial copy)
+;
+; If src == -1: then the memory pointed to by (dst, len) is bzeroed;
+; this is handled inside the bcopy routine.
+;
+; If len == 0: this marks the end of the list; dst indicates
+; the entry point and src the mode (0 = pm, 1 = rm)
+;
+pm_shuffle:
+ cli ; End interrupt service (for good)
+ mov ebx,edi ; EBX <- descriptor list
+ lea edx,[edi+ecx+15] ; EDX <- where to relocate our code to
+ and edx,~15 ; Align 16 to benefit the GDT
+ call pm_bcopy
+ mov esi,__bcopyxx_start ; Absolute source address
+ mov edi,edx ; Absolute target address
+ sub edx,esi ; EDX <- address delta
+ mov ecx,__bcopyxx_dwords
+ lea eax,[edx+.safe] ; Resume point
+ ; Relocate this code
+ rep movsd
+ jmp eax ; Jump to safe location
+.safe:
+ ; Give ourselves a safe stack
+ lea esp,[edx+bcopyxx_stack+__bcopyxx_end]
+ add edx,bcopy_gdt ; EDX <- new GDT
+ mov [edx+2],edx ; GDT self-pointer
+ lgdt [edx] ; Switch to local GDT
+
+ ; Now for the actual shuffling...
+.loop:
+ mov edi,[ebx]
+ mov esi,[ebx+4]
+ mov ecx,[ebx+8]
+ add ebx,12
+ jecxz .done
+ call pm_bcopy
+ jmp .loop
+.done:
+ lidt [edx+RM_IDT_ptr-bcopy_gdt] ; RM-like IDT
+ push ecx ; == 0, for cleaning the flags register
+ and esi,esi
+ jz pm_shuffle_16
+ popfd ; Clean the flags
+ jmp edi ; Protected mode entry
+
+ ; We have a 16-bit entry point, so we need to return
+ ; to 16-bit mode. Note: EDX already points to the GDT.
+pm_shuffle_16:
+ mov eax,edi
+ mov [edx+PM_CS16+2],ax
+ mov [edx+PM_DS16+2],ax
+ shr eax,16
+ mov [edx+PM_CS16+4],al
+ mov [edx+PM_CS16+7],ah
+ mov [edx+PM_DS16+4],al
+ mov [edx+PM_DS16+7],ah
+ mov eax,cr0
+ and al,~1
+ popfd ; Clean the flags
+ ; No flag-changing instructions below...
+ mov dx,PM_DS16
+ mov ds,edx
+ mov es,edx
+ mov fs,edx
+ mov gs,edx
+ mov ss,edx
+ jmp PM_CS16:0
+
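; ----------------------------------------------------------------------
; Illustrative sketch (not part of the original file): what the
; (dst, src, len) list consumed by pm_shuffle might look like, matching
; the loads of [ebx], [ebx+4] and [ebx+8] above.  All addresses are
; hypothetical; src == -1 zeroes dst for len bytes, and the len == 0
; entry terminates the list, with dst the entry point and src the mode
; (0 = protected mode, 1 = real mode).
shuffle_list_sketch:
	dd 0x00100000, 0x00400000, 0x00200000	; copy 2 MiB from 4 MiB down to 1 MiB
	dd 0x00090000, 0xFFFFFFFF, 0x00010000	; bzero 64 KiB at 0x90000
	dd 0x00100000, 0x00000000, 0x00000000	; terminator: enter 0x100000 in pm
shuffle_list_sketch_len equ $-shuffle_list_sketch	; byte count passed in ECX
; ----------------------------------------------------------------------
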
+ section .bcopyxx.data
+
+ alignz 16
+; GDT descriptor entry
+%macro desc 1
+bcopy_gdt.%1:
+PM_%1 equ bcopy_gdt.%1-bcopy_gdt
+%endmacro
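; ----------------------------------------------------------------------
; Illustrative note (not part of the original file): each use of the
; `desc' macro only emits a label and a selector constant; the two dd
; lines following it are the descriptor contents themselves.  For
; example, `desc CS16' expands roughly to:
;
;	bcopy_gdt.CS16:
;	PM_CS16 equ bcopy_gdt.CS16-bcopy_gdt
;
; i.e. PM_CS16 is the entry's byte offset within bcopy_gdt, which is
; exactly the selector value loaded into the segment registers (and used
; as a patch offset) by pm_shuffle_16 above.
; ----------------------------------------------------------------------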
+
+bcopy_gdt:
+ dw bcopy_gdt_size-1 ; Null descriptor - contains GDT
+ dd bcopy_gdt ; pointer for LGDT instruction
+ dw 0
+
+ ; TSS segment to keep Intel VT happy. Intel VT is
+ ; unhappy about anything that doesn't smell like a
+ ; full-blown 32-bit OS.
+ desc TSS
+ dw 104-1, DummyTSS ; 08h 32-bit task state segment
+ dd 00008900h ; present, dpl 0, 104 bytes @DummyTSS
+
+ desc CS16
+ dd 0000ffffh ; 10h Code segment, use16, readable,
+ dd 00009b00h ; present, dpl 0, cover 64K
+ desc DS16
+ dd 0000ffffh ; 18h Data segment, use16, read/write,
+ dd 00009300h ; present, dpl 0, cover 64K
+ desc CS32
+ dd 0000ffffh ; 20h Code segment, use32, readable,
+ dd 00cf9b00h ; present, dpl 0, cover all 4G
+ desc DS32
+ dd 0000ffffh ; 28h Data segment, use32, read/write,
+ dd 00cf9300h ; present, dpl 0, cover all 4G
+
+bcopy_gdt_size: equ $-bcopy_gdt
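
; ----------------------------------------------------------------------
; Illustrative note (not part of the original file): the CS16/DS16 base
; fields above are deliberately left zero, because pm_shuffle_16 patches
; them at run time with the 16-bit entry address.  In an 8-byte
; descriptor, base[15:0] sits at byte offsets 2-3, base[23:16] at
; offset 4 and base[31:24] at offset 7, which is why that code writes
; to PM_CS16+2, PM_CS16+4 and PM_CS16+7 (and likewise for PM_DS16).
; ----------------------------------------------------------------------
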
+;
+; Space for a dummy task state segment. It should never be actually
+; accessed, but just in case it is, point to a chunk of memory that
+; has a chance to not be used for anything real...
+;
+DummyTSS equ 0x580
+
+ align 4
+RM_IDT_ptr: dw 0FFFFh ; Length (nonsense, but matches CPU)
+ dd 0 ; Offset
+
+bcopyxx_stack equ 128 ; We want this much stack
+
+ bits 16
+ section .text16