Diffstat (limited to 'efi_memtest')
22 files changed, 3717 insertions, 410 deletions
diff --git a/efi_memtest/Makefile b/efi_memtest/Makefile
index c351d0c..7451619 100644
--- a/efi_memtest/Makefile
+++ b/efi_memtest/Makefile
@@ -90,12 +90,17 @@ AutoGen.obj: memtest86+/efi/Include/AutoGen.c
 	$(CC) $(CFLAGS) $(PREPROCESSOR) $(M) -c -o $@ $< \
 	-I"memtest86+/efi"
 
+%.o: memtest86+/efi/%.c
+	$(CC) $(CFLAGS) $(PREPROCESSOR) $(M) -c -o $@ $< \
+	-I"memtest86+" \
+	-I"memtest86+/efi"
+
 clean:
 	rm -f OUTPUT/*
 	rm -f memtest86+/*.o
 	rm -f *.o
 	rm -f MemtestEfi.obj
-	rm MemtestEfi.map
+	rm -f MemtestEfi.map
 
 move:
diff --git a/efi_memtest/MemtestEfi.c b/efi_memtest/MemtestEfi.c
index c9e44a0..b847aaa 100644
--- a/efi_memtest/MemtestEfi.c
+++ b/efi_memtest/MemtestEfi.c
@@ -16,7 +16,7 @@ UefiMain (
 {
     Print(L"MemtestEfi started\n");
 
-    //test_start();
+    test_start();
 
     return EFI_SUCCESS;
 }
diff --git a/efi_memtest/memtest86+/cpuid.h b/efi_memtest/memtest86+/bios/cpuid.h
index 0feb56e..0feb56e 100644
--- a/efi_memtest/memtest86+/cpuid.h
+++ b/efi_memtest/memtest86+/bios/cpuid.h
diff --git a/efi_memtest/memtest86+/init.c b/efi_memtest/memtest86+/bios/init.c
index 32bff7f..32bff7f 100644
--- a/efi_memtest/memtest86+/init.c
+++ b/efi_memtest/memtest86+/bios/init.c
diff --git a/efi_memtest/logger.h b/efi_memtest/memtest86+/bios/logger.h
index e69de29..e69de29 100644
--- a/efi_memtest/logger.h
+++ b/efi_memtest/memtest86+/bios/logger.h
diff --git a/efi_memtest/memtest86+/bios/main_asm.h b/efi_memtest/memtest86+/bios/main_asm.h
new file mode 100644
index 0000000..8e6efbc
--- /dev/null
+++ b/efi_memtest/memtest86+/bios/main_asm.h
@@ -0,0 +1,49 @@
+static inline void enable_fp_processing(void) {
+    if (cpu_id.fid.bits.fpu)
+        __asm__ __volatile__
+        (
+            "movl %%cr0, %%eax\n\t"
+            "andl $0x7, %%eax\n\t"
+            "movl %%eax, %%cr0\n\t"
+            : :
+            : "ax"
+        );
+    if (cpu_id.fid.bits.sse)
+        __asm__ __volatile__
+        (
+            "movl %%cr4, %%eax\n\t"
+            "orl $0x00000200, %%eax\n\t"
+            "movl %%eax, %%cr4\n\t"
+            : :
+            : "ax"
+        );
+
+}
+
+static inline void setup_mm_modes(void) {
+    /* If we have PAE, turn it on */
+    if (cpu_id.fid.bits.pae == 1) {
+        __asm__ __volatile__
+        (
+            "movl %%cr4, %%eax\n\t"
+            "orl $0x00000020, %%eax\n\t"
+            "movl %%eax, %%cr4\n\t"
+            : :
+            : "ax"
+        );
+        cprint(LINE_TITLE+1, COL_MODE, "(PAE Mode)");
+    }
+    /* If this is a 64-bit CPU enable long mode */
+    if (cpu_id.fid.bits.lm == 1) {
+        __asm__ __volatile__
+        (
+            "movl $0xc0000080, %%ecx\n\t"
+            "rdmsr\n\t"
+            "orl $0x00000100, %%eax\n\t"
+            "wrmsr\n\t"
+            : :
+            : "ax", "cx"
+        );
+        cprint(LINE_TITLE+1, COL_MODE, "(X64 Mode)");
+    }
+}
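The control-register masks in these helpers are architecturally defined x86 bits. As a reading aid, a minimal sketch (the macro names are ours, not part of the patch) spelling out the constants used above and in test_cache.h further down:

/* Reading aid only: x86-defined bits behind the inline asm in
 * main_asm.h and test_cache.h. Macro names are illustrative. */
#define CR0_CD     0x40000000UL /* cache disable; set by cache_off()          */
#define CR0_NW     0x20000000UL /* not write-through; cleared by cache_on()   */
#define CR4_PAE    0x00000020UL /* Physical Address Extension enable          */
#define CR4_OSFXSR 0x00000200UL /* OS supports FXSAVE/FXRSTOR, needed for SSE */
#define MSR_EFER   0xc0000080UL /* IA32_EFER, read/written with rdmsr/wrmsr   */
#define EFER_LME   0x00000100UL /* long-mode enable bit inside IA32_EFER      */

So the andl $0x9fffffff in cache_on() clears exactly CR0_CD | CR0_NW, and setup_mm_modes() sets CR4_PAE and then EFER_LME before reporting "(PAE Mode)" or "(X64 Mode)".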
\ No newline at end of file diff --git a/efi_memtest/memtest86+/test.c b/efi_memtest/memtest86+/bios/test.c index 864dfcc..864dfcc 100644 --- a/efi_memtest/memtest86+/test.c +++ b/efi_memtest/memtest86+/bios/test.c diff --git a/efi_memtest/memtest86+/bios/test_cache.h b/efi_memtest/memtest86+/bios/test_cache.h new file mode 100644 index 0000000..48b4869 --- /dev/null +++ b/efi_memtest/memtest86+/bios/test_cache.h @@ -0,0 +1,20 @@ +static inline void cache_off(void) +{ + asm( + "push %eax\n\t" + "movl %cr0,%eax\n\t" + "orl $0x40000000,%eax\n\t" /* Set CD */ + "movl %eax,%cr0\n\t" + "wbinvd\n\t" + "pop %eax\n\t"); +} + +static inline void cache_on(void) +{ + asm( + "push %eax\n\t" + "movl %cr0,%eax\n\t" + "andl $0x9fffffff,%eax\n\t" /* Clear CD and NW */ + "movl %eax,%cr0\n\t" + "pop %eax\n\t"); +} diff --git a/efi_memtest/memtest86+/vmem.c b/efi_memtest/memtest86+/bios/vmem.c index 6125e0d..6125e0d 100644 --- a/efi_memtest/memtest86+/vmem.c +++ b/efi_memtest/memtest86+/bios/vmem.c diff --git a/efi_memtest/memtest86+/efi/cpuid.h b/efi_memtest/memtest86+/efi/cpuid.h new file mode 100644 index 0000000..19e2d51 --- /dev/null +++ b/efi_memtest/memtest86+/efi/cpuid.h @@ -0,0 +1,205 @@ + + +#ifndef CPUID_H_ +#define CPUID_H_ + + +/* + * cpuid.h -- + * contains the data structures required for CPUID + * implementation. + */ + +#define CPUID_VENDOR_LENGTH 3 /* 3 GPRs hold vendor ID */ +#define CPUID_VENDOR_STR_LENGTH (CPUID_VENDOR_LENGTH * sizeof(uint32_t) + 1) +#define CPUID_BRAND_LENGTH 12 /* 12 GPRs hold vendor ID */ +#define CPUID_BRAND_STR_LENGTH (CPUID_BRAND_LENGTH * sizeof(uint32_t) + 1) + +extern struct cpu_ident cpu_id; + +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile("\t" + "push %%rbx; cpuid; mov %%ebx, %%edi; pop %%rbx" + : "=a" (*eax), + "=D" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +static inline void cpuid(unsigned int op, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); +} + +/* Some CPUID calls want 'count' to be placed in ecx */ +static inline void cpuid_count(unsigned int op, int count, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +/* Typedef for storing the Cache Information */ +typedef union { + unsigned char ch[48]; + uint32_t uint[12]; + struct { + uint32_t fill1:24; /* Bit 0 */ + uint32_t l1_i_sz:8; + uint32_t fill2:24; + uint32_t l1_d_sz:8; + uint32_t fill3:16; + uint32_t l2_sz:16; + uint32_t fill4:18; + uint32_t l3_sz:14; + uint32_t fill5[8]; + } amd; +} cpuid_cache_info_t; + +/* Typedef for storing the CPUID Vendor String */ +typedef union { + /* Note: the extra byte in the char array is for '\0'. */ + char char_array[CPUID_VENDOR_STR_LENGTH]; + uint32_t uint32_array[CPUID_VENDOR_LENGTH]; +} cpuid_vendor_string_t; + +/* Typedef for storing the CPUID Brand String */ +typedef union { + /* Note: the extra byte in the char array is for '\0'. 
*/ + char char_array[CPUID_BRAND_STR_LENGTH]; + uint32_t uint32_array[CPUID_BRAND_LENGTH]; +} cpuid_brand_string_t; + +/* Typedef for storing CPUID Version */ +typedef union { + uint32_t flat; + struct { + uint32_t stepping:4; /* Bit 0 */ + uint32_t model:4; + uint32_t family:4; + uint32_t processorType:2; + uint32_t reserved1514:2; + uint32_t extendedModel:4; + uint32_t extendedFamily:8; + uint32_t reserved3128:4; /* Bit 31 */ + } bits; +} cpuid_version_t; + +/* Typedef for storing CPUID Processor Information */ +typedef union { + uint32_t flat; + struct { + uint32_t brandIndex:8; /* Bit 0 */ + uint32_t cflushLineSize:8; + uint32_t logicalProcessorCount:8; + uint32_t apicID:8; /* Bit 31 */ + } bits; +} cpuid_proc_info_t; + +/* Typedef for storing CPUID Feature flags */ +typedef union { + uint32_t flat; + struct { + uint32_t :1; + } bits; +} cpuid_custom_features; + +/* Typedef for storing CPUID Feature flags */ +typedef union { + uint32_t uint32_array[3]; + struct { + uint32_t fpu:1; /* EDX feature flags, bit 0 */ + uint32_t vme:1; + uint32_t de:1; + uint32_t pse:1; + uint32_t rdtsc:1; + uint32_t msr:1; + uint32_t pae:1; + uint32_t mce:1; + uint32_t cx8:1; + uint32_t apic:1; + uint32_t bit10:1; + uint32_t sep:1; + uint32_t mtrr:1; + uint32_t pge:1; + uint32_t mca:1; + uint32_t cmov:1; + uint32_t pat:1; + uint32_t pse36:1; + uint32_t psn:1; + uint32_t cflush:1; + uint32_t bit20:1; + uint32_t ds:1; + uint32_t acpi:1; + uint32_t mmx:1; + uint32_t fxsr:1; + uint32_t sse:1; + uint32_t sse2:1; + uint32_t ss:1; + uint32_t htt:1; + uint32_t tm:1; + uint32_t bit30:1; + uint32_t pbe:1; /* EDX feature flags, bit 31 */ + uint32_t sse3:1; /* ECX feature flags, bit 0 */ + uint32_t mulq:1; + uint32_t bit2:1; + uint32_t mon:1; + uint32_t dscpl:1; + uint32_t vmx:1; + uint32_t smx:1; + uint32_t eist:1; + uint32_t tm2:1; + uint32_t bits_9_31:23; + uint32_t bits0_28:29; /* EDX extended feature flags, bit 0 */ + uint32_t lm:1; /* Long Mode */ + uint32_t bits_30_31:2; /* EDX extended feature flags, bit 32 */ + } bits; +} cpuid_feature_flags_t; + +/* An overall structure to cache all of the CPUID information */ +struct cpu_ident { + uint32_t max_cpuid; + uint32_t max_xcpuid; + uint32_t dts_pmp; + cpuid_version_t vers; + cpuid_proc_info_t info; + cpuid_feature_flags_t fid; + cpuid_vendor_string_t vend_id; + cpuid_brand_string_t brand_id; + cpuid_cache_info_t cache_info; + cpuid_custom_features custom; +}; + +struct cpuid4_eax { + uint32_t ctype:5; + uint32_t level:3; + uint32_t is_self_initializing:1; + uint32_t is_fully_associative:1; + uint32_t reserved:4; + uint32_t num_threads_sharing:12; + uint32_t num_cores_on_die:6; +}; + +struct cpuid4_ebx { + uint32_t coherency_line_size:12; + uint32_t physical_line_partition:10; + uint32_t ways_of_associativity:10; +}; + +struct cpuid4_ecx { + uint32_t number_of_sets:32; +}; + +void get_cpuid(); + +#endif // CPUID_H_ diff --git a/efi_memtest/memtest86+/efi/init.c b/efi_memtest/memtest86+/efi/init.c new file mode 100644 index 0000000..6388443 --- /dev/null +++ b/efi_memtest/memtest86+/efi/init.c @@ -0,0 +1,1297 @@ +/* + * MemTest86+ V5 Specific code (GPL V2.0) + * By Samuel DEMEULEMEESTER, sdemeule@memtest.org + * http://www.canardpc.com - http://www.memtest.org + * ------------------------------------------------ + * init.c - MemTest-86 Version 3.6 + * + * Released under version 2 of the Gnu Public License. 
+ * By Chris Brady + */ + + +#include "stdin.h" +#include "stddef.h" +#include "test.h" +#include "defs.h" +#include "config.h" +#include "cpuid.h" +#include "smp.h" +#include "io.h" +#include "spd.h" +#include "pci.h" +#include "controller.h" + +extern struct tseq tseq[]; +extern short memsz_mode; +extern int num_cpus; +extern int act_cpus; +extern int found_cpus; +unsigned long imc_type = 0; +extern int maxcpus; +extern char cpu_mask[]; +extern void initialise_cpus(); + +/* Here we store all of the cpuid data */ +extern struct cpu_ident cpu_id; + +int l1_cache=0, l2_cache=0, l3_cache=0; +int tsc_invariable = 0; +ulong extclock; + +ulong memspeed(ulong src, ulong len, int iter); +static void cpu_type(void); +static int cpuspeed(void); +static void get_cache_size(); +static void cpu_cache_speed(); +void get_cpuid(); +int beepmode; +extern short dmi_initialized; +extern int dmi_err_cnts[MAX_DMI_MEMDEVS]; + +/* Failsafe function */ +/* msec: number of ms to wait - scs: scancode expected to stop */ +/* bits: 0 = extended detection - 1: SMP - 2: Temp Check */ +/* 3: MP SMP - 4-7: RSVD */ +void failsafe(int msec, int scs) +{ + int i; + ulong sh, sl, l, h, t; + unsigned char c; + volatile char *pp; + + for(i=0, pp=(char *)(SCREEN_ADR+(18*160)+(18*2)+1); i<40; i++, pp+=2) { + *pp = 0x1E; + } + for(i=0, pp=(char *)(SCREEN_ADR+(18*160)+(18*2)+1); i<3; i++, pp+=2) { + *pp = 0x9E; + } + for(i=0, pp=(char *)(SCREEN_ADR+(18*160)+(55*2)+1); i<3; i++, pp+=2) { + *pp = 0x9E; + } + + cprint(18, 18, "==> Press F1 to enter Fail-Safe Mode <=="); + + if(vv->fail_safe & 2) + { + cprint(19, 15, "==> Press F2 to force Multi-Threading (SMP) <=="); + } + + /* save the starting time */ + asm __volatile__ + ("rdtsc":"=a" (sl),"=d" (sh)); + + /* loop for n seconds */ + while (1) { + /* asm __volatile__( + "rdtsc":"=a" (l),"=d" (h)); + asm __volatile__ ( + "subl %2,%0\n\t" + "sbbl %3,%1" + :"=a" (l), "=d" (h) + :"g" (sl), "g" (sh), + "0" (l), "1" (h));*/ + h = 1; // TODO remove + l = 1; // TODO remove + t = h * ((unsigned)0xffffffff / vv->clks_msec); + t += (l / vv->clks_msec); + + /* Is the time up? */ + if (t >= msec) { break; } + + /* Is expected Scan code pressed? 
*/ + c = get_key(); + c &= 0x7f; + + /* F1 */ + if(c == scs) { vv->fail_safe |= 1; break; } + + /* F2 */ + if(c == scs+1) + { + vv->fail_safe ^= 2; + break; + + } + + /* F3 */ + if(c == scs+2) + { + if(vv->fail_safe & 2) { vv->fail_safe ^= 2; } + vv->fail_safe |= 8; + break; + } + } + + cprint(18, 18, " "); + cprint(19, 15, " "); + + for(i=0, pp=(char *)(SCREEN_ADR+(18*160)+(18*2)+1); i<40; i++, pp+=2) { + *pp = 0x17; + } +} + +static void display_init(void) +{ + int i; + volatile char *pp; + + /* Set HW cursor out of screen boundaries */ + __outb(0x0F, 0x03D4); + __outb(0xFF, 0x03D5); + + __outb(0x0E, 0x03D4); + __outb(0xFF, 0x03D5); + + + serial_echo_init(); + serial_echo_print("[LINE_SCROLL;24r"); /* Set scroll area row 7-23 */ + serial_echo_print("[H[2J"); /* Clear Screen */ + serial_echo_print("[37m[44m"); + serial_echo_print("[0m"); + serial_echo_print("[37m[44m"); + + /* Clear screen & set background to blue */ + for(i=0, pp=(char *)(SCREEN_ADR); i<80*24; i++) { + *pp++ = ' '; + *pp++ = 0x17; + } + + /* Make the name background green */ + for(i=0, pp=(char *)(SCREEN_ADR+1); i<TITLE_WIDTH; i++, pp+=2) { + *pp = 0x20; + } + cprint(0, 0, " Memtest86 5.31b "); + + /* Set Blinking "+" */ + for(i=0, pp=(char *)(SCREEN_ADR+1); i<2; i++, pp+=30) { + *pp = 0xA4; + } + cprint(0, 15, "+"); + + /* Do reverse video for the bottom display line */ + for(i=0, pp=(char *)(SCREEN_ADR+1+(24 * 160)); i<80; i++, pp+=2) { + *pp = 0x71; + } + + serial_echo_print("[0m"); +} + +/* + * Initialize test, setup screen and find out how much memory there is. + */ +void init(void) +{ + int i; + + outb(0x8, 0x3f2); /* Kill Floppy Motor */ + + /* Turn on cache */ + set_cache(1); + + /* Setup the display */ + display_init(); + + cprint(5, 60, "| Time: 0:00:00"); + cprint(1, COL_MID,"Pass %"); + cprint(2, COL_MID,"Test %"); + cprint(3, COL_MID,"Test #"); + cprint(4, COL_MID,"Testing: "); + cprint(5, COL_MID,"Pattern: "); + cprint(1, 0, "CLK: (32b Mode)"); + cprint(2, 0, "L1 Cache: Unknown "); + cprint(3, 0, "L2 Cache: Unknown "); + cprint(4, 0, "L3 Cache: None "); + cprint(5, 0, "Memory : "); + cprint(6, 0, "------------------------------------------------------------------------------"); + cprint(7, 0, "Core#:"); + cprint(8, 0, "State:"); + cprint(9, 0, "Cores: Active / Total (Run: All) | Pass: 0 Errors: 0 "); + cprint(10, 0, "------------------------------------------------------------------------------"); + + /* + for(i=0, pp=(char *)(SCREEN_ADR+(5*160)+(53*2)+1); i<20; i++, pp+=2) { + *pp = 0x92; + } + + for(i=0, pp=(char *)(SCREEN_ADR+0*160+1); i<80; i++, pp+=2) { + *pp = 0x47; + } + */ + + cprint(7, 39, "| Chipset : Unknown"); + cprint(8, 39, "| Memory Type : Unknown"); + + for(i=0; i < 6; i++) { + cprint(i, COL_MID-2, "| "); + } + + footer(); + + aprint(5, 10, vv->test_pages); + + vv->pass = 0; + vv->msg_line = 0; + vv->ecount = 0; + vv->ecc_ecount = 0; + vv->testsel = -1; + vv->msg_line = LINE_SCROLL-1; + vv->scroll_start = vv->msg_line * 160; + vv->erri.low_addr.page = 0x7fffffff; + vv->erri.low_addr.offset = 0xfff; + vv->erri.high_addr.page = 0; + vv->erri.high_addr.offset = 0; + vv->erri.min_bits = 32; + vv->erri.max_bits = 0; + vv->erri.min_bits = 32; + vv->erri.max_bits = 0; + vv->erri.maxl = 0; + vv->erri.cor_err = 0; + vv->erri.ebits = 0; + vv->erri.hdr_flag = 0; + vv->erri.tbits = 0; + for (i=0; tseq[i].msg != NULL; i++) { + tseq[i].errors = 0; + } + if (dmi_initialized) { + for (i=0; i < MAX_DMI_MEMDEVS; i++){ + if (dmi_err_cnts[i] > 0) { + dmi_err_cnts[i] = 0; + } + } + } + + /* setup beep mode */ + 
beepmode = BEEP_MODE;
+
+    /* Get the cpu and cache information */
+    get_cpuid();
+
+    /* setup pci */
+    pci_init();
+
+    get_cache_size();
+
+    cpu_type();
+
+    cpu_cache_speed();
+
+    /* Check fail safe */
+    failsafe(5000, 0x3B);
+
+    /* Initialize SMP */
+    initialise_cpus();
+
+    for (i = 0; i < num_cpus; i++) {
+        dprint(7, i+7, i%10, 1, 0);
+        cprint(8, i+7, "S");
+    }
+
+    dprint(9, 19, num_cpus, 2, 0);
+
+    if((vv->fail_safe & 3) == 2)
+    {
+        cprint(LINE_CPU,9, "(SMP: Disabled)");
+        cprint(LINE_RAM,9, "Running...");
+    }
+    // dprint(10, 5, found_cpus, 2, 0);
+
+    /* Find Memory Specs */
+    if(vv->fail_safe & 1)
+    {
+        cprint(LINE_CPU, COL_SPEC, " **** FAIL SAFE **** FAIL SAFE **** ");
+        cprint(LINE_RAM, COL_SPEC, " No detection, same reliability ");
+    } else {
+        find_controller();
+        get_spd_spec();
+        if(num_cpus <= 16 && !(vv->fail_safe & 4)) { coretemp(); }
+    }
+
+    if(vv->check_temp > 0 && !(vv->fail_safe & 4))
+    {
+        cprint(LINE_CPU, 26, "| CPU Temp");
+        cprint(LINE_CPU+1, 26, "| øC");
+    }
+
+    beep(600);
+    beep(1000);
+
+    /* Record the start time */
+    asm __volatile__ ("rdtsc":"=a" (vv->startl),"=d" (vv->starth));
+    vv->snapl = vv->startl;
+    vv->snaph = vv->starth;
+    if (l1_cache == 0) { l1_cache = 64; }
+    if (l2_cache == 0) { l2_cache = 512; }
+    vv->printmode=PRINTMODE_ADDRESSES;
+    vv->numpatn=0;
+}
+
+/* Get cache sizes for most AMD and Intel CPUs, exceptions for old CPUs are
+ * handled in CPU detection */
+void get_cache_size()
+{
+    int i, j, n, size;
+    unsigned int v[4];
+    unsigned char *dp = (unsigned char *)v;
+    struct cpuid4_eax *eax = (struct cpuid4_eax *)&v[0];
+    struct cpuid4_ebx *ebx = (struct cpuid4_ebx *)&v[1];
+    struct cpuid4_ecx *ecx = (struct cpuid4_ecx *)&v[2];
+
+    switch(cpu_id.vend_id.char_array[0]) {
+    /* AMD Processors */
+    case 'A':
+        //l1_cache = cpu_id.cache_info.amd.l1_i_sz;
+        l1_cache = cpu_id.cache_info.amd.l1_d_sz;
+        l2_cache = cpu_id.cache_info.amd.l2_sz;
+        l3_cache = cpu_id.cache_info.amd.l3_sz;
+        l3_cache *= 512;
+        break;
+    case 'G':
+        /* Intel Processors */
+        l1_cache = 0;
+        l2_cache = 0;
+        l3_cache = 0;
+
+        /* Use CPUID(4) if it is available */
+        if (cpu_id.max_cpuid > 3) {
+
+            /* figure out how many cache leaves */
+            n = -1;
+            do
+            {
+                ++n;
+                /* Do cpuid(4) loop to find out num_cache_leaves */
+                cpuid_count(4, n, &v[0], &v[1], &v[2], &v[3]);
+            } while ((eax->ctype) != 0);
+
+            /* loop through all of the leaves */
+            for (i=0; i<n; i++)
+            {
+                cpuid_count(4, i, &v[0], &v[1], &v[2], &v[3]);
+
+                /* Check for a valid cache type */
+                if (eax->ctype == 1 || eax->ctype == 3)
+                {
+
+                    /* Compute the cache size */
+                    size = (ecx->number_of_sets + 1) *
+                        (ebx->coherency_line_size + 1) *
+                        (ebx->physical_line_partition + 1) *
+                        (ebx->ways_of_associativity + 1);
+                    size /= 1024;
+
+                    switch (eax->level)
+                    {
+                    case 1:
+                        l1_cache += size;
+                        break;
+                    case 2:
+                        l2_cache += size;
+                        break;
+                    case 3:
+                        l3_cache += size;
+                        break;
+                    }
+                }
+            }
+            return;
+        }
+
+        /* No CPUID(4) so we use the older CPUID(2) method */
+        /* Get number of times to iterate */
+        cpuid(2, &v[0], &v[1], &v[2], &v[3]);
+        n = v[0] & 0xff;
+        for (i=0 ; i<n ; i++) {
+            cpuid(2, &v[0], &v[1], &v[2], &v[3]);
+
+            /* If bit 31 is set, this is an unknown format */
+            for (j=0 ; j<3 ; j++) {
+                if (v[j] & (1 << 31)) {
+                    v[j] = 0;
+                }
+            }
+
+            /* Byte 0 is level count, not a descriptor */
+            for (j = 1 ; j < 16 ; j++) {
+                switch(dp[j]) {
+                case 0x6:
+                case 0xa:
+                case 0x66:
+                    l1_cache += 8;
+                    break;
+                case 0x8:
+                case 0xc:
+                case 0xd:
+                case 0x60:
+                case 0x67:
+                    l1_cache += 16;
+                    break;
+                case 0xe:
+                    l1_cache += 24;
+                    break;
+                case 0x9:
+                case 0x2c:
+                case 0x30:
+                case 0x68:
+                    l1_cache += 32;
+                    break;
+                case 0x39:
+                case 0x3b:
+                case 0x41:
+                case 0x79:
+                    l2_cache += 128;
+                    break;
+                case 0x3a:
+                    l2_cache += 192;
+                    break;
+                case 0x21:
+                case 0x3c:
+                case 0x3f:
+                case 0x42:
+                case 0x7a:
+                case 0x82:
+                    l2_cache += 256;
+                    break;
+                case 0x3d:
+                    l2_cache += 384;
+                    break;
+                case 0x3e:
+                case 0x43:
+                case 0x7b:
+                case 0x7f:
+                case 0x80:
+                case 0x83:
+                case 0x86:
+                    l2_cache += 512;
+                    break;
+                case 0x44:
+                case 0x78:
+                case 0x7c:
+                case 0x84:
+                case 0x87:
+                    l2_cache += 1024;
+                    break;
+                case 0x45:
+                case 0x7d:
+                case 0x85:
+                    l2_cache += 2048;
+                    break;
+                case 0x48:
+                    l2_cache += 3072;
+                    break;
+                case 0x4e:
+                    l2_cache += 6144;
+                    break;
+                case 0x23:
+                case 0xd0:
+                    l3_cache += 512;
+                    break;
+                case 0xd1:
+                case 0xd6:
+                    l3_cache += 1024;
+                    break;
+                case 0x25:
+                case 0xd2:
+                case 0xd7:
+                case 0xdc:
+                case 0xe2:
+                    l3_cache += 2048;
+                    break;
+                case 0x29:
+                case 0x46:
+                case 0x49:
+                case 0xd8:
+                case 0xdd:
+                case 0xe3:
+                    l3_cache += 4096;
+                    break;
+                case 0x4a:
+                    l3_cache += 6144;
+                    break;
+                case 0x47:
+                case 0x4b:
+                case 0xde:
+                case 0xe4:
+                    l3_cache += 8192;
+                    break;
+                case 0x4c:
+                case 0xea:
+                    l3_cache += 12288;
+                    break;
+                case 0x4d:
+                    l3_cache += 16384;
+                    break;
+                case 0xeb:
+                    l3_cache += 18432;
+                    break;
+                case 0xec:
+                    l3_cache += 24576;
+                    break;
+                } /* end switch */
+            } /* end for 1-16 */
+        } /* end for 0 - n */
+    }
+}
+
+/*
+ * Find IMC type and set global variables accordingly
+ */
+void detect_imc(void)
+{
+    // Check AMD IMC
+    if(cpu_id.vend_id.char_array[0] == 'A' && cpu_id.vers.bits.family == 0xF)
+    {
+        switch(cpu_id.vers.bits.extendedFamily)
+        {
+        case 0x0:
+            imc_type = 0x0100; // Old K8
+            break;
+        case 0x1:
+        case 0x2:
+            imc_type = 0x0101; // K10 (Family 10h & 11h)
+            break;
+        case 0x3:
+            imc_type = 0x0102; // A-Series APU (Family 12h)
+            break;
+        case 0x5:
+            imc_type = 0x0103; // C- / E- / Z- Series APU (Family 14h)
+            break;
+        case 0x6:
+            imc_type = 0x0104; // FX Series (Family 15h)
+            break;
+        case 0x7:
+            imc_type = 0x0105; // Kabini & related (Family 16h)
+            break;
+        }
+        return;
+    }
+
+    // Check Intel IMC
+    if(cpu_id.vend_id.char_array[0] == 'G' && cpu_id.vers.bits.family == 6 && cpu_id.vers.bits.extendedModel)
+    {
+        switch(cpu_id.vers.bits.model)
+        {
+        case 0x5:
+            if(cpu_id.vers.bits.extendedModel == 2) { imc_type = 0x0003; } // Core i3/i5 1st Gen 45 nm (NHM)
+            if(cpu_id.vers.bits.extendedModel == 3) { vv->fail_safe |= 4; } // Atom Clover Trail
+            if(cpu_id.vers.bits.extendedModel == 4) { imc_type = 0x0007; } // HSW-ULT
+            break;
+        case 0x6:
+            if(cpu_id.vers.bits.extendedModel == 3) {
+                imc_type = 0x0009; // Atom Cedar Trail
+                vv->fail_safe |= 4; // Disable Core temp
+            }
+            break;
+        case 0xA:
+            switch(cpu_id.vers.bits.extendedModel)
+            {
+            case 0x1:
+                imc_type = 0x0001; // Core i7 1st Gen 45 nm (NHME)
+                break;
+            case 0x2:
+                imc_type = 0x0004; // Core 2nd Gen (SNB)
+                break;
+            case 0x3:
+                imc_type = 0x0006; // Core 3rd Gen (IVB)
+                break;
+            }
+            break;
+        case 0xC:
+            switch(cpu_id.vers.bits.extendedModel)
+            {
+            case 0x1:
+                if(cpu_id.vers.bits.stepping > 9) { imc_type = 0x0008; } // Atom PineView
+                vv->fail_safe |= 4; // Disable Core temp
+                break;
+            case 0x2:
+                imc_type = 0x0002; // Core i7 1st Gen 32 nm (WMR)
+                break;
+            case 0x3:
+                imc_type = 0x0007; // Core 4th Gen (HSW)
+                break;
+            }
+            break;
+        case 0xD:
+            imc_type = 0x0005; // SNB-E
+            break;
+        case 0xE:
+            imc_type = 0x0001; // Core i7 1st Gen 45 nm (NHM)
+            break;
+        }
+
+        if(imc_type) { tsc_invariable = 1; }
+        return;
+    }
+}
+
+void smp_default_mode(void)
+{
+    int i, result;
+    char *cpupsn = cpu_id.brand_id.char_array;
+    char *disabledcpu[] = { "Opteron", "Xeon", "EPYC", "Genuine Intel" };
+
+    for(i = 0; i < 4; i++)
+    {
+        result = mt86_strstr(cpupsn, disabledcpu[i]);
+        if(result != -1) { vv->fail_safe |= 0b10; }
+    }
+
+    // For 5.01 release, SMP disabled by default by config.h toggle
+    if(CONSERVATIVE_SMP) { vv->fail_safe |= 0b10; }
+
+}
+
+/*
+ * Find CPU type
+ */
+void cpu_type(void)
+{
+    /* If we can get a brand string use it, and we are done */
+    if (cpu_id.max_xcpuid >= 0x80000004) {
+        cprint(0, COL_MID, cpu_id.brand_id.char_array);
+        //If we have a brand string, maybe we have an IMC. Check that.
+        detect_imc();
+        smp_default_mode();
+        return;
+    }
+
+    /* The brand string is not available so we need to figure out
+     * what CPU we have */
+    switch(cpu_id.vend_id.char_array[0]) {
+    /* AMD Processors */
+    case 'A':
+        switch(cpu_id.vers.bits.family) {
+        case 4:
+            switch(cpu_id.vers.bits.model) {
+            case 3:
+                cprint(0, COL_MID, "AMD 486DX2");
+                break;
+            case 7:
+                cprint(0, COL_MID, "AMD 486DX2-WB");
+                break;
+            case 8:
+                cprint(0, COL_MID, "AMD 486DX4");
+                break;
+            case 9:
+                cprint(0, COL_MID, "AMD 486DX4-WB");
+                break;
+            case 14:
+                cprint(0, COL_MID, "AMD 5x86-WT");
+                break;
+            case 15:
+                cprint(0, COL_MID, "AMD 5x86-WB");
+                break;
+            }
+            /* Since we can't get CPU speed or cache info return */
+            return;
+        case 5:
+            switch(cpu_id.vers.bits.model) {
+            case 0:
+            case 1:
+            case 2:
+            case 3:
+                cprint(0, COL_MID, "AMD K5");
+                l1_cache = 8;
+                break;
+            case 6:
+            case 7:
+                cprint(0, COL_MID, "AMD K6");
+                break;
+            case 8:
+                cprint(0, COL_MID, "AMD K6-2");
+                break;
+            case 9:
+                cprint(0, COL_MID, "AMD K6-III");
+                break;
+            case 13:
+                cprint(0, COL_MID, "AMD K6-III+");
+                break;
+            }
+            break;
+        case 6:
+
+            switch(cpu_id.vers.bits.model) {
+            case 1:
+                cprint(0, COL_MID, "AMD Athlon (0.25)");
+                break;
+            case 2:
+            case 4:
+                cprint(0, COL_MID, "AMD Athlon (0.18)");
+                break;
+            case 6:
+                if (l2_cache == 64) {
+                    cprint(0, COL_MID, "AMD Duron (0.18)");
+                } else {
+                    cprint(0, COL_MID, "Athlon XP (0.18)");
+                }
+                break;
+            case 8:
+            case 10:
+                if (l2_cache == 64) {
+                    cprint(0, COL_MID, "AMD Duron (0.13)");
+                } else {
+                    cprint(0, COL_MID, "Athlon XP (0.13)");
+                }
+                break;
+            case 3:
+            case 7:
+                cprint(0, COL_MID, "AMD Duron");
+                /* Duron stepping 0 CPUID for L2 is broken */
+                /* (AMD errata T13)*/
+                if (cpu_id.vers.bits.stepping == 0) { /* stepping 0 */
+                    /* Hard code the right L2 size */
+                    l2_cache = 64;
+                } else {
+                }
+                break;
+            }
+            break;
+
+        /* All AMD family values >= 10 have the Brand ID
+         * feature so we don't need to find the CPU type */
+        }
+        break;
+
+    /* Intel or Transmeta Processors */
+    case 'G':
+        if ( cpu_id.vend_id.char_array[7] == 'T' ) { /* GenuineTMx86 */
+            if (cpu_id.vers.bits.family == 5) {
+                cprint(0, COL_MID, "TM 5x00");
+            } else if (cpu_id.vers.bits.family == 15) {
+                cprint(0, COL_MID, "TM 8x00");
+            }
+            l1_cache = cpu_id.cache_info.ch[3] + cpu_id.cache_info.ch[7];
+            l2_cache = (cpu_id.cache_info.ch[11]*256) + cpu_id.cache_info.ch[10];
+        } else { /* GenuineIntel */
+            if (cpu_id.vers.bits.family == 4) {
+                switch(cpu_id.vers.bits.model) {
+                case 0:
+                case 1:
+                    cprint(0, COL_MID, "Intel 486DX");
+                    break;
+                case 2:
+                    cprint(0, COL_MID, "Intel 486SX");
+                    break;
+                case 3:
+                    cprint(0, COL_MID, "Intel 486DX2");
+                    break;
+                case 4:
+                    cprint(0, COL_MID, "Intel 486SL");
+                    break;
+                case 5:
+                    cprint(0, COL_MID, "Intel 486SX2");
+                    break;
+                case 7:
+                    cprint(0, COL_MID, "Intel 486DX2-WB");
+                    break;
+                case 8:
+                    cprint(0, COL_MID, "Intel 486DX4");
+                    break;
+                case 9:
+                    cprint(0, COL_MID, "Intel 486DX4-WB");
486DX4-WB"); + break; + } + /* Since we can't get CPU speed or cache info return */ + return; + } + + + switch(cpu_id.vers.bits.family) { + case 5: + switch(cpu_id.vers.bits.model) { + case 0: + case 1: + case 2: + case 3: + case 7: + cprint(0, COL_MID, "Pentium"); + if (l1_cache == 0) { + l1_cache = 8; + } + break; + case 4: + case 8: + cprint(0, COL_MID, "Pentium-MMX"); + if (l1_cache == 0) { + l1_cache = 16; + } + break; + } + break; + case 6: + switch(cpu_id.vers.bits.model) { + case 0: + case 1: + cprint(0, COL_MID, "Pentium Pro"); + break; + case 3: + case 4: + cprint(0, COL_MID, "Pentium II"); + break; + case 5: + if (l2_cache == 0) { + cprint(0, COL_MID, "Celeron"); + } else { + cprint(0, COL_MID, "Pentium II"); + } + break; + case 6: + if (l2_cache == 128) { + cprint(0, COL_MID, "Celeron"); + } else { + cprint(0, COL_MID, "Pentium II"); + } + } + break; + case 7: + case 8: + case 11: + if (l2_cache == 128) { + cprint(0, COL_MID, "Celeron"); + } else { + cprint(0, COL_MID, "Pentium III"); + } + break; + case 9: + if (l2_cache == 512) { + cprint(0, COL_MID, "Celeron M (0.13)"); + } else { + cprint(0, COL_MID, "Pentium M (0.13)"); + } + break; + case 10: + cprint(0, COL_MID, "Pentium III Xeon"); + break; + case 12: + l1_cache = 24; + cprint(0, COL_MID, "Atom (0.045)"); + break; + case 13: + if (l2_cache == 1024) { + cprint(0, COL_MID, "Celeron M (0.09)"); + } else { + cprint(0, COL_MID, "Pentium M (0.09)"); + } + break; + case 14: + cprint(0, COL_MID, "Intel Core"); + break; + case 15: + if (l2_cache == 1024) { + cprint(0, COL_MID, "Pentium E"); + } else { + cprint(0, COL_MID, "Intel Core 2"); + } + break; + } + break; + case 15: + switch(cpu_id.vers.bits.model) { + case 0: + case 1: + case 2: + if (l2_cache == 128) { + cprint(0, COL_MID, "Celeron"); + } else { + cprint(0, COL_MID, "Pentium 4"); + } + break; + case 3: + case 4: + if (l2_cache == 256) { + cprint(0, COL_MID, "Celeron (0.09)"); + } else { + cprint(0, COL_MID, "Pentium 4 (0.09)"); + } + break; + case 6: + cprint(0, COL_MID, "Pentium D (65nm)"); + break; + default: + cprint(0, COL_MID, "Unknown Intel"); + break; + break; + } + + } + break; + + /* VIA/Cyrix/Centaur Processors with CPUID */ + case 'C': + if ( cpu_id.vend_id.char_array[1] == 'e' ) { /* CentaurHauls */ + l1_cache = cpu_id.cache_info.ch[3] + cpu_id.cache_info.ch[7]; + l2_cache = cpu_id.cache_info.ch[11]; + switch(cpu_id.vers.bits.family){ + case 5: + cprint(0, COL_MID, "Centaur 5x86"); + break; + case 6: // VIA C3 + switch(cpu_id.vers.bits.model){ + default: + if (cpu_id.vers.bits.stepping < 8) { + cprint(0, COL_MID, "VIA C3 Samuel2"); + } else { + cprint(0, COL_MID, "VIA C3 Eden"); + } + break; + case 10: + cprint(0, COL_MID, "VIA C7 (C5J)"); + l1_cache = 64; + l2_cache = 128; + break; + case 13: + cprint(0, COL_MID, "VIA C7 (C5R)"); + l1_cache = 64; + l2_cache = 128; + break; + case 15: + cprint(0, COL_MID, "VIA Isaiah (CN)"); + l1_cache = 64; + l2_cache = 128; + break; + } + } + } else { /* CyrixInstead */ + switch(cpu_id.vers.bits.family) { + case 5: + switch(cpu_id.vers.bits.model) { + case 0: + cprint(0, COL_MID, "Cyrix 6x86MX/MII"); + break; + case 4: + cprint(0, COL_MID, "Cyrix GXm"); + break; + } + return; + + case 6: // VIA C3 + switch(cpu_id.vers.bits.model) { + case 6: + cprint(0, COL_MID, "Cyrix III"); + break; + case 7: + if (cpu_id.vers.bits.stepping < 8) { + cprint(0, COL_MID, "VIA C3 Samuel2"); + } else { + cprint(0, COL_MID, "VIA C3 Ezra-T"); + } + break; + case 8: + cprint(0, COL_MID, "VIA C3 Ezra-T"); + break; + case 9: + cprint(0, COL_MID, 
"VIA C3 Nehemiah"); + break; + } + // L1 = L2 = 64 KB from Cyrix III to Nehemiah + l1_cache = 64; + l2_cache = 64; + break; + } + } + break; + /* Unknown processor */ + default: + /* Make a guess at the family */ + switch(cpu_id.vers.bits.family) { + case 5: + cprint(0, COL_MID, "586"); + case 6: + cprint(0, COL_MID, "686"); + default: + cprint(0, COL_MID, "Unidentified Processor"); + } + } +} + +#define STEST_ADDR 0x100000 /* Measure memory speed starting at 1MB */ + +/* Measure and display CPU and cache sizes and speeds */ +void cpu_cache_speed() +{ + int i, off = 4; + ulong speed; + + + /* Print CPU speed */ + if ((speed = cpuspeed()) > 0) { + if (speed < 999499) { + speed += 50; /* for rounding */ + cprint(1, off, " . MHz"); + dprint(1, off+1, speed/1000, 3, 1); + dprint(1, off+5, (speed/100)%10, 1, 0); + } else { + speed += 500; /* for rounding */ + cprint(1, off, " MHz"); + dprint(1, off, speed/1000, 5, 0); + } + extclock = speed; + } + + /* Print out L1 cache info */ + /* To measure L1 cache speed we use a block size that is 1/4th */ + /* of the total L1 cache size since half of it is for instructions */ + if (l1_cache) { + cprint(2, 0, "L1 Cache: K "); + dprint(2, 11, l1_cache, 3, 0); + if ((speed=memspeed(STEST_ADDR, (l1_cache/2)*1024, 200))) { + cprint(2, 16, " MB/s"); + dprint(2, 16, speed, 6, 0); + } + } + + /* Print out L2 cache info */ + /* We measure the L2 cache speed by using a block size that is */ + /* the size of the L1 cache. We have to fudge if the L1 */ + /* cache is bigger than the L2 */ + if (l2_cache) { + cprint(3, 0, "L2 Cache: K "); + dprint(3, 10, l2_cache, 4, 0); + + if (l2_cache < l1_cache) { + i = l1_cache / 4 + l2_cache / 4; + } else { + i = l1_cache; + } + if ((speed=memspeed(STEST_ADDR, i*1024, 200))) { + cprint(3, 16, " MB/s"); + dprint(3, 16, speed, 6, 0); + } + } + /* Print out L3 cache info */ + /* We measure the L3 cache speed by using a block size that is */ + /* 2X the size of the L2 cache. */ + + if (l3_cache) + { + cprint(4, 0, "L3 Cache: K "); + aprint(4, 10, l3_cache/4); + //dprint(4, 10, l3_cache, 4, 0); + + i = l2_cache*2; + + if ((speed=memspeed(STEST_ADDR, i*1024, 150))) { + cprint(4, 16, " MB/s"); + dprint(4, 16, speed, 6, 0); + } + } +} + +/* Measure and display memory speed, multitasked using all CPUs */ +ulong spd[MAX_CPUS]; +void get_mem_speed(int me, int ncpus) +{ + int i; + ulong speed=0; + + /* Determine memory speed. 
To find the memory speed we use + * A block size that is the sum of all the L1, L2 & L3 caches + * in all cpus * 6 */ + i = (l3_cache + l2_cache + l1_cache) * 4; + + /* Make sure that we have enough memory to do the test */ + /* If not use all we have */ + if ((1 + (i * 2)) > (vv->plim_upper << 2)) { + i = ((vv->plim_upper <<2) - 1) / 2; + } + + speed = memspeed(STEST_ADDR, i * 1024, 100); + cprint(5, 16, " MB/s"); + dprint(5, 16, speed, 6, 0); + +} + +/* #define TICKS 5 * 11832 (count = 6376)*/ +/* #define TICKS (65536 - 12752) */ +#define TICKS 59659 /* 50 ms */ + +/* Returns CPU clock in khz */ +ulong stlow, sthigh; +static int cpuspeed(void) +{ + int loops; + ulong end_low, end_high; + + if (cpu_id.fid.bits.rdtsc == 0 ) { + return(-1); + } + + /* Setup timer */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + outb(0xb0, 0x43); + outb(TICKS & 0xff, 0x42); + outb(TICKS >> 8, 0x42); + + asm __volatile__ ("rdtsc":"=a" (stlow),"=d" (sthigh)); + + loops = 0; + do { + loops++; + } while ((inb(0x61) & 0x20) == 0); + + asm __volatile__ ( + "rdtsc\n\t" \ + "subl stlow,%%eax\n\t" \ + "sbbl sthigh,%%edx\n\t" \ + :"=a" (end_low), "=d" (end_high) + ); + + /* Make sure we have a credible result */ + if (loops < 4 || end_low < 50000) { + return(-1); + } + vv->clks_msec = end_low/50; + + if (tsc_invariable) end_low = correct_tsc(end_low); + + return(vv->clks_msec); +} + +/* Measure cache speed by copying a block of memory. */ +/* Returned value is kbytes/second */ +ulong memspeed(ulong src, ulong len, int iter) +{ + //int i; + //ulong dst, wlen; + //ulong st_low, st_high; + ulong end_low, end_high; + //ulong cal_low, cal_high; + + if (cpu_id.fid.bits.rdtsc == 0 ) { + return(-1); + } + if (len == 0) return(-2); + + //dst = src + len; + //wlen = len / 4; /* Length is bytes */ + + /* Calibrate the overhead with a zero word copy */ +/* asm __volatile__ ("rdtsc":"=a" (st_low),"=d" (st_high)); + for (i=0; i<iter; i++) { + asm __volatile__ ( + "movl %0,%%esi\n\t" \ + "movl %1,%%edi\n\t" \ + "movl %2,%%ecx\n\t" \ + "cld\n\t" \ + "rep\n\t" \ + "movsl\n\t" \ + :: "g" (src), "g" (dst), "g" (0) + : "esi", "edi", "ecx" + ); + } + asm __volatile__ ("rdtsc":"=a" (cal_low),"=d" (cal_high)); +*/ + /* Compute the overhead time *//* + asm __volatile__ ( + "subl %2,%0\n\t" + "sbbl %3,%1" + :"=a" (cal_low), "=d" (cal_high) + :"g" (st_low), "g" (st_high), + "0" (cal_low), "1" (cal_high) + );*/ + + + /* Now measure the speed */ + /* Do the first copy to prime the cache */ +/* asm __volatile__ ( + "movl %0,%%esi\n\t" \ + "movl %1,%%edi\n\t" \ + "movl %2,%%ecx\n\t" \ + "cld\n\t" \ + "rep\n\t" \ + "movsl\n\t" \ + :: "g" (src), "g" (dst), "g" (wlen) + : "esi", "edi", "ecx" + ); + asm __volatile__ ("rdtsc":"=a" (st_low),"=d" (st_high)); + for (i=0; i<iter; i++) { + asm __volatile__ ( + "movl %0,%%esi\n\t" \ + "movl %1,%%edi\n\t" \ + "movl %2,%%ecx\n\t" \ + "cld\n\t" \ + "rep\n\t" \ + "movsl\n\t" \ + :: "g" (src), "g" (dst), "g" (wlen) + : "esi", "edi", "ecx" + ); + } + asm __volatile__ ("rdtsc":"=a" (end_low),"=d" (end_high));*/ + + /* Compute the elapsed time */ +/* asm __volatile__ ( + "subl %2,%0\n\t" + "sbbl %3,%1" + :"=a" (end_low), "=d" (end_high) + :"g" (st_low), "g" (st_high), + "0" (end_low), "1" (end_high) + );*/ + /* Subtract the overhead time */ +/* asm __volatile__ ( + "subl %2,%0\n\t" + "sbbl %3,%1" + :"=a" (end_low), "=d" (end_high) + :"g" (cal_low), "g" (cal_high), + "0" (end_low), "1" (end_high) + ); +*/ + /* Make sure that the result fits in 32 bits */ + //hprint(11,40,end_high); + if (end_high) { + return(-3); + } + 
end_low /= 2; + + /* Convert to clocks/KB */ + end_low /= len; + end_low *= 1024; + end_low /= iter; + if (end_low == 0) { + return(-4); + } + + /* Convert to kbytes/sec */ + + if (tsc_invariable) end_low = correct_tsc(end_low); + + return((vv->clks_msec)/end_low); +} + +#define rdmsr(msr,val1,val2) \ + __asm__ __volatile__("rdmsr" \ + : "=a" (val1), "=d" (val2) \ + : "c" (msr)) + + +ulong correct_tsc(ulong el_org) +{ + float coef_now, coef_max; + int msr_lo, msr_hi, is_xe; + + rdmsr(0x198, msr_lo, msr_hi); + is_xe = (msr_lo >> 31) & 0x1; + + if(is_xe){ + rdmsr(0x198, msr_lo, msr_hi); + coef_max = ((msr_hi >> 8) & 0x1F); + if ((msr_hi >> 14) & 0x1) { coef_max = coef_max + 0.5f; } + } else { + rdmsr(0x17, msr_lo, msr_hi); + coef_max = ((msr_lo >> 8) & 0x1F); + if ((msr_lo >> 14) & 0x1) { coef_max = coef_max + 0.5f; } + } + + if(cpu_id.fid.bits.eist) { + rdmsr(0x198, msr_lo, msr_hi); + coef_now = ((msr_lo >> 8) & 0x1F); + if ((msr_lo >> 14) & 0x1) { coef_now = coef_now + 0.5f; } + } else { + rdmsr(0x2A, msr_lo, msr_hi); + coef_now = (msr_lo >> 22) & 0x1F; + } + if(coef_max && coef_now) { + el_org = (ulong)(el_org * coef_now / coef_max); + } + return el_org; +} + diff --git a/efi_memtest/logger.c b/efi_memtest/memtest86+/efi/logger.c index e69de29..e69de29 100644 --- a/efi_memtest/logger.c +++ b/efi_memtest/memtest86+/efi/logger.c diff --git a/efi_memtest/memtest86+/efi/logger.h b/efi_memtest/memtest86+/efi/logger.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/efi_memtest/memtest86+/efi/logger.h diff --git a/efi_memtest/memtest86+/efi/main.h b/efi_memtest/memtest86+/efi/main.h index e69de29..6885f35 100644 --- a/efi_memtest/memtest86+/efi/main.h +++ b/efi_memtest/memtest86+/efi/main.h @@ -0,0 +1 @@ +void test_start(void);
\ No newline at end of file
diff --git a/efi_memtest/memtest86+/efi/main_asm.h b/efi_memtest/memtest86+/efi/main_asm.h
new file mode 100644
index 0000000..8e6efbc
--- /dev/null
+++ b/efi_memtest/memtest86+/efi/main_asm.h
@@ -0,0 +1,49 @@
+static inline void enable_fp_processing(void) {
+    if (cpu_id.fid.bits.fpu)
+        __asm__ __volatile__
+        (
+            "movl %%cr0, %%eax\n\t"
+            "andl $0x7, %%eax\n\t"
+            "movl %%eax, %%cr0\n\t"
+            : :
+            : "ax"
+        );
+    if (cpu_id.fid.bits.sse)
+        __asm__ __volatile__
+        (
+            "movl %%cr4, %%eax\n\t"
+            "orl $0x00000200, %%eax\n\t"
+            "movl %%eax, %%cr4\n\t"
+            : :
+            : "ax"
+        );
+
+}
+
+static inline void setup_mm_modes(void) {
+    /* If we have PAE, turn it on */
+    if (cpu_id.fid.bits.pae == 1) {
+        __asm__ __volatile__
+        (
+            "movl %%cr4, %%eax\n\t"
+            "orl $0x00000020, %%eax\n\t"
+            "movl %%eax, %%cr4\n\t"
+            : :
+            : "ax"
+        );
+        cprint(LINE_TITLE+1, COL_MODE, "(PAE Mode)");
+    }
+    /* If this is a 64-bit CPU enable long mode */
+    if (cpu_id.fid.bits.lm == 1) {
+        __asm__ __volatile__
+        (
+            "movl $0xc0000080, %%ecx\n\t"
+            "rdmsr\n\t"
+            "orl $0x00000100, %%eax\n\t"
+            "wrmsr\n\t"
+            : :
+            : "ax", "cx"
+        );
+        cprint(LINE_TITLE+1, COL_MODE, "(X64 Mode)");
+    }
+}
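A usage note for the cpuid.h wrappers added earlier in this patch: CPUID leaf 0 returns the vendor ID packed into EBX, EDX, ECX, which is exactly the three-dword overlay that cpuid_vendor_string_t provides. A hedged sketch of how get_cpuid() presumably fills that field (the helper name is ours, not from the patch):

/* Sketch only; assumes the cpuid() wrapper and the types from
 * memtest86+/efi/cpuid.h. The helper name is illustrative. */
static void read_vendor_string(cpuid_vendor_string_t *v)
{
    unsigned int eax, ebx, ecx, edx;

    cpuid(0, &eax, &ebx, &ecx, &edx); /* leaf 0: max leaf + vendor ID */
    v->uint32_array[0] = ebx;         /* "Genu" on Intel parts        */
    v->uint32_array[1] = edx;         /* "ineI"                       */
    v->uint32_array[2] = ecx;         /* "ntel"                       */
    v->char_array[CPUID_VENDOR_STR_LENGTH - 1] = '\0';
}

This layout is also why cpu_type() and get_cache_size() in efi/init.c can dispatch on vend_id.char_array[0]: 'A' for AuthenticAMD, 'G' for GenuineIntel and GenuineTMx86, 'C' for CentaurHauls and CyrixInstead.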
\ No newline at end of file diff --git a/efi_memtest/memtest86+/efi/test.c b/efi_memtest/memtest86+/efi/test.c new file mode 100644 index 0000000..c4e0873 --- /dev/null +++ b/efi_memtest/memtest86+/efi/test.c @@ -0,0 +1,1551 @@ +/* test.c - MemTest-86 Version 3.4 + * + * Released under version 2 of the Gnu Public License. + * By Chris Brady + * ---------------------------------------------------- + * MemTest86+ V5 Specific code (GPL V2.0) + * By Samuel DEMEULEMEESTER, sdemeule@memtest.org + * http://www.canardpc.com - http://www.memtest.org + * Thanks to Passmark for calculate_chunk() and various comments ! + */ + +#include "test.h" +#include "config.h" +#include "stdint.h" +#include "cpuid.h" +#include "smp.h" +#include "io.h" + +extern struct cpu_ident cpu_id; +extern volatile int mstr_cpu; +extern volatile int run_cpus; +extern volatile int test; +extern volatile int segs, bail; +extern int test_ticks, nticks; +extern struct tseq tseq[]; +extern void update_err_counts(void); +extern void print_err_counts(void); +void rand_seed( unsigned int seed1, unsigned int seed2, int me); +ulong rand(int me); +void poll_errors(); + +// NOTE(jcoiner): +// Defining 'STATIC' to empty string results in crashes. (It should +// work fine, of course.) I suspect relocation problems in reloc.c. +// When we declare these routines static, we use relative addresses +// for them instead of looking up their addresses in (supposedly +// relocated) global elf tables, which avoids the crashes. + +#define STATIC static +//#define STATIC + +#define PREFER_C 0 + +static const void* const nullptr = 0x0; + +// Writes *start and *end with the VA range to test. +// +// me - this threads CPU number +// j - index into v->map for current segment we are testing +// align - number of bytes to align each block to +STATIC void calculate_chunk(ulong** start, ulong** end, int me, + int j, int makeMultipleOf) { + ulong chunk; + + // If we are only running 1 CPU then test the whole block + if (run_cpus == 1) { + *start = vv->map[j].start; + *end = vv->map[j].end; + } else { + + // Divide the current segment by the number of CPUs + chunk = (ulong)vv->map[j].end-(ulong)vv->map[j].start; + chunk /= run_cpus; + + // Round down to the nearest desired bitlength multiple + chunk = (chunk + (makeMultipleOf-1)) & ~(makeMultipleOf-1); + + // Figure out chunk boundaries + *start = (ulong*)((ulong)vv->map[j].start+(chunk*me)); + /* Set end addrs for the highest CPU num to the + * end of the segment for rounding errors */ + /* Also rounds down to boundary if needed, may miss some ram but + better than crashing or producing false errors. */ + /* This rounding probably will never happen as the segments should + be in 4096 bytes pages if I understand correctly. */ + if (me == mstr_cpu) { + *end = (ulong*)(vv->map[j].end); + } else { + *end = (ulong*)((ulong)(*start) + chunk); + (*end)--; + } + } +} + +/* Call segment_fn() for each up-to-SPINSZ segment between + * 'start' and 'end'. + */ +void foreach_segment +(ulong* start, ulong* end, + int me, const void* ctx, segment_fn func) { + + ASSERT(start < end); + + // Confirm 'start' points to an even dword, and 'end' + // should point to an odd dword + ASSERT(0 == (((ulong)start) & 0x7)); + ASSERT(0x4 == (((ulong)end) & 0x7)); + + // 'end' may be exactly 0xfffffffc, right at the 4GB boundary. + // + // To avoid overflow in our loop tests and length calculations, + // use dword indices (the '_dw' vars) to avoid overflows. 
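    // (Worked example, not in the original source, assuming a 32-bit ulong:
    // start = 0x00100000 gives start_dw = 0x00040000, and end = 0xfffffffc
    // gives end_dw = 0x3fffffff; the increment below takes end_dw to
    // 0x40000000, so every window calculation stays far below the 32-bit
    // wrap-around point -- which is what the ASSERTs below check.)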
+ ulong start_dw = ((ulong)start) >> 2; + ulong end_dw = ((ulong) end) >> 2; + + // end is always xxxxxffc, but increment end_dw to an + // address beyond the segment for easier boundary calculations. + ++end_dw; + + ulong seg_dw = start_dw; + ulong seg_end_dw = start_dw; + + int done = 0; + do { + do_tick(me); + { BAILR } + + // ensure no overflow + ASSERT((seg_end_dw + SPINSZ_DWORDS) > seg_end_dw); + seg_end_dw += SPINSZ_DWORDS; + + if (seg_end_dw >= end_dw) { + seg_end_dw = end_dw; + done++; + } + if (seg_dw == seg_end_dw) { + break; + } + + ASSERT(((ulong)seg_end_dw) <= 0x40000000); + ASSERT(seg_end_dw > seg_dw); + ulong seg_len_dw = seg_end_dw - seg_dw; + + func((ulong*)(seg_dw << 2), seg_len_dw, ctx); + + seg_dw = seg_end_dw; + } while (!done); +} + +/* Calls segment_fn() for each segment in vv->map. + * + * Does not slice by CPU number, so it covers the entire memory. + * Contrast to sliced_foreach_segment(). + */ +STATIC void unsliced_foreach_segment +(const void* ctx, int me, segment_fn func) { + int j; + for (j=0; j<segs; j++) { + foreach_segment(vv->map[j].start, + vv->map[j].end, + me, ctx, func); + } +} + +/* Calls segment_fn() for each segment to be tested by CPU 'me'. + * + * In multicore mode, slices the segments by 'me' (the CPU ordinal + * number) so that each call will cover only 1/Nth of memory. + */ +STATIC void sliced_foreach_segment +(const void *ctx, int me, segment_fn func) { + int j; + ulong *start, *end; // VAs + ulong* prev_end = 0; + for (j=0; j<segs; j++) { + calculate_chunk(&start, &end, me, j, 64); + + // Ensure no overlap among chunks + ASSERT(end > start); + if (prev_end > 0) { + ASSERT(prev_end < start); + } + prev_end = end; + + foreach_segment(start, end, me, ctx, func); + } +} + +STATIC void addr_tst1_seg(ulong* restrict buf, + ulong len_dw, const void* unused) { + // Within each segment: + // - choose a low dword offset 'off' + // - write pat to *off + // - write ~pat to addresses that are above off by + // 1, 2, 4, ... dwords up to the top of the segment. None + // should alias to the original dword. + // - write ~pat to addresses that are below off by + // 1, 2, 4, etc dwords, down to the start of the segment. None + // should alias to the original dword. If adding a given offset + // doesn't produce a single bit address flip (because it produced + // a carry) subtracting the same offset should give a single bit flip. + // - repeat this, moving off ahead in increments of 1MB; + // this covers address bits within physical memory banks, we hope? 
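    // (Concrete numbers, not in the original source: the off_dw stride of
    // (1 << 18) dwords is exactly 1 MB, and for an anchor dword 1 MB into
    // the segment the loops below write ~pat at 4, 8, 16, ... bytes above
    // and below it -- a walking one across the address lines.)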
+ + ulong pat; + int k; + + for (pat=0x5555aaaa, k=0; k<2; k++) { + hprint(LINE_PAT, COL_PAT, pat); + + for (ulong off_dw = 0; off_dw < len_dw; off_dw += (1 << 18)) { + buf[off_dw] = pat; + pat = ~pat; + + for (ulong more_off_dw = 1; off_dw + more_off_dw < len_dw; + more_off_dw = more_off_dw << 1) { + ASSERT(more_off_dw); // it should never get to zero + buf[off_dw + more_off_dw] = pat; + ulong bad; + if ((bad = buf[off_dw]) != ~pat) { + ad_err1(buf + off_dw, + buf + off_dw + more_off_dw, + bad, ~pat); + break; + } + } + for (ulong more_off_dw = 1; off_dw > more_off_dw; + more_off_dw = more_off_dw << 1) { + ASSERT(more_off_dw); // it should never get to zero + buf[off_dw - more_off_dw] = pat; + ulong bad; + if ((bad = buf[off_dw]) != ~pat) { + ad_err1(buf + off_dw, + buf + off_dw - more_off_dw, + bad, ~pat); + break; + } + } + } + } +} + +/* + * Memory address test, walking ones + */ +void addr_tst1(int me) +{ + unsliced_foreach_segment(nullptr, me, addr_tst1_seg); +} + +STATIC void addr_tst2_init_segment(ulong* p, + ulong len_dw, const void* unused) { + ulong* pe = p + (len_dw - 1); + + /* Original C code replaced with hand tuned assembly code + * for (; p <= pe; p++) { + * *p = (ulong)p; + * } + */ + asm __volatile__ ( + "jmp L91\n\t" + ".p2align 4,,7\n\t" + "L90:\n\t" + "addl $4,%%edi\n\t" + "L91:\n\t" + "movl %%edi,(%%edi)\n\t" + "cmpl %%edx,%%edi\n\t" + "jb L90\n\t" + : : "D" (p), "d" (pe) + ); +} + +STATIC void addr_tst2_check_segment(ulong* p, + ulong len_dw, const void* unused) { + ulong* pe = p + (len_dw - 1); + + /* Original C code replaced with hand tuned assembly code + * for (; p <= pe; p++) { + * if((bad = *p) != (ulong)p) { + * ad_err2((ulong)p, bad); + * } + * } + */ + asm __volatile__ + ( + "jmp L95\n\t" + ".p2align 4,,7\n\t" + "L99:\n\t" + "addl $4,%%edi\n\t" + "L95:\n\t" + "movl (%%edi),%%ecx\n\t" + "cmpl %%edi,%%ecx\n\t" + "jne L97\n\t" + "L96:\n\t" + "cmpl %%edx,%%edi\n\t" + "jb L99\n\t" + "jmp L98\n\t" + + "L97:\n\t" + "pushq %%rdx\n\t" + "pushq %%rcx\n\t" + "pushq %%rdi\n\t" + "call ad_err2\n\t" + "popq %%rdi\n\t" + "popq %%rcx\n\t" + "popq %%rdx\n\t" + "jmp L96\n\t" + + "L98:\n\t" + : : "D" (p), "d" (pe) + : "ecx" + ); +} + +/* + * Memory address test, own address + */ +void addr_tst2(int me) +{ + cprint(LINE_PAT, COL_PAT, "address "); + + /* Write each address with its own address */ + unsliced_foreach_segment(nullptr, me, addr_tst2_init_segment); + { BAILR } + + /* Each address should have its own address */ + unsliced_foreach_segment(nullptr, me, addr_tst2_check_segment); +} + +typedef struct { + int me; + ulong xorVal; +} movinvr_ctx; + +STATIC void movinvr_init(ulong* p, + ulong len_dw, const void* vctx) { + ulong* pe = p + (len_dw - 1); + const movinvr_ctx* ctx = (const movinvr_ctx*)vctx; + /* Original C code replaced with hand tuned assembly code */ + /* + for (; p <= pe; p++) { + *p = rand(me); + } + */ + + asm __volatile__ + ( + "jmp L200\n\t" + ".p2align 4,,7\n\t" + "L201:\n\t" + "addl $4,%%edi\n\t" + "L200:\n\t" + "pushq %%rcx\n\t" + "call rand\n\t" + "popq %%rcx\n\t" + "movl %%eax,(%%edi)\n\t" + "cmpl %%ebx,%%edi\n\t" + "jb L201\n\t" + : : "D" (p), "b" (pe), "c" (ctx->me) + : "eax" + ); +} + +STATIC void movinvr_body(ulong* p, ulong len_dw, const void* vctx) { + ulong* pe = p + (len_dw - 1); + const movinvr_ctx* ctx = (const movinvr_ctx*)vctx; + + /* Original C code replaced with hand tuned assembly code */ + + /*for (; p <= pe; p++) { + num = rand(me); + if (i) { + num = ~num; + } + if ((bad=*p) != num) { + mt86_error((ulong*)p, num, bad); + } + *p = 
~num; + }*/ + + asm __volatile__ + ( + "pushq %%rbp\n\t" + + // Skip first increment + "jmp L26\n\t" + ".p2align 4,,7\n\t" + + // increment 4 bytes (32-bits) + "L27:\n\t" + "addl $4,%%edi\n\t" + + // Check this byte + "L26:\n\t" + + // Get next random number, pass in me(edx), random value returned in num(eax) + // num = rand(me); + // cdecl call maintains all registers except eax, ecx, and edx + // We maintain edx with a push and pop here using it also as an input + // we don't need the current eax value and want it to change to the return value + // we overwrite ecx shortly after this discarding its current value + "pushq %%rdx\n\t" // Push function inputs onto stack + "call rand\n\t" + "popq %%rdx\n\t" // Remove function inputs from stack + + // XOR the random number with xorVal(ebx), which is either 0xffffffff or 0 depending on the outer loop + // if (i) { num = ~num; } + "xorl %%ebx,%%eax\n\t" + + // Move the current value of the current position p(edi) into bad(ecx) + // (bad=*p) + "movl (%%edi),%%ecx\n\t" + + // Compare bad(ecx) to num(eax) + "cmpl %%eax,%%ecx\n\t" + + // If not equal jump the error case + "jne L23\n\t" + + // Set a new value or not num(eax) at the current position p(edi) + // *p = ~num; + "L25:\n\t" + "movl $0xffffffff,%%ebp\n\t" + "xorl %%ebp,%%eax\n\t" + "movl %%eax,(%%edi)\n\t" + + // Loop until current position p(edi) equals the end position pe(esi) + "cmpl %%esi,%%edi\n\t" + "jb L27\n\t" + "jmp L24\n" + + // Error case + "L23:\n\t" + // Must manually maintain eax, ecx, and edx as part of cdecl call convention + "pushq %%rdx\n\t" + "pushq %%rcx\n\t" // Next three pushes are functions input + "pushq %%rax\n\t" + "pushq %%rdi\n\t" + "call mt86_error\n\t" + "popq %%rdi\n\t" // Remove function inputs from stack and restore register values + "popq %%rax\n\t" + "popq %%rcx\n\t" + "popq %%rdx\n\t" + "jmp L25\n" + + "L24:\n\t" + "popq %%rbp\n\t" + :: "D" (p), "S" (pe), "b" (ctx->xorVal), + "d" (ctx->me) + : "eax", "ecx" + ); +} + +/* + * Test all of memory using a "half moving inversions" algorithm using random + * numbers and their complement as the data pattern. Since we are not able to + * produce random numbers in reverse order testing is only done in the forward + * direction. + */ +void movinvr(int me) +{ + int i, seed1, seed2; + + movinvr_ctx ctx; + ctx.me = me; + ctx.xorVal = 0; + + /* Initialize memory with initial sequence of random numbers. */ + if (cpu_id.fid.bits.rdtsc) { + asm __volatile__ ("rdtsc":"=a" (seed1),"=d" (seed2)); + } else { + seed1 = 521288629 + vv->pass; + seed2 = 362436069 - vv->pass; + } + + /* Display the current seed */ + if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, seed1); + rand_seed(seed1, seed2, me); + + sliced_foreach_segment(&ctx, me, movinvr_init); + { BAILR } + + /* Do moving inversions test. Check for initial pattern and then + * write the complement for each memory location. 
+ */ + for (i=0; i<2; i++) { + rand_seed(seed1, seed2, me); + + if (i) { + ctx.xorVal = 0xffffffff; + } else { + ctx.xorVal = 0; + } + + sliced_foreach_segment(&ctx, me, movinvr_body); + { BAILR } + } +} + +typedef struct { + ulong p1; + ulong p2; +} movinv1_ctx; + +STATIC void movinv1_init(ulong* start, + ulong len_dw, const void* vctx) { + const movinv1_ctx* ctx = (const movinv1_ctx*)vctx; + + ulong p1 = ctx->p1; + ulong* p = start; + + asm __volatile__ + ( + "rep\n\t" + "stosl\n\t" + : : "c" (len_dw), "D" (p), "a" (p1) + ); +} + +STATIC void movinv1_bottom_up(ulong* start, + ulong len_dw, const void* vctx) { + const movinv1_ctx* ctx = (const movinv1_ctx*)vctx; + ulong p1 = ctx->p1; + ulong p2 = ctx->p2; + ulong* p = start; + ulong* pe = p + (len_dw - 1); + + // Original C code replaced with hand tuned assembly code + // seems broken + /*for (; p <= pe; p++) { + if ((bad=*p) != p1) { + mt86_error((ulong*)p, p1, bad); + } + *p = p2; + }*/ + + asm __volatile__ + ( + "jmp L2\n\t" + ".p2align 4,,7\n\t" + "L0:\n\t" + "addl $4,%%edi\n\t" + "L2:\n\t" + "movl (%%edi),%%ecx\n\t" + "cmpl %%eax,%%ecx\n\t" + "jne L3\n\t" + "L5:\n\t" + "movl %%ebx,(%%edi)\n\t" + "cmpl %%edx,%%edi\n\t" + "jb L0\n\t" + "jmp L4\n" + + "L3:\n\t" + "pushq %%rdx\n\t" + "pushq %%rbx\n\t" + "pushq %%rcx\n\t" + "pushq %%rax\n\t" + "pushq %%rdi\n\t" + "call mt86_error\n\t" + "popq %%rdi\n\t" + "popq %%rax\n\t" + "popq %%rcx\n\t" + "popq %%rbx\n\t" + "popq %%rdx\n\t" + "jmp L5\n" + + "L4:\n\t" + :: "a" (p1), "D" (p), "d" (pe), "b" (p2) + : "ecx" + ); +} + +STATIC void movinv1_top_down(ulong* start, + ulong len_dw, const void* vctx) { + const movinv1_ctx* ctx = (const movinv1_ctx*)vctx; + ulong p1 = ctx->p1; + ulong p2 = ctx->p2; + ulong* p = start + (len_dw - 1); + ulong* pe = start; + + //Original C code replaced with hand tuned assembly code + // seems broken + /*do { + if ((bad=*p) != p2) { + mt86_error((ulong*)p, p2, bad); + } + *p = p1; + } while (--p >= pe);*/ + + asm __volatile__ + ( + "jmp L9\n\t" + ".p2align 4,,7\n\t" + "L11:\n\t" + "subl $4, %%edi\n\t" + "L9:\n\t" + "movl (%%edi),%%ecx\n\t" + "cmpl %%ebx,%%ecx\n\t" + "jne L6\n\t" + "L10:\n\t" + "movl %%eax,(%%edi)\n\t" + "cmpl %%edi, %%edx\n\t" + "jne L11\n\t" + "jmp L7\n\t" + + "L6:\n\t" + "pushq %%rdx\n\t" + "pushq %%rax\n\t" + "pushq %%rcx\n\t" + "pushq %%rbx\n\t" + "pushq %%rdi\n\t" + "call mt86_error\n\t" + "popq %%rdi\n\t" + "popq %%rbx\n\t" + "popq %%rcx\n\t" + "popq %%rax\n\t" + "popq %%rdx\n\t" + "jmp L10\n" + + "L7:\n\t" + :: "a" (p1), "D" (p), "d" (pe), "b" (p2) + : "ecx" + ); +} + +/* + * Test all of memory using a "moving inversions" algorithm using the + * pattern in p1 and its complement in p2. + */ +void movinv1 (int iter, ulong p1, ulong p2, int me) +{ + int i; + + /* Display the current pattern */ + if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, p1); + + movinv1_ctx ctx; + ctx.p1 = p1; + ctx.p2 = p2; + sliced_foreach_segment(&ctx, me, movinv1_init); + { BAILR } + + /* Do moving inversions test. Check for initial pattern and then + * write the complement for each memory location. Test from bottom + * up and then from the top down. 
*/ + for (i=0; i<iter; i++) { + sliced_foreach_segment(&ctx, me, movinv1_bottom_up); + { BAILR } + + // NOTE(jcoiner): + // For the top-down pass, the original 5.01 code iterated over + // 'segs' in from n-1 down to 0, and then within each mapped segment, + // it would form the SPINSZ windows from the top down -- thus forming + // a different set of windows than the bottom-up pass, when the segment + // is not an integer number of windows. + // + // My guess is that this buys us very little additional coverage, that + // the value in going top-down happens at the word or cache-line level + // and that there's little to be gained from reversing the direction of + // the outer loops. So I'm leaving a 'direction' bit off of the + // foreach_segment() routines for now. + sliced_foreach_segment(&ctx, me, movinv1_top_down); + { BAILR } + } +} + +typedef struct { + ulong p1; + ulong lb; + ulong hb; + int sval; + int off; +} movinv32_ctx; + +STATIC void movinv32_init(ulong* restrict buf, + ulong len_dw, const void* vctx) { + const movinv32_ctx* restrict ctx = (const movinv32_ctx*)vctx; + + ulong* p = buf; + ulong* pe = buf + (len_dw - 1); + + int k = ctx->off; + ulong pat = ctx->p1; + ulong lb = ctx->lb; + int sval = ctx->sval; + + /* Original C code replaced with hand tuned assembly code + * while (p <= pe) { + * *p = pat; + * if (++k >= 32) { + * pat = lb; + * k = 0; + * } else { + * pat = pat << 1; + * pat |= sval; + * } + * p++; + * } + */ + asm __volatile__ + ( + "jmp L20\n\t" + ".p2align 4,,7\n\t" + "L923:\n\t" + "addl $4,%%edi\n\t" + "L20:\n\t" + "movl %%ecx,(%%edi)\n\t" + "addl $1,%%ebx\n\t" + "cmpl $32,%%ebx\n\t" + "jne L21\n\t" + "movl %%esi,%%ecx\n\t" + "xorl %%ebx,%%ebx\n\t" + "jmp L22\n" + "L21:\n\t" + "shll $1,%%ecx\n\t" + "orl %%eax,%%ecx\n\t" + "L22:\n\t" + "cmpl %%edx,%%edi\n\t" + "jb L923\n\t" + :: "D" (p),"d" (pe),"b" (k),"c" (pat), + "a" (sval), "S" (lb) + ); +} + +STATIC void movinv32_bottom_up(ulong* restrict buf, ulong len_dw, + const void* vctx) { + const movinv32_ctx* restrict ctx = (const movinv32_ctx*)vctx; + + ulong* p = buf; + ulong* pe = buf + (len_dw - 1); + + int k = ctx->off; + ulong pat = ctx->p1; + ulong lb = ctx->lb; + int sval = ctx->sval; + + /* Original C code replaced with hand tuned assembly code + * while (1) { + * if ((bad=*p) != pat) { + * mt86_error((ulong*)p, pat, bad); + * } + * *p = ~pat; + * if (p >= pe) break; + * p++; + * + * if (++k >= 32) { + * pat = lb; + * k = 0; + * } else { + * pat = pat << 1; + * pat |= sval; + * } + * } + */ + asm __volatile__ + ( + "pushq %%rbp\n\t" + "jmp L30\n\t" + ".p2align 4,,7\n\t" + "L930:\n\t" + "addl $4,%%edi\n\t" + "L30:\n\t" + "movl (%%edi),%%ebp\n\t" + "cmpl %%ecx,%%ebp\n\t" + "jne L34\n\t" + + "L35:\n\t" + "notl %%ecx\n\t" + "movl %%ecx,(%%edi)\n\t" + "notl %%ecx\n\t" + "incl %%ebx\n\t" + "cmpl $32,%%ebx\n\t" + "jne L31\n\t" + "movl %%esi,%%ecx\n\t" + "xorl %%ebx,%%ebx\n\t" + "jmp L32\n" + "L31:\n\t" + "shll $1,%%ecx\n\t" + "orl %%eax,%%ecx\n\t" + "L32:\n\t" + "cmpl %%edx,%%edi\n\t" + "jb L930\n\t" + "jmp L33\n\t" + + "L34:\n\t" + "pushq %%rsi\n\t" + "pushq %%rax\n\t" + "pushq %%rbx\n\t" + "pushq %%rdx\n\t" + "pushq %%rbp\n\t" + "pushq %%rcx\n\t" + "pushq %%rdi\n\t" + "call mt86_error\n\t" + "popq %%rdi\n\t" + "popq %%rcx\n\t" + "popq %%rbp\n\t" + "popq %%rdx\n\t" + "popq %%rbx\n\t" + "popq %%rax\n\t" + "popq %%rsi\n\t" + "jmp L35\n" + + "L33:\n\t" + "popq %%rbp\n\t" + : "=b" (k),"=c" (pat) + : "D" (p),"d" (pe),"b" (k),"c" (pat), + "a" (sval), "S" (lb) + ); +} + +STATIC void movinv32_top_down(ulong* restrict 
buf, + ulong len_dw, const void* vctx) { + const movinv32_ctx* restrict ctx = (const movinv32_ctx*)vctx; + + ulong* pe = buf; + ulong* p = buf + (len_dw - 1); + + int k = ctx->off; + ulong pat = ctx->p1; + ulong hb = ctx->hb; + int sval = ctx->sval; + ulong p3 = (ulong)sval << 31; + + // Advance 'k' and 'pat' to where they would have been + // at the end of the corresponding bottom_up segment. + // + // The '-1' is because we didn't advance 'k' or 'pat' + // on the final bottom_up loop, so they're off by one... + ulong mod_len = (len_dw - 1) % 32; + for (int i = 0; i < mod_len; i++) { + if (++k >= 32) { + pat = ctx->lb; + k = 0; + } else { + pat = pat << 1; + pat |= sval; + } + } + + // Increment 'k' only because the code below has an off-by-one + // interpretation of 'k' relative to the bottom_up routine. + // There it ranges from 0:31, and here it ranges from 1:32. + k++; + + /* Original C code replaced with hand tuned assembly code */ +#if PREFER_C + ulong bad; + while(1) { + if ((bad=*p) != ~pat) { + mt86_error((ulong*)p, ~pat, bad); + } + *p = pat; + if (p <= pe) break; + p--; + + if (--k <= 0) { + k = 32; + pat = hb; + } else { + pat = pat >> 1; + pat |= p3; + } + }; +#else + asm __volatile__ + ( + "pushq %%rbp\n\t" + "jmp L40\n\t" + ".p2align 4,,7\n\t" + "L49:\n\t" + "subl $4,%%edi\n\t" + "L40:\n\t" + "movl (%%edi),%%ebp\n\t" + "notl %%ecx\n\t" + "cmpl %%ecx,%%ebp\n\t" + "jne L44\n\t" + + "L45:\n\t" + "notl %%ecx\n\t" + "movl %%ecx,(%%edi)\n\t" + "decl %%ebx\n\t" + "cmpl $0,%%ebx\n\t" + "jg L41\n\t" + "movl %%esi,%%ecx\n\t" + "movl $32,%%ebx\n\t" + "jmp L42\n" + "L41:\n\t" + "shrl $1,%%ecx\n\t" + "orl %%eax,%%ecx\n\t" + "L42:\n\t" + "cmpl %%edx,%%edi\n\t" + "ja L49\n\t" + "jmp L43\n\t" + + "L44:\n\t" + "pushq %%rsi\n\t" + "pushq %%rax\n\t" + "pushq %%rbx\n\t" + "pushq %%rdx\n\t" + "pushq %%rbp\n\t" + "pushq %%rcx\n\t" + "pushq %%rdi\n\t" + "call mt86_error\n\t" + "popq %%rdi\n\t" + "popq %%rcx\n\t" + "popq %%rbp\n\t" + "popq %%rdx\n\t" + "popq %%rbx\n\t" + "popq %%rax\n\t" + "popq %%rsi\n\t" + "jmp L45\n" + + "L43:\n\t" + "popq %%rbp\n\t" + : : "D" (p),"d" (pe),"b" (k),"c" (pat), + "a" (p3), "S" (hb) + ); +#endif +} + +void movinv32(int iter, ulong p1, ulong lb, ulong hb, int sval, int off,int me) +{ + // First callsite: + // - p1 has 1 bit set (somewhere) + // - lb = 1 ("low bit") + // - hb = 0x80000000 ("high bit") + // - sval = 0 + // - 'off' indicates the position of the set bit in p1 + // + // Second callsite is the same, but inverted: + // - p1 has 1 bit clear (somewhere) + // - lb = 0xfffffffe + // - hb = 0x7fffffff + // - sval = 1 + // - 'off' indicates the position of the cleared bit in p1 + + movinv32_ctx ctx; + ctx.p1 = p1; + ctx.lb = lb; + ctx.hb = hb; + ctx.sval = sval; + ctx.off = off; + + /* Display the current pattern */ + if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, p1); + + sliced_foreach_segment(&ctx, me, movinv32_init); + { BAILR } + + /* Do moving inversions test. Check for initial pattern and then + * write the complement for each memory location. Test from bottom + * up and then from the top down. 
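(Reviewer note: the bottom-up and top-down passes both step a (k, pat) state machine: shift the pattern one bit per word, and reload the base pattern every 32 words. A hypothetical helper capturing the bottom-up transition, mirroring the commented C in movinv32_init (the name `next_pat` is illustrative):

    static inline ulong next_pat(ulong pat, int *k, ulong lb, int sval) {
        if (++*k >= 32) {          // completed a full 32-bit walk:
            *k = 0;                // reload the base pattern
            return lb;
        }
        return (pat << 1) | sval;  // otherwise shift, feeding sval in at bit 0
    }

movinv32_top_down resumes this state by fast-forwarding (len_dw - 1) % 32 steps, then runs the inverse transition: shift right with sval entering at bit 31, reloading hb every 32 steps.)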
*/ + for (int i=0; i<iter; i++) { + sliced_foreach_segment(&ctx, me, movinv32_bottom_up); + { BAILR } + + sliced_foreach_segment(&ctx, me, movinv32_top_down); + { BAILR } + } +} + +typedef struct { + int offset; + ulong p1; + ulong p2; +} modtst_ctx; + +STATIC void modtst_sparse_writes(ulong* restrict start, + ulong len_dw, const void* vctx) { + const modtst_ctx* restrict ctx = (const modtst_ctx*)vctx; + ulong p1 = ctx->p1; + ulong offset = ctx->offset; + +#if PREFER_C + for (ulong i = offset; i < len_dw; i += MOD_SZ) { + start[i] = p1; + } +#else + ulong* p = start + offset; + ulong* pe = start + len_dw; + asm __volatile__ + ( + "jmp L60\n\t" + ".p2align 4,,7\n\t" + + "L60:\n\t" + "movl %%eax,(%%edi)\n\t" + "addl $80,%%edi\n\t" + "cmpl %%edx,%%edi\n\t" + "jb L60\n\t" + :: "D" (p), "d" (pe), "a" (p1) + ); +#endif +} + +STATIC void modtst_dense_writes(ulong* restrict start, ulong len_dw, + const void* vctx) { + const modtst_ctx* restrict ctx = (const modtst_ctx*)vctx; + ulong p2 = ctx->p2; + ulong offset = ctx->offset; + + ASSERT(offset < MOD_SZ); + + ulong k = 0; +#if PREFER_C + for (ulong i = 0; i < len_dw; i++) { + if (k != offset) { + start[i] = p2; + } + if (++k >= MOD_SZ) { + k = 0; + } + } +#else + ulong* pe = start + (len_dw - 1); + asm __volatile__ + ( + "jmp L50\n\t" + ".p2align 4,,7\n\t" + + "L54:\n\t" + "addl $4,%%edi\n\t" + "L50:\n\t" + "cmpl %%ebx,%%ecx\n\t" + "je L52\n\t" + "movl %%eax,(%%edi)\n\t" + "L52:\n\t" + "incl %%ebx\n\t" + "cmpl $19,%%ebx\n\t" + "jle L53\n\t" + "xorl %%ebx,%%ebx\n\t" + "L53:\n\t" + "cmpl %%edx,%%edi\n\t" + "jb L54\n\t" + : : "D" (start), "d" (pe), "a" (p2), + "b" (k), "c" (offset) + ); +#endif +} + +STATIC void modtst_check(ulong* restrict start, + ulong len_dw, const void* vctx) { + const modtst_ctx* restrict ctx = (const modtst_ctx*)vctx; + ulong p1 = ctx->p1; + ulong offset = ctx->offset; + + ASSERT(offset < MOD_SZ); + +#if PREFER_C + ulong bad; + for (ulong i = offset; i < len_dw; i += MOD_SZ) { + if ((bad = start[i]) != p1) + mt86_error(start + i, p1, bad); + } +#else + ulong* p = start + offset; + ulong* pe = start + len_dw; + asm __volatile__ + ( + "jmp L70\n\t" + ".p2align 4,,7\n\t" + + "L70:\n\t" + "movl (%%edi),%%ecx\n\t" + "cmpl %%eax,%%ecx\n\t" + "jne L71\n\t" + "L72:\n\t" + "addl $80,%%edi\n\t" + "cmpl %%edx,%%edi\n\t" + "jb L70\n\t" + "jmp L73\n\t" + + "L71:\n\t" + "pushq %%rdx\n\t" + "pushq %%rcx\n\t" + "pushq %%rax\n\t" + "pushq %%rdi\n\t" + "call mt86_error\n\t" + "popq %%rdi\n\t" + "popq %%rax\n\t" + "popq %%rcx\n\t" + "popq %%rdx\n\t" + "jmp L72\n" + + "L73:\n\t" + : : "D" (p), "d" (pe), "a" (p1) + : "ecx" + ); +#endif +} + +/* + * Test all of memory using modulo X access pattern. 
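(Reviewer note: the assembly paths hard-code the stride: "addl $80,%edi" advances 20 dwords and "cmpl $19,%ebx" wraps k after 20 states, so both assume MOD_SZ == 20. A compile-time check would make that coupling explicit -- a sketch, assuming MOD_SZ stays a plain integer constant:

    _Static_assert(MOD_SZ == 20,
                   "modtst assembly hard-codes an 80-byte stride (20 dwords)");
)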
+ */ +void modtst(int offset, int iter, ulong p1, ulong p2, int me) +{ + modtst_ctx ctx; + ctx.offset = offset; + ctx.p1 = p1; + ctx.p2 = p2; + + /* Display the current pattern */ + if (mstr_cpu == me) { + hprint(LINE_PAT, COL_PAT-2, p1); + cprint(LINE_PAT, COL_PAT+6, "-"); + dprint(LINE_PAT, COL_PAT+7, offset, 2, 1); + } + + /* Write every nth location with pattern */ + sliced_foreach_segment(&ctx, me, modtst_sparse_writes); + { BAILR } + + /* Write the rest of memory "iter" times with the pattern complement */ + for (ulong i=0; i<iter; i++) { + sliced_foreach_segment(&ctx, me, modtst_dense_writes); + { BAILR } + } + + /* Now check every nth location */ + sliced_foreach_segment(&ctx, me, modtst_check); +} + +#if PREFER_C + +STATIC void movsl(ulong* dest, + ulong* src, + ulong size_in_dwords) { + /* Logically equivalent to: + + for (ulong i = 0; i < size_in_dwords; i++) + dest[i] = src[i]; + + However: the movsl instruction does the entire loop + in one instruction -- this is probably how 'memcpy' + is implemented -- so hardware makes it very fast. + + Even in PREFER_C mode, we want the brute force of movsl! + */ + asm __volatile__ + ( + "cld\n" + "jmp L1189\n\t" + + ".p2align 4,,7\n\t" + "L1189:\n\t" + + "movl %1,%%edi\n\t" // dest + "movl %0,%%esi\n\t" // src + "movl %2,%%ecx\n\t" // len in dwords + "rep\n\t" + "movsl\n\t" + + :: "g" (src), "g" (dest), "g" (size_in_dwords) + : "edi", "esi", "ecx" + ); +} +#endif // PREFER_C + +STATIC ulong block_move_normalize_len_dw(ulong len_dw) { + // The block_move test works with sets of 64-byte blocks, + // so ensure our total length is a multiple of 64. + // + // In fact, since we divide the region in half, and each half-region + // is a set of 64-byte blocks, the full region should be a multiple of 128 + // bytes. + // + // Note that there's no requirement for the start address of the region to + // be 64-byte aligned, it can be any dword. + ulong result = (len_dw >> 5) << 5; + ASSERT(result > 0); + return result; +} + +STATIC void block_move_init(ulong* restrict buf, + ulong len_dw, const void* unused_ctx) { + len_dw = block_move_normalize_len_dw(len_dw); + + // Compute 'len' in units of 64-byte chunks: + ulong len = len_dw >> 4; + + // We only need to initialize len/2, since we'll just copy + // the first half onto the second half in the move step. + len = len >> 1; + + ulong base_val = 1; +#if PREFER_C + while(len > 0) { + ulong neg_val = ~base_val; + + // Set a block of 64 bytes // first block DWORDS are: + buf[0] = base_val; // 0x00000001 + buf[1] = base_val; // 0x00000001 + buf[2] = base_val; // 0x00000001 + buf[3] = base_val; // 0x00000001 + buf[4] = neg_val; // 0xfffffffe + buf[5] = neg_val; // 0xfffffffe + buf[6] = base_val; // 0x00000001 + buf[7] = base_val; // 0x00000001 + buf[8] = base_val; // 0x00000001 + buf[9] = base_val; // 0x00000001 + buf[10] = neg_val; // 0xfffffffe + buf[11] = neg_val; // 0xfffffffe + buf[12] = base_val; // 0x00000001 + buf[13] = base_val; // 0x00000001 + buf[14] = neg_val; // 0xfffffffe + buf[15] = neg_val; // 0xfffffffe + + buf += 16; // advance to next 64-byte block + len--; + + // Rotate the bit left, including an all-zero state. + // It can't hurt to have a periodicity of 33 instead of + // a power of two. 
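    // (The asm path below gets the same 33-state cycle from a single
    // "rcll $1,%%eax": rotate-through-carry treats CF as a 33rd bit, so
    // the set bit walks bits 0..31 and then spends one step parked in
    // CF, leaving eax == 0 for exactly one block.)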
+ if (base_val == 0) { + base_val = 1; + } else if (base_val & 0x80000000) { + base_val = 0; + } else { + base_val = base_val << 1; + } + } +#else + asm __volatile__ + ( + "jmp L100\n\t" + + ".p2align 4,,7\n\t" + "L100:\n\t" + + // First loop eax is 0x00000001, edx is 0xfffffffe + "movl %%eax, %%edx\n\t" + "notl %%edx\n\t" + + // Set a block of 64-bytes // First loop DWORDS are + "movl %%eax,0(%%edi)\n\t" // 0x00000001 + "movl %%eax,4(%%edi)\n\t" // 0x00000001 + "movl %%eax,8(%%edi)\n\t" // 0x00000001 + "movl %%eax,12(%%edi)\n\t" // 0x00000001 + "movl %%edx,16(%%edi)\n\t" // 0xfffffffe + "movl %%edx,20(%%edi)\n\t" // 0xfffffffe + "movl %%eax,24(%%edi)\n\t" // 0x00000001 + "movl %%eax,28(%%edi)\n\t" // 0x00000001 + "movl %%eax,32(%%edi)\n\t" // 0x00000001 + "movl %%eax,36(%%edi)\n\t" // 0x00000001 + "movl %%edx,40(%%edi)\n\t" // 0xfffffffe + "movl %%edx,44(%%edi)\n\t" // 0xfffffffe + "movl %%eax,48(%%edi)\n\t" // 0x00000001 + "movl %%eax,52(%%edi)\n\t" // 0x00000001 + "movl %%edx,56(%%edi)\n\t" // 0xfffffffe + "movl %%edx,60(%%edi)\n\t" // 0xfffffffe + + // rotate left with carry, + // second loop eax is 0x00000002 + // second loop edx is (~eax) 0xfffffffd + "rcll $1, %%eax\n\t" + + // Move current position forward 64-bytes (to start of next block) + "leal 64(%%edi), %%edi\n\t" + + // Loop until end + "decl %%ecx\n\t" + "jnz L100\n\t" + + : : "D" (buf), "c" (len), "a" (base_val) + : "edx" + ); +#endif +} + +typedef struct { + int iter; + int me; +} block_move_ctx; + +STATIC void block_move_move(ulong* restrict buf, + ulong len_dw, const void* vctx) { + const block_move_ctx* restrict ctx = (const block_move_ctx*)vctx; + ulong iter = ctx->iter; + int me = ctx->me; + + len_dw = block_move_normalize_len_dw(len_dw); + + /* Now move the data around + * First move the data up half of the segment size we are testing + * Then move the data to the original location + 32 bytes + */ + ulong half_len_dw = len_dw / 2; // Half the size of this block in DWORDS + ASSERT(half_len_dw > 8); + + // TODO ulong* mid = buf + half_len_dw; // VA at mid-point of this block. + for (int i=0; i<iter; i++) { + if (i > 0) { + // foreach_segment() called this before the 0th iteration, + // so don't tick twice in quick succession. + do_tick(me); + } + { BAILR } + +#if PREFER_C + // Move first half to 2nd half: + movsl(/*dest=*/ mid, /*src=*/ buf, half_len_dw); + + // Move the second half, less the last 8 dwords + // to the first half plus an offset of 8 dwords. + movsl(/*dest=*/ buf + 8, /*src=*/ mid, half_len_dw - 8); + + // Finally, move the last 8 dwords of the 2nd half + // to the first 8 dwords of the first half. + movsl(/*dest=*/ mid + half_len_dw - 8, /*src=*/ buf, 8); +#else + /* asm __volatile__ // TODO + ( + "cld\n" + "jmp L110\n\t" + + ".p2align 4,,7\n\t" + "L110:\n\t" + + // + // At the end of all this + // - the second half equals the inital value of the first half + // - the first half is right shifted 32-bytes (with wrapping) + // + + // Move first half to second half + "movl %1,%%edi\n\t" // Destination 'mid' (mid point) + "movl %0,%%esi\n\t" // Source, 'buf' (start point) + "movl %2,%%ecx\n\t" // Length, 'half_len_dw' (size of a half in DWORDS) + "rep\n\t" + "movsl\n\t" + + // Move the second half, less the last 32-bytes. 
To the first half, offset plus 32-bytes + "movl %0,%%edi\n\t" + "addl $32,%%edi\n\t" // Destination 'buf' plus 32 bytes + "movl %1,%%esi\n\t" // Source, 'mid' + "movl %2,%%ecx\n\t" + "subl $8,%%ecx\n\t" // Length, 'half_len_dw' + "rep\n\t" + "movsl\n\t" + + // Move last 8 DWORDS (32-bytes) of the second half to the start of the first half + "movl %0,%%edi\n\t" // Destination 'buf' + // Source, 8 DWORDS from the end of the second half, left over by the last rep/movsl + "movl $8,%%ecx\n\t" // Length, 8 DWORDS (32-bytes) + "rep\n\t" + "movsl\n\t" + + :: "g" (buf), "g" (mid), "g" (half_len_dw) + : "edi", "esi", "ecx" + );*/ +#endif + } +} + +STATIC void block_move_check(ulong* restrict buf, + ulong len_dw, const void* unused_ctx) { + len_dw = block_move_normalize_len_dw(len_dw); + + /* Now check the data. + * This is rather crude, we just check that the + * adjacent words are the same. + */ +#if PREFER_C + for (ulong i = 0; i < len_dw; i = i + 2) { + if (buf[i] != buf[i+1]) { + mt86_error(buf+i, buf[i], buf[i+1]); + } + } +#else + ulong* pe = buf + (len_dw - 2); + asm __volatile__ + ( + "jmp L120\n\t" + + ".p2align 4,,7\n\t" + "L124:\n\t" + "addl $8,%%edi\n\t" // Next QWORD + "L120:\n\t" + + // Compare adjacent DWORDS + "movl (%%edi),%%ecx\n\t" + "cmpl 4(%%edi),%%ecx\n\t" + "jnz L121\n\t" // Print error if they don't match + + // Loop until end of block + "L122:\n\t" + "cmpl %%edx,%%edi\n\t" + "jb L124\n" + "jmp L123\n\t" + + "L121:\n\t" + // eax not used so we don't need to save it as per cdecl + // ecx is used but not restored, however we don't need it's value anymore after this point + "pushq %%rdx\n\t" + "pushq 4(%%edi)\n\t" + "pushq %%rcx\n\t" + "pushq %%rdi\n\t" + "call mt86_error\n\t" + "popq %%rdi\n\t" + "addl $8,%%esp\n\t" + "popq %%rdx\n\t" + "jmp L122\n" + "L123:\n\t" + :: "D" (buf), "d" (pe) + : "ecx" + ); +#endif +} + +/* + * Test memory using block moves + * Adapted from Robert Redelmeier's burnBX test + */ +void block_move(int iter, int me) +{ + cprint(LINE_PAT, COL_PAT-2, " "); + + block_move_ctx ctx; + ctx.iter = iter; + ctx.me = me; + + /* Initialize memory with the initial pattern. */ + sliced_foreach_segment(&ctx, me, block_move_init); + { BAILR } + s_barrier(); + + /* Now move the data around */ + sliced_foreach_segment(&ctx, me, block_move_move); + { BAILR } + s_barrier(); + + /* And check it. */ + sliced_foreach_segment(&ctx, me, block_move_check); +} + +typedef struct { + ulong pat; +} bit_fade_ctx; + +STATIC void bit_fade_fill_seg(ulong* restrict p, + ulong len_dw, const void* vctx) { + const bit_fade_ctx* restrict ctx = (const bit_fade_ctx*)vctx; + ulong pat = ctx->pat; + + for (ulong i = 0; i < len_dw; i++) { + p[i] = pat; + } +} + +/* + * Test memory for bit fade, fill memory with pattern. + */ +void bit_fade_fill(ulong p1, int me) +{ + /* Display the current pattern */ + hprint(LINE_PAT, COL_PAT, p1); + + /* Initialize memory with the initial pattern. 
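(Reviewer note, one step back in block_move_move: the PREFER_C branch copies through `mid`, but its declaration is commented out behind the TODO, so that branch cannot compile as committed. The minimal fix appears to be restoring the line

    ulong* mid = buf + half_len_dw;  // VA at the mid-point of this segment

which is the variable all three movsl() calls already reference.)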
*/ + bit_fade_ctx ctx; + ctx.pat = p1; + unsliced_foreach_segment(&ctx, me, bit_fade_fill_seg); +} + +STATIC void bit_fade_chk_seg(ulong* restrict p, + ulong len_dw, const void* vctx) { + const bit_fade_ctx* restrict ctx = (const bit_fade_ctx*)vctx; + ulong pat = ctx->pat; + + for (ulong i = 0; i < len_dw; i++) { + ulong bad; + if ((bad=p[i]) != pat) { + mt86_error(p+i, pat, bad); + } + } +} + +void bit_fade_chk(ulong p1, int me) +{ + bit_fade_ctx ctx; + ctx.pat = p1; + + /* Make sure that nothing changed while sleeping */ + unsliced_foreach_segment(&ctx, me, bit_fade_chk_seg); +} + +/* Sleep for N seconds */ +void sleep(long n, int flag, int me, + int sms /* interpret 'n' as milliseconds instead */) +{ + ulong sh, sl, l, h, t, ip=0; + + /* save the starting time */ + asm __volatile__( + "rdtsc":"=a" (sl),"=d" (sh)); + + /* loop for n seconds */ + while (1) { + /*asm __volatile__( + "rep ; nop\n\t" + "rdtsc":"=a" (l),"=d" (h)); + asm __volatile__ ( + "subl %2,%0\n\t" + "sbbl %3,%1" + :"=a" (l), "=d" (h) + :"g" (sl), "g" (sh), + "0" (l), "1" (h));*/ + + h = 1; // TODO remove + l = 1; // TODO remove + if (sms != 0) { + t = h * ((unsigned)0xffffffff / vv->clks_msec); + t += (l / vv->clks_msec); + } else { + t = h * ((unsigned)0xffffffff / vv->clks_msec) / 1000; + t += (l / vv->clks_msec) / 1000; + } + + /* Is the time up? */ + if (t >= n) { + break; + } + + /* Only display elapsed time if flag is set */ + if (flag == 0) { + continue; + } + + if (t != ip) { + do_tick(me); + { BAILR } + ip = t; + } + } +} + +void beep(unsigned int frequency) +{ +#if 0 + // BOZO(jcoiner) + // Removed this, we need to define outb_p() and inb_p() + // before reintroducing it. +#else + unsigned int count = 1193180 / frequency; + + // Switch on the speaker + outb_p(inb_p(0x61)|3, 0x61); + + // Set command for counter 2, 2 byte write + outb_p(0xB6, 0x43); + + // Select desired Hz + outb_p(count & 0xff, 0x42); + outb((count >> 8) & 0xff, 0x42); + + // Block for 100 microseconds + sleep(100, 0, 0, 1); + + // Switch off the speaker + outb(inb_p(0x61)&0xFC, 0x61); +#endif +} diff --git a/efi_memtest/memtest86+/efi/test_cache.h b/efi_memtest/memtest86+/efi/test_cache.h new file mode 100644 index 0000000..a6ea496 --- /dev/null +++ b/efi_memtest/memtest86+/efi/test_cache.h @@ -0,0 +1,20 @@ +static inline void cache_off(void) +{ + asm( + "push %rax\n\t" + "movq %cr0,%eax\n\t" + "orl $0x40000000,%eax\n\t" /* Set CD */ + "movq %eax,%cr0\n\t" + "wbinvd\n\t" + "pop %rax\n\t"); +} + +static inline void cache_on(void) +{ + asm( + "push %rax\n\t" + "movq %cr0,%eax\n\t" + "andl $0x9fffffff,%eax\n\t" /* Clear CD and NW */ + "movq %eax,%cr0\n\t" + "pop %rax\n\t"); +} diff --git a/efi_memtest/memtest86+/efi/vmem.c b/efi_memtest/memtest86+/efi/vmem.c new file mode 100644 index 0000000..80e69d2 --- /dev/null +++ b/efi_memtest/memtest86+/efi/vmem.c @@ -0,0 +1,159 @@ +/* vmem.c - MemTest-86 + * + * Virtual memory handling (PAE) + * + * Released under version 2 of the Gnu Public License. 
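(Reviewer note on the new efi/test_cache.h above: "movq %cr0,%eax" and "movq %eax,%cr0" pair a 64-bit mnemonic with a 32-bit register and will not assemble in long mode -- moves to and from control registers there must use 64-bit registers. A corrected sketch, same layout, not verified against the rest of the EFI build:

    static inline void cache_off(void)
    {
        asm(
            "push %rax\n\t"
            "movq %cr0,%rax\n\t"         // CRn moves need 64-bit operands
            "orq  $0x40000000,%rax\n\t"  // set CD (bit 30)
            "movq %rax,%cr0\n\t"
            "wbinvd\n\t"
            "pop %rax\n\t");
    }

    static inline void cache_on(void)
    {
        asm(
            "push %rax\n\t"
            "movq %cr0,%rax\n\t"
            "andq $0x9fffffff,%rax\n\t"  // clear CD and NW; the imm32
                                         // sign-extends, so bits 63:32
                                         // of cr0 are preserved
            "movq %rax,%cr0\n\t"
            "pop %rax\n\t");
    }
)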
+ * By Chris Brady + */ +#include "stdint.h" +#include "test.h" +#include "cpuid.h" + +extern struct cpu_ident cpu_id; + +static unsigned long mapped_win = 1; +void paging_off(void) +{ + if (!cpu_id.fid.bits.pae) + return; +/* __asm__ __volatile__ + ( + // Disable paging + "movl %%cr0, %%eax\n\t" + "andl $0x7FFFFFFF, %%eax\n\t" + "movl %%eax, %%cr0\n\t" + : : + : "ax" + );*/ +} + +static void paging_on(void *pdp) +{ + if (!cpu_id.fid.bits.pae) + return; +/* __asm__ __volatile__ + ( + // Load the page table address + "movl %0, %%cr3\n\t" + // Enable paging + "movl %%cr0, %%eax\n\t" + "orl $0x80000000, %%eax\n\t" + "movl %%eax, %%cr0\n\t" + : + : "r" (pdp) + : "ax" + );*/ +} + +static void paging_on_lm(void *pml) +{ +/* if (!cpu_id.fid.bits.pae) + return; + __asm__ __volatile__ + ( + // Load the page table address + "movl %0, %%cr3\n\t" + // Enable paging + "movl %%cr0, %%eax\n\t" + "orl $0x80000000, %%eax\n\t" + "movl %%eax, %%cr0\n\t" + : + : "r" (pml) + : "ax" + );*/ +} + +int map_page(unsigned long page) +{ + unsigned long i; + struct pde { + unsigned long addr_lo; + unsigned long addr_hi; + }; + extern unsigned char pdp[]; + extern unsigned char pml4[]; + extern struct pde pd2[]; + unsigned long win = page >> 19; + + /* Less than 2 GB so no mapping is required */ + if (win == 0) { + return 0; + } + if (cpu_id.fid.bits.pae == 0) { + /* Fail, we don't have PAE */ + return -1; + } + if (cpu_id.fid.bits.lm == 0 && (page > 0x1000000)) { + /* Fail, we want an address that is out of bounds (> 64GB) + * for PAE and no long mode (ie. 32 bit CPU). + */ + return -1; + } + /* Compute the page table entries... */ + for(i = 0; i < 1024; i++) { + /*-----------------10/30/2004 12:37PM--------------- + * 0xE3 -- + * Bit 0 = Present bit. 1 = PDE is present + * Bit 1 = Read/Write. 1 = memory is writable + * Bit 2 = Supervisor/User. 0 = Supervisor only (CPL 0-2) + * Bit 3 = Writethrough. 0 = writeback cache policy + * Bit 4 = Cache Disable. 0 = page level cache enabled + * Bit 5 = Accessed. 1 = memory has been accessed. + * Bit 6 = Dirty. 1 = memory has been written to. + * Bit 7 = Page Size. 1 = page size is 2 MBytes + * --------------------------------------------------*/ + pd2[i].addr_lo = ((win & 1) << 31) + ((i & 0x3ff) << 21) + 0xE3; + pd2[i].addr_hi = (win >> 1); + } + paging_off(); + if (cpu_id.fid.bits.lm == 1) { + paging_on_lm(pml4); + } else { + paging_on(pdp); + } + mapped_win = win; + return 0; +} + +void *mapping(unsigned long phys_page) +{ + void *result; + if (phys_page < WIN_SZ_PAGES) { + /* If the page is below 2GB, address it directly */ + result = (void *)(phys_page << 12); + } + else { + // Higher physical pages map to a virtual address + // in the 2G-4G range. 
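        // Keeping the low 19 bits selects a page within one 2 GB window
        // (0x80000 pages x 4 KB), and adding 0x80000 pushes the alias
        // into VA pages 0x80000..0xFFFFF, i.e. the 2 GB..4 GB range
        // that map_page() re-points at the requested physical window.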
+ unsigned long alias; + alias = phys_page & 0x7FFFF; + alias += 0x80000; + result = (void *)(alias << 12); + } + return result; +} + +void *emapping(unsigned long phys_page) +{ + void *result; + result = mapping(phys_page - 1); + /* Fill in the low address bits */ + result = ((unsigned char *)result) + 0xffc; + return result; +} + +unsigned long page_of(void *addr) +{ + unsigned long page; + page = ((unsigned long)addr) >> 12; + if (page >= 0x80000) { + page &= 0x7FFFF; + page += mapped_win << 19; + } +#if 0 + cprint(LINE_SCROLL -2, 0, "page_of( )-> "); + hprint(LINE_SCROLL -2, 8, ((unsigned long)addr)); + hprint(LINE_SCROLL -2, 20, page); +#endif + return page; +} diff --git a/efi_memtest/memtest86+/error.c b/efi_memtest/memtest86+/error.c index 61afa1b..48a94ee 100644 --- a/efi_memtest/memtest86+/error.c +++ b/efi_memtest/memtest86+/error.c @@ -466,8 +466,8 @@ char spin[4] = {'|','/','-','\\'}; void do_tick(int me) { - int i, j, pct; - ulong h, l, n, t; + int i, /*j,*/ pct; + ulong h, /*l,*/ n/*, t*/; extern int mstr_cpu; if (++spin_idx[me] > 3) { @@ -588,7 +588,9 @@ void do_tick(int me) /* We can't do the elapsed time unless the rdtsc instruction * is supported */ - if (cpu_id.fid.bits.rdtsc) { + + // TODO + /* if (cpu_id.fid.bits.rdtsc) { asm __volatile__( "rdtsc":"=a" (l),"=d" (h)); asm __volatile__ ( @@ -620,7 +622,7 @@ void do_tick(int me) } vv->each_sec = j; } - } + }*/ /* Poll for ECC errors */ /* diff --git a/efi_memtest/memtest86+/from main b/efi_memtest/memtest86+/from main new file mode 100644 index 0000000..742e890 --- /dev/null +++ b/efi_memtest/memtest86+/from main @@ -0,0 +1,342 @@ + + + /* First time (for this CPU) initialization */ + if (start_seq < 2) { + + /* These steps are only done by the boot cpu */ + if (my_cpu_num == 0) { + my_cpu_ord = cpu_ord++; + smp_set_ordinal(my_cpu_num, my_cpu_ord); + parse_command_line(); + clear_screen(); + /* Initialize the barrier so the lock in btrace will work. + * Will get redone later when we know how many CPUs we have */ + barrier_init(1); + btrace(my_cpu_num, __LINE__, "Begin ", 1, 0, 0); + /* Find memory size */ + mem_size(); /* must be called before initialise_cpus(); */ + /* Fill in the CPUID table */ + get_cpuid(); + /* Startup the other CPUs */ + start_seq = 1; + //initialise_cpus(); + btrace(my_cpu_num, __LINE__, "BeforeInit", 1, 0, 0); + /* Draw the screen and get system information */ + init(); + + /* Set defaults and initialize variables */ + set_defaults(); + + /* Setup base address for testing, 1 MB */ + win0_start = 0x100; + + /* Set relocation address to 32Mb if there is enough + * memory. 
Otherwise set it to 3Mb */ + /* Large reloc addr allows for more testing overlap */ + if ((ulong)vv->pmap[vv->msegs-1].end > 0x2f00) { + high_test_adr = 0x2000000; + } else { + high_test_adr = 0x300000; + } + win1_end = (high_test_adr >> 12); + + /* Adjust the map to not test the page at 939k, + * reserved for locks */ + vv->pmap[0].end--; + + find_ticks_for_pass(); + } else { + /* APs only, Register the APs */ + btrace(my_cpu_num, __LINE__, "AP_Start ", 0, my_cpu_num, + cpu_ord); + smp_ap_booted(my_cpu_num); + /* Asign a sequential CPU ordinal to each active cpu */ + spin_lock(&barr->mutex); + my_cpu_ord = cpu_ord++; + smp_set_ordinal(my_cpu_num, my_cpu_ord); + spin_unlock(&barr->mutex); + btrace(my_cpu_num, __LINE__, "AP_Done ", 0, my_cpu_num, + my_cpu_ord); + } + + } else { + /* Unlock after a relocation */ + spin_unlock(&barr->mutex); + /* Get the CPU ordinal since it is lost during relocation */ + my_cpu_ord = smp_my_ord_num(my_cpu_num); + btrace(my_cpu_num, __LINE__, "Reloc_Done",0,my_cpu_num,my_cpu_ord); + } + + /* A barrier to insure that all of the CPUs are done with startup */ + barrier(); + btrace(my_cpu_num, __LINE__, "1st Barr ", 1, my_cpu_num, my_cpu_ord); + + + /* Setup Memory Management and measure memory speed, we do it here + * because we need all of the available CPUs */ + if (start_seq < 2) { + + /* Enable floating point processing */ + enable_fp_processing(); + + + btrace(my_cpu_num, __LINE__, "Mem Mgmnt ", + 1, cpu_id.fid.bits.pae, cpu_id.fid.bits.lm); + /* Setup memory management modes */ + setup_mm_modes(); + + /* Get the memory Speed with all CPUs */ + get_mem_speed(my_cpu_num, num_cpus); + } + + /* Set the initialized flag only after all of the CPU's have + * Reached the barrier. This insures that relocation has + * been completed for each CPU. 
*/ + btrace(my_cpu_num, __LINE__, "Start Done", 1, 0, 0); + start_seq = 2; + + /* Loop through all tests */ + while (1) { + /* If the restart flag is set all initial params */ + if (restart_flag) { + set_defaults(); + continue; + } + /* Skip single CPU tests if we are using only one CPU */ + if (tseq[test].cpu_sel == -1 && + (num_cpus == 1 || cpu_mode != CPM_ALL)) { + test++; + continue; + } + + test_setup(); + + /* Loop through all possible windows */ + while (win_next <= ((ulong)vv->pmap[vv->msegs-1].end + WIN_SZ_PAGES)) { + + /* Main scheduling barrier */ + cprint(8, my_cpu_num+7, "W"); + btrace(my_cpu_num, __LINE__, "Sched_Barr", 1,window,win_next); + barrier(); + + /* Don't go over the 8TB PAE limit */ + if (win_next > MAX_MEM_PAGES) { + break; + } + + /* For the bit fade test, #11, we cannot relocate so bump the + * window to 1 */ + if (tseq[test].pat == 11 && window == 0) { + window = 1; + } + + /* Relocate if required */ + if (window != 0 && (ulong)&_start != LOW_TEST_ADR) { + btrace(my_cpu_num, __LINE__, "Sched_RelL", 1,0,0); + run_at(LOW_TEST_ADR, my_cpu_num); + } + if (window == 0 && vv->plim_lower >= win0_start) { + window++; + } + if (window == 0 && (ulong)&_start == LOW_TEST_ADR) { + btrace(my_cpu_num, __LINE__, "Sched_RelH", 1,0,0); + run_at(high_test_adr, my_cpu_num); + } + + /* Decide which CPU(s) to use */ + btrace(my_cpu_num, __LINE__, "Sched_CPU0",1,cpu_sel, + tseq[test].cpu_sel); + run = 1; + switch(cpu_mode) { + case CPM_RROBIN: + case CPM_SEQ: + /* Select a single CPU */ + if (my_cpu_ord == cpu_sel) { + mstr_cpu = cpu_sel; + run_cpus = 1; + } else { + run = 0; + } + break; + case CPM_ALL: + /* Use all CPUs */ + if (tseq[test].cpu_sel == -1) { + /* Round robin through all of the CPUs */ + if (my_cpu_ord == cpu_sel) { + mstr_cpu = cpu_sel; + run_cpus = 1; + } else { + run = 0; + } + } else { + /* Use the number of CPUs specified by the test, + * Starting with zero */ + if (my_cpu_ord >= tseq[test].cpu_sel) { + run = 0; + } + /* Set the master CPU to the highest CPU number + * that has been selected */ + if (act_cpus < tseq[test].cpu_sel) { + mstr_cpu = act_cpus-1; + run_cpus = act_cpus; + } else { + mstr_cpu = tseq[test].cpu_sel-1; + run_cpus = tseq[test].cpu_sel; + } + } + } + btrace(my_cpu_num, __LINE__, "Sched_CPU1",1,run_cpus,run); + barrier(); + dprint(9, 7, run_cpus, 2, 0); + + /* Setup a sub barrier for only the selected CPUs */ + if (my_cpu_ord == mstr_cpu) { + s_barrier_init(run_cpus); + } + + /* Make sure the the sub barrier is ready before proceeding */ + barrier(); + + /* Not selected CPUs go back to the scheduling barrier */ + if (run == 0 ) { + continue; + } + cprint(8, my_cpu_num+7, "-"); + btrace(my_cpu_num, __LINE__, "Sched_Win0",1,window,win_next); + + if (my_cpu_ord == mstr_cpu) { + switch (window) { + /* Special case for relocation */ + case 0: + winx.start = 0; + winx.end = win1_end; + window++; + break; + /* Special case for first segment */ + case 1: + winx.start = win0_start; + winx.end = WIN_SZ_PAGES; + win_next += WIN_SZ_PAGES; + window++; + break; + /* For all other windows */ + default: + winx.start = win_next; + win_next += WIN_SZ_PAGES; + winx.end = win_next; + } + btrace(my_cpu_num,__LINE__,"Sched_Win1",1,winx.start, + winx.end); + + /* Find the memory areas to test */ + segs = compute_segments(winx, my_cpu_num); + } + s_barrier(); + btrace(my_cpu_num,__LINE__,"Sched_Win2",1,segs, + vv->map[0].pbase_addr); + + if (segs == 0) { + /* No memory in this window so skip it */ + continue; + } + + /* map in the window... 
*/ + if (map_page(vv->map[0].pbase_addr) < 0) { + /* Either there is no PAE or we are at the PAE limit */ + break; + } + + btrace(my_cpu_num, __LINE__, "Strt_Test ",1,my_cpu_num, + my_cpu_ord); + do_test(my_cpu_ord); + btrace(my_cpu_num, __LINE__, "End_Test ",1,my_cpu_num, + my_cpu_ord); + + paging_off(); + + } /* End of window loop */ + + s_barrier(); + btrace(my_cpu_num, __LINE__, "End_Win ",1,test, window); + + /* Setup for the next set of windows */ + win_next = 0; + window = 0; + bail = 0; + + /* Only the master CPU does the end of test housekeeping */ + if (my_cpu_ord != mstr_cpu) { + continue; + } + + /* Special handling for the bit fade test #11 */ + if (tseq[test].pat == 11 && bitf_seq != 6) { + /* Keep going until the sequence is complete. */ + bitf_seq++; + continue; + } else { + bitf_seq = 0; + } + + /* Select advancement of CPUs and next test */ + switch(cpu_mode) { + case CPM_RROBIN: + if (++cpu_sel >= act_cpus) { + cpu_sel = 0; + } + next_test(); + break; + case CPM_SEQ: + if (++cpu_sel >= act_cpus) { + cpu_sel = 0; + next_test(); + } + break; + case CPM_ALL: + if (tseq[test].cpu_sel == -1) + { + /* Do the same test for each CPU */ + if (++cpu_sel >= act_cpus) + { + cpu_sel = 0; + next_test(); + } else { + continue; + } + } else { + next_test(); + } + } //???? + btrace(my_cpu_num, __LINE__, "Next_CPU ",1,cpu_sel,test); + + /* If this was the last test then we finished a pass */ + if (pass_flag) + { + pass_flag = 0; + + vv->pass++; + + dprint(LINE_INFO, 49, vv->pass, 5, 0); + find_ticks_for_pass(); + ltest = -1; + + if (vv->ecount == 0) + { + /* If onepass is enabled and we did not get any errors + * reboot to exit the test */ + if (onepass) { reboot(); } + if (!btflag) + cprint(LINE_MSG, COL_MSG-8, + "** Pass complete, no errors, press Esc to exit **"); + if(BEEP_END_NO_ERROR) + { + beep(1000); + beep(2000); + beep(1000); + beep(2000); + } + } + } + + bail=0; + } /* End test loop */
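(Reviewer note: the window scheduling in the loop above is easiest to follow condensed. A sketch of the bounds the master CPU computes -- hypothetical helper; the real code mutates winx, window and win_next in place:

    struct window { ulong start, end; };

    static struct window next_window(int *window, ulong *win_next,
                                     ulong win0_start, ulong win1_end) {
        struct window w;
        switch (*window) {
        case 0:                        /* special case for relocation */
            w.start = 0;
            w.end = win1_end;
            (*window)++;
            break;
        case 1:                        /* first segment: skip reserved low pages */
            w.start = win0_start;
            w.end = WIN_SZ_PAGES;
            *win_next += WIN_SZ_PAGES;
            (*window)++;
            break;
        default:                       /* all other windows: fixed-size steps */
            w.start = *win_next;
            *win_next += WIN_SZ_PAGES;
            w.end = *win_next;
            break;
        }
        return w;
    }
)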
\ No newline at end of file diff --git a/efi_memtest/memtest86+/main.c b/efi_memtest/memtest86+/main.c index d8eac4a..0ed8d8a 100644 --- a/efi_memtest/memtest86+/main.c +++ b/efi_memtest/memtest86+/main.c @@ -42,6 +42,9 @@ extern struct barrier_s *barr; extern int num_cpus; extern int act_cpus; +extern void enable_fp_processing(void); +extern void setup_mm_modes(void); + static int find_ticks_for_test(int test); void find_ticks_for_pass(void); int find_chunks(int test); @@ -378,397 +381,19 @@ void clear_screen() /* Test entry point. We get here on startup and also whenever * we relocate. */ void test_start(void) -{ - int my_cpu_num, my_cpu_ord, run; +{ // TODO logger to file or print + int my_cpu_num, my_cpu_ord, run; /* If this is the first time here we are CPU 0 */ if (start_seq == 0) { my_cpu_num = 0; } else { - my_cpu_num = smp_my_cpu_num(); - } - /* First thing, switch to main stack */ - switch_to_main_stack(my_cpu_num); - - /* First time (for this CPU) initialization */ - if (start_seq < 2) { - - /* These steps are only done by the boot cpu */ - if (my_cpu_num == 0) { - my_cpu_ord = cpu_ord++; - smp_set_ordinal(my_cpu_num, my_cpu_ord); - parse_command_line(); - clear_screen(); - /* Initialize the barrier so the lock in btrace will work. - * Will get redone later when we know how many CPUs we have */ - barrier_init(1); - btrace(my_cpu_num, __LINE__, "Begin ", 1, 0, 0); - /* Find memory size */ - mem_size(); /* must be called before initialise_cpus(); */ - /* Fill in the CPUID table */ - get_cpuid(); - /* Startup the other CPUs */ - start_seq = 1; - //initialise_cpus(); - btrace(my_cpu_num, __LINE__, "BeforeInit", 1, 0, 0); - /* Draw the screen and get system information */ - init(); - - /* Set defaults and initialize variables */ - set_defaults(); - - /* Setup base address for testing, 1 MB */ - win0_start = 0x100; - - /* Set relocation address to 32Mb if there is enough - * memory. 
Otherwise set it to 3Mb */ - /* Large reloc addr allows for more testing overlap */ - if ((ulong)vv->pmap[vv->msegs-1].end > 0x2f00) { - high_test_adr = 0x2000000; - } else { - high_test_adr = 0x300000; - } - win1_end = (high_test_adr >> 12); - - /* Adjust the map to not test the page at 939k, - * reserved for locks */ - vv->pmap[0].end--; - - find_ticks_for_pass(); - } else { - /* APs only, Register the APs */ - btrace(my_cpu_num, __LINE__, "AP_Start ", 0, my_cpu_num, - cpu_ord); - smp_ap_booted(my_cpu_num); - /* Asign a sequential CPU ordinal to each active cpu */ - spin_lock(&barr->mutex); - my_cpu_ord = cpu_ord++; - smp_set_ordinal(my_cpu_num, my_cpu_ord); - spin_unlock(&barr->mutex); - btrace(my_cpu_num, __LINE__, "AP_Done ", 0, my_cpu_num, - my_cpu_ord); - } - - } else { - /* Unlock after a relocation */ - spin_unlock(&barr->mutex); - /* Get the CPU ordinal since it is lost during relocation */ - my_cpu_ord = smp_my_ord_num(my_cpu_num); - btrace(my_cpu_num, __LINE__, "Reloc_Done",0,my_cpu_num,my_cpu_ord); + // TODO my_cpu_num = smp_my_cpu_num(); } - /* A barrier to insure that all of the CPUs are done with startup */ - barrier(); - btrace(my_cpu_num, __LINE__, "1st Barr ", 1, my_cpu_num, my_cpu_ord); - - - /* Setup Memory Management and measure memory speed, we do it here - * because we need all of the available CPUs */ - if (start_seq < 2) { - - /* Enable floating point processing */ - if (cpu_id.fid.bits.fpu) - __asm__ __volatile__ - ( - "movl %%cr0, %%eax\n\t" - "andl $0x7, %%eax\n\t" - "movl %%eax, %%cr0\n\t" - : : - : "ax" - ); - if (cpu_id.fid.bits.sse) - __asm__ __volatile__ - ( - "movl %%cr4, %%eax\n\t" - "orl $0x00000200, %%eax\n\t" - "movl %%eax, %%cr4\n\t" - : : - : "ax" - ); - - btrace(my_cpu_num, __LINE__, "Mem Mgmnt ", - 1, cpu_id.fid.bits.pae, cpu_id.fid.bits.lm); - /* Setup memory management modes */ - /* If we have PAE, turn it on */ - if (cpu_id.fid.bits.pae == 1) { - __asm__ __volatile__ - ( - "movl %%cr4, %%eax\n\t" - "orl $0x00000020, %%eax\n\t" - "movl %%eax, %%cr4\n\t" - : : - : "ax" - ); - cprint(LINE_TITLE+1, COL_MODE, "(PAE Mode)"); - } - /* If this is a 64 CPU enable long mode */ - if (cpu_id.fid.bits.lm == 1) { - __asm__ __volatile__ - ( - "movl $0xc0000080, %%ecx\n\t" - "rdmsr\n\t" - "orl $0x00000100, %%eax\n\t" - "wrmsr\n\t" - : : - : "ax", "cx" - ); - cprint(LINE_TITLE+1, COL_MODE, "(X64 Mode)"); - } - /* Get the memory Speed with all CPUs */ - get_mem_speed(my_cpu_num, num_cpus); - } - - /* Set the initialized flag only after all of the CPU's have - * Reached the barrier. This insures that relocation has - * been completed for each CPU. 
*/ - btrace(my_cpu_num, __LINE__, "Start Done", 1, 0, 0); - start_seq = 2; - - /* Loop through all tests */ - while (1) { - /* If the restart flag is set all initial params */ - if (restart_flag) { - set_defaults(); - continue; - } - /* Skip single CPU tests if we are using only one CPU */ - if (tseq[test].cpu_sel == -1 && - (num_cpus == 1 || cpu_mode != CPM_ALL)) { - test++; - continue; - } - - test_setup(); - - /* Loop through all possible windows */ - while (win_next <= ((ulong)vv->pmap[vv->msegs-1].end + WIN_SZ_PAGES)) { - - /* Main scheduling barrier */ - cprint(8, my_cpu_num+7, "W"); - btrace(my_cpu_num, __LINE__, "Sched_Barr", 1,window,win_next); - barrier(); - - /* Don't go over the 8TB PAE limit */ - if (win_next > MAX_MEM_PAGES) { - break; - } - - /* For the bit fade test, #11, we cannot relocate so bump the - * window to 1 */ - if (tseq[test].pat == 11 && window == 0) { - window = 1; - } - - /* Relocate if required */ - if (window != 0 && (ulong)&_start != LOW_TEST_ADR) { - btrace(my_cpu_num, __LINE__, "Sched_RelL", 1,0,0); - run_at(LOW_TEST_ADR, my_cpu_num); - } - if (window == 0 && vv->plim_lower >= win0_start) { - window++; - } - if (window == 0 && (ulong)&_start == LOW_TEST_ADR) { - btrace(my_cpu_num, __LINE__, "Sched_RelH", 1,0,0); - run_at(high_test_adr, my_cpu_num); - } - - /* Decide which CPU(s) to use */ - btrace(my_cpu_num, __LINE__, "Sched_CPU0",1,cpu_sel, - tseq[test].cpu_sel); - run = 1; - switch(cpu_mode) { - case CPM_RROBIN: - case CPM_SEQ: - /* Select a single CPU */ - if (my_cpu_ord == cpu_sel) { - mstr_cpu = cpu_sel; - run_cpus = 1; - } else { - run = 0; - } - break; - case CPM_ALL: - /* Use all CPUs */ - if (tseq[test].cpu_sel == -1) { - /* Round robin through all of the CPUs */ - if (my_cpu_ord == cpu_sel) { - mstr_cpu = cpu_sel; - run_cpus = 1; - } else { - run = 0; - } - } else { - /* Use the number of CPUs specified by the test, - * Starting with zero */ - if (my_cpu_ord >= tseq[test].cpu_sel) { - run = 0; - } - /* Set the master CPU to the highest CPU number - * that has been selected */ - if (act_cpus < tseq[test].cpu_sel) { - mstr_cpu = act_cpus-1; - run_cpus = act_cpus; - } else { - mstr_cpu = tseq[test].cpu_sel-1; - run_cpus = tseq[test].cpu_sel; - } - } - } - btrace(my_cpu_num, __LINE__, "Sched_CPU1",1,run_cpus,run); - barrier(); - dprint(9, 7, run_cpus, 2, 0); - - /* Setup a sub barrier for only the selected CPUs */ - if (my_cpu_ord == mstr_cpu) { - s_barrier_init(run_cpus); - } - - /* Make sure the the sub barrier is ready before proceeding */ - barrier(); - - /* Not selected CPUs go back to the scheduling barrier */ - if (run == 0 ) { - continue; - } - cprint(8, my_cpu_num+7, "-"); - btrace(my_cpu_num, __LINE__, "Sched_Win0",1,window,win_next); - - if (my_cpu_ord == mstr_cpu) { - switch (window) { - /* Special case for relocation */ - case 0: - winx.start = 0; - winx.end = win1_end; - window++; - break; - /* Special case for first segment */ - case 1: - winx.start = win0_start; - winx.end = WIN_SZ_PAGES; - win_next += WIN_SZ_PAGES; - window++; - break; - /* For all other windows */ - default: - winx.start = win_next; - win_next += WIN_SZ_PAGES; - winx.end = win_next; - } - btrace(my_cpu_num,__LINE__,"Sched_Win1",1,winx.start, - winx.end); - - /* Find the memory areas to test */ - segs = compute_segments(winx, my_cpu_num); - } - s_barrier(); - btrace(my_cpu_num,__LINE__,"Sched_Win2",1,segs, - vv->map[0].pbase_addr); - - if (segs == 0) { - /* No memory in this window so skip it */ - continue; - } - - /* map in the window... 
*/ - if (map_page(vv->map[0].pbase_addr) < 0) { - /* Either there is no PAE or we are at the PAE limit */ - break; - } - - btrace(my_cpu_num, __LINE__, "Strt_Test ",1,my_cpu_num, - my_cpu_ord); - do_test(my_cpu_ord); - btrace(my_cpu_num, __LINE__, "End_Test ",1,my_cpu_num, - my_cpu_ord); - - paging_off(); - - } /* End of window loop */ - - s_barrier(); - btrace(my_cpu_num, __LINE__, "End_Win ",1,test, window); - - /* Setup for the next set of windows */ - win_next = 0; - window = 0; - bail = 0; - - /* Only the master CPU does the end of test housekeeping */ - if (my_cpu_ord != mstr_cpu) { - continue; - } - - /* Special handling for the bit fade test #11 */ - if (tseq[test].pat == 11 && bitf_seq != 6) { - /* Keep going until the sequence is complete. */ - bitf_seq++; - continue; - } else { - bitf_seq = 0; - } - - /* Select advancement of CPUs and next test */ - switch(cpu_mode) { - case CPM_RROBIN: - if (++cpu_sel >= act_cpus) { - cpu_sel = 0; - } - next_test(); - break; - case CPM_SEQ: - if (++cpu_sel >= act_cpus) { - cpu_sel = 0; - next_test(); - } - break; - case CPM_ALL: - if (tseq[test].cpu_sel == -1) - { - /* Do the same test for each CPU */ - if (++cpu_sel >= act_cpus) - { - cpu_sel = 0; - next_test(); - } else { - continue; - } - } else { - next_test(); - } - } //???? - btrace(my_cpu_num, __LINE__, "Next_CPU ",1,cpu_sel,test); - - /* If this was the last test then we finished a pass */ - if (pass_flag) - { - pass_flag = 0; - - vv->pass++; - - dprint(LINE_INFO, 49, vv->pass, 5, 0); - find_ticks_for_pass(); - ltest = -1; - - if (vv->ecount == 0) - { - /* If onepass is enabled and we did not get any errors - * reboot to exit the test */ - if (onepass) { reboot(); } - if (!btflag) - cprint(LINE_MSG, COL_MSG-8, - "** Pass complete, no errors, press Esc to exit **"); - if(BEEP_END_NO_ERROR) - { - beep(1000); - beep(2000); - beep(1000); - beep(2000); - } - } - } - - bail=0; - } /* End test loop */ + /* First thing, switch to main stack */ + // TODO create head.S to get the boot_stack poiter? + //switch_to_main_stack(my_cpu_num); } void test_setup() diff --git a/efi_memtest/memtest86+/test.h b/efi_memtest/memtest86+/test.h index 8b2e924..ccf6b66 100644 --- a/efi_memtest/memtest86+/test.h +++ b/efi_memtest/memtest86+/test.h @@ -4,6 +4,8 @@ * By Chris Brady */ +#include "test_cache.h" + #ifndef _TEST_H_ #define _TEST_H_ #define E88 0x00 @@ -227,26 +229,6 @@ struct pair { ulong mask; }; -static inline void cache_off(void) -{ - asm( - "push %eax\n\t" - "movl %cr0,%eax\n\t" - "orl $0x40000000,%eax\n\t" /* Set CD */ - "movl %eax,%cr0\n\t" - "wbinvd\n\t" - "pop %eax\n\t"); -} - -static inline void cache_on(void) -{ - asm( - "push %eax\n\t" - "movl %cr0,%eax\n\t" - "andl $0x9fffffff,%eax\n\t" /* Clear CD and NW */ - "movl %eax,%cr0\n\t" - "pop %eax\n\t"); -} struct mmap { ulong pbase_addr; |
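(Reviewer note on the test.h change: the new #include "test_cache.h" sits above the #ifndef _TEST_H_ guard, and neither bios/test_cache.h nor efi/test_cache.h carries a guard of its own, so including test.h twice in one translation unit would redefine cache_off()/cache_on(). A conventional fix -- sketched here, guard name illustrative -- is to guard the new header (and/or move the include inside test.h's own guard):

    #ifndef TEST_CACHE_H_
    #define TEST_CACHE_H_

    // ... cache_off() / cache_on() definitions as above ...

    #endif  // TEST_CACHE_H_
)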