diff options
Diffstat (limited to 'memtestEDK/Memtest/SingleComponents/test.c')
-rw-r--r-- | memtestEDK/Memtest/SingleComponents/test.c | 1549 |
1 files changed, 0 insertions, 1549 deletions
diff --git a/memtestEDK/Memtest/SingleComponents/test.c b/memtestEDK/Memtest/SingleComponents/test.c deleted file mode 100644 index 864dfcc..0000000 --- a/memtestEDK/Memtest/SingleComponents/test.c +++ /dev/null @@ -1,1549 +0,0 @@ -/* test.c - MemTest-86 Version 3.4 - * - * Released under version 2 of the Gnu Public License. - * By Chris Brady - * ---------------------------------------------------- - * MemTest86+ V5 Specific code (GPL V2.0) - * By Samuel DEMEULEMEESTER, sdemeule@memtest.org - * http://www.canardpc.com - http://www.memtest.org - * Thanks to Passmark for calculate_chunk() and various comments ! - */ - -#include "test.h" -#include "config.h" -#include "stdint.h" -#include "cpuid.h" -#include "smp.h" -#include "io.h" - -extern struct cpu_ident cpu_id; -extern volatile int mstr_cpu; -extern volatile int run_cpus; -extern volatile int test; -extern volatile int segs, bail; -extern int test_ticks, nticks; -extern struct tseq tseq[]; -extern void update_err_counts(void); -extern void print_err_counts(void); -void rand_seed( unsigned int seed1, unsigned int seed2, int me); -ulong rand(int me); -void poll_errors(); - -// NOTE(jcoiner): -// Defining 'STATIC' to empty string results in crashes. (It should -// work fine, of course.) I suspect relocation problems in reloc.c. -// When we declare these routines static, we use relative addresses -// for them instead of looking up their addresses in (supposedly -// relocated) global elf tables, which avoids the crashes. - -#define STATIC static -//#define STATIC - -#define PREFER_C 0 - -static const void* const nullptr = 0x0; - -// Writes *start and *end with the VA range to test. -// -// me - this threads CPU number -// j - index into v->map for current segment we are testing -// align - number of bytes to align each block to -STATIC void calculate_chunk(ulong** start, ulong** end, int me, - int j, int makeMultipleOf) { - ulong chunk; - - // If we are only running 1 CPU then test the whole block - if (run_cpus == 1) { - *start = vv->map[j].start; - *end = vv->map[j].end; - } else { - - // Divide the current segment by the number of CPUs - chunk = (ulong)vv->map[j].end-(ulong)vv->map[j].start; - chunk /= run_cpus; - - // Round down to the nearest desired bitlength multiple - chunk = (chunk + (makeMultipleOf-1)) & ~(makeMultipleOf-1); - - // Figure out chunk boundaries - *start = (ulong*)((ulong)vv->map[j].start+(chunk*me)); - /* Set end addrs for the highest CPU num to the - * end of the segment for rounding errors */ - /* Also rounds down to boundary if needed, may miss some ram but - better than crashing or producing false errors. */ - /* This rounding probably will never happen as the segments should - be in 4096 bytes pages if I understand correctly. */ - if (me == mstr_cpu) { - *end = (ulong*)(vv->map[j].end); - } else { - *end = (ulong*)((ulong)(*start) + chunk); - (*end)--; - } - } -} - -/* Call segment_fn() for each up-to-SPINSZ segment between - * 'start' and 'end'. - */ -void foreach_segment -(ulong* start, ulong* end, - int me, const void* ctx, segment_fn func) { - - ASSERT(start < end); - - // Confirm 'start' points to an even dword, and 'end' - // should point to an odd dword - ASSERT(0 == (((ulong)start) & 0x7)); - ASSERT(0x4 == (((ulong)end) & 0x7)); - - // 'end' may be exactly 0xfffffffc, right at the 4GB boundary. - // - // To avoid overflow in our loop tests and length calculations, - // use dword indices (the '_dw' vars) to avoid overflows. - ulong start_dw = ((ulong)start) >> 2; - ulong end_dw = ((ulong) end) >> 2; - - // end is always xxxxxffc, but increment end_dw to an - // address beyond the segment for easier boundary calculations. - ++end_dw; - - ulong seg_dw = start_dw; - ulong seg_end_dw = start_dw; - - int done = 0; - do { - do_tick(me); - { BAILR } - - // ensure no overflow - ASSERT((seg_end_dw + SPINSZ_DWORDS) > seg_end_dw); - seg_end_dw += SPINSZ_DWORDS; - - if (seg_end_dw >= end_dw) { - seg_end_dw = end_dw; - done++; - } - if (seg_dw == seg_end_dw) { - break; - } - - ASSERT(((ulong)seg_end_dw) <= 0x40000000); - ASSERT(seg_end_dw > seg_dw); - ulong seg_len_dw = seg_end_dw - seg_dw; - - func((ulong*)(seg_dw << 2), seg_len_dw, ctx); - - seg_dw = seg_end_dw; - } while (!done); -} - -/* Calls segment_fn() for each segment in vv->map. - * - * Does not slice by CPU number, so it covers the entire memory. - * Contrast to sliced_foreach_segment(). - */ -STATIC void unsliced_foreach_segment -(const void* ctx, int me, segment_fn func) { - int j; - for (j=0; j<segs; j++) { - foreach_segment(vv->map[j].start, - vv->map[j].end, - me, ctx, func); - } -} - -/* Calls segment_fn() for each segment to be tested by CPU 'me'. - * - * In multicore mode, slices the segments by 'me' (the CPU ordinal - * number) so that each call will cover only 1/Nth of memory. - */ -STATIC void sliced_foreach_segment -(const void *ctx, int me, segment_fn func) { - int j; - ulong *start, *end; // VAs - ulong* prev_end = 0; - for (j=0; j<segs; j++) { - calculate_chunk(&start, &end, me, j, 64); - - // Ensure no overlap among chunks - ASSERT(end > start); - if (prev_end > 0) { - ASSERT(prev_end < start); - } - prev_end = end; - - foreach_segment(start, end, me, ctx, func); - } -} - -STATIC void addr_tst1_seg(ulong* restrict buf, - ulong len_dw, const void* unused) { - // Within each segment: - // - choose a low dword offset 'off' - // - write pat to *off - // - write ~pat to addresses that are above off by - // 1, 2, 4, ... dwords up to the top of the segment. None - // should alias to the original dword. - // - write ~pat to addresses that are below off by - // 1, 2, 4, etc dwords, down to the start of the segment. None - // should alias to the original dword. If adding a given offset - // doesn't produce a single bit address flip (because it produced - // a carry) subtracting the same offset should give a single bit flip. - // - repeat this, moving off ahead in increments of 1MB; - // this covers address bits within physical memory banks, we hope? - - ulong pat; - int k; - - for (pat=0x5555aaaa, k=0; k<2; k++) { - hprint(LINE_PAT, COL_PAT, pat); - - for (ulong off_dw = 0; off_dw < len_dw; off_dw += (1 << 18)) { - buf[off_dw] = pat; - pat = ~pat; - - for (ulong more_off_dw = 1; off_dw + more_off_dw < len_dw; - more_off_dw = more_off_dw << 1) { - ASSERT(more_off_dw); // it should never get to zero - buf[off_dw + more_off_dw] = pat; - ulong bad; - if ((bad = buf[off_dw]) != ~pat) { - ad_err1(buf + off_dw, - buf + off_dw + more_off_dw, - bad, ~pat); - break; - } - } - for (ulong more_off_dw = 1; off_dw > more_off_dw; - more_off_dw = more_off_dw << 1) { - ASSERT(more_off_dw); // it should never get to zero - buf[off_dw - more_off_dw] = pat; - ulong bad; - if ((bad = buf[off_dw]) != ~pat) { - ad_err1(buf + off_dw, - buf + off_dw - more_off_dw, - bad, ~pat); - break; - } - } - } - } -} - -/* - * Memory address test, walking ones - */ -void addr_tst1(int me) -{ - unsliced_foreach_segment(nullptr, me, addr_tst1_seg); -} - -STATIC void addr_tst2_init_segment(ulong* p, - ulong len_dw, const void* unused) { - ulong* pe = p + (len_dw - 1); - - /* Original C code replaced with hand tuned assembly code - * for (; p <= pe; p++) { - * *p = (ulong)p; - * } - */ - asm __volatile__ ( - "jmp L91\n\t" - ".p2align 4,,7\n\t" - "L90:\n\t" - "addl $4,%%edi\n\t" - "L91:\n\t" - "movl %%edi,(%%edi)\n\t" - "cmpl %%edx,%%edi\n\t" - "jb L90\n\t" - : : "D" (p), "d" (pe) - ); -} - -STATIC void addr_tst2_check_segment(ulong* p, - ulong len_dw, const void* unused) { - ulong* pe = p + (len_dw - 1); - - /* Original C code replaced with hand tuned assembly code - * for (; p <= pe; p++) { - * if((bad = *p) != (ulong)p) { - * ad_err2((ulong)p, bad); - * } - * } - */ - asm __volatile__ - ( - "jmp L95\n\t" - ".p2align 4,,7\n\t" - "L99:\n\t" - "addl $4,%%edi\n\t" - "L95:\n\t" - "movl (%%edi),%%ecx\n\t" - "cmpl %%edi,%%ecx\n\t" - "jne L97\n\t" - "L96:\n\t" - "cmpl %%edx,%%edi\n\t" - "jb L99\n\t" - "jmp L98\n\t" - - "L97:\n\t" - "pushl %%edx\n\t" - "pushl %%ecx\n\t" - "pushl %%edi\n\t" - "call ad_err2\n\t" - "popl %%edi\n\t" - "popl %%ecx\n\t" - "popl %%edx\n\t" - "jmp L96\n\t" - - "L98:\n\t" - : : "D" (p), "d" (pe) - : "ecx" - ); -} - -/* - * Memory address test, own address - */ -void addr_tst2(int me) -{ - cprint(LINE_PAT, COL_PAT, "address "); - - /* Write each address with its own address */ - unsliced_foreach_segment(nullptr, me, addr_tst2_init_segment); - { BAILR } - - /* Each address should have its own address */ - unsliced_foreach_segment(nullptr, me, addr_tst2_check_segment); -} - -typedef struct { - int me; - ulong xorVal; -} movinvr_ctx; - -STATIC void movinvr_init(ulong* p, - ulong len_dw, const void* vctx) { - ulong* pe = p + (len_dw - 1); - const movinvr_ctx* ctx = (const movinvr_ctx*)vctx; - /* Original C code replaced with hand tuned assembly code */ - /* - for (; p <= pe; p++) { - *p = rand(me); - } - */ - - asm __volatile__ - ( - "jmp L200\n\t" - ".p2align 4,,7\n\t" - "L201:\n\t" - "addl $4,%%edi\n\t" - "L200:\n\t" - "pushl %%ecx\n\t" - "call rand\n\t" - "popl %%ecx\n\t" - "movl %%eax,(%%edi)\n\t" - "cmpl %%ebx,%%edi\n\t" - "jb L201\n\t" - : : "D" (p), "b" (pe), "c" (ctx->me) - : "eax" - ); -} - -STATIC void movinvr_body(ulong* p, ulong len_dw, const void* vctx) { - ulong* pe = p + (len_dw - 1); - const movinvr_ctx* ctx = (const movinvr_ctx*)vctx; - - /* Original C code replaced with hand tuned assembly code */ - - /*for (; p <= pe; p++) { - num = rand(me); - if (i) { - num = ~num; - } - if ((bad=*p) != num) { - mt86_error((ulong*)p, num, bad); - } - *p = ~num; - }*/ - - asm __volatile__ - ( - "pushl %%ebp\n\t" - - // Skip first increment - "jmp L26\n\t" - ".p2align 4,,7\n\t" - - // increment 4 bytes (32-bits) - "L27:\n\t" - "addl $4,%%edi\n\t" - - // Check this byte - "L26:\n\t" - - // Get next random number, pass in me(edx), random value returned in num(eax) - // num = rand(me); - // cdecl call maintains all registers except eax, ecx, and edx - // We maintain edx with a push and pop here using it also as an input - // we don't need the current eax value and want it to change to the return value - // we overwrite ecx shortly after this discarding its current value - "pushl %%edx\n\t" // Push function inputs onto stack - "call rand\n\t" - "popl %%edx\n\t" // Remove function inputs from stack - - // XOR the random number with xorVal(ebx), which is either 0xffffffff or 0 depending on the outer loop - // if (i) { num = ~num; } - "xorl %%ebx,%%eax\n\t" - - // Move the current value of the current position p(edi) into bad(ecx) - // (bad=*p) - "movl (%%edi),%%ecx\n\t" - - // Compare bad(ecx) to num(eax) - "cmpl %%eax,%%ecx\n\t" - - // If not equal jump the error case - "jne L23\n\t" - - // Set a new value or not num(eax) at the current position p(edi) - // *p = ~num; - "L25:\n\t" - "movl $0xffffffff,%%ebp\n\t" - "xorl %%ebp,%%eax\n\t" - "movl %%eax,(%%edi)\n\t" - - // Loop until current position p(edi) equals the end position pe(esi) - "cmpl %%esi,%%edi\n\t" - "jb L27\n\t" - "jmp L24\n" - - // Error case - "L23:\n\t" - // Must manually maintain eax, ecx, and edx as part of cdecl call convention - "pushl %%edx\n\t" - "pushl %%ecx\n\t" // Next three pushes are functions input - "pushl %%eax\n\t" - "pushl %%edi\n\t" - "call mt86_error\n\t" - "popl %%edi\n\t" // Remove function inputs from stack and restore register values - "popl %%eax\n\t" - "popl %%ecx\n\t" - "popl %%edx\n\t" - "jmp L25\n" - - "L24:\n\t" - "popl %%ebp\n\t" - :: "D" (p), "S" (pe), "b" (ctx->xorVal), - "d" (ctx->me) - : "eax", "ecx" - ); -} - -/* - * Test all of memory using a "half moving inversions" algorithm using random - * numbers and their complement as the data pattern. Since we are not able to - * produce random numbers in reverse order testing is only done in the forward - * direction. - */ -void movinvr(int me) -{ - int i, seed1, seed2; - - movinvr_ctx ctx; - ctx.me = me; - ctx.xorVal = 0; - - /* Initialize memory with initial sequence of random numbers. */ - if (cpu_id.fid.bits.rdtsc) { - asm __volatile__ ("rdtsc":"=a" (seed1),"=d" (seed2)); - } else { - seed1 = 521288629 + vv->pass; - seed2 = 362436069 - vv->pass; - } - - /* Display the current seed */ - if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, seed1); - rand_seed(seed1, seed2, me); - - sliced_foreach_segment(&ctx, me, movinvr_init); - { BAILR } - - /* Do moving inversions test. Check for initial pattern and then - * write the complement for each memory location. - */ - for (i=0; i<2; i++) { - rand_seed(seed1, seed2, me); - - if (i) { - ctx.xorVal = 0xffffffff; - } else { - ctx.xorVal = 0; - } - - sliced_foreach_segment(&ctx, me, movinvr_body); - { BAILR } - } -} - -typedef struct { - ulong p1; - ulong p2; -} movinv1_ctx; - -STATIC void movinv1_init(ulong* start, - ulong len_dw, const void* vctx) { - const movinv1_ctx* ctx = (const movinv1_ctx*)vctx; - - ulong p1 = ctx->p1; - ulong* p = start; - - asm __volatile__ - ( - "rep\n\t" - "stosl\n\t" - : : "c" (len_dw), "D" (p), "a" (p1) - ); -} - -STATIC void movinv1_bottom_up(ulong* start, - ulong len_dw, const void* vctx) { - const movinv1_ctx* ctx = (const movinv1_ctx*)vctx; - ulong p1 = ctx->p1; - ulong p2 = ctx->p2; - ulong* p = start; - ulong* pe = p + (len_dw - 1); - - // Original C code replaced with hand tuned assembly code - // seems broken - /*for (; p <= pe; p++) { - if ((bad=*p) != p1) { - mt86_error((ulong*)p, p1, bad); - } - *p = p2; - }*/ - - asm __volatile__ - ( - "jmp L2\n\t" - ".p2align 4,,7\n\t" - "L0:\n\t" - "addl $4,%%edi\n\t" - "L2:\n\t" - "movl (%%edi),%%ecx\n\t" - "cmpl %%eax,%%ecx\n\t" - "jne L3\n\t" - "L5:\n\t" - "movl %%ebx,(%%edi)\n\t" - "cmpl %%edx,%%edi\n\t" - "jb L0\n\t" - "jmp L4\n" - - "L3:\n\t" - "pushl %%edx\n\t" - "pushl %%ebx\n\t" - "pushl %%ecx\n\t" - "pushl %%eax\n\t" - "pushl %%edi\n\t" - "call mt86_error\n\t" - "popl %%edi\n\t" - "popl %%eax\n\t" - "popl %%ecx\n\t" - "popl %%ebx\n\t" - "popl %%edx\n\t" - "jmp L5\n" - - "L4:\n\t" - :: "a" (p1), "D" (p), "d" (pe), "b" (p2) - : "ecx" - ); -} - -STATIC void movinv1_top_down(ulong* start, - ulong len_dw, const void* vctx) { - const movinv1_ctx* ctx = (const movinv1_ctx*)vctx; - ulong p1 = ctx->p1; - ulong p2 = ctx->p2; - ulong* p = start + (len_dw - 1); - ulong* pe = start; - - //Original C code replaced with hand tuned assembly code - // seems broken - /*do { - if ((bad=*p) != p2) { - mt86_error((ulong*)p, p2, bad); - } - *p = p1; - } while (--p >= pe);*/ - - asm __volatile__ - ( - "jmp L9\n\t" - ".p2align 4,,7\n\t" - "L11:\n\t" - "subl $4, %%edi\n\t" - "L9:\n\t" - "movl (%%edi),%%ecx\n\t" - "cmpl %%ebx,%%ecx\n\t" - "jne L6\n\t" - "L10:\n\t" - "movl %%eax,(%%edi)\n\t" - "cmpl %%edi, %%edx\n\t" - "jne L11\n\t" - "jmp L7\n\t" - - "L6:\n\t" - "pushl %%edx\n\t" - "pushl %%eax\n\t" - "pushl %%ecx\n\t" - "pushl %%ebx\n\t" - "pushl %%edi\n\t" - "call mt86_error\n\t" - "popl %%edi\n\t" - "popl %%ebx\n\t" - "popl %%ecx\n\t" - "popl %%eax\n\t" - "popl %%edx\n\t" - "jmp L10\n" - - "L7:\n\t" - :: "a" (p1), "D" (p), "d" (pe), "b" (p2) - : "ecx" - ); -} - -/* - * Test all of memory using a "moving inversions" algorithm using the - * pattern in p1 and its complement in p2. - */ -void movinv1 (int iter, ulong p1, ulong p2, int me) -{ - int i; - - /* Display the current pattern */ - if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, p1); - - movinv1_ctx ctx; - ctx.p1 = p1; - ctx.p2 = p2; - sliced_foreach_segment(&ctx, me, movinv1_init); - { BAILR } - - /* Do moving inversions test. Check for initial pattern and then - * write the complement for each memory location. Test from bottom - * up and then from the top down. */ - for (i=0; i<iter; i++) { - sliced_foreach_segment(&ctx, me, movinv1_bottom_up); - { BAILR } - - // NOTE(jcoiner): - // For the top-down pass, the original 5.01 code iterated over - // 'segs' in from n-1 down to 0, and then within each mapped segment, - // it would form the SPINSZ windows from the top down -- thus forming - // a different set of windows than the bottom-up pass, when the segment - // is not an integer number of windows. - // - // My guess is that this buys us very little additional coverage, that - // the value in going top-down happens at the word or cache-line level - // and that there's little to be gained from reversing the direction of - // the outer loops. So I'm leaving a 'direction' bit off of the - // foreach_segment() routines for now. - sliced_foreach_segment(&ctx, me, movinv1_top_down); - { BAILR } - } -} - -typedef struct { - ulong p1; - ulong lb; - ulong hb; - int sval; - int off; -} movinv32_ctx; - -STATIC void movinv32_init(ulong* restrict buf, - ulong len_dw, const void* vctx) { - const movinv32_ctx* restrict ctx = (const movinv32_ctx*)vctx; - - ulong* p = buf; - ulong* pe = buf + (len_dw - 1); - - int k = ctx->off; - ulong pat = ctx->p1; - ulong lb = ctx->lb; - int sval = ctx->sval; - - /* Original C code replaced with hand tuned assembly code - * while (p <= pe) { - * *p = pat; - * if (++k >= 32) { - * pat = lb; - * k = 0; - * } else { - * pat = pat << 1; - * pat |= sval; - * } - * p++; - * } - */ - asm __volatile__ - ( - "jmp L20\n\t" - ".p2align 4,,7\n\t" - "L923:\n\t" - "addl $4,%%edi\n\t" - "L20:\n\t" - "movl %%ecx,(%%edi)\n\t" - "addl $1,%%ebx\n\t" - "cmpl $32,%%ebx\n\t" - "jne L21\n\t" - "movl %%esi,%%ecx\n\t" - "xorl %%ebx,%%ebx\n\t" - "jmp L22\n" - "L21:\n\t" - "shll $1,%%ecx\n\t" - "orl %%eax,%%ecx\n\t" - "L22:\n\t" - "cmpl %%edx,%%edi\n\t" - "jb L923\n\t" - :: "D" (p),"d" (pe),"b" (k),"c" (pat), - "a" (sval), "S" (lb) - ); -} - -STATIC void movinv32_bottom_up(ulong* restrict buf, ulong len_dw, - const void* vctx) { - const movinv32_ctx* restrict ctx = (const movinv32_ctx*)vctx; - - ulong* p = buf; - ulong* pe = buf + (len_dw - 1); - - int k = ctx->off; - ulong pat = ctx->p1; - ulong lb = ctx->lb; - int sval = ctx->sval; - - /* Original C code replaced with hand tuned assembly code - * while (1) { - * if ((bad=*p) != pat) { - * mt86_error((ulong*)p, pat, bad); - * } - * *p = ~pat; - * if (p >= pe) break; - * p++; - * - * if (++k >= 32) { - * pat = lb; - * k = 0; - * } else { - * pat = pat << 1; - * pat |= sval; - * } - * } - */ - asm __volatile__ - ( - "pushl %%ebp\n\t" - "jmp L30\n\t" - ".p2align 4,,7\n\t" - "L930:\n\t" - "addl $4,%%edi\n\t" - "L30:\n\t" - "movl (%%edi),%%ebp\n\t" - "cmpl %%ecx,%%ebp\n\t" - "jne L34\n\t" - - "L35:\n\t" - "notl %%ecx\n\t" - "movl %%ecx,(%%edi)\n\t" - "notl %%ecx\n\t" - "incl %%ebx\n\t" - "cmpl $32,%%ebx\n\t" - "jne L31\n\t" - "movl %%esi,%%ecx\n\t" - "xorl %%ebx,%%ebx\n\t" - "jmp L32\n" - "L31:\n\t" - "shll $1,%%ecx\n\t" - "orl %%eax,%%ecx\n\t" - "L32:\n\t" - "cmpl %%edx,%%edi\n\t" - "jb L930\n\t" - "jmp L33\n\t" - - "L34:\n\t" - "pushl %%esi\n\t" - "pushl %%eax\n\t" - "pushl %%ebx\n\t" - "pushl %%edx\n\t" - "pushl %%ebp\n\t" - "pushl %%ecx\n\t" - "pushl %%edi\n\t" - "call mt86_error\n\t" - "popl %%edi\n\t" - "popl %%ecx\n\t" - "popl %%ebp\n\t" - "popl %%edx\n\t" - "popl %%ebx\n\t" - "popl %%eax\n\t" - "popl %%esi\n\t" - "jmp L35\n" - - "L33:\n\t" - "popl %%ebp\n\t" - : "=b" (k),"=c" (pat) - : "D" (p),"d" (pe),"b" (k),"c" (pat), - "a" (sval), "S" (lb) - ); -} - -STATIC void movinv32_top_down(ulong* restrict buf, - ulong len_dw, const void* vctx) { - const movinv32_ctx* restrict ctx = (const movinv32_ctx*)vctx; - - ulong* pe = buf; - ulong* p = buf + (len_dw - 1); - - int k = ctx->off; - ulong pat = ctx->p1; - ulong hb = ctx->hb; - int sval = ctx->sval; - ulong p3 = (ulong)sval << 31; - - // Advance 'k' and 'pat' to where they would have been - // at the end of the corresponding bottom_up segment. - // - // The '-1' is because we didn't advance 'k' or 'pat' - // on the final bottom_up loop, so they're off by one... - ulong mod_len = (len_dw - 1) % 32; - for (int i = 0; i < mod_len; i++) { - if (++k >= 32) { - pat = ctx->lb; - k = 0; - } else { - pat = pat << 1; - pat |= sval; - } - } - - // Increment 'k' only because the code below has an off-by-one - // interpretation of 'k' relative to the bottom_up routine. - // There it ranges from 0:31, and here it ranges from 1:32. - k++; - - /* Original C code replaced with hand tuned assembly code */ -#if PREFER_C - ulong bad; - while(1) { - if ((bad=*p) != ~pat) { - mt86_error((ulong*)p, ~pat, bad); - } - *p = pat; - if (p <= pe) break; - p--; - - if (--k <= 0) { - k = 32; - pat = hb; - } else { - pat = pat >> 1; - pat |= p3; - } - }; -#else - asm __volatile__ - ( - "pushl %%ebp\n\t" - "jmp L40\n\t" - ".p2align 4,,7\n\t" - "L49:\n\t" - "subl $4,%%edi\n\t" - "L40:\n\t" - "movl (%%edi),%%ebp\n\t" - "notl %%ecx\n\t" - "cmpl %%ecx,%%ebp\n\t" - "jne L44\n\t" - - "L45:\n\t" - "notl %%ecx\n\t" - "movl %%ecx,(%%edi)\n\t" - "decl %%ebx\n\t" - "cmpl $0,%%ebx\n\t" - "jg L41\n\t" - "movl %%esi,%%ecx\n\t" - "movl $32,%%ebx\n\t" - "jmp L42\n" - "L41:\n\t" - "shrl $1,%%ecx\n\t" - "orl %%eax,%%ecx\n\t" - "L42:\n\t" - "cmpl %%edx,%%edi\n\t" - "ja L49\n\t" - "jmp L43\n\t" - - "L44:\n\t" - "pushl %%esi\n\t" - "pushl %%eax\n\t" - "pushl %%ebx\n\t" - "pushl %%edx\n\t" - "pushl %%ebp\n\t" - "pushl %%ecx\n\t" - "pushl %%edi\n\t" - "call mt86_error\n\t" - "popl %%edi\n\t" - "popl %%ecx\n\t" - "popl %%ebp\n\t" - "popl %%edx\n\t" - "popl %%ebx\n\t" - "popl %%eax\n\t" - "popl %%esi\n\t" - "jmp L45\n" - - "L43:\n\t" - "popl %%ebp\n\t" - : : "D" (p),"d" (pe),"b" (k),"c" (pat), - "a" (p3), "S" (hb) - ); -#endif -} - -void movinv32(int iter, ulong p1, ulong lb, ulong hb, int sval, int off,int me) -{ - // First callsite: - // - p1 has 1 bit set (somewhere) - // - lb = 1 ("low bit") - // - hb = 0x80000000 ("high bit") - // - sval = 0 - // - 'off' indicates the position of the set bit in p1 - // - // Second callsite is the same, but inverted: - // - p1 has 1 bit clear (somewhere) - // - lb = 0xfffffffe - // - hb = 0x7fffffff - // - sval = 1 - // - 'off' indicates the position of the cleared bit in p1 - - movinv32_ctx ctx; - ctx.p1 = p1; - ctx.lb = lb; - ctx.hb = hb; - ctx.sval = sval; - ctx.off = off; - - /* Display the current pattern */ - if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, p1); - - sliced_foreach_segment(&ctx, me, movinv32_init); - { BAILR } - - /* Do moving inversions test. Check for initial pattern and then - * write the complement for each memory location. Test from bottom - * up and then from the top down. */ - for (int i=0; i<iter; i++) { - sliced_foreach_segment(&ctx, me, movinv32_bottom_up); - { BAILR } - - sliced_foreach_segment(&ctx, me, movinv32_top_down); - { BAILR } - } -} - -typedef struct { - int offset; - ulong p1; - ulong p2; -} modtst_ctx; - -STATIC void modtst_sparse_writes(ulong* restrict start, - ulong len_dw, const void* vctx) { - const modtst_ctx* restrict ctx = (const modtst_ctx*)vctx; - ulong p1 = ctx->p1; - ulong offset = ctx->offset; - -#if PREFER_C - for (ulong i = offset; i < len_dw; i += MOD_SZ) { - start[i] = p1; - } -#else - ulong* p = start + offset; - ulong* pe = start + len_dw; - asm __volatile__ - ( - "jmp L60\n\t" - ".p2align 4,,7\n\t" - - "L60:\n\t" - "movl %%eax,(%%edi)\n\t" - "addl $80,%%edi\n\t" - "cmpl %%edx,%%edi\n\t" - "jb L60\n\t" - :: "D" (p), "d" (pe), "a" (p1) - ); -#endif -} - -STATIC void modtst_dense_writes(ulong* restrict start, ulong len_dw, - const void* vctx) { - const modtst_ctx* restrict ctx = (const modtst_ctx*)vctx; - ulong p2 = ctx->p2; - ulong offset = ctx->offset; - - ASSERT(offset < MOD_SZ); - - ulong k = 0; -#if PREFER_C - for (ulong i = 0; i < len_dw; i++) { - if (k != offset) { - start[i] = p2; - } - if (++k >= MOD_SZ) { - k = 0; - } - } -#else - ulong* pe = start + (len_dw - 1); - asm __volatile__ - ( - "jmp L50\n\t" - ".p2align 4,,7\n\t" - - "L54:\n\t" - "addl $4,%%edi\n\t" - "L50:\n\t" - "cmpl %%ebx,%%ecx\n\t" - "je L52\n\t" - "movl %%eax,(%%edi)\n\t" - "L52:\n\t" - "incl %%ebx\n\t" - "cmpl $19,%%ebx\n\t" - "jle L53\n\t" - "xorl %%ebx,%%ebx\n\t" - "L53:\n\t" - "cmpl %%edx,%%edi\n\t" - "jb L54\n\t" - : : "D" (start), "d" (pe), "a" (p2), - "b" (k), "c" (offset) - ); -#endif -} - -STATIC void modtst_check(ulong* restrict start, - ulong len_dw, const void* vctx) { - const modtst_ctx* restrict ctx = (const modtst_ctx*)vctx; - ulong p1 = ctx->p1; - ulong offset = ctx->offset; - - ASSERT(offset < MOD_SZ); - -#if PREFER_C - ulong bad; - for (ulong i = offset; i < len_dw; i += MOD_SZ) { - if ((bad = start[i]) != p1) - mt86_error(start + i, p1, bad); - } -#else - ulong* p = start + offset; - ulong* pe = start + len_dw; - asm __volatile__ - ( - "jmp L70\n\t" - ".p2align 4,,7\n\t" - - "L70:\n\t" - "movl (%%edi),%%ecx\n\t" - "cmpl %%eax,%%ecx\n\t" - "jne L71\n\t" - "L72:\n\t" - "addl $80,%%edi\n\t" - "cmpl %%edx,%%edi\n\t" - "jb L70\n\t" - "jmp L73\n\t" - - "L71:\n\t" - "pushl %%edx\n\t" - "pushl %%ecx\n\t" - "pushl %%eax\n\t" - "pushl %%edi\n\t" - "call mt86_error\n\t" - "popl %%edi\n\t" - "popl %%eax\n\t" - "popl %%ecx\n\t" - "popl %%edx\n\t" - "jmp L72\n" - - "L73:\n\t" - : : "D" (p), "d" (pe), "a" (p1) - : "ecx" - ); -#endif -} - -/* - * Test all of memory using modulo X access pattern. - */ -void modtst(int offset, int iter, ulong p1, ulong p2, int me) -{ - modtst_ctx ctx; - ctx.offset = offset; - ctx.p1 = p1; - ctx.p2 = p2; - - /* Display the current pattern */ - if (mstr_cpu == me) { - hprint(LINE_PAT, COL_PAT-2, p1); - cprint(LINE_PAT, COL_PAT+6, "-"); - dprint(LINE_PAT, COL_PAT+7, offset, 2, 1); - } - - /* Write every nth location with pattern */ - sliced_foreach_segment(&ctx, me, modtst_sparse_writes); - { BAILR } - - /* Write the rest of memory "iter" times with the pattern complement */ - for (ulong i=0; i<iter; i++) { - sliced_foreach_segment(&ctx, me, modtst_dense_writes); - { BAILR } - } - - /* Now check every nth location */ - sliced_foreach_segment(&ctx, me, modtst_check); -} - -#if PREFER_C - -STATIC void movsl(ulong* dest, - ulong* src, - ulong size_in_dwords) { - /* Logically equivalent to: - - for (ulong i = 0; i < size_in_dwords; i++) - dest[i] = src[i]; - - However: the movsl instruction does the entire loop - in one instruction -- this is probably how 'memcpy' - is implemented -- so hardware makes it very fast. - - Even in PREFER_C mode, we want the brute force of movsl! - */ - asm __volatile__ - ( - "cld\n" - "jmp L1189\n\t" - - ".p2align 4,,7\n\t" - "L1189:\n\t" - - "movl %1,%%edi\n\t" // dest - "movl %0,%%esi\n\t" // src - "movl %2,%%ecx\n\t" // len in dwords - "rep\n\t" - "movsl\n\t" - - :: "g" (src), "g" (dest), "g" (size_in_dwords) - : "edi", "esi", "ecx" - ); -} -#endif // PREFER_C - -STATIC ulong block_move_normalize_len_dw(ulong len_dw) { - // The block_move test works with sets of 64-byte blocks, - // so ensure our total length is a multiple of 64. - // - // In fact, since we divide the region in half, and each half-region - // is a set of 64-byte blocks, the full region should be a multiple of 128 - // bytes. - // - // Note that there's no requirement for the start address of the region to - // be 64-byte aligned, it can be any dword. - ulong result = (len_dw >> 5) << 5; - ASSERT(result > 0); - return result; -} - -STATIC void block_move_init(ulong* restrict buf, - ulong len_dw, const void* unused_ctx) { - len_dw = block_move_normalize_len_dw(len_dw); - - // Compute 'len' in units of 64-byte chunks: - ulong len = len_dw >> 4; - - // We only need to initialize len/2, since we'll just copy - // the first half onto the second half in the move step. - len = len >> 1; - - ulong base_val = 1; -#if PREFER_C - while(len > 0) { - ulong neg_val = ~base_val; - - // Set a block of 64 bytes // first block DWORDS are: - buf[0] = base_val; // 0x00000001 - buf[1] = base_val; // 0x00000001 - buf[2] = base_val; // 0x00000001 - buf[3] = base_val; // 0x00000001 - buf[4] = neg_val; // 0xfffffffe - buf[5] = neg_val; // 0xfffffffe - buf[6] = base_val; // 0x00000001 - buf[7] = base_val; // 0x00000001 - buf[8] = base_val; // 0x00000001 - buf[9] = base_val; // 0x00000001 - buf[10] = neg_val; // 0xfffffffe - buf[11] = neg_val; // 0xfffffffe - buf[12] = base_val; // 0x00000001 - buf[13] = base_val; // 0x00000001 - buf[14] = neg_val; // 0xfffffffe - buf[15] = neg_val; // 0xfffffffe - - buf += 16; // advance to next 64-byte block - len--; - - // Rotate the bit left, including an all-zero state. - // It can't hurt to have a periodicity of 33 instead of - // a power of two. - if (base_val == 0) { - base_val = 1; - } else if (base_val & 0x80000000) { - base_val = 0; - } else { - base_val = base_val << 1; - } - } -#else - asm __volatile__ - ( - "jmp L100\n\t" - - ".p2align 4,,7\n\t" - "L100:\n\t" - - // First loop eax is 0x00000001, edx is 0xfffffffe - "movl %%eax, %%edx\n\t" - "notl %%edx\n\t" - - // Set a block of 64-bytes // First loop DWORDS are - "movl %%eax,0(%%edi)\n\t" // 0x00000001 - "movl %%eax,4(%%edi)\n\t" // 0x00000001 - "movl %%eax,8(%%edi)\n\t" // 0x00000001 - "movl %%eax,12(%%edi)\n\t" // 0x00000001 - "movl %%edx,16(%%edi)\n\t" // 0xfffffffe - "movl %%edx,20(%%edi)\n\t" // 0xfffffffe - "movl %%eax,24(%%edi)\n\t" // 0x00000001 - "movl %%eax,28(%%edi)\n\t" // 0x00000001 - "movl %%eax,32(%%edi)\n\t" // 0x00000001 - "movl %%eax,36(%%edi)\n\t" // 0x00000001 - "movl %%edx,40(%%edi)\n\t" // 0xfffffffe - "movl %%edx,44(%%edi)\n\t" // 0xfffffffe - "movl %%eax,48(%%edi)\n\t" // 0x00000001 - "movl %%eax,52(%%edi)\n\t" // 0x00000001 - "movl %%edx,56(%%edi)\n\t" // 0xfffffffe - "movl %%edx,60(%%edi)\n\t" // 0xfffffffe - - // rotate left with carry, - // second loop eax is 0x00000002 - // second loop edx is (~eax) 0xfffffffd - "rcll $1, %%eax\n\t" - - // Move current position forward 64-bytes (to start of next block) - "leal 64(%%edi), %%edi\n\t" - - // Loop until end - "decl %%ecx\n\t" - "jnz L100\n\t" - - : : "D" (buf), "c" (len), "a" (base_val) - : "edx" - ); -#endif -} - -typedef struct { - int iter; - int me; -} block_move_ctx; - -STATIC void block_move_move(ulong* restrict buf, - ulong len_dw, const void* vctx) { - const block_move_ctx* restrict ctx = (const block_move_ctx*)vctx; - ulong iter = ctx->iter; - int me = ctx->me; - - len_dw = block_move_normalize_len_dw(len_dw); - - /* Now move the data around - * First move the data up half of the segment size we are testing - * Then move the data to the original location + 32 bytes - */ - ulong half_len_dw = len_dw / 2; // Half the size of this block in DWORDS - ASSERT(half_len_dw > 8); - - ulong* mid = buf + half_len_dw; // VA at mid-point of this block. - for (int i=0; i<iter; i++) { - if (i > 0) { - // foreach_segment() called this before the 0th iteration, - // so don't tick twice in quick succession. - do_tick(me); - } - { BAILR } - -#if PREFER_C - // Move first half to 2nd half: - movsl(/*dest=*/ mid, /*src=*/ buf, half_len_dw); - - // Move the second half, less the last 8 dwords - // to the first half plus an offset of 8 dwords. - movsl(/*dest=*/ buf + 8, /*src=*/ mid, half_len_dw - 8); - - // Finally, move the last 8 dwords of the 2nd half - // to the first 8 dwords of the first half. - movsl(/*dest=*/ mid + half_len_dw - 8, /*src=*/ buf, 8); -#else - asm __volatile__ - ( - "cld\n" - "jmp L110\n\t" - - ".p2align 4,,7\n\t" - "L110:\n\t" - - // - // At the end of all this - // - the second half equals the inital value of the first half - // - the first half is right shifted 32-bytes (with wrapping) - // - - // Move first half to second half - "movl %1,%%edi\n\t" // Destination 'mid' (mid point) - "movl %0,%%esi\n\t" // Source, 'buf' (start point) - "movl %2,%%ecx\n\t" // Length, 'half_len_dw' (size of a half in DWORDS) - "rep\n\t" - "movsl\n\t" - - // Move the second half, less the last 32-bytes. To the first half, offset plus 32-bytes - "movl %0,%%edi\n\t" - "addl $32,%%edi\n\t" // Destination 'buf' plus 32 bytes - "movl %1,%%esi\n\t" // Source, 'mid' - "movl %2,%%ecx\n\t" - "subl $8,%%ecx\n\t" // Length, 'half_len_dw' - "rep\n\t" - "movsl\n\t" - - // Move last 8 DWORDS (32-bytes) of the second half to the start of the first half - "movl %0,%%edi\n\t" // Destination 'buf' - // Source, 8 DWORDS from the end of the second half, left over by the last rep/movsl - "movl $8,%%ecx\n\t" // Length, 8 DWORDS (32-bytes) - "rep\n\t" - "movsl\n\t" - - :: "g" (buf), "g" (mid), "g" (half_len_dw) - : "edi", "esi", "ecx" - ); -#endif - } -} - -STATIC void block_move_check(ulong* restrict buf, - ulong len_dw, const void* unused_ctx) { - len_dw = block_move_normalize_len_dw(len_dw); - - /* Now check the data. - * This is rather crude, we just check that the - * adjacent words are the same. - */ -#if PREFER_C - for (ulong i = 0; i < len_dw; i = i + 2) { - if (buf[i] != buf[i+1]) { - mt86_error(buf+i, buf[i], buf[i+1]); - } - } -#else - ulong* pe = buf + (len_dw - 2); - asm __volatile__ - ( - "jmp L120\n\t" - - ".p2align 4,,7\n\t" - "L124:\n\t" - "addl $8,%%edi\n\t" // Next QWORD - "L120:\n\t" - - // Compare adjacent DWORDS - "movl (%%edi),%%ecx\n\t" - "cmpl 4(%%edi),%%ecx\n\t" - "jnz L121\n\t" // Print error if they don't match - - // Loop until end of block - "L122:\n\t" - "cmpl %%edx,%%edi\n\t" - "jb L124\n" - "jmp L123\n\t" - - "L121:\n\t" - // eax not used so we don't need to save it as per cdecl - // ecx is used but not restored, however we don't need it's value anymore after this point - "pushl %%edx\n\t" - "pushl 4(%%edi)\n\t" - "pushl %%ecx\n\t" - "pushl %%edi\n\t" - "call mt86_error\n\t" - "popl %%edi\n\t" - "addl $8,%%esp\n\t" - "popl %%edx\n\t" - "jmp L122\n" - "L123:\n\t" - :: "D" (buf), "d" (pe) - : "ecx" - ); -#endif -} - -/* - * Test memory using block moves - * Adapted from Robert Redelmeier's burnBX test - */ -void block_move(int iter, int me) -{ - cprint(LINE_PAT, COL_PAT-2, " "); - - block_move_ctx ctx; - ctx.iter = iter; - ctx.me = me; - - /* Initialize memory with the initial pattern. */ - sliced_foreach_segment(&ctx, me, block_move_init); - { BAILR } - s_barrier(); - - /* Now move the data around */ - sliced_foreach_segment(&ctx, me, block_move_move); - { BAILR } - s_barrier(); - - /* And check it. */ - sliced_foreach_segment(&ctx, me, block_move_check); -} - -typedef struct { - ulong pat; -} bit_fade_ctx; - -STATIC void bit_fade_fill_seg(ulong* restrict p, - ulong len_dw, const void* vctx) { - const bit_fade_ctx* restrict ctx = (const bit_fade_ctx*)vctx; - ulong pat = ctx->pat; - - for (ulong i = 0; i < len_dw; i++) { - p[i] = pat; - } -} - -/* - * Test memory for bit fade, fill memory with pattern. - */ -void bit_fade_fill(ulong p1, int me) -{ - /* Display the current pattern */ - hprint(LINE_PAT, COL_PAT, p1); - - /* Initialize memory with the initial pattern. */ - bit_fade_ctx ctx; - ctx.pat = p1; - unsliced_foreach_segment(&ctx, me, bit_fade_fill_seg); -} - -STATIC void bit_fade_chk_seg(ulong* restrict p, - ulong len_dw, const void* vctx) { - const bit_fade_ctx* restrict ctx = (const bit_fade_ctx*)vctx; - ulong pat = ctx->pat; - - for (ulong i = 0; i < len_dw; i++) { - ulong bad; - if ((bad=p[i]) != pat) { - mt86_error(p+i, pat, bad); - } - } -} - -void bit_fade_chk(ulong p1, int me) -{ - bit_fade_ctx ctx; - ctx.pat = p1; - - /* Make sure that nothing changed while sleeping */ - unsliced_foreach_segment(&ctx, me, bit_fade_chk_seg); -} - -/* Sleep for N seconds */ -void sleep(long n, int flag, int me, - int sms /* interpret 'n' as milliseconds instead */) -{ - ulong sh, sl, l, h, t, ip=0; - - /* save the starting time */ - asm __volatile__( - "rdtsc":"=a" (sl),"=d" (sh)); - - /* loop for n seconds */ - while (1) { - asm __volatile__( - "rep ; nop\n\t" - "rdtsc":"=a" (l),"=d" (h)); - asm __volatile__ ( - "subl %2,%0\n\t" - "sbbl %3,%1" - :"=a" (l), "=d" (h) - :"g" (sl), "g" (sh), - "0" (l), "1" (h)); - - if (sms != 0) { - t = h * ((unsigned)0xffffffff / vv->clks_msec); - t += (l / vv->clks_msec); - } else { - t = h * ((unsigned)0xffffffff / vv->clks_msec) / 1000; - t += (l / vv->clks_msec) / 1000; - } - - /* Is the time up? */ - if (t >= n) { - break; - } - - /* Only display elapsed time if flag is set */ - if (flag == 0) { - continue; - } - - if (t != ip) { - do_tick(me); - { BAILR } - ip = t; - } - } -} - -void beep(unsigned int frequency) -{ -#if 0 - // BOZO(jcoiner) - // Removed this, we need to define outb_p() and inb_p() - // before reintroducing it. -#else - unsigned int count = 1193180 / frequency; - - // Switch on the speaker - outb_p(inb_p(0x61)|3, 0x61); - - // Set command for counter 2, 2 byte write - outb_p(0xB6, 0x43); - - // Select desired Hz - outb_p(count & 0xff, 0x42); - outb((count >> 8) & 0xff, 0x42); - - // Block for 100 microseconds - sleep(100, 0, 0, 1); - - // Switch off the speaker - outb(inb_p(0x61)&0xFC, 0x61); -#endif -} |