/* test.c - MemTest-86  Version 3.4
 *
 * Released under version 2 of the GNU General Public License.
 * By Chris Brady
 * ----------------------------------------------------
 * MemTest86+ V5 Specific code (GPL V2.0)
 * By Samuel DEMEULEMEESTER, sdemeule@memtest.org
 * http://www.canardpc.com - http://www.memtest.org
 * Thanks to Passmark for calculate_chunk() and various comments!
 */

#include "test.h"
#include "config.h"
#include "stdint.h"
#include "cpuid.h"
#include "smp.h"
#include "io.h"

extern struct cpu_ident cpu_id;
extern volatile int mstr_cpu;
extern volatile int run_cpus;
extern volatile int test;
extern volatile int segs, bail;
extern int test_ticks, nticks;
extern struct tseq tseq[];
extern void update_err_counts(void);
extern void print_err_counts(void);
void rand_seed(unsigned int seed1, unsigned int seed2, int me);
ulong rand(int me);
void poll_errors();

// NOTE(jcoiner):
// Defining 'STATIC' to an empty string results in crashes. (It should
// work fine, of course.) I suspect relocation problems in reloc.c.
// When we declare these routines static, we use relative addresses
// for them instead of looking up their addresses in (supposedly
// relocated) global elf tables, which avoids the crashes.

#define STATIC static
//#define STATIC

#define PREFER_C 0

static const void* const nullptr = 0x0;

// Writes *start and *end with the VA range to test.
//
// me - this thread's CPU number
// j  - index into vv->map for the current segment we are testing
// makeMultipleOf - number of bytes to align each block to
STATIC void calculate_chunk(ulong** start, ulong** end, int me,
                            int j, int makeMultipleOf) {
    ulong chunk;

    // If we are only running 1 CPU then test the whole block
    if (run_cpus == 1) {
        *start = vv->map[j].start;
        *end = vv->map[j].end;
    } else {
        // Divide the current segment by the number of CPUs
        chunk = (ulong)vv->map[j].end - (ulong)vv->map[j].start;
        chunk /= run_cpus;

        // Round up to the nearest multiple of makeMultipleOf bytes
        chunk = (chunk + (makeMultipleOf-1)) & ~(makeMultipleOf-1);

        // Figure out chunk boundaries
        *start = (ulong*)((ulong)vv->map[j].start + (chunk * me));
        /* Set end addrs for the highest CPU num to the
         * end of the segment for rounding errors */
        /* Also rounds down to a boundary if needed; this may miss some
           RAM, but that's better than crashing or producing false
           errors. */
        /* This rounding probably will never happen, as the segments
           should be in 4096-byte pages if I understand correctly. */
        if (me == mstr_cpu) {
            *end = (ulong*)(vv->map[j].end);
        } else {
            *end = (ulong*)((ulong)(*start) + chunk);
            (*end)--;
        }
    }
}

/* Call segment_fn() for each up-to-SPINSZ segment between
 * 'start' and 'end'.
 */
void foreach_segment(ulong* start, ulong* end,
                     int me, const void* ctx, segment_fn func) {
    ASSERT(start < end);

    // Confirm 'start' points to an even dword, and 'end'
    // points to an odd dword
    ASSERT(0   == (((ulong)start) & 0x7));
    ASSERT(0x4 == (((ulong)end)   & 0x7));

    // 'end' may be exactly 0xfffffffc, right at the 4GB boundary.
    //
    // To avoid overflow in our loop tests and length calculations,
    // work in dword indices (the '_dw' vars) instead of raw addresses.
    ulong start_dw = ((ulong)start) >> 2;
    ulong end_dw   = ((ulong)  end) >> 2;

    // end is always xxxxxffc, but increment end_dw to an
    // address beyond the segment for easier boundary calculations.
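    //
    // For example (illustrative values): if 'end' is 0xfffffffc, then
    // end_dw is 0x3fffffff; after the increment below it becomes
    // 0x40000000, which is why the loop asserts that seg_end_dw never
    // exceeds 0x40000000.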
    ++end_dw;

    ulong seg_dw     = start_dw;
    ulong seg_end_dw = start_dw;

    int done = 0;
    do {
        do_tick(me);
        { BAILR }

        // ensure no overflow
        ASSERT((seg_end_dw + SPINSZ_DWORDS) > seg_end_dw);
        seg_end_dw += SPINSZ_DWORDS;

        if (seg_end_dw >= end_dw) {
            seg_end_dw = end_dw;
            done++;
        }
        if (seg_dw == seg_end_dw) {
            break;
        }

        ASSERT(((ulong)seg_end_dw) <= 0x40000000);
        ASSERT(seg_end_dw > seg_dw);
        ulong seg_len_dw = seg_end_dw - seg_dw;

        func((ulong*)(seg_dw << 2), seg_len_dw, ctx);

        seg_dw = seg_end_dw;
    } while (!done);
}

/* Calls segment_fn() for each segment in vv->map.
 *
 * Does not slice by CPU number, so it covers the entire memory.
 * Contrast to sliced_foreach_segment().
 */
STATIC void unsliced_foreach_segment
(const void* ctx, int me, segment_fn func) {
    int j;
    for (j = 0; j < segs; j++) {
        foreach_segment(vv->map[j].start, vv->map[j].end,
                        me, ctx, func);
    }
}

/* Calls segment_fn() for each segment to be tested by CPU 'me'.
 *
 * In multicore mode, slices the segments by 'me' (the CPU ordinal
 * number) so that each call will cover only 1/Nth of memory.
 */
STATIC void sliced_foreach_segment
(const void *ctx, int me, segment_fn func) {
    int j;
    ulong *start, *end;  // VAs
    ulong* prev_end = 0;
    for (j = 0; j < segs; j++) {
        calculate_chunk(&start, &end, me, j, 64);

        // Ensure no overlap among chunks
        ASSERT(end > start);
        if (prev_end > 0) {
            ASSERT(prev_end < start);
        }
        prev_end = end;

        foreach_segment(start, end, me, ctx, func);
    }
}

STATIC void addr_tst1_seg(ulong* restrict buf,
                          ulong len_dw, const void* unused) {
    // Within each segment:
    //  - choose a low dword offset 'off'
    //  - write pat to *off
    //  - write ~pat to addresses that are above off by
    //    1, 2, 4, ... dwords up to the top of the segment. None
    //    should alias to the original dword.
    //  - write ~pat to addresses that are below off by
    //    1, 2, 4, etc dwords, down to the start of the segment. None
    //    should alias to the original dword. If adding a given offset
    //    doesn't produce a single bit address flip (because it produced
    //    a carry), subtracting the same offset should give a single bit
    //    flip.
    //  - repeat this, moving off ahead in increments of 1MB;
    //    this covers address bits within physical memory banks, we hope?
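    //
    // For example (illustrative): with 'off' at dword 0 of a segment,
    // ~pat lands at dword offsets 1, 2, 4, 8, ... Each offset differs
    // from 'off' in exactly one address bit, so a faulty address line
    // makes one of those writes alias onto *off and clobber pat.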
    ulong pat;
    int k;

    for (pat = 0x5555aaaa, k = 0; k < 2; k++) {
        hprint(LINE_PAT, COL_PAT, pat);
        for (ulong off_dw = 0; off_dw < len_dw; off_dw += (1 << 18)) {
            buf[off_dw] = pat;
            pat = ~pat;

            for (ulong more_off_dw = 1;
                 off_dw + more_off_dw < len_dw;
                 more_off_dw = more_off_dw << 1) {
                ASSERT(more_off_dw);  // it should never get to zero
                buf[off_dw + more_off_dw] = pat;
                ulong bad;
                if ((bad = buf[off_dw]) != ~pat) {
                    ad_err1(buf + off_dw,
                            buf + off_dw + more_off_dw,
                            bad, ~pat);
                    break;
                }
            }
            for (ulong more_off_dw = 1;
                 off_dw > more_off_dw;
                 more_off_dw = more_off_dw << 1) {
                ASSERT(more_off_dw);  // it should never get to zero
                buf[off_dw - more_off_dw] = pat;
                ulong bad;
                if ((bad = buf[off_dw]) != ~pat) {
                    ad_err1(buf + off_dw,
                            buf + off_dw - more_off_dw,
                            bad, ~pat);
                    break;
                }
            }
        }
    }
}

/*
 * Memory address test, walking ones
 */
void addr_tst1(int me)
{
    unsliced_foreach_segment(nullptr, me, addr_tst1_seg);
}

STATIC void addr_tst2_init_segment(ulong* p,
                                   ulong len_dw, const void* unused) {
    ulong* pe = p + (len_dw - 1);

    /* Original C code replaced with hand tuned assembly code
     * for (; p <= pe; p++) {
     *     *p = (ulong)p;
     * }
     */
    asm __volatile__ (
        "jmp L91\n\t"
        ".p2align 4,,7\n\t"
        "L90:\n\t"
        "addl $4,%%edi\n\t"
        "L91:\n\t"
        "movl %%edi,(%%edi)\n\t"
        "cmpl %%edx,%%edi\n\t"
        "jb L90\n\t"
        : : "D" (p), "d" (pe)
    );
}

STATIC void addr_tst2_check_segment(ulong* p,
                                    ulong len_dw, const void* unused) {
    ulong* pe = p + (len_dw - 1);

    /* Original C code replaced with hand tuned assembly code
     * for (; p <= pe; p++) {
     *     if ((bad = *p) != (ulong)p) {
     *         ad_err2((ulong)p, bad);
     *     }
     * }
     */
    asm __volatile__ (
        "jmp L95\n\t"
        ".p2align 4,,7\n\t"
        "L99:\n\t"
        "addl $4,%%edi\n\t"
        "L95:\n\t"
        "movl (%%edi),%%ecx\n\t"
        "cmpl %%edi,%%ecx\n\t"
        "jne L97\n\t"
        "L96:\n\t"
        "cmpl %%edx,%%edi\n\t"
        "jb L99\n\t"
        "jmp L98\n\t"

        "L97:\n\t"
        "pushl %%edx\n\t"
        "pushl %%ecx\n\t"
        "pushl %%edi\n\t"
        "call ad_err2\n\t"
        "popl %%edi\n\t"
        "popl %%ecx\n\t"
        "popl %%edx\n\t"
        "jmp L96\n\t"

        "L98:\n\t"
        : : "D" (p), "d" (pe)
        : "ecx"
    );
}

/*
 * Memory address test, own address
 */
void addr_tst2(int me)
{
    cprint(LINE_PAT, COL_PAT, "address ");

    /* Write each address with its own address */
    unsliced_foreach_segment(nullptr, me, addr_tst2_init_segment);
    { BAILR }

    /* Each address should have its own address */
    unsliced_foreach_segment(nullptr, me, addr_tst2_check_segment);
}

typedef struct {
    int me;
    ulong xorVal;
} movinvr_ctx;

STATIC void movinvr_init(ulong* p, ulong len_dw, const void* vctx) {
    ulong* pe = p + (len_dw - 1);
    const movinvr_ctx* ctx = (const movinvr_ctx*)vctx;

    /* Original C code replaced with hand tuned assembly code
     * for (; p <= pe; p++) {
     *     *p = rand(me);
     * }
     */
    asm __volatile__ (
        "jmp L200\n\t"
        ".p2align 4,,7\n\t"
        "L201:\n\t"
        "addl $4,%%edi\n\t"
        "L200:\n\t"
        "pushl %%ecx\n\t"
        "call rand\n\t"
        "popl %%ecx\n\t"
        "movl %%eax,(%%edi)\n\t"
        "cmpl %%ebx,%%edi\n\t"
        "jb L201\n\t"
        : : "D" (p), "b" (pe), "c" (ctx->me)
        : "eax"
    );
}

STATIC void movinvr_body(ulong* p, ulong len_dw, const void* vctx) {
    ulong* pe = p + (len_dw - 1);
    const movinvr_ctx* ctx = (const movinvr_ctx*)vctx;

    /* Original C code replaced with hand tuned assembly code
     * for (; p <= pe; p++) {
     *     num = rand(me);
     *     if (i) {
     *         num = ~num;
     *     }
     *     if ((bad=*p) != num) {
     *         mt86_error((ulong*)p, num, bad);
     *     }
     *     *p = ~num;
     * }
     */
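    // Note: rand(me) draws from a per-CPU generator, and the caller
    // re-seeds with the same seeds before each pass, so every CPU can
    // replay the identical sequence over its slice when checking.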
    asm __volatile__ (
        "pushl %%ebp\n\t"

        // Skip first increment
        "jmp L26\n\t"
        ".p2align 4,,7\n\t"

        // increment 4 bytes (32-bits)
        "L27:\n\t"
        "addl $4,%%edi\n\t"

        // Check this dword
        "L26:\n\t"

        // Get the next random number: pass in me(edx), random value
        // returned in num(eax).
        //   num = rand(me);
        // A cdecl call preserves all registers except eax, ecx, and edx.
        // We preserve edx with a push and pop here, using it also as an
        // input; we don't need the current eax value and want it to
        // change to the return value; we overwrite ecx shortly after
        // this, discarding its current value.
        "pushl %%edx\n\t"  // Push function input onto stack
        "call rand\n\t"
        "popl %%edx\n\t"   // Remove function input from stack

        // XOR the random number with xorVal(ebx), which is either
        // 0xffffffff or 0 depending on the outer loop:
        //   if (i) { num = ~num; }
        "xorl %%ebx,%%eax\n\t"

        // Move the current value at position p(edi) into bad(ecx):
        //   (bad=*p)
        "movl (%%edi),%%ecx\n\t"

        // Compare bad(ecx) to num(eax)
        "cmpl %%eax,%%ecx\n\t"

        // If not equal, jump to the error case
        "jne L23\n\t"

        // Write the complement of num(eax) at the current
        // position p(edi):
        //   *p = ~num;
        "L25:\n\t"
        "movl $0xffffffff,%%ebp\n\t"
        "xorl %%ebp,%%eax\n\t"
        "movl %%eax,(%%edi)\n\t"

        // Loop until the current position p(edi) reaches the end
        // position pe(esi)
        "cmpl %%esi,%%edi\n\t"
        "jb L27\n\t"
        "jmp L24\n"

        // Error case
        "L23:\n\t"
        // Must manually preserve eax, ecx, and edx, per the cdecl
        // calling convention. The pushed ecx (bad) also serves as the
        // third argument to mt86_error().
        "pushl %%edx\n\t"
        "pushl %%ecx\n\t"
        // The next two pushes complete the function's inputs:
        //   mt86_error(p, num, bad)
        "pushl %%eax\n\t"
        "pushl %%edi\n\t"
        "call mt86_error\n\t"
        "popl %%edi\n\t"  // Remove function inputs from the stack
        "popl %%eax\n\t"  // ... and restore register values
        "popl %%ecx\n\t"
        "popl %%edx\n\t"
        "jmp L25\n"

        "L24:\n\t"
        "popl %%ebp\n\t"
        :: "D" (p), "S" (pe), "b" (ctx->xorVal), "d" (ctx->me)
        : "eax", "ecx"
    );
}

/*
 * Test all of memory using a "half moving inversions" algorithm using
 * random numbers and their complement as the data pattern. Since we are
 * not able to produce random numbers in reverse order, testing is done
 * only in the forward direction.
 */
void movinvr(int me)
{
    int i, seed1, seed2;

    movinvr_ctx ctx;
    ctx.me = me;
    ctx.xorVal = 0;

    /* Initialize memory with an initial sequence of random numbers. */
    if (cpu_id.fid.bits.rdtsc) {
        asm __volatile__ ("rdtsc":"=a" (seed1),"=d" (seed2));
    } else {
        seed1 = 521288629 + vv->pass;
        seed2 = 362436069 - vv->pass;
    }

    /* Display the current seed */
    if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, seed1);
    rand_seed(seed1, seed2, me);

    sliced_foreach_segment(&ctx, me, movinvr_init);
    { BAILR }

    /* Do the moving inversions test. Check for the initial pattern and
     * then write the complement for each memory location. */
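    // Pass 0 (xorVal == 0) expects the original random sequence and
    // leaves its complement behind; pass 1 (xorVal == 0xffffffff)
    // expects that complement and restores the original values.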
    for (i = 0; i < 2; i++) {
        rand_seed(seed1, seed2, me);
        if (i) {
            ctx.xorVal = 0xffffffff;
        } else {
            ctx.xorVal = 0;
        }
        sliced_foreach_segment(&ctx, me, movinvr_body);
        { BAILR }
    }
}

typedef struct {
    ulong p1;
    ulong p2;
} movinv1_ctx;

STATIC void movinv1_init(ulong* start,
                         ulong len_dw, const void* vctx) {
    const movinv1_ctx* ctx = (const movinv1_ctx*)vctx;
    ulong p1 = ctx->p1;
    ulong* p = start;
    asm __volatile__ (
        "rep\n\t"
        "stosl\n\t"
        : : "c" (len_dw), "D" (p), "a" (p1)
    );
}

STATIC void movinv1_bottom_up(ulong* start,
                              ulong len_dw, const void* vctx) {
    const movinv1_ctx* ctx = (const movinv1_ctx*)vctx;
    ulong p1 = ctx->p1;
    ulong p2 = ctx->p2;
    ulong* p = start;
    ulong* pe = p + (len_dw - 1);

    // Original C code replaced with hand tuned assembly code
    // (seems broken)
    /* for (; p <= pe; p++) {
     *     if ((bad=*p) != p1) {
     *         mt86_error((ulong*)p, p1, bad);
     *     }
     *     *p = p2;
     * }
     */
    asm __volatile__ (
        "jmp L2\n\t"
        ".p2align 4,,7\n\t"
        "L0:\n\t"
        "addl $4,%%edi\n\t"
        "L2:\n\t"
        "movl (%%edi),%%ecx\n\t"
        "cmpl %%eax,%%ecx\n\t"
        "jne L3\n\t"
        "L5:\n\t"
        "movl %%ebx,(%%edi)\n\t"
        "cmpl %%edx,%%edi\n\t"
        "jb L0\n\t"
        "jmp L4\n"

        "L3:\n\t"
        "pushl %%edx\n\t"
        "pushl %%ebx\n\t"
        "pushl %%ecx\n\t"
        "pushl %%eax\n\t"
        "pushl %%edi\n\t"
        "call mt86_error\n\t"
        "popl %%edi\n\t"
        "popl %%eax\n\t"
        "popl %%ecx\n\t"
        "popl %%ebx\n\t"
        "popl %%edx\n\t"
        "jmp L5\n"

        "L4:\n\t"
        :: "a" (p1), "D" (p), "d" (pe), "b" (p2)
        : "ecx"
    );
}

STATIC void movinv1_top_down(ulong* start,
                             ulong len_dw, const void* vctx) {
    const movinv1_ctx* ctx = (const movinv1_ctx*)vctx;
    ulong p1 = ctx->p1;
    ulong p2 = ctx->p2;
    ulong* p = start + (len_dw - 1);
    ulong* pe = start;

    // Original C code replaced with hand tuned assembly code
    // (seems broken)
    /* do {
     *     if ((bad=*p) != p2) {
     *         mt86_error((ulong*)p, p2, bad);
     *     }
     *     *p = p1;
     * } while (--p >= pe);
     */
    asm __volatile__ (
        "jmp L9\n\t"
        ".p2align 4,,7\n\t"
        "L11:\n\t"
        "subl $4, %%edi\n\t"
        "L9:\n\t"
        "movl (%%edi),%%ecx\n\t"
        "cmpl %%ebx,%%ecx\n\t"
        "jne L6\n\t"
        "L10:\n\t"
        "movl %%eax,(%%edi)\n\t"
        "cmpl %%edi, %%edx\n\t"
        "jne L11\n\t"
        "jmp L7\n\t"

        "L6:\n\t"
        "pushl %%edx\n\t"
        "pushl %%eax\n\t"
        "pushl %%ecx\n\t"
        "pushl %%ebx\n\t"
        "pushl %%edi\n\t"
        "call mt86_error\n\t"
        "popl %%edi\n\t"
        "popl %%ebx\n\t"
        "popl %%ecx\n\t"
        "popl %%eax\n\t"
        "popl %%edx\n\t"
        "jmp L10\n"

        "L7:\n\t"
        :: "a" (p1), "D" (p), "d" (pe), "b" (p2)
        : "ecx"
    );
}

/*
 * Test all of memory using a "moving inversions" algorithm using the
 * pattern in p1 and its complement in p2.
 */
void movinv1 (int iter, ulong p1, ulong p2, int me)
{
    int i;

    /* Display the current pattern */
    if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, p1);

    movinv1_ctx ctx;
    ctx.p1 = p1;
    ctx.p2 = p2;
    sliced_foreach_segment(&ctx, me, movinv1_init);
    { BAILR }

    /* Do the moving inversions test. Check for the initial pattern and
     * then write the complement for each memory location. Test from the
     * bottom up and then from the top down. */
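    // Each bottom-up pass verifies p1 and leaves p2 behind; the
    // top-down pass that follows verifies p2 and restores p1, so each
    // iteration exercises both patterns in both directions.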
    for (i = 0; i < iter; i++) {
        sliced_foreach_segment(&ctx, me, movinv1_bottom_up);
        { BAILR }
        sliced_foreach_segment(&ctx, me, movinv1_top_down);
        { BAILR }
    }
}

typedef struct {
    ulong p1;
    ulong lb;
    ulong hb;
    int sval;
    int off;
} movinv32_ctx;

STATIC void movinv32_init(ulong* restrict buf,
                          ulong len_dw, const void* vctx) {
    const movinv32_ctx* restrict ctx = (const movinv32_ctx*)vctx;
    ulong* p = buf;
    ulong* pe = buf + (len_dw - 1);
    int k = ctx->off;
    ulong pat = ctx->p1;
    ulong lb = ctx->lb;
    int sval = ctx->sval;

    /* Original C code replaced with hand tuned assembly code
     * while (p <= pe) {
     *     *p = pat;
     *     if (++k >= 32) {
     *         pat = lb;
     *         k = 0;
     *     } else {
     *         pat = pat << 1;
     *         pat |= sval;
     *     }
     *     p++;
     * }
     */
    asm __volatile__ (
        "jmp L20\n\t"
        ".p2align 4,,7\n\t"
        "L923:\n\t"
        "addl $4,%%edi\n\t"
        "L20:\n\t"
        "movl %%ecx,(%%edi)\n\t"
        "addl $1,%%ebx\n\t"
        "cmpl $32,%%ebx\n\t"
        "jne L21\n\t"
        "movl %%esi,%%ecx\n\t"
        "xorl %%ebx,%%ebx\n\t"
        "jmp L22\n"
        "L21:\n\t"
        "shll $1,%%ecx\n\t"
        "orl %%eax,%%ecx\n\t"
        "L22:\n\t"
        "cmpl %%edx,%%edi\n\t"
        "jb L923\n\t"
        :: "D" (p), "d" (pe), "b" (k), "c" (pat),
           "a" (sval), "S" (lb)
    );
}

STATIC void movinv32_bottom_up(ulong* restrict buf,
                               ulong len_dw, const void* vctx) {
    const movinv32_ctx* restrict ctx = (const movinv32_ctx*)vctx;
    ulong* p = buf;
    ulong* pe = buf + (len_dw - 1);
    int k = ctx->off;
    ulong pat = ctx->p1;
    ulong lb = ctx->lb;
    int sval = ctx->sval;

    /* Original C code replaced with hand tuned assembly code
     * while (1) {
     *     if ((bad=*p) != pat) {
     *         mt86_error((ulong*)p, pat, bad);
     *     }
     *     *p = ~pat;
     *     if (p >= pe) break;
     *     p++;
     *
     *     if (++k >= 32) {
     *         pat = lb;
     *         k = 0;
     *     } else {
     *         pat = pat << 1;
     *         pat |= sval;
     *     }
     * }
     */
    asm __volatile__ (
        "pushl %%ebp\n\t"
        "jmp L30\n\t"
        ".p2align 4,,7\n\t"
        "L930:\n\t"
        "addl $4,%%edi\n\t"
        "L30:\n\t"
        "movl (%%edi),%%ebp\n\t"
        "cmpl %%ecx,%%ebp\n\t"
        "jne L34\n\t"
        "L35:\n\t"
        "notl %%ecx\n\t"
        "movl %%ecx,(%%edi)\n\t"
        "notl %%ecx\n\t"
        "incl %%ebx\n\t"
        "cmpl $32,%%ebx\n\t"
        "jne L31\n\t"
        "movl %%esi,%%ecx\n\t"
        "xorl %%ebx,%%ebx\n\t"
        "jmp L32\n"
        "L31:\n\t"
        "shll $1,%%ecx\n\t"
        "orl %%eax,%%ecx\n\t"
        "L32:\n\t"
        "cmpl %%edx,%%edi\n\t"
        "jb L930\n\t"
        "jmp L33\n\t"

        "L34:\n\t"
        "pushl %%esi\n\t"
        "pushl %%eax\n\t"
        "pushl %%ebx\n\t"
        "pushl %%edx\n\t"
        "pushl %%ebp\n\t"
        "pushl %%ecx\n\t"
        "pushl %%edi\n\t"
        "call mt86_error\n\t"
        "popl %%edi\n\t"
        "popl %%ecx\n\t"
        "popl %%ebp\n\t"
        "popl %%edx\n\t"
        "popl %%ebx\n\t"
        "popl %%eax\n\t"
        "popl %%esi\n\t"
        "jmp L35\n"

        "L33:\n\t"
        "popl %%ebp\n\t"
        : "=b" (k), "=c" (pat)
        : "D" (p), "d" (pe), "b" (k), "c" (pat),
          "a" (sval), "S" (lb)
    );
}

STATIC void movinv32_top_down(ulong* restrict buf,
                              ulong len_dw, const void* vctx) {
    const movinv32_ctx* restrict ctx = (const movinv32_ctx*)vctx;
    ulong* pe = buf;
    ulong* p = buf + (len_dw - 1);
    int k = ctx->off;
    ulong pat = ctx->p1;
    ulong hb = ctx->hb;
    int sval = ctx->sval;
    ulong p3 = (ulong)sval << 31;

    // Advance 'k' and 'pat' to where they would have been
    // at the end of the corresponding bottom_up segment.
    //
    // The '-1' is because we didn't advance 'k' or 'pat'
    // on the final bottom_up loop, so they're off by one...
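    //
    // (The pattern sequence repeats every 32 dwords, so replaying
    // (len_dw - 1) % 32 steps is enough to reproduce the final
    // bottom_up state.)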
    ulong mod_len = (len_dw - 1) % 32;
    for (int i = 0; i < mod_len; i++) {
        if (++k >= 32) {
            pat = ctx->lb;
            k = 0;
        } else {
            pat = pat << 1;
            pat |= sval;
        }
    }

    // Increment 'k' only because the code below has an off-by-one
    // interpretation of 'k' relative to the bottom_up routine.
    // There it ranges from 0:31, and here it ranges from 1:32.
    k++;

    /* Original C code replaced with hand tuned assembly code */
#if PREFER_C
    ulong bad;
    while (1) {
        if ((bad=*p) != ~pat) {
            mt86_error((ulong*)p, ~pat, bad);
        }
        *p = pat;
        if (p <= pe) break;
        p--;

        if (--k <= 0) {
            k = 32;
            pat = hb;
        } else {
            pat = pat >> 1;
            pat |= p3;
        }
    }
#else
    asm __volatile__ (
        "pushl %%ebp\n\t"
        "jmp L40\n\t"
        ".p2align 4,,7\n\t"
        "L49:\n\t"
        "subl $4,%%edi\n\t"
        "L40:\n\t"
        "movl (%%edi),%%ebp\n\t"
        "notl %%ecx\n\t"
        "cmpl %%ecx,%%ebp\n\t"
        "jne L44\n\t"
        "L45:\n\t"
        "notl %%ecx\n\t"
        "movl %%ecx,(%%edi)\n\t"
        "decl %%ebx\n\t"
        "cmpl $0,%%ebx\n\t"
        "jg L41\n\t"
        "movl %%esi,%%ecx\n\t"
        "movl $32,%%ebx\n\t"
        "jmp L42\n"
        "L41:\n\t"
        "shrl $1,%%ecx\n\t"
        "orl %%eax,%%ecx\n\t"
        "L42:\n\t"
        "cmpl %%edx,%%edi\n\t"
        "ja L49\n\t"
        "jmp L43\n\t"

        "L44:\n\t"
        "pushl %%esi\n\t"
        "pushl %%eax\n\t"
        "pushl %%ebx\n\t"
        "pushl %%edx\n\t"
        "pushl %%ebp\n\t"
        "pushl %%ecx\n\t"
        "pushl %%edi\n\t"
        "call mt86_error\n\t"
        "popl %%edi\n\t"
        "popl %%ecx\n\t"
        "popl %%ebp\n\t"
        "popl %%edx\n\t"
        "popl %%ebx\n\t"
        "popl %%eax\n\t"
        "popl %%esi\n\t"
        "jmp L45\n"

        "L43:\n\t"
        "popl %%ebp\n\t"
        : : "D" (p), "d" (pe), "b" (k), "c" (pat),
            "a" (p3), "S" (hb)
    );
#endif
}

void movinv32(int iter, ulong p1, ulong lb, ulong hb, int sval,
              int off, int me)
{
    // First callsite:
    //  - p1 has 1 bit set (somewhere)
    //  - lb = 1 ("low bit")
    //  - hb = 0x80000000 ("high bit")
    //  - sval = 0
    //  - 'off' indicates the position of the set bit in p1
    //
    // Second callsite is the same, but inverted:
    //  - p1 has 1 bit clear (somewhere)
    //  - lb = 0xfffffffe
    //  - hb = 0x7fffffff
    //  - sval = 1
    //  - 'off' indicates the position of the cleared bit in p1

    movinv32_ctx ctx;
    ctx.p1 = p1;
    ctx.lb = lb;
    ctx.hb = hb;
    ctx.sval = sval;
    ctx.off = off;

    /* Display the current pattern */
    if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, p1);

    sliced_foreach_segment(&ctx, me, movinv32_init);
    { BAILR }

    /* Do the moving inversions test. Check for the initial pattern and
     * then write the complement for each memory location. Test from the
     * bottom up and then from the top down. */
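    // As in movinv1, the bottom-up pass checks the pattern and writes
    // its complement, and the top-down pass checks the complement and
    // rewrites the pattern; here the pattern also shifts one bit
    // position per dword, repeating every 32 dwords.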
    for (int i = 0; i < iter; i++) {
        sliced_foreach_segment(&ctx, me, movinv32_bottom_up);
        { BAILR }
        sliced_foreach_segment(&ctx, me, movinv32_top_down);
        { BAILR }
    }
}

typedef struct {
    ulong offset;
    ulong p1;
    ulong p2;
} modtst_ctx;

STATIC void modtst_sparse_writes(ulong* restrict start,
                                 ulong len_dw, const void* vctx) {
    const modtst_ctx* restrict ctx = (const modtst_ctx*)vctx;
    ulong p1 = ctx->p1;
    ulong offset = ctx->offset;

#if PREFER_C
    for (ulong i = offset; i < len_dw; i += MOD_SZ) {
        start[i] = p1;
    }
#else
    ulong* p = start + offset;
    ulong* pe = start + len_dw;
    asm __volatile__ (
        "jmp L60\n\t"
        ".p2align 4,,7\n\t"
        "L60:\n\t"
        "movl %%eax,(%%edi)\n\t"
        "addl $80,%%edi\n\t"     // advance MOD_SZ (20) dwords = 80 bytes
        "cmpl %%edx,%%edi\n\t"
        "jb L60\n\t"
        :: "D" (p), "d" (pe), "a" (p1)
    );
#endif
}

STATIC void modtst_dense_writes(ulong* restrict start,
                                ulong len_dw, const void* vctx) {
    const modtst_ctx* restrict ctx = (const modtst_ctx*)vctx;
    ulong p2 = ctx->p2;
    ulong offset = ctx->offset;

    ASSERT(offset < MOD_SZ);

    ulong k = 0;
#if PREFER_C
    for (ulong i = 0; i < len_dw; i++) {
        if (k != offset) {
            start[i] = p2;
        }
        if (++k >= MOD_SZ) {
            k = 0;
        }
    }
#else
    ulong* pe = start + (len_dw - 1);
    asm __volatile__ (
        "jmp L50\n\t"
        ".p2align 4,,7\n\t"
        "L54:\n\t"
        "addl $4,%%edi\n\t"
        "L50:\n\t"
        "cmpl %%ebx,%%ecx\n\t"
        "je L52\n\t"
        "movl %%eax,(%%edi)\n\t"
        "L52:\n\t"
        "incl %%ebx\n\t"
        "cmpl $19,%%ebx\n\t"   // wrap k at MOD_SZ (20)
        "jle L53\n\t"
        "xorl %%ebx,%%ebx\n\t"
        "L53:\n\t"
        "cmpl %%edx,%%edi\n\t"
        "jb L54\n\t"
        : : "D" (start), "d" (pe), "a" (p2),
            "b" (k), "c" (offset)
    );
#endif
}

STATIC void modtst_check(ulong* restrict start,
                         ulong len_dw, const void* vctx) {
    const modtst_ctx* restrict ctx = (const modtst_ctx*)vctx;
    ulong p1 = ctx->p1;
    ulong offset = ctx->offset;

    ASSERT(offset < MOD_SZ);

#if PREFER_C
    ulong bad;
    for (ulong i = offset; i < len_dw; i += MOD_SZ) {
        if ((bad = start[i]) != p1)
            mt86_error(start + i, p1, bad);
    }
#else
    ulong* p = start + offset;
    ulong* pe = start + len_dw;
    asm __volatile__ (
        "jmp L70\n\t"
        ".p2align 4,,7\n\t"
        "L70:\n\t"
        "movl (%%edi),%%ecx\n\t"
        "cmpl %%eax,%%ecx\n\t"
        "jne L71\n\t"
        "L72:\n\t"
        "addl $80,%%edi\n\t"   // advance MOD_SZ (20) dwords = 80 bytes
        "cmpl %%edx,%%edi\n\t"
        "jb L70\n\t"
        "jmp L73\n\t"

        "L71:\n\t"
        "pushl %%edx\n\t"
        "pushl %%ecx\n\t"
        "pushl %%eax\n\t"
        "pushl %%edi\n\t"
        "call mt86_error\n\t"
        "popl %%edi\n\t"
        "popl %%eax\n\t"
        "popl %%ecx\n\t"
        "popl %%edx\n\t"
        "jmp L72\n"

        "L73:\n\t"
        : : "D" (p), "d" (pe), "a" (p1)
        : "ecx"
    );
#endif
}

/*
 * Test all of memory using a modulo X access pattern.
 */
void modtst(int offset, int iter, ulong p1, ulong p2, int me)
{
    modtst_ctx ctx;
    ctx.offset = offset;
    ctx.p1 = p1;
    ctx.p2 = p2;

    /* Display the current pattern */
    if (mstr_cpu == me) {
        hprint(LINE_PAT, COL_PAT-2, p1);
        cprint(LINE_PAT, COL_PAT+6, "-");
        dprint(LINE_PAT, COL_PAT+7, offset, 2, 1);
    }

    /* Write every nth location with the pattern */
    sliced_foreach_segment(&ctx, me, modtst_sparse_writes);
    { BAILR }

    /* Write the rest of memory "iter" times with the pattern complement */
    for (ulong i = 0; i < iter; i++) {
        sliced_foreach_segment(&ctx, me, modtst_dense_writes);
        { BAILR }
    }

    /* Now check every nth location */
    sliced_foreach_segment(&ctx, me, modtst_check);
}

STATIC ulong block_move_normalize_len_dw(ulong len_dw) {
    // Round the segment length down to a multiple of 32 dwords
    // (128 bytes): the test works on 64-byte blocks and splits
    // each segment into two halves.
    ulong result = (len_dw >> 5) << 5;
    ASSERT(result > 0);
    return result;
}

STATIC void block_move_init(ulong* restrict buf,
                            ulong len_dw, const void* unused_ctx) {
    len_dw = block_move_normalize_len_dw(len_dw);

    // Compute 'len' in units of 64-byte chunks:
    ulong len = len_dw >> 4;

    // We only need to initialize len/2, since we'll just copy
    // the first half onto the second half in the move step.
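    //
    // For example (illustrative): a 1MB segment has len_dw == 0x40000,
    // hence len == 0x4000 64-byte blocks, of which we initialize only
    // the 0x2000 blocks in the lower half.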
    len = len >> 1;

    ulong base_val = 1;
#if PREFER_C
    while (len > 0) {
        ulong neg_val = ~base_val;

        // Set a block of 64 bytes; the first block's dwords are:
        buf[0]  = base_val;  // 0x00000001
        buf[1]  = base_val;  // 0x00000001
        buf[2]  = base_val;  // 0x00000001
        buf[3]  = base_val;  // 0x00000001
        buf[4]  = neg_val;   // 0xfffffffe
        buf[5]  = neg_val;   // 0xfffffffe
        buf[6]  = base_val;  // 0x00000001
        buf[7]  = base_val;  // 0x00000001
        buf[8]  = base_val;  // 0x00000001
        buf[9]  = base_val;  // 0x00000001
        buf[10] = neg_val;   // 0xfffffffe
        buf[11] = neg_val;   // 0xfffffffe
        buf[12] = base_val;  // 0x00000001
        buf[13] = base_val;  // 0x00000001
        buf[14] = neg_val;   // 0xfffffffe
        buf[15] = neg_val;   // 0xfffffffe

        buf += 16;  // advance to next 64-byte block
        len--;

        // Rotate the bit left, including an all-zero state.
        // It can't hurt to have a periodicity of 33 instead of
        // a power of two.
        if (base_val == 0) {
            base_val = 1;
        } else if (base_val & 0x80000000) {
            base_val = 0;
        } else {
            base_val = base_val << 1;
        }
    }
#else
    asm __volatile__ (
        "jmp L100\n\t"

        ".p2align 4,,7\n\t"
        "L100:\n\t"

        // First loop eax is 0x00000001, edx is 0xfffffffe
        "movl %%eax, %%edx\n\t"
        "notl %%edx\n\t"

        // Set a block of 64 bytes; first loop dwords are:
        "movl %%eax,0(%%edi)\n\t"   // 0x00000001
        "movl %%eax,4(%%edi)\n\t"   // 0x00000001
        "movl %%eax,8(%%edi)\n\t"   // 0x00000001
        "movl %%eax,12(%%edi)\n\t"  // 0x00000001
        "movl %%edx,16(%%edi)\n\t"  // 0xfffffffe
        "movl %%edx,20(%%edi)\n\t"  // 0xfffffffe
        "movl %%eax,24(%%edi)\n\t"  // 0x00000001
        "movl %%eax,28(%%edi)\n\t"  // 0x00000001
        "movl %%eax,32(%%edi)\n\t"  // 0x00000001
        "movl %%eax,36(%%edi)\n\t"  // 0x00000001
        "movl %%edx,40(%%edi)\n\t"  // 0xfffffffe
        "movl %%edx,44(%%edi)\n\t"  // 0xfffffffe
        "movl %%eax,48(%%edi)\n\t"  // 0x00000001
        "movl %%eax,52(%%edi)\n\t"  // 0x00000001
        "movl %%edx,56(%%edi)\n\t"  // 0xfffffffe
        "movl %%edx,60(%%edi)\n\t"  // 0xfffffffe

        // Rotate left with carry:
        // second loop eax is 0x00000002,
        // second loop edx is (~eax) 0xfffffffd
        "rcll $1, %%eax\n\t"

        // Move the current position forward 64 bytes
        // (to the start of the next block)
        "leal 64(%%edi), %%edi\n\t"

        // Loop until end
        "decl %%ecx\n\t"
        "jnz L100\n\t"
        : : "D" (buf), "c" (len), "a" (base_val)
        : "edx"
    );
#endif
}

typedef struct {
    int iter;
    int me;
} block_move_ctx;

STATIC void block_move_move(ulong* restrict buf,
                            ulong len_dw, const void* vctx) {
    const block_move_ctx* restrict ctx = (const block_move_ctx*)vctx;
    ulong iter = ctx->iter;
    int me = ctx->me;

    len_dw = block_move_normalize_len_dw(len_dw);

    /* Now move the data around.
     * First move the data up half of the segment size we are testing,
     * then move the data to the original location + 32 bytes.
     */
    ulong half_len_dw = len_dw / 2;  // half the size of this block, in dwords
    ASSERT(half_len_dw > 8);
    ulong* mid = buf + half_len_dw;  // VA at the midpoint of this block

    for (int i = 0; i < iter; i++) {
        if (i > 0) {
            // foreach_segment() called this before the 0th iteration,
            // so don't tick twice in quick succession.
            do_tick(me);
        }
        { BAILR }
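        // Net effect of the three moves below (also documented in the
        // assembly variant): the second half becomes a copy of the
        // first half's initial contents, and the first half ends up
        // rotated right by 8 dwords (32 bytes), with wraparound.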
#if PREFER_C
        // Move the first half to the second half:
        movsl(/*dest=*/ mid, /*src=*/ buf, half_len_dw);

        // Move the second half, less the last 8 dwords,
        // to the first half plus an offset of 8 dwords:
        movsl(/*dest=*/ buf + 8, /*src=*/ mid, half_len_dw - 8);

        // Finally, move the last 8 dwords of the 2nd half
        // to the first 8 dwords of the first half:
        movsl(/*dest=*/ buf, /*src=*/ mid + half_len_dw - 8, 8);
#else
        asm __volatile__ (
            "cld\n"
            "jmp L110\n\t"

            ".p2align 4,,7\n\t"
            "L110:\n\t"

            //
            // At the end of all this:
            //  - the second half equals the initial value of the first half
            //  - the first half is right shifted 32 bytes (with wrapping)
            //

            // Move first half to second half
            "movl %1,%%edi\n\t"  // Destination, 'mid' (mid point)
            "movl %0,%%esi\n\t"  // Source, 'buf' (start point)
            "movl %2,%%ecx\n\t"  // Length, 'half_len_dw' (size of a half in dwords)
            "rep\n\t"
            "movsl\n\t"

            // Move the second half, less the last 32 bytes, to the
            // first half, offset plus 32 bytes
            "movl %0,%%edi\n\t"
            "addl $32,%%edi\n\t" // Destination, 'buf' plus 32 bytes
            "movl %1,%%esi\n\t"  // Source, 'mid'
            "movl %2,%%ecx\n\t"
            "subl $8,%%ecx\n\t"  // Length, 'half_len_dw' minus 8 dwords
            "rep\n\t"
            "movsl\n\t"

            // Move the last 8 dwords (32 bytes) of the second half
            // to the start of the first half
            "movl %0,%%edi\n\t"  // Destination, 'buf'
            // Source is implicit: esi is left pointing 8 dwords from
            // the end of the second half by the previous rep/movsl
            "movl $8,%%ecx\n\t"  // Length, 8 dwords (32 bytes)
            "rep\n\t"
            "movsl\n\t"

            :: "g" (buf), "g" (mid), "g" (half_len_dw)
            : "edi", "esi", "ecx"
        );
#endif
    }
}

STATIC void block_move_check(ulong* restrict buf,
                             ulong len_dw, const void* unused_ctx) {
    len_dw = block_move_normalize_len_dw(len_dw);

    /* Now check the data.
     * This is rather crude: we just check that adjacent words match.
     */
#if PREFER_C
    for (ulong i = 0; i < len_dw; i = i + 2) {
        if (buf[i] != buf[i+1]) {
            mt86_error(buf+i, buf[i], buf[i+1]);
        }
    }
#else
    ulong* pe = buf + (len_dw - 2);
    asm __volatile__ (
        "jmp L120\n\t"

        ".p2align 4,,7\n\t"
        "L124:\n\t"
        "addl $8,%%edi\n\t"  // Next qword
        "L120:\n\t"

        // Compare adjacent dwords
        "movl (%%edi),%%ecx\n\t"
        "cmpl 4(%%edi),%%ecx\n\t"
        "jnz L121\n\t"  // Print an error if they don't match

        // Loop until the end of the block
        "L122:\n\t"
        "cmpl %%edx,%%edi\n\t"
        "jb L124\n"
        "jmp L123\n\t"

        "L121:\n\t"
        // eax is not used, so we don't need to save it per cdecl.
        // ecx is used but not restored; we don't need its value
        // anymore after this point.
        "pushl %%edx\n\t"
        "pushl 4(%%edi)\n\t"
        "pushl %%ecx\n\t"
        "pushl %%edi\n\t"
        "call mt86_error\n\t"
        "popl %%edi\n\t"
        "addl $8,%%esp\n\t"
        "popl %%edx\n\t"
        "jmp L122\n"

        "L123:\n\t"
        :: "D" (buf), "d" (pe)
        : "ecx"
    );
#endif
}

/*
 * Test memory using block moves
 * Adapted from Robert Redelmeier's burnBX test
 */
void block_move(int iter, int me)
{
    cprint(LINE_PAT, COL_PAT-2, " ");

    block_move_ctx ctx;
    ctx.iter = iter;
    ctx.me = me;

    /* Initialize memory with the initial pattern. */
    sliced_foreach_segment(&ctx, me, block_move_init);
    { BAILR }
    s_barrier();

    /* Now move the data around */
    sliced_foreach_segment(&ctx, me, block_move_move);
    { BAILR }
    s_barrier();

    /* And check it. */
    sliced_foreach_segment(&ctx, me, block_move_check);
}

typedef struct {
    ulong pat;
} bit_fade_ctx;

STATIC void bit_fade_fill_seg(ulong* restrict p,
                              ulong len_dw, const void* vctx) {
    const bit_fade_ctx* restrict ctx = (const bit_fade_ctx*)vctx;
    ulong pat = ctx->pat;
    for (ulong i = 0; i < len_dw; i++) {
        p[i] = pat;
    }
}
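// bit_fade_fill() below writes a constant pattern, and bit_fade_chk()
// verifies it after the test driver has idled in sleep() for a while,
// catching any bits that faded while the memory sat idle.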
/*
 * Test memory for bit fade: fill memory with the pattern.
 */
void bit_fade_fill(ulong p1, int me)
{
    /* Display the current pattern */
    hprint(LINE_PAT, COL_PAT, p1);

    /* Initialize memory with the initial pattern. */
    bit_fade_ctx ctx;
    ctx.pat = p1;
    unsliced_foreach_segment(&ctx, me, bit_fade_fill_seg);
}

STATIC void bit_fade_chk_seg(ulong* restrict p,
                             ulong len_dw, const void* vctx) {
    const bit_fade_ctx* restrict ctx = (const bit_fade_ctx*)vctx;
    ulong pat = ctx->pat;
    for (ulong i = 0; i < len_dw; i++) {
        ulong bad;
        if ((bad = p[i]) != pat) {
            mt86_error(p+i, pat, bad);
        }
    }
}

void bit_fade_chk(ulong p1, int me)
{
    bit_fade_ctx ctx;
    ctx.pat = p1;

    /* Make sure that nothing changed while sleeping */
    unsliced_foreach_segment(&ctx, me, bit_fade_chk_seg);
}

/* Sleep for N seconds (or N milliseconds if 'sms' is set) */
void sleep(long n, int flag, int me,
           int sms /* interpret 'n' as milliseconds instead */)
{
    ulong sh, sl, l, h, t, ip = 0;

    /* Save the starting time */
    asm __volatile__(
        "rdtsc":"=a" (sl),"=d" (sh));

    /* Loop for n seconds */
    while (1) {
        asm __volatile__(
            "rep ; nop\n\t"
            "rdtsc":"=a" (l),"=d" (h));
        asm __volatile__ (
            "subl %2,%0\n\t"
            "sbbl %3,%1"
            :"=a" (l), "=d" (h)
            :"g" (sl), "g" (sh), "0" (l), "1" (h));

        if (sms != 0) {
            t = h * ((unsigned)0xffffffff / vv->clks_msec);
            t += (l / vv->clks_msec);
        } else {
            t = h * ((unsigned)0xffffffff / vv->clks_msec) / 1000;
            t += (l / vv->clks_msec) / 1000;
        }

        /* Is the time up? */
        if (t >= n) {
            break;
        }

        /* Only display elapsed time if flag is set */
        if (flag == 0) {
            continue;
        }

        if (t != ip) {
            do_tick(me);
            { BAILR }
            ip = t;
        }
    }
}

void beep(unsigned int frequency)
{
#if 0
    // BOZO(jcoiner)
    // Removed this, we need to define outb_p() and inb_p()
    // before reintroducing it.
#else
    unsigned int count = 1193180 / frequency;

    // Switch on the speaker
    outb_p(inb_p(0x61)|3, 0x61);

    // Set command for counter 2, 2-byte write
    outb_p(0xB6, 0x43);

    // Select desired Hz
    outb_p(count & 0xff, 0x42);
    outb((count >> 8) & 0xff, 0x42);

    // Block for 100 milliseconds (sms=1, so 'n' is in ms)
    sleep(100, 0, 0, 1);

    // Switch off the speaker
    outb(inb_p(0x61)&0xFC, 0x61);
#endif
}