summaryrefslogtreecommitdiffstats
path: root/include/asm-xtensa/xtensa/coreasm.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/asm-xtensa/xtensa/coreasm.h')
-rw-r--r--include/asm-xtensa/xtensa/coreasm.h526
1 files changed, 526 insertions, 0 deletions
diff --git a/include/asm-xtensa/xtensa/coreasm.h b/include/asm-xtensa/xtensa/coreasm.h
new file mode 100644
index 000000000000..a8cfb54c20a1
--- /dev/null
+++ b/include/asm-xtensa/xtensa/coreasm.h
@@ -0,0 +1,526 @@
+#ifndef XTENSA_COREASM_H
+#define XTENSA_COREASM_H
+
+/*
+ * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND
+ *
+ * include/asm-xtensa/xtensa/coreasm.h -- assembler-specific
+ * definitions that depend on CORE configuration.
+ *
+ * Source for configuration-independent binaries (which link in a
+ * configuration-specific HAL library) must NEVER include this file.
+ * It is perfectly normal, however, for the HAL itself to include this
+ * file.
+ *
+ * This file must NOT include xtensa/config/system.h. Any assembler
+ * header file that depends on system information should likely go in
+ * a new systemasm.h (or sysasm.h) header file.
+ *
+ * NOTE: macro beqi32 is NOT configuration-dependent, and is placed
+ * here until we have a configuration-independent header file.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of
+ * this archive for more details.
+ *
+ * Copyright (C) 2002 Tensilica Inc.
+ */
+
+
+#include <xtensa/config/core.h>
+#include <xtensa/config/specreg.h>
+
+/*
+ * Assembly-language specific definitions (assembly macros, etc.).
+ */
+
+/*----------------------------------------------------------------------
+ * find_ms_setbit
+ *
+ * This macro finds the most significant bit that is set in <as>
+ * and returns its index + <base> in <ad>, or <base> - 1 if <as> is zero.
+ * The index counts starting at zero for the lsbit, so the return
+ * value ranges from <base>-1 (no bit set) to <base>+31 (msbit set).
+ *
+ * Parameters:
+ * <ad> destination address register (any register)
+ * <as> source address register
+ * <at> temporary address register (must be different than <as>)
+ * <base> constant value added to result (usually 0 or 1)
+ * On entry:
+ * <ad> = undefined if different than <as>
+ * <as> = value whose most significant set bit is to be found
+ * <at> = undefined
+ * no other registers are used by this macro.
+ * On exit:
+ * <ad> = <base> + index of msbit set in original <as>,
+ * = <base> - 1 if original <as> was zero.
+ * <as> clobbered (if not <ad>)
+ * <at> clobbered (if not <ad>)
+ * Example:
+ * find_ms_setbit a0, a4, a0, 0 -- return in a0 index of msbit set in a4
+ */
+
+ .macro find_ms_setbit ad, as, at, base
+#if XCHAL_HAVE_NSA
+ movi \at, 31+\base // bias chosen so that the subtraction below yields <base> + index
+ nsau \as, \as // get index of \as, numbered from msbit (32 if absent)
+ sub \ad, \at, \as // get numbering from lsbit (<base>+0..<base>+31, <base>-1 if absent)
+#else /* XCHAL_HAVE_NSA */
+ movi \at, \base // start with result of <base> (i.e. pointing at the lsbit of 32)
+
+ beqz \as, 2f // special case for zero argument: return <base>-1
+ bltui \as, 0x10000, 1f // is it one of the 16 lsbits? (if so, check lower 16 bits)
+ addi \at, \at, 16 // no, increment result to upper 16 bits (of 32)
+ //srli \as, \as, 16 // check upper half (shift right 16 bits) -- disabled alternative to the extui below
+ extui \as, \as, 16, 16 // check upper half (shift right 16 bits)
+1: bltui \as, 0x100, 1f // is it one of the 8 lsbits? (if so, check lower 8 bits)
+ addi \at, \at, 8 // no, increment result to upper 8 bits (of 16)
+ srli \as, \as, 8 // shift right to check upper 8 bits
+1: bltui \as, 0x10, 1f // is it one of the 4 lsbits? (if so, check lower 4 bits)
+ addi \at, \at, 4 // no, increment result to upper 4 bits (of 8)
+ srli \as, \as, 4 // shift right 4 bits to check upper half
+1: bltui \as, 0x4, 1f // is it one of the 2 lsbits? (if so, check lower 2 bits)
+ addi \at, \at, 2 // no, increment result to upper 2 bits (of 4)
+ srli \as, \as, 2 // shift right 2 bits to check upper half
+1: bltui \as, 0x2, 1f // is it the lsbit? (if so, skip both adjustments below)
+ addi \at, \at, 2 // no, it is bit 1: add 2 here, the addi below takes 1 back (net +1)
+2: addi \at, \at, -1 // (from just above: net effect is add 1; from beqz: result becomes <base>-1)
+ //srli \as, \as, 1
+1: // done! \at contains <base> + index of msbit set (or <base>-1 if none set)
+ .if 0x\ad - 0x\at // destination different than \at ? (0x prefix makes a0-a15 comparable hex numbers)
+ mov \ad, \at // then move result to \ad
+ .endif
+#endif /* XCHAL_HAVE_NSA */
+ .endm // find_ms_setbit
+
+/*----------------------------------------------------------------------
+ * find_ls_setbit
+ *
+ * This macro finds the least significant bit that is set in <as>,
+ * and returns its index + <base> in <ad> (<base> - 1 if <as> is zero).
+ * Usage is the same as for the find_ms_setbit macro.
+ * Example:
+ * find_ls_setbit a0, a4, a0, 0 -- return in a0 index of lsbit set in a4
+ */
+
+ .macro find_ls_setbit ad, as, at, base
+ neg \at, \as // \at = -\as; together with the and below this keeps only the lowest set bit...
+ and \as, \at, \as // ... in \as (a zero input stays zero)
+ find_ms_setbit \ad, \as, \at, \base // at most one bit is left, so its ms-bit index is the ls-bit index
+ .endm // find_ls_setbit
+
+/*----------------------------------------------------------------------
+ * find_ls_one
+ *
+ * Same as find_ls_setbit with base zero.
+ * Source (as) and destination (ad) registers must be different.
+ * Provided for backward compatibility.
+ */
+
+ .macro find_ls_one ad, as
+ find_ls_setbit \ad, \as, \ad, 0 // \ad doubles as the temporary (hence \ad must differ from \as); base is 0
+ .endm // find_ls_one
+
+/*----------------------------------------------------------------------
+ * floop, floopnez, floopgtz, floopend
+ *
+ * These macros are used for fast inner loops that
+ * work whether or not the Loops option is configured.
+ * If the Loops option is configured, they simply use
+ * the zero-overhead LOOP instructions; otherwise
+ * they use explicit decrement and branch instructions.
+ *
+ * They are used in pairs, with floop, floopnez or floopgtz
+ * at the beginning of the loop, and floopend at the end.
+ *
+ * Each pair of loop macro calls must be given the loop count
+ * address register and a unique label for that loop.
+ *
+ * Example:
+ *
+ * movi a3, 16 // loop 16 times
+ * floop a3, myloop1
+ * :
+ * bnez a7, end1 // exit loop if a7 != 0
+ * :
+ * floopend a3, myloop1
+ * end1:
+ *
+ * Like the LOOP instructions, these macros cannot be
+ * nested, must include at least one instruction,
+ * cannot call functions inside the loop, etc.
+ * The loop can be exited by jumping to the instruction
+ * following floopend (or elsewhere outside the loop),
+ * or continued by jumping to a NOP instruction placed
+ * immediately before floopend.
+ *
+ * Unlike LOOP instructions, the register passed to floop*
+ * cannot be used inside the loop, because it is used as
+ * the loop counter if the Loops option is not configured.
+ * And its value is undefined after exiting the loop.
+ * And because the loop counter register is active inside
+ * the loop, you can't easily use this construct to loop
+ * across a register file using ROTW as you might with LOOP
+ * instructions, unless you copy the loop register along.
+ */
+
+ /* Named label version of the macros: */
+
+ .macro floop ar, endlabel
+ floop_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel // both loop labels are derived from <endlabel>
+ .endm // floop
+
+ .macro floopnez ar, endlabel
+ floopnez_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel // same label pair, loopnez flavour
+ .endm // floopnez
+
+ .macro floopgtz ar, endlabel
+ floopgtz_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel // same label pair, loopgtz flavour
+ .endm // floopgtz
+
+ .macro floopend ar, endlabel
+ floopend_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel // must be given the same <endlabel> as the opening macro
+ .endm // floopend
+
+ /* Numbered local label version of the macros: */
+#if 0 /*UNTESTED*/ /* compiled out: none of the four macros below is ever defined */
+ .macro floop89 ar
+ floop_ \ar, 8, 9f // start label is numeric local label 8, end is a forward reference to 9
+ .endm // floop89
+
+ .macro floopnez89 ar
+ floopnez_ \ar, 8, 9f // same numeric labels, loopnez flavour
+ .endm // floopnez89
+
+ .macro floopgtz89 ar
+ floopgtz_ \ar, 8, 9f // same numeric labels, loopgtz flavour
+ .endm // floopgtz89
+
+ .macro floopend89 ar
+ floopend_ \ar, 8b, 9 // backward reference to local label 8, and local label 9 gets defined as the end
+ .endm // floopend89
+#endif /*0*/
+
+ /* Underlying version of the macros: */
+
+ .macro floop_ ar, startlabel, endlabelref
+ .ifdef _infloop_ // the flag exists only once some floop macro has been expanded
+ .ifne _infloop_ // non-zero: an earlier floopXXX is still waiting for its floopend
+ .err // Error: floop cannot be nested
+ .endif
+ .endif
+ .set _infloop_, 1 // a loop is now open
+#if ! XCHAL_HAVE_LOOPS
+\startlabel: // software fallback: loop head that floopend_ branches back to
+ addi \ar, \ar, -1 // count this iteration up front
+#else /* XCHAL_HAVE_LOOPS */
+ loop \ar, \endlabelref // Loops option present: zero-overhead loop up to the end label
+#endif /* XCHAL_HAVE_LOOPS */
+ .endm // floop_
+
+ .macro floopnez_ ar, startlabel, endlabelref
+ .ifdef _infloop_ // the flag exists only once some floop macro has been expanded
+ .ifne _infloop_ // non-zero: an earlier floopXXX is still waiting for its floopend
+ .err // Error: floopnez cannot be nested
+ .endif
+ .endif
+ .set _infloop_, 1 // a loop is now open
+#if ! XCHAL_HAVE_LOOPS
+ beqz \ar, \endlabelref // software fallback: a zero count skips the loop entirely
+\startlabel: // loop head that floopend_ branches back to
+ addi \ar, \ar, -1 // count this iteration up front
+#else /* XCHAL_HAVE_LOOPS */
+ loopnez \ar, \endlabelref // Loops option present: zero-overhead loop up to the end label
+#endif /* XCHAL_HAVE_LOOPS */
+ .endm // floopnez_
+
+ .macro floopgtz_ ar, startlabel, endlabelref
+ .ifdef _infloop_ // the flag exists only once some floop macro has been expanded
+ .ifne _infloop_ // non-zero: an earlier floopXXX is still waiting for its floopend
+ .err // Error: floopgtz cannot be nested
+ .endif
+ .endif
+ .set _infloop_, 1 // a loop is now open
+#if ! XCHAL_HAVE_LOOPS
+ bltz \ar, \endlabelref // software fallback: a negative count skips the loop ...
+ beqz \ar, \endlabelref // ... and so does a zero count
+\startlabel: // loop head that floopend_ branches back to
+ addi \ar, \ar, -1 // count this iteration up front
+#else /* XCHAL_HAVE_LOOPS */
+ loopgtz \ar, \endlabelref // Loops option present: zero-overhead loop up to the end label
+#endif /* XCHAL_HAVE_LOOPS */
+ .endm // floopgtz_
+
+
+ .macro floopend_ ar, startlabelref, endlabel
+ .ifndef _infloop_ // no floopXXX expanded yet in this unit:
+ .set _infloop_, 0 // give the flag its "no loop open" value so the test below sees a constant
+ .endif
+ .ifeq _infloop_ // zero: there is no open loop to close (reported exactly once)
+ .err // Error: floopend without matching floopXXX
+ .endif
+ .set _infloop_, 0 // loop closed; a following floopXXX is legal again
+#if ! XCHAL_HAVE_LOOPS
+ bnez \ar, \startlabelref // software fallback: go round again until the counter reaches zero
+#endif /* XCHAL_HAVE_LOOPS */
+\endlabel: // loop exit; this is the end label the opening macro referred to
+ .endm // floopend_
+
+/*----------------------------------------------------------------------
+ * crsil -- conditional RSIL (read/set interrupt level)
+ *
+ * Executes the RSIL instruction if it exists, else just reads PS.
+ * The RSIL instruction does not exist in the new exception architecture
+ * if the interrupt option is not selected.
+ */
+
+ .macro crsil ar, newlevel
+#if ! (XCHAL_HAVE_OLD_EXC_ARCH || XCHAL_HAVE_INTERRUPTS)
+ rsr \ar, PS // no RSIL in this configuration: only read PS, <newlevel> is not used
+#else
+ rsil \ar, \newlevel // RSIL exists: read/set the interrupt level in one instruction
+#endif
+ .endm // crsil
+
+/*----------------------------------------------------------------------
+ * window_spill{4,8,12}
+ *
+ * These macros spill callers' register windows to the stack.
+ * They work for both privileged and non-privileged tasks.
+ * Must be called from a windowed ABI context, eg. within
+ * a windowed ABI function (ie. valid stack frame, window
+ * exceptions enabled, not in exception mode, etc).
+ *
+ * These macros require a single invocation of the window_spill_common
+ * macro in the same assembly unit and section.
+ *
+ * Note that using window_spill{4,8,12} macros is more efficient
+ * than calling a function implemented using window_spill_function,
+ * because the latter needs extra code to figure out the size of
+ * the call to the spilling function.
+ *
+ * Example usage:
+ *
+ * .text
+ * .align 4
+ * .global some_function
+ * .type some_function,@function
+ * some_function:
+ * entry a1, 16
+ * :
+ * :
+ *
+ * window_spill4 // spill windows of some_function's callers; preserves a0..a3 only;
+ * // to use window_spill{8,12} in this example function we'd have
+ * // to increase space allocated by the entry instruction, because
+ * // 16 bytes only allows call4; 32 or 48 bytes (+locals) are needed
+ * // for call8/window_spill8 or call12/window_spill12 respectively.
+ * :
+ *
+ * retw
+ *
+ * window_spill_common // instantiates code used by window_spill4
+ *
+ *
+ * On entry:
+ * none (if window_spill4)
+ * stack frame has enough space allocated for call8 (if window_spill8)
+ * stack frame has enough space allocated for call12 (if window_spill12)
+ * On exit:
+ * a4..a15 clobbered (if window_spill4)
+ * a8..a15 clobbered (if window_spill8)
+ * a12..a15 clobbered (if window_spill12)
+ * no caller windows are in live registers
+ */
+
+ .macro window_spill4
+#if XCHAL_HAVE_WINDOWED
+# if XCHAL_NUM_AREGS == 64
+ call4 .L__wdwspill_assist60 // 64 registers: chain of helper calls deep enough to clear out any live callers
+# elif XCHAL_NUM_AREGS == 32
+ call4 .L__wdwspill_assist28 // 32 registers: chain of helper calls deep enough to clear out any live callers
+# elif XCHAL_NUM_AREGS == 16
+ movi a15, 0 // 16 registers: no call needed to reach the end, writing a15 is enough
+# endif
+#endif
+ .endm // window_spill4
+
+ .macro window_spill8
+#if XCHAL_HAVE_WINDOWED
+# if XCHAL_NUM_AREGS == 64
+ call8 .L__wdwspill_assist56 // 64 registers: chain of helper calls deep enough to clear out any live callers
+# elif XCHAL_NUM_AREGS == 32
+ call8 .L__wdwspill_assist24 // 32 registers: chain of helper calls deep enough to clear out any live callers
+# elif XCHAL_NUM_AREGS == 16
+ movi a15, 0 // 16 registers: no call needed to reach the end, writing a15 is enough
+# endif
+#endif
+ .endm // window_spill8
+
+ .macro window_spill12
+#if XCHAL_HAVE_WINDOWED
+# if XCHAL_NUM_AREGS == 64
+ call12 .L__wdwspill_assist52 // 64 registers: chain of helper calls deep enough to clear out any live callers
+# elif XCHAL_NUM_AREGS == 32
+ call12 .L__wdwspill_assist20 // 32 registers: chain of helper calls deep enough to clear out any live callers
+# elif XCHAL_NUM_AREGS == 16
+ movi a15, 0 // 16 registers: no call needed to reach the end, writing a15 is enough
+# endif
+#endif
+ .endm // window_spill12
+
+/*----------------------------------------------------------------------
+ * window_spill_function
+ *
+ * This macro outputs a function that will spill its caller's callers'
+ * register windows to the stack. Eg. it could be used to implement
+ * a version of xthal_window_spill() that works in non-privileged tasks.
+ * This works for both privileged and non-privileged tasks.
+ *
+ * Typical usage:
+ *
+ * .text
+ * .align 4
+ * .global my_spill_function
+ * .type my_spill_function,@function
+ * my_spill_function:
+ * window_spill_function
+ *
+ * On entry to resulting function:
+ * none
+ * On exit from resulting function:
+ * none (no caller windows are in live registers)
+ */
+
+ .macro window_spill_function
+#if XCHAL_HAVE_WINDOWED
+# if XCHAL_NUM_AREGS == 32
+ entry sp, 48 // 48-byte frame: any of the call4/call8/call12 below may be taken
+ bbci.l a0, 31, 1f // branch if called with call4 (bit 31 of a0 clear)
+ bbsi.l a0, 30, 2f // branch if called with call12 (bit 30 of a0 set)
+ call8 .L__wdwspill_assist16 // called with call8, only need another 8
+ retw
+1: call12 .L__wdwspill_assist16 // called with call4, only need another 12
+ retw
+2: call4 .L__wdwspill_assist16 // called with call12, only need another 4
+ retw
+# elif XCHAL_NUM_AREGS == 64
+ entry sp, 48 // 48-byte frame: any of the call4/call8/call12 below may be taken
+ bbci.l a0, 31, 1f // branch if called with call4 (bit 31 of a0 clear)
+ bbsi.l a0, 30, 2f // branch if called with call12 (bit 30 of a0 set)
+ call4 .L__wdwspill_assist52 // called with call8, only need a call4
+ retw
+1: call8 .L__wdwspill_assist52 // called with call4, only need a call8
+ retw
+2: call12 .L__wdwspill_assist40 // called with call12, can skip a call12
+ retw
+# elif XCHAL_NUM_AREGS == 16
+ entry sp, 16 // no helper calls in this variant, only register writes
+ bbci.l a0, 31, 1f // branch if called with call4 (bit 31 of a0 clear)
+ bbsi.l a0, 30, 2f // branch if called with call12 (bit 30 of a0 set)
+ movi a7, 0 // called with call8; NOTE(review): presumably the write to a7 is what forces the spill -- confirm
+ retw
+1: movi a11, 0 // called with call4; NOTE(review): presumably the write to a11 is what forces the spill -- confirm
+2: retw // if called with call12, everything already spilled
+
+// Disabled earlier variant, kept for reference:
+// movi a15, 0 // trick to spill all but the direct caller
+// j 1f
+// // The entry instruction is magical in the assembler (gets auto-aligned)
+// // so we have to jump to it to avoid falling through the padding.
+// // We need entry/retw to know where to return.
+//1: entry sp, 16
+// retw
+# else
+# error "unrecognized address register file size"
+# endif
+#endif /* XCHAL_HAVE_WINDOWED */
+ window_spill_common // instantiate the .L__wdwspill_assist* helpers used by the calls above
+ .endm // window_spill_function
+
+/*----------------------------------------------------------------------
+ * window_spill_common
+ *
+ * Common code used by any number of invocations of the window_spill##
+ * and window_spill_function macros.
+ *
+ * Must be instantiated exactly once within a given assembly unit,
+ * within call/j range of and same section as window_spill##
+ * macro invocations for that assembly unit.
+ * (Is automatically instantiated by the window_spill_function macro.)
+ */
+
+ .macro window_spill_common
+#if XCHAL_HAVE_WINDOWED && (XCHAL_NUM_AREGS == 32 || XCHAL_NUM_AREGS == 64)
+ .ifndef .L__wdwspill_defined // emit the helpers only on the first expansion in this unit
+# if XCHAL_NUM_AREGS >= 64 /* deeper helpers, only needed with the 64-register file */
+.L__wdwspill_assist60: // 60 -> 52 through a call8 (the label suffix drops by the call size at every hop)
+ entry sp, 32
+ call8 .L__wdwspill_assist52
+ retw
+.L__wdwspill_assist56: // 56 -> 52 through a call4
+ entry sp, 16
+ call4 .L__wdwspill_assist52
+ retw
+.L__wdwspill_assist52: // 52 -> 40 through a call12
+ entry sp, 48
+ call12 .L__wdwspill_assist40
+ retw
+.L__wdwspill_assist40: // 40 -> 28 through a call12
+ entry sp, 48
+ call12 .L__wdwspill_assist28
+ retw
+# endif
+.L__wdwspill_assist28: // 28 -> 16 through a call12
+ entry sp, 48
+ call12 .L__wdwspill_assist16
+ retw
+.L__wdwspill_assist24: // 24 -> 16 through a call8
+ entry sp, 32
+ call8 .L__wdwspill_assist16
+ retw
+.L__wdwspill_assist20: // 20 -> 16 through a call4
+ entry sp, 16
+ call4 .L__wdwspill_assist16
+ retw
+.L__wdwspill_assist16: // end of every chain: no further call, only a write to a15
+ entry sp, 16
+ movi a15, 0 // NOTE(review): presumably this write is what forces the last windows out -- confirm
+ retw
+ .set .L__wdwspill_defined, 1 // remember that the helpers now exist
+ .endif
+#endif /* XCHAL_HAVE_WINDOWED with 32 or 64 aregs */
+ .endm // window_spill_common
+
+/*----------------------------------------------------------------------
+ * beqi32
+ *
+ * This macro implements a version of beqi for an arbitrary 32-bit immediate value.
+ *
+ * beqi32 ax, ay, imm32, label
+ *
+ * Compares the value in register ax with imm32 and jumps to label if
+ * equal. Clobbers register ay if needed.
+ *
+ */
+ .macro beqi32 ax, ay, imm, label
+ .ifeq (\imm) // 0 : compare against zero directly
+ beqz \ax, \label
+ .else
+ .ifeq (\imm+1) // -1 : handled by beqi
+ beqi \ax, \imm, \label
+ .else
+ .ifeq ((\imm-1) & ~7) // 1..8 : handled by beqi
+ beqi \ax, \imm, \label
+ .else
+ // Every other value goes through the scratch register \ay; the immediates
+ // 10,12,16,32,64,128,256 could be special-cased too, at the cost of a longer macro.
+ movi \ay, \imm
+ beq \ax, \ay, \label
+ .endif
+ .endif
+ .endif
+ .endm // beqi32
+
+#endif /*XTENSA_COREASM_H*/
+