author     Peter Maydell    2017-06-05 19:03:43 +0200
committer  Peter Maydell    2017-06-05 19:03:43 +0200
commit     a0d4aac7467dd02e5657b79e867f067330266a24 (patch)
tree       dafa5ae8bfd6aa13c76af9675c9ae2b8506b05d7
parent     Merge remote-tracking branch 'remotes/mjt/tags/trivial-patches-fetch' into st... (diff)
parent     target/alpha: Use goto_tb for fallthru between TBs (diff)
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20170605' into staging
Queued TCG patches

# gpg: Signature made Mon 05 Jun 2017 17:48:42 BST
# gpg: using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg: aka "Richard Henderson <rth@redhat.com>"
# gpg: aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC 16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20170605: (26 commits)
  target/alpha: Use goto_tb for fallthru between TBs
  target/alpha: Implement WTINT inline
  target/mips: optimize indirect branches
  target/mips: optimize cross-page direct jumps in softmmu
  target/aarch64: optimize indirect branches
  target/aarch64: optimize cross-page direct jumps in softmmu
  target/hppa: Use tcg_gen_lookup_and_goto_ptr
  target/s390: Use tcg_gen_lookup_and_goto_ptr
  tcg/mips: implement goto_ptr
  tcg/arm: Implement goto_ptr
  tcg/arm: Clarify tcg_out_bx for arm4 host
  tcg/s390: Implement goto_ptr
  tcg/sparc: Implement goto_ptr
  tcg/aarch64: Implement goto_ptr
  tcg/ppc: Implement goto_ptr
  tb-hash: improve tb_jmp_cache hash function in user mode
  target/i386: optimize indirect branches
  target/i386: optimize cross-page direct jumps in softmmu
  target/i386: introduce gen_jr helper to generate lookup_and_goto_ptr
  target/arm: optimize indirect branches
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
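The common thread of this pull is the new TCG opcode goto_ptr and its frontend wrapper tcg_gen_lookup_and_goto_ptr(): rather than returning to the execution loop after every indirect branch via exit_tb(0), a translator can ask a helper to look up the destination TB and jump straight into it. A minimal sketch of that conversion, modelled on the target/arm/translate-a64.c hunk below (cpu_pc, DISAS_TB_JUMP and the single-step handling are taken from that hunk; the wrapper function itself is illustrative only):

    /* Illustrative only: end a TB after an indirect branch. */
    static void gen_indirect_branch_exit(DisasContext *s)
    {
        if (s->singlestep_enabled) {
            /* Debug still has to bounce through the main loop. */
            gen_exception_internal(EXCP_DEBUG);
        } else {
            /* cpu_pc already holds the guest destination address. */
            tcg_gen_lookup_and_goto_ptr(cpu_pc);
            s->is_jmp = DISAS_TB_JUMP;
        }
    }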
-rwxr-xr-x  configure                       6
-rw-r--r--  cpu-exec.c                      6
-rw-r--r--  include/exec/exec-all.h         2
-rw-r--r--  include/exec/tb-hash.h          12
-rw-r--r--  include/qemu/atomic.h           34
-rw-r--r--  target/alpha/translate.c        30
-rw-r--r--  target/arm/translate-a64.c      5
-rw-r--r--  target/arm/translate.c          21
-rw-r--r--  target/arm/translate.h          4
-rw-r--r--  target/hppa/translate.c         8
-rw-r--r--  target/i386/translate.c         43
-rw-r--r--  target/mips/translate.c         4
-rw-r--r--  target/nios2/translate.c        2
-rw-r--r--  target/s390x/translate.c        17
-rw-r--r--  tcg-runtime.c                   32
-rw-r--r--  tcg/README                      8
-rw-r--r--  tcg/aarch64/tcg-target.h        1
-rw-r--r--  tcg/aarch64/tcg-target.inc.c    22
-rw-r--r--  tcg/arm/tcg-target.h            1
-rw-r--r--  tcg/arm/tcg-target.inc.c        54
-rw-r--r--  tcg/i386/tcg-target.h           1
-rw-r--r--  tcg/i386/tcg-target.inc.c       24
-rw-r--r--  tcg/ia64/tcg-target.h           1
-rw-r--r--  tcg/mips/tcg-target.h           1
-rw-r--r--  tcg/mips/tcg-target.inc.c       13
-rw-r--r--  tcg/ppc/tcg-target.h            1
-rw-r--r--  tcg/ppc/tcg-target.inc.c        7
-rw-r--r--  tcg/s390/tcg-target.h           1
-rw-r--r--  tcg/s390/tcg-target.inc.c       24
-rw-r--r--  tcg/sparc/tcg-target.h          1
-rw-r--r--  tcg/sparc/tcg-target.inc.c      11
-rw-r--r--  tcg/tcg-op.c                    12
-rw-r--r--  tcg/tcg-op.h                    11
-rw-r--r--  tcg/tcg-opc.h                   1
-rw-r--r--  tcg/tcg-runtime.h               2
-rw-r--r--  tcg/tcg.c                       5
-rw-r--r--  tcg/tcg.h                       1
-rw-r--r--  tcg/tci/tcg-target.h            1
38 files changed, 353 insertions, 77 deletions
diff --git a/configure b/configure
index fbb6a93c99..13e040d28c 100755
--- a/configure
+++ b/configure
@@ -1213,12 +1213,12 @@ case "$cpu" in
LDFLAGS="-m64 $LDFLAGS"
;;
sparc)
- LDFLAGS="-m32 $LDFLAGS"
- CPU_CFLAGS="-m32 -mcpu=ultrasparc"
+ CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc"
+ LDFLAGS="-m32 -mv8plus $LDFLAGS"
;;
sparc64)
- LDFLAGS="-m64 $LDFLAGS"
CPU_CFLAGS="-m64 -mcpu=ultrasparc"
+ LDFLAGS="-m64 $LDFLAGS"
;;
s390)
CPU_CFLAGS="-m31"
diff --git a/cpu-exec.c b/cpu-exec.c
index 63a56d0407..5b181c18ed 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -309,10 +309,8 @@ static bool tb_cmp(const void *p, const void *d)
return false;
}
-static TranslationBlock *tb_htable_lookup(CPUState *cpu,
- target_ulong pc,
- target_ulong cs_base,
- uint32_t flags)
+TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+ target_ulong cs_base, uint32_t flags)
{
tb_page_addr_t phys_pc;
struct tb_desc desc;
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index bcde1e6a14..87ae10bcc9 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -368,6 +368,8 @@ struct TranslationBlock {
void tb_free(TranslationBlock *tb);
void tb_flush(CPUState *cpu);
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
+TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+ target_ulong cs_base, uint32_t flags);
#if defined(USE_DIRECT_JUMP)
diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index 2c27490cb8..b1fe2d0161 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -22,6 +22,8 @@
#include "exec/tb-hash-xx.h"
+#ifdef CONFIG_SOFTMMU
+
/* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for
addresses on the same page. The top bits are the same. This allows
TLB invalidation to quickly clear a subset of the hash table. */
@@ -45,6 +47,16 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
| (tmp & TB_JMP_ADDR_MASK));
}
+#else
+
+/* In user-mode we can get better hashing because we do not have a TLB */
+static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
+{
+ return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1);
+}
+
+#endif /* CONFIG_SOFTMMU */
+
static inline
uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags)
{
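For reference, a standalone sketch of the user-mode hash added above; TB_JMP_CACHE_BITS is assumed to be 12 here (the real value lives in cpu-defs.h and is not part of this patch):

    #include <stdint.h>
    #include <stdio.h>

    #define TB_JMP_CACHE_BITS 12                       /* assumed for this sketch */
    #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)

    /* User-mode flavour: with no TLB to invalidate, simply fold the
       upper PC bits into the cache index. */
    static unsigned int tb_jmp_cache_hash_user(uint64_t pc)
    {
        return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1);
    }

    int main(void)
    {
        /* Print the cache slots of two sample PCs. */
        printf("%u %u\n", tb_jmp_cache_hash_user(0x400123),
                          tb_jmp_cache_hash_user(0x7f1234500123));
        return 0;
    }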
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 878fa0700d..e07c7972ab 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -88,6 +88,24 @@
#define smp_read_barrier_depends() barrier()
#endif
+/* Sanity check that the size of an atomic operation isn't "overly large".
+ * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not
+ * want to use them because we ought not need them, and this lets us do a
+ * bit of sanity checking that other 32-bit hosts might build.
+ *
+ * That said, we have a problem on 64-bit ILP32 hosts in that in order to
+ * sync with TCG_OVERSIZED_GUEST, this must match TCG_TARGET_REG_BITS.
+ * We'd prefer not to pull in everything else TCG related, so handle
+ * those few cases by hand.
+ *
+ * Note that x32 is fully detected with __x86_64__ + _ILP32, and that for
+ * Sparc we always force the use of sparcv9 in configure.
+ */
+#if defined(__x86_64__) || defined(__sparc__)
+# define ATOMIC_REG_SIZE 8
+#else
+# define ATOMIC_REG_SIZE sizeof(void *)
+#endif
/* Weak atomic operations prevent the compiler moving other
* loads/stores past the atomic operation load/store. However there is
@@ -104,7 +122,7 @@
#define atomic_read(ptr) \
({ \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+ QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
atomic_read__nocheck(ptr); \
})
@@ -112,7 +130,7 @@
__atomic_store_n(ptr, i, __ATOMIC_RELAXED)
#define atomic_set(ptr, i) do { \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+ QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
atomic_set__nocheck(ptr, i); \
} while(0)
@@ -130,27 +148,27 @@
#define atomic_rcu_read(ptr) \
({ \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+ QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
typeof_strip_qual(*ptr) _val; \
atomic_rcu_read__nocheck(ptr, &_val); \
_val; \
})
#define atomic_rcu_set(ptr, i) do { \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+ QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
__atomic_store_n(ptr, i, __ATOMIC_RELEASE); \
} while(0)
#define atomic_load_acquire(ptr) \
({ \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+ QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
typeof_strip_qual(*ptr) _val; \
__atomic_load(ptr, &_val, __ATOMIC_ACQUIRE); \
_val; \
})
#define atomic_store_release(ptr, i) do { \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+ QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
__atomic_store_n(ptr, i, __ATOMIC_RELEASE); \
} while(0)
@@ -162,7 +180,7 @@
})
#define atomic_xchg(ptr, i) ({ \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+ QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
atomic_xchg__nocheck(ptr, i); \
})
@@ -175,7 +193,7 @@
})
#define atomic_cmpxchg(ptr, old, new) ({ \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+ QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
atomic_cmpxchg__nocheck(ptr, old, new); \
})
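To illustrate why the check is relaxed (not QEMU code; only the macro definition is copied from the hunk above): on an ILP32 x86-64 host such as x32, sizeof(void *) is 4 while the hardware still has 8-byte atomics, so comparing against ATOMIC_REG_SIZE keeps 64-bit atomic_read()/atomic_set() usable there while still rejecting them on genuinely 32-bit hosts.

    #include <stdint.h>
    #include <stdio.h>

    #if defined(__x86_64__) || defined(__sparc__)
    # define ATOMIC_REG_SIZE 8
    #else
    # define ATOMIC_REG_SIZE sizeof(void *)
    #endif

    int main(void)
    {
        /* Old check: tied to pointer width, rejects 64-bit atomics on x32. */
        printf("old check passes: %d\n", sizeof(uint64_t) <= sizeof(void *));
        /* New check: tied to the widest host register TCG will use. */
        printf("new check passes: %d\n", sizeof(uint64_t) <= ATOMIC_REG_SIZE);
        return 0;
    }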
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index df5d695344..7c45ae360c 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -89,6 +89,9 @@ typedef enum {
updated the PC for the next instruction to be executed. */
EXIT_PC_STALE,
+ /* We are exiting the TB due to page crossing or space constraints. */
+ EXIT_FALLTHRU,
+
/* We are ending the TB with a noreturn function call, e.g. longjmp.
No following code will be executed. */
EXIT_NORETURN,
@@ -1157,6 +1160,7 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode)
#ifndef CONFIG_USER_ONLY
/* Privileged PAL code */
if (palcode < 0x40 && (ctx->tb->flags & TB_FLAGS_USER_MODE) == 0) {
+ TCGv tmp;
switch (palcode) {
case 0x01:
/* CFLUSH */
@@ -1182,10 +1186,8 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode)
offsetof(CPUAlphaState, sysval));
break;
- case 0x35: {
+ case 0x35:
/* SWPIPL */
- TCGv tmp;
-
/* Note that we already know we're in kernel mode, so we know
that PS only contains the 3 IPL bits. */
tcg_gen_ld8u_i64(ctx->ir[IR_V0], cpu_env,
@@ -1197,7 +1199,6 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode)
tcg_gen_st8_i64(tmp, cpu_env, offsetof(CPUAlphaState, ps));
tcg_temp_free(tmp);
break;
- }
case 0x36:
/* RDPS */
@@ -1220,6 +1221,14 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode)
-offsetof(AlphaCPU, env) + offsetof(CPUState, cpu_index));
break;
+ case 0x3E:
+ /* WTINT */
+ tmp = tcg_const_i64(1);
+ tcg_gen_st32_i64(tmp, cpu_env, -offsetof(AlphaCPU, env) +
+ offsetof(CPUState, halted));
+ tcg_gen_movi_i64(ctx->ir[IR_V0], 0);
+ return gen_excp(ctx, EXCP_HALTED, 0);
+
default:
palcode &= 0x3f;
goto do_call_pal;
@@ -1369,7 +1378,7 @@ static ExitStatus gen_mtpr(DisasContext *ctx, TCGv vb, int regno)
tmp = tcg_const_i64(1);
tcg_gen_st32_i64(tmp, cpu_env, -offsetof(AlphaCPU, env) +
offsetof(CPUState, halted));
- return gen_excp(ctx, EXCP_HLT, 0);
+ return gen_excp(ctx, EXCP_HALTED, 0);
case 252:
/* HALT */
@@ -2978,7 +2987,7 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
|| num_insns >= max_insns
|| singlestep
|| ctx.singlestep_enabled)) {
- ret = EXIT_PC_STALE;
+ ret = EXIT_FALLTHRU;
}
} while (ret == NO_EXIT);
@@ -2990,6 +2999,13 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
case EXIT_GOTO_TB:
case EXIT_NORETURN:
break;
+ case EXIT_FALLTHRU:
+ if (use_goto_tb(&ctx, ctx.pc)) {
+ tcg_gen_goto_tb(0);
+ tcg_gen_movi_i64(cpu_pc, ctx.pc);
+ tcg_gen_exit_tb((uintptr_t)ctx.tb);
+ }
+ /* FALLTHRU */
case EXIT_PC_STALE:
tcg_gen_movi_i64(cpu_pc, ctx.pc);
/* FALLTHRU */
@@ -3001,7 +3017,7 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
}
break;
default:
- abort();
+ g_assert_not_reached();
}
gen_tb_end(tb, num_insns);
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index a82ab49c94..860e279658 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -379,7 +379,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
} else if (s->singlestep_enabled) {
gen_exception_internal(EXCP_DEBUG);
} else {
- tcg_gen_exit_tb(0);
+ tcg_gen_lookup_and_goto_ptr(cpu_pc);
s->is_jmp = DISAS_TB_JUMP;
}
}
@@ -11367,8 +11367,7 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
gen_a64_set_pc_im(dc->pc);
/* fall through */
case DISAS_JUMP:
- /* indicate that the hash table must be used to find the next TB */
- tcg_gen_exit_tb(0);
+ tcg_gen_lookup_and_goto_ptr(cpu_pc);
break;
case DISAS_TB_JUMP:
case DISAS_EXC:
diff --git a/target/arm/translate.c b/target/arm/translate.c
index ae6646c05b..0862f9e4aa 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -1182,7 +1182,7 @@ static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
gen_set_condexec(s);
gen_set_pc_im(s, s->pc - offset);
gen_exception_internal(excp);
- s->is_jmp = DISAS_JUMP;
+ s->is_jmp = DISAS_EXC;
}
static void gen_exception_insn(DisasContext *s, int offset, int excp,
@@ -1191,14 +1191,14 @@ static void gen_exception_insn(DisasContext *s, int offset, int excp,
gen_set_condexec(s);
gen_set_pc_im(s, s->pc - offset);
gen_exception(excp, syn, target_el);
- s->is_jmp = DISAS_JUMP;
+ s->is_jmp = DISAS_EXC;
}
/* Force a TB lookup after an instruction that changes the CPU state. */
static inline void gen_lookup_tb(DisasContext *s)
{
tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
- s->is_jmp = DISAS_JUMP;
+ s->is_jmp = DISAS_EXIT;
}
static inline void gen_hlt(DisasContext *s, int imm)
@@ -4150,7 +4150,15 @@ static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
#endif
}
-static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
+static void gen_goto_ptr(void)
+{
+ TCGv addr = tcg_temp_new();
+ tcg_gen_extu_i32_tl(addr, cpu_R[15]);
+ tcg_gen_lookup_and_goto_ptr(addr);
+ tcg_temp_free(addr);
+}
+
+static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
{
if (use_goto_tb(s, dest)) {
tcg_gen_goto_tb(n);
@@ -4158,7 +4166,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
tcg_gen_exit_tb((uintptr_t)s->tb + n);
} else {
gen_set_pc_im(s, dest);
- tcg_gen_exit_tb(0);
+ gen_goto_ptr();
}
}
@@ -12091,11 +12099,14 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
gen_set_pc_im(dc, dc->pc);
/* fall through */
case DISAS_JUMP:
+ gen_goto_ptr();
+ break;
default:
/* indicate that the hash table must be used to find the next TB */
tcg_gen_exit_tb(0);
break;
case DISAS_TB_JUMP:
+ case DISAS_EXC:
/* nothing more to generate */
break;
case DISAS_WFI:
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 6b2cc34c33..15d383d9af 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -139,6 +139,10 @@ static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
* custom end-of-TB code)
*/
#define DISAS_BX_EXCRET 11
+/* For instructions which want an immediate exit to the main loop,
+ * as opposed to attempting to use lookup_and_goto_ptr.
+ */
+#define DISAS_EXIT 12
#ifdef TARGET_AARCH64
void a64_translate_init(void);
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index 9e8c233501..e10abc5e04 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -517,7 +517,7 @@ static void gen_goto_tb(DisasContext *ctx, int which,
if (ctx->singlestep_enabled) {
gen_excp_1(EXCP_DEBUG);
} else {
- tcg_gen_exit_tb(0);
+ tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f);
}
}
}
@@ -1510,7 +1510,7 @@ static ExitStatus do_ibranch(DisasContext *ctx, TCGv dest,
} else if (is_n && use_nullify_skip(ctx)) {
/* The (conditional) branch, B, nullifies the next insn, N,
and we're allowed to skip execution N (no single-step or
- tracepoint in effect). Since the exit_tb that we must use
+ tracepoint in effect). Since the goto_ptr that we must use
for the indirect branch consumes no special resources, we
can (conditionally) skip B and continue execution. */
/* The use_nullify_skip test implies we have a known control path. */
@@ -1527,7 +1527,7 @@ static ExitStatus do_ibranch(DisasContext *ctx, TCGv dest,
if (link != 0) {
tcg_gen_movi_tl(cpu_gr[link], ctx->iaoq_n);
}
- tcg_gen_exit_tb(0);
+ tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f);
return nullify_end(ctx, NO_EXIT);
} else {
cond_prep(&ctx->null_cond);
@@ -3885,7 +3885,7 @@ void gen_intermediate_code(CPUHPPAState *env, struct TranslationBlock *tb)
if (ctx.singlestep_enabled) {
gen_excp_1(EXCP_DEBUG);
} else {
- tcg_gen_exit_tb(0);
+ tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f);
}
break;
default:
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 1d1372fb43..674ec96d5a 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -141,6 +141,7 @@ typedef struct DisasContext {
} DisasContext;
static void gen_eob(DisasContext *s);
+static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);
@@ -2153,9 +2154,9 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
gen_jmp_im(eip);
tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
} else {
- /* jump to another page: currently not optimized */
+ /* jump to another page */
gen_jmp_im(eip);
- gen_eob(s);
+ gen_jr(s, cpu_tmp0);
}
}
@@ -2509,7 +2510,8 @@ static void gen_bnd_jmp(DisasContext *s)
If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
S->TF. This is used by the syscall/sysret insns. */
-static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
+static void
+do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, TCGv jr)
{
gen_update_cc_op(s);
@@ -2530,12 +2532,27 @@ static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
tcg_gen_exit_tb(0);
} else if (s->tf) {
gen_helper_single_step(cpu_env);
+ } else if (!TCGV_IS_UNUSED(jr)) {
+ TCGv vaddr = tcg_temp_new();
+
+ tcg_gen_add_tl(vaddr, jr, cpu_seg_base[R_CS]);
+ tcg_gen_lookup_and_goto_ptr(vaddr);
+ tcg_temp_free(vaddr);
} else {
tcg_gen_exit_tb(0);
}
s->is_jmp = DISAS_TB_JUMP;
}
+static inline void
+gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
+{
+ TCGv unused;
+
+ TCGV_UNUSED(unused);
+ do_gen_eob_worker(s, inhibit, recheck_tf, unused);
+}
+
/* End of block.
If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. */
static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
@@ -2549,6 +2566,12 @@ static void gen_eob(DisasContext *s)
gen_eob_worker(s, false, false);
}
+/* Jump to register */
+static void gen_jr(DisasContext *s, TCGv dest)
+{
+ do_gen_eob_worker(s, false, false, dest);
+}
+
/* generate a jump to eip. No segment change must happen before as a
direct call to the next block may occur */
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
@@ -4973,7 +4996,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_push_v(s, cpu_T1);
gen_op_jmp_v(cpu_T0);
gen_bnd_jmp(s);
- gen_eob(s);
+ gen_jr(s, cpu_T0);
break;
case 3: /* lcall Ev */
gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
@@ -4991,7 +5014,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_const_i32(dflag - 1),
tcg_const_i32(s->pc - s->cs_base));
}
- gen_eob(s);
+ tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
+ gen_jr(s, cpu_tmp4);
break;
case 4: /* jmp Ev */
if (dflag == MO_16) {
@@ -4999,7 +5023,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
gen_op_jmp_v(cpu_T0);
gen_bnd_jmp(s);
- gen_eob(s);
+ gen_jr(s, cpu_T0);
break;
case 5: /* ljmp Ev */
gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
@@ -5014,7 +5038,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_movl_seg_T0_vm(R_CS);
gen_op_jmp_v(cpu_T1);
}
- gen_eob(s);
+ tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
+ gen_jr(s, cpu_tmp4);
break;
case 6: /* push Ev */
gen_push_v(s, cpu_T0);
@@ -6394,7 +6419,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
/* Note that gen_pop_T0 uses a zero-extending load. */
gen_op_jmp_v(cpu_T0);
gen_bnd_jmp(s);
- gen_eob(s);
+ gen_jr(s, cpu_T0);
break;
case 0xc3: /* ret */
ot = gen_pop_T0(s);
@@ -6402,7 +6427,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
/* Note that gen_pop_T0 uses a zero-extending load. */
gen_op_jmp_v(cpu_T0);
gen_bnd_jmp(s);
- gen_eob(s);
+ gen_jr(s, cpu_T0);
break;
case 0xca: /* lret im */
val = cpu_ldsw_code(env, s->pc);
diff --git a/target/mips/translate.c b/target/mips/translate.c
index 3022f349cb..559f8fed89 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -4233,7 +4233,7 @@ static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
save_cpu_state(ctx, 0);
gen_helper_raise_exception_debug(cpu_env);
}
- tcg_gen_exit_tb(0);
+ tcg_gen_lookup_and_goto_ptr(cpu_PC);
}
}
@@ -10725,7 +10725,7 @@ static void gen_branch(DisasContext *ctx, int insn_bytes)
save_cpu_state(ctx, 0);
gen_helper_raise_exception_debug(cpu_env);
}
- tcg_gen_exit_tb(0);
+ tcg_gen_lookup_and_goto_ptr(cpu_PC);
break;
default:
fprintf(stderr, "unknown branch 0x%x\n", proc_hflags);
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index cfec47959d..2f3c2e5dfb 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -164,7 +164,7 @@ static void gen_goto_tb(DisasContext *dc, int n, uint32_t dest)
if (use_goto_tb(dc, dest)) {
tcg_gen_goto_tb(n);
tcg_gen_movi_tl(dc->cpu_R[R_PC], dest);
- tcg_gen_exit_tb((tcg_target_long)tb + n);
+ tcg_gen_exit_tb((uintptr_t)tb + n);
} else {
tcg_gen_movi_tl(dc->cpu_R[R_PC], dest);
tcg_gen_exit_tb(0);
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 4c48c593cd..628fb8685d 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -608,11 +608,16 @@ static void gen_op_calc_cc(DisasContext *s)
set_cc_static(s);
}
-static int use_goto_tb(DisasContext *s, uint64_t dest)
+static bool use_exit_tb(DisasContext *s)
{
- if (unlikely(s->singlestep_enabled) ||
- (s->tb->cflags & CF_LAST_IO) ||
- (s->tb->flags & FLAG_MASK_PER)) {
+ return (s->singlestep_enabled ||
+ (s->tb->cflags & CF_LAST_IO) ||
+ (s->tb->flags & FLAG_MASK_PER));
+}
+
+static bool use_goto_tb(DisasContext *s, uint64_t dest)
+{
+ if (unlikely(use_exit_tb(s))) {
return false;
}
#ifndef CONFIG_USER_ONLY
@@ -5461,8 +5466,10 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb)
/* Exit the TB, either by raising a debug exception or by return. */
if (do_debug) {
gen_exception(EXCP_DEBUG);
- } else {
+ } else if (use_exit_tb(&dc)) {
tcg_gen_exit_tb(0);
+ } else {
+ tcg_gen_lookup_and_goto_ptr(psw_addr);
}
break;
default:
diff --git a/tcg-runtime.c b/tcg-runtime.c
index 4c60c96658..7fa90ce508 100644
--- a/tcg-runtime.c
+++ b/tcg-runtime.c
@@ -27,6 +27,9 @@
#include "exec/helper-proto.h"
#include "exec/cpu_ldst.h"
#include "exec/exec-all.h"
+#include "exec/tb-hash.h"
+#include "disas/disas.h"
+#include "exec/log.h"
/* 32-bit helpers */
@@ -141,6 +144,35 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
return ctpop64(arg);
}
+void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
+{
+ CPUState *cpu = ENV_GET_CPU(env);
+ TranslationBlock *tb;
+ target_ulong cs_base, pc;
+ uint32_t flags;
+
+ tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
+ if (likely(tb)) {
+ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+ if (likely(tb->pc == addr && tb->cs_base == cs_base &&
+ tb->flags == flags)) {
+ goto found;
+ }
+ tb = tb_htable_lookup(cpu, addr, cs_base, flags);
+ if (likely(tb)) {
+ atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb);
+ goto found;
+ }
+ }
+ return tcg_ctx.code_gen_epilogue;
+ found:
+ qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
+ "Chain %p [%d: " TARGET_FMT_lx "] %s\n",
+ tb->tc_ptr, cpu->cpu_index, addr,
+ lookup_symbol(addr));
+ return tb->tc_ptr;
+}
+
void HELPER(exit_atomic)(CPUArchState *env)
{
cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
diff --git a/tcg/README b/tcg/README
index a9858c2f74..bf49e8242b 100644
--- a/tcg/README
+++ b/tcg/README
@@ -477,6 +477,14 @@ current TB was linked to this TB. Otherwise execute the next
instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
at most once with each slot index per TB.
+* lookup_and_goto_ptr tb_addr
+
+Look up a TB address ('tb_addr') and jump to it if valid. If not valid,
+jump to the TCG epilogue to go back to the exec loop.
+
+This operation is optional. If the TCG backend does not implement the
+goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
+
* qemu_ld_i32/i64 t0, t1, flags, memidx
* qemu_st_i32/i64 t0, t1, flags, memidx
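At run time the operand of goto_ptr comes from the lookup_tb_ptr helper added to tcg-runtime.c earlier in this patch. A condensed sketch of its contract (the real helper also consults cpu->tb_jmp_cache before touching the hash table, and logs the chaining):

    void *helper_lookup_tb_ptr(CPUArchState *env, target_ulong addr)
    {
        CPUState *cpu = ENV_GET_CPU(env);
        target_ulong cs_base, pc;
        uint32_t flags;
        TranslationBlock *tb;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
        tb = tb_htable_lookup(cpu, addr, cs_base, flags);

        /* Either jump into the found TB, or take the epilogue back to
           the exec loop -- the same effect as exit_tb(0). */
        return tb ? tb->tc_ptr : tcg_ctx.code_gen_epilogue;
    }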
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 1a5ea23844..55a46ac825 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -77,6 +77,7 @@ typedef enum {
#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_extrl_i64_i32 0
#define TCG_TARGET_HAS_extrh_i64_i32 0
+#define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_div_i64 1
#define TCG_TARGET_HAS_rem_i64 1
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 290de6dae6..5f185458f1 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -1357,8 +1357,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
switch (opc) {
case INDEX_op_exit_tb:
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
- tcg_out_goto(s, tb_ret_addr);
+ /* Reuse the zeroing that exists for goto_ptr. */
+ if (a0 == 0) {
+ tcg_out_goto(s, s->code_gen_epilogue);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
+ tcg_out_goto(s, tb_ret_addr);
+ }
break;
case INDEX_op_goto_tb:
@@ -1374,6 +1379,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
break;
+ case INDEX_op_goto_ptr:
+ tcg_out_insn(s, 3207, BR, a0);
+ break;
+
case INDEX_op_br:
tcg_out_goto_label(s, arg_label(a0));
break;
@@ -1735,6 +1744,7 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_br, { } },
+ { INDEX_op_goto_ptr, { "r" } },
{ INDEX_op_ld8u_i32, { "r", "r" } },
{ INDEX_op_ld8s_i32, { "r", "r" } },
@@ -1942,6 +1952,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
+ /*
+ * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+ * and fall through to the rest of the epilogue.
+ */
+ s->code_gen_epilogue = s->code_ptr;
+ tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
+
+ /* TB epilogue */
tb_ret_addr = s->code_ptr;
/* Remove TCG locals stack space. */
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 75ea247bc4..5ef1086710 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -123,6 +123,7 @@ extern bool use_idiv_instructions;
#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_div_i32 use_idiv_instructions
#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_goto_ptr 1
enum {
TCG_AREG0 = TCG_REG_R6,
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index e75a6d4943..9f5cb66718 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -329,11 +329,6 @@ static const uint8_t tcg_cond_to_arm_cond[] = {
[TCG_COND_GTU] = COND_HI,
};
-static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
-{
- tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
-}
-
static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
{
tcg_out32(s, (cond << 28) | 0x0a000000 |
@@ -402,6 +397,18 @@ static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
}
}
+static inline void tcg_out_bx(TCGContext *s, int cond, TCGReg rn)
+{
+ /* Unless the C portion of QEMU is compiled as thumb, we don't
+ actually need true BX semantics; merely a branch to an address
+ held in a register. */
+ if (use_armv5t_instructions) {
+ tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
+ } else {
+ tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
+ }
+}
+
static inline void tcg_out_dat_imm(TCGContext *s,
int cond, int opc, int rd, int rn, int im)
{
@@ -977,7 +984,7 @@ static inline void tcg_out_st8(TCGContext *s, int cond,
* with the code buffer limited to 16MB we wouldn't need the long case.
* But we also use it for the tail-call to the qemu_ld/st helpers, which does.
*/
-static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
+static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
{
intptr_t addri = (intptr_t)addr;
ptrdiff_t disp = tcg_pcrel_diff(s, addr);
@@ -987,15 +994,9 @@ static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
return;
}
+ assert(use_armv5t_instructions || (addri & 1) == 0);
tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
- if (use_armv5t_instructions) {
- tcg_out_bx(s, cond, TCG_REG_TMP);
- } else {
- if (addri & 1) {
- tcg_abort();
- }
- tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
- }
+ tcg_out_bx(s, cond, TCG_REG_TMP);
}
/* The call case is mostly used for helpers - so it's not unreasonable
@@ -1654,8 +1655,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
switch (opc) {
case INDEX_op_exit_tb:
- tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
- tcg_out_goto(s, COND_AL, tb_ret_addr);
+ /* Reuse the zeroing that exists for goto_ptr. */
+ a0 = args[0];
+ if (a0 == 0) {
+ tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
+ } else {
+ tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
+ tcg_out_goto(s, COND_AL, tb_ret_addr);
+ }
break;
case INDEX_op_goto_tb:
if (s->tb_jmp_insn_offset) {
@@ -1670,6 +1677,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
break;
+ case INDEX_op_goto_ptr:
+ tcg_out_bx(s, COND_AL, args[0]);
+ break;
case INDEX_op_br:
tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
break;
@@ -1960,6 +1970,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_br, { } },
+ { INDEX_op_goto_ptr, { "r" } },
{ INDEX_op_ld8u_i32, { "r", "r" } },
{ INDEX_op_ld8s_i32, { "r", "r" } },
@@ -2135,9 +2146,16 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
- tb_ret_addr = s->code_ptr;
- /* Epilogue. We branch here via tb_ret_addr. */
+ /*
+ * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+ * and fall through to the rest of the epilogue.
+ */
+ s->code_gen_epilogue = s->code_ptr;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
+
+ /* TB epilogue */
+ tb_ret_addr = s->code_ptr;
tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
TCG_REG_CALL_STACK, stack_addend, 1);
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 4275787db9..73a15f7e80 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -107,6 +107,7 @@ extern bool have_popcnt;
#define TCG_TARGET_HAS_muls2_i32 1
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_goto_ptr 1
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_extrl_i64_i32 0
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 5918008296..01e3b4e95c 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -1882,8 +1882,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
switch (opc) {
case INDEX_op_exit_tb:
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
- tcg_out_jmp(s, tb_ret_addr);
+ /* Reuse the zeroing that exists for goto_ptr. */
+ if (a0 == 0) {
+ tcg_out_jmp(s, s->code_gen_epilogue);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
+ tcg_out_jmp(s, tb_ret_addr);
+ }
break;
case INDEX_op_goto_tb:
if (s->tb_jmp_insn_offset) {
@@ -1906,6 +1911,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
break;
+ case INDEX_op_goto_ptr:
+ /* jmp to the given host address (could be epilogue) */
+ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0);
+ break;
case INDEX_op_br:
tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
break;
@@ -2277,6 +2286,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
+ static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
@@ -2299,6 +2309,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
= { .args_ct_str = { "L", "L", "L", "L" } };
switch (op) {
+ case INDEX_op_goto_ptr:
+ return &r;
+
case INDEX_op_ld8u_i32:
case INDEX_op_ld8u_i64:
case INDEX_op_ld8s_i32:
@@ -2567,6 +2580,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif
+ /*
+ * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+ * and fall through to the rest of the epilogue.
+ */
+ s->code_gen_epilogue = s->code_ptr;
+ tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);
+
/* TB epilogue */
tb_ret_addr = s->code_ptr;
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 42aea03a8b..901bb7575d 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -173,6 +173,7 @@ typedef enum {
#define TCG_TARGET_HAS_mulsh_i64 0
#define TCG_TARGET_HAS_extrl_i64_i32 0
#define TCG_TARGET_HAS_extrh_i64_i32 0
+#define TCG_TARGET_HAS_goto_ptr 0
#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index f46d64a3a7..d75cb63ed3 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -130,6 +130,7 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_muluh_i32 1
#define TCG_TARGET_HAS_mulsh_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_goto_ptr 1
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_add2_i32 0
diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
index 2a7e1c7f5b..8cff9a6bf9 100644
--- a/tcg/mips/tcg-target.inc.c
+++ b/tcg/mips/tcg-target.inc.c
@@ -1747,6 +1747,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_nop(s);
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
break;
+ case INDEX_op_goto_ptr:
+ /* jmp to the given host address (could be epilogue) */
+ tcg_out_opc_reg(s, OPC_JR, 0, a0, 0);
+ tcg_out_nop(s);
+ break;
case INDEX_op_br:
tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO,
arg_label(a0));
@@ -2160,6 +2165,7 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_br, { } },
+ { INDEX_op_goto_ptr, { "r" } },
{ INDEX_op_ld8u_i32, { "r", "r" } },
{ INDEX_op_ld8s_i32, { "r", "r" } },
@@ -2451,6 +2457,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/* delay slot */
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ /*
+ * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+ * and fall through to the rest of the epilogue.
+ */
+ s->code_gen_epilogue = s->code_ptr;
+ tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_V0, TCG_REG_ZERO);
+
/* TB epilogue */
tb_ret_addr = s->code_ptr;
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index abd8b3d6cd..5f4a40a5b4 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -82,6 +82,7 @@ extern bool have_isa_3_00;
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 1
#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_goto_ptr 1
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_add2_i32 0
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 64f67d2c77..8d50f18328 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -1932,6 +1932,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/* Epilogue */
tcg_debug_assert(tb_ret_addr == s->code_ptr);
+ s->code_gen_epilogue = tb_ret_addr;
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
@@ -1986,6 +1987,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
#endif
s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
break;
+ case INDEX_op_goto_ptr:
+ tcg_out32(s, MTSPR | RS(args[0]) | CTR);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0);
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+ break;
case INDEX_op_br:
{
TCGLabel *l = arg_label(args[0]);
@@ -2555,6 +2561,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_br, { } },
+ { INDEX_op_goto_ptr, { "r" } },
{ INDEX_op_ld8u_i32, { "r", "r" } },
{ INDEX_op_ld8s_i32, { "r", "r" } },
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index cbdd2a6275..957f0c0afe 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -92,6 +92,7 @@ extern uint64_t s390_facilities;
#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_extrl_i64_i32 0
#define TCG_TARGET_HAS_extrh_i64_i32 0
+#define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_div2_i64 1
#define TCG_TARGET_HAS_rot_i64 1
diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c
index a679280b92..5d7083e90c 100644
--- a/tcg/s390/tcg-target.inc.c
+++ b/tcg/s390/tcg-target.inc.c
@@ -1741,9 +1741,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
switch (opc) {
case INDEX_op_exit_tb:
- /* return value */
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]);
- tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
+ /* Reuse the zeroing that exists for goto_ptr. */
+ a0 = args[0];
+ if (a0 == 0) {
+ tgen_gotoi(s, S390_CC_ALWAYS, s->code_gen_epilogue);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
+ tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
+ }
break;
case INDEX_op_goto_tb:
@@ -1767,6 +1772,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
break;
+ case INDEX_op_goto_ptr:
+ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, args[0]);
+ break;
+
OP_32_64(ld8u):
/* ??? LLC (RXY format) is only present with the extended-immediate
facility, whereas LLGC is always present. */
@@ -2241,6 +2250,7 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_br, { } },
+ { INDEX_op_goto_ptr, { "r" } },
{ INDEX_op_ld8u_i32, { "r", "r" } },
{ INDEX_op_ld8s_i32, { "r", "r" } },
@@ -2439,6 +2449,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/* br %r3 (go to TB) */
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
+ /*
+ * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+ * and fall through to the rest of the epilogue.
+ */
+ s->code_gen_epilogue = s->code_ptr;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
+
+ /* TB epilogue */
tb_ret_addr = s->code_ptr;
/* lmg %r6,%r15,fs+48(%r15) (restore registers) */
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index b8b74f96ff..854a0afd70 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -123,6 +123,7 @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_muls2_i32 1
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_extrl_i64_i32 1
#define TCG_TARGET_HAS_extrh_i64_i32 1
diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c
index 3785d77f62..18afce2f87 100644
--- a/tcg/sparc/tcg-target.inc.c
+++ b/tcg/sparc/tcg-target.inc.c
@@ -1003,7 +1003,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/* delay slot */
tcg_out_nop(s);
- /* No epilogue required. We issue ret + restore directly in the TB. */
+ /* Epilogue for goto_ptr. */
+ s->code_gen_epilogue = s->code_ptr;
+ tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
+ /* delay slot */
+ tcg_out_movi_imm13(s, TCG_REG_O0, 0);
#ifdef CONFIG_SOFTMMU
build_trampolines(s);
@@ -1288,6 +1292,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_nop(s);
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
break;
+ case INDEX_op_goto_ptr:
+ tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL);
+ tcg_out_nop(s);
+ break;
case INDEX_op_br:
tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0));
tcg_out_nop(s);
@@ -1513,6 +1521,7 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_br, { } },
+ { INDEX_op_goto_ptr, { "r" } },
{ INDEX_op_ld8u_i32, { "r", "r" } },
{ INDEX_op_ld8s_i32, { "r", "r" } },
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 6b1f41500c..87f673ef49 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2587,6 +2587,18 @@ void tcg_gen_goto_tb(unsigned idx)
tcg_gen_op1i(INDEX_op_goto_tb, idx);
}
+void tcg_gen_lookup_and_goto_ptr(TCGv addr)
+{
+ if (TCG_TARGET_HAS_goto_ptr && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+ TCGv_ptr ptr = tcg_temp_new_ptr();
+ gen_helper_lookup_tb_ptr(ptr, tcg_ctx.tcg_env, addr);
+ tcg_gen_op1i(INDEX_op_goto_ptr, GET_TCGV_PTR(ptr));
+ tcg_temp_free_ptr(ptr);
+ } else {
+ tcg_gen_exit_tb(0);
+ }
+}
+
static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
{
/* Trigger the asserts within as early as possible. */
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index c68e300a68..5d3278f243 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -796,6 +796,17 @@ static inline void tcg_gen_exit_tb(uintptr_t val)
*/
void tcg_gen_goto_tb(unsigned idx);
+/**
+ * tcg_gen_lookup_and_goto_ptr() - look up a TB and jump to it if valid
+ * @addr: Guest address of the target TB
+ *
+ * If the TB is not valid, jump to the epilogue.
+ *
+ * This operation is optional. If the TCG backend does not implement goto_ptr,
+ * this op is equivalent to calling tcg_gen_exit_tb() with 0 as the argument.
+ */
+void tcg_gen_lookup_and_goto_ptr(TCGv addr);
+
#if TARGET_LONG_BITS == 32
#define tcg_temp_new() tcg_temp_new_i32()
#define tcg_global_reg_new tcg_global_reg_new_i32
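A hedged usage sketch contrasting the two kinds of TB exit (register and helper names follow the target/alpha hunk above; this is not a drop-in for any particular target):

    if (use_goto_tb(ctx, dest)) {
        /* Direct jump: destination known at translation time, patchable. */
        tcg_gen_goto_tb(0);
        tcg_gen_movi_i64(cpu_pc, dest);
        tcg_gen_exit_tb((uintptr_t)ctx->tb);
    } else {
        /* Destination outside the page or only known at run time:
           let the helper find the next TB instead of exiting. */
        tcg_gen_movi_i64(cpu_pc, dest);
        tcg_gen_lookup_and_goto_ptr(cpu_pc);
    }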
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index f06f89405e..956fb1e9f3 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -193,6 +193,7 @@ DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
TCG_OPF_NOT_PRESENT)
DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
+DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr))
DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
index 114ea6fecf..c41d38a557 100644
--- a/tcg/tcg-runtime.h
+++ b/tcg/tcg-runtime.h
@@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl)
+
DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
#ifdef CONFIG_SOFTMMU
diff --git a/tcg/tcg.c b/tcg/tcg.c
index cb898f1636..564292f54d 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -424,6 +424,11 @@ void tcg_prologue_init(TCGContext *s)
qemu_log_unlock();
}
#endif
+
+ /* Assert that goto_ptr is implemented completely. */
+ if (TCG_TARGET_HAS_goto_ptr) {
+ tcg_debug_assert(s->code_gen_epilogue != NULL);
+ }
}
void tcg_func_start(TCGContext *s)
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6c216bb73f..5ec48d1787 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -699,6 +699,7 @@ struct TCGContext {
extension that allows arithmetic on void*. */
int code_gen_max_blocks;
void *code_gen_prologue;
+ void *code_gen_epilogue;
void *code_gen_buffer;
size_t code_gen_buffer_size;
void *code_gen_ptr;
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 838bf3a858..06963288dc 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -85,6 +85,7 @@
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_goto_ptr 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_extrl_i64_i32 0