From 86360ad71df0070283469b8ae59f33cdd013501d Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 28 Mar 2014 15:29:46 +0000 Subject: exec-all.h: Use stl_p to avoid undefined behaviour patching x86 jumps The code which patches x86 jump instructions assumes it can do an unaligned write of a uint32_t. This is actually safe on x86, but it's still undefined behaviour. We have infrastructure for doing efficient unaligned accesses which doesn't engage in undefined behaviour, so use it. This is technically fractionally less efficient, at least with gcc 4.6; instead of one instruction: 7b2: 89 3e mov %edi,(%rsi) we get an extra spurious store to the stack slot: 7b2: 89 7c 24 64 mov %edi,0x64(%rsp) 7b6: 89 3e mov %edi,(%rsi) Reviewed-by: Alex Bennée Signed-off-by: Peter Maydell Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/exec') diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index f9ac332f9d..1c49a21799 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -229,7 +229,7 @@ void ppc_tb_set_jmp_target(unsigned long jmp_addr, unsigned long addr); static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) { /* patch the branch destination */ - *(uint32_t *)jmp_addr = addr - (jmp_addr + 4); + stl_p((void*)jmp_addr, addr - (jmp_addr + 4)); /* no need to flush icache explicitly */ } #elif defined(__aarch64__) -- cgit v1.2.3-55-g7522 From 1813e1758dcc60c96f8caf2d0c66c2193f1f86e0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Mar 2014 12:56:22 -0700 Subject: tcg: Define tcg_insn_unit for code pointers To be defined by the tcg backend based on the elemental unit of the ISA. During the transition, allow TCG_TARGET_INSN_UNIT_SIZE to be undefined, which allows us to default tcg_insn_unit to the current uint8_t. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 2 +- tcg/tcg-be-ldst.h | 4 +- tcg/tcg.c | 105 +++++++++++++++++++++++++++++++----------------- tcg/tcg.h | 39 +++++++++++++----- translate-all.c | 13 +++--- 5 files changed, 107 insertions(+), 56 deletions(-) (limited to 'include/exec') diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 1c49a21799..0766e24f8e 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -145,7 +145,7 @@ struct TranslationBlock { #define CF_COUNT_MASK 0x7fff #define CF_LAST_IO 0x8000 /* Last insn may be an IO access. */ - uint8_t *tc_ptr; /* pointer to the translated code */ + void *tc_ptr; /* pointer to the translated code */ /* next matching tb for physical address. */ struct TranslationBlock *phys_hash_next; /* first and second physical page containing code. The lower bit diff --git a/tcg/tcg-be-ldst.h b/tcg/tcg-be-ldst.h index ad94c0ca51..49b3de61ea 100644 --- a/tcg/tcg-be-ldst.h +++ b/tcg/tcg-be-ldst.h @@ -31,8 +31,8 @@ typedef struct TCGLabelQemuLdst { TCGReg datalo_reg; /* reg index for low word to be loaded or stored */ TCGReg datahi_reg; /* reg index for high word to be loaded or stored */ int mem_index; /* soft MMU memory index */ - uint8_t *raddr; /* gen code addr of the next IR of qemu_ld/st IR */ - uint8_t *label_ptr[2]; /* label pointers to be updated */ + tcg_insn_unit *raddr; /* gen code addr of the next IR of qemu_ld/st IR */ + tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */ } TCGLabelQemuLdst; typedef struct TCGBackendData { diff --git a/tcg/tcg.c b/tcg/tcg.c index 31a5d48d03..0b0be454df 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -65,7 +65,7 @@ /* Forward declarations for functions declared in tcg-target.c and used here. */ static void tcg_target_init(TCGContext *s); static void tcg_target_qemu_prologue(TCGContext *s); -static void patch_reloc(uint8_t *code_ptr, int type, +static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend); /* The CIE and FDE header definitions will be common to all hosts. */ @@ -117,55 +117,87 @@ const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs); static TCGRegSet tcg_target_available_regs[2]; static TCGRegSet tcg_target_call_clobber_regs; +#if TCG_TARGET_INSN_UNIT_SIZE == 1 static inline void tcg_out8(TCGContext *s, uint8_t v) { *s->code_ptr++ = v; } -static inline void tcg_patch8(uint8_t *p, uint8_t v) +static inline void tcg_patch8(tcg_insn_unit *p, uint8_t v) { - memcpy(p, &v, sizeof(v)); + *p = v; } +#endif +#if TCG_TARGET_INSN_UNIT_SIZE <= 2 static inline void tcg_out16(TCGContext *s, uint16_t v) { - uint8_t *p = s->code_ptr; - memcpy(p, &v, sizeof(v)); - s->code_ptr = p + 2; + if (TCG_TARGET_INSN_UNIT_SIZE == 2) { + *s->code_ptr++ = v; + } else { + tcg_insn_unit *p = s->code_ptr; + memcpy(p, &v, sizeof(v)); + s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); + } } -static inline void tcg_patch16(uint8_t *p, uint16_t v) +static inline void tcg_patch16(tcg_insn_unit *p, uint16_t v) { - memcpy(p, &v, sizeof(v)); + if (TCG_TARGET_INSN_UNIT_SIZE == 2) { + *p = v; + } else { + memcpy(p, &v, sizeof(v)); + } } +#endif +#if TCG_TARGET_INSN_UNIT_SIZE <= 4 static inline void tcg_out32(TCGContext *s, uint32_t v) { - uint8_t *p = s->code_ptr; - memcpy(p, &v, sizeof(v)); - s->code_ptr = p + 4; + if (TCG_TARGET_INSN_UNIT_SIZE == 4) { + *s->code_ptr++ = v; + } else { + tcg_insn_unit *p = s->code_ptr; + memcpy(p, &v, sizeof(v)); + s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); + } } -static inline void tcg_patch32(uint8_t *p, uint32_t v) +static inline void tcg_patch32(tcg_insn_unit *p, uint32_t v) { - memcpy(p, &v, sizeof(v)); + if (TCG_TARGET_INSN_UNIT_SIZE == 4) { + *p = v; + } else { + memcpy(p, &v, sizeof(v)); + } } +#endif +#if TCG_TARGET_INSN_UNIT_SIZE <= 8 static inline void tcg_out64(TCGContext *s, uint64_t v) { - uint8_t *p = s->code_ptr; - memcpy(p, &v, sizeof(v)); - s->code_ptr = p + 8; + if (TCG_TARGET_INSN_UNIT_SIZE == 8) { + *s->code_ptr++ = v; + } else { + tcg_insn_unit *p = s->code_ptr; + memcpy(p, &v, sizeof(v)); + s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); + } } -static inline void tcg_patch64(uint8_t *p, uint64_t v) +static inline void tcg_patch64(tcg_insn_unit *p, uint64_t v) { - memcpy(p, &v, sizeof(v)); + if (TCG_TARGET_INSN_UNIT_SIZE == 8) { + *p = v; + } else { + memcpy(p, &v, sizeof(v)); + } } +#endif /* label relocation processing */ -static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, +static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, int label_index, intptr_t addend) { TCGLabel *l; @@ -188,23 +220,20 @@ static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, } } -static void tcg_out_label(TCGContext *s, int label_index, void *ptr) +static void tcg_out_label(TCGContext *s, int label_index, tcg_insn_unit *ptr) { - TCGLabel *l; - TCGRelocation *r; + TCGLabel *l = &s->labels[label_index]; intptr_t value = (intptr_t)ptr; + TCGRelocation *r; - l = &s->labels[label_index]; - if (l->has_value) { - tcg_abort(); - } - r = l->u.first_reloc; - while (r != NULL) { + assert(!l->has_value); + + for (r = l->u.first_reloc; r != NULL; r = r->next) { patch_reloc(r->ptr, r->type, value, r->addend); - r = r->next; } + l->has_value = 1; - l->u.value = value; + l->u.value_ptr = ptr; } int gen_new_label(void) @@ -359,7 +388,7 @@ void tcg_prologue_init(TCGContext *s) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { - size_t size = s->code_ptr - s->code_buf; + size_t size = tcg_current_code_size(s); qemu_log("PROLOGUE: [size=%zu]\n", size); log_disas(s->code_buf, size); qemu_log("\n"); @@ -2538,7 +2567,8 @@ static void dump_op_count(void) #endif -static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, +static inline int tcg_gen_code_common(TCGContext *s, + tcg_insn_unit *gen_code_buf, long search_pc) { TCGOpcode opc; @@ -2653,7 +2683,7 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, } args += def->nb_args; next: - if (search_pc >= 0 && search_pc < s->code_ptr - gen_code_buf) { + if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) { return op_index; } op_index++; @@ -2667,7 +2697,7 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, return -1; } -int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf) +int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf) { #ifdef CONFIG_PROFILER { @@ -2686,16 +2716,17 @@ int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf) tcg_gen_code_common(s, gen_code_buf, -1); /* flush instruction cache */ - flush_icache_range((uintptr_t)gen_code_buf, (uintptr_t)s->code_ptr); + flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); - return s->code_ptr - gen_code_buf; + return tcg_current_code_size(s); } /* Return the index of the micro operation such as the pc after is < offset bytes from the start of the TB. The contents of gen_code_buf must not be changed, though writing the same values is ok. Return -1 if not found. */ -int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, long offset) +int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf, + long offset) { return tcg_gen_code_common(s, gen_code_buf, offset); } diff --git a/tcg/tcg.h b/tcg/tcg.h index d38c92d636..a3fb88cd04 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -146,10 +146,26 @@ typedef enum TCGOpcode { #define tcg_regset_andnot(d, a, b) (d) = (a) & ~(b) #define tcg_regset_not(d, a) (d) = ~(a) +#ifndef TCG_TARGET_INSN_UNIT_SIZE +#define TCG_TARGET_INSN_UNIT_SIZE 1 +#endif +#if TCG_TARGET_INSN_UNIT_SIZE == 1 +typedef uint8_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 2 +typedef uint16_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 4 +typedef uint32_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 8 +typedef uint64_t tcg_insn_unit; +#else +/* The port better have done this. */ +#endif + + typedef struct TCGRelocation { struct TCGRelocation *next; int type; - uint8_t *ptr; + tcg_insn_unit *ptr; intptr_t addend; } TCGRelocation; @@ -157,6 +173,7 @@ typedef struct TCGLabel { int has_value; union { uintptr_t value; + tcg_insn_unit *value_ptr; TCGRelocation *first_reloc; } u; } TCGLabel; @@ -464,7 +481,7 @@ struct TCGContext { int nb_temps; /* goto_tb support */ - uint8_t *code_buf; + tcg_insn_unit *code_buf; uintptr_t *tb_next; uint16_t *tb_next_offset; uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */ @@ -485,7 +502,7 @@ struct TCGContext { intptr_t frame_end; int frame_reg; - uint8_t *code_ptr; + tcg_insn_unit *code_ptr; TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; @@ -524,14 +541,17 @@ struct TCGContext { uint16_t gen_opc_icount[OPC_BUF_SIZE]; uint8_t gen_opc_instr_start[OPC_BUF_SIZE]; - /* Code generation */ + /* Code generation. Note that we specifically do not use tcg_insn_unit + here, because there's too much arithmetic throughout that relies + on addition and subtraction working on bytes. Rely on the GCC + extension that allows arithmetic on void*. */ int code_gen_max_blocks; - uint8_t *code_gen_prologue; - uint8_t *code_gen_buffer; + void *code_gen_prologue; + void *code_gen_buffer; size_t code_gen_buffer_size; /* threshold to flush the translated code buffer */ size_t code_gen_buffer_max_size; - uint8_t *code_gen_ptr; + void *code_gen_ptr; TBContext tb_ctx; @@ -566,8 +586,9 @@ void tcg_context_init(TCGContext *s); void tcg_prologue_init(TCGContext *s); void tcg_func_start(TCGContext *s); -int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf); -int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, long offset); +int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf); +int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf, + long offset); void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size); diff --git a/translate-all.c b/translate-all.c index 5759974d42..5549a85ed5 100644 --- a/translate-all.c +++ b/translate-all.c @@ -143,7 +143,7 @@ void cpu_gen_init(void) int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr) { TCGContext *s = &tcg_ctx; - uint8_t *gen_code_buf; + tcg_insn_unit *gen_code_buf; int gen_code_size; #ifdef CONFIG_PROFILER int64_t ti; @@ -186,8 +186,8 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { - qemu_log("OUT: [size=%d]\n", *gen_code_size_ptr); - log_disas(tb->tc_ptr, *gen_code_size_ptr); + qemu_log("OUT: [size=%d]\n", gen_code_size); + log_disas(tb->tc_ptr, gen_code_size); qemu_log("\n"); qemu_log_flush(); } @@ -235,7 +235,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, s->tb_jmp_offset = NULL; s->tb_next = tb->tb_next; #endif - j = tcg_gen_code_search_pc(s, (uint8_t *)tc_ptr, searched_pc - tc_ptr); + j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr, + searched_pc - tc_ptr); if (j < 0) return -1; /* now find start of instruction before */ @@ -944,7 +945,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, { CPUArchState *env = cpu->env_ptr; TranslationBlock *tb; - uint8_t *tc_ptr; tb_page_addr_t phys_pc, phys_page2; target_ulong virt_page2; int code_gen_size; @@ -959,8 +959,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, /* Don't forget to invalidate previous TB info. */ tcg_ctx.tb_ctx.tb_invalidated_flag = 1; } - tc_ptr = tcg_ctx.code_gen_ptr; - tb->tc_ptr = tc_ptr; + tb->tc_ptr = tcg_ctx.code_gen_ptr; tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; -- cgit v1.2.3-55-g7522