diff options
Diffstat (limited to 'target')
51 files changed, 446 insertions, 504 deletions
diff --git a/target/alpha/helper.h b/target/alpha/helper.h index 004221df8c..d60f208703 100644 --- a/target/alpha/helper.h +++ b/target/alpha/helper.h @@ -3,10 +3,6 @@ DEF_HELPER_FLAGS_1(load_pcc, TCG_CALL_NO_RWG_SE, i64, env) DEF_HELPER_FLAGS_3(check_overflow, TCG_CALL_NO_WG, void, env, i64, i64) -DEF_HELPER_FLAGS_1(ctpop, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_1(ctlz, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_1(cttz, TCG_CALL_NO_RWG_SE, i64, i64) - DEF_HELPER_FLAGS_2(zap, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(zapnot, TCG_CALL_NO_RWG_SE, i64, i64, i64) diff --git a/target/alpha/int_helper.c b/target/alpha/int_helper.c index 19bebfe742..e43b50a743 100644 --- a/target/alpha/int_helper.c +++ b/target/alpha/int_helper.c @@ -24,21 +24,6 @@ #include "qemu/host-utils.h" -uint64_t helper_ctpop(uint64_t arg) -{ - return ctpop64(arg); -} - -uint64_t helper_ctlz(uint64_t arg) -{ - return clz64(arg); -} - -uint64_t helper_cttz(uint64_t arg) -{ - return ctz64(arg); -} - uint64_t helper_zapnot(uint64_t val, uint64_t mskb) { uint64_t mask; diff --git a/target/alpha/translate.c b/target/alpha/translate.c index 114927b751..055286a7b8 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -949,7 +949,13 @@ static void gen_ext_h(DisasContext *ctx, TCGv vc, TCGv va, int rb, bool islit, uint8_t lit, uint8_t byte_mask) { if (islit) { - tcg_gen_shli_i64(vc, va, (64 - lit * 8) & 0x3f); + int pos = (64 - lit * 8) & 0x3f; + int len = cto32(byte_mask) * 8; + if (pos < len) { + tcg_gen_deposit_z_i64(vc, va, pos, len - pos); + } else { + tcg_gen_movi_i64(vc, 0); + } } else { TCGv tmp = tcg_temp_new(); tcg_gen_shli_i64(tmp, load_gpr(ctx, rb), 3); @@ -966,38 +972,44 @@ static void gen_ext_l(DisasContext *ctx, TCGv vc, TCGv va, int rb, bool islit, uint8_t lit, uint8_t byte_mask) { if (islit) { - tcg_gen_shri_i64(vc, va, (lit & 7) * 8); + int pos = (lit & 7) * 8; + int len = cto32(byte_mask) * 8; + if (pos + len >= 64) { + len = 64 - pos; + } + 
tcg_gen_extract_i64(vc, va, pos, len); } else { TCGv tmp = tcg_temp_new(); tcg_gen_andi_i64(tmp, load_gpr(ctx, rb), 7); tcg_gen_shli_i64(tmp, tmp, 3); tcg_gen_shr_i64(vc, va, tmp); tcg_temp_free(tmp); + gen_zapnoti(vc, vc, byte_mask); } - gen_zapnoti(vc, vc, byte_mask); } /* INSWH, INSLH, INSQH */ static void gen_ins_h(DisasContext *ctx, TCGv vc, TCGv va, int rb, bool islit, uint8_t lit, uint8_t byte_mask) { - TCGv tmp = tcg_temp_new(); - - /* The instruction description has us left-shift the byte mask and extract - bits <15:8> and apply that zap at the end. This is equivalent to simply - performing the zap first and shifting afterward. */ - gen_zapnoti(tmp, va, byte_mask); - if (islit) { - lit &= 7; - if (unlikely(lit == 0)) { - tcg_gen_movi_i64(vc, 0); + int pos = 64 - (lit & 7) * 8; + int len = cto32(byte_mask) * 8; + if (pos < len) { + tcg_gen_extract_i64(vc, va, pos, len - pos); } else { - tcg_gen_shri_i64(vc, tmp, 64 - lit * 8); + tcg_gen_movi_i64(vc, 0); } } else { + TCGv tmp = tcg_temp_new(); TCGv shift = tcg_temp_new(); + /* The instruction description has us left-shift the byte mask + and extract bits <15:8> and apply that zap at the end. This + is equivalent to simply performing the zap first and shifting + afterward. */ + gen_zapnoti(tmp, va, byte_mask); + /* If (B & 7) == 0, we need to shift by 64 and leave a zero. Do this portably by splitting the shift into two parts: shift_count-1 and 1. 
Arrange for the -1 by using ones-complement instead of @@ -1010,32 +1022,37 @@ static void gen_ins_h(DisasContext *ctx, TCGv vc, TCGv va, int rb, bool islit, tcg_gen_shr_i64(vc, tmp, shift); tcg_gen_shri_i64(vc, vc, 1); tcg_temp_free(shift); + tcg_temp_free(tmp); } - tcg_temp_free(tmp); } /* INSBL, INSWL, INSLL, INSQL */ static void gen_ins_l(DisasContext *ctx, TCGv vc, TCGv va, int rb, bool islit, uint8_t lit, uint8_t byte_mask) { - TCGv tmp = tcg_temp_new(); - - /* The instruction description has us left-shift the byte mask - the same number of byte slots as the data and apply the zap - at the end. This is equivalent to simply performing the zap - first and shifting afterward. */ - gen_zapnoti(tmp, va, byte_mask); - if (islit) { - tcg_gen_shli_i64(vc, tmp, (lit & 7) * 8); + int pos = (lit & 7) * 8; + int len = cto32(byte_mask) * 8; + if (pos + len > 64) { + len = 64 - pos; + } + tcg_gen_deposit_z_i64(vc, va, pos, len); } else { + TCGv tmp = tcg_temp_new(); TCGv shift = tcg_temp_new(); + + /* The instruction description has us left-shift the byte mask + and extract bits <15:8> and apply that zap at the end. This + is equivalent to simply performing the zap first and shifting + afterward. 
*/ + gen_zapnoti(tmp, va, byte_mask); + tcg_gen_andi_i64(shift, load_gpr(ctx, rb), 7); tcg_gen_shli_i64(shift, shift, 3); tcg_gen_shl_i64(vc, tmp, shift); tcg_temp_free(shift); + tcg_temp_free(tmp); } - tcg_temp_free(tmp); } /* MSKWH, MSKLH, MSKQH */ @@ -2524,7 +2541,7 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) REQUIRE_TB_FLAG(TB_FLAGS_AMASK_CIX); REQUIRE_REG_31(ra); REQUIRE_NO_LIT; - gen_helper_ctpop(vc, vb); + tcg_gen_ctpop_i64(vc, vb); break; case 0x31: /* PERR */ @@ -2538,14 +2555,14 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) REQUIRE_TB_FLAG(TB_FLAGS_AMASK_CIX); REQUIRE_REG_31(ra); REQUIRE_NO_LIT; - gen_helper_ctlz(vc, vb); + tcg_gen_clzi_i64(vc, vb, 64); break; case 0x33: /* CTTZ */ REQUIRE_TB_FLAG(TB_FLAGS_AMASK_CIX); REQUIRE_REG_31(ra); REQUIRE_NO_LIT; - gen_helper_cttz(vc, vb); + tcg_gen_ctzi_i64(vc, vb, 64); break; case 0x34: /* UNPKBW */ diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 98b97df461..d9df82cff5 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -54,26 +54,6 @@ int64_t HELPER(sdiv64)(int64_t num, int64_t den) return num / den; } -uint64_t HELPER(clz64)(uint64_t x) -{ - return clz64(x); -} - -uint64_t HELPER(cls64)(uint64_t x) -{ - return clrsb64(x); -} - -uint32_t HELPER(cls32)(uint32_t x) -{ - return clrsb32(x); -} - -uint32_t HELPER(clz32)(uint32_t x) -{ - return clz32(x); -} - uint64_t HELPER(rbit64)(uint64_t x) { return revbit64(x); diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index dd32000e63..6f9eaba533 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -18,10 +18,6 @@ */ DEF_HELPER_FLAGS_2(udiv64, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(sdiv64, TCG_CALL_NO_RWG_SE, s64, s64, s64) -DEF_HELPER_FLAGS_1(clz64, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_1(cls64, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_1(cls32, TCG_CALL_NO_RWG_SE, i32, i32) -DEF_HELPER_FLAGS_1(clz32, TCG_CALL_NO_RWG_SE, 
i32, i32) DEF_HELPER_FLAGS_1(rbit64, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr) diff --git a/target/arm/helper.c b/target/arm/helper.c index 9f1a335e3f..6c5c7ec811 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -5726,11 +5726,6 @@ uint32_t HELPER(uxtb16)(uint32_t x) return res; } -uint32_t HELPER(clz)(uint32_t x) -{ - return clz32(x); -} - int32_t HELPER(sdiv)(int32_t num, int32_t den) { if (den == 0) diff --git a/target/arm/helper.h b/target/arm/helper.h index 84aa637629..df86bf7141 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -1,4 +1,3 @@ -DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_NO_RWG_SE, i32, i32) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index f673d939e1..4f09dfb95a 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -3216,67 +3216,44 @@ static void disas_bitfield(DisasContext *s, uint32_t insn) low 32-bits anyway. */ tcg_tmp = read_cpu_reg(s, rn, 1); - /* Recognize the common aliases. */ - if (opc == 0) { /* SBFM */ - if (ri == 0) { - if (si == 7) { /* SXTB */ - tcg_gen_ext8s_i64(tcg_rd, tcg_tmp); - goto done; - } else if (si == 15) { /* SXTH */ - tcg_gen_ext16s_i64(tcg_rd, tcg_tmp); - goto done; - } else if (si == 31) { /* SXTW */ - tcg_gen_ext32s_i64(tcg_rd, tcg_tmp); - goto done; - } - } - if (si == 63 || (si == 31 && ri <= si)) { /* ASR */ - if (si == 31) { - tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp); - } - tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri); + /* Recognize simple(r) extractions. 
*/ + if (si >= ri) { + /* Wd<s-r:0> = Wn<s:r> */ + len = (si - ri) + 1; + if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */ + tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); goto done; - } - } else if (opc == 2) { /* UBFM */ - if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */ - tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1)); + } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */ + tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); return; } - if (si == 63 || (si == 31 && ri <= si)) { /* LSR */ - if (si == 31) { - tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp); - } - tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri); - return; - } - if (si + 1 == ri && si != bitsize - 1) { /* LSL */ - int shift = bitsize - 1 - si; - tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift); - goto done; - } - } - - if (opc != 1) { /* SBFM or UBFM */ - tcg_gen_movi_i64(tcg_rd, 0); - } - - /* do the bit move operation */ - if (si >= ri) { - /* Wd<s-r:0> = Wn<s:r> */ - tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); + /* opc == 1, BXFIL fall through to deposit */ + tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len); pos = 0; - len = (si - ri) + 1; } else { - /* Wd<32+s-r,32-r> = Wn<s:0> */ - pos = bitsize - ri; + /* Handle the ri > si case with a deposit + * Wd<32+s-r,32-r> = Wn<s:0> + */ len = si + 1; + pos = (bitsize - ri) & (bitsize - 1); } - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); + if (opc == 0 && len < ri) { + /* SBFM: sign extend the destination field from len to fill + the balance of the word. Let the deposit below insert all + of those sign bits. 
*/ + tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); + len = ri; + } - if (opc == 0) { /* SBFM - sign extend the destination field */ - tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len)); - tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len)); + if (opc == 1) { /* BFM, BXFIL */ + tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); + } else { + /* SBFM or UBFM: We start with zero, and we haven't modified + any bits outside bitsize, therefore the zero-extension + below is unneeded. */ + tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); + return; } done: @@ -3977,11 +3954,11 @@ static void handle_clz(DisasContext *s, unsigned int sf, tcg_rn = cpu_reg(s, rn); if (sf) { - gen_helper_clz64(tcg_rd, tcg_rn); + tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); } else { TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); - gen_helper_clz(tcg_tmp32, tcg_tmp32); + tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); tcg_temp_free_i32(tcg_tmp32); } @@ -3995,11 +3972,11 @@ static void handle_cls(DisasContext *s, unsigned int sf, tcg_rn = cpu_reg(s, rn); if (sf) { - gen_helper_cls64(tcg_rd, tcg_rn); + tcg_gen_clrsb_i64(tcg_rd, tcg_rn); } else { TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); - gen_helper_cls32(tcg_tmp32, tcg_tmp32); + tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); tcg_temp_free_i32(tcg_tmp32); } @@ -7614,9 +7591,9 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, switch (opcode) { case 0x4: /* CLS, CLZ */ if (u) { - gen_helper_clz64(tcg_rd, tcg_rn); + tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); } else { - gen_helper_cls64(tcg_rd, tcg_rn); + tcg_gen_clrsb_i64(tcg_rd, tcg_rn); } break; case 0x5: /* NOT */ @@ -10284,9 +10261,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) goto do_cmop; case 0x4: /* CLS */ if (u) { - gen_helper_clz32(tcg_res, tcg_op); + tcg_gen_clzi_i32(tcg_res, tcg_op, 32); } else { - 
gen_helper_cls32(tcg_res, tcg_op); + tcg_gen_clrsb_i32(tcg_res, tcg_op); } break; case 0x7: /* SQABS, SQNEG */ diff --git a/target/arm/translate.c b/target/arm/translate.c index 0ad9070b45..c9186b6195 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -288,29 +288,6 @@ static void gen_revsh(TCGv_i32 var) tcg_gen_ext16s_i32(var, var); } -/* Unsigned bitfield extract. */ -static void gen_ubfx(TCGv_i32 var, int shift, uint32_t mask) -{ - if (shift) - tcg_gen_shri_i32(var, var, shift); - tcg_gen_andi_i32(var, var, mask); -} - -/* Signed bitfield extract. */ -static void gen_sbfx(TCGv_i32 var, int shift, int width) -{ - uint32_t signbit; - - if (shift) - tcg_gen_sari_i32(var, var, shift); - if (shift + width < 32) { - signbit = 1u << (width - 1); - tcg_gen_andi_i32(var, var, (1u << width) - 1); - tcg_gen_xori_i32(var, var, signbit); - tcg_gen_subi_i32(var, var, signbit); - } -} - /* Return (b << 32) + a. Mark inputs as dead */ static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b) { @@ -7060,7 +7037,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) switch (size) { case 0: gen_helper_neon_clz_u8(tmp, tmp); break; case 1: gen_helper_neon_clz_u16(tmp, tmp); break; - case 2: gen_helper_clz(tmp, tmp); break; + case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break; default: abort(); } break; @@ -8242,7 +8219,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) ARCH(5); rd = (insn >> 12) & 0xf; tmp = load_reg(s, rm); - gen_helper_clz(tmp, tmp); + tcg_gen_clzi_i32(tmp, tmp, 32); store_reg(s, rd, tmp); } else { goto illegal_op; @@ -9178,9 +9155,9 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) goto illegal_op; if (i < 32) { if (op1 & 0x20) { - gen_ubfx(tmp, shift, (1u << i) - 1); + tcg_gen_extract_i32(tmp, tmp, shift, i); } else { - gen_sbfx(tmp, shift, i); + tcg_gen_sextract_i32(tmp, tmp, shift, i); } } store_reg(s, rd, tmp); @@ -10015,7 +9992,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t 
insn_hw tcg_temp_free_i32(tmp2); break; case 0x18: /* clz */ - gen_helper_clz(tmp, tmp); + tcg_gen_clzi_i32(tmp, tmp, 32); break; case 0x20: case 0x21: @@ -10497,15 +10474,17 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw imm++; if (shift + imm > 32) goto illegal_op; - if (imm < 32) - gen_sbfx(tmp, shift, imm); + if (imm < 32) { + tcg_gen_sextract_i32(tmp, tmp, shift, imm); + } break; case 6: /* Unsigned bitfield extract. */ imm++; if (shift + imm > 32) goto illegal_op; - if (imm < 32) - gen_ubfx(tmp, shift, (1u << imm) - 1); + if (imm < 32) { + tcg_gen_extract_i32(tmp, tmp, shift, imm); + } break; case 3: /* Bitfield insert/clear. */ if (imm < shift) diff --git a/target/cris/helper.h b/target/cris/helper.h index ff3595641a..20d21c4358 100644 --- a/target/cris/helper.h +++ b/target/cris/helper.h @@ -7,7 +7,6 @@ DEF_HELPER_1(rfn, void, env) DEF_HELPER_3(movl_sreg_reg, void, env, i32, i32) DEF_HELPER_3(movl_reg_sreg, void, env, i32, i32) -DEF_HELPER_FLAGS_1(lz, TCG_CALL_NO_SE, i32, i32) DEF_HELPER_FLAGS_4(btst, TCG_CALL_NO_SE, i32, env, i32, i32, i32) DEF_HELPER_FLAGS_4(evaluate_flags_muls, TCG_CALL_NO_SE, i32, env, i32, i32, i32) diff --git a/target/cris/op_helper.c b/target/cris/op_helper.c index 504303913c..e92505c907 100644 --- a/target/cris/op_helper.c +++ b/target/cris/op_helper.c @@ -230,11 +230,6 @@ void helper_rfn(CPUCRISState *env) env->pregs[PR_CCS] |= M_FLAG_V32; } -uint32_t helper_lz(uint32_t t0) -{ - return clz32(t0); -} - uint32_t helper_btst(CPUCRISState *env, uint32_t t0, uint32_t t1, uint32_t ccs) { /* FIXME: clean this up. 
*/ diff --git a/target/cris/translate.c b/target/cris/translate.c index b91042743f..0ee05ca02d 100644 --- a/target/cris/translate.c +++ b/target/cris/translate.c @@ -767,7 +767,7 @@ static void cris_alu_op_exec(DisasContext *dc, int op, t_gen_subx_carry(dc, dst); break; case CC_OP_LZ: - gen_helper_lz(dst, b); + tcg_gen_clzi_tl(dst, b, TARGET_LONG_BITS); break; case CC_OP_MULS: tcg_gen_muls2_tl(dst, cpu_PR[PR_MOF], a, b); diff --git a/target/i386/cc_helper.c b/target/i386/cc_helper.c index 83af223c9f..c9c90e10db 100644 --- a/target/i386/cc_helper.c +++ b/target/i386/cc_helper.c @@ -105,6 +105,8 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, return src1; case CC_OP_CLR: return CC_Z | CC_P; + case CC_OP_POPCNT: + return src1 ? 0 : CC_Z; case CC_OP_MULB: return compute_all_mulb(dst, src1); @@ -232,6 +234,7 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, case CC_OP_LOGICL: case CC_OP_LOGICQ: case CC_OP_CLR: + case CC_OP_POPCNT: return 0; case CC_OP_EFLAGS: diff --git a/target/i386/cpu.h b/target/i386/cpu.h index a7f2f6099d..a04e46b166 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -777,6 +777,7 @@ typedef enum { CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */ CC_OP_CLR, /* Z set, all other flags clear. */ + CC_OP_POPCNT, /* Z via CC_SRC, all other flags clear. 
*/ CC_OP_NB, } CCOp; diff --git a/target/i386/helper.h b/target/i386/helper.h index bd9b2cf677..4c1aafffd6 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -202,8 +202,6 @@ DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, i64) DEF_HELPER_FLAGS_2(rdpkru, TCG_CALL_NO_WG, i64, env, i32) DEF_HELPER_FLAGS_3(wrpkru, TCG_CALL_NO_WG, void, env, i32, i64) -DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl) diff --git a/target/i386/int_helper.c b/target/i386/int_helper.c index 9e873ac150..4dc5c65991 100644 --- a/target/i386/int_helper.c +++ b/target/i386/int_helper.c @@ -417,17 +417,6 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0) # define clztl clz64 #endif -/* bit operations */ -target_ulong helper_ctz(target_ulong t0) -{ - return ctztl(t0); -} - -target_ulong helper_clz(target_ulong t0) -{ - return clztl(t0); -} - target_ulong helper_pdep(target_ulong src, target_ulong mask) { target_ulong dest = 0; diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 7a98f53864..16509d0a74 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -2157,32 +2157,6 @@ target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len) return crc; } -#define POPMASK(i) ((target_ulong) -1 / ((1LL << (1 << i)) + 1)) -#define POPCOUNT(n, i) ((n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i))) -target_ulong helper_popcnt(CPUX86State *env, target_ulong n, uint32_t type) -{ - CC_SRC = n ? 
0 : CC_Z; - - n = POPCOUNT(n, 0); - n = POPCOUNT(n, 1); - n = POPCOUNT(n, 2); - n = POPCOUNT(n, 3); - if (type == 1) { - return n & 0xff; - } - - n = POPCOUNT(n, 4); -#ifndef TARGET_X86_64 - return n; -#else - if (type == 2) { - return n & 0xff; - } - - return POPCOUNT(n, 5); -#endif -} - void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t ctrl) { diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index 64c5857cf4..094aafc573 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -333,7 +333,6 @@ DEF_HELPER_4(glue(pcmpestrm, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(pcmpistri, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(pcmpistrm, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_3(crc32, tl, i32, tl, i32) -DEF_HELPER_3(popcnt, tl, env, tl, i32) #endif /* AES-NI op helpers */ diff --git a/target/i386/translate.c b/target/i386/translate.c index 7adfff09fc..72c1b03a2a 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -222,6 +222,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2, [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, [CC_OP_CLR] = 0, + [CC_OP_POPCNT] = USES_CC_SRC, }; static void set_cc_op(DisasContext *s, CCOp op) @@ -383,8 +384,7 @@ static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0) static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg) { if (ot == MO_8 && byte_reg_is_xH(reg)) { - tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8); - tcg_gen_ext8u_tl(t0, t0); + tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8); } else { tcg_gen_mov_tl(t0, cpu_regs[reg]); } @@ -758,6 +758,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) case CC_OP_LOGICB ... CC_OP_LOGICQ: case CC_OP_CLR: + case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; case CC_OP_INCB ... 
CC_OP_INCQ: @@ -825,6 +826,7 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_S }; case CC_OP_CLR: + case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; default: { @@ -844,6 +846,7 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2, .mask = -1, .no_setcond = true }; case CC_OP_CLR: + case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; default: gen_compute_eflags(s); @@ -867,6 +870,9 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) .mask = CC_Z }; case CC_OP_CLR: return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 }; + case CC_OP_POPCNT: + return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src, + .mask = -1 }; default: { TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3; @@ -3768,8 +3774,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* Extract the LEN into a mask. Lengths larger than operand size get all ones. */ - tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8); - tcg_gen_ext8u_tl(cpu_A0, cpu_A0); + tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8); tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound, cpu_A0, bound); tcg_temp_free(bound); @@ -3920,9 +3925,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_compute_eflags(s); } carry_in = cpu_tmp0; - tcg_gen_shri_tl(carry_in, cpu_cc_src, - ctz32(b == 0x1f6 ? CC_C : CC_O)); - tcg_gen_andi_tl(carry_in, carry_in, 1); + tcg_gen_extract_tl(carry_in, cpu_cc_src, + ctz32(b == 0x1f6 ? 
CC_C : CC_O), 1); } switch (ot) { @@ -5447,21 +5451,25 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, rm = (modrm & 7) | REX_B(s); if (mod == 3) { - gen_op_mov_v_reg(ot, cpu_T0, rm); - switch (s_ot) { - case MO_UB: - tcg_gen_ext8u_tl(cpu_T0, cpu_T0); - break; - case MO_SB: - tcg_gen_ext8s_tl(cpu_T0, cpu_T0); - break; - case MO_UW: - tcg_gen_ext16u_tl(cpu_T0, cpu_T0); - break; - default: - case MO_SW: - tcg_gen_ext16s_tl(cpu_T0, cpu_T0); - break; + if (s_ot == MO_SB && byte_reg_is_xH(rm)) { + tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8); + } else { + gen_op_mov_v_reg(ot, cpu_T0, rm); + switch (s_ot) { + case MO_UB: + tcg_gen_ext8u_tl(cpu_T0, cpu_T0); + break; + case MO_SB: + tcg_gen_ext8s_tl(cpu_T0, cpu_T0); + break; + case MO_UW: + tcg_gen_ext16u_tl(cpu_T0, cpu_T0); + break; + default: + case MO_SW: + tcg_gen_ext16s_tl(cpu_T0, cpu_T0); + break; + } } gen_op_mov_reg_v(d_ot, reg, cpu_T0); } else { @@ -6803,21 +6811,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, ? s->cpuid_ext3_features & CPUID_EXT3_ABM : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) { int size = 8 << ot; + /* For lzcnt/tzcnt, C bit is defined related to the input. */ tcg_gen_mov_tl(cpu_cc_src, cpu_T0); if (b & 1) { /* For lzcnt, reduce the target_ulong result by the number of zeros that we expect to find at the top. */ - gen_helper_clz(cpu_T0, cpu_T0); + tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS); tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size); } else { - /* For tzcnt, a zero input must return the operand size: - force all bits outside the operand size to 1. */ - target_ulong mask = (target_ulong)-2 << (size - 1); - tcg_gen_ori_tl(cpu_T0, cpu_T0, mask); - gen_helper_ctz(cpu_T0, cpu_T0); - } - /* For lzcnt/tzcnt, C and Z bits are defined and are - related to the result. */ + /* For tzcnt, a zero input must return the operand size. 
*/ + tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size); + } + /* For lzcnt/tzcnt, Z bit is defined related to the result. */ gen_op_update1_cc(); set_cc_op(s, CC_OP_BMILGB + ot); } else { @@ -6825,20 +6830,20 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, to the input and not the result. */ tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); set_cc_op(s, CC_OP_LOGICB + ot); + + /* ??? The manual says that the output is undefined when the + input is zero, but real hardware leaves it unchanged, and + real programs appear to depend on that. Accomplish this + by passing the output as the value to return upon zero. */ if (b & 1) { /* For bsr, return the bit index of the first 1 bit, not the count of leading zeros. */ - gen_helper_clz(cpu_T0, cpu_T0); + tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1); + tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1); tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1); } else { - gen_helper_ctz(cpu_T0, cpu_T0); + tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]); } - /* ??? The manual says that the output is undefined when the - input is zero, but real hardware leaves it unchanged, and - real programs appear to depend on that. */ - tcg_gen_movi_tl(cpu_tmp0, 0); - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T0, cpu_cc_dst, cpu_tmp0, - cpu_regs[reg], cpu_T0); } gen_op_mov_reg_v(ot, reg, cpu_T0); break; @@ -8207,10 +8212,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_helper_popcnt(cpu_T0, cpu_env, cpu_T0, tcg_const_i32(ot)); + gen_extu(ot, cpu_T0); + tcg_gen_mov_tl(cpu_cc_src, cpu_T0); + tcg_gen_ctpop_tl(cpu_T0, cpu_T0); gen_op_mov_reg_v(ot, reg, cpu_T0); - set_cc_op(s, CC_OP_EFLAGS); + set_cc_op(s, CC_OP_POPCNT); break; case 0x10e ... 0x10f: /* 3DNow! 
instructions, ignore prefixes */ diff --git a/target/microblaze/helper.h b/target/microblaze/helper.h index bd13826de0..71a6c0858d 100644 --- a/target/microblaze/helper.h +++ b/target/microblaze/helper.h @@ -3,7 +3,6 @@ DEF_HELPER_1(debug, void, env) DEF_HELPER_FLAGS_3(carry, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_2(cmp, i32, i32, i32) DEF_HELPER_2(cmpu, i32, i32, i32) -DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_3(divs, i32, env, i32, i32) DEF_HELPER_3(divu, i32, env, i32, i32) diff --git a/target/microblaze/op_helper.c b/target/microblaze/op_helper.c index 4a856e6204..1e07e21c1c 100644 --- a/target/microblaze/op_helper.c +++ b/target/microblaze/op_helper.c @@ -145,11 +145,6 @@ uint32_t helper_cmpu(uint32_t a, uint32_t b) return t; } -uint32_t helper_clz(uint32_t t0) -{ - return clz32(t0); -} - uint32_t helper_carry(uint32_t a, uint32_t b, uint32_t cf) { return compute_carry(a, b, cf); diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index de2090ac71..0bb609513c 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -768,7 +768,7 @@ static void dec_bit(DisasContext *dc) t_gen_raise_exception(dc, EXCP_HW_EXCP); } if (dc->cpu->env.pvr.regs[2] & PVR2_USE_PCMP_INSTR) { - gen_helper_clz(cpu_R[dc->rd], cpu_R[dc->ra]); + tcg_gen_clzi_i32(cpu_R[dc->rd], cpu_R[dc->ra], 32); } break; case 0x1e0: diff --git a/target/mips/helper.h b/target/mips/helper.h index 666936c81b..60efa01194 100644 --- a/target/mips/helper.h +++ b/target/mips/helper.h @@ -20,13 +20,6 @@ DEF_HELPER_4(scd, tl, env, tl, tl, int) #endif #endif -DEF_HELPER_FLAGS_1(clo, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl) -#ifdef TARGET_MIPS64 -DEF_HELPER_FLAGS_1(dclo, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(dclz, TCG_CALL_NO_RWG_SE, tl, tl) -#endif - DEF_HELPER_3(muls, tl, env, tl, tl) DEF_HELPER_3(mulsu, tl, env, tl, tl) DEF_HELPER_3(macc, tl, env, tl, tl) diff --git 
a/target/mips/op_helper.c b/target/mips/op_helper.c index 7af4c2f084..11d781fc91 100644 --- a/target/mips/op_helper.c +++ b/target/mips/op_helper.c @@ -103,28 +103,6 @@ HELPER_ST(sd, stq, uint64_t) #endif #undef HELPER_ST -target_ulong helper_clo (target_ulong arg1) -{ - return clo32(arg1); -} - -target_ulong helper_clz (target_ulong arg1) -{ - return clz32(arg1); -} - -#if defined(TARGET_MIPS64) -target_ulong helper_dclo (target_ulong arg1) -{ - return clo64(arg1); -} - -target_ulong helper_dclz (target_ulong arg1) -{ - return clz64(arg1); -} -#endif /* TARGET_MIPS64 */ - /* 64 bits arithmetic for 32 bits hosts */ static inline uint64_t get_HILO(CPUMIPSState *env) { diff --git a/target/mips/translate.c b/target/mips/translate.c index 57b824ff2d..7f8ecf42c2 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -3626,29 +3626,38 @@ static void gen_cl (DisasContext *ctx, uint32_t opc, /* Treat as NOP. */ return; } - t0 = tcg_temp_new(); + t0 = cpu_gpr[rd]; gen_load_gpr(t0, rs); + switch (opc) { case OPC_CLO: case R6_OPC_CLO: - gen_helper_clo(cpu_gpr[rd], t0); +#if defined(TARGET_MIPS64) + case OPC_DCLO: + case R6_OPC_DCLO: +#endif + tcg_gen_not_tl(t0, t0); break; + } + + switch (opc) { + case OPC_CLO: + case R6_OPC_CLO: case OPC_CLZ: case R6_OPC_CLZ: - gen_helper_clz(cpu_gpr[rd], t0); + tcg_gen_ext32u_tl(t0, t0); + tcg_gen_clzi_tl(t0, t0, TARGET_LONG_BITS); + tcg_gen_subi_tl(t0, t0, TARGET_LONG_BITS - 32); break; #if defined(TARGET_MIPS64) case OPC_DCLO: case R6_OPC_DCLO: - gen_helper_dclo(cpu_gpr[rd], t0); - break; case OPC_DCLZ: case R6_OPC_DCLZ: - gen_helper_dclz(cpu_gpr[rd], t0); + tcg_gen_clzi_i64(t0, t0, 64); break; #endif } - tcg_temp_free(t0); } /* Godson integer instructions */ @@ -4488,11 +4497,12 @@ static void gen_bitops (DisasContext *ctx, uint32_t opc, int rt, if (lsb + msb > 31) { goto fail; } - tcg_gen_shri_tl(t0, t1, lsb); if (msb != 31) { - tcg_gen_andi_tl(t0, t0, (1U << (msb + 1)) - 1); + tcg_gen_extract_tl(t0, t1, lsb, msb + 1); } 
else { - tcg_gen_ext32s_tl(t0, t0); + /* The two checks together imply that lsb == 0, + so this is a simple sign-extension. */ + tcg_gen_ext32s_tl(t0, t1); } break; #if defined(TARGET_MIPS64) @@ -4507,10 +4517,7 @@ static void gen_bitops (DisasContext *ctx, uint32_t opc, int rt, if (lsb + msb > 63) { goto fail; } - tcg_gen_shri_tl(t0, t1, lsb); - if (msb != 63) { - tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1)) - 1); - } + tcg_gen_extract_tl(t0, t1, lsb, msb + 1); break; #endif case OPC_INS: diff --git a/target/openrisc/helper.h b/target/openrisc/helper.h index f53fa21344..bcc7245fc3 100644 --- a/target/openrisc/helper.h +++ b/target/openrisc/helper.h @@ -54,8 +54,6 @@ FOP_CMP(ge) #undef FOP_CMP /* int */ -DEF_HELPER_FLAGS_1(ff1, 0, tl, tl) -DEF_HELPER_FLAGS_1(fl1, 0, tl, tl) DEF_HELPER_FLAGS_3(mul32, 0, i32, env, i32, i32) /* interrupt */ diff --git a/target/openrisc/int_helper.c b/target/openrisc/int_helper.c index 4d1f958901..ba0fd277cd 100644 --- a/target/openrisc/int_helper.c +++ b/target/openrisc/int_helper.c @@ -24,25 +24,6 @@ #include "exception.h" #include "qemu/host-utils.h" -target_ulong HELPER(ff1)(target_ulong x) -{ -/*#ifdef TARGET_OPENRISC64 - return x ? ctz64(x) + 1 : 0; -#else*/ - return x ? ctz32(x) + 1 : 0; -/*#endif*/ -} - -target_ulong HELPER(fl1)(target_ulong x) -{ -/* not used yet, open it when we need or64. 
*/ -/*#ifdef TARGET_OPENRISC64 - return 64 - clz64(x); -#else*/ - return 32 - clz32(x); -/*#endif*/ -} - uint32_t HELPER(mul32)(CPUOpenRISCState *env, uint32_t ra, uint32_t rb) { diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c index 229361aed1..03fa7db570 100644 --- a/target/openrisc/translate.c +++ b/target/openrisc/translate.c @@ -602,11 +602,13 @@ static void dec_calc(DisasContext *dc, uint32_t insn) switch (op1) { case 0x00: /* l.ff1 */ LOG_DIS("l.ff1 r%d, r%d, r%d\n", rd, ra, rb); - gen_helper_ff1(cpu_R[rd], cpu_R[ra]); + tcg_gen_ctzi_tl(cpu_R[rd], cpu_R[ra], -1); + tcg_gen_addi_tl(cpu_R[rd], cpu_R[rd], 1); break; case 0x01: /* l.fl1 */ LOG_DIS("l.fl1 r%d, r%d, r%d\n", rd, ra, rb); - gen_helper_fl1(cpu_R[rd], cpu_R[ra]); + tcg_gen_clzi_tl(cpu_R[rd], cpu_R[ra], TARGET_LONG_BITS); + tcg_gen_subfi_tl(cpu_R[rd], TARGET_LONG_BITS, cpu_R[rd]); break; default: diff --git a/target/ppc/helper.h b/target/ppc/helper.h index da00f0ab49..0a8fbba3c5 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -38,17 +38,12 @@ DEF_HELPER_4(divde, i64, env, i64, i64, i32) DEF_HELPER_4(divweu, tl, env, tl, tl, i32) DEF_HELPER_4(divwe, tl, env, tl, tl, i32) -DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(cnttzw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_3(sraw, tl, env, tl, tl) #if defined(TARGET_PPC64) DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) -DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(cnttzd, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(popcntd, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) DEF_HELPER_0(darn32, tl) diff --git a/target/ppc/int_helper.c 
b/target/ppc/int_helper.c index 2d57c9a1c2..1871792ff6 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -141,16 +141,6 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) #endif -target_ulong helper_cntlzw(target_ulong t) -{ - return clz32(t); -} - -target_ulong helper_cnttzw(target_ulong t) -{ - return ctz32(t); -} - #if defined(TARGET_PPC64) /* if x = 0xab, returns 0xababababababababa */ #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) @@ -174,16 +164,6 @@ uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) #undef haszero #undef hasvalue -target_ulong helper_cntlzd(target_ulong t) -{ - return clz64(t); -} - -target_ulong helper_cnttzd(target_ulong t) -{ - return ctz64(t); -} - /* Return invalid random number. * * FIXME: Add rng backend or other mechanism to get cryptographically suitable @@ -292,6 +272,7 @@ target_ulong helper_srad(CPUPPCState *env, target_ulong value, #if defined(TARGET_PPC64) target_ulong helper_popcntb(target_ulong val) { + /* Note that we don't fold past bytes */ val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL); val = (val & 0x3333333333333333ULL) + ((val >> 2) & @@ -303,6 +284,7 @@ target_ulong helper_popcntb(target_ulong val) target_ulong helper_popcntw(target_ulong val) { + /* Note that we don't fold past words. 
*/ val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL); val = (val & 0x3333333333333333ULL) + ((val >> 2) & @@ -315,29 +297,15 @@ target_ulong helper_popcntw(target_ulong val) 0x0000ffff0000ffffULL); return val; } - -target_ulong helper_popcntd(target_ulong val) -{ - return ctpop64(val); -} #else target_ulong helper_popcntb(target_ulong val) { + /* Note that we don't fold past bytes */ val = (val & 0x55555555) + ((val >> 1) & 0x55555555); val = (val & 0x33333333) + ((val >> 2) & 0x33333333); val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); return val; } - -target_ulong helper_popcntw(target_ulong val) -{ - val = (val & 0x55555555) + ((val >> 1) & 0x55555555); - val = (val & 0x33333333) + ((val >> 2) & 0x33333333); - val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); - val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff); - val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff); - return val; -} #endif /*****************************************************************************/ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 59e9552d2b..121218087f 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1641,7 +1641,13 @@ static void gen_andis_(DisasContext *ctx) /* cntlzw */ static void gen_cntlzw(DisasContext *ctx) { - gen_helper_cntlzw(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(t, cpu_gpr[rS(ctx->opcode)]); + tcg_gen_clzi_i32(t, t, 32); + tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t); + tcg_temp_free_i32(t); + if (unlikely(Rc(ctx->opcode) != 0)) gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); } @@ -1649,7 +1655,13 @@ static void gen_cntlzw(DisasContext *ctx) /* cnttzw */ static void gen_cnttzw(DisasContext *ctx) { - gen_helper_cnttzw(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(t, cpu_gpr[rS(ctx->opcode)]); + tcg_gen_ctzi_i32(t, t, 32); + 
tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t); + tcg_temp_free_i32(t); + if (unlikely(Rc(ctx->opcode) != 0)) { gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); } @@ -1832,14 +1844,18 @@ static void gen_popcntb(DisasContext *ctx) static void gen_popcntw(DisasContext *ctx) { +#if defined(TARGET_PPC64) gen_helper_popcntw(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); +#else + tcg_gen_ctpop_i32(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); +#endif } #if defined(TARGET_PPC64) /* popcntd: PowerPC 2.06 specification */ static void gen_popcntd(DisasContext *ctx) { - gen_helper_popcntd(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); + tcg_gen_ctpop_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); } #endif @@ -1891,7 +1907,7 @@ GEN_LOGICAL1(extsw, tcg_gen_ext32s_tl, 0x1E, PPC_64B); /* cntlzd */ static void gen_cntlzd(DisasContext *ctx) { - gen_helper_cntlzd(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); + tcg_gen_clzi_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], 64); if (unlikely(Rc(ctx->opcode) != 0)) gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); } @@ -1899,7 +1915,7 @@ static void gen_cntlzd(DisasContext *ctx) /* cnttzd */ static void gen_cnttzd(DisasContext *ctx) { - gen_helper_cnttzd(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); + tcg_gen_ctzi_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], 64); if (unlikely(Rc(ctx->opcode) != 0)) { gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); } @@ -1975,16 +1991,16 @@ static void gen_rlwinm(DisasContext *ctx) { TCGv t_ra = cpu_gpr[rA(ctx->opcode)]; TCGv t_rs = cpu_gpr[rS(ctx->opcode)]; - uint32_t sh = SH(ctx->opcode); - uint32_t mb = MB(ctx->opcode); - uint32_t me = ME(ctx->opcode); - - if (mb == 0 && me == (31 - sh)) { - tcg_gen_shli_tl(t_ra, t_rs, sh); - tcg_gen_ext32u_tl(t_ra, t_ra); - } else if (sh != 0 && me == 31 && sh == (32 - mb)) { - tcg_gen_ext32u_tl(t_ra, t_rs); - tcg_gen_shri_tl(t_ra, t_ra, mb); + int sh = SH(ctx->opcode); + int mb = MB(ctx->opcode); + int me = 
ME(ctx->opcode); + int len = me - mb + 1; + int rsh = (32 - sh) & 31; + + if (sh != 0 && len > 0 && me == (31 - sh)) { + tcg_gen_deposit_z_tl(t_ra, t_rs, sh, len); + } else if (me == 31 && rsh + len <= 32) { + tcg_gen_extract_tl(t_ra, t_rs, rsh, len); } else { target_ulong mask; #if defined(TARGET_PPC64) @@ -1992,8 +2008,9 @@ static void gen_rlwinm(DisasContext *ctx) me += 32; #endif mask = MASK(mb, me); - - if (mask <= 0xffffffffu) { + if (sh == 0) { + tcg_gen_andi_tl(t_ra, t_rs, mask); + } else if (mask <= 0xffffffffu) { TCGv_i32 t0 = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(t0, t_rs); tcg_gen_rotli_i32(t0, t0, sh); @@ -2096,11 +2113,13 @@ static void gen_rldinm(DisasContext *ctx, int mb, int me, int sh) { TCGv t_ra = cpu_gpr[rA(ctx->opcode)]; TCGv t_rs = cpu_gpr[rS(ctx->opcode)]; + int len = me - mb + 1; + int rsh = (64 - sh) & 63; - if (sh != 0 && mb == 0 && me == (63 - sh)) { - tcg_gen_shli_tl(t_ra, t_rs, sh); - } else if (sh != 0 && me == 63 && sh == (64 - mb)) { - tcg_gen_shri_tl(t_ra, t_rs, mb); + if (sh != 0 && len > 0 && me == (63 - sh)) { + tcg_gen_deposit_z_tl(t_ra, t_rs, sh, len); + } else if (me == 63 && rsh + len <= 64) { + tcg_gen_extract_tl(t_ra, t_rs, rsh, len); } else { tcg_gen_rotli_tl(t_ra, t_rs, sh); tcg_gen_andi_tl(t_ra, t_ra, MASK(mb, me)); diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 207a6e7d1c..9102071d0a 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -70,7 +70,6 @@ DEF_HELPER_FLAGS_4(msdb, TCG_CALL_NO_WG, i64, env, i64, i64, i64) DEF_HELPER_FLAGS_3(tceb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64) DEF_HELPER_FLAGS_3(tcdb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64) DEF_HELPER_FLAGS_4(tcxb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64, i64) -DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_2(sqeb, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_2(sqdb, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_3(sqxb, TCG_CALL_NO_WG, i64, env, i64, i64) diff --git a/target/s390x/int_helper.c 
b/target/s390x/int_helper.c index 370c94da55..f26f36a904 100644 --- a/target/s390x/int_helper.c +++ b/target/s390x/int_helper.c @@ -117,12 +117,6 @@ uint64_t HELPER(divu64)(CPUS390XState *env, uint64_t ah, uint64_t al, return ret; } -/* count leading zeros, for find leftmost one */ -uint64_t HELPER(clz)(uint64_t v) -{ - return clz64(v); -} - uint64_t HELPER(cvd)(int32_t reg) { /* positive 0 */ @@ -143,14 +137,11 @@ uint64_t HELPER(cvd)(int32_t reg) return dec; } -uint64_t HELPER(popcnt)(uint64_t r2) +uint64_t HELPER(popcnt)(uint64_t val) { - uint64_t ret = 0; - int i; - - for (i = 0; i < 64; i += 8) { - uint64_t t = ctpop32((r2 >> i) & 0xff); - ret |= t << i; - } - return ret; + /* Note that we don't fold past bytes. */ + val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL); + val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); + val = (val + (val >> 4)) & 0x0f0f0f0f0f0f0f0fULL; + return val; } diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 02bc7058fd..01c62176bf 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -2249,7 +2249,7 @@ static ExitStatus op_flogr(DisasContext *s, DisasOps *o) gen_op_update1_cc_i64(s, CC_OP_FLOGR, o->in2); /* R1 = IN ? CLZ(IN) : 64. */ - gen_helper_clz(o->out, o->in2); + tcg_gen_clzi_i64(o->out, o->in2, 64); /* R1+1 = IN & ~(found bit). Note that we may attempt to shift this value by 64, which is undefined. But since the shift is 64 iff the @@ -3134,20 +3134,26 @@ static ExitStatus op_risbg(DisasContext *s, DisasOps *o) } } - /* In some cases we can implement this with deposit, which can be more - efficient on some hosts. */ - if (~mask == imask && i3 <= i4) { - if (s->fields->op2 == 0x5d) { - i3 += 32, i4 += 32; - } + len = i4 - i3 + 1; + pos = 63 - i4; + rot = i5 & 63; + if (s->fields->op2 == 0x5d) { + pos += 32; + } + + /* In some cases we can implement this with extract. 
*/ + if (imask == 0 && pos == 0 && len > 0 && rot + len <= 64) { + tcg_gen_extract_i64(o->out, o->in2, rot, len); + return NO_EXIT; + } + + /* In some cases we can implement this with deposit. */ + if (len > 0 && (imask == 0 || ~mask == imask)) { /* Note that we rotate the bits to be inserted to the lsb, not to the position as described in the PoO. */ - len = i4 - i3 + 1; - pos = 63 - i4; - rot = (i5 - pos) & 63; + rot = (rot - pos) & 63; } else { - pos = len = -1; - rot = i5 & 63; + pos = -1; } /* Rotate the input as necessary. */ @@ -3155,7 +3161,11 @@ static ExitStatus op_risbg(DisasContext *s, DisasOps *o) /* Insert the selected bits into the output. */ if (pos >= 0) { - tcg_gen_deposit_i64(o->out, o->out, o->in2, pos, len); + if (imask == 0) { + tcg_gen_deposit_z_i64(o->out, o->in2, pos, len); + } else { + tcg_gen_deposit_i64(o->out, o->out, o->in2, pos, len); + } } else if (imask == 0) { tcg_gen_andi_i64(o->out, o->in2, mask); } else { diff --git a/target/sparc/helper.c b/target/sparc/helper.c index 359b0b15ed..1d854890b4 100644 --- a/target/sparc/helper.c +++ b/target/sparc/helper.c @@ -49,11 +49,6 @@ void helper_debug(CPUSPARCState *env) } #ifdef TARGET_SPARC64 -target_ulong helper_popc(target_ulong val) -{ - return ctpop64(val); -} - void helper_tick_set_count(void *opaque, uint64_t count) { #if !defined(CONFIG_USER_ONLY) diff --git a/target/sparc/helper.h b/target/sparc/helper.h index 0cf1bfb73a..3ef38b9a22 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -16,7 +16,6 @@ DEF_HELPER_2(wrccr, void, env, tl) DEF_HELPER_1(rdcwp, tl, env) DEF_HELPER_2(wrcwp, void, env, tl) DEF_HELPER_FLAGS_2(array8, TCG_CALL_NO_RWG_SE, tl, tl, tl) -DEF_HELPER_FLAGS_1(popc, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(set_softint, TCG_CALL_NO_RWG, void, env, i64) DEF_HELPER_FLAGS_2(clear_softint, TCG_CALL_NO_RWG, void, env, i64) DEF_HELPER_FLAGS_2(write_softint, TCG_CALL_NO_RWG, void, env, i64) diff --git a/target/sparc/translate.c b/target/sparc/translate.c 
index 2205f89837..ead585eef5 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -4647,7 +4647,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) gen_store_gpr(dc, rd, cpu_dst); break; case 0x2e: /* V9 popc */ - gen_helper_popc(cpu_dst, cpu_src2); + tcg_gen_ctpop_tl(cpu_dst, cpu_src2); gen_store_gpr(dc, rd, cpu_dst); break; case 0x2f: /* V9 movr */ diff --git a/target/tilegx/helper.c b/target/tilegx/helper.c index b4fba9cc21..4964bb9111 100644 --- a/target/tilegx/helper.c +++ b/target/tilegx/helper.c @@ -55,21 +55,6 @@ void helper_ext01_ics(CPUTLGState *env) } } -uint64_t helper_cntlz(uint64_t arg) -{ - return clz64(arg); -} - -uint64_t helper_cnttz(uint64_t arg) -{ - return ctz64(arg); -} - -uint64_t helper_pcnt(uint64_t arg) -{ - return ctpop64(arg); -} - uint64_t helper_revbits(uint64_t arg) { return revbit64(arg); diff --git a/target/tilegx/helper.h b/target/tilegx/helper.h index 9281d0f428..16745c266f 100644 --- a/target/tilegx/helper.h +++ b/target/tilegx/helper.h @@ -1,8 +1,5 @@ DEF_HELPER_2(exception, noreturn, env, i32) DEF_HELPER_1(ext01_ics, void, env) -DEF_HELPER_FLAGS_1(cntlz, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_1(cnttz, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_1(pcnt, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_1(revbits, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_3(shufflebytes, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) DEF_HELPER_FLAGS_2(crc32_8, TCG_CALL_NO_RWG_SE, i64, i64, i64) diff --git a/target/tilegx/translate.c b/target/tilegx/translate.c index 9c734eeba3..ff2ef7b63d 100644 --- a/target/tilegx/translate.c +++ b/target/tilegx/translate.c @@ -608,12 +608,12 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext, switch (opext) { case OE_RR_X0(CNTLZ): case OE_RR_Y0(CNTLZ): - gen_helper_cntlz(tdest, tsrca); + tcg_gen_clzi_tl(tdest, tsrca, TARGET_LONG_BITS); mnemonic = "cntlz"; break; case OE_RR_X0(CNTTZ): case OE_RR_Y0(CNTTZ): - gen_helper_cnttz(tdest, tsrca); + 
tcg_gen_ctzi_tl(tdest, tsrca, TARGET_LONG_BITS); mnemonic = "cnttz"; break; case OE_RR_X0(FSINGLE_PACK1): @@ -697,7 +697,7 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext, break; case OE_RR_X0(PCNT): case OE_RR_Y0(PCNT): - gen_helper_pcnt(tdest, tsrca); + tcg_gen_ctpop_tl(tdest, tsrca); mnemonic = "pcnt"; break; case OE_RR_X0(REVBITS): diff --git a/target/tricore/fpu_helper.c b/target/tricore/fpu_helper.c index 98fe9472b1..7979bb6692 100644 --- a/target/tricore/fpu_helper.c +++ b/target/tricore/fpu_helper.c @@ -21,7 +21,8 @@ #include "cpu.h" #include "exec/helper-proto.h" -#define ADD_NAN 0x7cf00001 +#define QUIET_NAN 0x7fc00000 +#define ADD_NAN 0x7fc00001 #define DIV_NAN 0x7fc00008 #define MUL_NAN 0x7fc00002 #define FPU_FS PSW_USB_C @@ -47,6 +48,42 @@ static inline bool f_is_denormal(float32 arg) return float32_is_zero_or_denormal(arg) && !float32_is_zero(arg); } +static inline float32 f_maddsub_nan_result(float32 arg1, float32 arg2, + float32 arg3, float32 result, + uint32_t muladd_negate_c) +{ + uint32_t aSign, bSign, cSign; + uint32_t aExp, bExp, cExp; + + if (float32_is_any_nan(arg1) || float32_is_any_nan(arg2) || + float32_is_any_nan(arg3)) { + return QUIET_NAN; + } else if (float32_is_infinity(arg1) && float32_is_zero(arg2)) { + return MUL_NAN; + } else if (float32_is_zero(arg1) && float32_is_infinity(arg2)) { + return MUL_NAN; + } else { + aSign = arg1 >> 31; + bSign = arg2 >> 31; + cSign = arg3 >> 31; + + aExp = (arg1 >> 23) & 0xff; + bExp = (arg2 >> 23) & 0xff; + cExp = (arg3 >> 23) & 0xff; + + if (muladd_negate_c) { + cSign ^= 1; + } + if (((aExp == 0xff) || (bExp == 0xff)) && (cExp == 0xff)) { + if (aSign ^ bSign ^ cSign) { + return ADD_NAN; + } + } + } + + return result; +} + static void f_update_psw_flags(CPUTriCoreState *env, uint8_t flags) { uint8_t some_excp = 0; @@ -159,6 +196,60 @@ uint32_t helper_fdiv(CPUTriCoreState *env, uint32_t r1, uint32_t r2) return (uint32_t)f_result; } +uint32_t helper_fmadd(CPUTriCoreState *env, 
uint32_t r1, + uint32_t r2, uint32_t r3) +{ + uint32_t flags; + float32 arg1 = make_float32(r1); + float32 arg2 = make_float32(r2); + float32 arg3 = make_float32(r3); + float32 f_result; + + f_result = float32_muladd(arg1, arg2, arg3, 0, &env->fp_status); + + flags = f_get_excp_flags(env); + if (flags) { + if (flags & float_flag_invalid) { + arg1 = float32_squash_input_denormal(arg1, &env->fp_status); + arg2 = float32_squash_input_denormal(arg2, &env->fp_status); + arg3 = float32_squash_input_denormal(arg3, &env->fp_status); + f_result = f_maddsub_nan_result(arg1, arg2, arg3, f_result, 0); + } + f_update_psw_flags(env, flags); + } else { + env->FPU_FS = 0; + } + return (uint32_t)f_result; +} + +uint32_t helper_fmsub(CPUTriCoreState *env, uint32_t r1, + uint32_t r2, uint32_t r3) +{ + uint32_t flags; + float32 arg1 = make_float32(r1); + float32 arg2 = make_float32(r2); + float32 arg3 = make_float32(r3); + float32 f_result; + + f_result = float32_muladd(arg1, arg2, arg3, float_muladd_negate_product, + &env->fp_status); + + flags = f_get_excp_flags(env); + if (flags) { + if (flags & float_flag_invalid) { + arg1 = float32_squash_input_denormal(arg1, &env->fp_status); + arg2 = float32_squash_input_denormal(arg2, &env->fp_status); + arg3 = float32_squash_input_denormal(arg3, &env->fp_status); + + f_result = f_maddsub_nan_result(arg1, arg2, arg3, f_result, 1); + } + f_update_psw_flags(env, flags); + } else { + env->FPU_FS = 0; + } + return (uint32_t)f_result; +} + uint32_t helper_fcmp(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { uint32_t result, flags; @@ -215,3 +306,44 @@ uint32_t helper_itof(CPUTriCoreState *env, uint32_t arg) } return (uint32_t)f_result; } + +uint32_t helper_ftouz(CPUTriCoreState *env, uint32_t arg) +{ + float32 f_arg = make_float32(arg); + uint32_t result; + int32_t flags; + + result = float32_to_uint32_round_to_zero(f_arg, &env->fp_status); + + flags = f_get_excp_flags(env); + if (flags & float_flag_invalid) { + flags &= ~float_flag_inexact; + if 
(float32_is_any_nan(f_arg)) { + result = 0; + } + } else if (float32_lt_quiet(f_arg, 0, &env->fp_status)) { + flags = float_flag_invalid; + result = 0; + } + + if (flags) { + f_update_psw_flags(env, flags); + } else { + env->FPU_FS = 0; + } + return result; +} + +void helper_updfl(CPUTriCoreState *env, uint32_t arg) +{ + env->FPU_FS = extract32(arg, 7, 1) & extract32(arg, 15, 1); + env->FPU_FI = (extract32(arg, 6, 1) & extract32(arg, 14, 1)) << 31; + env->FPU_FV = (extract32(arg, 5, 1) & extract32(arg, 13, 1)) << 31; + env->FPU_FZ = (extract32(arg, 4, 1) & extract32(arg, 12, 1)) << 31; + env->FPU_FU = (extract32(arg, 3, 1) & extract32(arg, 11, 1)) << 31; + /* clear FX and RM */ + env->PSW &= ~(extract32(arg, 10, 1) << 26); + env->PSW |= (extract32(arg, 2, 1) & extract32(arg, 10, 1)) << 26; + + fpu_set_state(env); +} diff --git a/target/tricore/helper.h b/target/tricore/helper.h index 9333e161ab..e634d0c680 100644 --- a/target/tricore/helper.h +++ b/target/tricore/helper.h @@ -87,11 +87,8 @@ DEF_HELPER_FLAGS_2(min_hu, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_FLAGS_2(ixmin, TCG_CALL_NO_RWG_SE, i64, i64, i32) DEF_HELPER_FLAGS_2(ixmin_u, TCG_CALL_NO_RWG_SE, i64, i64, i32) /* count leading ... 
*/ -DEF_HELPER_FLAGS_1(clo, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(clo_h, TCG_CALL_NO_RWG_SE, i32, i32) -DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(clz_h, TCG_CALL_NO_RWG_SE, i32, i32) -DEF_HELPER_FLAGS_1(cls, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(cls_h, TCG_CALL_NO_RWG_SE, i32, i32) /* sh */ DEF_HELPER_FLAGS_2(sh, TCG_CALL_NO_RWG_SE, i32, i32, i32) @@ -109,9 +106,13 @@ DEF_HELPER_3(fadd, i32, env, i32, i32) DEF_HELPER_3(fsub, i32, env, i32, i32) DEF_HELPER_3(fmul, i32, env, i32, i32) DEF_HELPER_3(fdiv, i32, env, i32, i32) +DEF_HELPER_4(fmadd, i32, env, i32, i32, i32) +DEF_HELPER_4(fmsub, i32, env, i32, i32, i32) DEF_HELPER_3(fcmp, i32, env, i32, i32) DEF_HELPER_2(ftoi, i32, env, i32) DEF_HELPER_2(itof, i32, env, i32) +DEF_HELPER_2(ftouz, i32, env, i32) +DEF_HELPER_2(updfl, void, env, i32) /* dvinit */ DEF_HELPER_3(dvinit_b_13, i64, env, i32, i32) DEF_HELPER_3(dvinit_b_131, i64, env, i32, i32) diff --git a/target/tricore/op_helper.c b/target/tricore/op_helper.c index ac02e0a36b..7af202c8c0 100644 --- a/target/tricore/op_helper.c +++ b/target/tricore/op_helper.c @@ -1733,11 +1733,6 @@ EXTREMA_H_B(min, <) #undef EXTREMA_H_B -uint32_t helper_clo(target_ulong r1) -{ - return clo32(r1); -} - uint32_t helper_clo_h(target_ulong r1) { uint32_t ret_hw0 = extract32(r1, 0, 16); @@ -1756,11 +1751,6 @@ uint32_t helper_clo_h(target_ulong r1) return ret_hw0 | (ret_hw1 << 16); } -uint32_t helper_clz(target_ulong r1) -{ - return clz32(r1); -} - uint32_t helper_clz_h(target_ulong r1) { uint32_t ret_hw0 = extract32(r1, 0, 16); @@ -1779,11 +1769,6 @@ uint32_t helper_clz_h(target_ulong r1) return ret_hw0 | (ret_hw1 << 16); } -uint32_t helper_cls(target_ulong r1) -{ - return clrsb32(r1); -} - uint32_t helper_cls_h(target_ulong r1) { uint32_t ret_hw0 = extract32(r1, 0, 16); diff --git a/target/tricore/translate.c b/target/tricore/translate.c index 36f734a662..ddd2dd07dd 100644 --- a/target/tricore/translate.c +++ 
b/target/tricore/translate.c @@ -3362,9 +3362,17 @@ static void gen_compute_branch(DisasContext *ctx, uint32_t opc, int r1, case OPC1_16_SBC_JEQ: gen_branch_condi(ctx, TCG_COND_EQ, cpu_gpr_d[15], constant, offset); break; + case OPC1_16_SBC_JEQ2: + gen_branch_condi(ctx, TCG_COND_EQ, cpu_gpr_d[15], constant, + offset + 16); + break; case OPC1_16_SBC_JNE: gen_branch_condi(ctx, TCG_COND_NE, cpu_gpr_d[15], constant, offset); break; + case OPC1_16_SBC_JNE2: + gen_branch_condi(ctx, TCG_COND_NE, cpu_gpr_d[15], + constant, offset + 16); + break; /* SBRN-format jumps */ case OPC1_16_SBRN_JZ_T: temp = tcg_temp_new(); @@ -4097,6 +4105,16 @@ static void decode_16Bit_opc(CPUTriCoreState *env, DisasContext *ctx) const16 = MASK_OP_SBC_CONST4_SEXT(ctx->opcode); gen_compute_branch(ctx, op1, 0, 0, const16, address); break; + case OPC1_16_SBC_JEQ2: + case OPC1_16_SBC_JNE2: + if (tricore_feature(env, TRICORE_FEATURE_16)) { + address = MASK_OP_SBC_DISP4(ctx->opcode); + const16 = MASK_OP_SBC_CONST4_SEXT(ctx->opcode); + gen_compute_branch(ctx, op1, 0, 0, const16, address); + } else { + generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); + } + break; /* SBRN-format */ case OPC1_16_SBRN_JNZ_T: case OPC1_16_SBRN_JZ_T: @@ -6034,6 +6052,8 @@ static void decode_rr_accumulator(CPUTriCoreState *env, DisasContext *ctx) uint32_t op2; int r3, r2, r1; + TCGv temp; + r3 = MASK_OP_RR_D(ctx->opcode); r2 = MASK_OP_RR_S2(ctx->opcode); r1 = MASK_OP_RR_S1(ctx->opcode); @@ -6224,6 +6244,20 @@ static void decode_rr_accumulator(CPUTriCoreState *env, DisasContext *ctx) case OPC2_32_RR_MOV: tcg_gen_mov_tl(cpu_gpr_d[r3], cpu_gpr_d[r2]); break; + case OPC2_32_RR_MOV_64: + if (tricore_feature(env, TRICORE_FEATURE_16)) { + temp = tcg_temp_new(); + + CHECK_REG_PAIR(r3); + tcg_gen_mov_tl(temp, cpu_gpr_d[r1]); + tcg_gen_mov_tl(cpu_gpr_d[r3], cpu_gpr_d[r2]); + tcg_gen_mov_tl(cpu_gpr_d[r3 + 1], temp); + + tcg_temp_free(temp); + } else { + generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); + } + break; case OPC2_32_RR_NE: 
tcg_gen_setcond_tl(TCG_COND_NE, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); @@ -6367,19 +6401,20 @@ static void decode_rr_logical_shift(CPUTriCoreState *env, DisasContext *ctx) tcg_gen_andc_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_CLO: - gen_helper_clo(cpu_gpr_d[r3], cpu_gpr_d[r1]); + tcg_gen_not_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]); + tcg_gen_clzi_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], TARGET_LONG_BITS); break; case OPC2_32_RR_CLO_H: gen_helper_clo_h(cpu_gpr_d[r3], cpu_gpr_d[r1]); break; case OPC2_32_RR_CLS: - gen_helper_cls(cpu_gpr_d[r3], cpu_gpr_d[r1]); + tcg_gen_clrsb_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]); break; case OPC2_32_RR_CLS_H: gen_helper_cls_h(cpu_gpr_d[r3], cpu_gpr_d[r1]); break; case OPC2_32_RR_CLZ: - gen_helper_clz(cpu_gpr_d[r3], cpu_gpr_d[r1]); + tcg_gen_clzi_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], TARGET_LONG_BITS); break; case OPC2_32_RR_CLZ_H: gen_helper_clz_h(cpu_gpr_d[r3], cpu_gpr_d[r1]); @@ -6698,6 +6733,12 @@ static void decode_rr_divide(CPUTriCoreState *env, DisasContext *ctx) case OPC2_32_RR_ITOF: gen_helper_itof(cpu_gpr_d[r3], cpu_env, cpu_gpr_d[r1]); break; + case OPC2_32_RR_FTOUZ: + gen_helper_ftouz(cpu_gpr_d[r3], cpu_env, cpu_gpr_d[r1]); + break; + case OPC2_32_RR_UPDFL: + gen_helper_updfl(cpu_env, cpu_gpr_d[r1]); + break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); } @@ -7093,6 +7134,14 @@ static void decode_rrr_divide(CPUTriCoreState *env, DisasContext *ctx) case OPC2_32_RRR_SUB_F: gen_helper_fsub(cpu_gpr_d[r4], cpu_env, cpu_gpr_d[r1], cpu_gpr_d[r3]); break; + case OPC2_32_RRR_MADD_F: + gen_helper_fmadd(cpu_gpr_d[r4], cpu_env, cpu_gpr_d[r1], + cpu_gpr_d[r2], cpu_gpr_d[r3]); + break; + case OPC2_32_RRR_MSUB_F: + gen_helper_fmsub(cpu_gpr_d[r4], cpu_env, cpu_gpr_d[r1], + cpu_gpr_d[r2], cpu_gpr_d[r3]); + break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); } diff --git a/target/tricore/tricore-opcodes.h b/target/tricore/tricore-opcodes.h index df666b081f..08394b85ac 100644 --- 
a/target/tricore/tricore-opcodes.h +++ b/target/tricore/tricore-opcodes.h @@ -311,6 +311,7 @@ enum { OPC1_16_SRR_EQ = 0x3a, OPC1_16_SB_J = 0x3c, OPC1_16_SBC_JEQ = 0x1e, + OPC1_16_SBC_JEQ2 = 0x9e, OPC1_16_SBR_JEQ = 0x3e, OPC1_16_SBR_JGEZ = 0xce, OPC1_16_SBR_JGTZ = 0x4e, @@ -318,6 +319,7 @@ enum { OPC1_16_SBR_JLEZ = 0x8e, OPC1_16_SBR_JLTZ = 0x0e, OPC1_16_SBC_JNE = 0x5e, + OPC1_16_SBC_JNE2 = 0xde, OPC1_16_SBR_JNE = 0x7e, OPC1_16_SB_JNZ = 0xee, OPC1_16_SBR_JNZ = 0xf6, @@ -1062,6 +1064,7 @@ enum { OPC2_32_RR_MIN_H = 0x78, OPC2_32_RR_MIN_HU = 0x79, OPC2_32_RR_MOV = 0x1f, + OPC2_32_RR_MOV_64 = 0x81, OPC2_32_RR_NE = 0x11, OPC2_32_RR_OR_EQ = 0x27, OPC2_32_RR_OR_GE = 0x2b, diff --git a/target/unicore32/helper.c b/target/unicore32/helper.c index d603bde237..7a5613e776 100644 --- a/target/unicore32/helper.c +++ b/target/unicore32/helper.c @@ -32,16 +32,6 @@ UniCore32CPU *uc32_cpu_init(const char *cpu_model) return UNICORE32_CPU(cpu_generic_init(TYPE_UNICORE32_CPU, cpu_model)); } -uint32_t HELPER(clo)(uint32_t x) -{ - return clo32(x); -} - -uint32_t HELPER(clz)(uint32_t x) -{ - return clz32(x); -} - #ifndef CONFIG_USER_ONLY void helper_cp0_set(CPUUniCore32State *env, uint32_t val, uint32_t creg, uint32_t cop) diff --git a/target/unicore32/helper.h b/target/unicore32/helper.h index 941813749d..a4a5d45d1d 100644 --- a/target/unicore32/helper.h +++ b/target/unicore32/helper.h @@ -13,9 +13,6 @@ DEF_HELPER_3(cp0_get, i32, env, i32, i32) DEF_HELPER_1(cp1_putc, void, i32) #endif -DEF_HELPER_1(clz, i32, i32) -DEF_HELPER_1(clo, i32, i32) - DEF_HELPER_2(exception, void, env, i32) DEF_HELPER_3(asr_write, void, env, i32, i32) diff --git a/target/unicore32/translate.c b/target/unicore32/translate.c index 514d460408..666a2016a8 100644 --- a/target/unicore32/translate.c +++ b/target/unicore32/translate.c @@ -1479,10 +1479,10 @@ static void do_misc(CPUUniCore32State *env, DisasContext *s, uint32_t insn) /* clz */ tmp = load_reg(s, UCOP_REG_M); if (UCOP_SET(26)) { - gen_helper_clo(tmp, tmp); - 
} else { - gen_helper_clz(tmp, tmp); + /* clo */ + tcg_gen_not_i32(tmp, tmp); } + tcg_gen_clzi_i32(tmp, tmp, 32); store_reg(s, UCOP_REG_D, tmp); return; } diff --git a/target/xtensa/helper.h b/target/xtensa/helper.h index 5ea9c5beec..0c8adae9d4 100644 --- a/target/xtensa/helper.h +++ b/target/xtensa/helper.h @@ -3,8 +3,6 @@ DEF_HELPER_3(exception_cause, noreturn, env, i32, i32) DEF_HELPER_4(exception_cause_vaddr, noreturn, env, i32, i32, i32) DEF_HELPER_3(debug_exception, noreturn, env, i32, i32) -DEF_HELPER_FLAGS_1(nsa, TCG_CALL_NO_RWG_SE, i32, i32) -DEF_HELPER_FLAGS_1(nsau, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_2(wsr_windowbase, void, env, i32) DEF_HELPER_4(entry, void, env, i32, i32, i32) DEF_HELPER_2(retw, i32, env, i32) diff --git a/target/xtensa/op_helper.c b/target/xtensa/op_helper.c index 0a4b2147bc..dc25625d0d 100644 --- a/target/xtensa/op_helper.c +++ b/target/xtensa/op_helper.c @@ -161,19 +161,6 @@ void HELPER(debug_exception)(CPUXtensaState *env, uint32_t pc, uint32_t cause) HELPER(exception)(env, EXC_DEBUG); } -uint32_t HELPER(nsa)(uint32_t v) -{ - if (v & 0x80000000) { - v = ~v; - } - return v ? clz32(v) - 1 : 31; -} - -uint32_t HELPER(nsau)(uint32_t v) -{ - return v ? clz32(v) : 32; -} - static void copy_window_from_phys(CPUXtensaState *env, uint32_t window, uint32_t phys, uint32_t n) { diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c index 0858c296ea..5a93705fac 100644 --- a/target/xtensa/translate.c +++ b/target/xtensa/translate.c @@ -1372,14 +1372,14 @@ static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc) case 14: /*NSAu*/ HAS_OPTION(XTENSA_OPTION_MISC_OP_NSA); if (gen_window_check2(dc, RRR_S, RRR_T)) { - gen_helper_nsa(cpu_R[RRR_T], cpu_R[RRR_S]); + tcg_gen_clrsb_i32(cpu_R[RRR_T], cpu_R[RRR_S]); } break; case 15: /*NSAUu*/ HAS_OPTION(XTENSA_OPTION_MISC_OP_NSA); if (gen_window_check2(dc, RRR_S, RRR_T)) { - gen_helper_nsau(cpu_R[RRR_T], cpu_R[RRR_S]); + tcg_gen_clzi_i32(cpu_R[RRR_T], cpu_R[RRR_S], 32); } break; |