Diffstat (limited to 'tcg')
-rw-r--r-- | tcg/README             |  30
-rw-r--r-- | tcg/arm/tcg-target.c   |   4
-rw-r--r-- | tcg/arm/tcg-target.h   |   1
-rw-r--r-- | tcg/hppa/tcg-target.h  |   1
-rw-r--r-- | tcg/i386/tcg-target.c  |  49
-rw-r--r-- | tcg/i386/tcg-target.h  |   8
-rw-r--r-- | tcg/ia64/tcg-target.h  |   8
-rw-r--r-- | tcg/mips/tcg-target.h  |   1
-rw-r--r-- | tcg/optimize.c         |   5
-rw-r--r-- | tcg/ppc/tcg-target.h   |   1
-rw-r--r-- | tcg/ppc64/tcg-target.h |   8
-rw-r--r-- | tcg/s390/tcg-target.h  |   8
-rw-r--r-- | tcg/sparc/tcg-target.c |   6
-rw-r--r-- | tcg/sparc/tcg-target.h |   8
-rw-r--r-- | tcg/tcg-op.h           | 228
-rw-r--r-- | tcg/tcg-opc.h          |  12
-rw-r--r-- | tcg/tcg-runtime.h      |   2
-rw-r--r-- | tcg/tcg.c              |  28
-rw-r--r-- | tcg/tcg.h              |  10
-rw-r--r-- | tcg/tci/tcg-target.h   |   9
20 files changed, 380 insertions, 47 deletions
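
The patch below adds double-word (multiword) arithmetic ops to TCG: add2, sub2, mulu2 and muls2, in both _i32 and _i64 variants. As a rough sketch of the semantics documented in the README hunk further down (not code from the patch; plain C stand-ins for the i32 variants, with illustrative helper names):

#include <stdint.h>

/* mulu2_i32: full unsigned 32x32 -> 64 product, returned as low/high words. */
static void mulu2_i32(uint32_t *lo, uint32_t *hi, uint32_t a, uint32_t b)
{
    uint64_t p = (uint64_t)a * b;
    *lo = (uint32_t)p;
    *hi = (uint32_t)(p >> 32);
}

/* muls2_i32: same, but the two inputs are treated as signed. */
static void muls2_i32(uint32_t *lo, uint32_t *hi, int32_t a, int32_t b)
{
    int64_t p = (int64_t)a * b;
    *lo = (uint32_t)p;
    *hi = (uint32_t)((uint64_t)p >> 32);
}

/* add2_i32: double-word add; inputs and output are (low, high) word pairs.
 * sub2_i32 is the same with subtraction. */
static void add2_i32(uint32_t *rl, uint32_t *rh,
                     uint32_t al, uint32_t ah, uint32_t bl, uint32_t bh)
{
    uint64_t a = ((uint64_t)ah << 32) | al;
    uint64_t b = ((uint64_t)bh << 32) | bl;
    uint64_t r = a + b;
    *rl = (uint32_t)r;
    *rh = (uint32_t)(r >> 32);
}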
diff --git a/tcg/README b/tcg/README index ec1ac79375..934e7afc96 100644 --- a/tcg/README +++ b/tcg/README @@ -361,6 +361,24 @@ Write 8, 16, 32 or 64 bits to host memory. All this opcodes assume that the pointed host memory doesn't correspond to a global. In the latter case the behaviour is unpredictable. +********* Multiword arithmetic support + +* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high +* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high + +Similar to add/sub, except that the double-word inputs T1 and T2 are +formed from two single-word arguments, and the double-word output T0 +is returned in two single-word outputs. + +* mulu2_i32/i64 t0_low, t0_high, t1, t2 + +Similar to mul, except two unsigned inputs T1 and T2 yielding the full +double-word product T0. The later is returned in two single-word outputs. + +* muls2_i32/i64 t0_low, t0_high, t1, t2 + +Similar to mulu2, except the two inputs T1 and T2 are signed. + ********* 64-bit target on 32-bit host support The following opcodes are internal to TCG. Thus they are to be implemented by @@ -372,18 +390,6 @@ They are emitted as needed by inline functions within "tcg-op.h". Similar to brcond, except that the 64-bit values T0 and T1 are formed from two 32-bit arguments. -* add2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high -* sub2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high - -Similar to add/sub, except that the 64-bit inputs T1 and T2 are -formed from two 32-bit arguments, and the 64-bit output T0 -is returned in two 32-bit outputs. - -* mulu2_i32 t0_low, t0_high, t1, t2 - -Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding -the full 64-bit product T0. The later is returned in two 32-bit outputs. - * setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond Similar to setcond, except that the 64-bit values T1 and T2 are diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index d9c33d850f..94c6ca43aa 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -1647,6 +1647,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mulu2_i32: tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); break; + case INDEX_op_muls2_i32: + tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); + break; /* XXX: Perhaps args[2] & 0x1f is wrong */ case INDEX_op_shl_i32: c = const_args[2] ? 
@@ -1798,6 +1801,7 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_sub_i32, { "r", "r", "rI" } }, { INDEX_op_mul_i32, { "r", "r", "r" } }, { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, + { INDEX_op_muls2_i32, { "r", "r", "r", "r" } }, { INDEX_op_and_i32, { "r", "r", "rI" } }, { INDEX_op_andc_i32, { "r", "r", "rI" } }, { INDEX_op_or_i32, { "r", "r", "rI" } }, diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 7083f3a700..b6eed1f3f4 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -75,6 +75,7 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_muls2_i32 1 enum { TCG_AREG0 = TCG_REG_R6, diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h index e2754fe970..ebd53d9e36 100644 --- a/tcg/hppa/tcg-target.h +++ b/tcg/hppa/tcg-target.h @@ -98,6 +98,7 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_muls2_i32 0 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */ diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 7aec3043e3..9eec06c8a4 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -1922,40 +1922,44 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_qemu_st(s, args, 3); break; -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args, const_args, 0); + OP_32_64(mulu2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args, const_args); + OP_32_64(muls2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); break; - case INDEX_op_mulu2_i32: - tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]); - break; - case INDEX_op_add2_i32: + OP_32_64(add2): if (const_args[4]) { - tgen_arithi(s, ARITH_ADD, args[0], args[4], 1); + tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1); } else { - tgen_arithr(s, ARITH_ADD, args[0], args[4]); + tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]); } if (const_args[5]) { - tgen_arithi(s, ARITH_ADC, args[1], args[5], 1); + tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1); } else { - tgen_arithr(s, ARITH_ADC, args[1], args[5]); + tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]); } break; - case INDEX_op_sub2_i32: + OP_32_64(sub2): if (const_args[4]) { - tgen_arithi(s, ARITH_SUB, args[0], args[4], 1); + tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1); } else { - tgen_arithr(s, ARITH_SUB, args[0], args[4]); + tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]); } if (const_args[5]) { - tgen_arithi(s, ARITH_SBB, args[1], args[5], 1); + tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1); } else { - tgen_arithr(s, ARITH_SBB, args[1], args[5]); + tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]); } break; + +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args, const_args, 0); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args, const_args); + break; #else /* TCG_TARGET_REG_BITS == 64 */ case INDEX_op_movi_i64: tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); @@ -2078,10 +2082,12 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } }, #endif -#if TCG_TARGET_REG_BITS == 32 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, + { INDEX_op_muls2_i32, { "a", "d", "a", "r" } }, { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, 
{ INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + +#if TCG_TARGET_REG_BITS == 32 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, #else @@ -2132,6 +2138,11 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_deposit_i64, { "Q", "0", "Q" } }, { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } }, + + { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } }, + { INDEX_op_muls2_i64, { "a", "d", "a", "r" } }, + { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } }, + { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } }, #endif #if TCG_TARGET_REG_BITS == 64 diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index e63db9cfe9..e3f6bb965f 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -92,6 +92,10 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 1 +#define TCG_TARGET_HAS_muls2_i32 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 1 @@ -114,6 +118,10 @@ typedef enum { #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_add2_i64 1 +#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_mulu2_i64 1 +#define TCG_TARGET_HAS_muls2_i64 1 #endif #define TCG_TARGET_deposit_i32_valid(ofs, len) \ diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 7f3401ecdd..e3d72ea52f 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -136,6 +136,14 @@ typedef enum { #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 78af664cca..0384bd384f 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -87,6 +87,7 @@ typedef enum { #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 /* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */ #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ diff --git a/tcg/optimize.c b/tcg/optimize.c index 973d2d679f..bc6e5c16a9 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -554,11 +554,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, args[5] = tcg_invert_cond(args[5]); } break; - case INDEX_op_add2_i32: + CASE_OP_32_64(add2): swap_commutative(args[0], &args[2], &args[4]); swap_commutative(args[1], &args[3], &args[5]); break; - case INDEX_op_mulu2_i32: + CASE_OP_32_64(mulu2): + CASE_OP_32_64(muls2): swap_commutative(args[0], &args[2], &args[3]); break; case INDEX_op_brcond2_i32: diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 0fdad04ee4..17a6bb367a 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -94,6 +94,7 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_muls2_i32 0 #define TCG_AREG0 
TCG_REG_R27 diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index 9b8e9a07b8..aa6a0f0306 100644 --- a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -85,6 +85,10 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rot_i64 0 @@ -106,6 +110,10 @@ typedef enum { #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 #define TCG_AREG0 TCG_REG_R27 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index c87b4138b5..40211e68f1 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -65,6 +65,10 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 1 @@ -87,6 +91,10 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 #endif /* used for function call generation */ diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index 03db514a1d..6d489fcc52 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -1327,6 +1327,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, args[3], const_args[3], args[4], const_args[4]); break; +#endif + case INDEX_op_add2_i32: tcg_out_addsub2(s, args[0], args[1], args[2], args[3], args[4], const_args[4], args[5], const_args[5], @@ -1342,7 +1344,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, ARITH_UMUL); tcg_out_rdy(s, args[1]); break; -#endif case INDEX_op_qemu_ld8u: tcg_out_qemu_ld(s, args, 0); @@ -1511,10 +1512,11 @@ static const TCGTargetOpDef sparc_op_defs[] = { #if TCG_TARGET_REG_BITS == 32 { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } }, +#endif + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } }, -#endif #if TCG_TARGET_REG_BITS == 64 { INDEX_op_mov_i64, { "r", "r" } }, diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 256f973c6d..b5217bef25 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -102,6 +102,10 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 1 +#define TCG_TARGET_HAS_muls2_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div_i64 1 @@ -124,6 +128,10 @@ typedef enum { #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 
0 +#define TCG_TARGET_HAS_muls2_i64 0 #endif #define TCG_AREG0 TCG_REG_I0 diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 91c9d80dd5..d70b2eba33 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -2246,6 +2246,26 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low, tcg_gen_deposit_i64(dest, low, high, 32, 32); } +static inline void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg) +{ +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_mov_i32(lo, TCGV_LOW(arg)); + tcg_gen_mov_i32(hi, TCGV_HIGH(arg)); +#else + TCGv_i64 t0 = tcg_temp_new_i64(); + tcg_gen_trunc_i64_i32(lo, arg); + tcg_gen_shri_i64(t0, arg, 32); + tcg_gen_trunc_i64_i32(hi, t0); + tcg_temp_free_i64(t0); +#endif +} + +static inline void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg) +{ + tcg_gen_ext32u_i64(lo, arg); + tcg_gen_shri_i64(hi, arg, 32); +} + static inline void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2) @@ -2312,6 +2332,204 @@ static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, #endif } +static inline void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, + TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) +{ + if (TCG_TARGET_HAS_add2_i32) { + tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh); + /* Allow the optimizer room to replace add2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_concat_i32_i64(t0, al, ah); + tcg_gen_concat_i32_i64(t1, bl, bh); + tcg_gen_add_i64(t0, t0, t1); + tcg_gen_extr_i64_i32(rl, rh, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, + TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) +{ + if (TCG_TARGET_HAS_sub2_i32) { + tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh); + /* Allow the optimizer room to replace sub2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_concat_i32_i64(t0, al, ah); + tcg_gen_concat_i32_i64(t1, bl, bh); + tcg_gen_sub_i64(t0, t0, t1); + tcg_gen_extr_i64_i32(rl, rh, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCG_TARGET_HAS_mulu2_i32) { + tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); + /* Allow the optimizer room to replace mulu2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(t0, arg1); + tcg_gen_extu_i32_i64(t1, arg2); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_extr_i64_i32(rl, rh, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCG_TARGET_HAS_muls2_i32) { + tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); + /* Allow the optimizer room to replace muls2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + tcg_gen_op4_i32(INDEX_op_mulu2_i32, t0, t1, arg1, arg2); + /* Allow the optimizer room to replace mulu2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + /* Adjust for negative inputs. 
*/ + tcg_gen_sari_i32(t2, arg1, 31); + tcg_gen_sari_i32(t3, arg2, 31); + tcg_gen_and_i32(t2, t2, arg2); + tcg_gen_and_i32(t3, t3, arg1); + tcg_gen_sub_i32(rh, t1, t2); + tcg_gen_sub_i32(rh, rh, t3); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(t0, arg1); + tcg_gen_ext_i32_i64(t1, arg2); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_extr_i64_i32(rl, rh, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) +{ + if (TCG_TARGET_HAS_add2_i64) { + tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh); + /* Allow the optimizer room to replace add2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_add_i64(t0, al, bl); + tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al); + tcg_gen_add_i64(rh, ah, bh); + tcg_gen_add_i64(rh, rh, t1); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) +{ + if (TCG_TARGET_HAS_sub2_i64) { + tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh); + /* Allow the optimizer room to replace sub2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_sub_i64(t0, al, bl); + tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl); + tcg_gen_sub_i64(rh, ah, bh); + tcg_gen_sub_i64(rh, rh, t1); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, + TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCG_TARGET_HAS_mulu2_i64) { + tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); + /* Allow the optimizer room to replace mulu2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_HAS_mulu2_i64) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + TCGv_i64 t3 = tcg_temp_new_i64(); + tcg_gen_op4_i64(INDEX_op_mulu2_i64, t0, t1, arg1, arg2); + /* Allow the optimizer room to replace mulu2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + /* Adjust for negative inputs. */ + tcg_gen_sari_i64(t2, arg1, 63); + tcg_gen_sari_i64(t3, arg2, 63); + tcg_gen_and_i64(t2, t2, arg2); + tcg_gen_and_i64(t3, t3, arg1); + tcg_gen_sub_i64(rh, t1, t2); + tcg_gen_sub_i64(rh, rh, t3); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + tcg_temp_free_i64(t3); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + tcg_gen_mul_i64(t0, arg1, arg2); + tcg_gen_helper64(tcg_helper_muluh_i64, sizemask, rh, arg1, arg2); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + } +} + +static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, + TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCG_TARGET_HAS_muls2_i64) { + tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); + /* Allow the optimizer room to replace muls2 with two moves. 
*/ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + tcg_gen_mul_i64(t0, arg1, arg2); + tcg_gen_helper64(tcg_helper_mulsh_i64, sizemask, rh, arg1, arg2); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + } +} + /***************************************/ /* QEMU specific operations. Their type depend on the QEMU CPU type. */ @@ -2625,6 +2843,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_bswap32_tl tcg_gen_bswap32_i64 #define tcg_gen_bswap64_tl tcg_gen_bswap64_i64 #define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64 +#define tcg_gen_extr_i64_tl tcg_gen_extr32_i64 #define tcg_gen_andc_tl tcg_gen_andc_i64 #define tcg_gen_eqv_tl tcg_gen_eqv_i64 #define tcg_gen_nand_tl tcg_gen_nand_i64 @@ -2638,6 +2857,10 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_const_tl tcg_const_i64 #define tcg_const_local_tl tcg_const_local_i64 #define tcg_gen_movcond_tl tcg_gen_movcond_i64 +#define tcg_gen_add2_tl tcg_gen_add2_i64 +#define tcg_gen_sub2_tl tcg_gen_sub2_i64 +#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64 +#define tcg_gen_muls2_tl tcg_gen_muls2_i64 #else #define tcg_gen_movi_tl tcg_gen_movi_i32 #define tcg_gen_mov_tl tcg_gen_mov_i32 @@ -2697,6 +2920,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 +#define tcg_gen_extr_tl_i64 tcg_gen_extr_i32_i64 #define tcg_gen_andc_tl tcg_gen_andc_i32 #define tcg_gen_eqv_tl tcg_gen_eqv_i32 #define tcg_gen_nand_tl tcg_gen_nand_i32 @@ -2710,6 +2934,10 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_const_tl tcg_const_i32 #define tcg_const_local_tl tcg_const_local_i32 #define tcg_gen_movcond_tl tcg_gen_movcond_i32 +#define tcg_gen_add2_tl tcg_gen_add2_i32 +#define tcg_gen_sub2_tl tcg_gen_sub2_i32 +#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32 +#define tcg_gen_muls2_tl tcg_gen_muls2_i32 #endif #if TCG_TARGET_REG_BITS == 32 diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 9651063414..4246e9c1fa 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -83,10 +83,11 @@ DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32)) DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END) -DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32)) -DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32)) +DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32)) +DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32)) +DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32)) +DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32)) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32)) -DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_REG_BITS == 32)) DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32)) DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32)) @@ -158,6 +159,11 @@ DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64)) DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64)) DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64)) +DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64)) +DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64)) +DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64)) 
+DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64)) + /* QEMU specific */ #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS DEF(debug_insn_start, 0, 0, 2, 0) diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h index 5615b133e0..a1ebef9f9c 100644 --- a/tcg/tcg-runtime.h +++ b/tcg/tcg-runtime.h @@ -12,7 +12,9 @@ int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2); int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2); int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2); int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2); +int64_t tcg_helper_mulsh_i64(int64_t arg1, int64_t arg2); uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2); uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2); +uint64_t tcg_helper_muluh_i64(uint64_t arg1, uint64_t arg2); #endif @@ -1217,7 +1217,7 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, static void tcg_liveness_analysis(TCGContext *s) { int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops; - TCGOpcode op; + TCGOpcode op, op_new; TCGArg *args; const TCGOpDef *def; uint8_t *dead_temps, *mem_temps; @@ -1324,7 +1324,17 @@ static void tcg_liveness_analysis(TCGContext *s) break; case INDEX_op_add2_i32: + op_new = INDEX_op_add_i32; + goto do_addsub2; case INDEX_op_sub2_i32: + op_new = INDEX_op_sub_i32; + goto do_addsub2; + case INDEX_op_add2_i64: + op_new = INDEX_op_add_i64; + goto do_addsub2; + case INDEX_op_sub2_i64: + op_new = INDEX_op_sub_i64; + do_addsub2: args -= 6; nb_iargs = 4; nb_oargs = 2; @@ -1337,12 +1347,7 @@ static void tcg_liveness_analysis(TCGContext *s) goto do_remove; } /* Create the single operation plus nop. */ - if (op == INDEX_op_add2_i32) { - op = INDEX_op_add_i32; - } else { - op = INDEX_op_sub_i32; - } - s->gen_opc_buf[op_index] = op; + s->gen_opc_buf[op_index] = op = op_new; args[1] = args[2]; args[2] = args[4]; assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); @@ -1354,6 +1359,13 @@ static void tcg_liveness_analysis(TCGContext *s) goto do_not_remove; case INDEX_op_mulu2_i32: + case INDEX_op_muls2_i32: + op_new = INDEX_op_mul_i32; + goto do_mul2; + case INDEX_op_mulu2_i64: + case INDEX_op_muls2_i64: + op_new = INDEX_op_mul_i64; + do_mul2: args -= 4; nb_iargs = 2; nb_oargs = 2; @@ -1362,7 +1374,7 @@ static void tcg_liveness_analysis(TCGContext *s) if (dead_temps[args[0]] && !mem_temps[args[0]]) { goto do_remove; } - s->gen_opc_buf[op_index] = op = INDEX_op_mul_i32; + s->gen_opc_buf[op_index] = op = op_new; args[1] = args[2]; args[2] = args[3]; assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); @@ -57,8 +57,8 @@ typedef uint64_t TCGRegSet; #error unsupported #endif -/* Turn some undef macros into false macros. */ #if TCG_TARGET_REG_BITS == 32 +/* Turn some undef macros into false macros. */ #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_div2_i64 0 #define TCG_TARGET_HAS_rot_i64 0 @@ -80,6 +80,14 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +/* Turn some undef macros into true macros. 
*/ +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 1 #endif #ifndef TCG_TARGET_deposit_i32_valid diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index a832f5cf52..1f17576f54 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -76,6 +76,7 @@ #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_bswap16_i64 1 @@ -100,6 +101,14 @@ #define TCG_TARGET_HAS_orc_i64 0 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 + +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 #endif /* TCG_TARGET_REG_BITS == 64 */ /* Number of registers available. |
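
For reference, a minimal standalone check (not part of the patch; names are illustrative) of the identity the tcg_gen_muls2_i32 fallback above relies on when only mulu2 is available: take the full unsigned product, then subtract (a < 0 ? b : 0) and (b < 0 ? a : 0) from the high word, which is what the sari/and/sub sequence in tcg-op.h computes.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static void check(int32_t a, int32_t b)
{
    uint32_t ua = (uint32_t)a, ub = (uint32_t)b;
    uint64_t up = (uint64_t)ua * ub;        /* the mulu2 result */
    uint32_t lo = (uint32_t)up;
    uint32_t hi = (uint32_t)(up >> 32);

    uint32_t t2 = (a < 0) ? ub : 0;         /* sari_i32(a, 31) & b in the TCG code */
    uint32_t t3 = (b < 0) ? ua : 0;         /* sari_i32(b, 31) & a */
    uint32_t rh = hi - t2 - t3;             /* adjusted high word */

    int64_t sp = (int64_t)a * b;            /* reference signed product */
    assert(lo == (uint32_t)sp);
    assert(rh == (uint32_t)((uint64_t)sp >> 32));
}

int main(void)
{
    check(-1, -1);
    check(0x7fffffff, -2);
    check(INT32_MIN, 3);
    check(12345, -67890);
    printf("muls2-from-mulu2 adjustment holds for the sample inputs\n");
    return 0;
}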