summaryrefslogtreecommitdiffstats
path: root/tcg
diff options
context:
space:
mode:
Diffstat (limited to 'tcg')
-rw-r--r--tcg/README30
-rw-r--r--tcg/arm/tcg-target.c4
-rw-r--r--tcg/arm/tcg-target.h1
-rw-r--r--tcg/hppa/tcg-target.h1
-rw-r--r--tcg/i386/tcg-target.c49
-rw-r--r--tcg/i386/tcg-target.h8
-rw-r--r--tcg/ia64/tcg-target.h8
-rw-r--r--tcg/mips/tcg-target.h1
-rw-r--r--tcg/optimize.c5
-rw-r--r--tcg/ppc/tcg-target.h1
-rw-r--r--tcg/ppc64/tcg-target.h8
-rw-r--r--tcg/s390/tcg-target.h8
-rw-r--r--tcg/sparc/tcg-target.c6
-rw-r--r--tcg/sparc/tcg-target.h8
-rw-r--r--tcg/tcg-op.h228
-rw-r--r--tcg/tcg-opc.h12
-rw-r--r--tcg/tcg-runtime.h2
-rw-r--r--tcg/tcg.c28
-rw-r--r--tcg/tcg.h10
-rw-r--r--tcg/tci/tcg-target.h9
20 files changed, 380 insertions, 47 deletions
diff --git a/tcg/README b/tcg/README
index ec1ac79375..934e7afc96 100644
--- a/tcg/README
+++ b/tcg/README
@@ -361,6 +361,24 @@ Write 8, 16, 32 or 64 bits to host memory.
All this opcodes assume that the pointed host memory doesn't correspond
to a global. In the latter case the behaviour is unpredictable.
+********* Multiword arithmetic support
+
+* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+
+Similar to add/sub, except that the double-word inputs T1 and T2 are
+formed from two single-word arguments, and the double-word output T0
+is returned in two single-word outputs.
+
+* mulu2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mul, except two unsigned inputs T1 and T2 yielding the full
+double-word product T0. The later is returned in two single-word outputs.
+
+* muls2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mulu2, except the two inputs T1 and T2 are signed.
+
********* 64-bit target on 32-bit host support
The following opcodes are internal to TCG. Thus they are to be implemented by
@@ -372,18 +390,6 @@ They are emitted as needed by inline functions within "tcg-op.h".
Similar to brcond, except that the 64-bit values T0 and T1
are formed from two 32-bit arguments.
-* add2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
-* sub2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
-
-Similar to add/sub, except that the 64-bit inputs T1 and T2 are
-formed from two 32-bit arguments, and the 64-bit output T0
-is returned in two 32-bit outputs.
-
-* mulu2_i32 t0_low, t0_high, t1, t2
-
-Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding
-the full 64-bit product T0. The later is returned in two 32-bit outputs.
-
* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond
Similar to setcond, except that the 64-bit values T1 and T2 are
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index d9c33d850f..94c6ca43aa 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1647,6 +1647,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_mulu2_i32:
tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
break;
+ case INDEX_op_muls2_i32:
+ tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
+ break;
/* XXX: Perhaps args[2] & 0x1f is wrong */
case INDEX_op_shl_i32:
c = const_args[2] ?
@@ -1798,6 +1801,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
{ INDEX_op_sub_i32, { "r", "r", "rI" } },
{ INDEX_op_mul_i32, { "r", "r", "r" } },
{ INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
+ { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
{ INDEX_op_and_i32, { "r", "r", "rI" } },
{ INDEX_op_andc_i32, { "r", "r", "rI" } },
{ INDEX_op_or_i32, { "r", "r", "rI" } },
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 7083f3a700..b6eed1f3f4 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -75,6 +75,7 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
enum {
TCG_AREG0 = TCG_REG_R6,
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index e2754fe970..ebd53d9e36 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -98,6 +98,7 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_muls2_i32 0
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 7aec3043e3..9eec06c8a4 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1922,40 +1922,44 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_qemu_st(s, args, 3);
break;
-#if TCG_TARGET_REG_BITS == 32
- case INDEX_op_brcond2_i32:
- tcg_out_brcond2(s, args, const_args, 0);
+ OP_32_64(mulu2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
break;
- case INDEX_op_setcond2_i32:
- tcg_out_setcond2(s, args, const_args);
+ OP_32_64(muls2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
break;
- case INDEX_op_mulu2_i32:
- tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
- break;
- case INDEX_op_add2_i32:
+ OP_32_64(add2):
if (const_args[4]) {
- tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
+ tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
} else {
- tgen_arithr(s, ARITH_ADD, args[0], args[4]);
+ tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
}
if (const_args[5]) {
- tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
+ tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
} else {
- tgen_arithr(s, ARITH_ADC, args[1], args[5]);
+ tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
}
break;
- case INDEX_op_sub2_i32:
+ OP_32_64(sub2):
if (const_args[4]) {
- tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
+ tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
} else {
- tgen_arithr(s, ARITH_SUB, args[0], args[4]);
+ tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
}
if (const_args[5]) {
- tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
+ tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
} else {
- tgen_arithr(s, ARITH_SBB, args[1], args[5]);
+ tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
}
break;
+
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_brcond2_i32:
+ tcg_out_brcond2(s, args, const_args, 0);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args, const_args);
+ break;
#else /* TCG_TARGET_REG_BITS == 64 */
case INDEX_op_movi_i64:
tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
@@ -2078,10 +2082,12 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
#endif
-#if TCG_TARGET_REG_BITS == 32
{ INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
+ { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
{ INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
{ INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
+
+#if TCG_TARGET_REG_BITS == 32
{ INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
{ INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
@@ -2132,6 +2138,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_deposit_i64, { "Q", "0", "Q" } },
{ INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
+
+ { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
+ { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
+ { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
+ { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
#endif
#if TCG_TARGET_REG_BITS == 64
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index e63db9cfe9..e3f6bb965f 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -92,6 +92,10 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div2_i64 1
@@ -114,6 +118,10 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 1
#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i64 1
#endif
#define TCG_TARGET_deposit_i32_valid(ofs, len) \
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 7f3401ecdd..e3d72ea52f 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -136,6 +136,14 @@ typedef enum {
#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muls2_i64 0
#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 78af664cca..0384bd384f 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -87,6 +87,7 @@ typedef enum {
#define TCG_TARGET_HAS_orc_i32 0
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
/* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */
#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 973d2d679f..bc6e5c16a9 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -554,11 +554,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
args[5] = tcg_invert_cond(args[5]);
}
break;
- case INDEX_op_add2_i32:
+ CASE_OP_32_64(add2):
swap_commutative(args[0], &args[2], &args[4]);
swap_commutative(args[1], &args[3], &args[5]);
break;
- case INDEX_op_mulu2_i32:
+ CASE_OP_32_64(mulu2):
+ CASE_OP_32_64(muls2):
swap_commutative(args[0], &args[2], &args[3]);
break;
case INDEX_op_brcond2_i32:
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 0fdad04ee4..17a6bb367a 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -94,6 +94,7 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_AREG0 TCG_REG_R27
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 9b8e9a07b8..aa6a0f0306 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -85,6 +85,10 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
#define TCG_TARGET_HAS_movcond_i32 0
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_div_i64 1
#define TCG_TARGET_HAS_rot_i64 0
@@ -106,6 +110,10 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
#define TCG_TARGET_HAS_movcond_i64 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
#define TCG_AREG0 TCG_REG_R27
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index c87b4138b5..40211e68f1 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -65,6 +65,10 @@ typedef enum TCGReg {
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
#define TCG_TARGET_HAS_movcond_i32 0
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div2_i64 1
@@ -87,6 +91,10 @@ typedef enum TCGReg {
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
#define TCG_TARGET_HAS_movcond_i64 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
#endif
/* used for function call generation */
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 03db514a1d..6d489fcc52 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -1327,6 +1327,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
args[3], const_args[3],
args[4], const_args[4]);
break;
+#endif
+
case INDEX_op_add2_i32:
tcg_out_addsub2(s, args[0], args[1], args[2], args[3],
args[4], const_args[4], args[5], const_args[5],
@@ -1342,7 +1344,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
ARITH_UMUL);
tcg_out_rdy(s, args[1]);
break;
-#endif
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, 0);
@@ -1511,10 +1512,11 @@ static const TCGTargetOpDef sparc_op_defs[] = {
#if TCG_TARGET_REG_BITS == 32
{ INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } },
{ INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } },
+#endif
+
{ INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
{ INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
{ INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
-#endif
#if TCG_TARGET_REG_BITS == 64
{ INDEX_op_mov_i64, { "r", "r" } },
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 256f973c6d..b5217bef25 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -102,6 +102,10 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div_i64 1
@@ -124,6 +128,10 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
#endif
#define TCG_AREG0 TCG_REG_I0
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 91c9d80dd5..d70b2eba33 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2246,6 +2246,26 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low,
tcg_gen_deposit_i64(dest, low, high, 32, 32);
}
+static inline void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
+{
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_mov_i32(lo, TCGV_LOW(arg));
+ tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
+#else
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_trunc_i64_i32(lo, arg);
+ tcg_gen_shri_i64(t0, arg, 32);
+ tcg_gen_trunc_i64_i32(hi, t0);
+ tcg_temp_free_i64(t0);
+#endif
+}
+
+static inline void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
+{
+ tcg_gen_ext32u_i64(lo, arg);
+ tcg_gen_shri_i64(hi, arg, 32);
+}
+
static inline void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret,
TCGv_i32 c1, TCGv_i32 c2,
TCGv_i32 v1, TCGv_i32 v2)
@@ -2312,6 +2332,204 @@ static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret,
#endif
}
+static inline void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+ TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+ if (TCG_TARGET_HAS_add2_i32) {
+ tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
+ /* Allow the optimizer room to replace add2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_concat_i32_i64(t0, al, ah);
+ tcg_gen_concat_i32_i64(t1, bl, bh);
+ tcg_gen_add_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+static inline void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+ TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+ if (TCG_TARGET_HAS_sub2_i32) {
+ tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
+ /* Allow the optimizer room to replace sub2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_concat_i32_i64(t0, al, ah);
+ tcg_gen_concat_i32_i64(t1, bl, bh);
+ tcg_gen_sub_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh,
+ TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_mulu2_i32) {
+ tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
+ /* Allow the optimizer room to replace mulu2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(t0, arg1);
+ tcg_gen_extu_i32_i64(t1, arg2);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh,
+ TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_muls2_i32) {
+ tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
+ /* Allow the optimizer room to replace muls2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
+ } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ TCGv_i32 t3 = tcg_temp_new_i32();
+ tcg_gen_op4_i32(INDEX_op_mulu2_i32, t0, t1, arg1, arg2);
+ /* Allow the optimizer room to replace mulu2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
+ /* Adjust for negative inputs. */
+ tcg_gen_sari_i32(t2, arg1, 31);
+ tcg_gen_sari_i32(t3, arg2, 31);
+ tcg_gen_and_i32(t2, t2, arg2);
+ tcg_gen_and_i32(t3, t3, arg1);
+ tcg_gen_sub_i32(rh, t1, t2);
+ tcg_gen_sub_i32(rh, rh, t3);
+ tcg_gen_mov_i32(rl, t0);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+ tcg_temp_free_i32(t3);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_ext_i32_i64(t0, arg1);
+ tcg_gen_ext_i32_i64(t1, arg2);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+static inline void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+ TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+ if (TCG_TARGET_HAS_add2_i64) {
+ tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
+ /* Allow the optimizer room to replace add2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_add_i64(t0, al, bl);
+ tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
+ tcg_gen_add_i64(rh, ah, bh);
+ tcg_gen_add_i64(rh, rh, t1);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+static inline void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+ TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+ if (TCG_TARGET_HAS_sub2_i64) {
+ tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
+ /* Allow the optimizer room to replace sub2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_sub_i64(t0, al, bl);
+ tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
+ tcg_gen_sub_i64(rh, ah, bh);
+ tcg_gen_sub_i64(rh, rh, t1);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh,
+ TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_mulu2_i64) {
+ tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
+ /* Allow the optimizer room to replace mulu2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
+ } else if (TCG_TARGET_HAS_mulu2_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+ tcg_gen_op4_i64(INDEX_op_mulu2_i64, t0, t1, arg1, arg2);
+ /* Allow the optimizer room to replace mulu2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
+ /* Adjust for negative inputs. */
+ tcg_gen_sari_i64(t2, arg1, 63);
+ tcg_gen_sari_i64(t3, arg2, 63);
+ tcg_gen_and_i64(t2, t2, arg2);
+ tcg_gen_and_i64(t3, t3, arg1);
+ tcg_gen_sub_i64(rh, t1, t2);
+ tcg_gen_sub_i64(rh, rh, t3);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t3);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and unsigned. */
+ sizemask |= tcg_gen_sizemask(0, 1, 0);
+ sizemask |= tcg_gen_sizemask(1, 1, 0);
+ sizemask |= tcg_gen_sizemask(2, 1, 0);
+ tcg_gen_mul_i64(t0, arg1, arg2);
+ tcg_gen_helper64(tcg_helper_muluh_i64, sizemask, rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh,
+ TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_muls2_i64) {
+ tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
+ /* Allow the optimizer room to replace muls2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and signed. */
+ sizemask |= tcg_gen_sizemask(0, 1, 1);
+ sizemask |= tcg_gen_sizemask(1, 1, 1);
+ sizemask |= tcg_gen_sizemask(2, 1, 1);
+ tcg_gen_mul_i64(t0, arg1, arg2);
+ tcg_gen_helper64(tcg_helper_mulsh_i64, sizemask, rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
/***************************************/
/* QEMU specific operations. Their type depend on the QEMU CPU
type. */
@@ -2625,6 +2843,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
#define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
#define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64
+#define tcg_gen_extr_i64_tl tcg_gen_extr32_i64
#define tcg_gen_andc_tl tcg_gen_andc_i64
#define tcg_gen_eqv_tl tcg_gen_eqv_i64
#define tcg_gen_nand_tl tcg_gen_nand_i64
@@ -2638,6 +2857,10 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_const_tl tcg_const_i64
#define tcg_const_local_tl tcg_const_local_i64
#define tcg_gen_movcond_tl tcg_gen_movcond_i64
+#define tcg_gen_add2_tl tcg_gen_add2_i64
+#define tcg_gen_sub2_tl tcg_gen_sub2_i64
+#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
+#define tcg_gen_muls2_tl tcg_gen_muls2_i64
#else
#define tcg_gen_movi_tl tcg_gen_movi_i32
#define tcg_gen_mov_tl tcg_gen_mov_i32
@@ -2697,6 +2920,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32
#define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
+#define tcg_gen_extr_tl_i64 tcg_gen_extr_i32_i64
#define tcg_gen_andc_tl tcg_gen_andc_i32
#define tcg_gen_eqv_tl tcg_gen_eqv_i32
#define tcg_gen_nand_tl tcg_gen_nand_i32
@@ -2710,6 +2934,10 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_const_tl tcg_const_i32
#define tcg_const_local_tl tcg_const_local_i32
#define tcg_gen_movcond_tl tcg_gen_movcond_i32
+#define tcg_gen_add2_tl tcg_gen_add2_i32
+#define tcg_gen_sub2_tl tcg_gen_sub2_i32
+#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
+#define tcg_gen_muls2_tl tcg_gen_muls2_i32
#endif
#if TCG_TARGET_REG_BITS == 32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 9651063414..4246e9c1fa 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -83,10 +83,11 @@ DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32))
DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END)
-DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32))
-DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32))
+DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
+DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
+DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
+DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
-DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_REG_BITS == 32))
DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
@@ -158,6 +159,11 @@ DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64))
DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64))
DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
+DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
+DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
+DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
+DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
+
/* QEMU specific */
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
DEF(debug_insn_start, 0, 0, 2, 0)
diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
index 5615b133e0..a1ebef9f9c 100644
--- a/tcg/tcg-runtime.h
+++ b/tcg/tcg-runtime.h
@@ -12,7 +12,9 @@ int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2);
int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2);
int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2);
int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2);
+int64_t tcg_helper_mulsh_i64(int64_t arg1, int64_t arg2);
uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2);
uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2);
+uint64_t tcg_helper_muluh_i64(uint64_t arg1, uint64_t arg2);
#endif
diff --git a/tcg/tcg.c b/tcg/tcg.c
index c8a843ed09..1d8265e72e 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1217,7 +1217,7 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
static void tcg_liveness_analysis(TCGContext *s)
{
int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
- TCGOpcode op;
+ TCGOpcode op, op_new;
TCGArg *args;
const TCGOpDef *def;
uint8_t *dead_temps, *mem_temps;
@@ -1324,7 +1324,17 @@ static void tcg_liveness_analysis(TCGContext *s)
break;
case INDEX_op_add2_i32:
+ op_new = INDEX_op_add_i32;
+ goto do_addsub2;
case INDEX_op_sub2_i32:
+ op_new = INDEX_op_sub_i32;
+ goto do_addsub2;
+ case INDEX_op_add2_i64:
+ op_new = INDEX_op_add_i64;
+ goto do_addsub2;
+ case INDEX_op_sub2_i64:
+ op_new = INDEX_op_sub_i64;
+ do_addsub2:
args -= 6;
nb_iargs = 4;
nb_oargs = 2;
@@ -1337,12 +1347,7 @@ static void tcg_liveness_analysis(TCGContext *s)
goto do_remove;
}
/* Create the single operation plus nop. */
- if (op == INDEX_op_add2_i32) {
- op = INDEX_op_add_i32;
- } else {
- op = INDEX_op_sub_i32;
- }
- s->gen_opc_buf[op_index] = op;
+ s->gen_opc_buf[op_index] = op = op_new;
args[1] = args[2];
args[2] = args[4];
assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
@@ -1354,6 +1359,13 @@ static void tcg_liveness_analysis(TCGContext *s)
goto do_not_remove;
case INDEX_op_mulu2_i32:
+ case INDEX_op_muls2_i32:
+ op_new = INDEX_op_mul_i32;
+ goto do_mul2;
+ case INDEX_op_mulu2_i64:
+ case INDEX_op_muls2_i64:
+ op_new = INDEX_op_mul_i64;
+ do_mul2:
args -= 4;
nb_iargs = 2;
nb_oargs = 2;
@@ -1362,7 +1374,7 @@ static void tcg_liveness_analysis(TCGContext *s)
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
goto do_remove;
}
- s->gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+ s->gen_opc_buf[op_index] = op = op_new;
args[1] = args[2];
args[2] = args[3];
assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 51c8176550..b195396b0f 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -57,8 +57,8 @@ typedef uint64_t TCGRegSet;
#error unsupported
#endif
-/* Turn some undef macros into false macros. */
#if TCG_TARGET_REG_BITS == 32
+/* Turn some undef macros into false macros. */
#define TCG_TARGET_HAS_div_i64 0
#define TCG_TARGET_HAS_div2_i64 0
#define TCG_TARGET_HAS_rot_i64 0
@@ -80,6 +80,14 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
#define TCG_TARGET_HAS_movcond_i64 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+/* Turn some undef macros into true macros. */
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
#endif
#ifndef TCG_TARGET_deposit_i32_valid
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index a832f5cf52..1f17576f54 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -76,6 +76,7 @@
#define TCG_TARGET_HAS_orc_i32 0
#define TCG_TARGET_HAS_rot_i32 1
#define TCG_TARGET_HAS_movcond_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_bswap16_i64 1
@@ -100,6 +101,14 @@
#define TCG_TARGET_HAS_orc_i64 0
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_movcond_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 0
#endif /* TCG_TARGET_REG_BITS == 64 */
/* Number of registers available.