summaryrefslogtreecommitdiffstats
path: root/tcg
diff options
context:
space:
mode:
authorRichard Henderson2014-09-19 22:49:15 +0200
committerRichard Henderson2015-02-13 06:21:38 +0100
commitc45cb8bb89fc798489869982c4c463b26ce43d7b (patch)
tree5d83e8a32cb947fe316b71e72a033d707da7ca50 /tcg
parenttcg: Introduce tcg_op_buf_count and tcg_op_buf_full (diff)
downloadqemu-c45cb8bb89fc798489869982c4c463b26ce43d7b.tar.gz
qemu-c45cb8bb89fc798489869982c4c463b26ce43d7b.tar.xz
qemu-c45cb8bb89fc798489869982c4c463b26ce43d7b.zip
tcg: Put opcodes in a linked list
The previous setup required ops and args to be completely sequential, and was error prone when it came to both iteration and optimization. Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de> Signed-off-by: Richard Henderson <rth@twiddle.net>
Diffstat (limited to 'tcg')
-rw-r--r--tcg/optimize.c286
-rw-r--r--tcg/tcg-op.c190
-rw-r--r--tcg/tcg.c376
-rw-r--r--tcg/tcg.h58
4 files changed, 415 insertions, 495 deletions
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 34ae3c2857..f2b8acf1e0 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -162,13 +162,13 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
return false;
}
-static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args,
+static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
TCGOpcode old_op, TCGArg dst, TCGArg src)
{
TCGOpcode new_op = op_to_mov(old_op);
tcg_target_ulong mask;
- s->gen_opc_buf[op_index] = new_op;
+ op->opc = new_op;
reset_temp(dst);
mask = temps[src].mask;
@@ -193,17 +193,17 @@ static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args,
temps[src].next_copy = dst;
}
- gen_args[0] = dst;
- gen_args[1] = src;
+ args[0] = dst;
+ args[1] = src;
}
-static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args,
+static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
TCGOpcode old_op, TCGArg dst, TCGArg val)
{
TCGOpcode new_op = op_to_movi(old_op);
tcg_target_ulong mask;
- s->gen_opc_buf[op_index] = new_op;
+ op->opc = new_op;
reset_temp(dst);
temps[dst].state = TCG_TEMP_CONST;
@@ -215,8 +215,8 @@ static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args,
}
temps[dst].mask = mask;
- gen_args[0] = dst;
- gen_args[1] = val;
+ args[0] = dst;
+ args[1] = val;
}
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
@@ -533,11 +533,9 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
}
/* Propagate constants and copies, fold constant expressions. */
-static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
- TCGArg *args, TCGOpDef *tcg_op_defs)
+static void tcg_constant_folding(TCGContext *s)
{
- int nb_ops, op_index, nb_temps, nb_globals;
- TCGArg *gen_args;
+ int oi, oi_next, nb_temps, nb_globals;
/* Array VALS has an element for each temp.
If this temp holds a constant then its value is kept in VALS' element.
@@ -548,24 +546,23 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
nb_globals = s->nb_globals;
reset_all_temps(nb_temps);
- nb_ops = tcg_opc_ptr - s->gen_opc_buf;
- gen_args = args;
- for (op_index = 0; op_index < nb_ops; op_index++) {
- TCGOpcode op = s->gen_opc_buf[op_index];
- const TCGOpDef *def = &tcg_op_defs[op];
+ for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
tcg_target_ulong mask, partmask, affected;
- int nb_oargs, nb_iargs, nb_args, i;
+ int nb_oargs, nb_iargs, i;
TCGArg tmp;
- if (op == INDEX_op_call) {
- *gen_args++ = tmp = *args++;
- nb_oargs = tmp >> 16;
- nb_iargs = tmp & 0xffff;
- nb_args = nb_oargs + nb_iargs + def->nb_cargs;
+ TCGOp * const op = &s->gen_op_buf[oi];
+ TCGArg * const args = &s->gen_opparam_buf[op->args];
+ TCGOpcode opc = op->opc;
+ const TCGOpDef *def = &tcg_op_defs[opc];
+
+ oi_next = op->next;
+ if (opc == INDEX_op_call) {
+ nb_oargs = op->callo;
+ nb_iargs = op->calli;
} else {
nb_oargs = def->nb_oargs;
nb_iargs = def->nb_iargs;
- nb_args = def->nb_args;
}
/* Do copy propagation */
@@ -576,7 +573,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
/* For commutative operations make constant second argument */
- switch (op) {
+ switch (opc) {
CASE_OP_32_64(add):
CASE_OP_32_64(mul):
CASE_OP_32_64(and):
@@ -634,7 +631,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
and "sub r, 0, a => neg r, a" case. */
- switch (op) {
+ switch (opc) {
CASE_OP_32_64(shl):
CASE_OP_32_64(shr):
CASE_OP_32_64(sar):
@@ -642,9 +639,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(rotr):
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[1]].val == 0) {
- tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
- args += 3;
- gen_args += 2;
+ tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
continue;
}
break;
@@ -657,7 +652,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Proceed with possible constant folding. */
break;
}
- if (op == INDEX_op_sub_i32) {
+ if (opc == INDEX_op_sub_i32) {
neg_op = INDEX_op_neg_i32;
have_neg = TCG_TARGET_HAS_neg_i32;
} else {
@@ -669,12 +664,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[1]].val == 0) {
- s->gen_opc_buf[op_index] = neg_op;
+ op->opc = neg_op;
reset_temp(args[0]);
- gen_args[0] = args[0];
- gen_args[1] = args[2];
- args += 3;
- gen_args += 2;
+ args[1] = args[2];
continue;
}
}
@@ -728,12 +720,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if (!have_not) {
break;
}
- s->gen_opc_buf[op_index] = not_op;
+ op->opc = not_op;
reset_temp(args[0]);
- gen_args[0] = args[0];
- gen_args[1] = args[i];
- args += 3;
- gen_args += 2;
+ args[1] = args[i];
continue;
}
default:
@@ -741,7 +730,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
/* Simplify expression for "op r, a, const => mov r, a" cases */
- switch (op) {
+ switch (opc) {
CASE_OP_32_64(add):
CASE_OP_32_64(sub):
CASE_OP_32_64(shl):
@@ -769,12 +758,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
break;
do_mov3:
if (temps_are_copies(args[0], args[1])) {
- s->gen_opc_buf[op_index] = INDEX_op_nop;
+ op->opc = INDEX_op_nop;
} else {
- tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
- gen_args += 2;
+ tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
}
- args += 3;
continue;
default:
break;
@@ -784,7 +771,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
output argument is supported. */
mask = -1;
affected = -1;
- switch (op) {
+ switch (opc) {
CASE_OP_32_64(ext8s):
if ((temps[args[1]].mask & 0x80) != 0) {
break;
@@ -923,38 +910,31 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if (partmask == 0) {
assert(nb_oargs == 1);
- tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
- args += nb_args;
- gen_args += 2;
+ tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
continue;
}
if (affected == 0) {
assert(nb_oargs == 1);
if (temps_are_copies(args[0], args[1])) {
- s->gen_opc_buf[op_index] = INDEX_op_nop;
+ op->opc = INDEX_op_nop;
} else if (temps[args[1]].state != TCG_TEMP_CONST) {
- tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
- gen_args += 2;
+ tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
} else {
- tcg_opt_gen_movi(s, op_index, gen_args, op,
+ tcg_opt_gen_movi(s, op, args, opc,
args[0], temps[args[1]].val);
- gen_args += 2;
}
- args += nb_args;
continue;
}
/* Simplify expression for "op r, a, 0 => movi r, 0" cases */
- switch (op) {
+ switch (opc) {
CASE_OP_32_64(and):
CASE_OP_32_64(mul):
CASE_OP_32_64(muluh):
CASE_OP_32_64(mulsh):
if ((temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0)) {
- tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
- args += 3;
- gen_args += 2;
+ tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
continue;
}
break;
@@ -963,18 +943,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
/* Simplify expression for "op r, a, a => mov r, a" cases */
- switch (op) {
+ switch (opc) {
CASE_OP_32_64(or):
CASE_OP_32_64(and):
if (temps_are_copies(args[1], args[2])) {
if (temps_are_copies(args[0], args[1])) {
- s->gen_opc_buf[op_index] = INDEX_op_nop;
+ op->opc = INDEX_op_nop;
} else {
- tcg_opt_gen_mov(s, op_index, gen_args, op,
- args[0], args[1]);
- gen_args += 2;
+ tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
}
- args += 3;
continue;
}
break;
@@ -983,14 +960,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
/* Simplify expression for "op r, a, a => movi r, 0" cases */
- switch (op) {
+ switch (opc) {
CASE_OP_32_64(andc):
CASE_OP_32_64(sub):
CASE_OP_32_64(xor):
if (temps_are_copies(args[1], args[2])) {
- tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
- gen_args += 2;
- args += 3;
+ tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
continue;
}
break;
@@ -1001,17 +976,14 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Propagate constants through copy operations and do constant
folding. Constants will be substituted to arguments by register
allocator where needed and possible. Also detect copies. */
- switch (op) {
+ switch (opc) {
CASE_OP_32_64(mov):
if (temps_are_copies(args[0], args[1])) {
- args += 2;
- s->gen_opc_buf[op_index] = INDEX_op_nop;
+ op->opc = INDEX_op_nop;
break;
}
if (temps[args[1]].state != TCG_TEMP_CONST) {
- tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
- gen_args += 2;
- args += 2;
+ tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
break;
}
/* Source argument is constant. Rewrite the operation and
@@ -1019,9 +991,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
args[1] = temps[args[1]].val;
/* fallthrough */
CASE_OP_32_64(movi):
- tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], args[1]);
- gen_args += 2;
- args += 2;
+ tcg_opt_gen_movi(s, op, args, opc, args[0], args[1]);
break;
CASE_OP_32_64(not):
@@ -1033,20 +1003,16 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
case INDEX_op_ext32s_i64:
case INDEX_op_ext32u_i64:
if (temps[args[1]].state == TCG_TEMP_CONST) {
- tmp = do_constant_folding(op, temps[args[1]].val, 0);
- tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
- gen_args += 2;
- args += 2;
+ tmp = do_constant_folding(opc, temps[args[1]].val, 0);
+ tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
break;
}
goto do_default;
case INDEX_op_trunc_shr_i32:
if (temps[args[1]].state == TCG_TEMP_CONST) {
- tmp = do_constant_folding(op, temps[args[1]].val, args[2]);
- tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
- gen_args += 2;
- args += 3;
+ tmp = do_constant_folding(opc, temps[args[1]].val, args[2]);
+ tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
break;
}
goto do_default;
@@ -1075,11 +1041,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(remu):
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST) {
- tmp = do_constant_folding(op, temps[args[1]].val,
+ tmp = do_constant_folding(opc, temps[args[1]].val,
temps[args[2]].val);
- tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
- gen_args += 2;
- args += 3;
+ tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
break;
}
goto do_default;
@@ -1089,54 +1053,44 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
&& temps[args[2]].state == TCG_TEMP_CONST) {
tmp = deposit64(temps[args[1]].val, args[3], args[4],
temps[args[2]].val);
- tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
- gen_args += 2;
- args += 5;
+ tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
break;
}
goto do_default;
CASE_OP_32_64(setcond):
- tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
+ tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
if (tmp != 2) {
- tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
- gen_args += 2;
- args += 4;
+ tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
break;
}
goto do_default;
CASE_OP_32_64(brcond):
- tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
+ tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
if (tmp != 2) {
if (tmp) {
reset_all_temps(nb_temps);
- s->gen_opc_buf[op_index] = INDEX_op_br;
- gen_args[0] = args[3];
- gen_args += 1;
+ op->opc = INDEX_op_br;
+ args[0] = args[3];
} else {
- s->gen_opc_buf[op_index] = INDEX_op_nop;
+ op->opc = INDEX_op_nop;
}
- args += 4;
break;
}
goto do_default;
CASE_OP_32_64(movcond):
- tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
+ tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
if (tmp != 2) {
if (temps_are_copies(args[0], args[4-tmp])) {
- s->gen_opc_buf[op_index] = INDEX_op_nop;
+ op->opc = INDEX_op_nop;
} else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
- tcg_opt_gen_movi(s, op_index, gen_args, op,
+ tcg_opt_gen_movi(s, op, args, opc,
args[0], temps[args[4-tmp]].val);
- gen_args += 2;
} else {
- tcg_opt_gen_mov(s, op_index, gen_args, op,
- args[0], args[4-tmp]);
- gen_args += 2;
+ tcg_opt_gen_mov(s, op, args, opc, args[0], args[4-tmp]);
}
- args += 6;
break;
}
goto do_default;
@@ -1154,24 +1108,31 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
uint64_t a = ((uint64_t)ah << 32) | al;
uint64_t b = ((uint64_t)bh << 32) | bl;
TCGArg rl, rh;
+ TCGOp *op2;
+ TCGArg *args2;
- if (op == INDEX_op_add2_i32) {
+ if (opc == INDEX_op_add2_i32) {
a += b;
} else {
a -= b;
}
/* We emit the extra nop when we emit the add2/sub2. */
- assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+ op2 = &s->gen_op_buf[oi_next];
+ assert(op2->opc == INDEX_op_nop);
+
+ /* But we still have to allocate args for the op. */
+ op2->args = s->gen_next_parm_idx;
+ s->gen_next_parm_idx += 2;
+ args2 = &s->gen_opparam_buf[op2->args];
rl = args[0];
rh = args[1];
- tcg_opt_gen_movi(s, op_index, &gen_args[0],
- op, rl, (uint32_t)a);
- tcg_opt_gen_movi(s, ++op_index, &gen_args[2],
- op, rh, (uint32_t)(a >> 32));
- gen_args += 4;
- args += 6;
+ tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a);
+ tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32));
+
+ /* We've done all we need to do with the movi. Skip it. */
+ oi_next = op2->next;
break;
}
goto do_default;
@@ -1183,18 +1144,25 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
uint32_t b = temps[args[3]].val;
uint64_t r = (uint64_t)a * b;
TCGArg rl, rh;
+ TCGOp *op2;
+ TCGArg *args2;
/* We emit the extra nop when we emit the mulu2. */
- assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+ op2 = &s->gen_op_buf[oi_next];
+ assert(op2->opc == INDEX_op_nop);
+
+ /* But we still have to allocate args for the op. */
+ op2->args = s->gen_next_parm_idx;
+ s->gen_next_parm_idx += 2;
+ args2 = &s->gen_opparam_buf[op2->args];
rl = args[0];
rh = args[1];
- tcg_opt_gen_movi(s, op_index, &gen_args[0],
- op, rl, (uint32_t)r);
- tcg_opt_gen_movi(s, ++op_index, &gen_args[2],
- op, rh, (uint32_t)(r >> 32));
- gen_args += 4;
- args += 4;
+ tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)r);
+ tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(r >> 32));
+
+ /* We've done all we need to do with the movi. Skip it. */
+ oi_next = op2->next;
break;
}
goto do_default;
@@ -1205,12 +1173,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if (tmp) {
do_brcond_true:
reset_all_temps(nb_temps);
- s->gen_opc_buf[op_index] = INDEX_op_br;
- gen_args[0] = args[5];
- gen_args += 1;
+ op->opc = INDEX_op_br;
+ args[0] = args[5];
} else {
do_brcond_false:
- s->gen_opc_buf[op_index] = INDEX_op_nop;
+ op->opc = INDEX_op_nop;
}
} else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
&& temps[args[2]].state == TCG_TEMP_CONST
@@ -1221,12 +1188,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
vs the high word of the input. */
do_brcond_high:
reset_all_temps(nb_temps);
- s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
- gen_args[0] = args[1];
- gen_args[1] = args[3];
- gen_args[2] = args[4];
- gen_args[3] = args[5];
- gen_args += 4;
+ op->opc = INDEX_op_brcond_i32;
+ args[0] = args[1];
+ args[1] = args[3];
+ args[2] = args[4];
+ args[3] = args[5];
} else if (args[4] == TCG_COND_EQ) {
/* Simplify EQ comparisons where one of the pairs
can be simplified. */
@@ -1246,12 +1212,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
do_brcond_low:
reset_all_temps(nb_temps);
- s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
- gen_args[0] = args[0];
- gen_args[1] = args[2];
- gen_args[2] = args[4];
- gen_args[3] = args[5];
- gen_args += 4;
+ op->opc = INDEX_op_brcond_i32;
+ args[1] = args[2];
+ args[2] = args[4];
+ args[3] = args[5];
} else if (args[4] == TCG_COND_NE) {
/* Simplify NE comparisons where one of the pairs
can be simplified. */
@@ -1273,15 +1237,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
} else {
goto do_default;
}
- args += 6;
break;
case INDEX_op_setcond2_i32:
tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
if (tmp != 2) {
do_setcond_const:
- tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
- gen_args += 2;
+ tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
} else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
&& temps[args[3]].state == TCG_TEMP_CONST
&& temps[args[4]].state == TCG_TEMP_CONST
@@ -1290,14 +1252,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Simplify LT/GE comparisons vs zero to a single compare
vs the high word of the input. */
do_setcond_high:
- s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
reset_temp(args[0]);
temps[args[0]].mask = 1;
- gen_args[0] = args[0];
- gen_args[1] = args[2];
- gen_args[2] = args[4];
- gen_args[3] = args[5];
- gen_args += 4;
+ op->opc = INDEX_op_setcond_i32;
+ args[1] = args[2];
+ args[2] = args[4];
+ args[3] = args[5];
} else if (args[5] == TCG_COND_EQ) {
/* Simplify EQ comparisons where one of the pairs
can be simplified. */
@@ -1318,12 +1278,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
do_setcond_low:
reset_temp(args[0]);
temps[args[0]].mask = 1;
- s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args[2] = args[3];
- gen_args[3] = args[5];
- gen_args += 4;
+ op->opc = INDEX_op_setcond_i32;
+ args[2] = args[3];
+ args[3] = args[5];
} else if (args[5] == TCG_COND_NE) {
/* Simplify NE comparisons where one of the pairs
can be simplified. */
@@ -1345,7 +1302,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
} else {
goto do_default;
}
- args += 6;
break;
case INDEX_op_call:
@@ -1377,22 +1333,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
}
}
- for (i = 0; i < nb_args; i++) {
- gen_args[i] = args[i];
- }
- args += nb_args;
- gen_args += nb_args;
break;
}
}
-
- return gen_args;
}
-TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
- TCGArg *args, TCGOpDef *tcg_op_defs)
+void tcg_optimize(TCGContext *s)
{
- TCGArg *res;
- res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
- return res;
+ tcg_constant_folding(s);
}
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 5305f1d34a..cbaa15ccb9 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -35,100 +35,116 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_HIGH TCGV_HIGH_link_error
#endif
+/* Note that this is optimized for sequential allocation during translate.
+ Up to and including filling in the forward link immediately. We'll do
+ proper termination of the end of the list after we finish translation. */
+
+static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args)
+{
+ int oi = ctx->gen_next_op_idx;
+ int ni = oi + 1;
+ int pi = oi - 1;
+
+ tcg_debug_assert(oi < OPC_BUF_SIZE);
+ ctx->gen_last_op_idx = oi;
+ ctx->gen_next_op_idx = ni;
+
+ ctx->gen_op_buf[oi] = (TCGOp){
+ .opc = opc,
+ .args = args,
+ .prev = pi,
+ .next = ni
+ };
+}
+
void tcg_gen_op0(TCGContext *ctx, TCGOpcode opc)
{
- *ctx->gen_opc_ptr++ = opc;
+ tcg_emit_op(ctx, opc, -1);
}
void tcg_gen_op1(TCGContext *ctx, TCGOpcode opc, TCGArg a1)
{
- uint16_t *op = ctx->gen_opc_ptr;
- TCGArg *opp = ctx->gen_opparam_ptr;
+ int pi = ctx->gen_next_parm_idx;
- op[0] = opc;
- opp[0] = a1;
+ tcg_debug_assert(pi + 1 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 1;
+ ctx->gen_opparam_buf[pi] = a1;
- ctx->gen_opc_ptr = op + 1;
- ctx->gen_opparam_ptr = opp + 1;
+ tcg_emit_op(ctx, opc, pi);
}
void tcg_gen_op2(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2)
{
- uint16_t *op = ctx->gen_opc_ptr;
- TCGArg *opp = ctx->gen_opparam_ptr;
+ int pi = ctx->gen_next_parm_idx;
- op[0] = opc;
- opp[0] = a1;
- opp[1] = a2;
+ tcg_debug_assert(pi + 2 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 2;
+ ctx->gen_opparam_buf[pi + 0] = a1;
+ ctx->gen_opparam_buf[pi + 1] = a2;
- ctx->gen_opc_ptr = op + 1;
- ctx->gen_opparam_ptr = opp + 2;
+ tcg_emit_op(ctx, opc, pi);
}
void tcg_gen_op3(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
TCGArg a2, TCGArg a3)
{
- uint16_t *op = ctx->gen_opc_ptr;
- TCGArg *opp = ctx->gen_opparam_ptr;
+ int pi = ctx->gen_next_parm_idx;
- op[0] = opc;
- opp[0] = a1;
- opp[1] = a2;
- opp[2] = a3;
+ tcg_debug_assert(pi + 3 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 3;
+ ctx->gen_opparam_buf[pi + 0] = a1;
+ ctx->gen_opparam_buf[pi + 1] = a2;
+ ctx->gen_opparam_buf[pi + 2] = a3;
- ctx->gen_opc_ptr = op + 1;
- ctx->gen_opparam_ptr = opp + 3;
+ tcg_emit_op(ctx, opc, pi);
}
void tcg_gen_op4(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
TCGArg a2, TCGArg a3, TCGArg a4)
{
- uint16_t *op = ctx->gen_opc_ptr;
- TCGArg *opp = ctx->gen_opparam_ptr;
+ int pi = ctx->gen_next_parm_idx;
- op[0] = opc;
- opp[0] = a1;
- opp[1] = a2;
- opp[2] = a3;
- opp[3] = a4;
+ tcg_debug_assert(pi + 4 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 4;
+ ctx->gen_opparam_buf[pi + 0] = a1;
+ ctx->gen_opparam_buf[pi + 1] = a2;
+ ctx->gen_opparam_buf[pi + 2] = a3;
+ ctx->gen_opparam_buf[pi + 3] = a4;
- ctx->gen_opc_ptr = op + 1;
- ctx->gen_opparam_ptr = opp + 4;
+ tcg_emit_op(ctx, opc, pi);
}
void tcg_gen_op5(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5)
{
- uint16_t *op = ctx->gen_opc_ptr;
- TCGArg *opp = ctx->gen_opparam_ptr;
+ int pi = ctx->gen_next_parm_idx;
- op[0] = opc;
- opp[0] = a1;
- opp[1] = a2;
- opp[2] = a3;
- opp[3] = a4;
- opp[4] = a5;
+ tcg_debug_assert(pi + 5 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 5;
+ ctx->gen_opparam_buf[pi + 0] = a1;
+ ctx->gen_opparam_buf[pi + 1] = a2;
+ ctx->gen_opparam_buf[pi + 2] = a3;
+ ctx->gen_opparam_buf[pi + 3] = a4;
+ ctx->gen_opparam_buf[pi + 4] = a5;
- ctx->gen_opc_ptr = op + 1;
- ctx->gen_opparam_ptr = opp + 5;
+ tcg_emit_op(ctx, opc, pi);
}
void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2,
TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6)
{
- uint16_t *op = ctx->gen_opc_ptr;
- TCGArg *opp = ctx->gen_opparam_ptr;
+ int pi = ctx->gen_next_parm_idx;
- op[0] = opc;
- opp[0] = a1;
- opp[1] = a2;
- opp[2] = a3;
- opp[3] = a4;
- opp[4] = a5;
- opp[5] = a6;
+ tcg_debug_assert(pi + 6 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 6;
+ ctx->gen_opparam_buf[pi + 0] = a1;
+ ctx->gen_opparam_buf[pi + 1] = a2;
+ ctx->gen_opparam_buf[pi + 2] = a3;
+ ctx->gen_opparam_buf[pi + 3] = a4;
+ ctx->gen_opparam_buf[pi + 4] = a5;
+ ctx->gen_opparam_buf[pi + 5] = a6;
- ctx->gen_opc_ptr = op + 1;
- ctx->gen_opparam_ptr = opp + 6;
+ tcg_emit_op(ctx, opc, pi);
}
/* 32 bit ops */
@@ -1862,53 +1878,57 @@ static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
return op;
}
-static inline void tcg_add_param_i32(TCGv_i32 val)
-{
- *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(val);
-}
-
-static inline void tcg_add_param_i64(TCGv_i64 val)
+static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
+ TCGMemOp memop, TCGArg idx)
{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op4ii_i32(opc, val, addr, memop, idx);
+#else
if (TCG_TARGET_REG_BITS == 32) {
- *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_LOW(val));
- *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_HIGH(val));
+ tcg_gen_op5ii_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr),
+ memop, idx);
} else {
- *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val);
+ tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I32(val), GET_TCGV_I64(addr),
+ memop, idx);
}
+#endif
}
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
+ TCGMemOp memop, TCGArg idx)
+{
#if TARGET_LONG_BITS == 32
-# define tcg_add_param_tl tcg_add_param_i32
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op5ii_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
+ addr, memop, idx);
+ } else {
+ tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I64(val), GET_TCGV_I32(addr),
+ memop, idx);
+ }
#else
-# define tcg_add_param_tl tcg_add_param_i64
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op6ii_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
+ TCGV_LOW(addr), TCGV_HIGH(addr), memop, idx);
+ } else {
+ tcg_gen_op4ii_i64(opc, val, addr, memop, idx);
+ }
#endif
+}
void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
memop = tcg_canonicalize_memop(memop, 0, 0);
-
- *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
- tcg_add_param_i32(val);
- tcg_add_param_tl(addr);
- *tcg_ctx.gen_opparam_ptr++ = memop;
- *tcg_ctx.gen_opparam_ptr++ = idx;
+ gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
}
void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
memop = tcg_canonicalize_memop(memop, 0, 1);
-
- *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
- tcg_add_param_i32(val);
- tcg_add_param_tl(addr);
- *tcg_ctx.gen_opparam_ptr++ = memop;
- *tcg_ctx.gen_opparam_ptr++ = idx;
+ gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
}
void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
- memop = tcg_canonicalize_memop(memop, 1, 0);
-
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
if (memop & MO_SIGN) {
@@ -1919,25 +1939,17 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
return;
}
- *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
- tcg_add_param_i64(val);
- tcg_add_param_tl(addr);
- *tcg_ctx.gen_opparam_ptr++ = memop;
- *tcg_ctx.gen_opparam_ptr++ = idx;
+ memop = tcg_canonicalize_memop(memop, 1, 0);
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
}
void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
- memop = tcg_canonicalize_memop(memop, 1, 1);
-
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
return;
}
- *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
- tcg_add_param_i64(val);
- tcg_add_param_tl(addr);
- *tcg_ctx.gen_opparam_ptr++ = memop;
- *tcg_ctx.gen_opparam_ptr++ = idx;
+ memop = tcg_canonicalize_memop(memop, 1, 1);
+ gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
}
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 3470500a7a..ee041b9405 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -407,7 +407,6 @@ void tcg_func_start(TCGContext *s)
/* No temps have been previously allocated for size or locality. */
memset(s->free_temps, 0, sizeof(s->free_temps));
- s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS);
s->nb_labels = 0;
s->current_frame_offset = s->frame_start;
@@ -415,8 +414,10 @@ void tcg_func_start(TCGContext *s)
s->goto_tb_issue_mask = 0;
#endif
- s->gen_opc_ptr = s->gen_opc_buf;
- s->gen_opparam_ptr = s->gen_opparam_buf;
+ s->gen_first_op_idx = 0;
+ s->gen_last_op_idx = -1;
+ s->gen_next_op_idx = 0;
+ s->gen_next_parm_idx = 0;
s->be = tcg_malloc(sizeof(TCGBackendData));
}
@@ -703,9 +704,8 @@ int tcg_check_temp_count(void)
void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
int nargs, TCGArg *args)
{
- int i, real_args, nb_rets;
+ int i, real_args, nb_rets, pi, pi_first;
unsigned sizemask, flags;
- TCGArg *nparam;
TCGHelperInfo *info;
info = g_hash_table_lookup(s->helpers, (gpointer)func);
@@ -758,8 +758,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
}
#endif /* TCG_TARGET_EXTEND_ARGS */
- *s->gen_opc_ptr++ = INDEX_op_call;
- nparam = s->gen_opparam_ptr++;
+ pi_first = pi = s->gen_next_parm_idx;
if (ret != TCG_CALL_DUMMY_ARG) {
#if defined(__sparc__) && !defined(__arch64__) \
&& !defined(CONFIG_TCG_INTERPRETER)
@@ -769,25 +768,25 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
two return temporaries, and reassemble below. */
retl = tcg_temp_new_i64();
reth = tcg_temp_new_i64();
- *s->gen_opparam_ptr++ = GET_TCGV_I64(reth);
- *s->gen_opparam_ptr++ = GET_TCGV_I64(retl);
+ s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
+ s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
nb_rets = 2;
} else {
- *s->gen_opparam_ptr++ = ret;
+ s->gen_opparam_buf[pi++] = ret;
nb_rets = 1;
}
#else
if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
- *s->gen_opparam_ptr++ = ret + 1;
- *s->gen_opparam_ptr++ = ret;
+ s->gen_opparam_buf[pi++] = ret + 1;
+ s->gen_opparam_buf[pi++] = ret;
#else
- *s->gen_opparam_ptr++ = ret;
- *s->gen_opparam_ptr++ = ret + 1;
+ s->gen_opparam_buf[pi++] = ret;
+ s->gen_opparam_buf[pi++] = ret + 1;
#endif
nb_rets = 2;
} else {
- *s->gen_opparam_ptr++ = ret;
+ s->gen_opparam_buf[pi++] = ret;
nb_rets = 1;
}
#endif
@@ -801,7 +800,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
/* some targets want aligned 64 bit args */
if (real_args & 1) {
- *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
+ s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
real_args++;
}
#endif
@@ -816,26 +815,42 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
have to get more complicated to differentiate between
stack arguments and register arguments. */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
- *s->gen_opparam_ptr++ = args[i] + 1;
- *s->gen_opparam_ptr++ = args[i];
+ s->gen_opparam_buf[pi++] = args[i] + 1;
+ s->gen_opparam_buf[pi++] = args[i];
#else
- *s->gen_opparam_ptr++ = args[i];
- *s->gen_opparam_ptr++ = args[i] + 1;
+ s->gen_opparam_buf[pi++] = args[i];
+ s->gen_opparam_buf[pi++] = args[i] + 1;
#endif
real_args += 2;
continue;
}
- *s->gen_opparam_ptr++ = args[i];
+ s->gen_opparam_buf[pi++] = args[i];
real_args++;
}
- *s->gen_opparam_ptr++ = (uintptr_t)func;
- *s->gen_opparam_ptr++ = flags;
+ s->gen_opparam_buf[pi++] = (uintptr_t)func;
+ s->gen_opparam_buf[pi++] = flags;
+
+ i = s->gen_next_op_idx;
+ tcg_debug_assert(i < OPC_BUF_SIZE);
+ tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);
+
+ /* Set links for sequential allocation during translation. */
+ s->gen_op_buf[i] = (TCGOp){
+ .opc = INDEX_op_call,
+ .callo = nb_rets,
+ .calli = real_args,
+ .args = pi_first,
+ .prev = i - 1,
+ .next = i + 1
+ };
- *nparam = (nb_rets << 16) | real_args;
+ /* Make sure the calli field didn't overflow. */
+ tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
- /* total parameters, needed to go backward in the instruction stream */
- *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
+ s->gen_last_op_idx = i;
+ s->gen_next_op_idx = i + 1;
+ s->gen_next_parm_idx = pi;
#if defined(__sparc__) && !defined(__arch64__) \
&& !defined(CONFIG_TCG_INTERPRETER)
@@ -972,20 +987,21 @@ static const char * const ldst_name[] =
void tcg_dump_ops(TCGContext *s)
{
- const uint16_t *opc_ptr;
- const TCGArg *args;
- TCGArg arg;
- TCGOpcode c;
- int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn;
- const TCGOpDef *def;
char buf[128];
+ TCGOp *op;
+ int oi;
- first_insn = 1;
- opc_ptr = s->gen_opc_buf;
- args = s->gen_opparam_buf;
- while (opc_ptr < s->gen_opc_ptr) {
- c = *opc_ptr++;
+ for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
+ int i, k, nb_oargs, nb_iargs, nb_cargs;
+ const TCGOpDef *def;
+ const TCGArg *args;
+ TCGOpcode c;
+
+ op = &s->gen_op_buf[oi];
+ c = op->opc;
def = &tcg_op_defs[c];
+ args = &s->gen_opparam_buf[op->args];
+
if (c == INDEX_op_debug_insn_start) {
uint64_t pc;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -993,21 +1009,14 @@ void tcg_dump_ops(TCGContext *s)
#else
pc = args[0];
#endif
- if (!first_insn) {
+ if (oi != s->gen_first_op_idx) {
qemu_log("\n");
}
qemu_log(" ---- 0x%" PRIx64, pc);
- first_insn = 0;
- nb_oargs = def->nb_oargs;
- nb_iargs = def->nb_iargs;
- nb_cargs = def->nb_cargs;
} else if (c == INDEX_op_call) {
- TCGArg arg;
-
/* variable number of arguments */
- arg = *args++;
- nb_oargs = arg >> 16;
- nb_iargs = arg & 0xffff;
+ nb_oargs = op->callo;
+ nb_iargs = op->calli;
nb_cargs = def->nb_cargs;
/* function name, flags, out args */
@@ -1028,26 +1037,20 @@ void tcg_dump_ops(TCGContext *s)
}
} else {
qemu_log(" %s ", def->name);
- if (c == INDEX_op_nopn) {
- /* variable number of arguments */
- nb_cargs = *args;
- nb_oargs = 0;
- nb_iargs = 0;
- } else {
- nb_oargs = def->nb_oargs;
- nb_iargs = def->nb_iargs;
- nb_cargs = def->nb_cargs;
- }
-
+
+ nb_oargs = def->nb_oargs;
+ nb_iargs = def->nb_iargs;
+ nb_cargs = def->nb_cargs;
+
k = 0;
- for(i = 0; i < nb_oargs; i++) {
+ for (i = 0; i < nb_oargs; i++) {
if (k != 0) {
qemu_log(",");
}
qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
args[k++]));
}
- for(i = 0; i < nb_iargs; i++) {
+ for (i = 0; i < nb_iargs; i++) {
if (k != 0) {
qemu_log(",");
}
@@ -1085,16 +1088,14 @@ void tcg_dump_ops(TCGContext *s)
i = 0;
break;
}
- for(; i < nb_cargs; i++) {
+ for (; i < nb_cargs; i++) {
if (k != 0) {
qemu_log(",");
}
- arg = args[k++];
- qemu_log("$0x%" TCG_PRIlx, arg);
+ qemu_log("$0x%" TCG_PRIlx, args[k++]);
}
}
qemu_log("\n");
- args += nb_iargs + nb_oargs + nb_cargs;
}
}
@@ -1244,20 +1245,6 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
}
#ifdef USE_LIVENESS_ANALYSIS
-
-/* set a nop for an operation using 'nb_args' */
-static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
- TCGArg *args, int nb_args)
-{
- if (nb_args == 0) {
- *opc_ptr = INDEX_op_nop;
- } else {
- *opc_ptr = INDEX_op_nopn;
- args[0] = nb_args;
- args[nb_args - 1] = nb_args;
- }
-}
-
/* liveness analysis: end of function: all temps are dead, and globals
should be in memory. */
static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
@@ -1287,19 +1274,10 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
temporaries are removed. */
static void tcg_liveness_analysis(TCGContext *s)
{
- int i, op_index, nb_args, nb_iargs, nb_oargs, nb_ops;
- TCGOpcode op, op_new, op_new2;
- TCGArg *args, arg;
- const TCGOpDef *def;
uint8_t *dead_temps, *mem_temps;
- uint16_t dead_args;
- uint8_t sync_args;
- bool have_op_new2;
-
- s->gen_opc_ptr++; /* skip end */
-
- nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
+ int oi, oi_prev, nb_ops;
+ nb_ops = s->gen_next_op_idx;
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
@@ -1307,25 +1285,31 @@ static void tcg_liveness_analysis(TCGContext *s)
mem_temps = tcg_malloc(s->nb_temps);
tcg_la_func_end(s, dead_temps, mem_temps);
- args = s->gen_opparam_ptr;
- op_index = nb_ops - 1;
- while (op_index >= 0) {
- op = s->gen_opc_buf[op_index];
- def = &tcg_op_defs[op];
- switch(op) {
+ for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
+ int i, nb_iargs, nb_oargs;
+ TCGOpcode opc_new, opc_new2;
+ bool have_opc_new2;
+ uint16_t dead_args;
+ uint8_t sync_args;
+ TCGArg arg;
+
+ TCGOp * const op = &s->gen_op_buf[oi];
+ TCGArg * const args = &s->gen_opparam_buf[op->args];
+ TCGOpcode opc = op->opc;
+ const TCGOpDef *def = &tcg_op_defs[opc];
+
+ oi_prev = op->prev;
+
+ switch (opc) {
case INDEX_op_call:
{
int call_flags;
- nb_args = args[-1];
- args -= nb_args;
- arg = *args++;
- nb_iargs = arg & 0xffff;
- nb_oargs = arg >> 16;
+ nb_oargs = op->callo;
+ nb_iargs = op->calli;
call_flags = args[nb_oargs + nb_iargs + 1];
- /* pure functions can be removed if their result is not
- used */
+ /* pure functions can be removed if their result is unused */
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
@@ -1333,8 +1317,7 @@ static void tcg_liveness_analysis(TCGContext *s)
goto do_not_remove_call;
}
}
- tcg_set_nop(s, s->gen_opc_buf + op_index,
- args - 1, nb_args);
+ goto do_remove;
} else {
do_not_remove_call:
@@ -1373,41 +1356,33 @@ static void tcg_liveness_analysis(TCGContext *s)
dead_temps[arg] = 0;
}
}
- s->op_dead_args[op_index] = dead_args;
- s->op_sync_args[op_index] = sync_args;
+ s->op_dead_args[oi] = dead_args;
+ s->op_sync_args[oi] = sync_args;
}
- args--;
}
break;
case INDEX_op_debug_insn_start:
- args -= def->nb_args;
- break;
- case INDEX_op_nopn:
- nb_args = args[-1];
- args -= nb_args;
+ case INDEX_op_nop:
+ case INDEX_op_end:
break;
case INDEX_op_discard:
- args--;
/* mark the temporary as dead */
dead_temps[args[0]] = 1;
mem_temps[args[0]] = 0;
break;
- case INDEX_op_end:
- break;
case INDEX_op_add2_i32:
- op_new = INDEX_op_add_i32;
+ opc_new = INDEX_op_add_i32;
goto do_addsub2;
case INDEX_op_sub2_i32:
- op_new = INDEX_op_sub_i32;
+ opc_new = INDEX_op_sub_i32;
goto do_addsub2;
case INDEX_op_add2_i64:
- op_new = INDEX_op_add_i64;
+ opc_new = INDEX_op_add_i64;
goto do_addsub2;
case INDEX_op_sub2_i64:
- op_new = INDEX_op_sub_i64;
+ opc_new = INDEX_op_sub_i64;
do_addsub2:
- args -= 6;
nb_iargs = 4;
nb_oargs = 2;
/* Test if the high part of the operation is dead, but not
@@ -1418,12 +1393,11 @@ static void tcg_liveness_analysis(TCGContext *s)
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
goto do_remove;
}
- /* Create the single operation plus nop. */
- s->gen_opc_buf[op_index] = op = op_new;
+ /* Replace the opcode and adjust the args in place,
+ leaving 3 unused args at the end. */
+ op->opc = opc = opc_new;
args[1] = args[2];
args[2] = args[4];
- assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
- tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
/* Fall through and mark the single-word operation live. */
nb_iargs = 2;
nb_oargs = 1;
@@ -1431,27 +1405,26 @@ static void tcg_liveness_analysis(TCGContext *s)
goto do_not_remove;
case INDEX_op_mulu2_i32:
- op_new = INDEX_op_mul_i32;
- op_new2 = INDEX_op_muluh_i32;
- have_op_new2 = TCG_TARGET_HAS_muluh_i32;
+ opc_new = INDEX_op_mul_i32;
+ opc_new2 = INDEX_op_muluh_i32;
+ have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
goto do_mul2;
case INDEX_op_muls2_i32:
- op_new = INDEX_op_mul_i32;
- op_new2 = INDEX_op_mulsh_i32;
- have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
+ opc_new = INDEX_op_mul_i32;
+ opc_new2 = INDEX_op_mulsh_i32;
+ have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
goto do_mul2;
case INDEX_op_mulu2_i64:
- op_new = INDEX_op_mul_i64;
- op_new2 = INDEX_op_muluh_i64;
- have_op_new2 = TCG_TARGET_HAS_muluh_i64;
+ opc_new = INDEX_op_mul_i64;
+ opc_new2 = INDEX_op_muluh_i64;
+ have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
goto do_mul2;
case INDEX_op_muls2_i64:
- op_new = INDEX_op_mul_i64;
- op_new2 = INDEX_op_mulsh_i64;
- have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
+ opc_new = INDEX_op_mul_i64;
+ opc_new2 = INDEX_op_mulsh_i64;
+ have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
goto do_mul2;
do_mul2:
- args -= 4;
nb_iargs = 2;
nb_oargs = 2;
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
@@ -1460,28 +1433,25 @@ static void tcg_liveness_analysis(TCGContext *s)
goto do_remove;
}
/* The high part of the operation is dead; generate the low. */
- s->gen_opc_buf[op_index] = op = op_new;
+ op->opc = opc = opc_new;
args[1] = args[2];
args[2] = args[3];
- } else if (have_op_new2 && dead_temps[args[0]]
+ } else if (have_opc_new2 && dead_temps[args[0]]
&& !mem_temps[args[0]]) {
- /* The low part of the operation is dead; generate the high. */
- s->gen_opc_buf[op_index] = op = op_new2;
+ /* The low part of the operation is dead; generate the high. */
+ op->opc = opc = opc_new2;
args[0] = args[1];
args[1] = args[2];
args[2] = args[3];
} else {
goto do_not_remove;
}
- assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
- tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
/* Mark the single-word operation live. */
nb_oargs = 1;
goto do_not_remove;
default:
/* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
- args -= def->nb_args;
nb_iargs = def->nb_iargs;
nb_oargs = def->nb_oargs;
@@ -1489,24 +1459,23 @@ static void tcg_liveness_analysis(TCGContext *s)
its outputs are dead. We assume that nb_oargs == 0
implies side effects */
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
- for(i = 0; i < nb_oargs; i++) {
+ for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (!dead_temps[arg] || mem_temps[arg]) {
goto do_not_remove;
}
}
do_remove:
- tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
+ op->opc = INDEX_op_nop;
#ifdef CONFIG_PROFILER
s->del_op_count++;
#endif
} else {
do_not_remove:
-
/* output args are dead */
dead_args = 0;
sync_args = 0;
- for(i = 0; i < nb_oargs; i++) {
+ for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
dead_args |= (1 << i);
@@ -1527,23 +1496,18 @@ static void tcg_liveness_analysis(TCGContext *s)
}
/* input args are live */
- for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
dead_args |= (1 << i);
}
dead_temps[arg] = 0;
}
- s->op_dead_args[op_index] = dead_args;
- s->op_sync_args[op_index] = sync_args;
+ s->op_dead_args[oi] = dead_args;
+ s->op_sync_args[oi] = sync_args;
}
break;
}
- op_index--;
- }
-
- if (args != s->gen_opparam_buf) {
- tcg_abort();
}
}
#else
@@ -2110,11 +2074,11 @@ static void tcg_reg_alloc_op(TCGContext *s,
#define STACK_DIR(x) (x)
#endif
-static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
- TCGOpcode opc, const TCGArg *args,
- uint16_t dead_args, uint8_t sync_args)
+static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
+ const TCGArg * const args, uint16_t dead_args,
+ uint8_t sync_args)
{
- int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
+ int flags, nb_regs, i, reg;
TCGArg arg;
TCGTemp *ts;
intptr_t stack_offset;
@@ -2123,22 +2087,16 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
int allocate_args;
TCGRegSet allocated_regs;
- arg = *args++;
-
- nb_oargs = arg >> 16;
- nb_iargs = arg & 0xffff;
- nb_params = nb_iargs;
-
func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
flags = args[nb_oargs + nb_iargs + 1];
nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
- if (nb_regs > nb_params) {
- nb_regs = nb_params;
+ if (nb_regs > nb_iargs) {
+ nb_regs = nb_iargs;
}
/* assign stack slots first */
- call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
+ call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
~(TCG_TARGET_STACK_ALIGN - 1);
allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
@@ -2149,7 +2107,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
}
stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
- for(i = nb_regs; i < nb_params; i++) {
+ for(i = nb_regs; i < nb_iargs; i++) {
arg = args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
stack_offset -= sizeof(tcg_target_long);
@@ -2256,8 +2214,6 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
}
}
}
-
- return nb_iargs + nb_oargs + def->nb_cargs + 1;
}
#ifdef CONFIG_PROFILER
@@ -2285,10 +2241,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_insn_unit *gen_code_buf,
long search_pc)
{
- TCGOpcode opc;
- int op_index;
- const TCGOpDef *def;
- const TCGArg *args;
+ int oi, oi_next;
#ifdef DEBUG_DISAS
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
@@ -2303,8 +2256,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
#endif
#ifdef USE_TCG_OPTIMIZATIONS
- s->gen_opparam_ptr =
- tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs);
+ tcg_optimize(s);
#endif
#ifdef CONFIG_PROFILER
@@ -2333,42 +2285,31 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_out_tb_init(s);
- args = s->gen_opparam_buf;
- op_index = 0;
+ for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
+ TCGOp * const op = &s->gen_op_buf[oi];
+ TCGArg * const args = &s->gen_opparam_buf[op->args];
+ TCGOpcode opc = op->opc;
+ const TCGOpDef *def = &tcg_op_defs[opc];
+ uint16_t dead_args = s->op_dead_args[oi];
+ uint8_t sync_args = s->op_sync_args[oi];
- for(;;) {
- opc = s->gen_opc_buf[op_index];
+ oi_next = op->next;
#ifdef CONFIG_PROFILER
tcg_table_op_count[opc]++;
#endif
- def = &tcg_op_defs[opc];
-#if 0
- printf("%s: %d %d %d\n", def->name,
- def->nb_oargs, def->nb_iargs, def->nb_cargs);
- // dump_regs(s);
-#endif
- switch(opc) {
+
+ switch (opc) {
case INDEX_op_mov_i32:
case INDEX_op_mov_i64:
- tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index],
- s->op_sync_args[op_index]);
+ tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
break;
case INDEX_op_movi_i32:
case INDEX_op_movi_i64:
- tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index],
- s->op_sync_args[op_index]);
+ tcg_reg_alloc_movi(s, args, dead_args, sync_args);
break;
case INDEX_op_debug_insn_start:
- /* debug instruction */
- break;
case INDEX_op_nop:
- case INDEX_op_nop1:
- case INDEX_op_nop2:
- case INDEX_op_nop3:
break;
- case INDEX_op_nopn:
- args += args[0];
- goto next;
case INDEX_op_discard:
temp_dead(s, args[0]);
break;
@@ -2377,12 +2318,9 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_out_label(s, args[0], s->code_ptr);
break;
case INDEX_op_call:
- args += tcg_reg_alloc_call(s, def, opc, args,
- s->op_dead_args[op_index],
- s->op_sync_args[op_index]);
- goto next;
- case INDEX_op_end:
- goto the_end;
+ tcg_reg_alloc_call(s, op->callo, op->calli, args,
+ dead_args, sync_args);
+ break;
default:
/* Sanity check that we've not introduced any unhandled opcodes. */
if (def->flags & TCG_OPF_NOT_PRESENT) {
@@ -2391,21 +2329,17 @@ static inline int tcg_gen_code_common(TCGContext *s,
/* Note: in order to speed up the code, it would be much
faster to have specialized register allocator functions for
some common argument patterns */
- tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index],
- s->op_sync_args[op_index]);
+ tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
break;
}
- args += def->nb_args;
- next:
if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) {
- return op_index;
+ return oi;
}
- op_index++;
#ifndef NDEBUG
check_regs(s);
#endif
}
- the_end:
+
/* Generate TB finalization at the end of block */
tcg_out_tb_finalize(s);
return -1;
@@ -2416,14 +2350,18 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
#ifdef CONFIG_PROFILER
{
int n;
- n = (s->gen_opc_ptr - s->gen_opc_buf);
+
+ n = s->gen_last_op_idx + 1;
s->op_count += n;
- if (n > s->op_count_max)
+ if (n > s->op_count_max) {
s->op_count_max = n;
+ }
- s->temp_count += s->nb_temps;
- if (s->nb_temps > s->temp_count_max)
- s->temp_count_max = s->nb_temps;
+ n = s->nb_temps;
+ s->temp_count += n;
+ if (n > s->temp_count_max) {
+ s->temp_count_max = n;
+ }
}
#endif
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 95f1aad7dc..596e30a4b6 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -448,10 +448,28 @@ typedef struct TCGTempSet {
unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
} TCGTempSet;
+typedef struct TCGOp {
+ TCGOpcode opc : 8;
+
+ /* The number of out and in parameter for a call. */
+ unsigned callo : 2;
+ unsigned calli : 6;
+
+ /* Index of the arguments for this op, or -1 for zero-operand ops. */
+ signed args : 16;
+
+ /* Index of the prex/next op, or -1 for the end of the list. */
+ signed prev : 16;
+ signed next : 16;
+} TCGOp;
+
+QEMU_BUILD_BUG_ON(NB_OPS > 0xff);
+QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff);
+QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff);
+
struct TCGContext {
uint8_t *pool_cur, *pool_end;
TCGPool *pool_first, *pool_current, *pool_first_large;
- TCGLabel *labels;
int nb_labels;
int nb_globals;
int nb_temps;
@@ -469,9 +487,6 @@ struct TCGContext {
corresponding output argument needs to be
sync to memory. */
- /* tells in which temporary a given register is. It does not take
- into account fixed registers */
- int reg_to_temp[TCG_TARGET_NB_REGS];
TCGRegSet reserved_regs;
intptr_t current_frame_offset;
intptr_t frame_start;
@@ -479,8 +494,6 @@ struct TCGContext {
int frame_reg;
tcg_insn_unit *code_ptr;
- TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
- TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
GHashTable *helpers;
@@ -508,14 +521,10 @@ struct TCGContext {
int goto_tb_issue_mask;
#endif
- uint16_t gen_opc_buf[OPC_BUF_SIZE];
- TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
-
- uint16_t *gen_opc_ptr;
- TCGArg *gen_opparam_ptr;
- target_ulong gen_opc_pc[OPC_BUF_SIZE];
- uint16_t gen_opc_icount[OPC_BUF_SIZE];
- uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+ int gen_first_op_idx;
+ int gen_last_op_idx;
+ int gen_next_op_idx;
+ int gen_next_parm_idx;
/* Code generation. Note that we specifically do not use tcg_insn_unit
here, because there's too much arithmetic throughout that relies
@@ -533,6 +542,22 @@ struct TCGContext {
/* The TCGBackendData structure is private to tcg-target.c. */
struct TCGBackendData *be;
+
+ TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
+ TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
+
+ /* tells in which temporary a given register is. It does not take
+ into account fixed registers */
+ int reg_to_temp[TCG_TARGET_NB_REGS];
+
+ TCGOp gen_op_buf[OPC_BUF_SIZE];
+ TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
+
+ target_ulong gen_opc_pc[OPC_BUF_SIZE];
+ uint16_t gen_opc_icount[OPC_BUF_SIZE];
+ uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+
+ TCGLabel labels[TCG_MAX_LABELS];
};
extern TCGContext tcg_ctx;
@@ -540,7 +565,7 @@ extern TCGContext tcg_ctx;
/* The number of opcodes emitted so far. */
static inline int tcg_op_buf_count(void)
{
- return tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+ return tcg_ctx.gen_next_op_idx;
}
/* Test for whether to terminate the TB for using too many opcodes. */
@@ -718,8 +743,7 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
void tcg_gen_callN(TCGContext *s, void *func,
TCGArg ret, int nargs, TCGArg *args);
-TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args,
- TCGOpDef *tcg_op_def);
+void tcg_optimize(TCGContext *s);
/* only used for debugging purposes */
void tcg_dump_ops(TCGContext *s);