From 56e4943825e70a45d1fbba4fffa431000c6e1c7a Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Thu, 6 Sep 2012 16:47:13 +0200
Subject: tcg/optimize: split expression simplification

Split expression simplification in multiple parts so that a given op
can appear multiple times. This patch should not change anything.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9c65474a8c..63f970d045 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -322,7 +322,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         }
 
-        /* Simplify expression if possible. */
+        /* Simplify expression for "op r, a, 0 => mov r, a" cases */
         switch (op) {
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
@@ -352,6 +352,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 continue;
             }
             break;
+        default:
+            break;
+        }
+
+        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
+        switch (op) {
         CASE_OP_32_64(mul):
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
@@ -362,6 +368,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 continue;
             }
             break;
+        default:
+            break;
+        }
+
+        /* Simplify expression for "op r, a, a => mov r, a" cases */
+        switch (op) {
         CASE_OP_32_64(or):
         CASE_OP_32_64(and):
             if (args[1] == args[2]) {
-- 
cgit v1.2.3-55-g7522


From 38ee188b1b63606191c013046add021bdc6f1bfd Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Thu, 6 Sep 2012 16:47:14 +0200
Subject: tcg/optimize: simplify or/xor r, a, 0 cases

or/xor r, a, 0 is equivalent to a mov r, a.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 63f970d045..0db849edbf 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -331,6 +331,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(sar):
         CASE_OP_32_64(rotl):
         CASE_OP_32_64(rotr):
+        CASE_OP_32_64(or):
+        CASE_OP_32_64(xor):
             if (temps[args[1]].state == TCG_TEMP_CONST) {
                 /* Proceed with possible constant folding. */
                 break;
-- 
cgit v1.2.3-55-g7522


From 61251c0c790d0a63468c8e3eb2767c1f4bf3654a Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Thu, 6 Sep 2012 16:47:14 +0200
Subject: tcg/optimize: simplify and r, a, 0 cases

and r, a, 0 is equivalent to a movi r, 0.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0db849edbf..c12cb2bc4b 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -360,6 +360,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 
         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
         switch (op) {
+        CASE_OP_32_64(and):
         CASE_OP_32_64(mul):
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
-- 
cgit v1.2.3-55-g7522


From 01ee5282ea955dee4e189b34ef888be6f36d9861 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Thu, 6 Sep 2012 16:47:14 +0200
Subject: tcg/optimize: simplify shift/rot r, 0, a => movi r, 0 cases

shift/rot r, 0, a is equivalent to movi r, 0.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index c12cb2bc4b..1698ba39b3 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -322,6 +322,26 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         }
 
+        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
+        switch (op) {
+        CASE_OP_32_64(shl):
+        CASE_OP_32_64(shr):
+        CASE_OP_32_64(sar):
+        CASE_OP_32_64(rotl):
+        CASE_OP_32_64(rotr):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[1]].val == 0) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+                args += 3;
+                gen_args += 2;
+                continue;
+            }
+            break;
+        default:
+            break;
+        }
+
         /* Simplify expression for "op r, a, 0 => mov r, a" cases */
         switch (op) {
         CASE_OP_32_64(add):
-- 
cgit v1.2.3-55-g7522


From 65a7cce17ddf6fa1a30d4315da1631d9b6c8fd31 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Thu, 6 Sep 2012 16:47:14 +0200
Subject: tcg/optimize: swap brcond/setcond arguments when possible

brcond and setcond ops are not commutative, but it's easy to compute the
new condition after swapping the arguments. Try to always put the constant
argument in second position like for commutative ops, to help backends to
generate better code.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1698ba39b3..7debc8a36a 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -318,6 +318,24 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 args[2] = tmp;
             }
             break;
+        CASE_OP_32_64(brcond):
+            if (temps[args[0]].state == TCG_TEMP_CONST
+                && temps[args[1]].state != TCG_TEMP_CONST) {
+                tmp = args[0];
+                args[0] = args[1];
+                args[1] = tmp;
+                args[2] = tcg_swap_cond(args[2]);
+            }
+            break;
+        CASE_OP_32_64(setcond):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state != TCG_TEMP_CONST) {
+                tmp = args[1];
+                args[1] = args[2];
+                args[2] = tmp;
+                args[3] = tcg_swap_cond(args[3]);
+            }
+            break;
         default:
             break;
         }
-- 
cgit v1.2.3-55-g7522


From f8dd19e5c7a5ff8cb5587c9aaea96ef754429713 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Thu, 6 Sep 2012 16:47:14 +0200
Subject: tcg/optimize: add constant folding for setcond

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 7debc8a36a..1cb1f36335 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -267,6 +267,67 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
     return res;
 }
 
+static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
+                                       TCGArg y, TCGCond c)
+{
+    switch (op_bits(op)) {
+    case 32:
+        switch (c) {
+        case TCG_COND_EQ:
+            return (uint32_t)x == (uint32_t)y;
+        case TCG_COND_NE:
+            return (uint32_t)x != (uint32_t)y;
+        case TCG_COND_LT:
+            return (int32_t)x < (int32_t)y;
+        case TCG_COND_GE:
+            return (int32_t)x >= (int32_t)y;
+        case TCG_COND_LE:
+            return (int32_t)x <= (int32_t)y;
+        case TCG_COND_GT:
+            return (int32_t)x > (int32_t)y;
+        case TCG_COND_LTU:
+            return (uint32_t)x < (uint32_t)y;
+        case TCG_COND_GEU:
+            return (uint32_t)x >= (uint32_t)y;
+        case TCG_COND_LEU:
+            return (uint32_t)x <= (uint32_t)y;
+        case TCG_COND_GTU:
+            return (uint32_t)x > (uint32_t)y;
+        }
+        break;
+    case 64:
+        switch (c) {
+        case TCG_COND_EQ:
+            return (uint64_t)x == (uint64_t)y;
+        case TCG_COND_NE:
+            return (uint64_t)x != (uint64_t)y;
+        case TCG_COND_LT:
+            return (int64_t)x < (int64_t)y;
+        case TCG_COND_GE:
+            return (int64_t)x >= (int64_t)y;
+        case TCG_COND_LE:
+            return (int64_t)x <= (int64_t)y;
+        case TCG_COND_GT:
+            return (int64_t)x > (int64_t)y;
+        case TCG_COND_LTU:
+            return (uint64_t)x < (uint64_t)y;
+        case TCG_COND_GEU:
+            return (uint64_t)x >= (uint64_t)y;
+        case TCG_COND_LEU:
+            return (uint64_t)x <= (uint64_t)y;
+        case TCG_COND_GTU:
+            return (uint64_t)x > (uint64_t)y;
+        }
+        break;
+    }
+
+    fprintf(stderr,
+            "Unrecognized bitness %d or condition %d in "
+            "do_constant_folding_cond.\n", op_bits(op), c);
+    tcg_abort();
+}
+
+
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -522,6 +583,26 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 args += 3;
                 break;
             }
+        CASE_OP_32_64(setcond):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tmp = do_constant_folding_cond(op, temps[args[1]].val,
+                                               temps[args[2]].val, args[3]);
+                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                gen_args += 2;
+                args += 4;
+                break;
+            } else {
+                reset_temp(args[0], nb_temps, nb_globals);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args += 4;
+                args += 4;
+                break;
+            }
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
-- 
cgit v1.2.3-55-g7522


From fbeaa26c4cf45a83afaab7aac286d5d96199e740 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Thu, 6 Sep 2012 16:47:14 +0200
Subject: tcg/optimize: add constant folding for brcond

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1cb1f36335..156e8d9698 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -603,6 +603,32 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 args += 4;
                 break;
             }
+        CASE_OP_32_64(brcond):
+            if (temps[args[0]].state == TCG_TEMP_CONST
+                && temps[args[1]].state == TCG_TEMP_CONST) {
+                if (do_constant_folding_cond(op, temps[args[0]].val,
+                                             temps[args[1]].val, args[2])) {
+                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                    gen_opc_buf[op_index] = INDEX_op_br;
+                    gen_args[0] = args[3];
+                    gen_args += 1;
+                    args += 4;
+                } else {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                    args += 4;
+                }
+                break;
+            } else {
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                reset_temp(args[0], nb_temps, nb_globals);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args += 4;
+                args += 4;
+                break;
+            }
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
@@ -624,7 +650,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         case INDEX_op_set_label:
         case INDEX_op_jmp:
         case INDEX_op_br:
-        CASE_OP_32_64(brcond):
             memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             for (i = 0; i < def->nb_args; i++) {
                 *gen_args = *args;
-- 
cgit v1.2.3-55-g7522


From fedc0da2510b61742dcc4755938093bef4c6078d Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Fri, 7 Sep 2012 12:24:32 +0200
Subject: tcg/optimize: fix if/else/break coding style

optimizer.c contains some cases were the break is appearing in both the
if and the else parts. Fix that by moving it to the outer part. Also
move some common code there.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 34 +++++++++++-----------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 156e8d9698..fba0ed9592 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -441,15 +441,14 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 if ((temps[args[0]].state == TCG_TEMP_COPY
                     && temps[args[0]].val == args[1])
                     || args[0] == args[1]) {
-                    args += 3;
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
                     tcg_opt_gen_mov(s, gen_args, args[0], args[1],
                                     nb_temps, nb_globals);
                     gen_args += 2;
-                    args += 3;
                 }
+                args += 3;
                 continue;
             }
             break;
@@ -480,15 +479,14 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(and):
             if (args[1] == args[2]) {
                 if (args[1] == args[0]) {
-                    args += 3;
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
                     tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
                                     nb_globals);
                     gen_args += 2;
-                    args += 3;
                 }
+                args += 3;
                 continue;
             }
             break;
@@ -538,17 +536,14 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
                 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
-                gen_args += 2;
-                args += 2;
-                break;
             } else {
                 reset_temp(args[0], nb_temps, nb_globals);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
-                gen_args += 2;
-                args += 2;
-                break;
             }
+            gen_args += 2;
+            args += 2;
+            break;
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(mul):
@@ -572,17 +567,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                           temps[args[2]].val);
                 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
                 gen_args += 2;
-                args += 3;
-                break;
             } else {
                 reset_temp(args[0], nb_temps, nb_globals);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
                 gen_args += 3;
-                args += 3;
-                break;
             }
+            args += 3;
+            break;
         CASE_OP_32_64(setcond):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
@@ -591,8 +584,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                                temps[args[2]].val, args[3]);
                 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
                 gen_args += 2;
-                args += 4;
-                break;
             } else {
                 reset_temp(args[0], nb_temps, nb_globals);
                 gen_args[0] = args[0];
@@ -600,9 +591,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_args[2] = args[2];
                 gen_args[3] = args[3];
                 gen_args += 4;
-                args += 4;
-                break;
             }
+            args += 4;
+            break;
         CASE_OP_32_64(brcond):
             if (temps[args[0]].state == TCG_TEMP_CONST
                 && temps[args[1]].state == TCG_TEMP_CONST) {
@@ -612,12 +603,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     gen_opc_buf[op_index] = INDEX_op_br;
                     gen_args[0] = args[3];
                     gen_args += 1;
-                    args += 4;
                 } else {
                     gen_opc_buf[op_index] = INDEX_op_nop;
-                    args += 4;
                 }
-                break;
             } else {
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
                 reset_temp(args[0], nb_temps, nb_globals);
@@ -626,9 +614,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_args[2] = args[2];
                 gen_args[3] = args[3];
                 gen_args += 4;
-                args += 4;
-                break;
             }
+            args += 4;
+            break;
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
-- 
cgit v1.2.3-55-g7522


From d104bebd073cc05a1a572529a060857a377be6aa Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Mon, 10 Sep 2012 13:14:12 +0200
Subject: revert "TCG: fix copy propagation"

Given the copy propagation breakage on 32-bit hosts has been fixed
commit e31b0a7c050711884ad570fe73df806520953618 can be reverted.

Cc: Blue Swirl <blauwirbel@gmail.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 15 ++++++---------
 tcg/tcg.h      |  5 -----
 2 files changed, 6 insertions(+), 14 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index fba0ed9592..10d9773dcb 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -107,15 +107,12 @@ static TCGOpcode op_to_movi(TCGOpcode op)
     }
 }
 
-static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
-                            TCGArg src, int nb_temps, int nb_globals)
+static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
+                            int nb_temps, int nb_globals)
 {
         reset_temp(dst, nb_temps, nb_globals);
         assert(temps[src].state != TCG_TEMP_COPY);
-        /* Don't try to copy if one of temps is a global or either one
-           is local and another is register */
-        if (src >= nb_globals && dst >= nb_globals &&
-            tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
+        if (src >= nb_globals) {
             assert(temps[src].state != TCG_TEMP_CONST);
             if (temps[src].state != TCG_TEMP_HAS_COPY) {
                 temps[src].state = TCG_TEMP_HAS_COPY;
@@ -444,7 +441,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(s, gen_args, args[0], args[1],
+                    tcg_opt_gen_mov(gen_args, args[0], args[1],
                                     nb_temps, nb_globals);
                     gen_args += 2;
                 }
@@ -482,7 +479,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
+                    tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps,
                                     nb_globals);
                     gen_args += 2;
                 }
@@ -507,7 +504,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 break;
             }
             if (temps[args[1]].state != TCG_TEMP_CONST) {
-                tcg_opt_gen_mov(s, gen_args, args[0], args[1],
+                tcg_opt_gen_mov(gen_args, args[0], args[1],
                                 nb_temps, nb_globals);
                 gen_args += 2;
                 args += 2;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 7a72729f3a..477775dba6 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -459,11 +459,6 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
 void tcg_temp_free_i64(TCGv_i64 arg);
 char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg);
 
-static inline bool tcg_arg_is_local(TCGContext *s, TCGArg arg)
-{
-    return s->temps[arg].temp_local;
-}
-
 #if defined(CONFIG_DEBUG_TCG)
 /* If you call tcg_clear_temp_count() at the start of a section of
  * code which is not supposed to leak any TCG temporaries, then
-- 
cgit v1.2.3-55-g7522


From a25506603914d706f4ac4c63d3b93b4f1227b9b4 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Wed, 19 Sep 2012 21:40:30 +0200
Subject: tcg/optimize: fix end of basic block detection

Commit e31b0a7c050711884ad570fe73df806520953618 fixed copy propagation on
32-bit host by restricting the copy between different types. This was the
wrong fix.

The real problem is that the all temps states should be reset at the end
of a basic block. This was done by adding such operations in the switch,
but brcond2 was forgotten (that's why the crash was only observed on 32-bit
hosts).

Fix that by looking at the TCG_OPF_BB_END instead. We need to keep the case
for op_set_label as temps might be modified through another path.

Cc: Blue Swirl <blauwirbel@gmail.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 10d9773dcb..9da333c303 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -632,21 +632,17 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 i--;
             }
             break;
-        case INDEX_op_set_label:
-        case INDEX_op_jmp:
-        case INDEX_op_br:
-            memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-            for (i = 0; i < def->nb_args; i++) {
-                *gen_args = *args;
-                args++;
-                gen_args++;
-            }
-            break;
         default:
             /* Default case: we do know nothing about operation so no
-               propagation is done.  We only trash output args.  */
-            for (i = 0; i < def->nb_oargs; i++) {
-                reset_temp(args[i], nb_temps, nb_globals);
+               propagation is done.  We trash everything if the operation
+               is the end of a basic block, otherwise we only trash the
+               output args.  */
+            if (def->flags & TCG_OPF_BB_END) {
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+            } else {
+                for (i = 0; i < def->nb_oargs; i++) {
+                    reset_temp(args[i], nb_temps, nb_globals);
+                }
             }
             for (i = 0; i < def->nb_args; i++) {
                 gen_args[i] = args[i];
-- 
cgit v1.2.3-55-g7522


From fa01a2084e5643504b28ebba2d9dbdb73d2f74a4 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Fri, 21 Sep 2012 10:13:37 -0700
Subject: tcg: Optimize movcond for constant comparisons

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9da333c303..26038a695d 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -394,6 +394,14 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 args[3] = tcg_swap_cond(args[3]);
             }
             break;
+        CASE_OP_32_64(movcond):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state != TCG_TEMP_CONST) {
+                tmp = args[1];
+                args[1] = args[2];
+                args[2] = tmp;
+                args[5] = tcg_swap_cond(args[5]);
+            }
         default:
             break;
         }
@@ -614,6 +622,38 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             args += 4;
             break;
+        CASE_OP_32_64(movcond):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST) {
+                tmp = do_constant_folding_cond(op, temps[args[1]].val,
+                                               temps[args[2]].val, args[5]);
+                if (args[0] == args[4-tmp]
+                    || (temps[args[4-tmp]].state == TCG_TEMP_COPY
+                        && temps[args[4-tmp]].val == args[0])) {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
+                    gen_opc_buf[op_index] = op_to_movi(op);
+                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val,
+                                     nb_temps, nb_globals);
+                    gen_args += 2;
+                } else {
+                    gen_opc_buf[op_index] = op_to_mov(op);
+                    tcg_opt_gen_mov(gen_args, args[0], args[4-tmp],
+                                    nb_temps, nb_globals);
+                    gen_args += 2;
+                }
+            } else {
+                reset_temp(args[0], nb_temps, nb_globals);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args[4] = args[4];
+                gen_args[5] = args[5];
+                gen_args += 6;
+            }
+            args += 6;
+            break;
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
-- 
cgit v1.2.3-55-g7522


From 5d8f53630011f93dc774f1b2dc9557c7eac3ad89 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Fri, 21 Sep 2012 10:13:38 -0700
Subject: tcg: Optimize two-address commutative operations

While swapping constants to the second operand, swap
sources matching destinations to the first operand.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 26038a695d..1be7631672 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -334,6 +334,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     const TCGOpDef *def;
     TCGArg *gen_args;
     TCGArg tmp;
+    TCGCond cond;
+
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
        If this temp is a copy of other ones then this equivalence class'
@@ -395,13 +397,24 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             break;
         CASE_OP_32_64(movcond):
+            cond = args[5];
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state != TCG_TEMP_CONST) {
                 tmp = args[1];
                 args[1] = args[2];
                 args[2] = tmp;
-                args[5] = tcg_swap_cond(args[5]);
+                cond = tcg_swap_cond(cond);
+            }
+            /* For movcond, we canonicalize the "false" input reg to match
+               the destination reg so that the tcg backend can implement
+               a "move if true" operation.  */
+            if (args[0] == args[3]) {
+                tmp = args[3];
+                args[3] = args[4];
+                args[4] = tmp;
+                cond = tcg_invert_cond(cond);
             }
+            args[5] = cond;
         default:
             break;
         }
-- 
cgit v1.2.3-55-g7522


From 48b56ce1683dec02a29448f31861fca4dd0a0b33 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Mon, 10 Sep 2012 23:51:42 +0200
Subject: tcg/optimize: remove TCG_TEMP_ANY

TCG_TEMP_ANY has no different meaning than TCG_TEMP_UNDEF, so use
the later instead.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1be7631672..308b7f9033 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,8 +39,7 @@ typedef enum {
     TCG_TEMP_UNDEF = 0,
     TCG_TEMP_CONST,
     TCG_TEMP_COPY,
-    TCG_TEMP_HAS_COPY,
-    TCG_TEMP_ANY
+    TCG_TEMP_HAS_COPY
 } tcg_temp_state;
 
 struct tcg_temp_info {
@@ -52,7 +51,7 @@ struct tcg_temp_info {
 
 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
 
-/* Reset TEMP's state to TCG_TEMP_ANY.  If TEMP was a representative of some
+/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP was a representative of some
    class of equivalent temp's, a new representative should be chosen in this
    class. */
 static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
@@ -69,7 +68,7 @@ static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
         }
         for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
             if (new_base == (TCGArg)-1) {
-                temps[i].state = TCG_TEMP_ANY;
+                temps[i].state = TCG_TEMP_UNDEF;
             } else {
                 temps[i].val = new_base;
             }
@@ -81,9 +80,9 @@ static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
         temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
         new_base = temps[temp].val;
     }
-    temps[temp].state = TCG_TEMP_ANY;
+    temps[temp].state = TCG_TEMP_UNDEF;
     if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
-        temps[new_base].state = TCG_TEMP_ANY;
+        temps[new_base].state = TCG_TEMP_UNDEF;
     }
 }
 
-- 
cgit v1.2.3-55-g7522


From b80bb016d8c8e9d74345a90ab6dac1cb547904e0 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Tue, 11 Sep 2012 12:26:23 +0200
Subject: tcg/optimize: check types in copy propagation

The copy propagation doesn't check the types of the temps during copy
propagation. However TCG is using the mov_i32 for the i64 to i32
conversion and thus the two are not equivalent.

With this patch tcg_opt_gen_mov() doesn't consider two temps of
different type as copies anymore.

So far it seems the optimization was not aggressive enough to trigger
this bug, but it will be triggered later in this series once the copy
propagation is improved.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 308b7f9033..da8dffe9cd 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -106,12 +106,13 @@ static TCGOpcode op_to_movi(TCGOpcode op)
     }
 }
 
-static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
-                            int nb_temps, int nb_globals)
+static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
+                            TCGArg dst, TCGArg src)
 {
-        reset_temp(dst, nb_temps, nb_globals);
+        reset_temp(dst, s->nb_temps, s->nb_globals);
         assert(temps[src].state != TCG_TEMP_COPY);
-        if (src >= nb_globals) {
+        /* Only consider temps with the same type (width) as copies. */
+        if (src >= s->nb_globals && s->temps[dst].type == s->temps[src].type) {
             assert(temps[src].state != TCG_TEMP_CONST);
             if (temps[src].state != TCG_TEMP_HAS_COPY) {
                 temps[src].state = TCG_TEMP_HAS_COPY;
@@ -461,8 +462,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(gen_args, args[0], args[1],
-                                    nb_temps, nb_globals);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                     gen_args += 2;
                 }
                 args += 3;
@@ -499,8 +499,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps,
-                                    nb_globals);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                     gen_args += 2;
                 }
                 args += 3;
@@ -524,8 +523,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 break;
             }
             if (temps[args[1]].state != TCG_TEMP_CONST) {
-                tcg_opt_gen_mov(gen_args, args[0], args[1],
-                                nb_temps, nb_globals);
+                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                 gen_args += 2;
                 args += 2;
                 break;
-- 
cgit v1.2.3-55-g7522


From e590d4e6b3c73b38a9d9ed10c898f73ed8a29f1d Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Tue, 11 Sep 2012 12:31:21 +0200
Subject: tcg/optimize: rework copy progagation

The copy propagation pass tries to keep track what is a copy of what
and what has copy of what, and in addition it keep a circular list of
of all the copies. Unfortunately this doesn't fully work: a mov from
a temp which has a state "COPY" changed it into a state "HAS_COPY".
Later when this temp is used again, it is considered has not having
copy and thus no propagation is done.

This patch fixes that by removing the hiearchy between copies, and thus
only keeping a "COPY" state both meaning "is a copy" and "has a copy".
The decision of which copy to use is deferred to the actual temp
replacement. At this stage there is not one best choice to do, but only
better choices than others. For doing the best choice the operation
would have to be parsed in reversed to know if a temp is going to be
used later or not. That what is done by the liveness analysis. At this
stage it is known that globals will be always live, that local temps
will be dead at the end of the translation block, and that the temps
will be dead at the end of the basic block. This means that this stage
should try to replace temps by local temps or globals and local temps
by globals.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 167 +++++++++++++++++++++++++++++++--------------------------
 1 file changed, 92 insertions(+), 75 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index da8dffe9cd..1904b396a6 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,7 +39,6 @@ typedef enum {
     TCG_TEMP_UNDEF = 0,
     TCG_TEMP_CONST,
     TCG_TEMP_COPY,
-    TCG_TEMP_HAS_COPY
 } tcg_temp_state;
 
 struct tcg_temp_info {
@@ -51,39 +50,19 @@ struct tcg_temp_info {
 
 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
 
-/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP was a representative of some
-   class of equivalent temp's, a new representative should be chosen in this
-   class. */
-static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
+/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
+   the copy flag from the left temp.  */
+static void reset_temp(TCGArg temp)
 {
-    int i;
-    TCGArg new_base = (TCGArg)-1;
-    if (temps[temp].state == TCG_TEMP_HAS_COPY) {
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
-            if (i >= nb_globals) {
-                temps[i].state = TCG_TEMP_HAS_COPY;
-                new_base = i;
-                break;
-            }
-        }
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
-            if (new_base == (TCGArg)-1) {
-                temps[i].state = TCG_TEMP_UNDEF;
-            } else {
-                temps[i].val = new_base;
-            }
+    if (temps[temp].state == TCG_TEMP_COPY) {
+        if (temps[temp].prev_copy == temps[temp].next_copy) {
+            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
+        } else {
+            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
+            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
         }
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
-    } else if (temps[temp].state == TCG_TEMP_COPY) {
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
-        new_base = temps[temp].val;
     }
     temps[temp].state = TCG_TEMP_UNDEF;
-    if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
-        temps[new_base].state = TCG_TEMP_UNDEF;
-    }
 }
 
 static int op_bits(TCGOpcode op)
@@ -106,34 +85,83 @@ static TCGOpcode op_to_movi(TCGOpcode op)
     }
 }
 
+static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
+{
+    TCGArg i;
+
+    /* If this is already a global, we can't do better. */
+    if (temp < s->nb_globals) {
+        return temp;
+    }
+
+    /* Search for a global first. */
+    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+        if (i < s->nb_globals) {
+            return i;
+        }
+    }
+
+    /* If it is a temp, search for a temp local. */
+    if (!s->temps[temp].temp_local) {
+        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+            if (s->temps[i].temp_local) {
+                return i;
+            }
+        }
+    }
+
+    /* Failure to find a better representation, return the same temp. */
+    return temp;
+}
+
+static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
+{
+    TCGArg i;
+
+    if (arg1 == arg2) {
+        return true;
+    }
+
+    if (temps[arg1].state != TCG_TEMP_COPY
+        || temps[arg2].state != TCG_TEMP_COPY) {
+        return false;
+    }
+
+    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
+        if (i == arg2) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                             TCGArg dst, TCGArg src)
 {
-        reset_temp(dst, s->nb_temps, s->nb_globals);
-        assert(temps[src].state != TCG_TEMP_COPY);
-        /* Only consider temps with the same type (width) as copies. */
-        if (src >= s->nb_globals && s->temps[dst].type == s->temps[src].type) {
-            assert(temps[src].state != TCG_TEMP_CONST);
-            if (temps[src].state != TCG_TEMP_HAS_COPY) {
-                temps[src].state = TCG_TEMP_HAS_COPY;
+        reset_temp(dst);
+        assert(temps[src].state != TCG_TEMP_CONST);
+
+        if (s->temps[src].type == s->temps[dst].type) {
+            if (temps[src].state != TCG_TEMP_COPY) {
+                temps[src].state = TCG_TEMP_COPY;
                 temps[src].next_copy = src;
                 temps[src].prev_copy = src;
             }
             temps[dst].state = TCG_TEMP_COPY;
-            temps[dst].val = src;
             temps[dst].next_copy = temps[src].next_copy;
             temps[dst].prev_copy = src;
             temps[temps[dst].next_copy].prev_copy = dst;
             temps[src].next_copy = dst;
         }
+
         gen_args[0] = dst;
         gen_args[1] = src;
 }
 
-static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
-                             int nb_temps, int nb_globals)
+static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
 {
-        reset_temp(dst, nb_temps, nb_globals);
+        reset_temp(dst);
         temps[dst].state = TCG_TEMP_CONST;
         temps[dst].val = val;
         gen_args[0] = dst;
@@ -324,7 +352,6 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     tcg_abort();
 }
 
-
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -338,10 +365,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
-       If this temp is a copy of other ones then this equivalence class'
-       representative is kept in VALS' element.
-       If this temp is neither copy nor constant then corresponding VALS'
-       element is unused. */
+       If this temp is a copy of other ones then the other copies are
+       available through the doubly linked circular list. */
 
     nb_temps = s->nb_temps;
     nb_globals = s->nb_globals;
@@ -357,7 +382,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             assert(op != INDEX_op_call);
             for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                 if (temps[args[i]].state == TCG_TEMP_COPY) {
-                    args[i] = temps[args[i]].val;
+                    args[i] = find_better_copy(s, args[i]);
                 }
             }
         }
@@ -429,7 +454,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[1]].val == 0) {
                 gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
                 args += 3;
                 gen_args += 2;
                 continue;
@@ -456,9 +481,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             if (temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0) {
-                if ((temps[args[0]].state == TCG_TEMP_COPY
-                    && temps[args[0]].val == args[1])
-                    || args[0] == args[1]) {
+                if (temps_are_copies(args[0], args[1])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
@@ -480,7 +503,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
                 gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
                 args += 3;
                 gen_args += 2;
                 continue;
@@ -495,7 +518,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(or):
         CASE_OP_32_64(and):
             if (args[1] == args[2]) {
-                if (args[1] == args[0]) {
+                if (temps_are_copies(args[0], args[1])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
@@ -515,9 +538,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
            allocator where needed and possible.  Also detect copies. */
         switch (op) {
         CASE_OP_32_64(mov):
-            if ((temps[args[1]].state == TCG_TEMP_COPY
-                && temps[args[1]].val == args[0])
-                || args[0] == args[1]) {
+            if (temps_are_copies(args[0], args[1])) {
                 args += 2;
                 gen_opc_buf[op_index] = INDEX_op_nop;
                 break;
@@ -535,7 +556,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             args[1] = temps[args[1]].val;
             /* fallthrough */
         CASE_OP_32_64(movi):
-            tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
+            tcg_opt_gen_movi(gen_args, args[0], args[1]);
             gen_args += 2;
             args += 2;
             break;
@@ -550,9 +571,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (temps[args[1]].state == TCG_TEMP_CONST) {
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
             }
@@ -580,10 +601,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val,
                                           temps[args[2]].val);
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
@@ -597,10 +618,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding_cond(op, temps[args[1]].val,
                                                temps[args[2]].val, args[3]);
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
@@ -623,7 +644,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 }
             } else {
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
@@ -637,23 +658,19 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 && temps[args[2]].state == TCG_TEMP_CONST) {
                 tmp = do_constant_folding_cond(op, temps[args[1]].val,
                                                temps[args[2]].val, args[5]);
-                if (args[0] == args[4-tmp]
-                    || (temps[args[4-tmp]].state == TCG_TEMP_COPY
-                        && temps[args[4-tmp]].val == args[0])) {
+                if (temps_are_copies(args[0], args[4-tmp])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
                     gen_opc_buf[op_index] = op_to_movi(op);
-                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val,
-                                     nb_temps, nb_globals);
+                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
                     gen_args += 2;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(gen_args, args[0], args[4-tmp],
-                                    nb_temps, nb_globals);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                     gen_args += 2;
                 }
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
@@ -668,11 +685,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
                 for (i = 0; i < nb_globals; i++) {
-                    reset_temp(i, nb_temps, nb_globals);
+                    reset_temp(i);
                 }
             }
             for (i = 0; i < (args[0] >> 16); i++) {
-                reset_temp(args[i + 1], nb_temps, nb_globals);
+                reset_temp(args[i + 1]);
             }
             i = nb_call_args + 3;
             while (i) {
@@ -691,7 +708,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             } else {
                 for (i = 0; i < def->nb_oargs; i++) {
-                    reset_temp(args[i], nb_temps, nb_globals);
+                    reset_temp(args[i]);
                 }
             }
             for (i = 0; i < def->nb_args; i++) {
-- 
cgit v1.2.3-55-g7522


From 1ff8c5418a680d6766493908eaa07cc11dce7f13 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Tue, 11 Sep 2012 16:18:49 +0200
Subject: tcg/optimize: do copy propagation for all operations

It is possible to due copy propagation for all operations, even the one
that have side effects or clobber arguments (it only concerns input
arguments). That said, the call operation should be handled differently
due to the variable number of arguments.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1904b396a6..aeb2225f2a 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -378,8 +378,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         op = gen_opc_buf[op_index];
         def = &tcg_op_defs[op];
         /* Do copy propagation */
-        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
-            assert(op != INDEX_op_call);
+        if (op == INDEX_op_call) {
+            int nb_oargs = args[0] >> 16;
+            int nb_iargs = args[0] & 0xffff;
+            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
+                if (temps[args[i]].state == TCG_TEMP_COPY) {
+                    args[i] = find_better_copy(s, args[i]);
+                }
+            }
+        } else {
             for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                 if (temps[args[i]].state == TCG_TEMP_COPY) {
                     args[i] = find_better_copy(s, args[i]);
-- 
cgit v1.2.3-55-g7522


From 0aba1c7376ad325202f328211ab6dfcae94c7f2a Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Tue, 18 Sep 2012 19:11:32 +0200
Subject: tcg/optimize: optimize "op r, a, a => mov r, a"

Now that we can easily detect all copies, we can optimize the
"op r, a, a => mov r, a" case a bit more.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index aeb2225f2a..b9a7da920f 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -524,7 +524,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         switch (op) {
         CASE_OP_32_64(or):
         CASE_OP_32_64(and):
-            if (args[1] == args[2]) {
+            if (temps_are_copies(args[1], args[2])) {
                 if (temps_are_copies(args[0], args[1])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
-- 
cgit v1.2.3-55-g7522


From 3c94193e0bbdd855bcbedabd27e3cbe1e6bc6242 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Tue, 18 Sep 2012 19:12:36 +0200
Subject: tcg/optimize: optimize "op r, a, a => movi r, 0"

Now that it's possible to detect copies, we can optimize the case
the "op r, a, a => movi r, 0". This helps in the computation of
overflow flags when one of the two args is 0.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index b9a7da920f..ceea6441bc 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -540,6 +540,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         }
 
+        /* Simplify expression for "op r, a, a => movi r, 0" cases */
+        switch (op) {
+        CASE_OP_32_64(sub):
+        CASE_OP_32_64(xor):
+            if (temps_are_copies(args[1], args[2])) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
+                gen_args += 2;
+                args += 3;
+                continue;
+            }
+            break;
+        default:
+            break;
+        }
+
         /* Propagate constants through copy operations and do constant
            folding.  Constants will be substituted to arguments by register
            allocator where needed and possible.  Also detect copies. */
-- 
cgit v1.2.3-55-g7522


From b336ceb6918b8f9eb54dcbb1043521482c7be83b Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Tue, 18 Sep 2012 19:37:00 +0200
Subject: tcg/optimize: further optimize brcond/movcond/setcond

When both argument of brcond/movcond/setcond are the same or when one
of the two values is a constant equal to zero, it's possible to do
further optimizations.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 127 ++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 76 insertions(+), 51 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index ceea6441bc..abe016a28a 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -292,58 +292,88 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
     return res;
 }
 
+/* Return 2 if the condition can't be simplified, and the result
+   of the condition (0 or 1) if it can */
 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                        TCGArg y, TCGCond c)
 {
-    switch (op_bits(op)) {
-    case 32:
+    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
+        switch (op_bits(op)) {
+        case 32:
+            switch (c) {
+            case TCG_COND_EQ:
+                return (uint32_t)temps[x].val == (uint32_t)temps[y].val;
+            case TCG_COND_NE:
+                return (uint32_t)temps[x].val != (uint32_t)temps[y].val;
+            case TCG_COND_LT:
+                return (int32_t)temps[x].val < (int32_t)temps[y].val;
+            case TCG_COND_GE:
+                return (int32_t)temps[x].val >= (int32_t)temps[y].val;
+            case TCG_COND_LE:
+                return (int32_t)temps[x].val <= (int32_t)temps[y].val;
+            case TCG_COND_GT:
+                return (int32_t)temps[x].val > (int32_t)temps[y].val;
+            case TCG_COND_LTU:
+                return (uint32_t)temps[x].val < (uint32_t)temps[y].val;
+            case TCG_COND_GEU:
+                return (uint32_t)temps[x].val >= (uint32_t)temps[y].val;
+            case TCG_COND_LEU:
+                return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
+            case TCG_COND_GTU:
+                return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
+            }
+            break;
+        case 64:
+            switch (c) {
+            case TCG_COND_EQ:
+                return (uint64_t)temps[x].val == (uint64_t)temps[y].val;
+            case TCG_COND_NE:
+                return (uint64_t)temps[x].val != (uint64_t)temps[y].val;
+            case TCG_COND_LT:
+                return (int64_t)temps[x].val < (int64_t)temps[y].val;
+            case TCG_COND_GE:
+                return (int64_t)temps[x].val >= (int64_t)temps[y].val;
+            case TCG_COND_LE:
+                return (int64_t)temps[x].val <= (int64_t)temps[y].val;
+            case TCG_COND_GT:
+                return (int64_t)temps[x].val > (int64_t)temps[y].val;
+            case TCG_COND_LTU:
+                return (uint64_t)temps[x].val < (uint64_t)temps[y].val;
+            case TCG_COND_GEU:
+                return (uint64_t)temps[x].val >= (uint64_t)temps[y].val;
+            case TCG_COND_LEU:
+                return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
+            case TCG_COND_GTU:
+                return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
+            }
+            break;
+        }
+    } else if (temps_are_copies(x, y)) {
         switch (c) {
-        case TCG_COND_EQ:
-            return (uint32_t)x == (uint32_t)y;
-        case TCG_COND_NE:
-            return (uint32_t)x != (uint32_t)y;
-        case TCG_COND_LT:
-            return (int32_t)x < (int32_t)y;
-        case TCG_COND_GE:
-            return (int32_t)x >= (int32_t)y;
-        case TCG_COND_LE:
-            return (int32_t)x <= (int32_t)y;
         case TCG_COND_GT:
-            return (int32_t)x > (int32_t)y;
         case TCG_COND_LTU:
-            return (uint32_t)x < (uint32_t)y;
-        case TCG_COND_GEU:
-            return (uint32_t)x >= (uint32_t)y;
-        case TCG_COND_LEU:
-            return (uint32_t)x <= (uint32_t)y;
+        case TCG_COND_LT:
         case TCG_COND_GTU:
-            return (uint32_t)x > (uint32_t)y;
-        }
-        break;
-    case 64:
-        switch (c) {
-        case TCG_COND_EQ:
-            return (uint64_t)x == (uint64_t)y;
         case TCG_COND_NE:
-            return (uint64_t)x != (uint64_t)y;
-        case TCG_COND_LT:
-            return (int64_t)x < (int64_t)y;
+            return 0;
         case TCG_COND_GE:
-            return (int64_t)x >= (int64_t)y;
+        case TCG_COND_GEU:
         case TCG_COND_LE:
-            return (int64_t)x <= (int64_t)y;
-        case TCG_COND_GT:
-            return (int64_t)x > (int64_t)y;
+        case TCG_COND_LEU:
+        case TCG_COND_EQ:
+            return 1;
+        }
+    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
+        switch (c) {
         case TCG_COND_LTU:
-            return (uint64_t)x < (uint64_t)y;
+            return 0;
         case TCG_COND_GEU:
-            return (uint64_t)x >= (uint64_t)y;
-        case TCG_COND_LEU:
-            return (uint64_t)x <= (uint64_t)y;
-        case TCG_COND_GTU:
-            return (uint64_t)x > (uint64_t)y;
+            return 1;
+        default:
+            return 2;
         }
-        break;
+    } else {
+        return 2;
     }
 
     fprintf(stderr,
@@ -636,11 +666,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             args += 3;
             break;
         CASE_OP_32_64(setcond):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state == TCG_TEMP_CONST) {
+            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
+            if (tmp != 2) {
                 gen_opc_buf[op_index] = op_to_movi(op);
-                tmp = do_constant_folding_cond(op, temps[args[1]].val,
-                                               temps[args[2]].val, args[3]);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
             } else {
@@ -654,10 +682,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             args += 4;
             break;
         CASE_OP_32_64(brcond):
-            if (temps[args[0]].state == TCG_TEMP_CONST
-                && temps[args[1]].state == TCG_TEMP_CONST) {
-                if (do_constant_folding_cond(op, temps[args[0]].val,
-                                             temps[args[1]].val, args[2])) {
+            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
+            if (tmp != 2) {
+                if (tmp) {
                     memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
                     gen_opc_buf[op_index] = INDEX_op_br;
                     gen_args[0] = args[3];
@@ -677,10 +704,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             args += 4;
             break;
         CASE_OP_32_64(movcond):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state == TCG_TEMP_CONST) {
-                tmp = do_constant_folding_cond(op, temps[args[1]].val,
-                                               temps[args[2]].val, args[5]);
+            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
+            if (tmp != 2) {
                 if (temps_are_copies(args[0], args[4-tmp])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
-- 
cgit v1.2.3-55-g7522


From c2b0e2fea2ef7a183233d3b86c37c5d4bcb89544 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Wed, 19 Sep 2012 22:00:22 +0200
Subject: tcg/optimize: prefer the "op a, a, b" form for commutative ops

The "op a, a, b" form is better handled on non-RISC host than the "op
a, b, a" form, so swap the arguments to this form when possible, and
when b is not a constant.

This reduces the number of generated instructions by a tiny bit.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index abe016a28a..c8ae50bc91 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -434,7 +434,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(eqv):
         CASE_OP_32_64(nand):
         CASE_OP_32_64(nor):
-            if (temps[args[1]].state == TCG_TEMP_CONST) {
+            /* Prefer the constant in second argument, and then the form
+               op a, a, b, which is better handled on non-RISC hosts. */
+            if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2]
+                && temps[args[2]].state != TCG_TEMP_CONST)) {
                 tmp = args[1];
                 args[1] = args[2];
                 args[2] = tmp;
-- 
cgit v1.2.3-55-g7522


From 7ef55fc91926f518f905692db19ed0b4a8018989 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Fri, 21 Sep 2012 11:07:29 +0200
Subject: tcg/optimize: add constant folding for deposit

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index c8ae50bc91..35532a1e03 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -668,6 +668,26 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             args += 3;
             break;
+        CASE_OP_32_64(deposit):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tmp = ((1ull << args[4]) - 1);
+                tmp = (temps[args[1]].val & ~(tmp << args[3]))
+                      | ((temps[args[2]].val & tmp) << args[3]);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+            } else {
+                reset_temp(args[0]);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args[4] = args[4];
+                gen_args += 5;
+            }
+            args += 5;
+            break;
         CASE_OP_32_64(setcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
             if (tmp != 2) {
-- 
cgit v1.2.3-55-g7522


From 0aed257f08444feb6269d0c302b35a8fb10fcb3f Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 24 Sep 2012 14:21:40 -0700
Subject: tcg: Add TCG_COND_NEVER, TCG_COND_ALWAYS

There are several cases that can be handled easier inside both
translators and code generators if we have out-of-band values
for conditions.  It's easy enough to handle ALWAYS and NEVER in
the natural way inside the tcg middle-end.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/arm/tcg-target.c   |  2 +-
 tcg/hppa/tcg-target.c  |  2 +-
 tcg/i386/tcg-target.c  |  2 +-
 tcg/optimize.c         |  6 ++++
 tcg/ppc/tcg-target.c   |  2 +-
 tcg/ppc64/tcg-target.c |  2 +-
 tcg/s390/tcg-target.c  |  4 +--
 tcg/sparc/tcg-target.c |  2 +-
 tcg/tcg-op.h           | 82 ++++++++++++++++++++++++++++++++++++++------------
 tcg/tcg.c              |  2 ++
 tcg/tcg.h              | 37 ++++++++++++++---------
 11 files changed, 102 insertions(+), 41 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 1e61864d42..fbbbefe985 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -335,7 +335,7 @@ enum arm_cond_code_e {
     COND_AL = 0xe,
 };
 
-static const uint8_t tcg_cond_to_arm_cond[10] = {
+static const uint8_t tcg_cond_to_arm_cond[] = {
     [TCG_COND_EQ] = COND_EQ,
     [TCG_COND_NE] = COND_NE,
     [TCG_COND_LT] = COND_LT,
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 44974c4e86..b93343015a 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -732,7 +732,7 @@ static void tcg_out_branch(TCGContext *s, int label_index, int nul)
     }
 }
 
-static const uint8_t tcg_cond_to_cmp_cond[10] =
+static const uint8_t tcg_cond_to_cmp_cond[] =
 {
     [TCG_COND_EQ] = COND_EQ,
     [TCG_COND_NE] = COND_EQ | COND_FALSE,
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index bb2306df8d..4952c057b3 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -338,7 +338,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define JCC_JLE 0xe
 #define JCC_JG  0xf
 
-static const uint8_t tcg_cond_to_jcc[10] = {
+static const uint8_t tcg_cond_to_jcc[] = {
     [TCG_COND_EQ] = JCC_JE,
     [TCG_COND_NE] = JCC_JNE,
     [TCG_COND_LT] = JCC_JL,
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 35532a1e03..edb2b0ea90 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -321,6 +321,8 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                 return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
             case TCG_COND_GTU:
                 return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
+            default:
+                break;
             }
             break;
         case 64:
@@ -345,6 +347,8 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                 return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
             case TCG_COND_GTU:
                 return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
+            default:
+                break;
             }
             break;
         }
@@ -362,6 +366,8 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
         case TCG_COND_LEU:
         case TCG_COND_EQ:
             return 1;
+        default:
+            break;
         }
     } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
         switch (c) {
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 56baedda58..60b7b92621 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -437,7 +437,7 @@ enum {
     CR_SO
 };
 
-static const uint32_t tcg_to_bc[10] = {
+static const uint32_t tcg_to_bc[] = {
     [TCG_COND_EQ]  = BC | BI (7, CR_EQ) | BO_COND_TRUE,
     [TCG_COND_NE]  = BC | BI (7, CR_EQ) | BO_COND_FALSE,
     [TCG_COND_LT]  = BC | BI (7, CR_LT) | BO_COND_TRUE,
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 6e9b363a1b..5403fc1f91 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -418,7 +418,7 @@ enum {
     CR_SO
 };
 
-static const uint32_t tcg_to_bc[10] = {
+static const uint32_t tcg_to_bc[] = {
     [TCG_COND_EQ]  = BC | BI (7, CR_EQ) | BO_COND_TRUE,
     [TCG_COND_NE]  = BC | BI (7, CR_EQ) | BO_COND_FALSE,
     [TCG_COND_LT]  = BC | BI (7, CR_LT) | BO_COND_TRUE,
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index c0ef6ba9a6..fd9286f52d 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -268,7 +268,7 @@ static const int tcg_target_call_oarg_regs[] = {
 #define S390_CC_ALWAYS  15
 
 /* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
-static const uint8_t tcg_cond_to_s390_cond[10] = {
+static const uint8_t tcg_cond_to_s390_cond[] = {
     [TCG_COND_EQ]  = S390_CC_EQ,
     [TCG_COND_NE]  = S390_CC_NE,
     [TCG_COND_LT]  = S390_CC_LT,
@@ -284,7 +284,7 @@ static const uint8_t tcg_cond_to_s390_cond[10] = {
 /* Condition codes that result from a LOAD AND TEST.  Here, we have no
    unsigned instruction variation, however since the test is vs zero we
    can re-map the outcomes appropriately.  */
-static const uint8_t tcg_cond_to_ltr_cond[10] = {
+static const uint8_t tcg_cond_to_ltr_cond[] = {
     [TCG_COND_EQ]  = S390_CC_EQ,
     [TCG_COND_NE]  = S390_CC_NE,
     [TCG_COND_LT]  = S390_CC_LT,
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index e82fab2622..0c32baa50e 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -512,7 +512,7 @@ static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
 }
 #endif
 
-static const uint8_t tcg_cond_to_bcond[10] = {
+static const uint8_t tcg_cond_to_bcond[] = {
     [TCG_COND_EQ] = COND_E,
     [TCG_COND_NE] = COND_NE,
     [TCG_COND_LT] = COND_L,
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index bd93fe4f03..551845801d 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -646,29 +646,49 @@ static inline void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 static inline void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1,
                                       TCGv_i32 arg2, int label_index)
 {
-    tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
+    }
 }
 
 static inline void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1,
                                        int32_t arg2, int label_index)
 {
-    TCGv_i32 t0 = tcg_const_i32(arg2);
-    tcg_gen_brcond_i32(cond, arg1, t0, label_index);
-    tcg_temp_free_i32(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        TCGv_i32 t0 = tcg_const_i32(arg2);
+        tcg_gen_brcond_i32(cond, arg1, t0, label_index);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
                                        TCGv_i32 arg1, TCGv_i32 arg2)
 {
-    tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(ret, 0);
+    } else {
+        tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+    }
 }
 
 static inline void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
                                         TCGv_i32 arg1, int32_t arg2)
 {
-    TCGv_i32 t0 = tcg_const_i32(arg2);
-    tcg_gen_setcond_i32(cond, ret, arg1, t0);
-    tcg_temp_free_i32(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(ret, 0);
+    } else {
+        TCGv_i32 t0 = tcg_const_i32(arg2);
+        tcg_gen_setcond_i32(cond, ret, arg1, t0);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
@@ -964,17 +984,27 @@ static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
                                       TCGv_i64 arg2, int label_index)
 {
-    tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
-                      TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
-                      TCGV_HIGH(arg2), cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
+                          TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
+                          TCGV_HIGH(arg2), cond, label_index);
+    }
 }
 
 static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
                                        TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
-                     TCGV_LOW(arg1), TCGV_HIGH(arg1),
-                     TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(TCGV_LOW(ret), 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(TCGV_LOW(ret), 0);
+    } else {
+        tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+                         TCGV_LOW(arg1), TCGV_HIGH(arg1),
+                         TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+    }
     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
 }
 
@@ -1273,13 +1303,23 @@ static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
                                       TCGv_i64 arg2, int label_index)
 {
-    tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
+    }
 }
 
 static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
                                        TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i64(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i64(ret, 0);
+    } else {
+        tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+    }
 }
 
 static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1397,9 +1437,13 @@ static inline void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 static inline void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1,
                                        int64_t arg2, int label_index)
 {
-    TCGv_i64 t0 = tcg_const_i64(arg2);
-    tcg_gen_brcond_i64(cond, arg1, t0, label_index);
-    tcg_temp_free_i64(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        TCGv_i64 t0 = tcg_const_i64(arg2);
+        tcg_gen_brcond_i64(cond, arg1, t0, label_index);
+        tcg_temp_free_i64(t0);
+    }
 }
 
 static inline void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
diff --git a/tcg/tcg.c b/tcg/tcg.c
index c069e44a0e..78ef50b6ee 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -864,6 +864,8 @@ static TCGHelperInfo *tcg_find_helper(TCGContext *s, tcg_target_ulong val)
 
 static const char * const cond_name[] =
 {
+    [TCG_COND_NEVER] = "never",
+    [TCG_COND_ALWAYS] = "always",
     [TCG_COND_EQ] = "eq",
     [TCG_COND_NE] = "ne",
     [TCG_COND_LT] = "lt",
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6ff2ab55be..75f0239709 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -266,18 +266,28 @@ typedef int TCGv_i64;
 #define TCG_CALL_DUMMY_TCGV     MAKE_TCGV_I32(-1)
 #define TCG_CALL_DUMMY_ARG      ((TCGArg)(-1))
 
+/* Conditions.  Note that these are layed out for easy manipulation by
+   the the functions below:
+     bit 0 is used for inverting;
+     bit 1 is signed,
+     bit 2 is unsigned,
+     bit 3 is used with bit 0 for swapping signed/unsigned.  */
 typedef enum {
-    TCG_COND_EQ,
-    TCG_COND_NE,
-    TCG_COND_LT,
-    TCG_COND_GE,
-    TCG_COND_LE,
-    TCG_COND_GT,
+    /* non-signed */
+    TCG_COND_NEVER  = 0 | 0 | 0 | 0,
+    TCG_COND_ALWAYS = 0 | 0 | 0 | 1,
+    TCG_COND_EQ     = 8 | 0 | 0 | 0,
+    TCG_COND_NE     = 8 | 0 | 0 | 1,
+    /* signed */
+    TCG_COND_LT     = 0 | 0 | 2 | 0,
+    TCG_COND_GE     = 0 | 0 | 2 | 1,
+    TCG_COND_LE     = 8 | 0 | 2 | 0,
+    TCG_COND_GT     = 8 | 0 | 2 | 1,
     /* unsigned */
-    TCG_COND_LTU,
-    TCG_COND_GEU,
-    TCG_COND_LEU,
-    TCG_COND_GTU,
+    TCG_COND_LTU    = 0 | 4 | 0 | 0,
+    TCG_COND_GEU    = 0 | 4 | 0 | 1,
+    TCG_COND_LEU    = 8 | 4 | 0 | 0,
+    TCG_COND_GTU    = 8 | 4 | 0 | 1,
 } TCGCond;
 
 /* Invert the sense of the comparison.  */
@@ -289,18 +299,17 @@ static inline TCGCond tcg_invert_cond(TCGCond c)
 /* Swap the operands in a comparison.  */
 static inline TCGCond tcg_swap_cond(TCGCond c)
 {
-    int mask = (c < TCG_COND_LT ? 0 : c < TCG_COND_LTU ? 7 : 15);
-    return (TCGCond)(c ^ mask);
+    return c & 6 ? (TCGCond)(c ^ 9) : c;
 }
 
 static inline TCGCond tcg_unsigned_cond(TCGCond c)
 {
-    return (c >= TCG_COND_LT && c <= TCG_COND_GT ? c + 4 : c);
+    return c & 2 ? (TCGCond)(c ^ 6) : c;
 }
 
 static inline bool is_unsigned_cond(TCGCond c)
 {
-    return c >= TCG_COND_LTU;
+    return (c & 4) != 0;
 }
 
 #define TEMP_VAL_DEAD  0
-- 
cgit v1.2.3-55-g7522


From 24c9ae4eba5eec59256d0d0ace4d868a19f87528 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 2 Oct 2012 11:32:21 -0700
Subject: tcg: Split out swap_commutative as a subroutine

Reduces code duplication and prefers

  movcond d, c1, c2, const, s
to
  movcond d, c1, c2, s, const

It also prefers

  add r, r, c
over
  add r, c, r

when both inputs are known constants.  This doesn't matter for true add, as
we will fully constant fold that.  But it matters for a follow-on patch using
this routine for add2 which may not be fully foldable.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 56 ++++++++++++++++++++++++--------------------------------
 1 file changed, 24 insertions(+), 32 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index edb2b0ea90..ff4ddb2efc 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -388,6 +388,23 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     tcg_abort();
 }
 
+static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
+{
+    TCGArg a1 = *p1, a2 = *p2;
+    int sum = 0;
+    sum += temps[a1].state == TCG_TEMP_CONST;
+    sum -= temps[a2].state == TCG_TEMP_CONST;
+
+    /* Prefer the constant in second argument, and then the form
+       op a, a, b, which is better handled on non-RISC hosts. */
+    if (sum > 0 || (sum == 0 && dest == a2)) {
+        *p1 = a2;
+        *p2 = a1;
+        return true;
+    }
+    return false;
+}
+
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -397,7 +414,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     const TCGOpDef *def;
     TCGArg *gen_args;
     TCGArg tmp;
-    TCGCond cond;
 
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
@@ -440,52 +456,28 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(eqv):
         CASE_OP_32_64(nand):
         CASE_OP_32_64(nor):
-            /* Prefer the constant in second argument, and then the form
-               op a, a, b, which is better handled on non-RISC hosts. */
-            if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2]
-                && temps[args[2]].state != TCG_TEMP_CONST)) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
-            }
+            swap_commutative(args[0], &args[1], &args[2]);
             break;
         CASE_OP_32_64(brcond):
-            if (temps[args[0]].state == TCG_TEMP_CONST
-                && temps[args[1]].state != TCG_TEMP_CONST) {
-                tmp = args[0];
-                args[0] = args[1];
-                args[1] = tmp;
+            if (swap_commutative(-1, &args[0], &args[1])) {
                 args[2] = tcg_swap_cond(args[2]);
             }
             break;
         CASE_OP_32_64(setcond):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state != TCG_TEMP_CONST) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
+            if (swap_commutative(args[0], &args[1], &args[2])) {
                 args[3] = tcg_swap_cond(args[3]);
             }
             break;
         CASE_OP_32_64(movcond):
-            cond = args[5];
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state != TCG_TEMP_CONST) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
-                cond = tcg_swap_cond(cond);
+            if (swap_commutative(-1, &args[1], &args[2])) {
+                args[5] = tcg_swap_cond(args[5]);
             }
             /* For movcond, we canonicalize the "false" input reg to match
                the destination reg so that the tcg backend can implement
                a "move if true" operation.  */
-            if (args[0] == args[3]) {
-                tmp = args[3];
-                args[3] = args[4];
-                args[4] = tmp;
-                cond = tcg_invert_cond(cond);
+            if (swap_commutative(args[0], &args[4], &args[3])) {
+                args[5] = tcg_invert_cond(args[5]);
             }
-            args[5] = cond;
         default:
             break;
         }
-- 
cgit v1.2.3-55-g7522


From 1e484e61e2d9387c18a40e9e239b304003f5d183 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 2 Oct 2012 11:32:22 -0700
Subject: tcg: Canonicalize add2 operand ordering

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index ff4ddb2efc..8d74186ab3 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -478,6 +478,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (swap_commutative(args[0], &args[4], &args[3])) {
                 args[5] = tcg_invert_cond(args[5]);
             }
+            break;
+        case INDEX_op_add2_i32:
+            swap_commutative(args[0], &args[2], &args[4]);
+            swap_commutative(args[1], &args[3], &args[5]);
+            break;
         default:
             break;
         }
-- 
cgit v1.2.3-55-g7522


From 0bfcb86538d0bcffa3ef5434810f91aa861431ce Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 2 Oct 2012 11:32:23 -0700
Subject: tcg: Swap commutative double-word comparisons

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 8d74186ab3..9ecea7085a 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -405,6 +405,22 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
     return false;
 }
 
+static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
+{
+    int sum = 0;
+    sum += temps[p1[0]].state == TCG_TEMP_CONST;
+    sum += temps[p1[1]].state == TCG_TEMP_CONST;
+    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
+    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
+    if (sum > 0) {
+        TCGArg t;
+        t = p1[0], p1[0] = p2[0], p2[0] = t;
+        t = p1[1], p1[1] = p2[1], p2[1] = t;
+        return true;
+    }
+    return false;
+}
+
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -483,6 +499,16 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             swap_commutative(args[0], &args[2], &args[4]);
             swap_commutative(args[1], &args[3], &args[5]);
             break;
+        case INDEX_op_brcond2_i32:
+            if (swap_commutative2(&args[0], &args[2])) {
+                args[4] = tcg_swap_cond(args[4]);
+            }
+            break;
+        case INDEX_op_setcond2_i32:
+            if (swap_commutative2(&args[1], &args[3])) {
+                args[5] = tcg_swap_cond(args[5]);
+            }
+            break;
         default:
             break;
         }
-- 
cgit v1.2.3-55-g7522


From 6e14e91b6681dac9614b7b09b561351813046193 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 2 Oct 2012 11:32:24 -0700
Subject: tcg: Use common code when failing to optimize

This saves a whole lot of repetitive code sequences.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 91 +++++++++++++++++++++-------------------------------------
 1 file changed, 32 insertions(+), 59 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9ecea7085a..0fd7db3654 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -645,6 +645,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             gen_args += 2;
             args += 2;
             break;
+
         CASE_OP_32_64(not):
         CASE_OP_32_64(neg):
         CASE_OP_32_64(ext8s):
@@ -657,14 +658,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
+                gen_args += 2;
+                args += 2;
+                break;
             }
-            gen_args += 2;
-            args += 2;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(mul):
@@ -688,15 +687,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                           temps[args[2]].val);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args += 3;
+                args += 3;
+                break;
             }
-            args += 3;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(deposit):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
@@ -706,33 +701,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                       | ((temps[args[2]].val & tmp) << args[3]);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args[4] = args[4];
-                gen_args += 5;
+                args += 5;
+                break;
             }
-            args += 5;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(setcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
             if (tmp != 2) {
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args += 4;
+                args += 4;
+                break;
             }
-            args += 4;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(brcond):
             tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
             if (tmp != 2) {
@@ -744,17 +728,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 } else {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 }
-            } else {
-                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args += 4;
+                args += 4;
+                break;
             }
-            args += 4;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(movcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
             if (tmp != 2) {
@@ -769,18 +747,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                     gen_args += 2;
                 }
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args[4] = args[4];
-                gen_args[5] = args[5];
-                gen_args += 6;
+                args += 6;
+                break;
             }
-            args += 6;
-            break;
+            goto do_default;
+
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
@@ -799,11 +770,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 i--;
             }
             break;
+
         default:
-            /* Default case: we do know nothing about operation so no
-               propagation is done.  We trash everything if the operation
-               is the end of a basic block, otherwise we only trash the
-               output args.  */
+        do_default:
+            /* Default case: we know nothing about operation (or were unable
+               to compute the operation result) so no propagation is done.
+               We trash everything if the operation is the end of a basic
+               block, otherwise we only trash the output args.  */
             if (def->flags & TCG_OPF_BB_END) {
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             } else {
-- 
cgit v1.2.3-55-g7522


From bc1473eff4d9a3b62d669b7232383f85eb82b376 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 2 Oct 2012 11:32:25 -0700
Subject: tcg: Optimize double-word comparisons against zero

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0fd7db3654..0a55352870 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -752,6 +752,45 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             goto do_default;
 
+        case INDEX_op_brcond2_i32:
+            /* Simplify LT/GE comparisons vs zero to a single compare
+               vs the high word of the input.  */
+            if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
+                && temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST
+                && temps[args[2]].val == 0
+                && temps[args[3]].val == 0) {
+                gen_opc_buf[op_index] = INDEX_op_brcond_i32;
+                gen_args[0] = args[1];
+                gen_args[1] = args[3];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
+                args += 6;
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_setcond2_i32:
+            /* Simplify LT/GE comparisons vs zero to a single compare
+               vs the high word of the input.  */
+            if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
+                && temps[args[3]].state == TCG_TEMP_CONST
+                && temps[args[4]].state == TCG_TEMP_CONST
+                && temps[args[3]].val == 0
+                && temps[args[4]].val == 0) {
+                gen_opc_buf[op_index] = INDEX_op_setcond_i32;
+                gen_args[0] = args[0];
+                gen_args[1] = args[2];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
+                args += 6;
+                break;
+            }
+            goto do_default;
+
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
-- 
cgit v1.2.3-55-g7522


From 9519da7e39516c5cb5bd7146a849a5d8c0958105 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 2 Oct 2012 11:32:26 -0700
Subject: tcg: Split out subroutines from do_constant_folding_cond

We can re-use these for implementing double-word folding.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 152 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 81 insertions(+), 71 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0a55352870..38027dc5ad 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -292,6 +292,82 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
     return res;
 }
 
+static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_EQ:
+        return x == y;
+    case TCG_COND_NE:
+        return x != y;
+    case TCG_COND_LT:
+        return (int32_t)x < (int32_t)y;
+    case TCG_COND_GE:
+        return (int32_t)x >= (int32_t)y;
+    case TCG_COND_LE:
+        return (int32_t)x <= (int32_t)y;
+    case TCG_COND_GT:
+        return (int32_t)x > (int32_t)y;
+    case TCG_COND_LTU:
+        return x < y;
+    case TCG_COND_GEU:
+        return x >= y;
+    case TCG_COND_LEU:
+        return x <= y;
+    case TCG_COND_GTU:
+        return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_EQ:
+        return x == y;
+    case TCG_COND_NE:
+        return x != y;
+    case TCG_COND_LT:
+        return (int64_t)x < (int64_t)y;
+    case TCG_COND_GE:
+        return (int64_t)x >= (int64_t)y;
+    case TCG_COND_LE:
+        return (int64_t)x <= (int64_t)y;
+    case TCG_COND_GT:
+        return (int64_t)x > (int64_t)y;
+    case TCG_COND_LTU:
+        return x < y;
+    case TCG_COND_GEU:
+        return x >= y;
+    case TCG_COND_LEU:
+        return x <= y;
+    case TCG_COND_GTU:
+        return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_eq(TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_GT:
+    case TCG_COND_LTU:
+    case TCG_COND_LT:
+    case TCG_COND_GTU:
+    case TCG_COND_NE:
+        return 0;
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+    case TCG_COND_EQ:
+        return 1;
+    default:
+        tcg_abort();
+    }
+}
+
 /* Return 2 if the condition can't be simplified, and the result
    of the condition (0 or 1) if it can */
 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
@@ -300,75 +376,14 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
         switch (op_bits(op)) {
         case 32:
-            switch (c) {
-            case TCG_COND_EQ:
-                return (uint32_t)temps[x].val == (uint32_t)temps[y].val;
-            case TCG_COND_NE:
-                return (uint32_t)temps[x].val != (uint32_t)temps[y].val;
-            case TCG_COND_LT:
-                return (int32_t)temps[x].val < (int32_t)temps[y].val;
-            case TCG_COND_GE:
-                return (int32_t)temps[x].val >= (int32_t)temps[y].val;
-            case TCG_COND_LE:
-                return (int32_t)temps[x].val <= (int32_t)temps[y].val;
-            case TCG_COND_GT:
-                return (int32_t)temps[x].val > (int32_t)temps[y].val;
-            case TCG_COND_LTU:
-                return (uint32_t)temps[x].val < (uint32_t)temps[y].val;
-            case TCG_COND_GEU:
-                return (uint32_t)temps[x].val >= (uint32_t)temps[y].val;
-            case TCG_COND_LEU:
-                return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
-            case TCG_COND_GTU:
-                return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
-            default:
-                break;
-            }
-            break;
+            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
         case 64:
-            switch (c) {
-            case TCG_COND_EQ:
-                return (uint64_t)temps[x].val == (uint64_t)temps[y].val;
-            case TCG_COND_NE:
-                return (uint64_t)temps[x].val != (uint64_t)temps[y].val;
-            case TCG_COND_LT:
-                return (int64_t)temps[x].val < (int64_t)temps[y].val;
-            case TCG_COND_GE:
-                return (int64_t)temps[x].val >= (int64_t)temps[y].val;
-            case TCG_COND_LE:
-                return (int64_t)temps[x].val <= (int64_t)temps[y].val;
-            case TCG_COND_GT:
-                return (int64_t)temps[x].val > (int64_t)temps[y].val;
-            case TCG_COND_LTU:
-                return (uint64_t)temps[x].val < (uint64_t)temps[y].val;
-            case TCG_COND_GEU:
-                return (uint64_t)temps[x].val >= (uint64_t)temps[y].val;
-            case TCG_COND_LEU:
-                return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
-            case TCG_COND_GTU:
-                return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
-            default:
-                break;
-            }
-            break;
-        }
-    } else if (temps_are_copies(x, y)) {
-        switch (c) {
-        case TCG_COND_GT:
-        case TCG_COND_LTU:
-        case TCG_COND_LT:
-        case TCG_COND_GTU:
-        case TCG_COND_NE:
-            return 0;
-        case TCG_COND_GE:
-        case TCG_COND_GEU:
-        case TCG_COND_LE:
-        case TCG_COND_LEU:
-        case TCG_COND_EQ:
-            return 1;
+            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
         default:
-            break;
+            tcg_abort();
         }
+    } else if (temps_are_copies(x, y)) {
+        return do_constant_folding_cond_eq(c);
     } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
         switch (c) {
         case TCG_COND_LTU:
@@ -381,11 +396,6 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     } else {
         return 2;
     }
-
-    fprintf(stderr,
-            "Unrecognized bitness %d or condition %d in "
-            "do_constant_folding_cond.\n", op_bits(op), c);
-    tcg_abort();
 }
 
 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
-- 
cgit v1.2.3-55-g7522


From 6c4382f8f47d12eec24c7c5335141a9c78104016 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 2 Oct 2012 11:32:27 -0700
Subject: tcg: Do constant folding on double-word comparisons

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 93 +++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 72 insertions(+), 21 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 38027dc5ad..d9251e4677 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -398,6 +398,40 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     }
 }
 
+/* Return 2 if the condition can't be simplified, and the result
+   of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
+{
+    TCGArg al = p1[0], ah = p1[1];
+    TCGArg bl = p2[0], bh = p2[1];
+
+    if (temps[bl].state == TCG_TEMP_CONST
+        && temps[bh].state == TCG_TEMP_CONST) {
+        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
+
+        if (temps[al].state == TCG_TEMP_CONST
+            && temps[ah].state == TCG_TEMP_CONST) {
+            uint64_t a;
+            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
+            return do_constant_folding_cond_64(a, b, c);
+        }
+        if (b == 0) {
+            switch (c) {
+            case TCG_COND_LTU:
+                return 0;
+            case TCG_COND_GEU:
+                return 1;
+            default:
+                break;
+            }
+        }
+    }
+    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
+        return do_constant_folding_cond_eq(c);
+    }
+    return 2;
+}
+
 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 {
     TCGArg a1 = *p1, a2 = *p2;
@@ -763,43 +797,60 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             goto do_default;
 
         case INDEX_op_brcond2_i32:
-            /* Simplify LT/GE comparisons vs zero to a single compare
-               vs the high word of the input.  */
-            if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
-                && temps[args[2]].state == TCG_TEMP_CONST
-                && temps[args[3]].state == TCG_TEMP_CONST
-                && temps[args[2]].val == 0
-                && temps[args[3]].val == 0) {
+            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
+            if (tmp != 2) {
+                if (tmp) {
+                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                    gen_opc_buf[op_index] = INDEX_op_br;
+                    gen_args[0] = args[5];
+                    gen_args += 1;
+                } else {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                }
+            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
+                       && temps[args[2]].state == TCG_TEMP_CONST
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[2]].val == 0
+                       && temps[args[3]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
                 gen_opc_buf[op_index] = INDEX_op_brcond_i32;
                 gen_args[0] = args[1];
                 gen_args[1] = args[3];
                 gen_args[2] = args[4];
                 gen_args[3] = args[5];
                 gen_args += 4;
-                args += 6;
-                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                break;
+            } else {
+                goto do_default;
             }
-            goto do_default;
+            args += 6;
+            break;
 
         case INDEX_op_setcond2_i32:
-            /* Simplify LT/GE comparisons vs zero to a single compare
-               vs the high word of the input.  */
-            if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
-                && temps[args[3]].state == TCG_TEMP_CONST
-                && temps[args[4]].state == TCG_TEMP_CONST
-                && temps[args[3]].val == 0
-                && temps[args[4]].val == 0) {
+            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
+            if (tmp != 2) {
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[4]].state == TCG_TEMP_CONST
+                       && temps[args[3]].val == 0
+                       && temps[args[4]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
                 gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                 gen_args[0] = args[0];
                 gen_args[1] = args[2];
                 gen_args[2] = args[4];
                 gen_args[3] = args[5];
                 gen_args += 4;
-                args += 6;
-                break;
+            } else {
+                goto do_default;
             }
-            goto do_default;
+            args += 6;
+            break;
 
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
-- 
cgit v1.2.3-55-g7522


From 212c328d615f6750659d39c6fd544d05be314fa1 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 2 Oct 2012 11:32:28 -0700
Subject: tcg: Constant fold add2 and sub2

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 35 +++++++++++++++++++++++++++++++++++
 tcg/tcg-op.h   |  9 +++++++++
 2 files changed, 44 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index d9251e4677..05891ef380 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -796,6 +796,41 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             goto do_default;
 
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST
+                && temps[args[4]].state == TCG_TEMP_CONST
+                && temps[args[5]].state == TCG_TEMP_CONST) {
+                uint32_t al = temps[args[2]].val;
+                uint32_t ah = temps[args[3]].val;
+                uint32_t bl = temps[args[4]].val;
+                uint32_t bh = temps[args[5]].val;
+                uint64_t a = ((uint64_t)ah << 32) | al;
+                uint64_t b = ((uint64_t)bh << 32) | bl;
+                TCGArg rl, rh;
+
+                if (op == INDEX_op_add2_i32) {
+                    a += b;
+                } else {
+                    a -= b;
+                }
+
+                /* We emit the extra nop when we emit the add2/sub2.  */
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
+                gen_args += 4;
+                args += 6;
+                break;
+            }
+            goto do_default;
+
         case INDEX_op_brcond2_i32:
             tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
             if (tmp != 2) {
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 551845801d..a580cfea07 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -25,6 +25,11 @@
 
 int gen_new_label(void);
 
+static inline void tcg_gen_op0(TCGOpcode opc)
+{
+    *gen_opc_ptr++ = opc;
+}
+
 static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
 {
     *gen_opc_ptr++ = opc;
@@ -886,6 +891,8 @@ static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace add2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -893,6 +900,8 @@ static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace sub2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-- 
cgit v1.2.3-55-g7522


From 1414968a6aecd23cb037bc9e718d6f05ead2afaf Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 2 Oct 2012 11:32:30 -0700
Subject: tcg: Optimize mulu2

Like add2, do operand ordering, constant folding, and dead operand
elimination.  The latter happens about 15% of all mulu2 during an
x86_64 bios boot.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 26 ++++++++++++++++++++++++++
 tcg/tcg-op.h   |  2 ++
 tcg/tcg.c      | 19 +++++++++++++++++++
 3 files changed, 47 insertions(+)

(limited to 'tcg/optimize.c')

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 05891ef380..a06c8eb43e 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -543,6 +543,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             swap_commutative(args[0], &args[2], &args[4]);
             swap_commutative(args[1], &args[3], &args[5]);
             break;
+        case INDEX_op_mulu2_i32:
+            swap_commutative(args[0], &args[2], &args[3]);
+            break;
         case INDEX_op_brcond2_i32:
             if (swap_commutative2(&args[0], &args[2])) {
                 args[4] = tcg_swap_cond(args[4]);
@@ -831,6 +834,29 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             goto do_default;
 
+        case INDEX_op_mulu2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST) {
+                uint32_t a = temps[args[2]].val;
+                uint32_t b = temps[args[3]].val;
+                uint64_t r = (uint64_t)a * b;
+                TCGArg rl, rh;
+
+                /* We emit the extra nop when we emit the mulu2.  */
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
+                gen_args += 4;
+                args += 4;
+                break;
+            }
+            goto do_default;
+
         case INDEX_op_brcond2_i32:
             tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
             if (tmp != 2) {
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index a580cfea07..8100a5a2e0 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -1027,6 +1027,8 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 
     tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
                     TCGV_LOW(arg1), TCGV_LOW(arg2));
+    /* Allow the optimizer room to replace mulu2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 
     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0eb407b723..f0deea25b2 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1338,6 +1338,25 @@ static void tcg_liveness_analysis(TCGContext *s)
             }
             goto do_not_remove;
 
+        case INDEX_op_mulu2_i32:
+            args -= 4;
+            nb_iargs = 2;
+            nb_oargs = 2;
+            /* Likewise, test for the high part of the operation dead.  */
+            if (dead_temps[args[1]]) {
+                if (dead_temps[args[0]]) {
+                    goto do_remove;
+                }
+                gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+                args[1] = args[2];
+                args[2] = args[3];
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1);
+                /* Fall through and mark the single-word operation live.  */
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
         default:
             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
             args -= def->nb_args;
-- 
cgit v1.2.3-55-g7522


From 78505279665e64d28e4a308818b69e6f77ecc5d2 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Tue, 9 Oct 2012 21:53:08 +0200
Subject: tcg: rework TCG helper flags

The current helper flags, TCG_CALL_CONST and TCG_CALL_PURE might be
confusing and doesn't provide enough granularity for some helpers (FP
helpers for example).

This patch changes them into the following helpers flags:
- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals,
  either directly or via an exception. They will not be saved to their
  canonical location before calling the helper.
- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any
  globals. They will only be saved to their canonical locations before
  calling helpers, but they won't be reloaded afterwise.
- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is
  removed if the return value is not used.

It provides convenience flags, to avoid helper definitions longer than
80 characters. It also provides compatibility flags, and updates the
documentation.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/README     | 19 ++++++++++++++-----
 tcg/optimize.c |  3 ++-
 tcg/tcg-op.h   | 18 ++++++++++--------
 tcg/tcg.c      | 24 ++++++++++++++++--------
 tcg/tcg.h      | 26 ++++++++++++++++++--------
 5 files changed, 60 insertions(+), 30 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/tcg/README b/tcg/README
index 9d1b100613..ec1ac79375 100644
--- a/tcg/README
+++ b/tcg/README
@@ -77,11 +77,20 @@ destroyed, but local temporaries and globals are preserved.
 Using the tcg_gen_helper_x_y it is possible to call any function
 taking i32, i64 or pointer types. By default, before calling a helper,
 all globals are stored at their canonical location and it is assumed
-that the function can modify them. This can be overridden by the
-TCG_CALL_CONST function modifier. By default, the helper is allowed to
-modify the CPU state or raise an exception. This can be overridden by
-the TCG_CALL_PURE function modifier, in which case the call to the
-function is removed if the return value is not used.
+that the function can modify them. By default, the helper is allowed to
+modify the CPU state or raise an exception.
+
+This can be overridden using the following function modifiers:
+- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals,
+  either directly or via an exception. They will not be saved to their
+  canonical locations before calling the helper.
+- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals.
+  They will only be saved to their canonical location before calling helpers,
+  but they won't be reloaded afterwise.
+- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if
+  the return value is not used.
+
+Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS.
 
 On some TCG targets (e.g. x86), several calling conventions are
 supported.
diff --git a/tcg/optimize.c b/tcg/optimize.c
index a06c8eb43e..8e5d918030 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -915,7 +915,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
-            if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
+            if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
+                                            TCG_CALL_NO_WRITE_GLOBALS))) {
                 for (i = 0; i < nb_globals; i++) {
                     reset_temp(i);
                 }
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 8100a5a2e0..8d1da2b670 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -401,10 +401,10 @@ static inline void tcg_gen_helperN(void *func, int flags, int sizemask,
 }
 
 /* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently
-   reserved for helpers in tcg-runtime.c. These helpers are all const
-   and pure, hence the call to tcg_gen_callN() with TCG_CALL_CONST |
-   TCG_CALL_PURE. This may need to be adjusted if these functions
-   start to be used with other helpers. */
+   reserved for helpers in tcg-runtime.c. These helpers all do not read
+   globals and do not have side effects, hence the call to tcg_gen_callN()
+   with TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS. This may need
+   to be adjusted if these functions start to be used with other helpers. */
 static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret,
                                     TCGv_i32 a, TCGv_i32 b)
 {
@@ -413,8 +413,9 @@ static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret,
     fn = tcg_const_ptr(func);
     args[0] = GET_TCGV_I32(a);
     args[1] = GET_TCGV_I32(b);
-    tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
-                  GET_TCGV_I32(ret), 2, args);
+    tcg_gen_callN(&tcg_ctx, fn,
+                  TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
+                  sizemask, GET_TCGV_I32(ret), 2, args);
     tcg_temp_free_ptr(fn);
 }
 
@@ -426,8 +427,9 @@ static inline void tcg_gen_helper64(void *func, int sizemask, TCGv_i64 ret,
     fn = tcg_const_ptr(func);
     args[0] = GET_TCGV_I64(a);
     args[1] = GET_TCGV_I64(b);
-    tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
-                  GET_TCGV_I64(ret), 2, args);
+    tcg_gen_callN(&tcg_ctx, fn,
+                  TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
+                  sizemask, GET_TCGV_I64(ret), 2, args);
     tcg_temp_free_ptr(fn);
 }
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index c9c642300d..c3a7f19bd7 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1251,7 +1251,7 @@ static void tcg_liveness_analysis(TCGContext *s)
 
                 /* pure functions can be removed if their result is not
                    used */
-                if (call_flags & TCG_CALL_PURE) {
+                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                     for(i = 0; i < nb_oargs; i++) {
                         arg = args[i];
                         if (!dead_temps[arg] || mem_temps[arg]) {
@@ -1277,11 +1277,15 @@ static void tcg_liveness_analysis(TCGContext *s)
                         dead_temps[arg] = 1;
                         mem_temps[arg] = 0;
                     }
-                    
-                    if (!(call_flags & TCG_CALL_CONST)) {
+
+                    if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+                        /* globals should be synced to memory */
+                        memset(mem_temps, 1, s->nb_globals);
+                    }
+                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
+                                        TCG_CALL_NO_READ_GLOBALS))) {
                         /* globals should go back to memory */
                         memset(dead_temps, 1, s->nb_globals);
-                        memset(mem_temps, 1, s->nb_globals);
                     }
 
                     /* input args are live */
@@ -2128,10 +2132,14 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
             tcg_reg_free(s, reg);
         }
     }
-    
-    /* store globals and free associated registers (we assume the call
-       can modify any global. */
-    if (!(flags & TCG_CALL_CONST)) {
+
+    /* Save globals if they might be written by the helper, sync them if
+       they might be read. */
+    if (flags & TCG_CALL_NO_READ_GLOBALS) {
+        /* Nothing to do */
+    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
+        sync_globals(s, allocated_regs);
+    } else {
         save_globals(s, allocated_regs);
     }
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 4b44ecdccb..96b62229c2 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -253,14 +253,24 @@ typedef int TCGv_i64;
 #define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1)
 
 /* call flags */
-/* A pure function only reads its arguments and TCG global variables
-   and cannot raise exceptions. Hence a call to a pure function can be
-   safely suppressed if the return value is not used. */
-#define TCG_CALL_PURE           0x0010 
-/* A const function only reads its arguments and does not use TCG
-   global variables. Hence a call to such a function does not
-   save TCG global variables back to their canonical location. */
-#define TCG_CALL_CONST          0x0020
+/* Helper does not read globals (either directly or through an exception). It
+   implies TCG_CALL_NO_WRITE_GLOBALS. */
+#define TCG_CALL_NO_READ_GLOBALS    0x0010
+/* Helper does not write globals */
+#define TCG_CALL_NO_WRITE_GLOBALS   0x0020
+/* Helper can be safely suppressed if the return value is not used. */
+#define TCG_CALL_NO_SIDE_EFFECTS    0x0040
+
+/* convenience version of most used call flags */
+#define TCG_CALL_NO_RWG         TCG_CALL_NO_READ_GLOBALS
+#define TCG_CALL_NO_WG          TCG_CALL_NO_WRITE_GLOBALS
+#define TCG_CALL_NO_SE          TCG_CALL_NO_SIDE_EFFECTS
+#define TCG_CALL_NO_RWG_SE      (TCG_CALL_NO_RWG | TCG_CALL_NO_SE)
+#define TCG_CALL_NO_WG_SE       (TCG_CALL_NO_WG | TCG_CALL_NO_SE)
+
+/* compatibility call flags, they should be eventually be removed */
+#define TCG_CALL_PURE           TCG_CALL_NO_SIDE_EFFECTS
+#define TCG_CALL_CONST          TCG_CALL_NO_READ_GLOBALS
 
 /* used to align parameters */
 #define TCG_CALL_DUMMY_TCGV     MAKE_TCGV_I32(-1)
-- 
cgit v1.2.3-55-g7522


From 92414b31e726ead5fd93a5bee5a6d1aecc66e454 Mon Sep 17 00:00:00 2001
From: Evgeny Voevodin
Date: Mon, 12 Nov 2012 13:27:47 +0400
Subject: TCG: Use gen_opc_buf from context instead of global variable.

Signed-off-by: Evgeny Voevodin <e.voevodin@samsung.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 target-alpha/translate.c      |  6 ++---
 target-arm/translate.c        |  6 ++---
 target-cris/translate.c       |  8 +++---
 target-i386/translate.c       |  6 ++---
 target-lm32/translate.c       |  9 ++++---
 target-m68k/translate.c       |  6 ++---
 target-microblaze/translate.c |  9 ++++---
 target-mips/translate.c       |  6 ++---
 target-openrisc/translate.c   |  9 ++++---
 target-ppc/translate.c        |  6 ++---
 target-s390x/translate.c      |  6 ++---
 target-sh4/translate.c        |  6 ++---
 target-sparc/translate.c      |  6 ++---
 target-unicore32/translate.c  |  6 ++---
 target-xtensa/translate.c     |  4 +--
 tcg/optimize.c                | 62 +++++++++++++++++++++----------------------
 tcg/tcg.c                     | 30 ++++++++++-----------
 17 files changed, 97 insertions(+), 94 deletions(-)

(limited to 'tcg/optimize.c')

diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index f160f83ffe..4045f788ea 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -3373,7 +3373,7 @@ static inline void gen_intermediate_code_internal(CPUAlphaState *env,
     int max_insns;
 
     pc_start = tb->pc;
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     ctx.tb = tb;
     ctx.env = env;
@@ -3406,7 +3406,7 @@ static inline void gen_intermediate_code_internal(CPUAlphaState *env,
             }
         }
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -3465,7 +3465,7 @@ static inline void gen_intermediate_code_internal(CPUAlphaState *env,
     gen_icount_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
             gen_opc_instr_start[lj++] = 0;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 014f3582b9..c42110ab0d 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -9727,7 +9727,7 @@ static inline void gen_intermediate_code_internal(CPUARMState *env,
 
     dc->tb = tb;
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     dc->is_jmp = DISAS_NEXT;
     dc->pc = pc_start;
@@ -9834,7 +9834,7 @@ static inline void gen_intermediate_code_internal(CPUARMState *env,
             }
         }
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -9974,7 +9974,7 @@ done_generating:
     }
 #endif
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
             gen_opc_instr_start[lj++] = 0;
diff --git a/target-cris/translate.c b/target-cris/translate.c
index 02969d4d53..0b0e86dbd1 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -3232,7 +3232,7 @@ gen_intermediate_code_internal(CPUCRISState *env, TranslationBlock *tb,
     dc->env = env;
     dc->tb = tb;
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     dc->is_jmp = DISAS_NEXT;
     dc->ppc = pc_start;
@@ -3297,7 +3297,7 @@ gen_intermediate_code_internal(CPUCRISState *env, TranslationBlock *tb,
         check_breakpoint(env, dc);
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j) {
@@ -3436,7 +3436,7 @@ gen_intermediate_code_internal(CPUCRISState *env, TranslationBlock *tb,
     gen_icount_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j) {
             gen_opc_instr_start[lj++] = 0;
@@ -3452,7 +3452,7 @@ gen_intermediate_code_internal(CPUCRISState *env, TranslationBlock *tb,
         log_target_disas(env, pc_start, dc->pc - pc_start,
                                  dc->env->pregs[PR_VR]);
         qemu_log("\nisize=%d osize=%td\n",
-            dc->pc - pc_start, tcg_ctx.gen_opc_ptr - gen_opc_buf);
+            dc->pc - pc_start, tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf);
     }
 #endif
 #endif
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 2658bf29c9..8e676ba1a8 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -7962,7 +7962,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
     cpu_ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     dc->is_jmp = DISAS_NEXT;
     pc_ptr = pc_start;
@@ -7984,7 +7984,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
             }
         }
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -8034,7 +8034,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     /* we don't forget to fill the last values */
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
             gen_opc_instr_start[lj++] = 0;
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index 8c2a6181d8..af986499f2 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -1018,7 +1018,7 @@ static void gen_intermediate_code_internal(CPULM32State *env,
     dc->env = env;
     dc->tb = tb;
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     dc->is_jmp = DISAS_NEXT;
     dc->pc = pc_start;
@@ -1047,7 +1047,7 @@ static void gen_intermediate_code_internal(CPULM32State *env,
         check_breakpoint(env, dc);
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j) {
@@ -1107,7 +1107,7 @@ static void gen_intermediate_code_internal(CPULM32State *env,
     gen_icount_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j) {
             gen_opc_instr_start[lj++] = 0;
@@ -1122,7 +1122,8 @@ static void gen_intermediate_code_internal(CPULM32State *env,
         qemu_log("\n");
         log_target_disas(env, pc_start, dc->pc - pc_start, 0);
         qemu_log("\nisize=%d osize=%td\n",
-            dc->pc - pc_start, tcg_ctx.gen_opc_ptr - gen_opc_buf);
+            dc->pc - pc_start, tcg_ctx.gen_opc_ptr -
+            tcg_ctx.gen_opc_buf);
     }
 #endif
 }
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index ede3536dc4..b13be4899e 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -2982,7 +2982,7 @@ gen_intermediate_code_internal(CPUM68KState *env, TranslationBlock *tb,
 
     dc->tb = tb;
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     dc->env = env;
     dc->is_jmp = DISAS_NEXT;
@@ -3015,7 +3015,7 @@ gen_intermediate_code_internal(CPUM68KState *env, TranslationBlock *tb,
                 break;
         }
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -3075,7 +3075,7 @@ gen_intermediate_code_internal(CPUM68KState *env, TranslationBlock *tb,
     }
 #endif
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
             gen_opc_instr_start[lj++] = 0;
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index a84c22ee0f..cce4494954 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -1741,7 +1741,7 @@ gen_intermediate_code_internal(CPUMBState *env, TranslationBlock *tb,
     dc->tb = tb;
     org_flags = dc->synced_flags = dc->tb_flags = tb->flags;
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     dc->is_jmp = DISAS_NEXT;
     dc->jmp = 0;
@@ -1784,7 +1784,7 @@ gen_intermediate_code_internal(CPUMBState *env, TranslationBlock *tb,
         check_breakpoint(env, dc);
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -1899,7 +1899,7 @@ gen_intermediate_code_internal(CPUMBState *env, TranslationBlock *tb,
     gen_icount_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
             gen_opc_instr_start[lj++] = 0;
@@ -1916,7 +1916,8 @@ gen_intermediate_code_internal(CPUMBState *env, TranslationBlock *tb,
         log_target_disas(env, pc_start, dc->pc - pc_start, 0);
 #endif
         qemu_log("\nisize=%d osize=%td\n",
-            dc->pc - pc_start, tcg_ctx.gen_opc_ptr - gen_opc_buf);
+            dc->pc - pc_start, tcg_ctx.gen_opc_ptr -
+            tcg_ctx.gen_opc_buf);
     }
 #endif
 #endif
diff --git a/target-mips/translate.c b/target-mips/translate.c
index cc126f5382..8b438f8bb0 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -15513,7 +15513,7 @@ gen_intermediate_code_internal (CPUMIPSState *env, TranslationBlock *tb,
         qemu_log("search pc %d\n", search_pc);
 
     pc_start = tb->pc;
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     ctx.pc = pc_start;
     ctx.saved_pc = -1;
     ctx.singlestep_enabled = env->singlestep_enabled;
@@ -15549,7 +15549,7 @@ gen_intermediate_code_internal (CPUMIPSState *env, TranslationBlock *tb,
         }
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -15633,7 +15633,7 @@ done_generating:
     gen_icount_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
             gen_opc_instr_start[lj++] = 0;
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index eba134e0b7..f14da7bd1a 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -1675,7 +1675,7 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
     pc_start = tb->pc;
     dc->tb = tb;
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     dc->is_jmp = DISAS_NEXT;
     dc->ppc = pc_start;
     dc->pc = pc_start;
@@ -1703,7 +1703,7 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
     do {
         check_breakpoint(cpu, dc);
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (k < j) {
                 k++;
                 while (k < j) {
@@ -1784,7 +1784,7 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
     gen_icount_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         k++;
         while (k <= j) {
             gen_opc_instr_start[k++] = 0;
@@ -1799,7 +1799,8 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
         qemu_log("\n");
         log_target_disas(&cpu->env, pc_start, dc->pc - pc_start, 0);
         qemu_log("\nisize=%d osize=%td\n",
-            dc->pc - pc_start, tcg_ctx.gen_opc_ptr - gen_opc_buf);
+            dc->pc - pc_start, tcg_ctx.gen_opc_ptr -
+            tcg_ctx.gen_opc_buf);
     }
 #endif
 }
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index b2a8374e61..16b9c5dd57 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9624,7 +9624,7 @@ static inline void gen_intermediate_code_internal(CPUPPCState *env,
     int max_insns;
 
     pc_start = tb->pc;
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     ctx.nip = pc_start;
     ctx.tb = tb;
     ctx.exception = POWERPC_EXCP_NONE;
@@ -9675,7 +9675,7 @@ static inline void gen_intermediate_code_internal(CPUPPCState *env,
             }
         }
         if (unlikely(search_pc)) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -9777,7 +9777,7 @@ static inline void gen_intermediate_code_internal(CPUPPCState *env,
     gen_icount_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (unlikely(search_pc)) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
             gen_opc_instr_start[lj++] = 0;
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 945cc51aaf..993f20752c 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -5134,7 +5134,7 @@ static inline void gen_intermediate_code_internal(CPUS390XState *env,
     dc.tb = tb;
     dc.cc_op = CC_OP_DYNAMIC;
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
 
@@ -5156,7 +5156,7 @@ static inline void gen_intermediate_code_internal(CPUS390XState *env,
             }
         }
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j) {
@@ -5209,7 +5209,7 @@ static inline void gen_intermediate_code_internal(CPUS390XState *env,
     gen_icount_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j) {
             gen_opc_instr_start[lj++] = 0;
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index b43e6c28dc..5497dede05 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -1967,7 +1967,7 @@ gen_intermediate_code_internal(CPUSH4State * env, TranslationBlock * tb,
     int max_insns;
 
     pc_start = tb->pc;
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     ctx.pc = pc_start;
     ctx.flags = (uint32_t)tb->flags;
     ctx.bstate = BS_NONE;
@@ -1999,7 +1999,7 @@ gen_intermediate_code_internal(CPUSH4State * env, TranslationBlock * tb,
 	    }
 	}
         if (search_pc) {
-            i = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            i = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (ii < i) {
                 ii++;
                 while (ii < i)
@@ -2058,7 +2058,7 @@ gen_intermediate_code_internal(CPUSH4State * env, TranslationBlock * tb,
     gen_icount_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
-        i = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        i = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         ii++;
         while (ii <= i)
             gen_opc_instr_start[ii++] = 0;
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 47bbc91426..2ae803695b 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -5257,7 +5257,7 @@ static inline void gen_intermediate_code_internal(TranslationBlock * tb,
     dc->fpu_enabled = tb_fpu_enabled(tb->flags);
     dc->address_mask_32bit = tb_am_enabled(tb->flags);
     dc->singlestep = (env->singlestep_enabled || singlestep);
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
@@ -5279,7 +5279,7 @@ static inline void gen_intermediate_code_internal(TranslationBlock * tb,
         }
         if (spc) {
             qemu_log("Search PC...\n");
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -5336,7 +5336,7 @@ static inline void gen_intermediate_code_internal(TranslationBlock * tb,
     gen_icount_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (spc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
             gen_opc_instr_start[lj++] = 0;
diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index d6bb6696ab..052bb45d70 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -1956,7 +1956,7 @@ static inline void gen_intermediate_code_internal(CPUUniCore32State *env,
 
     dc->tb = tb;
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     dc->is_jmp = DISAS_NEXT;
     dc->pc = pc_start;
@@ -1999,7 +1999,7 @@ static inline void gen_intermediate_code_internal(CPUUniCore32State *env,
             }
         }
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j) {
@@ -2114,7 +2114,7 @@ done_generating:
     }
 #endif
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j) {
             gen_opc_instr_start[lj++] = 0;
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index f272eac56d..e5a3f49a75 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -2849,7 +2849,7 @@ static void gen_intermediate_code_internal(
     DisasContext dc;
     int insn_count = 0;
     int j, lj = -1;
-    uint16_t *gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    uint16_t *gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     int max_insns = tb->cflags & CF_COUNT_MASK;
     uint32_t pc_start = tb->pc;
     uint32_t next_page_start =
@@ -2893,7 +2893,7 @@ static void gen_intermediate_code_internal(
         check_breakpoint(env, &dc);
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j) {
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 8e5d918030..9109b813e0 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -484,10 +484,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     nb_globals = s->nb_globals;
     memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
 
-    nb_ops = tcg_opc_ptr - gen_opc_buf;
+    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
     gen_args = args;
     for (op_index = 0; op_index < nb_ops; op_index++) {
-        op = gen_opc_buf[op_index];
+        op = s->gen_opc_buf[op_index];
         def = &tcg_op_defs[op];
         /* Do copy propagation */
         if (op == INDEX_op_call) {
@@ -569,7 +569,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(rotr):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[1]].val == 0) {
-                gen_opc_buf[op_index] = op_to_movi(op);
+                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tcg_opt_gen_movi(gen_args, args[0], 0);
                 args += 3;
                 gen_args += 2;
@@ -598,9 +598,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0) {
                 if (temps_are_copies(args[0], args[1])) {
-                    gen_opc_buf[op_index] = INDEX_op_nop;
+                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
-                    gen_opc_buf[op_index] = op_to_mov(op);
+                    s->gen_opc_buf[op_index] = op_to_mov(op);
                     tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                     gen_args += 2;
                 }
@@ -618,7 +618,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(mul):
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
-                gen_opc_buf[op_index] = op_to_movi(op);
+                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tcg_opt_gen_movi(gen_args, args[0], 0);
                 args += 3;
                 gen_args += 2;
@@ -635,9 +635,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(and):
             if (temps_are_copies(args[1], args[2])) {
                 if (temps_are_copies(args[0], args[1])) {
-                    gen_opc_buf[op_index] = INDEX_op_nop;
+                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
-                    gen_opc_buf[op_index] = op_to_mov(op);
+                    s->gen_opc_buf[op_index] = op_to_mov(op);
                     tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                     gen_args += 2;
                 }
@@ -654,7 +654,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(sub):
         CASE_OP_32_64(xor):
             if (temps_are_copies(args[1], args[2])) {
-                gen_opc_buf[op_index] = op_to_movi(op);
+                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tcg_opt_gen_movi(gen_args, args[0], 0);
                 gen_args += 2;
                 args += 3;
@@ -672,7 +672,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(mov):
             if (temps_are_copies(args[0], args[1])) {
                 args += 2;
-                gen_opc_buf[op_index] = INDEX_op_nop;
+                s->gen_opc_buf[op_index] = INDEX_op_nop;
                 break;
             }
             if (temps[args[1]].state != TCG_TEMP_CONST) {
@@ -684,7 +684,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             /* Source argument is constant.  Rewrite the operation and
                let movi case handle it. */
             op = op_to_movi(op);
-            gen_opc_buf[op_index] = op;
+            s->gen_opc_buf[op_index] = op;
             args[1] = temps[args[1]].val;
             /* fallthrough */
         CASE_OP_32_64(movi):
@@ -702,7 +702,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         case INDEX_op_ext32s_i64:
         case INDEX_op_ext32u_i64:
             if (temps[args[1]].state == TCG_TEMP_CONST) {
-                gen_opc_buf[op_index] = op_to_movi(op);
+                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
@@ -729,7 +729,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(nor):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
-                gen_opc_buf[op_index] = op_to_movi(op);
+                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val,
                                           temps[args[2]].val);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
@@ -742,7 +742,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(deposit):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
-                gen_opc_buf[op_index] = op_to_movi(op);
+                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = ((1ull << args[4]) - 1);
                 tmp = (temps[args[1]].val & ~(tmp << args[3]))
                       | ((temps[args[2]].val & tmp) << args[3]);
@@ -756,7 +756,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(setcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
             if (tmp != 2) {
-                gen_opc_buf[op_index] = op_to_movi(op);
+                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
                 args += 4;
@@ -769,11 +769,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (tmp != 2) {
                 if (tmp) {
                     memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                    gen_opc_buf[op_index] = INDEX_op_br;
+                    s->gen_opc_buf[op_index] = INDEX_op_br;
                     gen_args[0] = args[3];
                     gen_args += 1;
                 } else {
-                    gen_opc_buf[op_index] = INDEX_op_nop;
+                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                 }
                 args += 4;
                 break;
@@ -784,13 +784,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
             if (tmp != 2) {
                 if (temps_are_copies(args[0], args[4-tmp])) {
-                    gen_opc_buf[op_index] = INDEX_op_nop;
+                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
-                    gen_opc_buf[op_index] = op_to_movi(op);
+                    s->gen_opc_buf[op_index] = op_to_movi(op);
                     tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
                     gen_args += 2;
                 } else {
-                    gen_opc_buf[op_index] = op_to_mov(op);
+                    s->gen_opc_buf[op_index] = op_to_mov(op);
                     tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                     gen_args += 2;
                 }
@@ -820,12 +820,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 }
 
                 /* We emit the extra nop when we emit the add2/sub2.  */
-                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
 
                 rl = args[0];
                 rh = args[1];
-                gen_opc_buf[op_index] = INDEX_op_movi_i32;
-                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
                 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
                 gen_args += 4;
@@ -843,12 +843,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 TCGArg rl, rh;
 
                 /* We emit the extra nop when we emit the mulu2.  */
-                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
 
                 rl = args[0];
                 rh = args[1];
-                gen_opc_buf[op_index] = INDEX_op_movi_i32;
-                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
                 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
                 gen_args += 4;
@@ -862,11 +862,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (tmp != 2) {
                 if (tmp) {
                     memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                    gen_opc_buf[op_index] = INDEX_op_br;
+                    s->gen_opc_buf[op_index] = INDEX_op_br;
                     gen_args[0] = args[5];
                     gen_args += 1;
                 } else {
-                    gen_opc_buf[op_index] = INDEX_op_nop;
+                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                 }
             } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                        && temps[args[2]].state == TCG_TEMP_CONST
@@ -876,7 +876,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 /* Simplify LT/GE comparisons vs zero to a single compare
                    vs the high word of the input.  */
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                gen_opc_buf[op_index] = INDEX_op_brcond_i32;
+                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
                 gen_args[0] = args[1];
                 gen_args[1] = args[3];
                 gen_args[2] = args[4];
@@ -891,7 +891,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         case INDEX_op_setcond2_i32:
             tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
             if (tmp != 2) {
-                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
             } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
@@ -901,7 +901,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                        && temps[args[4]].val == 0) {
                 /* Simplify LT/GE comparisons vs zero to a single compare
                    vs the high word of the input.  */
-                gen_opc_buf[op_index] = INDEX_op_setcond_i32;
+                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                 gen_args[0] = args[0];
                 gen_args[1] = args[2];
                 gen_args[2] = args[4];
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 96535b6187..a039001369 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -297,7 +297,7 @@ void tcg_func_start(TCGContext *s)
     s->goto_tb_issue_mask = 0;
 #endif
 
-    s->gen_opc_ptr = gen_opc_buf;
+    s->gen_opc_ptr = s->gen_opc_buf;
     s->gen_opparam_ptr = gen_opparam_buf;
 
 #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
@@ -896,7 +896,7 @@ void tcg_dump_ops(TCGContext *s)
     char buf[128];
 
     first_insn = 1;
-    opc_ptr = gen_opc_buf;
+    opc_ptr = s->gen_opc_buf;
     args = gen_opparam_buf;
     while (opc_ptr < s->gen_opc_ptr) {
         c = *opc_ptr++;
@@ -1231,7 +1231,7 @@ static void tcg_liveness_analysis(TCGContext *s)
     
     s->gen_opc_ptr++; /* skip end */
 
-    nb_ops = s->gen_opc_ptr - gen_opc_buf;
+    nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
 
     s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
     s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
@@ -1243,7 +1243,7 @@ static void tcg_liveness_analysis(TCGContext *s)
     args = s->gen_opparam_ptr;
     op_index = nb_ops - 1;
     while (op_index >= 0) {
-        op = gen_opc_buf[op_index];
+        op = s->gen_opc_buf[op_index];
         def = &tcg_op_defs[op];
         switch(op) {
         case INDEX_op_call:
@@ -1266,7 +1266,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                             goto do_not_remove_call;
                         }
                     }
-                    tcg_set_nop(s, gen_opc_buf + op_index, 
+                    tcg_set_nop(s, s->gen_opc_buf + op_index,
                                 args - 1, nb_args);
                 } else {
                 do_not_remove_call:
@@ -1347,11 +1347,11 @@ static void tcg_liveness_analysis(TCGContext *s)
                 } else {
                     op = INDEX_op_sub_i32;
                 }
-                gen_opc_buf[op_index] = op;
+                s->gen_opc_buf[op_index] = op;
                 args[1] = args[2];
                 args[2] = args[4];
-                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
-                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 3);
+                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
                 /* Fall through and mark the single-word operation live.  */
                 nb_iargs = 2;
                 nb_oargs = 1;
@@ -1367,11 +1367,11 @@ static void tcg_liveness_analysis(TCGContext *s)
                 if (dead_temps[args[0]] && !mem_temps[args[0]]) {
                     goto do_remove;
                 }
-                gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+                s->gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
                 args[1] = args[2];
                 args[2] = args[3];
-                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
-                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1);
+                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
                 /* Fall through and mark the single-word operation live.  */
                 nb_oargs = 1;
             }
@@ -1394,7 +1394,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                     }
                 }
             do_remove:
-                tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
+                tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
 #ifdef CONFIG_PROFILER
                 s->del_op_count++;
 #endif
@@ -1448,7 +1448,7 @@ static void tcg_liveness_analysis(TCGContext *s)
 static void tcg_liveness_analysis(TCGContext *s)
 {
     int nb_ops;
-    nb_ops = s->gen_opc_ptr - gen_opc_buf;
+    nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
 
     s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
     memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
@@ -2253,7 +2253,7 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
     op_index = 0;
 
     for(;;) {
-        opc = gen_opc_buf[op_index];
+        opc = s->gen_opc_buf[op_index];
 #ifdef CONFIG_PROFILER
         tcg_table_op_count[opc]++;
 #endif
@@ -2334,7 +2334,7 @@ int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf)
 #ifdef CONFIG_PROFILER
     {
         int n;
-        n = (s->gen_opc_ptr - gen_opc_buf);
+        n = (s->gen_opc_ptr - s->gen_opc_buf);
         s->op_count += n;
         if (n > s->op_count_max)
             s->op_count_max = n;
-- 
cgit v1.2.3-55-g7522