target/arm: Reorganize SVE WHILE

The pseudocode for this operation is an increment + compare loop, so comparing <= the maximum integer produces an all-true predicate. Rather than bound in both the inline code and the helper, pass the helper the number of predicate bits to set instead of the number of predicate elements to set. Reported-by: Laurent Desnogues <laurent.desnogues@gmail.com> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Laurent Desnogues <laurent.desnogues@gmail.com> Tested-by: Alex Bennée <alex.bennee@linaro.org> Tested-by: Laurent Desnogues <laurent.desnogues@gmail.com> Message-id: 20180801123111.3595-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
author: Richard Henderson 2018-08-14 18:17:22 +0200
committer: Peter Maydell 2018-08-14 18:17:22 +0200
commit: bbd0968c458d48e34a08b8694fa3309a9fe1c9e7 (patch)
tree: 2fbc7ecabca4fcc7f93db569ec697688e42fbe99 /target/arm/translate-sve.c
parent: target/arm: Fix typo in do_sat_addsub_64 (diff)
download: qemu-bbd0968c458d48e34a08b8694fa3309a9fe1c9e7.tar.gz
qemu-bbd0968c458d48e34a08b8694fa3309a9fe1c9e7.tar.xz
qemu-bbd0968c458d48e34a08b8694fa3309a9fe1c9e7.zip
1 files changed, 32 insertions, 17 deletions
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 9dd4c38bab..89efc80ee7 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -3173,19 +3173,19 @@ static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
 
 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
 {
-    if (!sve_access_check(s)) {
-        return true;
-    }
-
-    TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
-    TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
-    TCGv_i64 t0 = tcg_temp_new_i64();
-    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 op0, op1, t0, t1, tmax;
     TCGv_i32 t2, t3;
     TCGv_ptr ptr;
     unsigned desc, vsz = vec_full_reg_size(s);
     TCGCond cond;
 
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    op0 = read_cpu_reg(s, a->rn, 1);
+    op1 = read_cpu_reg(s, a->rm, 1);
+
     if (!a->sf) {
         if (a->u) {
             tcg_gen_ext32u_i64(op0, op0);
@@ -3198,32 +3198,47 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
 
     /* For the helper, compress the different conditions into a computation
      * of how many iterations for which the condition is true.
-     *
-     * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
-     * 2**64 iterations, overflowing to 0.  Of course, predicate registers
-     * aren't that large, so any value >= predicate size is sufficient.
      */
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
     tcg_gen_sub_i64(t0, op1, op0);
 
-    /* t0 = MIN(op1 - op0, vsz).  */
-    tcg_gen_movi_i64(t1, vsz);
-    tcg_gen_umin_i64(t0, t0, t1);
+    tmax = tcg_const_i64(vsz >> a->esz);
     if (a->eq) {
         /* Equality means one more iteration.  */
         tcg_gen_addi_i64(t0, t0, 1);
+
+        /* If op1 is max (un)signed integer (and the only time the addition
+         * above could overflow), then we produce an all-true predicate by
+         * setting the count to the vector length.  This is because the
+         * pseudocode is described as an increment + compare loop, and the
+         * max integer would always compare true.
+         */
+        tcg_gen_movi_i64(t1, (a->sf
+                              ? (a->u ? UINT64_MAX : INT64_MAX)
+                              : (a->u ? UINT32_MAX : INT32_MAX)));
+        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
     }
 
-    /* t0 = (condition true ? t0 : 0).  */
+    /* Bound to the maximum.  */
+    tcg_gen_umin_i64(t0, t0, tmax);
+    tcg_temp_free_i64(tmax);
+
+    /* Set the count to zero if the condition is false.  */
     cond = (a->u
             ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
             : (a->eq ? TCG_COND_LE : TCG_COND_LT));
     tcg_gen_movi_i64(t1, 0);
     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
+    tcg_temp_free_i64(t1);
 
+    /* Since we're bounded, pass as a 32-bit type.  */
     t2 = tcg_temp_new_i32();
     tcg_gen_extrl_i64_i32(t2, t0);
     tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
+
+    /* Scale elements to bits.  */
+    tcg_gen_shli_i32(t2, t2, a->esz);
 
     desc = (vsz / 8) - 2;
     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
author	Richard Henderson	2018-08-14 18:17:22 +0200
committer	Peter Maydell	2018-08-14 18:17:22 +0200
commit	bbd0968c458d48e34a08b8694fa3309a9fe1c9e7 (patch)
tree	2fbc7ecabca4fcc7f93db569ec697688e42fbe99 /target/arm/translate-sve.c
parent	target/arm: Fix typo in do_sat_addsub_64 (diff)
download	qemu-bbd0968c458d48e34a08b8694fa3309a9fe1c9e7.tar.gz qemu-bbd0968c458d48e34a08b8694fa3309a9fe1c9e7.tar.xz qemu-bbd0968c458d48e34a08b8694fa3309a9fe1c9e7.zip