From 226e6c046c0fce8da32575aad020ca56a5a8064d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:52:58 -0800 Subject: target/arm: Fix sve_uzp_p vs odd vector lengths Missed out on compressing the second half of a predicate with length vl % 512 > 256. Adjust all of the x + (y << s) to x | (y << s) as a general style fix. Drop the extract64 because the input uint64_t are known to be already zero-extended from the current size of the predicate. Reported-by: Laurent Desnogues Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-2-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'target/arm/sve_helper.c') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 844db08bd5..11c9397dbb 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -1939,7 +1939,7 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) if (oprsz <= 8) { l = compress_bits(n[0] >> odd, esz); h = compress_bits(m[0] >> odd, esz); - d[0] = extract64(l + (h << (4 * oprsz)), 0, 8 * oprsz); + d[0] = l | (h << (4 * oprsz)); } else { ARMPredicateReg tmp_m; intptr_t oprsz_16 = oprsz / 16; @@ -1953,23 +1953,35 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) h = n[2 * i + 1]; l = compress_bits(l >> odd, esz); h = compress_bits(h >> odd, esz); - d[i] = l + (h << 32); + d[i] = l | (h << 32); } - /* For VL which is not a power of 2, the results from M do not - align nicely with the uint64_t for D. Put the aligned results - from M into TMP_M and then copy it into place afterward. */ + /* + * For VL which is not a multiple of 512, the results from M do not + * align nicely with the uint64_t for D. Put the aligned results + * from M into TMP_M and then copy it into place afterward. + */ if (oprsz & 15) { - d[i] = compress_bits(n[2 * i] >> odd, esz); + int final_shift = (oprsz & 15) * 2; + + l = n[2 * i + 0]; + h = n[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + d[i] = l | (h << final_shift); for (i = 0; i < oprsz_16; i++) { l = m[2 * i + 0]; h = m[2 * i + 1]; l = compress_bits(l >> odd, esz); h = compress_bits(h >> odd, esz); - tmp_m.p[i] = l + (h << 32); + tmp_m.p[i] = l | (h << 32); } - tmp_m.p[i] = compress_bits(m[2 * i] >> odd, esz); + l = m[2 * i + 0]; + h = m[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + tmp_m.p[i] = l | (h << final_shift); swap_memmove(vd + oprsz / 2, &tmp_m, oprsz / 2); } else { @@ -1978,7 +1990,7 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) h = m[2 * i + 1]; l = compress_bits(l >> odd, esz); h = compress_bits(h >> odd, esz); - d[oprsz_16 + i] = l + (h << 32); + d[oprsz_16 + i] = l | (h << 32); } } } -- cgit v1.2.3-55-g7522 From 8e7fefed1bdcc0f7e722ccf2a2fc2b4f79fe725e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:52:59 -0800 Subject: target/arm: Fix sve_zip_p vs odd vector lengths Wrote too much with low-half zip (zip1) with vl % 512 != 0. Adjust all of the x + (y << s) to x | (y << s) as a style fix. We only ever have exact overlap between D, M, and N. Therefore we only need a single temporary, and we do not need to check for partial overlap. Reported-by: Laurent Desnogues Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-3-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'target/arm/sve_helper.c') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 11c9397dbb..2fb4b2c1ea 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -1871,6 +1871,7 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); int esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); intptr_t high = FIELD_EX32(pred_desc, PREDDESC, DATA); + int esize = 1 << esz; uint64_t *d = vd; intptr_t i; @@ -1883,33 +1884,35 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) mm = extract64(mm, high * half, half); nn = expand_bits(nn, esz); mm = expand_bits(mm, esz); - d[0] = nn + (mm << (1 << esz)); + d[0] = nn | (mm << esize); } else { - ARMPredicateReg tmp_n, tmp_m; + ARMPredicateReg tmp; /* We produce output faster than we consume input. Therefore we must be mindful of possible overlap. */ - if ((vn - vd) < (uintptr_t)oprsz) { - vn = memcpy(&tmp_n, vn, oprsz); - } - if ((vm - vd) < (uintptr_t)oprsz) { - vm = memcpy(&tmp_m, vm, oprsz); + if (vd == vn) { + vn = memcpy(&tmp, vn, oprsz); + if (vd == vm) { + vm = vn; + } + } else if (vd == vm) { + vm = memcpy(&tmp, vm, oprsz); } if (high) { high = oprsz >> 1; } - if ((high & 3) == 0) { + if ((oprsz & 7) == 0) { uint32_t *n = vn, *m = vm; high >>= 2; - for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + for (i = 0; i < oprsz / 8; i++) { uint64_t nn = n[H4(high + i)]; uint64_t mm = m[H4(high + i)]; nn = expand_bits(nn, esz); mm = expand_bits(mm, esz); - d[i] = nn + (mm << (1 << esz)); + d[i] = nn | (mm << esize); } } else { uint8_t *n = vn, *m = vm; @@ -1921,7 +1924,7 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) nn = expand_bits(nn, esz); mm = expand_bits(mm, esz); - d16[H2(i)] = nn + (mm << (1 << esz)); + d16[H2(i)] = nn | (mm << esize); } } } -- cgit v1.2.3-55-g7522 From fd911a21414b5a17663fa2b97f1059fb11cee99d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:00 -0800 Subject: target/arm: Fix sve_punpk_p vs odd vector lengths Wrote too much with punpk1 with vl % 512 != 0. Reviewed-by: Peter Maydell Reported-by: Laurent Desnogues Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'target/arm/sve_helper.c') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 2fb4b2c1ea..981895a17c 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2105,11 +2105,11 @@ void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc) high = oprsz >> 1; } - if ((high & 3) == 0) { + if ((oprsz & 7) == 0) { uint32_t *n = vn; high >>= 2; - for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + for (i = 0; i < oprsz / 8; i++) { uint64_t nn = n[H4(high + i)]; d[i] = expand_bits(nn, 0); } -- cgit v1.2.3-55-g7522 From 2acbfbe4313daf43b6653ee5d82bcaeaa155e895 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:01 -0800 Subject: target/arm: Update find_last_active for PREDDESC Since b64ee454a4a0, all predicate operations should be using these field macros for predicates. Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-5-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 6 +++--- target/arm/translate-sve.c | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'target/arm/sve_helper.c') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 981895a17c..224c767944 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2237,10 +2237,10 @@ void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc) */ int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; - intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8); + intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); - return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz); + return last_active_element(vg, words, esz); } void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 27402af23c..cac8082156 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -2302,11 +2302,10 @@ static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) */ TCGv_ptr t_p = tcg_temp_new_ptr(); TCGv_i32 t_desc; - unsigned vsz = pred_full_reg_size(s); - unsigned desc; + unsigned desc = 0; - desc = vsz - 2; - desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz); + desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); + desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg)); t_desc = tcg_const_i32(desc); -- cgit v1.2.3-55-g7522 From 04c774a25da78eb07d505ee5923167c2010b9f8c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:02 -0800 Subject: target/arm: Update BRKA, BRKB, BRKN for PREDDESC Since b64ee454a4a0, all predicate operations should be using these field macros for predicates. Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-6-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 30 ++++++++++++++---------------- target/arm/translate-sve.c | 4 ++-- 2 files changed, 16 insertions(+), 18 deletions(-) (limited to 'target/arm/sve_helper.c') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 224c767944..8e0a5d30a5 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2710,7 +2710,7 @@ static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz) void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { compute_brk_z(vd, vm, vg, oprsz, true); } else { @@ -2721,7 +2721,7 @@ void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg, uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { return compute_brks_z(vd, vm, vg, oprsz, true); } else { @@ -2732,7 +2732,7 @@ uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg, void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { compute_brk_z(vd, vm, vg, oprsz, false); } else { @@ -2743,7 +2743,7 @@ void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg, uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { return compute_brks_z(vd, vm, vg, oprsz, false); } else { @@ -2753,56 +2753,55 @@ uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg, void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); compute_brk_z(vd, vn, vg, oprsz, true); } uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); return compute_brks_z(vd, vn, vg, oprsz, true); } void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); compute_brk_z(vd, vn, vg, oprsz, false); } uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); return compute_brks_z(vd, vn, vg, oprsz, false); } void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); compute_brk_m(vd, vn, vg, oprsz, true); } uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); return compute_brks_m(vd, vn, vg, oprsz, true); } void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); compute_brk_m(vd, vn, vg, oprsz, false); } uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); return compute_brks_m(vd, vn, vg, oprsz, false); } void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; - + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (!last_active_pred(vn, vg, oprsz)) { do_zero(vd, oprsz); } @@ -2827,8 +2826,7 @@ static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz, uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; - + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { return predtest_ones(vd, oprsz, -1); } else { diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index cac8082156..c0212e6b08 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -2850,7 +2850,7 @@ static bool do_brk3(DisasContext *s, arg_rprr_s *a, TCGv_ptr n = tcg_temp_new_ptr(); TCGv_ptr m = tcg_temp_new_ptr(); TCGv_ptr g = tcg_temp_new_ptr(); - TCGv_i32 t = tcg_const_i32(vsz - 2); + TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); @@ -2884,7 +2884,7 @@ static bool do_brk2(DisasContext *s, arg_rpr_s *a, TCGv_ptr d = tcg_temp_new_ptr(); TCGv_ptr n = tcg_temp_new_ptr(); TCGv_ptr g = tcg_temp_new_ptr(); - TCGv_i32 t = tcg_const_i32(vsz - 2); + TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); -- cgit v1.2.3-55-g7522 From f556a201b5bbeb59841b37247969fcfc1ab7bd5d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:03 -0800 Subject: target/arm: Update CNTP for PREDDESC Since b64ee454a4a0, all predicate operations should be using these field macros for predicates. Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-7-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 6 +++--- target/arm/translate-sve.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'target/arm/sve_helper.c') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 8e0a5d30a5..a95bbece4f 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2836,12 +2836,12 @@ uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc) uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; - intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8); + intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz]; intptr_t i; - for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + for (i = 0; i < words; ++i) { uint64_t t = n[i] & g[i] & mask; sum += ctpop64(t); } diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index c0212e6b08..722805cf99 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -2967,11 +2967,11 @@ static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) } else { TCGv_ptr t_pn = tcg_temp_new_ptr(); TCGv_ptr t_pg = tcg_temp_new_ptr(); - unsigned desc; + unsigned desc = 0; TCGv_i32 t_desc; - desc = psz - 2; - desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz); + desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); + desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn)); tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); -- cgit v1.2.3-55-g7522 From e610906c56f98c76888d45beb7f579935dd61a70 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:04 -0800 Subject: target/arm: Update WHILE for PREDDESC Since b64ee454a4a0, all predicate operations should be using these field macros for predicates. Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-8-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 4 ++-- target/arm/translate-sve.c | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'target/arm/sve_helper.c') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index a95bbece4f..6f4bc3a3cc 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2850,8 +2850,8 @@ uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc) uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc) { - uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; - intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); uint64_t esz_mask = pred_esz_masks[esz]; ARMPredicateReg *d = vd; uint32_t flags; diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 722805cf99..2420cd741b 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -3097,7 +3097,8 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) TCGv_i64 op0, op1, t0, t1, tmax; TCGv_i32 t2, t3; TCGv_ptr ptr; - unsigned desc, vsz = vec_full_reg_size(s); + unsigned vsz = vec_full_reg_size(s); + unsigned desc = 0; TCGCond cond; if (!sve_access_check(s)) { @@ -3161,8 +3162,8 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) /* Scale elements to bits. */ tcg_gen_shli_i32(t2, t2, a->esz); - desc = (vsz / 8) - 2; - desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz); + desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); + desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); t3 = tcg_const_i32(desc); ptr = tcg_temp_new_ptr(); -- cgit v1.2.3-55-g7522 From c648c9b7e1ccff94b51ecbebe86a206952c47e75 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:05 -0800 Subject: target/arm: Update sve reduction vs simd_desc With the reduction operations, we intentionally increase maxsz to the next power of 2, so as to fill out the reduction tree correctly. Since e2e7168a214b, oprsz must equal maxsz, with exceptions for small vectors, so this triggers an assertion for vector sizes > 32 that are not themselves a power of 2. Pass the power-of-two value in the simd_data field instead. Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-9-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 2 +- target/arm/translate-sve.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'target/arm/sve_helper.c') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 6f4bc3a3cc..fd6c58f96a 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2896,7 +2896,7 @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ } \ uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ { \ - uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \ + uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \ TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ for (i = 0; i < oprsz; ) { \ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 2420cd741b..0eefb61214 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -3440,7 +3440,7 @@ static void do_reduce(DisasContext *s, arg_rpr_esz *a, { unsigned vsz = vec_full_reg_size(s); unsigned p2vsz = pow2ceil(vsz); - TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0)); + TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz)); TCGv_ptr t_zn, t_pg, status; TCGv_i64 temp; -- cgit v1.2.3-55-g7522