From 226e6c046c0fce8da32575aad020ca56a5a8064d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:52:58 -0800 Subject: target/arm: Fix sve_uzp_p vs odd vector lengths Missed out on compressing the second half of a predicate with length vl % 512 > 256. Adjust all of the x + (y << s) to x | (y << s) as a general style fix. Drop the extract64 because the input uint64_t are known to be already zero-extended from the current size of the predicate. Reported-by: Laurent Desnogues Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-2-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'target') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 844db08bd5..11c9397dbb 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -1939,7 +1939,7 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) if (oprsz <= 8) { l = compress_bits(n[0] >> odd, esz); h = compress_bits(m[0] >> odd, esz); - d[0] = extract64(l + (h << (4 * oprsz)), 0, 8 * oprsz); + d[0] = l | (h << (4 * oprsz)); } else { ARMPredicateReg tmp_m; intptr_t oprsz_16 = oprsz / 16; @@ -1953,23 +1953,35 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) h = n[2 * i + 1]; l = compress_bits(l >> odd, esz); h = compress_bits(h >> odd, esz); - d[i] = l + (h << 32); + d[i] = l | (h << 32); } - /* For VL which is not a power of 2, the results from M do not - align nicely with the uint64_t for D. Put the aligned results - from M into TMP_M and then copy it into place afterward. */ + /* + * For VL which is not a multiple of 512, the results from M do not + * align nicely with the uint64_t for D. Put the aligned results + * from M into TMP_M and then copy it into place afterward. + */ if (oprsz & 15) { - d[i] = compress_bits(n[2 * i] >> odd, esz); + int final_shift = (oprsz & 15) * 2; + + l = n[2 * i + 0]; + h = n[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + d[i] = l | (h << final_shift); for (i = 0; i < oprsz_16; i++) { l = m[2 * i + 0]; h = m[2 * i + 1]; l = compress_bits(l >> odd, esz); h = compress_bits(h >> odd, esz); - tmp_m.p[i] = l + (h << 32); + tmp_m.p[i] = l | (h << 32); } - tmp_m.p[i] = compress_bits(m[2 * i] >> odd, esz); + l = m[2 * i + 0]; + h = m[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + tmp_m.p[i] = l | (h << final_shift); swap_memmove(vd + oprsz / 2, &tmp_m, oprsz / 2); } else { @@ -1978,7 +1990,7 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) h = m[2 * i + 1]; l = compress_bits(l >> odd, esz); h = compress_bits(h >> odd, esz); - d[oprsz_16 + i] = l + (h << 32); + d[oprsz_16 + i] = l | (h << 32); } } } -- cgit v1.2.3-55-g7522 From 8e7fefed1bdcc0f7e722ccf2a2fc2b4f79fe725e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:52:59 -0800 Subject: target/arm: Fix sve_zip_p vs odd vector lengths Wrote too much with low-half zip (zip1) with vl % 512 != 0. Adjust all of the x + (y << s) to x | (y << s) as a style fix. We only ever have exact overlap between D, M, and N. Therefore we only need a single temporary, and we do not need to check for partial overlap. Reported-by: Laurent Desnogues Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-3-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'target') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 11c9397dbb..2fb4b2c1ea 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -1871,6 +1871,7 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); int esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); intptr_t high = FIELD_EX32(pred_desc, PREDDESC, DATA); + int esize = 1 << esz; uint64_t *d = vd; intptr_t i; @@ -1883,33 +1884,35 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) mm = extract64(mm, high * half, half); nn = expand_bits(nn, esz); mm = expand_bits(mm, esz); - d[0] = nn + (mm << (1 << esz)); + d[0] = nn | (mm << esize); } else { - ARMPredicateReg tmp_n, tmp_m; + ARMPredicateReg tmp; /* We produce output faster than we consume input. Therefore we must be mindful of possible overlap. */ - if ((vn - vd) < (uintptr_t)oprsz) { - vn = memcpy(&tmp_n, vn, oprsz); - } - if ((vm - vd) < (uintptr_t)oprsz) { - vm = memcpy(&tmp_m, vm, oprsz); + if (vd == vn) { + vn = memcpy(&tmp, vn, oprsz); + if (vd == vm) { + vm = vn; + } + } else if (vd == vm) { + vm = memcpy(&tmp, vm, oprsz); } if (high) { high = oprsz >> 1; } - if ((high & 3) == 0) { + if ((oprsz & 7) == 0) { uint32_t *n = vn, *m = vm; high >>= 2; - for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + for (i = 0; i < oprsz / 8; i++) { uint64_t nn = n[H4(high + i)]; uint64_t mm = m[H4(high + i)]; nn = expand_bits(nn, esz); mm = expand_bits(mm, esz); - d[i] = nn + (mm << (1 << esz)); + d[i] = nn | (mm << esize); } } else { uint8_t *n = vn, *m = vm; @@ -1921,7 +1924,7 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) nn = expand_bits(nn, esz); mm = expand_bits(mm, esz); - d16[H2(i)] = nn + (mm << (1 << esz)); + d16[H2(i)] = nn | (mm << esize); } } } -- cgit v1.2.3-55-g7522 From fd911a21414b5a17663fa2b97f1059fb11cee99d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:00 -0800 Subject: target/arm: Fix sve_punpk_p vs odd vector lengths Wrote too much with punpk1 with vl % 512 != 0. Reviewed-by: Peter Maydell Reported-by: Laurent Desnogues Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'target') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 2fb4b2c1ea..981895a17c 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2105,11 +2105,11 @@ void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc) high = oprsz >> 1; } - if ((high & 3) == 0) { + if ((oprsz & 7) == 0) { uint32_t *n = vn; high >>= 2; - for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + for (i = 0; i < oprsz / 8; i++) { uint64_t nn = n[H4(high + i)]; d[i] = expand_bits(nn, 0); } -- cgit v1.2.3-55-g7522 From 2acbfbe4313daf43b6653ee5d82bcaeaa155e895 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:01 -0800 Subject: target/arm: Update find_last_active for PREDDESC Since b64ee454a4a0, all predicate operations should be using these field macros for predicates. Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-5-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 6 +++--- target/arm/translate-sve.c | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'target') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 981895a17c..224c767944 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2237,10 +2237,10 @@ void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc) */ int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; - intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8); + intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); - return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz); + return last_active_element(vg, words, esz); } void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 27402af23c..cac8082156 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -2302,11 +2302,10 @@ static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) */ TCGv_ptr t_p = tcg_temp_new_ptr(); TCGv_i32 t_desc; - unsigned vsz = pred_full_reg_size(s); - unsigned desc; + unsigned desc = 0; - desc = vsz - 2; - desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz); + desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); + desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg)); t_desc = tcg_const_i32(desc); -- cgit v1.2.3-55-g7522 From 04c774a25da78eb07d505ee5923167c2010b9f8c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:02 -0800 Subject: target/arm: Update BRKA, BRKB, BRKN for PREDDESC Since b64ee454a4a0, all predicate operations should be using these field macros for predicates. Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-6-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 30 ++++++++++++++---------------- target/arm/translate-sve.c | 4 ++-- 2 files changed, 16 insertions(+), 18 deletions(-) (limited to 'target') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 224c767944..8e0a5d30a5 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2710,7 +2710,7 @@ static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz) void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { compute_brk_z(vd, vm, vg, oprsz, true); } else { @@ -2721,7 +2721,7 @@ void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg, uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { return compute_brks_z(vd, vm, vg, oprsz, true); } else { @@ -2732,7 +2732,7 @@ uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg, void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { compute_brk_z(vd, vm, vg, oprsz, false); } else { @@ -2743,7 +2743,7 @@ void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg, uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { return compute_brks_z(vd, vm, vg, oprsz, false); } else { @@ -2753,56 +2753,55 @@ uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg, void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); compute_brk_z(vd, vn, vg, oprsz, true); } uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); return compute_brks_z(vd, vn, vg, oprsz, true); } void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); compute_brk_z(vd, vn, vg, oprsz, false); } uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); return compute_brks_z(vd, vn, vg, oprsz, false); } void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); compute_brk_m(vd, vn, vg, oprsz, true); } uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); return compute_brks_m(vd, vn, vg, oprsz, true); } void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); compute_brk_m(vd, vn, vg, oprsz, false); } uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); return compute_brks_m(vd, vn, vg, oprsz, false); } void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; - + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (!last_active_pred(vn, vg, oprsz)) { do_zero(vd, oprsz); } @@ -2827,8 +2826,7 @@ static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz, uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; - + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { return predtest_ones(vd, oprsz, -1); } else { diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index cac8082156..c0212e6b08 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -2850,7 +2850,7 @@ static bool do_brk3(DisasContext *s, arg_rprr_s *a, TCGv_ptr n = tcg_temp_new_ptr(); TCGv_ptr m = tcg_temp_new_ptr(); TCGv_ptr g = tcg_temp_new_ptr(); - TCGv_i32 t = tcg_const_i32(vsz - 2); + TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); @@ -2884,7 +2884,7 @@ static bool do_brk2(DisasContext *s, arg_rpr_s *a, TCGv_ptr d = tcg_temp_new_ptr(); TCGv_ptr n = tcg_temp_new_ptr(); TCGv_ptr g = tcg_temp_new_ptr(); - TCGv_i32 t = tcg_const_i32(vsz - 2); + TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); -- cgit v1.2.3-55-g7522 From f556a201b5bbeb59841b37247969fcfc1ab7bd5d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:03 -0800 Subject: target/arm: Update CNTP for PREDDESC Since b64ee454a4a0, all predicate operations should be using these field macros for predicates. Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-7-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 6 +++--- target/arm/translate-sve.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'target') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 8e0a5d30a5..a95bbece4f 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2836,12 +2836,12 @@ uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc) uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc) { - intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; - intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8); + intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz]; intptr_t i; - for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + for (i = 0; i < words; ++i) { uint64_t t = n[i] & g[i] & mask; sum += ctpop64(t); } diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index c0212e6b08..722805cf99 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -2967,11 +2967,11 @@ static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) } else { TCGv_ptr t_pn = tcg_temp_new_ptr(); TCGv_ptr t_pg = tcg_temp_new_ptr(); - unsigned desc; + unsigned desc = 0; TCGv_i32 t_desc; - desc = psz - 2; - desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz); + desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); + desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn)); tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); -- cgit v1.2.3-55-g7522 From e610906c56f98c76888d45beb7f579935dd61a70 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:04 -0800 Subject: target/arm: Update WHILE for PREDDESC Since b64ee454a4a0, all predicate operations should be using these field macros for predicates. Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-8-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 4 ++-- target/arm/translate-sve.c | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'target') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index a95bbece4f..6f4bc3a3cc 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2850,8 +2850,8 @@ uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc) uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc) { - uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; - intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); uint64_t esz_mask = pred_esz_masks[esz]; ARMPredicateReg *d = vd; uint32_t flags; diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 722805cf99..2420cd741b 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -3097,7 +3097,8 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) TCGv_i64 op0, op1, t0, t1, tmax; TCGv_i32 t2, t3; TCGv_ptr ptr; - unsigned desc, vsz = vec_full_reg_size(s); + unsigned vsz = vec_full_reg_size(s); + unsigned desc = 0; TCGCond cond; if (!sve_access_check(s)) { @@ -3161,8 +3162,8 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) /* Scale elements to bits. */ tcg_gen_shli_i32(t2, t2, a->esz); - desc = (vsz / 8) - 2; - desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz); + desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); + desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); t3 = tcg_const_i32(desc); ptr = tcg_temp_new_ptr(); -- cgit v1.2.3-55-g7522 From c648c9b7e1ccff94b51ecbebe86a206952c47e75 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Mar 2021 07:53:05 -0800 Subject: target/arm: Update sve reduction vs simd_desc With the reduction operations, we intentionally increase maxsz to the next power of 2, so as to fill out the reduction tree correctly. Since e2e7168a214b, oprsz must equal maxsz, with exceptions for small vectors, so this triggers an assertion for vector sizes > 32 that are not themselves a power of 2. Pass the power-of-two value in the simd_data field instead. Signed-off-by: Richard Henderson Message-id: 20210309155305.11301-9-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 2 +- target/arm/translate-sve.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'target') diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 6f4bc3a3cc..fd6c58f96a 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -2896,7 +2896,7 @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ } \ uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ { \ - uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \ + uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \ TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ for (i = 0; i < oprsz; ) { \ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 2420cd741b..0eefb61214 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -3440,7 +3440,7 @@ static void do_reduce(DisasContext *s, arg_rpr_esz *a, { unsigned vsz = vec_full_reg_size(s); unsigned p2vsz = pow2ceil(vsz); - TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0)); + TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz)); TCGv_ptr t_zn, t_pg, status; TCGv_i64 temp; -- cgit v1.2.3-55-g7522 From bcb902a1ed1ad5e0ceebb9536f392bf6d46219f9 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 10 Mar 2021 14:52:18 +0100 Subject: hw/arm/virt: KVM: The IPA lower bound is 32 The virt machine already checks KVM_CAP_ARM_VM_IPA_SIZE to get the upper bound of the IPA size. If that bound is lower than the highest possible GPA for the machine, then QEMU will error out. However, the IPA is set to 40 when the highest GPA is less than or equal to 40, even when KVM may support an IPA limit as low as 32. This means KVM may fail the VM creation unnecessarily. Additionally, 40 is selected with the value 0, which means use the default, and that gets around a check in some versions of KVM, causing a difficult to debug fail. Always use the IPA size that corresponds to the highest possible GPA, unless it's lower than 32, in which case use 32. Also, we must still use 0 when KVM only supports the legacy fixed 40 bit IPA. Suggested-by: Marc Zyngier Signed-off-by: Andrew Jones Reviewed-by: Eric Auger Reviewed-by: Marc Zyngier Message-id: 20210310135218.255205-3-drjones@redhat.com Signed-off-by: Peter Maydell --- hw/arm/virt.c | 23 ++++++++++++++++------- target/arm/kvm.c | 4 +++- target/arm/kvm_arm.h | 6 ++++-- 3 files changed, 23 insertions(+), 10 deletions(-) (limited to 'target') diff --git a/hw/arm/virt.c b/hw/arm/virt.c index c08bf11297..aa2bbd14e0 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2548,27 +2548,36 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, static int virt_kvm_type(MachineState *ms, const char *type_str) { VirtMachineState *vms = VIRT_MACHINE(ms); - int max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms); - int requested_pa_size; + int max_vm_pa_size, requested_pa_size; + bool fixed_ipa; + + max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa); /* we freeze the memory map to compute the highest gpa */ virt_set_memmap(vms); requested_pa_size = 64 - clz64(vms->highest_gpa); + /* + * KVM requires the IPA size to be at least 32 bits. + */ + if (requested_pa_size < 32) { + requested_pa_size = 32; + } + if (requested_pa_size > max_vm_pa_size) { error_report("-m and ,maxmem option values " "require an IPA range (%d bits) larger than " "the one supported by the host (%d bits)", requested_pa_size, max_vm_pa_size); - exit(1); + exit(1); } /* - * By default we return 0 which corresponds to an implicit legacy - * 40b IPA setting. Otherwise we return the actual requested PA - * logsize + * We return the requested PA log size, unless KVM only supports + * the implicit legacy 40b IPA setting, in which case the kvm_type + * must be 0. */ - return requested_pa_size > 40 ? requested_pa_size : 0; + return fixed_ipa ? 0 : requested_pa_size; } static void virt_machine_class_init(ObjectClass *oc, void *data) diff --git a/target/arm/kvm.c b/target/arm/kvm.c index bebea90122..d8381ba224 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -230,12 +230,14 @@ bool kvm_arm_pmu_supported(void) return kvm_check_extension(kvm_state, KVM_CAP_ARM_PMU_V3); } -int kvm_arm_get_max_vm_ipa_size(MachineState *ms) +int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) { KVMState *s = KVM_STATE(ms->accelerator); int ret; ret = kvm_check_extension(s, KVM_CAP_ARM_VM_IPA_SIZE); + *fixed_ipa = ret <= 0; + return ret > 0 ? ret : 40; } diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 68ec970c4f..34f8daa377 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -311,10 +311,12 @@ bool kvm_arm_sve_supported(void); /** * kvm_arm_get_max_vm_ipa_size: * @ms: Machine state handle + * @fixed_ipa: True when the IPA limit is fixed at 40. This is the case + * for legacy KVM. * * Returns the number of bits in the IPA address space supported by KVM */ -int kvm_arm_get_max_vm_ipa_size(MachineState *ms); +int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa); /** * kvm_arm_sync_mpstate_to_kvm: @@ -409,7 +411,7 @@ static inline void kvm_arm_add_vcpu_properties(Object *obj) g_assert_not_reached(); } -static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms) +static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) { g_assert_not_reached(); } -- cgit v1.2.3-55-g7522