Diffstat (limited to 'target/arm')
-rw-r--r--  target/arm/kvm.c           |   4
-rw-r--r--  target/arm/kvm_arm.h       |   6
-rw-r--r--  target/arm/sve_helper.c    | 107
-rw-r--r--  target/arm/translate-sve.c |  26
4 files changed, 80 insertions, 63 deletions
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index bebea90122..d8381ba224 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -230,12 +230,14 @@ bool kvm_arm_pmu_supported(void)
     return kvm_check_extension(kvm_state, KVM_CAP_ARM_PMU_V3);
 }
 
-int kvm_arm_get_max_vm_ipa_size(MachineState *ms)
+int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa)
 {
     KVMState *s = KVM_STATE(ms->accelerator);
     int ret;
 
     ret = kvm_check_extension(s, KVM_CAP_ARM_VM_IPA_SIZE);
+    *fixed_ipa = ret <= 0;
+
     return ret > 0 ? ret : 40;
 }
 
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 68ec970c4f..34f8daa377 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -311,10 +311,12 @@ bool kvm_arm_sve_supported(void);
 /**
  * kvm_arm_get_max_vm_ipa_size:
  * @ms: Machine state handle
+ * @fixed_ipa: True when the IPA limit is fixed at 40. This is the case
+ * for legacy KVM.
  *
  * Returns the number of bits in the IPA address space supported by KVM
  */
-int kvm_arm_get_max_vm_ipa_size(MachineState *ms);
+int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa);
 
 /**
  * kvm_arm_sync_mpstate_to_kvm:
@@ -409,7 +411,7 @@ static inline void kvm_arm_add_vcpu_properties(Object *obj)
     g_assert_not_reached();
 }
 
-static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms)
+static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa)
 {
     g_assert_not_reached();
 }
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 844db08bd5..fd6c58f96a 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -1871,6 +1871,7 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
     intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     int esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
     intptr_t high = FIELD_EX32(pred_desc, PREDDESC, DATA);
+    int esize = 1 << esz;
     uint64_t *d = vd;
     intptr_t i;
 
@@ -1883,33 +1884,35 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
         mm = extract64(mm, high * half, half);
         nn = expand_bits(nn, esz);
         mm = expand_bits(mm, esz);
-        d[0] = nn + (mm << (1 << esz));
+        d[0] = nn | (mm << esize);
     } else {
-        ARMPredicateReg tmp_n, tmp_m;
+        ARMPredicateReg tmp;
 
         /* We produce output faster than we consume input.
            Therefore we must be mindful of possible overlap.  */
-        if ((vn - vd) < (uintptr_t)oprsz) {
-            vn = memcpy(&tmp_n, vn, oprsz);
-        }
-        if ((vm - vd) < (uintptr_t)oprsz) {
-            vm = memcpy(&tmp_m, vm, oprsz);
+        if (vd == vn) {
+            vn = memcpy(&tmp, vn, oprsz);
+            if (vd == vm) {
+                vm = vn;
+            }
+        } else if (vd == vm) {
+            vm = memcpy(&tmp, vm, oprsz);
         }
         if (high) {
             high = oprsz >> 1;
         }
 
-        if ((high & 3) == 0) {
+        if ((oprsz & 7) == 0) {
             uint32_t *n = vn, *m = vm;
             high >>= 2;
 
-            for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+            for (i = 0; i < oprsz / 8; i++) {
                 uint64_t nn = n[H4(high + i)];
                 uint64_t mm = m[H4(high + i)];
 
                 nn = expand_bits(nn, esz);
                 mm = expand_bits(mm, esz);
-                d[i] = nn + (mm << (1 << esz));
+                d[i] = nn | (mm << esize);
             }
         } else {
             uint8_t *n = vn, *m = vm;
@@ -1921,7 +1924,7 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
 
                 nn = expand_bits(nn, esz);
                 mm = expand_bits(mm, esz);
-                d16[H2(i)] = nn + (mm << (1 << esz));
+                d16[H2(i)] = nn | (mm << esize);
             }
         }
     }
@@ -1939,7 +1942,7 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
     if (oprsz <= 8) {
         l = compress_bits(n[0] >> odd, esz);
         h = compress_bits(m[0] >> odd, esz);
-        d[0] = extract64(l + (h << (4 * oprsz)), 0, 8 * oprsz);
+        d[0] = l | (h << (4 * oprsz));
     } else {
         ARMPredicateReg tmp_m;
         intptr_t oprsz_16 = oprsz / 16;
@@ -1953,23 +1956,35 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
             h = n[2 * i + 1];
             l = compress_bits(l >> odd, esz);
             h = compress_bits(h >> odd, esz);
-            d[i] = l + (h << 32);
+            d[i] = l | (h << 32);
         }
 
-        /* For VL which is not a power of 2, the results from M do not
-           align nicely with the uint64_t for D.  Put the aligned results
-           from M into TMP_M and then copy it into place afterward.  */
+        /*
+         * For VL which is not a multiple of 512, the results from M do not
+         * align nicely with the uint64_t for D. Put the aligned results
+         * from M into TMP_M and then copy it into place afterward.
+         */
         if (oprsz & 15) {
-            d[i] = compress_bits(n[2 * i] >> odd, esz);
+            int final_shift = (oprsz & 15) * 2;
+
+            l = n[2 * i + 0];
+            h = n[2 * i + 1];
+            l = compress_bits(l >> odd, esz);
+            h = compress_bits(h >> odd, esz);
+            d[i] = l | (h << final_shift);
 
             for (i = 0; i < oprsz_16; i++) {
                 l = m[2 * i + 0];
                 h = m[2 * i + 1];
                 l = compress_bits(l >> odd, esz);
                 h = compress_bits(h >> odd, esz);
-                tmp_m.p[i] = l + (h << 32);
+                tmp_m.p[i] = l | (h << 32);
             }
-            tmp_m.p[i] = compress_bits(m[2 * i] >> odd, esz);
+            l = m[2 * i + 0];
+            h = m[2 * i + 1];
+            l = compress_bits(l >> odd, esz);
+            h = compress_bits(h >> odd, esz);
+            tmp_m.p[i] = l | (h << final_shift);
 
             swap_memmove(vd + oprsz / 2, &tmp_m, oprsz / 2);
         } else {
@@ -1978,7 +1993,7 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
                 h = m[2 * i + 1];
                 l = compress_bits(l >> odd, esz);
                 h = compress_bits(h >> odd, esz);
-                d[oprsz_16 + i] = l + (h << 32);
+                d[oprsz_16 + i] = l | (h << 32);
             }
         }
     }
@@ -2090,11 +2105,11 @@ void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc)
             high = oprsz >> 1;
         }
 
-        if ((high & 3) == 0) {
+        if ((oprsz & 7) == 0) {
             uint32_t *n = vn;
             high >>= 2;
 
-            for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+            for (i = 0; i < oprsz / 8; i++) {
                 uint64_t nn = n[H4(high + i)];
                 d[i] = expand_bits(nn, 0);
             }
@@ -2222,10 +2237,10 @@ void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc)
  */
 int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+    intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
 
-    return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz);
+    return last_active_element(vg, words, esz);
 }
 
 void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc)
@@ -2695,7 +2710,7 @@ static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz)
 void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg,
                        uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (last_active_pred(vn, vg, oprsz)) {
         compute_brk_z(vd, vm, vg, oprsz, true);
     } else {
@@ -2706,7 +2721,7 @@ void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg,
 uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg,
                             uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (last_active_pred(vn, vg, oprsz)) {
         return compute_brks_z(vd, vm, vg, oprsz, true);
     } else {
@@ -2717,7 +2732,7 @@ uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg,
 void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg,
                        uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (last_active_pred(vn, vg, oprsz)) {
         compute_brk_z(vd, vm, vg, oprsz, false);
     } else {
@@ -2728,7 +2743,7 @@ void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg,
 uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg,
                             uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (last_active_pred(vn, vg, oprsz)) {
         return compute_brks_z(vd, vm, vg, oprsz, false);
     } else {
@@ -2738,56 +2753,55 @@ uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg,
 
 void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     compute_brk_z(vd, vn, vg, oprsz, true);
 }
 
 uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     return compute_brks_z(vd, vn, vg, oprsz, true);
 }
 
 void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     compute_brk_z(vd, vn, vg, oprsz, false);
 }
 
 uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     return compute_brks_z(vd, vn, vg, oprsz, false);
 }
 
 void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     compute_brk_m(vd, vn, vg, oprsz, true);
 }
 
 uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     return compute_brks_m(vd, vn, vg, oprsz, true);
 }
 
 void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     compute_brk_m(vd, vn, vg, oprsz, false);
 }
 
 uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     return compute_brks_m(vd, vn, vg, oprsz, false);
 }
 
 void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (!last_active_pred(vn, vg, oprsz)) {
         do_zero(vd, oprsz);
     }
@@ -2812,8 +2826,7 @@ static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz,
 
 uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (last_active_pred(vn, vg, oprsz)) {
         return predtest_ones(vd, oprsz, -1);
     } else {
@@ -2823,12 +2836,12 @@ uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 
 uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+    intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
     uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz];
     intptr_t i;
 
-    for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+    for (i = 0; i < words; ++i) {
         uint64_t t = n[i] & g[i] & mask;
         sum += ctpop64(t);
     }
@@ -2837,8 +2850,8 @@ uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
 
 uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
 {
-    uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+    intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
     uint64_t esz_mask = pred_esz_masks[esz];
     ARMPredicateReg *d = vd;
     uint32_t flags;
@@ -2883,7 +2896,7 @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \
 }                                                                   \
 uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc)  \
 {                                                                   \
-    uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \
+    uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \
     TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)];                 \
     for (i = 0; i < oprsz; ) {                                      \
         uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));             \
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 27402af23c..0eefb61214 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -2302,11 +2302,10 @@ static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
      */
     TCGv_ptr t_p = tcg_temp_new_ptr();
     TCGv_i32 t_desc;
-    unsigned vsz = pred_full_reg_size(s);
-    unsigned desc;
+    unsigned desc = 0;
 
-    desc = vsz - 2;
-    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
+    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
+    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
 
     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
     t_desc = tcg_const_i32(desc);
@@ -2851,7 +2850,7 @@ static bool do_brk3(DisasContext *s, arg_rprr_s *a,
     TCGv_ptr n = tcg_temp_new_ptr();
     TCGv_ptr m = tcg_temp_new_ptr();
     TCGv_ptr g = tcg_temp_new_ptr();
-    TCGv_i32 t = tcg_const_i32(vsz - 2);
+    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
 
     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
@@ -2885,7 +2884,7 @@ static bool do_brk2(DisasContext *s, arg_rpr_s *a,
     TCGv_ptr d = tcg_temp_new_ptr();
     TCGv_ptr n = tcg_temp_new_ptr();
     TCGv_ptr g = tcg_temp_new_ptr();
-    TCGv_i32 t = tcg_const_i32(vsz - 2);
+    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
 
     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
@@ -2968,11 +2967,11 @@ static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
     } else {
         TCGv_ptr t_pn = tcg_temp_new_ptr();
         TCGv_ptr t_pg = tcg_temp_new_ptr();
-        unsigned desc;
+        unsigned desc = 0;
         TCGv_i32 t_desc;
 
-        desc = psz - 2;
-        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
+        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
+        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
 
         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
@@ -3098,7 +3097,8 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
     TCGv_i64 op0, op1, t0, t1, tmax;
     TCGv_i32 t2, t3;
     TCGv_ptr ptr;
-    unsigned desc, vsz = vec_full_reg_size(s);
+    unsigned vsz = vec_full_reg_size(s);
+    unsigned desc = 0;
     TCGCond cond;
 
     if (!sve_access_check(s)) {
@@ -3162,8 +3162,8 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
     /* Scale elements to bits.  */
     tcg_gen_shli_i32(t2, t2, a->esz);
 
-    desc = (vsz / 8) - 2;
-    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
+    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
+    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
     t3 = tcg_const_i32(desc);
 
     ptr = tcg_temp_new_ptr();
@@ -3440,7 +3440,7 @@ static void do_reduce(DisasContext *s, arg_rpr_esz *a,
 {
     unsigned vsz = vec_full_reg_size(s);
     unsigned p2vsz = pow2ceil(vsz);
-    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
+    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
     TCGv_ptr t_zn, t_pg, status;
     TCGv_i64 temp;