summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Henderson2020-06-26 05:31:16 +0200
committerPeter Maydell2020-06-26 15:31:12 +0200
commite26d0d226892f67435cadcce86df0ddfb9943174 (patch)
tree5f89caaf281b762c318b40cb527f0721f22657ce
parenttarget/arm: Restrict the values of DCZID.BS under TCG (diff)
downloadqemu-e26d0d226892f67435cadcce86df0ddfb9943174.tar.gz
qemu-e26d0d226892f67435cadcce86df0ddfb9943174.tar.xz
qemu-e26d0d226892f67435cadcce86df0ddfb9943174.zip
target/arm: Simplify DC_ZVA
Now that we know that the operation is on a single page, we need not loop over pages while probing. Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200626033144.790098-19-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--target/arm/helper-a64.c96
1 files changed, 26 insertions, 70 deletions
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index bc0649a44a..8682630ff6 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -1119,85 +1119,41 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
* (which matches the usual QEMU behaviour of not implementing either
* alignment faults or any memory attribute handling).
*/
-
- ARMCPU *cpu = env_archcpu(env);
- uint64_t blocklen = 4 << cpu->dcz_blocksize;
+ int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
uint64_t vaddr = vaddr_in & ~(blocklen - 1);
+ int mmu_idx = cpu_mmu_index(env, false);
+ void *mem;
+
+ /*
+ * Trapless lookup. In addition to actual invalid page, may
+ * return NULL for I/O, watchpoints, clean pages, etc.
+ */
+ mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#ifndef CONFIG_USER_ONLY
- {
+ if (unlikely(!mem)) {
+ uintptr_t ra = GETPC();
+
/*
- * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
- * the block size so we might have to do more than one TLB lookup.
- * We know that in fact for any v8 CPU the page size is at least 4K
- * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
- * 1K as an artefact of legacy v5 subpage support being present in the
- * same QEMU executable. So in practice the hostaddr[] array has
- * two entries, given the current setting of TARGET_PAGE_BITS_MIN.
+ * Trap if accessing an invalid page. DC_ZVA requires that we supply
+ * the original pointer for an invalid page. But watchpoints require
+ * that we probe the actual space. So do both.
*/
- int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
- void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
- int try, i;
- unsigned mmu_idx = cpu_mmu_index(env, false);
- TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
-
- assert(maxidx <= ARRAY_SIZE(hostaddr));
-
- for (try = 0; try < 2; try++) {
-
- for (i = 0; i < maxidx; i++) {
- hostaddr[i] = tlb_vaddr_to_host(env,
- vaddr + TARGET_PAGE_SIZE * i,
- 1, mmu_idx);
- if (!hostaddr[i]) {
- break;
- }
- }
- if (i == maxidx) {
- /*
- * If it's all in the TLB it's fair game for just writing to;
- * we know we don't need to update dirty status, etc.
- */
- for (i = 0; i < maxidx - 1; i++) {
- memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
- }
- memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
- return;
- }
+ (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
+ mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);
+
+ if (unlikely(!mem)) {
/*
- * OK, try a store and see if we can populate the tlb. This
- * might cause an exception if the memory isn't writable,
- * in which case we will longjmp out of here. We must for
- * this purpose use the actual register value passed to us
- * so that we get the fault address right.
+ * The only remaining reason for mem == NULL is I/O.
+ * Just do a series of byte writes as the architecture demands.
*/
- helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
- /* Now we can populate the other TLB entries, if any */
- for (i = 0; i < maxidx; i++) {
- uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
- if (va != (vaddr_in & TARGET_PAGE_MASK)) {
- helper_ret_stb_mmu(env, va, 0, oi, GETPC());
- }
+ for (int i = 0; i < blocklen; i++) {
+ cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
}
- }
-
- /*
- * Slow path (probably attempt to do this to an I/O device or
- * similar, or clearing of a block of code we have translations
- * cached for). Just do a series of byte writes as the architecture
- * demands. It's not worth trying to use a cpu_physical_memory_map(),
- * memset(), unmap() sequence here because:
- * + we'd need to account for the blocksize being larger than a page
- * + the direct-RAM access case is almost always going to be dealt
- * with in the fastpath code above, so there's no speed benefit
- * + we would have to deal with the map returning NULL because the
- * bounce buffer was in use
- */
- for (i = 0; i < blocklen; i++) {
- helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
+ return;
}
}
-#else
- memset(g2h(vaddr), 0, blocklen);
#endif
+
+ memset(mem, 0, blocklen);
}