KVM: MMU: lazily drop large spte

Currently, kvm zaps the large spte if write-protected is needed, the later read can fault on that spte. Actually, we can make the large spte readonly instead of making them un-present, the page fault caused by read access can be avoided The idea is from Avi: | As I mentioned before, write-protecting a large spte is a good idea, | since it moves some work from protect-time to fault-time, so it reduces | jitter. This removes the need for the return value. This version has fixed the issue reported in 6b73a9606, the reason of that issue is that fast_page_fault() directly sets the readonly large spte to writable but only dirty the first page into the dirty-bitmap that means other pages are missed. Fixed it by only the normal sptes (on the PT_PAGE_TABLE_LEVEL level) can be fast fixed Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
author: Xiao Guangrong 2014-04-17 11:06:14 +0200
committer: Marcelo Tosatti 2014-04-23 22:49:50 +0200
commit: c126d94f2c90ed9daee24a94f1c67aff7e9bf387 (patch)
tree: ec5b3af6af89179ff9565c9f0fd32c7914a779ed /arch/x86/kvm/mmu.c
parent: KVM: MMU: properly check last spte in fast_page_fault() (diff)
download: kernel-qcow2-linux-c126d94f2c90ed9daee24a94f1c67aff7e9bf387.tar.gz
kernel-qcow2-linux-c126d94f2c90ed9daee24a94f1c67aff7e9bf387.tar.xz
kernel-qcow2-linux-c126d94f2c90ed9daee24a94f1c67aff7e9bf387.zip
1 files changed, 18 insertions, 16 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 63107049249d..ddf06963a74c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1176,8 +1176,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
 
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
- * spte writ-protection is caused by protecting shadow page table.
- * @flush indicates whether tlb need be flushed.
+ * spte write-protection is caused by protecting shadow page table.
  *
  * Note: write protection is difference between drity logging and spte
  * protection:
@@ -1186,10 +1185,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
  * - for spte protection, the spte can be writable only after unsync-ing
  *   shadow page.
  *
- * Return true if the spte is dropped.
+ * Return true if tlb need be flushed.
  */
-static bool
-spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
+static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
 {
 	u64 spte = *sptep;
 
@@ -1199,17 +1197,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
 
 	rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
 
-	if (__drop_large_spte(kvm, sptep)) {
-		*flush |= true;
-		return true;
-	}
-
 	if (pt_protect)
 		spte &= ~SPTE_MMU_WRITEABLE;
 	spte = spte & ~PT_WRITABLE_MASK;
 
-	*flush |= mmu_spte_update(sptep, spte);
-	return false;
+	return mmu_spte_update(sptep, spte);
 }
 
 static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
@@ -1221,11 +1213,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
 		BUG_ON(!(*sptep & PT_PRESENT_MASK));
-		if (spte_write_protect(kvm, sptep, &flush, pt_protect)) {
-			sptep = rmap_get_first(*rmapp, &iter);
-			continue;
-		}
 
+		flush |= spte_write_protect(kvm, sptep, pt_protect);
 		sptep = rmap_get_next(&iter);
 	}
 
@@ -2877,6 +2866,19 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 		goto exit;
 
 	/*
+	 * Do not fix write-permission on the large spte since we only dirty
+	 * the first page into the dirty-bitmap in fast_pf_fix_direct_spte()
+	 * that means other pages are missed if its slot is dirty-logged.
+	 *
+	 * Instead, we let the slow page fault path create a normal spte to
+	 * fix the access.
+	 *
+	 * See the comments in kvm_arch_commit_memory_region().
+	 */
+	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+		goto exit;
+
+	/*
 	 * Currently, fast page fault only works for direct mapping since
 	 * the gfn is not stable for indirect shadow page.
 	 * See Documentation/virtual/kvm/locking.txt to get more detail.
author	Xiao Guangrong	2014-04-17 11:06:14 +0200
committer	Marcelo Tosatti	2014-04-23 22:49:50 +0200
commit	c126d94f2c90ed9daee24a94f1c67aff7e9bf387 (patch)
tree	ec5b3af6af89179ff9565c9f0fd32c7914a779ed /arch/x86/kvm/mmu.c
parent	KVM: MMU: properly check last spte in fast_page_fault() (diff)
download	kernel-qcow2-linux-c126d94f2c90ed9daee24a94f1c67aff7e9bf387.tar.gz kernel-qcow2-linux-c126d94f2c90ed9daee24a94f1c67aff7e9bf387.tar.xz kernel-qcow2-linux-c126d94f2c90ed9daee24a94f1c67aff7e9bf387.zip