
[V4,11/15] KVM/MMU: Replace tlb flush function with range list flush function

Message ID 20181013145406.4911-12-Tianyu.Lan@microsoft.com (mailing list archive)
State New, archived
Series x86/KVM/Hyper-v: Add HV ept tlb range flush hypercall support in KVM

Commit Message

Tianyu Lan Oct. 13, 2018, 2:54 p.m. UTC
From: Lan Tianyu <Tianyu.Lan@microsoft.com>

Use the range list flush function in mmu_sync_children(),
kvm_mmu_commit_zap_page() and FNAME(sync_page)() instead of a full
remote TLB flush.

Signed-off-by: Lan Tianyu <Tianyu.Lan@microsoft.com>
---
 arch/x86/kvm/mmu.c         | 26 +++++++++++++++++++++++---
 arch/x86/kvm/paging_tmpl.h |  5 ++++-
 2 files changed, 27 insertions(+), 4 deletions(-)

Comments

Paolo Bonzini Oct. 15, 2018, 11:51 a.m. UTC | #1
On 13/10/2018 16:54, lantianyu1986@gmail.com wrote:
>  	while (mmu_unsync_walk(parent, &pages)) {
>  		bool protected = false;
> +		LIST_HEAD(flush_list);
>  
> -		for_each_sp(pages, sp, parents, i)
> +		for_each_sp(pages, sp, parents, i) {
>  			protected |= rmap_write_protect(vcpu, sp->gfn);
> +			kvm_mmu_queue_flush_request(sp, &flush_list);
> +		}

Here you already know that the page has to be flushed, because you are
dealing with shadow page tables and those always use 4K pages.  So the
check on is_last_spte() is unnecessary.
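For illustration, something along these lines (an untested sketch, built
only from the loop and the flush_list already in this patch) should be
all that is needed here:

	for_each_sp(pages, sp, parents, i) {
		protected |= rmap_write_protect(vcpu, sp->gfn);
		/* Shadow pages synced here always map 4K pages. */
		if (sp->sptep)
			list_add(&sp->flush_link, &flush_list);
	}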

> 
>  					 pte_access, PT_PAGE_TABLE_LEVEL,
>  					 gfn, spte_to_pfn(sp->spt[i]),
>  					 true, false, host_writable);
> +		if (set_spte_ret && kvm_available_flush_tlb_with_range())
> +			kvm_mmu_queue_flush_request(sp, &flush_list);
>  	}

This is wrong, I think.  sp is always the same throughout the loop, so
you are adding it multiple times to flush_list.

Instead, you need to add a separate range for each virtual address (in
this case L2 GPA) that is synced; but for each PTE for which you call
set_spte() here, you could be syncing multiple L2 GPAs if a single
page is reused multiple times by the guest's EPT page tables.
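As a rough, untested sketch only (kvm_flush_remote_tlbs_with_address()
is assumed here as a single-range helper taking a start gfn and a page
count, along the lines of the earlier patches in the series), the sync
loop would have to work on the gfn rather than on sp, e.g.:

		int ret;

		ret = set_spte(vcpu, &sp->spt[i], pte_access,
			       PT_PAGE_TABLE_LEVEL, gfn,
			       spte_to_pfn(sp->spt[i]), true, false,
			       host_writable);
		set_spte_ret |= ret;

		/*
		 * gfn is the L2 GPA for this PTE, so the range to flush
		 * (or queue) is per-gfn, not the whole shadow page sp.
		 */
		if ((ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH) &&
		    kvm_available_flush_tlb_with_range())
			kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1);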

And actually I may be missing something, but doesn't this apply to all
call sites?  For mmu_sync_children you can do the flush in
__rmap_write_protect and return false, similar to the first part of the
series, but not for kvm_mmu_commit_zap_page and sync_page.
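For illustration, a sketch of that idea, shown at the rmap_write_protect()
wrapper rather than in __rmap_write_protect() itself since the gfn is
readily available there (untested; the single-range helper name
kvm_flush_remote_tlbs_with_address() is again an assumption):

static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
{
	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
	bool flush = kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn);

	/*
	 * The rmap walk already gives us the L2 GPA, so flush just this
	 * gfn here and tell the caller nothing is left to flush.
	 */
	if (flush && kvm_available_flush_tlb_with_range()) {
		kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1);
		return false;
	}

	return flush;
}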

Can you simplify this series to only have hv_remote_flush_tlb_with_range
and remove all the flush_list stuff?  That first part is safe and well
understood, because it uses the rmap and so it's clear that you have L2
GPAs at hand.  Most of the remarks I made on the Hyper-V API will still
apply.

Paolo

>  	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
> -		kvm_flush_remote_tlbs(vcpu->kvm);
> +		kvm_flush_remote_tlbs_with_list(vcpu->kvm, &flush_list);
>  
>  	return nr_present;

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 393f4048dd7a..69e4cff1115d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1100,6 +1100,13 @@  static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot,
 	}
 }
 
+static void kvm_mmu_queue_flush_request(struct kvm_mmu_page *sp,
+		struct list_head *flush_list)
+{
+	if (sp->sptep && is_last_spte(*sp->sptep, sp->role.level))
+		list_add(&sp->flush_link, flush_list);
+}
+
 void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	update_gfn_disallow_lpage_count(slot, gfn, 1);
@@ -2372,12 +2379,16 @@  static void mmu_sync_children(struct kvm_vcpu *vcpu,
 
 	while (mmu_unsync_walk(parent, &pages)) {
 		bool protected = false;
+		LIST_HEAD(flush_list);
 
-		for_each_sp(pages, sp, parents, i)
+		for_each_sp(pages, sp, parents, i) {
 			protected |= rmap_write_protect(vcpu, sp->gfn);
+			kvm_mmu_queue_flush_request(sp, &flush_list);
+		}
 
 		if (protected) {
-			kvm_flush_remote_tlbs(vcpu->kvm);
+			kvm_flush_remote_tlbs_with_list(vcpu->kvm,
+					&flush_list);
 			flush = false;
 		}
 
@@ -2713,6 +2724,7 @@  static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list)
 {
 	struct kvm_mmu_page *sp, *nsp;
+	LIST_HEAD(flush_list);
 
 	if (list_empty(invalid_list))
 		return;
@@ -2726,7 +2738,15 @@  static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 	 * In addition, kvm_flush_remote_tlbs waits for all vcpus to exit
 	 * guest mode and/or lockless shadow page table walks.
 	 */
-	kvm_flush_remote_tlbs(kvm);
+	if (kvm_available_flush_tlb_with_range()) {
+		list_for_each_entry(sp, invalid_list, link)
+			kvm_mmu_queue_flush_request(sp, &flush_list);
+
+		if (!list_empty(&flush_list))
+			kvm_flush_remote_tlbs_with_list(kvm, &flush_list);
+	} else {
+		kvm_flush_remote_tlbs(kvm);
+	}
 
 	list_for_each_entry_safe(sp, nsp, invalid_list, link) {
 		WARN_ON(!sp->role.invalid || sp->root_count);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 833e8855bbc9..e44737ce6bad 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -973,6 +973,7 @@  static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	bool host_writable;
 	gpa_t first_pte_gpa;
 	int set_spte_ret = 0;
+	LIST_HEAD(flush_list);
 
 	/* direct kvm_mmu_page can not be unsync. */
 	BUG_ON(sp->role.direct);
@@ -1033,10 +1034,12 @@  static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 					 pte_access, PT_PAGE_TABLE_LEVEL,
 					 gfn, spte_to_pfn(sp->spt[i]),
 					 true, false, host_writable);
+		if (set_spte_ret && kvm_available_flush_tlb_with_range())
+			kvm_mmu_queue_flush_request(sp, &flush_list);
 	}
 
 	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
-		kvm_flush_remote_tlbs(vcpu->kvm);
+		kvm_flush_remote_tlbs_with_list(vcpu->kvm, &flush_list);
 
 	return nr_present;
 }