
[3/5] KVM: MMU: notifiers support for pinned sptes

Message ID 20140618231521.648087161@amt.cnet (mailing list archive)
State New, archived

Commit Message

Marcelo Tosatti June 18, 2014, 11:12 p.m. UTC
Request KVM_REQ_MMU_RELOAD when deleting sptes from MMU notifiers.

Keep pinned sptes intact during page aging.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 arch/x86/kvm/mmu.c |   71 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 62 insertions(+), 9 deletions(-)




Comments

Gleb Natapov June 19, 2014, 6:48 a.m. UTC | #1
On Wed, Jun 18, 2014 at 08:12:06PM -0300, mtosatti@redhat.com wrote:
> Request KVM_REQ_MMU_RELOAD when deleting sptes from MMU notifiers.
> 
> Keep pinned sptes intact during page aging.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> ---
>  arch/x86/kvm/mmu.c |   71 ++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 62 insertions(+), 9 deletions(-)
> 
> Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c
> ===================================================================
> --- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c	2014-06-18 17:28:24.339435654 -0300
> +++ kvm.pinned-sptes/arch/x86/kvm/mmu.c	2014-06-18 17:29:32.510225755 -0300
> @@ -1184,6 +1184,42 @@
>  		kvm_flush_remote_tlbs(vcpu->kvm);
>  }
>  
> +static void ack_flush(void *_completed)
> +{
> +}
> +
> +static void mmu_reload_pinned_vcpus(struct kvm *kvm)
> +{
> +	int i, cpu, me;
> +	cpumask_var_t cpus;
> +	struct kvm_vcpu *vcpu;
> +	unsigned int req = KVM_REQ_MMU_RELOAD;
> +
> +	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
> +
> +	me = get_cpu();
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		if (list_empty(&vcpu->arch.pinned_mmu_pages))
> +			continue;
> +		kvm_make_request(req, vcpu);
> +		cpu = vcpu->cpu;
> +
> +		/* Set ->requests bit before we read ->mode */
> +		smp_mb();
> +
> +		if (cpus != NULL && cpu != -1 && cpu != me &&
> +		      kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
> +			cpumask_set_cpu(cpu, cpus);
> +	}
> +	if (unlikely(cpus == NULL))
> +		smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
> +	else if (!cpumask_empty(cpus))
> +		smp_call_function_many(cpus, ack_flush, NULL, 1);
> +	put_cpu();
> +	free_cpumask_var(cpus);
> +	return;
> +}
This is a c&p of make_all_cpus_request(); the only difference is the check
of vcpu->arch.pinned_mmu_pages.  You can add make_some_cpus_request(..., bool (*predicate)(struct kvm_vcpu *))
to kvm_main.c and rewrite make_all_cpus_request() to use it instead.
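
[For concreteness, the suggested helper might look roughly like the
sketch below. The name and predicate signature come from the comment
above; the body simply mirrors the loop quoted from the patch. Read it
as an illustration of the refactor, not the final kvm_main.c code.]

bool make_some_cpus_request(struct kvm *kvm, unsigned int req,
			    bool (*predicate)(struct kvm_vcpu *vcpu))
{
	/* Sketch: would live in virt/kvm/kvm_main.c, with a declaration
	 * in include/linux/kvm_host.h so arch code can call it. */
	int i, cpu, me;
	cpumask_var_t cpus;
	struct kvm_vcpu *vcpu;
	bool called = true;

	zalloc_cpumask_var(&cpus, GFP_ATOMIC);

	me = get_cpu();
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* Skip vcpus the caller's predicate filters out;
		 * a NULL predicate means "all vcpus". */
		if (predicate && !predicate(vcpu))
			continue;
		kvm_make_request(req, vcpu);
		cpu = vcpu->cpu;

		/* Set ->requests bit before we read ->mode */
		smp_mb();

		if (cpus != NULL && cpu != -1 && cpu != me &&
		    kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
			cpumask_set_cpu(cpu, cpus);
	}
	if (unlikely(cpus == NULL))
		smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
	else if (!cpumask_empty(cpus))
		smp_call_function_many(cpus, ack_flush, NULL, 1);
	else
		called = false;
	put_cpu();
	free_cpumask_var(cpus);
	return called;
}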

> [rest of patch snipped; no further inline comments. The full patch appears below.]

--
			Gleb.
Marcelo Tosatti June 19, 2014, 6:28 p.m. UTC | #2
On Thu, Jun 19, 2014 at 09:48:50AM +0300, Gleb Natapov wrote:
> On Wed, Jun 18, 2014 at 08:12:06PM -0300, mtosatti@redhat.com wrote:
> > [commit message and patch snipped; quoted in full above]
> This is a c&p of make_all_cpus_request(); the only difference is the check
> of vcpu->arch.pinned_mmu_pages.  You can add make_some_cpus_request(..., bool (*predicate)(struct kvm_vcpu *))
> to kvm_main.c and rewrite make_all_cpus_request() to use it instead.

Half-way through it I decided it was better to c&p.

I can change make_all_cpus_request() though, if it makes more sense to you.

Gleb Natapov June 20, 2014, 10:11 a.m. UTC | #3
On Thu, Jun 19, 2014 at 03:28:25PM -0300, Marcelo Tosatti wrote:
> On Thu, Jun 19, 2014 at 09:48:50AM +0300, Gleb Natapov wrote:
> > On Wed, Jun 18, 2014 at 08:12:06PM -0300, mtosatti@redhat.com wrote:
> > > [commit message and patch snipped; quoted in full above]
> > This is a c&p of make_all_cpus_request(); the only difference is the check
> > of vcpu->arch.pinned_mmu_pages.  You can add make_some_cpus_request(..., bool (*predicate)(struct kvm_vcpu *))
> > to kvm_main.c and rewrite make_all_cpus_request() to use it instead.
> 
> Half-way through it I decided it was better to c&p.
> 
> I can change make_all_cpus_request() though, if it makes more sense to you.
> 
If I haven't missed anything and the check of pinned_mmu_pages is indeed the
only difference, then yes, reusing make_all_cpus_request() makes more sense.
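
[With such a helper in kvm_main.c, both callers would then reduce to
thin wrappers. A sketch under the same assumptions as above;
vcpu_has_pinned_pages() is an illustrative name, not something taken
from the patch.]

static bool vcpu_has_pinned_pages(struct kvm_vcpu *vcpu)
{
	return !list_empty(&vcpu->arch.pinned_mmu_pages);
}

static void mmu_reload_pinned_vcpus(struct kvm *kvm)
{
	/* Only vcpus that hold pinned mmu pages need the reload IPI. */
	make_some_cpus_request(kvm, KVM_REQ_MMU_RELOAD,
			       vcpu_has_pinned_pages);
}

bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
{
	/* NULL predicate: request on every vcpu, as before. */
	return make_some_cpus_request(kvm, req, NULL);
}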

--
			Gleb.

Patch

Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c	2014-06-18 17:28:24.339435654 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmu.c	2014-06-18 17:29:32.510225755 -0300
@@ -1184,6 +1184,42 @@ 
 		kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
+static void ack_flush(void *_completed)
+{
+}
+
+static void mmu_reload_pinned_vcpus(struct kvm *kvm)
+{
+	int i, cpu, me;
+	cpumask_var_t cpus;
+	struct kvm_vcpu *vcpu;
+	unsigned int req = KVM_REQ_MMU_RELOAD;
+
+	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
+
+	me = get_cpu();
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (list_empty(&vcpu->arch.pinned_mmu_pages))
+			continue;
+		kvm_make_request(req, vcpu);
+		cpu = vcpu->cpu;
+
+		/* Set ->requests bit before we read ->mode */
+		smp_mb();
+
+		if (cpus != NULL && cpu != -1 && cpu != me &&
+		      kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
+			cpumask_set_cpu(cpu, cpus);
+	}
+	if (unlikely(cpus == NULL))
+		smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
+	else if (!cpumask_empty(cpus))
+		smp_call_function_many(cpus, ack_flush, NULL, 1);
+	put_cpu();
+	free_cpumask_var(cpus);
+	return;
+}
+
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
  * spte write-protection is caused by protecting shadow page table.
@@ -1276,7 +1312,8 @@ 
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			   struct kvm_memory_slot *slot, unsigned long data)
+			   struct kvm_memory_slot *slot, unsigned long data,
+			   bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1286,6 +1323,14 @@ 
 		BUG_ON(!(*sptep & PT_PRESENT_MASK));
 		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
 
+		if (is_pinned_spte(*sptep)) {
+			/* don't nuke pinned sptes if page aging: return
+ 			 * young=yes instead.
+ 			 */
+			if (age)
+				return 1;
+			mmu_reload_pinned_vcpus(kvm);
+		}
 		drop_spte(kvm, sptep);
 		need_tlb_flush = 1;
 	}
@@ -1294,7 +1339,8 @@ 
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			     struct kvm_memory_slot *slot, unsigned long data)
+			     struct kvm_memory_slot *slot, unsigned long data,
+			     bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1312,6 +1358,9 @@ 
 
 		need_flush = 1;
 
+		if (is_pinned_spte(*sptep))
+			mmu_reload_pinned_vcpus(kvm);
+
 		if (pte_write(*ptep)) {
 			drop_spte(kvm, sptep);
 			sptep = rmap_get_first(*rmapp, &iter);
@@ -1342,7 +1391,8 @@ 
 				int (*handler)(struct kvm *kvm,
 					       unsigned long *rmapp,
 					       struct kvm_memory_slot *slot,
-					       unsigned long data))
+					       unsigned long data,
+					       bool age))
 {
 	int j;
 	int ret = 0;
@@ -1382,7 +1432,7 @@ 
 			rmapp = __gfn_to_rmap(gfn_start, j, memslot);
 
 			for (; idx <= idx_end; ++idx)
-				ret |= handler(kvm, rmapp++, memslot, data);
+				ret |= handler(kvm, rmapp++, memslot, data, false);
 		}
 	}
 
@@ -1393,7 +1443,8 @@ 
 			  unsigned long data,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
 					 struct kvm_memory_slot *slot,
-					 unsigned long data))
+					 unsigned long data,
+					 bool age))
 {
 	return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
 }
@@ -1414,7 +1465,8 @@ 
 }
 
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			 struct kvm_memory_slot *slot, unsigned long data)
+			 struct kvm_memory_slot *slot, unsigned long data,
+			 bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator uninitialized_var(iter);
@@ -1429,7 +1481,7 @@ 
 	 * out actively used pages or breaking up actively used hugepages.
 	 */
 	if (!shadow_accessed_mask) {
-		young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+		young = kvm_unmap_rmapp(kvm, rmapp, slot, data, true);
 		goto out;
 	}
 
@@ -1450,7 +1502,8 @@ 
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			      struct kvm_memory_slot *slot, unsigned long data)
+			      struct kvm_memory_slot *slot, unsigned long data,
+			      bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1488,7 +1541,7 @@ 
 
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-	kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
+	kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0, false);
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }