diff mbox

[1/1,v2] KVM: MMU: Use ptep_user for cmpxchg_gpte()

Message ID 20110501143307.1bcfd375.takuya.yoshikawa@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Takuya Yoshikawa May 1, 2011, 5:33 a.m. UTC
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>

The address of the gpte was already calculated and stored in ptep_user
before entering cmpxchg_gpte().

This patch makes cmpxchg_gpte() use that address, to make it clear that we
are using the same address during walk_addr_generic().

Note that the unlikely annotations are used to show that the conditions
are unusual, rather than to improve performance.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
---
 arch/x86/kvm/paging_tmpl.h |   26 ++++++++++++--------------
 1 files changed, 12 insertions(+), 14 deletions(-)

Comments

Marcelo Tosatti May 4, 2011, 11:16 a.m. UTC | #1
On Sun, May 01, 2011 at 02:33:07PM +0900, Takuya Yoshikawa wrote:
> From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
> 
> The address of the gpte was already calculated and stored in ptep_user
> before entering cmpxchg_gpte().
> 
> This patch makes cmpxchg_gpte() to use that to make it clear that we
> are using the same address during walk_addr_generic().
> 
> Note that the unlikely annotations are used to show that the conditions
> are something unusual rather than for performance.
> 
> Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
> ---
>  arch/x86/kvm/paging_tmpl.h |   26 ++++++++++++--------------
>  1 files changed, 12 insertions(+), 14 deletions(-)

Hi Takuya,

> 
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 52450a6..f9d9af1 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -79,21 +79,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
>  }
>  
>  static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> -			 gfn_t table_gfn, unsigned index,
> -			 pt_element_t orig_pte, pt_element_t new_pte)
> +			       pt_element_t __user *ptep_user, unsigned index,
> +			       pt_element_t orig_pte, pt_element_t new_pte)
>  {
> +	int npages;
>  	pt_element_t ret;
>  	pt_element_t *table;
>  	struct page *page;
> -	gpa_t gpa;
>  
> -	gpa = mmu->translate_gpa(vcpu, table_gfn << PAGE_SHIFT,
> -				 PFERR_USER_MASK|PFERR_WRITE_MASK);
> -	if (gpa == UNMAPPED_GVA)
> +	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
> +	/* Check if the user is doing something meaningless. */
> +	if (unlikely(npages != 1))
>  		return -EFAULT;
>  
> -	page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa));
> -

gfn_to_page is the interface for mapping guest pages inside KVM,
and you're bypassing it for IMO no good reason (i doubt there's any
performance improvement by skipping the translation).


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity May 4, 2011, 11:44 a.m. UTC | #2
On 05/04/2011 02:16 PM, Marcelo Tosatti wrote:
> On Sun, May 01, 2011 at 02:33:07PM +0900, Takuya Yoshikawa wrote:
> >  From: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp>
> >
> >  The address of the gpte was already calculated and stored in ptep_user
> >  before entering cmpxchg_gpte().
> >
> >  This patch makes cmpxchg_gpte() to use that to make it clear that we
> >  are using the same address during walk_addr_generic().
> >
> >  Note that the unlikely annotations are used to show that the conditions
> >  are something unusual rather than for performance.
> >
> >  Signed-off-by: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp>
> >  ---
> >   arch/x86/kvm/paging_tmpl.h |   26 ++++++++++++--------------
> >   1 files changed, 12 insertions(+), 14 deletions(-)
>
> Hi Takuya,
>
> >
> >  diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> >  index 52450a6..f9d9af1 100644
> >  --- a/arch/x86/kvm/paging_tmpl.h
> >  +++ b/arch/x86/kvm/paging_tmpl.h
> >  @@ -79,21 +79,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
> >   }
> >
> >   static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> >  -			 gfn_t table_gfn, unsigned index,
> >  -			 pt_element_t orig_pte, pt_element_t new_pte)
> >  +			       pt_element_t __user *ptep_user, unsigned index,
> >  +			       pt_element_t orig_pte, pt_element_t new_pte)
> >   {
> >  +	int npages;
> >   	pt_element_t ret;
> >   	pt_element_t *table;
> >   	struct page *page;
> >  -	gpa_t gpa;
> >
> >  -	gpa = mmu->translate_gpa(vcpu, table_gfn<<  PAGE_SHIFT,
> >  -				 PFERR_USER_MASK|PFERR_WRITE_MASK);
> >  -	if (gpa == UNMAPPED_GVA)
> >  +	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1,&page);
> >  +	/* Check if the user is doing something meaningless. */
> >  +	if (unlikely(npages != 1))
> >   		return -EFAULT;
> >
> >  -	page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa));
> >  -
>
> gfn_to_page is the interface for mapping guest pages inside KVM,
> and you're bypassing it for IMO no good reason (i doubt there's any
> performance improvement by skipping the translation).

He isn't skipping it - he's using gfn_to_hva() to derive ptep_user, 
which is equivalent.

The motivation isn't performance, it's to ensure that cmpxchg_gpte() 
operates on the same address as we read it from.

(btw, we're missing a mark_page_dirty() here, no?)
Marcelo Tosatti May 4, 2011, 11:58 a.m. UTC | #3
On Wed, May 04, 2011 at 02:44:47PM +0300, Avi Kivity wrote:
> On 05/04/2011 02:16 PM, Marcelo Tosatti wrote:
> >On Sun, May 01, 2011 at 02:33:07PM +0900, Takuya Yoshikawa wrote:
> >>  From: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp>
> >>
> >>  The address of the gpte was already calculated and stored in ptep_user
> >>  before entering cmpxchg_gpte().
> >>
> >>  This patch makes cmpxchg_gpte() to use that to make it clear that we
> >>  are using the same address during walk_addr_generic().
> >>
> >>  Note that the unlikely annotations are used to show that the conditions
> >>  are something unusual rather than for performance.
> >>
> >>  Signed-off-by: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp>
> >>  ---
> >>   arch/x86/kvm/paging_tmpl.h |   26 ++++++++++++--------------
> >>   1 files changed, 12 insertions(+), 14 deletions(-)
> >
> >Hi Takuya,
> >
> >>
> >>  diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> >>  index 52450a6..f9d9af1 100644
> >>  --- a/arch/x86/kvm/paging_tmpl.h
> >>  +++ b/arch/x86/kvm/paging_tmpl.h
> >>  @@ -79,21 +79,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
> >>   }
> >>
> >>   static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> >>  -			 gfn_t table_gfn, unsigned index,
> >>  -			 pt_element_t orig_pte, pt_element_t new_pte)
> >>  +			       pt_element_t __user *ptep_user, unsigned index,
> >>  +			       pt_element_t orig_pte, pt_element_t new_pte)
> >>   {
> >>  +	int npages;
> >>   	pt_element_t ret;
> >>   	pt_element_t *table;
> >>   	struct page *page;
> >>  -	gpa_t gpa;
> >>
> >>  -	gpa = mmu->translate_gpa(vcpu, table_gfn<<  PAGE_SHIFT,
> >>  -				 PFERR_USER_MASK|PFERR_WRITE_MASK);
> >>  -	if (gpa == UNMAPPED_GVA)
> >>  +	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1,&page);
> >>  +	/* Check if the user is doing something meaningless. */
> >>  +	if (unlikely(npages != 1))
> >>   		return -EFAULT;
> >>
> >>  -	page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa));
> >>  -
> >
> >gfn_to_page is the interface for mapping guest pages inside KVM,
> >and you're bypassing it for IMO no good reason (i doubt there's any
> >performance improvement by skipping the translation).
> 
> He isn't skipping it - he's using gfn_to_hva() to derive ptep_user,
> which is equivalent.

Well, he is removing the second translation. So that is skipped.

> The motivation isn't performance, it's to ensure that cmpxchg_gpte()
> operates on the same address as we read it from.

OK, my objection is direct get_user_pages_fast usage. Please pass gfn to
gfn_to_page.

> (btw, we're missing a mark_page_dirty() here, no?)

No, see line 245.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity May 4, 2011, 12:11 p.m. UTC | #4
On 05/04/2011 02:58 PM, Marcelo Tosatti wrote:
> On Wed, May 04, 2011 at 02:44:47PM +0300, Avi Kivity wrote:
> >  On 05/04/2011 02:16 PM, Marcelo Tosatti wrote:
> >  >On Sun, May 01, 2011 at 02:33:07PM +0900, Takuya Yoshikawa wrote:
> >  >>   From: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp>
> >  >>
> >  >>   The address of the gpte was already calculated and stored in ptep_user
> >  >>   before entering cmpxchg_gpte().
> >  >>
> >  >>   This patch makes cmpxchg_gpte() to use that to make it clear that we
> >  >>   are using the same address during walk_addr_generic().
> >  >>
> >  >>   Note that the unlikely annotations are used to show that the conditions
> >  >>   are something unusual rather than for performance.
> >  >>
> >  >>   Signed-off-by: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp>
> >  >>   ---
> >  >>    arch/x86/kvm/paging_tmpl.h |   26 ++++++++++++--------------
> >  >>    1 files changed, 12 insertions(+), 14 deletions(-)
> >  >
> >  >Hi Takuya,
> >  >
> >  >>
> >  >>   diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> >  >>   index 52450a6..f9d9af1 100644
> >  >>   --- a/arch/x86/kvm/paging_tmpl.h
> >  >>   +++ b/arch/x86/kvm/paging_tmpl.h
> >  >>   @@ -79,21 +79,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
> >  >>    }
> >  >>
> >  >>    static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> >  >>   -			 gfn_t table_gfn, unsigned index,
> >  >>   -			 pt_element_t orig_pte, pt_element_t new_pte)
> >  >>   +			       pt_element_t __user *ptep_user, unsigned index,
> >  >>   +			       pt_element_t orig_pte, pt_element_t new_pte)
> >  >>    {
> >  >>   +	int npages;
> >  >>    	pt_element_t ret;
> >  >>    	pt_element_t *table;
> >  >>    	struct page *page;
> >  >>   -	gpa_t gpa;
> >  >>
> >  >>   -	gpa = mmu->translate_gpa(vcpu, table_gfn<<   PAGE_SHIFT,
> >  >>   -				 PFERR_USER_MASK|PFERR_WRITE_MASK);
> >  >>   -	if (gpa == UNMAPPED_GVA)
> >  >>   +	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1,&page);
> >  >>   +	/* Check if the user is doing something meaningless. */
> >  >>   +	if (unlikely(npages != 1))
> >  >>    		return -EFAULT;
> >  >>
> >  >>   -	page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa));
> >  >>   -
> >  >
> >  >gfn_to_page is the interface for mapping guest pages inside KVM,
> >  >and you're bypassing it for IMO no good reason (i doubt there's any
> >  >performance improvement by skipping the translation).
> >
> >  He isn't skipping it - he's using gfn_to_hva() to derive ptep_user,
> >  which is equivalent.
>
> Well, he is removing the second translation. So that is skipped.

hva->gpa translation is not supposed to be changed by kvm.

> >  The motivation isn't performance, it's to ensure that cmpxchg_gpte()
> >  operates on the same address as we read it from.
>
> OK, my objection is direct get_user_pages_fast usage. Please pass gfn to
> gfn_to_page.

We do get_user() in read_gpte().  That is equivalent to 
get_user_pages().  So we already broke that layer of abstraction.

> >  (btw, we're missing a mark_page_dirty() here, no?)
>
> No, see line 245.

Ah, yes.  Thanks.
Takuya Yoshikawa May 4, 2011, 2 p.m. UTC | #5
> > >  >gfn_to_page is the interface for mapping guest pages inside KVM,
> > >  >and you're bypassing it for IMO no good reason (i doubt there's any
> > >  >performance improvement by skipping the translation).
> > >
> > >  He isn't skipping it - he's using gfn_to_hva() to derive ptep_user,
> > >  which is equivalent.
> >
> > Well, he is removing the second translation. So that is skipped.
> 
> hva->gpa translation is not supposed to be changed by kvm.
> 
> > >  The motivation isn't performance, it's to ensure that cmpxchg_gpte()
> > >  operates on the same address as we read it from.
> >
> > OK, my objection is direct get_user_pages_fast usage. Please pass gfn to
> > gfn_to_page.
> 
> We do get_user() in read_gpte().  That is equivalent to 
> get_user_pages().  So we already broke that layer of abstraction.

At first, I broke hva_to_pfn() into two functions:
  hva_to_page
  hva_to_pfn
and used the former to get the page.

But after making that patch, I thought it might be excessive to do such
things in a function that performs low-level page manipulations like
kmapping.

Actually, we are already assuming, without any extra checks, that the page
returned by gfn_to_page() is always a normal page which contains gptes.

Which way do you like the best?

Takuya
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity May 4, 2011, 2:27 p.m. UTC | #6
On 05/04/2011 05:00 PM, Takuya Yoshikawa wrote:
> >
> >  We do get_user() in read_gpte().  That is equivalent to
> >  get_user_pages().  So we already broke that layer of abstraction.
>
> At first, I broke hva_to_pfn() into two functions:
>    hva_to_page
>    hva_to_pfn
> and used the former to get the page.

Ouch, what a complicated function.

> But after making that patch, I thought it might be a bit extra to do such
> things in the function which is doing low level page manipulations like
> kmapping.
>
> Actually, we are already assuming that the page returned by gfn_to_page is
> always a usual page which contains gptes without extra checks.

When gfn_to_page() returns an error, it is actually a real page in host 
memory that can be scribbled on.  So no further checks are needed.

> Which way do you like the best?

I think it should work fine as is.  The question is whether we're doing 
a layering violation here (but in any case, that was introduced by 
ptep_user, not this patch).
Marcelo Tosatti May 4, 2011, 4:13 p.m. UTC | #7
On Sun, May 01, 2011 at 02:33:07PM +0900, Takuya Yoshikawa wrote:
> From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
> 
> The address of the gpte was already calculated and stored in ptep_user
> before entering cmpxchg_gpte().
> 
> This patch makes cmpxchg_gpte() to use that to make it clear that we
> are using the same address during walk_addr_generic().
> 
> Note that the unlikely annotations are used to show that the conditions
> are something unusual rather than for performance.
> 
> Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
> ---
>  arch/x86/kvm/paging_tmpl.h |   26 ++++++++++++--------------
>  1 files changed, 12 insertions(+), 14 deletions(-)

Applied, thanks.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 52450a6..f9d9af1 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -79,21 +79,19 @@  static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
 }
 
 static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-			 gfn_t table_gfn, unsigned index,
-			 pt_element_t orig_pte, pt_element_t new_pte)
+			       pt_element_t __user *ptep_user, unsigned index,
+			       pt_element_t orig_pte, pt_element_t new_pte)
 {
+	int npages;
 	pt_element_t ret;
 	pt_element_t *table;
 	struct page *page;
-	gpa_t gpa;
 
-	gpa = mmu->translate_gpa(vcpu, table_gfn << PAGE_SHIFT,
-				 PFERR_USER_MASK|PFERR_WRITE_MASK);
-	if (gpa == UNMAPPED_GVA)
+	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
+	/* Check if the user is doing something meaningless. */
+	if (unlikely(npages != 1))
 		return -EFAULT;
 
-	page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa));
-
 	table = kmap_atomic(page, KM_USER0);
 	ret = CMPXCHG(&table[index], orig_pte, new_pte);
 	kunmap_atomic(table, KM_USER0);
@@ -234,9 +232,9 @@  walk:
 			int ret;
 			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
 						       sizeof(pte));
-			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn,
-					index, pte, pte|PT_ACCESSED_MASK);
-			if (ret < 0) {
+			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
+						  pte, pte|PT_ACCESSED_MASK);
+			if (unlikely(ret < 0)) {
 				present = false;
 				break;
 			} else if (ret)
@@ -293,9 +291,9 @@  walk:
 		int ret;
 
 		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
-		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn, index, pte,
-			    pte|PT_DIRTY_MASK);
-		if (ret < 0) {
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
+					  pte, pte|PT_DIRTY_MASK);
+		if (unlikely(ret < 0)) {
 			present = false;
 			goto error;
 		} else if (ret)