Message ID: 20110501143307.1bcfd375.takuya.yoshikawa@gmail.com (mailing list archive)
State: New, archived
Headers: show
On Sun, May 01, 2011 at 02:33:07PM +0900, Takuya Yoshikawa wrote: > From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp> > > The address of the gpte was already calculated and stored in ptep_user > before entering cmpxchg_gpte(). > > This patch makes cmpxchg_gpte() to use that to make it clear that we > are using the same address during walk_addr_generic(). > > Note that the unlikely annotations are used to show that the conditions > are something unusual rather than for performance. > > Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp> > --- > arch/x86/kvm/paging_tmpl.h | 26 ++++++++++++-------------- > 1 files changed, 12 insertions(+), 14 deletions(-) Hi Takuya, > > diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h > index 52450a6..f9d9af1 100644 > --- a/arch/x86/kvm/paging_tmpl.h > +++ b/arch/x86/kvm/paging_tmpl.h > @@ -79,21 +79,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) > } > > static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, > - gfn_t table_gfn, unsigned index, > - pt_element_t orig_pte, pt_element_t new_pte) > + pt_element_t __user *ptep_user, unsigned index, > + pt_element_t orig_pte, pt_element_t new_pte) > { > + int npages; > pt_element_t ret; > pt_element_t *table; > struct page *page; > - gpa_t gpa; > > - gpa = mmu->translate_gpa(vcpu, table_gfn << PAGE_SHIFT, > - PFERR_USER_MASK|PFERR_WRITE_MASK); > - if (gpa == UNMAPPED_GVA) > + npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); > + /* Check if the user is doing something meaningless. */ > + if (unlikely(npages != 1)) > return -EFAULT; > > - page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa)); > - gfn_to_page is the interface for mapping guest pages inside KVM, and you're bypassing it for IMO no good reason (i doubt there's any performance improvement by skipping the translation). 
-- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/04/2011 02:16 PM, Marcelo Tosatti wrote: > On Sun, May 01, 2011 at 02:33:07PM +0900, Takuya Yoshikawa wrote: > > From: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp> > > > > The address of the gpte was already calculated and stored in ptep_user > > before entering cmpxchg_gpte(). > > > > This patch makes cmpxchg_gpte() to use that to make it clear that we > > are using the same address during walk_addr_generic(). > > > > Note that the unlikely annotations are used to show that the conditions > > are something unusual rather than for performance. > > > > Signed-off-by: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp> > > --- > > arch/x86/kvm/paging_tmpl.h | 26 ++++++++++++-------------- > > 1 files changed, 12 insertions(+), 14 deletions(-) > > Hi Takuya, > > > > > diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h > > index 52450a6..f9d9af1 100644 > > --- a/arch/x86/kvm/paging_tmpl.h > > +++ b/arch/x86/kvm/paging_tmpl.h > > @@ -79,21 +79,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) > > } > > > > static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, > > - gfn_t table_gfn, unsigned index, > > - pt_element_t orig_pte, pt_element_t new_pte) > > + pt_element_t __user *ptep_user, unsigned index, > > + pt_element_t orig_pte, pt_element_t new_pte) > > { > > + int npages; > > pt_element_t ret; > > pt_element_t *table; > > struct page *page; > > - gpa_t gpa; > > > > - gpa = mmu->translate_gpa(vcpu, table_gfn<< PAGE_SHIFT, > > - PFERR_USER_MASK|PFERR_WRITE_MASK); > > - if (gpa == UNMAPPED_GVA) > > + npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1,&page); > > + /* Check if the user is doing something meaningless. 
*/ > > + if (unlikely(npages != 1)) > > return -EFAULT; > > > > - page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa)); > > - > > gfn_to_page is the interface for mapping guest pages inside KVM, > and you're bypassing it for IMO no good reason (i doubt there's any > performance improvement by skipping the translation). He isn't skipping it - he's using gfn_to_hva() to derive ptep_user, which is equivalent. The motivation isn't performance, it's to ensure that cmpxchg_gpte() operates on the same address as we read it from. (btw, we're missing a mark_page_dirty() here, no?)
On Wed, May 04, 2011 at 02:44:47PM +0300, Avi Kivity wrote: > On 05/04/2011 02:16 PM, Marcelo Tosatti wrote: > >On Sun, May 01, 2011 at 02:33:07PM +0900, Takuya Yoshikawa wrote: > >> From: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp> > >> > >> The address of the gpte was already calculated and stored in ptep_user > >> before entering cmpxchg_gpte(). > >> > >> This patch makes cmpxchg_gpte() to use that to make it clear that we > >> are using the same address during walk_addr_generic(). > >> > >> Note that the unlikely annotations are used to show that the conditions > >> are something unusual rather than for performance. > >> > >> Signed-off-by: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp> > >> --- > >> arch/x86/kvm/paging_tmpl.h | 26 ++++++++++++-------------- > >> 1 files changed, 12 insertions(+), 14 deletions(-) > > > >Hi Takuya, > > > >> > >> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h > >> index 52450a6..f9d9af1 100644 > >> --- a/arch/x86/kvm/paging_tmpl.h > >> +++ b/arch/x86/kvm/paging_tmpl.h > >> @@ -79,21 +79,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) > >> } > >> > >> static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, > >> - gfn_t table_gfn, unsigned index, > >> - pt_element_t orig_pte, pt_element_t new_pte) > >> + pt_element_t __user *ptep_user, unsigned index, > >> + pt_element_t orig_pte, pt_element_t new_pte) > >> { > >> + int npages; > >> pt_element_t ret; > >> pt_element_t *table; > >> struct page *page; > >> - gpa_t gpa; > >> > >> - gpa = mmu->translate_gpa(vcpu, table_gfn<< PAGE_SHIFT, > >> - PFERR_USER_MASK|PFERR_WRITE_MASK); > >> - if (gpa == UNMAPPED_GVA) > >> + npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1,&page); > >> + /* Check if the user is doing something meaningless. 
*/ > >> + if (unlikely(npages != 1)) > >> return -EFAULT; > >> > >> - page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa)); > >> - > > > >gfn_to_page is the interface for mapping guest pages inside KVM, > >and you're bypassing it for IMO no good reason (i doubt there's any > >performance improvement by skipping the translation). > > He isn't skipping it - he's using gfn_to_hva() to derive ptep_user, > which is equivalent. Well, he is removing the second translation. So that is skipped. > The motivation isn't performance, it's to ensure that cmpxchg_gpte() > operates on the same address as we read it from. OK, my objection is direct get_user_pages_fast usage. Please pass gfn to gfn_to_page. > (btw, we're missing a mark_page_dirty() here, no?) No, see line 245. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/04/2011 02:58 PM, Marcelo Tosatti wrote: > On Wed, May 04, 2011 at 02:44:47PM +0300, Avi Kivity wrote: > > On 05/04/2011 02:16 PM, Marcelo Tosatti wrote: > > >On Sun, May 01, 2011 at 02:33:07PM +0900, Takuya Yoshikawa wrote: > > >> From: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp> > > >> > > >> The address of the gpte was already calculated and stored in ptep_user > > >> before entering cmpxchg_gpte(). > > >> > > >> This patch makes cmpxchg_gpte() to use that to make it clear that we > > >> are using the same address during walk_addr_generic(). > > >> > > >> Note that the unlikely annotations are used to show that the conditions > > >> are something unusual rather than for performance. > > >> > > >> Signed-off-by: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp> > > >> --- > > >> arch/x86/kvm/paging_tmpl.h | 26 ++++++++++++-------------- > > >> 1 files changed, 12 insertions(+), 14 deletions(-) > > > > > >Hi Takuya, > > > > > >> > > >> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h > > >> index 52450a6..f9d9af1 100644 > > >> --- a/arch/x86/kvm/paging_tmpl.h > > >> +++ b/arch/x86/kvm/paging_tmpl.h > > >> @@ -79,21 +79,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) > > >> } > > >> > > >> static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, > > >> - gfn_t table_gfn, unsigned index, > > >> - pt_element_t orig_pte, pt_element_t new_pte) > > >> + pt_element_t __user *ptep_user, unsigned index, > > >> + pt_element_t orig_pte, pt_element_t new_pte) > > >> { > > >> + int npages; > > >> pt_element_t ret; > > >> pt_element_t *table; > > >> struct page *page; > > >> - gpa_t gpa; > > >> > > >> - gpa = mmu->translate_gpa(vcpu, table_gfn<< PAGE_SHIFT, > > >> - PFERR_USER_MASK|PFERR_WRITE_MASK); > > >> - if (gpa == UNMAPPED_GVA) > > >> + npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1,&page); > > >> + /* Check if the user is doing something meaningless. 
*/ > > >> + if (unlikely(npages != 1)) > > >> return -EFAULT; > > >> > > >> - page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa)); > > >> - > > > > > >gfn_to_page is the interface for mapping guest pages inside KVM, > > >and you're bypassing it for IMO no good reason (i doubt there's any > > >performance improvement by skipping the translation). > > > > He isn't skipping it - he's using gfn_to_hva() to derive ptep_user, > > which is equivalent. > > Well, he is removing the second translation. So that is skipped. hva->gpa translation is not supposed to be changed by kvm. > > The motivation isn't performance, it's to ensure that cmpxchg_gpte() > > operates on the same address as we read it from. > > OK, my objection is direct get_user_pages_fast usage. Please pass gfn to > gfn_to_page. We do get_user() in read_gpte(). That is equivalent to get_user_pages(). So we already broke that layer of abstraction. > > (btw, we're missing a mark_page_dirty() here, no?) > > No, see line 245. Ah, yes. Thanks.
> > > >gfn_to_page is the interface for mapping guest pages inside KVM, > > > >and you're bypassing it for IMO no good reason (i doubt there's any > > > >performance improvement by skipping the translation). > > > > > > He isn't skipping it - he's using gfn_to_hva() to derive ptep_user, > > > which is equivalent. > > > > Well, he is removing the second translation. So that is skipped. > > hva->gpa translation is not supposed to be changed by kvm. > > > > The motivation isn't performance, it's to ensure that cmpxchg_gpte() > > > operates on the same address as we read it from. > > > > OK, my objection is direct get_user_pages_fast usage. Please pass gfn to > > gfn_to_page. > > We do get_user() in read_gpte(). That is equivalent to > get_user_pages(). So we already broke that layer of abstraction. At first, I broke hva_to_pfn() into two functions: hva_to_page hva_to_pfn and used the former to get the page. But after making that patch, I thought it might be a bit extra to do such things in the function which is doing low level page manipulations like kmapping. Actually, we are already assuming that the page returned by gfn_to_page is always a usual page which contains gptes without extra checks. Which way do you like the best? Takuya -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/04/2011 05:00 PM, Takuya Yoshikawa wrote: > > > > We do get_user() in read_gpte(). That is equivalent to > > get_user_pages(). So we already broke that layer of abstraction. > > At first, I broke hva_to_pfn() into two functions: > hva_to_page > hva_to_pfn > and used the former to get the page. Ouch, what a complicated function. > But after making that patch, I thought it might be a bit extra to do such > things in the function which is doing low level page manipulations like > kmapping. > > Actually, we are already assuming that the page returned by gfn_to_page is > always a usual page which contains gptes without extra checks. When gfn_to_page() returns an error, it is actually a real page in host memory that can be scribbled on. So no further checks are needed. > Which way do you like the best? I think it should work fine as is. The question is whether we're doing a layering violation here (but in any case, that was introduced by ptep_user, not this patch).
On Sun, May 01, 2011 at 02:33:07PM +0900, Takuya Yoshikawa wrote: > From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp> > > The address of the gpte was already calculated and stored in ptep_user > before entering cmpxchg_gpte(). > > This patch makes cmpxchg_gpte() to use that to make it clear that we > are using the same address during walk_addr_generic(). > > Note that the unlikely annotations are used to show that the conditions > are something unusual rather than for performance. > > Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp> > --- > arch/x86/kvm/paging_tmpl.h | 26 ++++++++++++-------------- > 1 files changed, 12 insertions(+), 14 deletions(-) Applied, thanks. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 52450a6..f9d9af1 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -79,21 +79,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) } static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gfn_t table_gfn, unsigned index, - pt_element_t orig_pte, pt_element_t new_pte) + pt_element_t __user *ptep_user, unsigned index, + pt_element_t orig_pte, pt_element_t new_pte) { + int npages; pt_element_t ret; pt_element_t *table; struct page *page; - gpa_t gpa; - gpa = mmu->translate_gpa(vcpu, table_gfn << PAGE_SHIFT, - PFERR_USER_MASK|PFERR_WRITE_MASK); - if (gpa == UNMAPPED_GVA) + npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); + /* Check if the user is doing something meaningless. */ + if (unlikely(npages != 1)) return -EFAULT; - page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa)); - table = kmap_atomic(page, KM_USER0); ret = CMPXCHG(&table[index], orig_pte, new_pte); kunmap_atomic(table, KM_USER0); @@ -234,9 +232,9 @@ walk: int ret; trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); - ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn, - index, pte, pte|PT_ACCESSED_MASK); - if (ret < 0) { + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, + pte, pte|PT_ACCESSED_MASK); + if (unlikely(ret < 0)) { present = false; break; } else if (ret) @@ -293,9 +291,9 @@ walk: int ret; trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); - ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn, index, pte, - pte|PT_DIRTY_MASK); - if (ret < 0) { + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, + pte, pte|PT_DIRTY_MASK); + if (unlikely(ret < 0)) { present = false; goto error; } else if (ret)