diff mbox

[V6,4/5] xen/mm: Clean up pfec handling in gva_to_gfn

Message ID 1453188659-8908-5-git-send-email-huaitong.han@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Huaitong Han Jan. 19, 2016, 7:30 a.m. UTC
At the moment, the pfec argument to gva_to_gfn has two functions:

* To inform guest_walk what kind of access is happening

* As a value to pass back into the guest in the event of a fault.

Unfortunately this is not quite treated consistently: the hvm_fetch_*
function will "pre-clear" the PFEC_insn_fetch flag before calling
gva_to_gfn; meaning guest_walk doesn't actually know whether a given
access is an instruction fetch or not.  This works now, but will cause
issues when pkeys are introduced, since guest_walk will need to know
whether an access is an instruction fetch even if it doesn't return
PFEC_insn_fetch.

Fix this by making a clean separation for in and out functionalities
of the pfec argument:

1. Always pass in the access type to gva_to_gfn

2. Filter out inappropriate access flags before returning from gva_to_gfn.

(The PFEC_insn_fetch flag should only be passed to the guest if either NX or
SMEP is enabled.  See Intel 64 Developer's Manual, Volume 3, Section 4.7.)

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: Huaitong Han <huaitong.han@intel.com>
---
 xen/arch/x86/hvm/hvm.c           |  8 ++------
 xen/arch/x86/mm/hap/guest_walk.c | 10 +++++++++-
 xen/arch/x86/mm/shadow/multi.c   |  6 ++++++
 3 files changed, 17 insertions(+), 7 deletions(-)

Comments

Jan Beulich Jan. 25, 2016, 3:56 p.m. UTC | #1
>>> On 19.01.16 at 08:30, <huaitong.han@intel.com> wrote:
> At the moment, the pfec argument to gva_to_gfn has two functions:
> 
> * To inform guest_walk what kind of access is happening
> 
> * As a value to pass back into the guest in the event of a fault.
> 
> Unfortunately this is not quite treated consistently: the hvm_fetch_*
> function will "pre-clear" the PFEC_insn_fetch flag before calling
> gva_to_gfn; meaning guest_walk doesn't actually know whether a given
> access is an instruction fetch or not.  This works now, but will cause
> issues when pkeys are introduced, since guest_walk will need to know
> whether an access is an instruction fetch even if it doesn't return
> PFEC_insn_fetch.
> 
> Fix this by making a clean separation for in and out functionalities
> of the pfec argument:
> 
> 1. Always pass in the access type to gva_to_gfn
> 
> 2. Filter out inappropriate access flags before returning from gva_to_gfn.
> 
> (The PFEC_insn_fetch flag should only be passed to the guest if either NX or
> SMEP is enabled.  See Intel 64 Developer's Manual, Volume 3, Section 4.7.)

As mentioned in various other contexts not so long ago - no SDM
section number please (as they tend to change); use section titles
instead (also below in code comments).

> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> Signed-off-by: Huaitong Han <huaitong.han@intel.com>
> ---
>  xen/arch/x86/hvm/hvm.c           |  8 ++------
>  xen/arch/x86/mm/hap/guest_walk.c | 10 +++++++++-
>  xen/arch/x86/mm/shadow/multi.c   |  6 ++++++

You failed to Cc the shadow code maintainer (now added).

> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -4432,11 +4432,9 @@ enum hvm_copy_result hvm_copy_from_guest_virt(
>  enum hvm_copy_result hvm_fetch_from_guest_virt(
>      void *buf, unsigned long vaddr, int size, uint32_t pfec)
>  {
> -    if ( hvm_nx_enabled(current) || hvm_smep_enabled(current) )
> -        pfec |= PFEC_insn_fetch;
>      return __hvm_copy(buf, vaddr, size,
>                        HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
> -                      PFEC_page_present | pfec);
> +                      PFEC_page_present | PFEC_insn_fetch | pfec);
>  }
>  
>  enum hvm_copy_result hvm_copy_to_guest_virt_nofault(
> @@ -4458,11 +4456,9 @@ enum hvm_copy_result hvm_copy_from_guest_virt_nofault(
>  enum hvm_copy_result hvm_fetch_from_guest_virt_nofault(
>      void *buf, unsigned long vaddr, int size, uint32_t pfec)
>  {
> -    if ( hvm_nx_enabled(current) || hvm_smep_enabled(current) )
> -        pfec |= PFEC_insn_fetch;
>      return __hvm_copy(buf, vaddr, size,
>                        HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
> -                      PFEC_page_present | pfec);
> +                      PFEC_page_present | PFEC_insn_fetch | pfec);
>  }
>  
>  unsigned long copy_to_user_hvm(void *to, const void *from, unsigned int len)

This part
Acked-by: Jan Beulich <jbeulich@suse.com>

> --- a/xen/arch/x86/mm/hap/guest_walk.c
> +++ b/xen/arch/x86/mm/hap/guest_walk.c
> @@ -82,7 +82,7 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
>      if ( !top_page )
>      {
>          pfec[0] &= ~PFEC_page_present;
> -        return INVALID_GFN;
> +        goto out_tweak_pfec;
>      }
>      top_mfn = _mfn(page_to_mfn(top_page));
>  
> @@ -139,6 +139,14 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
>      if ( missing & _PAGE_SHARED )
>          pfec[0] = PFEC_page_shared;
>  
> +out_tweak_pfec:

To avoid corrupting the context in patches, labels should be indented
by at least one space please.

Jan
Tim Deegan Jan. 26, 2016, 2:30 p.m. UTC | #2
Hi,

At 15:30 +0800 on 19 Jan (1453217458), Huaitong Han wrote:
> At the moment, the pfec argument to gva_to_gfn has two functions:
> 
> * To inform guest_walk what kind of access is happening
> 
> * As a value to pass back into the guest in the event of a fault.
> 
> Unfortunately this is not quite treated consistently: the hvm_fetch_*
> function will "pre-clear" the PFEC_insn_fetch flag before calling
> gva_to_gfn; meaning guest_walk doesn't actually know whether a given
> access is an instruction fetch or not.  This works now, but will cause
> issues when pkeys are introduced, since guest_walk will need to know
> whether an access is an instruction fetch even if it doesn't return
> PFEC_insn_fetch.
> 
> Fix this by making a clean separation for in and out functionalities
> of the pfec argument:
> 
> 1. Always pass in the access type to gva_to_gfn
> 
> 2. Filter out inappropriate access flags before returning from gva_to_gfn.

This seems OK.  But can you please:
 - Add this new adjustment once, in paging_gva_to_gfn(), instead of
   adding it to each implementation; and
 - Adjust the comment above the declaration of paging_gva_to_gfn() in
   paging.h to describe this new behaviour.

Also:

> diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
> index 58f7e72..bbbc706 100644
> --- a/xen/arch/x86/mm/shadow/multi.c
> +++ b/xen/arch/x86/mm/shadow/multi.c
> @@ -3668,6 +3668,12 @@ sh_gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
>              pfec[0] &= ~PFEC_page_present;
>          if ( missing & _PAGE_INVALID_BITS )
>              pfec[0] |= PFEC_reserved_bit;
> +        /*
> +         * Intel 64 Volume 3, Section 4.7: The PFEC_insn_fetch flag is
> +         * set only when NX or SMEP are enabled.
> +         */
> +        if ( !hvm_nx_enabled(v) && !hvm_smep_enabled(v) )
> +            pfec[0] &= ~PFEC_insn_fetch;

This needs to either DTRT for PV guests or assert that it always sees
a HVM guest (I think this is the case but haven't tested).

Cheers,

Tim.
Huaitong Han Jan. 27, 2016, 7:22 a.m. UTC | #3
On Tue, 2016-01-26 at 14:30 +0000, Tim Deegan wrote:
> Hi,
> 
> At 15:30 +0800 on 19 Jan (1453217458), Huaitong Han wrote:
> > At the moment, the pfec argument to gva_to_gfn has two functions:
> > 
> > * To inform guest_walk what kind of access is happening
> > 
> > * As a value to pass back into the guest in the event of a fault.
> > 
> > Unfortunately this is not quite treated consistently: the
> > hvm_fetch_*
> > function will "pre-clear" the PFEC_insn_fetch flag before calling
> > gva_to_gfn; meaning guest_walk doesn't actually know whether a
> > given
> > access is an instruction fetch or not.  This works now, but will
> > cause
> > issues when pkeys are introduced, since guest_walk will need to
> > know
> > whether an access is an instruction fetch even if it doesn't return
> > PFEC_insn_fetch.
> > 
> > Fix this by making a clean separation for in and out
> > functionalities
> > of the pfec argument:
> > 
> > 1. Always pass in the access type to gva_to_gfn
> > 
> > 2. Filter out inappropriate access flags before returning from
> > gva_to_gfn.
> 
> This seems OK.  But can you please:
>  - Add this new adjustment once, in paging_gva_to_gfn(), instead of
>    adding it to each implementation; and
>  - Adjust the comment above the declaration of paging_gva_to_gfn() in
>    paging.h to describe this new behaviour.
Although adding the adjustment in paging_gva_to_gfn can reduce code
duplication, adding it to each implementation is more readable, because
other sections of pfec are handled in each implementation.

> 
> Cheers,
> 
> Tim.
Tim Deegan Jan. 27, 2016, 9:34 a.m. UTC | #4
Hi,

At 07:22 +0000 on 27 Jan (1453879344), Han, Huaitong wrote:
> On Tue, 2016-01-26 at 14:30 +0000, Tim Deegan wrote:
> > This seems OK.  But can you please:
> >  - Add this new adjustment once, in paging_gva_to_gfn(), instead of
> >    adding it to each implementation; and
> >  - Adjust the comment above the declaration of paging_gva_to_gfn() in
> >    paging.h to describe this new behaviour.
> Although adding adjustment in paging_gva_to_gfn can reduce code
> duplication, adding it to each implementation is more readable, because
> other sections of pfec are handled in each implementation.

True, but since paging_gva_to_gfn() is already non-trivial and this
is a different kind of adjustment, I'd still like it done there. 
I'll leave this to George's discretion as x86/mm maintainer.

But in any case, please add the comment describing the new semantics.

Cheers,

Tim.
Huaitong Han Jan. 27, 2016, 10:13 a.m. UTC | #5
On Wed, 2016-01-27 at 09:34 +0000, Tim Deegan wrote:
> Hi,
> 
> At 07:22 +0000 on 27 Jan (1453879344), Han, Huaitong wrote:
> > On Tue, 2016-01-26 at 14:30 +0000, Tim Deegan wrote:
> > > This seems OK.  But can you please:
> > >  - Add this new adjustment once, in paging_gva_to_gfn(), instead
> > > of
> > >    adding it to each implementation; and
> > >  - Adjust the comment above the declaration of
> > > paging_gva_to_gfn() in
> > >    paging.h to describe this new behaviour.
> > Although adding adjustment in paging_gva_to_gfn can reduce code
> > duplication, adding it to each implementation is more readable,
> > > because
> > other sections of pfec are handled in each implementation.
> 
> True, but since paging_gva_to_gfn() is already non-trivial and this
> is a different kind of adjustment, I'd still like it done there. 
> I'll leave this to George's discretion as x86/mm maintainer.
> 
> But in any case, please add the comment describing the new semantics.
To George:
What is your opinion on Tim's comment? 

To Tim:
I will update the code and the comment in patch series V8.

> Cheers,
> 
> Tim.
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 21470ec..688d200 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4432,11 +4432,9 @@  enum hvm_copy_result hvm_copy_from_guest_virt(
 enum hvm_copy_result hvm_fetch_from_guest_virt(
     void *buf, unsigned long vaddr, int size, uint32_t pfec)
 {
-    if ( hvm_nx_enabled(current) || hvm_smep_enabled(current) )
-        pfec |= PFEC_insn_fetch;
     return __hvm_copy(buf, vaddr, size,
                       HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
-                      PFEC_page_present | pfec);
+                      PFEC_page_present | PFEC_insn_fetch | pfec);
 }
 
 enum hvm_copy_result hvm_copy_to_guest_virt_nofault(
@@ -4458,11 +4456,9 @@  enum hvm_copy_result hvm_copy_from_guest_virt_nofault(
 enum hvm_copy_result hvm_fetch_from_guest_virt_nofault(
     void *buf, unsigned long vaddr, int size, uint32_t pfec)
 {
-    if ( hvm_nx_enabled(current) || hvm_smep_enabled(current) )
-        pfec |= PFEC_insn_fetch;
     return __hvm_copy(buf, vaddr, size,
                       HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
-                      PFEC_page_present | pfec);
+                      PFEC_page_present | PFEC_insn_fetch | pfec);
 }
 
 unsigned long copy_to_user_hvm(void *to, const void *from, unsigned int len)
diff --git a/xen/arch/x86/mm/hap/guest_walk.c b/xen/arch/x86/mm/hap/guest_walk.c
index 49d0328..3eb8597 100644
--- a/xen/arch/x86/mm/hap/guest_walk.c
+++ b/xen/arch/x86/mm/hap/guest_walk.c
@@ -82,7 +82,7 @@  unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
     if ( !top_page )
     {
         pfec[0] &= ~PFEC_page_present;
-        return INVALID_GFN;
+        goto out_tweak_pfec;
     }
     top_mfn = _mfn(page_to_mfn(top_page));
 
@@ -139,6 +139,14 @@  unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
     if ( missing & _PAGE_SHARED )
         pfec[0] = PFEC_page_shared;
 
+out_tweak_pfec:
+    /*
+     * Intel 64 Volume 3, Section 4.7: The PFEC_insn_fetch flag is set
+     * only when NX or SMEP are enabled.
+     */
+    if ( !hvm_nx_enabled(v) && !hvm_smep_enabled(v) )
+        pfec[0] &= ~PFEC_insn_fetch;
+
     return INVALID_GFN;
 }
 
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 58f7e72..bbbc706 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -3668,6 +3668,12 @@  sh_gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
             pfec[0] &= ~PFEC_page_present;
         if ( missing & _PAGE_INVALID_BITS )
             pfec[0] |= PFEC_reserved_bit;
+        /*
+         * Intel 64 Volume 3, Section 4.7: The PFEC_insn_fetch flag is
+         * set only when NX or SMEP are enabled.
+         */
+        if ( !hvm_nx_enabled(v) && !hvm_smep_enabled(v) )
+            pfec[0] &= ~PFEC_insn_fetch;
         return INVALID_GFN;
     }
     gfn = guest_walk_to_gfn(&gw);