diff mbox

x86: defer not-present segment checks

Message ID 57F65E9102000078001155F3@prv-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Beulich Oct. 6, 2016, 12:24 p.m. UTC
Following on from commits 5602e74c60 ("x86emul: correct loading of
%ss") and bdb860d01c ("x86/HVM: correct segment register loading during
task switch") the point of the non-.present checks needs to be refined:
#NP (and its #SS companion), other than suggested by the various
instruction pages in Intel's SDM, gets checked for only after all type
and permission checks. The only checks getting done even later are the
64-bit specific ones for system descriptors (which we don't support
yet).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
x86: defer not-present segment checks

Following on from commits 5602e74c60 ("x86emul: correct loading of
%ss") and bdb860d01c ("x86/HVM: correct segment register loading during
task switch") the point of the non-.present checks needs to be refined:
#NP (and its #SS companion), other than suggested by the various
instruction pages in Intel's SDM, gets checked for only after all type
and permission checks. The only checks getting done even later are the
64-bit specific ones for system descriptors (which we don't support
yet).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2754,14 +2754,6 @@ static int hvm_load_segment_selector(
     do {
         desc = *pdesc;
 
-        /* Segment present in memory? */
-        if ( !(desc.b & _SEGMENT_P) )
-        {
-            fault_type = (seg != x86_seg_ss) ? TRAP_no_segment
-                                             : TRAP_stack_error;
-            goto unmap_and_fail;
-        }
-
         /* LDT descriptor is a system segment. All others are code/data. */
         if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
             goto unmap_and_fail;
@@ -2806,6 +2798,14 @@ static int hvm_load_segment_selector(
                 goto unmap_and_fail;
             break;
         }
+
+        /* Segment present in memory? */
+        if ( !(desc.b & _SEGMENT_P) )
+        {
+            fault_type = (seg != x86_seg_ss) ? TRAP_no_segment
+                                             : TRAP_stack_error;
+            goto unmap_and_fail;
+        }
     } while ( !(desc.b & 0x100) && /* Ensure Accessed flag is set */
               writable && /* except if we are to discard writes */
               (cmpxchg(&pdesc->b, desc.b, desc.b | 0x100) != desc.b) );
@@ -2892,12 +2892,6 @@ void hvm_task_switch(
     if ( tr.attr.fields.g )
         tr.limit = (tr.limit << 12) | 0xfffu;
 
-    if ( !tr.attr.fields.p )
-    {
-        hvm_inject_hw_exception(TRAP_no_segment, tss_sel & 0xfff8);
-        goto out;
-    }
-
     if ( tr.attr.fields.type != ((taskswitch_reason == TSW_iret) ? 0xb : 0x9) )
     {
         hvm_inject_hw_exception(
@@ -2906,6 +2900,12 @@ void hvm_task_switch(
         goto out;
     }
 
+    if ( !tr.attr.fields.p )
+    {
+        hvm_inject_hw_exception(TRAP_no_segment, tss_sel & 0xfff8);
+        goto out;
+    }
+
     if ( tr.limit < (sizeof(tss)-1) )
     {
         hvm_inject_hw_exception(TRAP_invalid_tss, tss_sel & 0xfff8);
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1311,7 +1311,7 @@ protmode_load_seg(
     struct { uint32_t a, b; } desc;
     uint8_t dpl, rpl;
     int cpl = get_cpl(ctxt, ops);
-    uint32_t new_desc_b, a_flag = 0x100;
+    uint32_t a_flag = 0x100;
     int rc, fault_type = EXC_GP;
 
     if ( cpl < 0 )
@@ -1352,13 +1352,6 @@ protmode_load_seg(
                          &desc, sizeof(desc), ctxt)) )
         return rc;
 
-    /* Segment present in memory? */
-    if ( !(desc.b & (1u<<15)) )
-    {
-        fault_type = seg != x86_seg_ss ? EXC_NP : EXC_SS;
-        goto raise_exn;
-    }
-
     if ( !is_x86_user_segment(seg) )
     {
         /* System segments must have S flag == 0. */
@@ -1410,7 +1403,8 @@ protmode_load_seg(
         /* LDT system segment? */
         if ( (desc.b & (15u<<8)) != (2u<<8) )
             goto raise_exn;
-        goto skip_accessed_flag;
+        a_flag = 0;
+        break;
     case x86_seg_tr:
         /* Available TSS system segment? */
         if ( (desc.b & (15u<<8)) != (9u<<8) )
@@ -1428,18 +1422,26 @@ protmode_load_seg(
         break;
     }
 
+    /* Segment present in memory? */
+    if ( !(desc.b & (1u<<15)) )
+    {
+        fault_type = seg != x86_seg_ss ? EXC_NP : EXC_SS;
+        goto raise_exn;
+    }
+
     /* Ensure Accessed flag is set. */
-    new_desc_b = desc.b | a_flag;
-    if ( !(desc.b & a_flag) &&
-         ((rc = ops->cmpxchg(
-             x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
-             &desc.b, &new_desc_b, 4, ctxt)) != 0) )
-        return rc;
+    if ( a_flag && !(desc.b & a_flag) )
+    {
+        uint32_t new_desc_b = desc.b | a_flag;
 
-    /* Force the Accessed flag in our local copy. */
-    desc.b |= a_flag;
+        if ( (rc = ops->cmpxchg(x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
+                                &desc.b, &new_desc_b, 4, ctxt)) != 0 )
+            return rc;
+
+        /* Force the Accessed flag in our local copy. */
+        desc.b = new_desc_b;
+    }
 
- skip_accessed_flag:
     sreg->base = (((desc.b <<  0) & 0xff000000u) |
                   ((desc.b << 16) & 0x00ff0000u) |
                   ((desc.a >> 16) & 0x0000ffffu));

Comments

Andrew Cooper Oct. 7, 2016, 12:28 p.m. UTC | #1
On 06/10/16 13:24, Jan Beulich wrote:
> Following on from commits 5602e74c60 ("x86emul: correct loading of
> %ss") and bdb860d01c ("x86/HVM: correct segment register loading during
> task switch") the point of the non-.present checks needs to be refined:
> #NP (and its #SS companion), other than suggested by the various
> instruction pages in Intel's SDM, gets checked for only after all type
> and permission checks. The only checks getting done even later are the
> 64-bit specific ones for system descriptors (which we don't support
> yet).

Is this from observation on native hardware?

The AMD manual does describe:

Present (P) Bit. Bit 15 of the upper doubleword. The segment-present bit
indicates that the segment referenced by the descriptor is loaded in
memory. If a reference is made to a descriptor entry when P = 0, a
segment-not-present exception (#NP) occurs.

which doesn't imply that the present bit is a validity bit for the other
contents of the segment.

Intel is similar, with:

Indicates whether the segment is present in memory (set) or not present
(clear). If this flag is clear, the processor generates a
segment-not-present exception (#NP) when a segment selector that points
to the segment descriptor is loaded into a segment register.

Furthermore, Figure 3-9 shows what a segment descriptor looks like with
the present flag is clear.  This quite clearly states that the type, S,
DPL and P fields all have meanings, but that all other fields are
explicitly available for software use.

Therefore, it seems legitimate to consider type/dpl/S before P, but we
must take extra special care not to look at any other fields until P is
found to be set.

~Andrew
Jan Beulich Oct. 7, 2016, 1:01 p.m. UTC | #2
>>> On 07.10.16 at 14:28, <andrew.cooper3@citrix.com> wrote:
> On 06/10/16 13:24, Jan Beulich wrote:
>> Following on from commits 5602e74c60 ("x86emul: correct loading of
>> %ss") and bdb860d01c ("x86/HVM: correct segment register loading during
>> task switch") the point of the non-.present checks needs to be refined:
>> #NP (and its #SS companion), other than suggested by the various
>> instruction pages in Intel's SDM, gets checked for only after all type
>> and permission checks. The only checks getting done even later are the
>> 64-bit specific ones for system descriptors (which we don't support
>> yet).
> 
> Is this from observation on native hardware?

Yes. I also found that old paper manuals are more helpful in this
respect than the SDM is nowadays.

> The AMD manual does describe:
> 
> Present (P) Bit. Bit 15 of the upper doubleword. The segment-present bit
> indicates that the segment referenced by the descriptor is loaded in
> memory. If a reference is made to a descriptor entry when P = 0, a
> segment-not-present exception (#NP) occurs.
> 
> which doesn't imply that the present bit is a validity bit for the other
> contents of the segment.
> 
> Intel is similar, with:
> 
> Indicates whether the segment is present in memory (set) or not present
> (clear). If this flag is clear, the processor generates a
> segment-not-present exception (#NP) when a segment selector that points
> to the segment descriptor is loaded into a segment register.
> 
> Furthermore, Figure 3-9 shows what a segment descriptor looks like with
> the present flag is clear.  This quite clearly states that the type, S,
> DPL and P fields all have meanings, but that all other fields are
> explicitly available for software use.
> 
> Therefore, it seems legitimate to consider type/dpl/S before P, but we
> must take extra special care not to look at any other fields until P is
> found to be set.

Exactly.

Also note how e.g. emulate_gate_op() looks at the P bit of the gate
only after having done other relevant checks. Having looked at this
again just now I realize, though, that the P bit clear on the code
segment descriptor wrongly raises #GP. As this gets fixed by the
rework patch using the generic insn decoder, I won't bother submitting
a separate fix for this though; I'll just add a note to that patch's
commit message.

Jan
Jan Beulich Oct. 7, 2016, 1:33 p.m. UTC | #3
>>> On 07.10.16 at 15:01, <JBeulich@suse.com> wrote:
> Also note how e.g. emulate_gate_op() looks at the P bit of the gate
> only after having done other relevant checks. Having looked at this
> again just now I realize, though, that the P bit clear on the code
> segment descriptor wrongly raises #GP. As this gets fixed by the
> rework patch using the generic insn decoder, I won't bother submitting
> a separate fix for this though; I'll just add a note to that patch's
> commit message.

Actually I was wrong here - it's the check of the descriptor of an
already loaded segment register that does this, so there's nothing
wrong with raising #GP when it ends up being no longer present
at that point (as there's no architectural exception in this case,
using #GP uniformly is likely better than trying to be creative).

Jan
Andrew Cooper Oct. 10, 2016, 9:33 a.m. UTC | #4
On 06/10/16 13:24, Jan Beulich wrote:
> Following on from commits 5602e74c60 ("x86emul: correct loading of
> %ss") and bdb860d01c ("x86/HVM: correct segment register loading during
> task switch") the point of the non-.present checks needs to be refined:
> #NP (and its #SS companion), other than suggested by the various
> instruction pages in Intel's SDM, gets checked for only after all type
> and permission checks. The only checks getting done even later are the
> 64-bit specific ones for system descriptors (which we don't support
> yet).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Testing confirms this to be correct.  However, there is one issue...

> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -1311,7 +1311,7 @@ protmode_load_seg(
>      struct { uint32_t a, b; } desc;
>      uint8_t dpl, rpl;
>      int cpl = get_cpl(ctxt, ops);
> -    uint32_t new_desc_b, a_flag = 0x100;
> +    uint32_t a_flag = 0x100;
>      int rc, fault_type = EXC_GP;
>  
>      if ( cpl < 0 )
> @@ -1352,13 +1352,6 @@ protmode_load_seg(
>                           &desc, sizeof(desc), ctxt)) )
>          return rc;
>  
> -    /* Segment present in memory? */
> -    if ( !(desc.b & (1u<<15)) )
> -    {
> -        fault_type = seg != x86_seg_ss ? EXC_NP : EXC_SS;
> -        goto raise_exn;
> -    }
> -
>      if ( !is_x86_user_segment(seg) )
>      {
>          /* System segments must have S flag == 0. */
> @@ -1410,7 +1403,8 @@ protmode_load_seg(
>          /* LDT system segment? */
>          if ( (desc.b & (15u<<8)) != (2u<<8) )
>              goto raise_exn;
> -        goto skip_accessed_flag;
> +        a_flag = 0;
> +        break;
>      case x86_seg_tr:
>          /* Available TSS system segment? */
>          if ( (desc.b & (15u<<8)) != (9u<<8) )
> @@ -1428,18 +1422,26 @@ protmode_load_seg(

Inbetween these two hunks lives the 64bit %cs check for L and D.  Until
P is checked and found to be set, these bits are available for software use.

The check should be moved down until after the presence check.

Otherwise, everything else looks fine.

~Andrew

>          break;
>      }
>  
> +    /* Segment present in memory? */
> +    if ( !(desc.b & (1u<<15)) )
> +    {
> +        fault_type = seg != x86_seg_ss ? EXC_NP : EXC_SS;
> +        goto raise_exn;
> +    }
> +
>      /* Ensure Accessed flag is set. */
> -    new_desc_b = desc.b | a_flag;
> -    if ( !(desc.b & a_flag) &&
> -         ((rc = ops->cmpxchg(
> -             x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
> -             &desc.b, &new_desc_b, 4, ctxt)) != 0) )
> -        return rc;
> +    if ( a_flag && !(desc.b & a_flag) )
> +    {
> +        uint32_t new_desc_b = desc.b | a_flag;
>  
> -    /* Force the Accessed flag in our local copy. */
> -    desc.b |= a_flag;
> +        if ( (rc = ops->cmpxchg(x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
> +                                &desc.b, &new_desc_b, 4, ctxt)) != 0 )
> +            return rc;
> +
> +        /* Force the Accessed flag in our local copy. */
> +        desc.b = new_desc_b;
> +    }
>  
> - skip_accessed_flag:
>      sreg->base = (((desc.b <<  0) & 0xff000000u) |
>                    ((desc.b << 16) & 0x00ff0000u) |
>                    ((desc.a >> 16) & 0x0000ffffu));
>
>
diff mbox

Patch

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2754,14 +2754,6 @@  static int hvm_load_segment_selector(
     do {
         desc = *pdesc;
 
-        /* Segment present in memory? */
-        if ( !(desc.b & _SEGMENT_P) )
-        {
-            fault_type = (seg != x86_seg_ss) ? TRAP_no_segment
-                                             : TRAP_stack_error;
-            goto unmap_and_fail;
-        }
-
         /* LDT descriptor is a system segment. All others are code/data. */
         if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
             goto unmap_and_fail;
@@ -2806,6 +2798,14 @@  static int hvm_load_segment_selector(
                 goto unmap_and_fail;
             break;
         }
+
+        /* Segment present in memory? */
+        if ( !(desc.b & _SEGMENT_P) )
+        {
+            fault_type = (seg != x86_seg_ss) ? TRAP_no_segment
+                                             : TRAP_stack_error;
+            goto unmap_and_fail;
+        }
     } while ( !(desc.b & 0x100) && /* Ensure Accessed flag is set */
               writable && /* except if we are to discard writes */
               (cmpxchg(&pdesc->b, desc.b, desc.b | 0x100) != desc.b) );
@@ -2892,12 +2892,6 @@  void hvm_task_switch(
     if ( tr.attr.fields.g )
         tr.limit = (tr.limit << 12) | 0xfffu;
 
-    if ( !tr.attr.fields.p )
-    {
-        hvm_inject_hw_exception(TRAP_no_segment, tss_sel & 0xfff8);
-        goto out;
-    }
-
     if ( tr.attr.fields.type != ((taskswitch_reason == TSW_iret) ? 0xb : 0x9) )
     {
         hvm_inject_hw_exception(
@@ -2906,6 +2900,12 @@  void hvm_task_switch(
         goto out;
     }
 
+    if ( !tr.attr.fields.p )
+    {
+        hvm_inject_hw_exception(TRAP_no_segment, tss_sel & 0xfff8);
+        goto out;
+    }
+
     if ( tr.limit < (sizeof(tss)-1) )
     {
         hvm_inject_hw_exception(TRAP_invalid_tss, tss_sel & 0xfff8);
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1311,7 +1311,7 @@  protmode_load_seg(
     struct { uint32_t a, b; } desc;
     uint8_t dpl, rpl;
     int cpl = get_cpl(ctxt, ops);
-    uint32_t new_desc_b, a_flag = 0x100;
+    uint32_t a_flag = 0x100;
     int rc, fault_type = EXC_GP;
 
     if ( cpl < 0 )
@@ -1352,13 +1352,6 @@  protmode_load_seg(
                          &desc, sizeof(desc), ctxt)) )
         return rc;
 
-    /* Segment present in memory? */
-    if ( !(desc.b & (1u<<15)) )
-    {
-        fault_type = seg != x86_seg_ss ? EXC_NP : EXC_SS;
-        goto raise_exn;
-    }
-
     if ( !is_x86_user_segment(seg) )
     {
         /* System segments must have S flag == 0. */
@@ -1410,7 +1403,8 @@  protmode_load_seg(
         /* LDT system segment? */
         if ( (desc.b & (15u<<8)) != (2u<<8) )
             goto raise_exn;
-        goto skip_accessed_flag;
+        a_flag = 0;
+        break;
     case x86_seg_tr:
         /* Available TSS system segment? */
         if ( (desc.b & (15u<<8)) != (9u<<8) )
@@ -1428,18 +1422,26 @@  protmode_load_seg(
         break;
     }
 
+    /* Segment present in memory? */
+    if ( !(desc.b & (1u<<15)) )
+    {
+        fault_type = seg != x86_seg_ss ? EXC_NP : EXC_SS;
+        goto raise_exn;
+    }
+
     /* Ensure Accessed flag is set. */
-    new_desc_b = desc.b | a_flag;
-    if ( !(desc.b & a_flag) &&
-         ((rc = ops->cmpxchg(
-             x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
-             &desc.b, &new_desc_b, 4, ctxt)) != 0) )
-        return rc;
+    if ( a_flag && !(desc.b & a_flag) )
+    {
+        uint32_t new_desc_b = desc.b | a_flag;
 
-    /* Force the Accessed flag in our local copy. */
-    desc.b |= a_flag;
+        if ( (rc = ops->cmpxchg(x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
+                                &desc.b, &new_desc_b, 4, ctxt)) != 0 )
+            return rc;
+
+        /* Force the Accessed flag in our local copy. */
+        desc.b = new_desc_b;
+    }
 
- skip_accessed_flag:
     sreg->base = (((desc.b <<  0) & 0xff000000u) |
                   ((desc.b << 16) & 0x00ff0000u) |
                   ((desc.a >> 16) & 0x0000ffffu));