diff mbox series

[v2,10/14] x86/extable: Adjust extable handling to be shadow stack compatible

Message ID 20200527191847.17207-11-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show
Series x86: Support for CET Supervisor Shadow Stacks | expand

Commit Message

Andrew Cooper May 27, 2020, 7:18 p.m. UTC
When adjusting an IRET frame to recover from a fault, an equivalent
adjustment needs making in the shadow IRET frame.

The adjustment in exception_with_ints_disabled() could in principle be an
alternative block rather than an ifdef, as the only two current users of
_PRE_EXTABLE() are IRET-to-guest instructions.  However, this is not a
fastpath, and this form is more robust to future changes.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>

v2:
 * Break extable_shstk_fixup() out into a separate function.
 * Guard from shstk underflows, and unrealistic call traces.
---
 xen/arch/x86/traps.c        | 67 ++++++++++++++++++++++++++++++++++++++++++++-
 xen/arch/x86/x86_64/entry.S | 11 +++++++-
 2 files changed, 76 insertions(+), 2 deletions(-)

Comments

Jan Beulich May 28, 2020, 4:15 p.m. UTC | #1
On 27.05.2020 21:18, Andrew Cooper wrote:
> @@ -400,6 +400,18 @@ unsigned long get_stack_trace_bottom(unsigned long sp)
>      }
>  }
>  
> +static unsigned long get_shstk_bottom(unsigned long sp)
> +{
> +    switch ( get_stack_page(sp) )
> +    {
> +#ifdef CONFIG_XEN_SHSTK
> +    case 0:  return ROUNDUP(sp, IST_SHSTK_SIZE) - sizeof(unsigned long);
> +    case 5:  return ROUNDUP(sp, PAGE_SIZE)      - sizeof(unsigned long);

PRIMARY_SHSTK_SLOT please and, if introduced as suggested for the
earlier patch, IST_SHSTK_SLOT in the earlier line.

> @@ -763,6 +775,56 @@ static void do_reserved_trap(struct cpu_user_regs *regs)
>            trapnr, vec_name(trapnr), regs->error_code);
>  }
>  
> +static void extable_shstk_fixup(struct cpu_user_regs *regs, unsigned long fixup)
> +{
> +    unsigned long ssp, *ptr, *base;
> +
> +    asm ( "rdsspq %0" : "=r" (ssp) : "0" (1) );
> +    if ( ssp == 1 )
> +        return;
> +
> +    ptr = _p(ssp);
> +    base = _p(get_shstk_bottom(ssp));
> +
> +    for ( ; ptr < base; ++ptr )
> +    {
> +        /*
> +         * Search for %rip.  The shstk currently looks like this:
> +         *
> +         *   ...  [Likely pointed to by SSP]
> +         *   %cs  [== regs->cs]
> +         *   %rip [== regs->rip]
> +         *   SSP  [Likely points to 3 slots higher, above %cs]
> +         *   ...  [call tree to this function, likely 2/3 slots]
> +         *
> +         * and we want to overwrite %rip with fixup.  There are two
> +         * complications:
> +         *   1) We cant depend on SSP values, because they won't differ by 3
> +         *      slots if the exception is taken on an IST stack.
> +         *   2) There are synthetic (unrealistic but not impossible) scenarios
> +         *      where %rip can end up in the call tree to this function, so we
> +         *      can't check against regs->rip alone.
> +         *
> +         * Check for both reg->rip and regs->cs matching.

Nit: regs->rip

> +         */
> +
> +        if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
> +        {
> +            asm ( "wrssq %[fix], %[stk]"
> +                  : [stk] "=m" (*ptr)

Could this be ptr[0], to match the if()?

Considering how important it is that we don't fix up the wrong stack
location here, I continue to wonder if we wouldn't better also
include the SSP value on the stack in the checking, at the very
least by way of an ASSERT() or BUG_ON(). Since, with how the code is
currently written, this would require a somewhat odd looking ptr[-1]
I also wonder whether "while ( ++ptr < base )" as the loop header
wouldn't be better. The first entry on the stack can't be the RIP
we look for anyway, can it?

> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -708,7 +708,16 @@ exception_with_ints_disabled:
>          call  search_pre_exception_table
>          testq %rax,%rax                 # no fixup code for faulting EIP?
>          jz    1b
> -        movq  %rax,UREGS_rip(%rsp)
> +        movq  %rax,UREGS_rip(%rsp)      # fixup regular stack
> +
> +#ifdef CONFIG_XEN_SHSTK
> +        mov    $1, %edi
> +        rdsspq %rdi
> +        cmp    $1, %edi
> +        je     .L_exn_shstk_done
> +        wrssq  %rax, (%rdi)             # fixup shadow stack

According to the comment in extable_shstk_fixup(), isn't the value
to be replaced at 8(%rdi)?

Jan
Andrew Cooper May 29, 2020, 7:43 p.m. UTC | #2
On 28/05/2020 17:15, Jan Beulich wrote:
> On 27.05.2020 21:18, Andrew Cooper wrote:
>> @@ -400,6 +400,18 @@ unsigned long get_stack_trace_bottom(unsigned long sp)
>>      }
>>  }
>>  
>> +static unsigned long get_shstk_bottom(unsigned long sp)
>> +{
>> +    switch ( get_stack_page(sp) )
>> +    {
>> +#ifdef CONFIG_XEN_SHSTK
>> +    case 0:  return ROUNDUP(sp, IST_SHSTK_SIZE) - sizeof(unsigned long);
>> +    case 5:  return ROUNDUP(sp, PAGE_SIZE)      - sizeof(unsigned long);
> PRIMARY_SHSTK_SLOT please and, if introduced as suggested for the
> earlier patch, IST_SHSTK_SLOT in the earlier line.
>
>> @@ -763,6 +775,56 @@ static void do_reserved_trap(struct cpu_user_regs *regs)
>>            trapnr, vec_name(trapnr), regs->error_code);
>>  }
>>  
>> +static void extable_shstk_fixup(struct cpu_user_regs *regs, unsigned long fixup)
>> +{
>> +    unsigned long ssp, *ptr, *base;
>> +
>> +    asm ( "rdsspq %0" : "=r" (ssp) : "0" (1) );
>> +    if ( ssp == 1 )
>> +        return;
>> +
>> +    ptr = _p(ssp);
>> +    base = _p(get_shstk_bottom(ssp));
>> +
>> +    for ( ; ptr < base; ++ptr )
>> +    {
>> +        /*
>> +         * Search for %rip.  The shstk currently looks like this:
>> +         *
>> +         *   ...  [Likely pointed to by SSP]
>> +         *   %cs  [== regs->cs]
>> +         *   %rip [== regs->rip]
>> +         *   SSP  [Likely points to 3 slots higher, above %cs]
>> +         *   ...  [call tree to this function, likely 2/3 slots]
>> +         *
>> +         * and we want to overwrite %rip with fixup.  There are two
>> +         * complications:
>> +         *   1) We cant depend on SSP values, because they won't differ by 3
>> +         *      slots if the exception is taken on an IST stack.
>> +         *   2) There are synthetic (unrealistic but not impossible) scenarios
>> +         *      where %rip can end up in the call tree to this function, so we
>> +         *      can't check against regs->rip alone.
>> +         *
>> +         * Check for both reg->rip and regs->cs matching.
> Nit: regs->rip
>
>> +         */
>> +
>> +        if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
>> +        {
>> +            asm ( "wrssq %[fix], %[stk]"
>> +                  : [stk] "=m" (*ptr)
> Could this be ptr[0], to match the if()?
>
> Considering how important it is that we don't fix up the wrong stack
> location here, I continue to wonder if we wouldn't better also
> include the SSP value on the stack in the checking, at the very
> least by way of an ASSERT() or BUG_ON().

Well no, for the reason discussed in point 1.

It's not technically an issue right now, but there is no possible way to
BUILD_BUG_ON() someone turning an exception into IST, or stopping the
use of the extable infrastructure on a #DB.

Such a check would lie in wait and either provide an unexpected tangent
to someone debugging a complicated issue (I do use #DB for a fair bit),
or become a security vulnerability.

> Since, with how the code is
> currently written, this would require a somewhat odd looking ptr[-1]
> I also wonder whether "while ( ++ptr < base )" as the loop header
> wouldn't be better. The first entry on the stack can't be the RIP
> we look for anyway, can it?

Yes it can.

>> --- a/xen/arch/x86/x86_64/entry.S
>> +++ b/xen/arch/x86/x86_64/entry.S
>> @@ -708,7 +708,16 @@ exception_with_ints_disabled:
>>          call  search_pre_exception_table
>>          testq %rax,%rax                 # no fixup code for faulting EIP?
>>          jz    1b
>> -        movq  %rax,UREGS_rip(%rsp)
>> +        movq  %rax,UREGS_rip(%rsp)      # fixup regular stack
>> +
>> +#ifdef CONFIG_XEN_SHSTK
>> +        mov    $1, %edi
>> +        rdsspq %rdi
>> +        cmp    $1, %edi
>> +        je     .L_exn_shstk_done
>> +        wrssq  %rax, (%rdi)             # fixup shadow stack
> According to the comment in extable_shstk_fixup(), isn't the value
> to be replaced at 8(%rdi)?

Hmm - I think you're right.  I thought I had this covered by unit tests.

~Andrew
Andrew Cooper May 29, 2020, 9:17 p.m. UTC | #3
On 29/05/2020 20:43, Andrew Cooper wrote:
> On 28/05/2020 17:15, Jan Beulich wrote:
>> On 27.05.2020 21:18, Andrew Cooper wrote:
>>> +
>>> +        if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
>>> +        {
>>> +            asm ( "wrssq %[fix], %[stk]"
>>> +                  : [stk] "=m" (*ptr)
>> Could this be ptr[0], to match the if()?
>>
>> Considering how important it is that we don't fix up the wrong stack
>> location here, I continue to wonder if we wouldn't better also
>> include the SSP value on the stack in the checking, at the very
>> least by way of an ASSERT() or BUG_ON().
> Well no, for the reason discussed in point 1.
>
> Its not technically an issue right now, but there is no possible way to
> BUILD_BUG_ON() someone turning an exception into IST, or stopping the
> use of the extable infrastructure on a #DB.
>
> Such a check would lie in wait and either provide an unexpected tangent
> to someone debugging a complicated issue (I do use #DB for a fair bit),
> or become a security vulnerability.

Also (which I forgot first time around),

The ptr[1] == regs->cs check is 64 bits wide, and the way to get that on
the shadow stack would be to execute a call instruction ending at
0xe008 linear, or from a bad WRSSQ edit, both of which are serious
errors and worthy of hitting the BUG().

>>> --- a/xen/arch/x86/x86_64/entry.S
>>> +++ b/xen/arch/x86/x86_64/entry.S
>>> @@ -708,7 +708,16 @@ exception_with_ints_disabled:
>>>          call  search_pre_exception_table
>>>          testq %rax,%rax                 # no fixup code for faulting EIP?
>>>          jz    1b
>>> -        movq  %rax,UREGS_rip(%rsp)
>>> +        movq  %rax,UREGS_rip(%rsp)      # fixup regular stack
>>> +
>>> +#ifdef CONFIG_XEN_SHSTK
>>> +        mov    $1, %edi
>>> +        rdsspq %rdi
>>> +        cmp    $1, %edi
>>> +        je     .L_exn_shstk_done
>>> +        wrssq  %rax, (%rdi)             # fixup shadow stack
>> According to the comment in extable_shstk_fixup(), isn't the value
>> to be replaced at 8(%rdi)?
> Hmm - I think you're right.  I thought I had this covered by unit tests.

The unit test has been fixed, and so has this code.

~Andrew
Jan Beulich June 2, 2020, 12:57 p.m. UTC | #4
On 29.05.2020 21:43, Andrew Cooper wrote:
> On 28/05/2020 17:15, Jan Beulich wrote:
>> On 27.05.2020 21:18, Andrew Cooper wrote:
>>> @@ -763,6 +775,56 @@ static void do_reserved_trap(struct cpu_user_regs *regs)
>>>            trapnr, vec_name(trapnr), regs->error_code);
>>>  }
>>>  
>>> +static void extable_shstk_fixup(struct cpu_user_regs *regs, unsigned long fixup)
>>> +{
>>> +    unsigned long ssp, *ptr, *base;
>>> +
>>> +    asm ( "rdsspq %0" : "=r" (ssp) : "0" (1) );
>>> +    if ( ssp == 1 )
>>> +        return;
>>> +
>>> +    ptr = _p(ssp);
>>> +    base = _p(get_shstk_bottom(ssp));
>>> +
>>> +    for ( ; ptr < base; ++ptr )
>>> +    {
>>> +        /*
>>> +         * Search for %rip.  The shstk currently looks like this:
>>> +         *
>>> +         *   ...  [Likely pointed to by SSP]
>>> +         *   %cs  [== regs->cs]
>>> +         *   %rip [== regs->rip]
>>> +         *   SSP  [Likely points to 3 slots higher, above %cs]
>>> +         *   ...  [call tree to this function, likely 2/3 slots]
>>> +         *
>>> +         * and we want to overwrite %rip with fixup.  There are two
>>> +         * complications:
>>> +         *   1) We cant depend on SSP values, because they won't differ by 3
>>> +         *      slots if the exception is taken on an IST stack.
>>> +         *   2) There are synthetic (unrealistic but not impossible) scenarios
>>> +         *      where %rip can end up in the call tree to this function, so we
>>> +         *      can't check against regs->rip alone.
>>> +         *
>>> +         * Check for both reg->rip and regs->cs matching.
>>> +         */
>>> +
>>> +        if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
>>> +        {
>>> +            asm ( "wrssq %[fix], %[stk]"
>>> +                  : [stk] "=m" (*ptr)
>> Could this be ptr[0], to match the if()?
>>
>> Considering how important it is that we don't fix up the wrong stack
>> location here, I continue to wonder if we wouldn't better also
>> include the SSP value on the stack in the checking, at the very
>> least by way of an ASSERT() or BUG_ON().
> 
> Well no, for the reason discussed in point 1.

I don't see my suggestion in conflict with that point. I didn't
suggest an check using == ; instead what I'm thinking about here
is something as weak as "Does this point somewhere into the
stack range for this CPU?" After all there are only a limited
set of classes of entries that can be on a shadow stack:
- LIP (Xen .text, livepatching area)
- CS  (<= 0xffff)
- SSP (within stack range for the CPU)
- supervisor token (a single precise address)
- padding (zero)
The number ranges covered by these classes are entirely disjoint,
so qualifying all three slots accordingly can be done without any
risk of getting an entry wrong.

> Its not technically an issue right now, but there is no possible way to
> BUILD_BUG_ON() someone turning an exception into IST, or stopping the
> use of the extable infrastructure on a #DB.
> 
> Such a check would lie in wait and either provide an unexpected tangent
> to someone debugging a complicated issue (I do use #DB for a fair bit),
> or become a security vulnerability.
> 
>> Since, with how the code is
>> currently written, this would require a somewhat odd looking ptr[-1]
>> I also wonder whether "while ( ++ptr < base )" as the loop header
>> wouldn't be better. The first entry on the stack can't be the RIP
>> we look for anyway, can it?
> 
> Yes it can.

How, when there's the return address of this function plus
an SSP value preceding it?

Jan
Jan Beulich June 2, 2020, 1:11 p.m. UTC | #5
On 29.05.2020 23:17, Andrew Cooper wrote:
> On 29/05/2020 20:43, Andrew Cooper wrote:
>> On 28/05/2020 17:15, Jan Beulich wrote:
>>> On 27.05.2020 21:18, Andrew Cooper wrote:
>>>> +
>>>> +        if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
>>>> +        {
>>>> +            asm ( "wrssq %[fix], %[stk]"
>>>> +                  : [stk] "=m" (*ptr)
>>> Could this be ptr[0], to match the if()?
>>>
>>> Considering how important it is that we don't fix up the wrong stack
>>> location here, I continue to wonder if we wouldn't better also
>>> include the SSP value on the stack in the checking, at the very
>>> least by way of an ASSERT() or BUG_ON().
>> Well no, for the reason discussed in point 1.
>>
>> Its not technically an issue right now, but there is no possible way to
>> BUILD_BUG_ON() someone turning an exception into IST, or stopping the
>> use of the extable infrastructure on a #DB.
>>
>> Such a check would lie in wait and either provide an unexpected tangent
>> to someone debugging a complicated issue (I do use #DB for a fair bit),
>> or become a security vulnerability.
> 
> Also (which I forgot first time around),
> 
> The ptr[1] == regs->cs check is 64 bits wide, and the way to get that on
> the shadow stack would be to execute a call instruction ending at at
> 0xe008 linear, or from a bad WRSSQ edit, both of which are serious
> errors and worthy of hitting the BUG().

Maybe you misunderstood me then: By suggesting more strict checking
I'm actually asking for it to become more likely to hit that BUG(),
not less likely. (This is despite agreeing that the very limited
range of CS values on the stack already makes it practically
impossible to mistake the frame found.)

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 235a72cf4a..ce910294ea 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -363,7 +363,7 @@  static void show_guest_stack(struct vcpu *v, const struct cpu_user_regs *regs)
 }
 
 /*
- * Notes for get_stack_trace_bottom() and get_stack_dump_bottom()
+ * Notes for get_{stack,shstk}*_bottom() helpers
  *
  * Stack pages 1 - 4:
  *   These are all 1-page IST stacks.  Each of these stacks have an exception
@@ -400,6 +400,18 @@  unsigned long get_stack_trace_bottom(unsigned long sp)
     }
 }
 
+static unsigned long get_shstk_bottom(unsigned long sp)
+{
+    switch ( get_stack_page(sp) )
+    {
+#ifdef CONFIG_XEN_SHSTK
+    case 0:  return ROUNDUP(sp, IST_SHSTK_SIZE) - sizeof(unsigned long);
+    case 5:  return ROUNDUP(sp, PAGE_SIZE)      - sizeof(unsigned long);
+#endif
+    default: return sp - sizeof(unsigned long);
+    }
+}
+
 unsigned long get_stack_dump_bottom(unsigned long sp)
 {
     switch ( get_stack_page(sp) )
@@ -763,6 +775,56 @@  static void do_reserved_trap(struct cpu_user_regs *regs)
           trapnr, vec_name(trapnr), regs->error_code);
 }
 
+static void extable_shstk_fixup(struct cpu_user_regs *regs, unsigned long fixup)
+{
+    unsigned long ssp, *ptr, *base;
+
+    asm ( "rdsspq %0" : "=r" (ssp) : "0" (1) );
+    if ( ssp == 1 )
+        return;
+
+    ptr = _p(ssp);
+    base = _p(get_shstk_bottom(ssp));
+
+    for ( ; ptr < base; ++ptr )
+    {
+        /*
+         * Search for %rip.  The shstk currently looks like this:
+         *
+         *   ...  [Likely pointed to by SSP]
+         *   %cs  [== regs->cs]
+         *   %rip [== regs->rip]
+         *   SSP  [Likely points to 3 slots higher, above %cs]
+         *   ...  [call tree to this function, likely 2/3 slots]
+         *
+         * and we want to overwrite %rip with fixup.  There are two
+         * complications:
+         *   1) We can't depend on SSP values, because they won't differ by 3
+         *      slots if the exception is taken on an IST stack.
+         *   2) There are synthetic (unrealistic but not impossible) scenarios
+         *      where %rip can end up in the call tree to this function, so we
+         *      can't check against regs->rip alone.
+         *
+         * Check for both regs->rip and regs->cs matching.
+         */
+
+        if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
+        {
+            asm ( "wrssq %[fix], %[stk]"
+                  : [stk] "=m" (*ptr)
+                  : [fix] "r" (fixup) );
+            return;
+        }
+    }
+
+    /*
+     * We failed to locate and fix up the shadow IRET frame.  This could be
+     * due to shadow stack corruption, or bad logic above.  We cannot continue
+     * executing the interrupted context.
+     */
+    BUG();
+}
+
 static bool extable_fixup(struct cpu_user_regs *regs, bool print)
 {
     unsigned long fixup = search_exception_table(regs);
@@ -779,6 +841,9 @@  static bool extable_fixup(struct cpu_user_regs *regs, bool print)
                vec_name(regs->entry_vector), regs->error_code,
                _p(regs->rip), _p(regs->rip), _p(fixup));
 
+    if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
+        extable_shstk_fixup(regs, fixup);
+
     regs->rip = fixup;
     this_cpu(last_extable_addr) = regs->rip;
 
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index f7ee3dce91..78ac0df49f 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -708,7 +708,16 @@  exception_with_ints_disabled:
         call  search_pre_exception_table
         testq %rax,%rax                 # no fixup code for faulting EIP?
         jz    1b
-        movq  %rax,UREGS_rip(%rsp)
+        movq  %rax,UREGS_rip(%rsp)      # fixup regular stack
+
+#ifdef CONFIG_XEN_SHSTK
+        mov    $1, %edi
+        rdsspq %rdi
+        cmp    $1, %edi
+        je     .L_exn_shstk_done
+        wrssq  %rax, 8(%rdi)            # fixup shadow stack %rip slot
+.L_exn_shstk_done:
+#endif
         subq  $8,UREGS_rsp(%rsp)        # add ec/ev to previous stack frame
         testb $15,UREGS_rsp(%rsp)       # return %rsp is now aligned?
         jz    1f                        # then there is a pad quadword already