diff mbox series

[12/16] x86/extable: Adjust extable handling to be shadow stack compatible

Message ID 20200501225838.9866-13-andrew.cooper3@citrix.com (mailing list archive)
State Superseded
Headers show
Series x86: Support for CET Supervisor Shadow Stacks | expand

Commit Message

Andrew Cooper May 1, 2020, 10:58 p.m. UTC
When adjusting an IRET frame to recover from a fault, an equivalent
adjustment needs making in the shadow IRET frame.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/traps.c        | 22 ++++++++++++++++++++++
 xen/arch/x86/x86_64/entry.S | 11 ++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

Comments

Jan Beulich May 7, 2020, 1:35 p.m. UTC | #1
On 02.05.2020 00:58, Andrew Cooper wrote:
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -778,6 +778,28 @@ static bool exception_fixup(struct cpu_user_regs *regs, bool print)
>                 vec_name(regs->entry_vector), regs->error_code,
>                 _p(regs->rip), _p(regs->rip), _p(fixup));
>  
> +    if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
> +    {
> +        unsigned long ssp;
> +
> +        asm ("rdsspq %0" : "=r" (ssp) : "0" (1) );
> +        if ( ssp != 1 )
> +        {
> +            unsigned long *ptr = _p(ssp);
> +
> +            /* Search for %rip in the shadow stack, ... */
> +            while ( *ptr != regs->rip )
> +                ptr++;

Wouldn't it be better to bound the loop, as it shouldn't search past
(strictly speaking not even to) the next page boundary? Also you
don't care about the top of the stack (being the to be restored SSP),
do you? I.e. maybe

            while ( *++ptr != regs->rip )
                ;

?

And then - isn't searching for a specific RIP value alone prone to
error, in case it matches an ordinary return address? I.e.
wouldn't you better search for a matching RIP accompanied by a
suitable pointer into the shadow stack and a matching CS value?
Otherwise, ...

> +            ASSERT(ptr[1] == __HYPERVISOR_CS);

... also assert that ptr[-1] points into the shadow stack?

> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -708,7 +708,16 @@ exception_with_ints_disabled:
>          call  search_pre_exception_table
>          testq %rax,%rax                 # no fixup code for faulting EIP?
>          jz    1b
> -        movq  %rax,UREGS_rip(%rsp)
> +        movq  %rax,UREGS_rip(%rsp)      # fixup regular stack
> +
> +#ifdef CONFIG_XEN_SHSTK
> +        mov    $1, %edi
> +        rdsspq %rdi
> +        cmp    $1, %edi
> +        je     .L_exn_shstk_done
> +        wrssq  %rax, (%rdi)             # fixup shadow stack
> +.L_exn_shstk_done:
> +#endif

Again avoid the conditional jump by using alternatives patching?

Jan
Andrew Cooper May 11, 2020, 9:09 p.m. UTC | #2
On 07/05/2020 14:35, Jan Beulich wrote:
> On 02.05.2020 00:58, Andrew Cooper wrote:
>> --- a/xen/arch/x86/traps.c
>> +++ b/xen/arch/x86/traps.c
>> @@ -778,6 +778,28 @@ static bool exception_fixup(struct cpu_user_regs *regs, bool print)
>>                 vec_name(regs->entry_vector), regs->error_code,
>>                 _p(regs->rip), _p(regs->rip), _p(fixup));
>>  
>> +    if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
>> +    {
>> +        unsigned long ssp;
>> +
>> +        asm ("rdsspq %0" : "=r" (ssp) : "0" (1) );
>> +        if ( ssp != 1 )
>> +        {
>> +            unsigned long *ptr = _p(ssp);
>> +
>> +            /* Search for %rip in the shadow stack, ... */
>> +            while ( *ptr != regs->rip )
>> +                ptr++;
> Wouldn't it be better to bound the loop, as it shouldn't search past
> (strictly speaking not even to) the next page boundary? Also you
> don't care about the top of the stack (being the to be restored SSP),
> do you? I.e. maybe
>
>             while ( *++ptr != regs->rip )
>                 ;
>
> ?
>
> And then - isn't searching for a specific RIP value alone prone to
> error, in case it matches an ordinary return address? I.e.
> wouldn't you better search for a matching RIP accompanied by a
> suitable pointer into the shadow stack and a matching CS value?
> Otherwise, ...
>
>> +            ASSERT(ptr[1] == __HYPERVISOR_CS);
> ... also assert that ptr[-1] points into the shadow stack?

So this is the problem I was talking about that the previous context's
SSP isn't stored anywhere helpful.

What we are in practice doing is looking 2 or 3 words up the shadow
stack (depending on exactly how deep our call graph is), to the shadow
IRET frame matching the real IRET frame which regs is pointing to.

Both IRET frames were pushed in the process of generating the exception,
and we've already matched regs->rip to the exception table record.  We
need to fix up regs->rip and the shadow lip to the fixup address.

As we are always fixing up an exception generated from Xen context, we
know that ptr[1] == __HYPERVISOR_CS, and *ptr[-1] = &ptr[2], as we
haven't switched shadow stack as part of taking this exception. 
However, this second point is fragile to exception handlers moving onto IST.

We can't encounter regs->rip in the shadow stack between the current SSP
and the IRET frame we're looking to fix up, or we would have taken a
recursive fault and not reached exception_fixup() to begin with.

Therefore, the loop is reasonably bounded in all cases.

Sadly, there is no RDSS instruction, so we can't actually use shadow
stack reads to spot if we underflowed the shadow stack, and there is no
useful alternative to panic() if we fail to find the shadow IRET frame.

>> --- a/xen/arch/x86/x86_64/entry.S
>> +++ b/xen/arch/x86/x86_64/entry.S
>> @@ -708,7 +708,16 @@ exception_with_ints_disabled:
>>          call  search_pre_exception_table
>>          testq %rax,%rax                 # no fixup code for faulting EIP?
>>          jz    1b
>> -        movq  %rax,UREGS_rip(%rsp)
>> +        movq  %rax,UREGS_rip(%rsp)      # fixup regular stack
>> +
>> +#ifdef CONFIG_XEN_SHSTK
>> +        mov    $1, %edi
>> +        rdsspq %rdi
>> +        cmp    $1, %edi
>> +        je     .L_exn_shstk_done
>> +        wrssq  %rax, (%rdi)             # fixup shadow stack
>> +.L_exn_shstk_done:
>> +#endif
> Again avoid the conditional jump by using alternatives patching?

Well - that depends on whether we're likely to gain any new content in
the pre exception table.

As it stands, it is only the IRET(s) to userspace so would be safe to
turn this into an unconditional alternative.  Even in the crash case, we
won't be returning to guest context after having started the crash
teardown path.

~Andrew
Jan Beulich May 12, 2020, 2:31 p.m. UTC | #3
On 11.05.2020 23:09, Andrew Cooper wrote:
> On 07/05/2020 14:35, Jan Beulich wrote:
>> On 02.05.2020 00:58, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/traps.c
>>> +++ b/xen/arch/x86/traps.c
>>> @@ -778,6 +778,28 @@ static bool exception_fixup(struct cpu_user_regs *regs, bool print)
>>>                 vec_name(regs->entry_vector), regs->error_code,
>>>                 _p(regs->rip), _p(regs->rip), _p(fixup));
>>>  
>>> +    if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
>>> +    {
>>> +        unsigned long ssp;
>>> +
>>> +        asm ("rdsspq %0" : "=r" (ssp) : "0" (1) );
>>> +        if ( ssp != 1 )
>>> +        {
>>> +            unsigned long *ptr = _p(ssp);
>>> +
>>> +            /* Search for %rip in the shadow stack, ... */
>>> +            while ( *ptr != regs->rip )
>>> +                ptr++;
>> Wouldn't it be better to bound the loop, as it shouldn't search past
>> (strictly speaking not even to) the next page boundary? Also you
>> don't care about the top of the stack (being the to be restored SSP),
>> do you? I.e. maybe
>>
>>             while ( *++ptr != regs->rip )
>>                 ;
>>
>> ?
>>
>> And then - isn't searching for a specific RIP value alone prone to
>> error, in case it matches an ordinary return address? I.e.
>> wouldn't you better search for a matching RIP accompanied by a
>> suitable pointer into the shadow stack and a matching CS value?
>> Otherwise, ...
>>
>>> +            ASSERT(ptr[1] == __HYPERVISOR_CS);
>> ... also assert that ptr[-1] points into the shadow stack?
> 
> So this is the problem I was talking about that the previous context's
> SSP isn't stored anywhere helpful.
> 
> What we are in practice doing is looking 2 or 3 words up the shadow
> stack (depending on exactly how deep our call graph is), to the shadow
> IRET frame matching the real IRET frame which regs is pointing to.
> 
> Both IRET frames were pushed in the process of generating the exception,
> and we've already matched regs->rip to the exception table record.  We
> need to fix up regs->rip and the shadow lip to the fixup address.
> 
> As we are always fixing up an exception generated from Xen context, we
> know that ptr[1] == __HYPERVISOR_CS, and *ptr[-1] = &ptr[2], as we
> haven't switched shadow stack as part of taking this exception. 
> However, this second point is fragile to exception handlers moving onto IST.
> 
> We can't encounter regs->rip in the shadow stack between the current SSP
> and the IRET frame we're looking to fix up, or we would have taken a
> recursive fault and not reached exception_fixup() to begin with.

I'm afraid I don't follow here. Consider a function (also)
involved in exception handling having this code sequence:

    call    func
    mov     (%rax), %eax

If the fault we're handling occurred on the MOV and
exception_fixup() is a descendant of func(), then the first
instance of an address on the shadow stack pointing at this
MOV is going to be the one which did not fault.

> Therefore, the loop is reasonably bounded in all cases.
> 
> Sadly, there is no RDSS instruction, so we can't actually use shadow
> stack reads to spot if we underflowed the shadow stack, and there is no
> useful alternative to panic() if we fail to find the shadow IRET frame.

But afaics you don't panic() in this case. Instead you continue
looping until (presumably) you hit some form of fault.

>>> --- a/xen/arch/x86/x86_64/entry.S
>>> +++ b/xen/arch/x86/x86_64/entry.S
>>> @@ -708,7 +708,16 @@ exception_with_ints_disabled:
>>>          call  search_pre_exception_table
>>>          testq %rax,%rax                 # no fixup code for faulting EIP?
>>>          jz    1b
>>> -        movq  %rax,UREGS_rip(%rsp)
>>> +        movq  %rax,UREGS_rip(%rsp)      # fixup regular stack
>>> +
>>> +#ifdef CONFIG_XEN_SHSTK
>>> +        mov    $1, %edi
>>> +        rdsspq %rdi
>>> +        cmp    $1, %edi
>>> +        je     .L_exn_shstk_done
>>> +        wrssq  %rax, (%rdi)             # fixup shadow stack
>>> +.L_exn_shstk_done:
>>> +#endif
>> Again avoid the conditional jump by using alternatives patching?
> 
> Well - that depends on whether we're likely to gain any new content in
> the pre exception table.
> 
> As it stands, it is only the IRET(s) to userspace so would be safe to
> turn this into an unconditional alternative.  Even in the crash case, we
> won't be returning to guest context after having started the crash
> teardown path.

Ah, right - perhaps indeed better keep it as is then.

Jan
Andrew Cooper May 12, 2020, 4:14 p.m. UTC | #4
On 12/05/2020 15:31, Jan Beulich wrote:
> On 11.05.2020 23:09, Andrew Cooper wrote:
>> On 07/05/2020 14:35, Jan Beulich wrote:
>>> On 02.05.2020 00:58, Andrew Cooper wrote:
>>>> --- a/xen/arch/x86/traps.c
>>>> +++ b/xen/arch/x86/traps.c
>>>> @@ -778,6 +778,28 @@ static bool exception_fixup(struct cpu_user_regs *regs, bool print)
>>>>                 vec_name(regs->entry_vector), regs->error_code,
>>>>                 _p(regs->rip), _p(regs->rip), _p(fixup));
>>>>  
>>>> +    if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
>>>> +    {
>>>> +        unsigned long ssp;
>>>> +
>>>> +        asm ("rdsspq %0" : "=r" (ssp) : "0" (1) );
>>>> +        if ( ssp != 1 )
>>>> +        {
>>>> +            unsigned long *ptr = _p(ssp);
>>>> +
>>>> +            /* Search for %rip in the shadow stack, ... */
>>>> +            while ( *ptr != regs->rip )
>>>> +                ptr++;
>>> Wouldn't it be better to bound the loop, as it shouldn't search past
>>> (strictly speaking not even to) the next page boundary? Also you
>>> don't care about the top of the stack (being the to be restored SSP),
>>> do you? I.e. maybe
>>>
>>>             while ( *++ptr != regs->rip )
>>>                 ;
>>>
>>> ?
>>>
>>> And then - isn't searching for a specific RIP value alone prone to
>>> error, in case it matches an ordinary return address? I.e.
>>> wouldn't you better search for a matching RIP accompanied by a
>>> suitable pointer into the shadow stack and a matching CS value?
>>> Otherwise, ...
>>>
>>>> +            ASSERT(ptr[1] == __HYPERVISOR_CS);
>>> ... also assert that ptr[-1] points into the shadow stack?
>> So this is the problem I was talking about that the previous context's
>> SSP isn't stored anywhere helpful.
>>
>> What we are in practice doing is looking 2 or 3 words up the shadow
>> stack (depending on exactly how deep our call graph is), to the shadow
>> IRET frame matching the real IRET frame which regs is pointing to.
>>
>> Both IRET frames were pushed in the process of generating the exception,
>> and we've already matched regs->rip to the exception table record.  We
>> need to fix up regs->rip and the shadow lip to the fixup address.
>>
>> As we are always fixing up an exception generated from Xen context, we
>> know that ptr[1] == __HYPERVISOR_CS, and *ptr[-1] = &ptr[2], as we
>> haven't switched shadow stack as part of taking this exception. 
>> However, this second point is fragile to exception handlers moving onto IST.
>>
>> We can't encounter regs->rip in the shadow stack between the current SSP
>> and the IRET frame we're looking to fix up, or we would have taken a
>> recursive fault and not reached exception_fixup() to begin with.
> I'm afraid I don't follow here. Consider a function (also)
> involved in exception handling having this code sequence:
>
>     call    func
>     mov     (%rax), %eax
>
> If the fault we're handling occured on the MOV and
> exception_fixup() is a descendant of func(), then the first
> instance of an address on the shadow stack pointing at this
> MOV is going to be the one which did not fault.

No.  The moment `call func` returns, the address you're looking to match
is rubble no longer on the stack.  Numerically, it will be located at
SSP-8 when the fault for MOV is generated.

In this exact case, it would be clobbered by the shadow IRET frame, but
even if it was deeper in the call tree, we would still never encounter
it from walking up the shadow stack from SSP.

The only things you will find on the shadow stack is the shadow IRET
frame, handle_exception_saved(), do_*(), fixup_exception(), except that
I'd not like to fix the behaviour to require exactly two function calls
of depth for fixup_exception().

>> Therefore, the loop is reasonably bounded in all cases.
>>
>> Sadly, there is no RDSS instruction, so we can't actually use shadow
>> stack reads to spot if we underflowed the shadow stack, and there is no
>> useful alternative to panic() if we fail to find the shadow IRET frame.
> But afaics you don't panic() in this case. Instead you continue
> looping until (presumably) you hit some form of fault.
>
>>>> --- a/xen/arch/x86/x86_64/entry.S
>>>> +++ b/xen/arch/x86/x86_64/entry.S
>>>> @@ -708,7 +708,16 @@ exception_with_ints_disabled:
>>>>          call  search_pre_exception_table
>>>>          testq %rax,%rax                 # no fixup code for faulting EIP?
>>>>          jz    1b
>>>> -        movq  %rax,UREGS_rip(%rsp)
>>>> +        movq  %rax,UREGS_rip(%rsp)      # fixup regular stack
>>>> +
>>>> +#ifdef CONFIG_XEN_SHSTK
>>>> +        mov    $1, %edi
>>>> +        rdsspq %rdi
>>>> +        cmp    $1, %edi
>>>> +        je     .L_exn_shstk_done
>>>> +        wrssq  %rax, (%rdi)             # fixup shadow stack
>>>> +.L_exn_shstk_done:
>>>> +#endif
>>> Again avoid the conditional jump by using alternatives patching?
>> Well - that depends on whether we're likely to gain any new content in
>> the pre exception table.
>>
>> As it stands, it is only the IRET(s) to userspace so would be safe to
>> turn this into an unconditional alternative.  Even in the crash case, we
>> won't be returning to guest context after having started the crash
>> teardown path.
> Ah, right - perhaps indeed better keep it as is then.

That was my reasoning.  It is a path we expect to execute never with
well behaved guests, so I erred on the safe side.

~Andrew
Jan Beulich May 13, 2020, 9:22 a.m. UTC | #5
On 12.05.2020 18:14, Andrew Cooper wrote:
> On 12/05/2020 15:31, Jan Beulich wrote:
>> On 11.05.2020 23:09, Andrew Cooper wrote:
>>> On 07/05/2020 14:35, Jan Beulich wrote:
>>>> On 02.05.2020 00:58, Andrew Cooper wrote:
>>>>> --- a/xen/arch/x86/traps.c
>>>>> +++ b/xen/arch/x86/traps.c
>>>>> @@ -778,6 +778,28 @@ static bool exception_fixup(struct cpu_user_regs *regs, bool print)
>>>>>                 vec_name(regs->entry_vector), regs->error_code,
>>>>>                 _p(regs->rip), _p(regs->rip), _p(fixup));
>>>>>  
>>>>> +    if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
>>>>> +    {
>>>>> +        unsigned long ssp;
>>>>> +
>>>>> +        asm ("rdsspq %0" : "=r" (ssp) : "0" (1) );
>>>>> +        if ( ssp != 1 )
>>>>> +        {
>>>>> +            unsigned long *ptr = _p(ssp);
>>>>> +
>>>>> +            /* Search for %rip in the shadow stack, ... */
>>>>> +            while ( *ptr != regs->rip )
>>>>> +                ptr++;
>>>> Wouldn't it be better to bound the loop, as it shouldn't search past
>>>> (strictly speaking not even to) the next page boundary? Also you
>>>> don't care about the top of the stack (being the to be restored SSP),
>>>> do you? I.e. maybe
>>>>
>>>>             while ( *++ptr != regs->rip )
>>>>                 ;
>>>>
>>>> ?
>>>>
>>>> And then - isn't searching for a specific RIP value alone prone to
>>>> error, in case it matches an ordinary return address? I.e.
>>>> wouldn't you better search for a matching RIP accompanied by a
>>>> suitable pointer into the shadow stack and a matching CS value?
>>>> Otherwise, ...
>>>>
>>>>> +            ASSERT(ptr[1] == __HYPERVISOR_CS);
>>>> ... also assert that ptr[-1] points into the shadow stack?
>>> So this is the problem I was talking about that the previous context's
>>> SSP isn't stored anywhere helpful.
>>>
>>> What we are in practice doing is looking 2 or 3 words up the shadow
>>> stack (depending on exactly how deep our call graph is), to the shadow
>>> IRET frame matching the real IRET frame which regs is pointing to.
>>>
>>> Both IRET frames were pushed in the process of generating the exception,
>>> and we've already matched regs->rip to the exception table record.  We
>>> need to fix up regs->rip and the shadow lip to the fixup address.
>>>
>>> As we are always fixing up an exception generated from Xen context, we
>>> know that ptr[1] == __HYPERVISOR_CS, and *ptr[-1] = &ptr[2], as we
>>> haven't switched shadow stack as part of taking this exception. 
>>> However, this second point is fragile to exception handlers moving onto IST.
>>>
>>> We can't encounter regs->rip in the shadow stack between the current SSP
>>> and the IRET frame we're looking to fix up, or we would have taken a
>>> recursive fault and not reached exception_fixup() to begin with.
>> I'm afraid I don't follow here. Consider a function (also)
>> involved in exception handling having this code sequence:
>>
>>     call    func
>>     mov     (%rax), %eax
>>
>> If the fault we're handling occurred on the MOV and
>> exception_fixup() is a descendant of func(), then the first
>> instance of an address on the shadow stack pointing at this
>> MOV is going to be the one which did not fault.
> 
> No.  The moment `call func` returns, the address you're looking to match
> is rubble no longer on the stack.  Numerically, it will be located at
> SSP-8 when the fault for MOV is generated.

I think I still didn't explain the scenario sufficiently well:

In some function, say test(), we have above code sequence and
the MOV faults. The exception handling then goes
- assembly entry
- C entry
- test()
- exception_fixup()
in order of (nesting) call sequence, i.e. with _all_ respective
return addresses still on the stack. Since your lookup starts
from SSP, there'll be two active frames on the stack which both
have the same return address.

We may not actively have such a code structure right now, but
it would seem shortsighted to me to not account for the
possibility.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 1cf00c1f4a..2354357cc1 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -778,6 +778,28 @@  static bool exception_fixup(struct cpu_user_regs *regs, bool print)
                vec_name(regs->entry_vector), regs->error_code,
                _p(regs->rip), _p(regs->rip), _p(fixup));
 
+    if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
+    {
+        unsigned long ssp;
+
+        asm ("rdsspq %0" : "=r" (ssp) : "0" (1) );
+        if ( ssp != 1 )
+        {
+            unsigned long *ptr = _p(ssp);
+
+            /* Search for %rip in the shadow stack, ... */
+            while ( *ptr != regs->rip )
+                ptr++;
+
+            ASSERT(ptr[1] == __HYPERVISOR_CS);
+
+            /* ... and adjust to the fixup location. */
+            asm ("wrssq %[fix], %[stk]"
+                 : [stk] "=m" (*ptr)
+                 : [fix] "r" (fixup));
+        }
+    }
+
     regs->rip = fixup;
 
     return true;
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 6403c0ab92..06da350ba0 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -708,7 +708,16 @@  exception_with_ints_disabled:
         call  search_pre_exception_table
         testq %rax,%rax                 # no fixup code for faulting EIP?
         jz    1b
-        movq  %rax,UREGS_rip(%rsp)
+        movq  %rax,UREGS_rip(%rsp)      # fixup regular stack
+
+#ifdef CONFIG_XEN_SHSTK
+        mov    $1, %edi
+        rdsspq %rdi
+        cmp    $1, %edi
+        je     .L_exn_shstk_done
+        wrssq  %rax, (%rdi)             # fixup shadow stack
+.L_exn_shstk_done:
+#endif
         subq  $8,UREGS_rsp(%rsp)        # add ec/ev to previous stack frame
         testb $15,UREGS_rsp(%rsp)       # return %rsp is now aligned?
         jz    1f                        # then there is a pad quadword already