[15/16] x86/entry: Adjust guest paths to be shadow stack compatible

Message ID 20200501225838.9866-16-andrew.cooper3@citrix.com (mailing list archive)
State Superseded
Series x86: Support for CET Supervisor Shadow Stacks

Commit Message

Andrew Cooper May 1, 2020, 10:58 p.m. UTC
The SYSCALL/SYSEXIT paths need to use {SET,CLR}SSBSY.  The IRET to guest paths
must not, which forces us to spill a register to the stack.

The IST switch onto the primary stack is not great as we have an instruction
boundary with no shadow stack.  This is the least bad option available.

These paths are not used before shadow stacks are properly established, so can
use alternatives to avoid extra runtime CET detection logic.
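
As a rough sketch of the intended pairing (illustrative only - the real
hunks, including exact placement and the register spill, are in the
patch below):

        /* SYSCALL entry from guest: claim the supervisor shadow stack
         * token and start running on the shadow stack. */
        ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK

        /* SYSRET exit to guest: SSP is expected to be back at the token
         * by now, so release the busy bit before leaving Xen. */
        rdsspq %rax
        clrssbsy (%rax)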

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/x86_64/compat/entry.S |  2 +-
 xen/arch/x86/x86_64/entry.S        | 19 ++++++++++++++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

Comments

Jan Beulich May 7, 2020, 2:12 p.m. UTC | #1
On 02.05.2020 00:58, Andrew Cooper wrote:
> The SYSCALL/SYSEXIT paths need to use {SET,CLR}SSBSY.

I take it you mean SYSRET, not SYSEXIT. I do think though that you
also need to deal with the SYSENTER entry point we have.

> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -198,7 +198,7 @@ ENTRY(cr4_pv32_restore)
>  
>  /* See lstar_enter for entry register state. */
>  ENTRY(cstar_enter)
> -        /* sti could live here when we don't switch page tables below. */
> +        ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK

I don't see why you delete the comment here (or elsewhere). While
I recall you not really wanting them there, I still think they're
useful to have, and they shouldn't be deleted as a side effect of
an entirely unrelated change. Of course they need to live after
your insertions then.

> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -194,6 +194,15 @@ restore_all_guest:
>          movq  8(%rsp),%rcx            # RIP
>          ja    iret_exit_to_guest
>  
> +        /* Clear the supervisor shadow stack token busy bit. */
> +.macro rag_clrssbsy
> +        push %rax
> +        rdsspq %rax
> +        clrssbsy (%rax)
> +        pop %rax
> +.endm
> +        ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK

In principle you could get away without spilling %rax:

        cmpl  $1,%ecx
        ja    iret_exit_to_guest

        /* Clear the supervisor shadow stack token busy bit. */
.macro rag_clrssbsy
        rdsspq %rcx
        clrssbsy (%rcx)
.endm
        ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
        movq  8(%rsp),%rcx            # RIP
        cmpw  $FLAT_USER_CS32,16(%rsp)# CS
        movq  32(%rsp),%rsp           # RSP
        je    1f
        sysretq
1:      sysretl

        ALIGN
/* No special register assumptions. */
iret_exit_to_guest:
        movq  8(%rsp),%rcx            # RIP
        andl  $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),24(%rsp)
        ...

Also - what about CLRSSBSY failing? It would seem easier to diagnose
this right here than when getting presumably #DF upon next entry into
Xen. At the very least I think it deserves a comment if an error case
does not get handled.
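
Something along these lines (purely illustrative - the label and the use
of UD2 as the reporting mechanism are just placeholders) would at least
surface the failure at the point it happens:

        rdsspq %rcx
        clrssbsy (%rcx)
        jnc   .Lclrssbsy_ok   /* CF set => no busy token was cleared */
        ud2                   /* placeholder for a proper diagnostic */
.Lclrssbsy_ok: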

Somewhat similar for SETSSBSY, except there things get complicated by
it raising #CP instead of setting EFLAGS.CF: Aiui it would require us
to handle #CP on an IST stack in order to avoid #DF there.

> @@ -877,6 +886,14 @@ handle_ist_exception:
>          movl  $UREGS_kernel_sizeof/8,%ecx
>          movq  %rdi,%rsp
>          rep   movsq
> +
> +        /* Switch Shadow Stacks */
> +.macro ist_switch_shstk
> +        rdsspq %rdi
> +        clrssbsy (%rdi)
> +        setssbsy
> +.endm

Could you extend the comment to mention the caveat that you point
out in the description?

Jan
Andrew Cooper May 7, 2020, 3:50 p.m. UTC | #2
On 07/05/2020 15:12, Jan Beulich wrote:
> On 02.05.2020 00:58, Andrew Cooper wrote:
>> The SYSCALL/SYSEXIT paths need to use {SET,CLR}SSBSY.
> I take it you mean SYSRET, not SYSEXIT.

I do, sorry.

> I do think though that you
> also need to deal with the SYSENTER entry point we have.

Oh - so we do.

>> --- a/xen/arch/x86/x86_64/compat/entry.S
>> +++ b/xen/arch/x86/x86_64/compat/entry.S
>> @@ -198,7 +198,7 @@ ENTRY(cr4_pv32_restore)
>>  
>>  /* See lstar_enter for entry register state. */
>>  ENTRY(cstar_enter)
>> -        /* sti could live here when we don't switch page tables below. */
>> +        ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
> I don't see why you delete the comment here (or elsewhere). While
> I recall you not really wanting them there, I still think they're
> useful to have, and they shouldn't be deleted as a side effect of
> an entirely unrelated change. Of course they need to live after
> your insertions then.

Do you not remember Juergen's performance testing results concerning this
comment?  The results were provably worse.

It is a useless comment.  Sure, it's technically accurate, but so are an
arbitrarily large number of other comments about how we could permute
the code.

It has already been concluded that we won't be making the suggested
change.  Having a /* TODO - doing X will make the system slower */ isn't
something we should have; it adds to the complexity of the code and
tricks people into thinking that something should be done.

>> --- a/xen/arch/x86/x86_64/entry.S
>> +++ b/xen/arch/x86/x86_64/entry.S
>> @@ -194,6 +194,15 @@ restore_all_guest:
>>          movq  8(%rsp),%rcx            # RIP
>>          ja    iret_exit_to_guest
>>  
>> +        /* Clear the supervisor shadow stack token busy bit. */
>> +.macro rag_clrssbsy
>> +        push %rax
>> +        rdsspq %rax
>> +        clrssbsy (%rax)
>> +        pop %rax
>> +.endm
>> +        ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
> In principle you could get away without spilling %rax:
>
>         cmpl  $1,%ecx
>         ja    iret_exit_to_guest
>
>         /* Clear the supervisor shadow stack token busy bit. */
> .macro rag_clrssbsy
>         rdsspq %rcx
>         clrssbsy (%rcx)
> .endm
>         ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
>         movq  8(%rsp),%rcx            # RIP
>         cmpw  $FLAT_USER_CS32,16(%rsp)# CS
>         movq  32(%rsp),%rsp           # RSP
>         je    1f
>         sysretq
> 1:      sysretl
>
>         ALIGN
> /* No special register assumptions. */
> iret_exit_to_guest:
>         movq  8(%rsp),%rcx            # RIP
>         andl  $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),24(%rsp)
>         ...
>
> Also - what about CLRSSBSY failing? It would seem easier to diagnose
> this right here than when getting presumably #DF upon next entry into
> Xen. At the very least I think it deserves a comment if an error case
> does not get handled.

I did consider this, but ultimately decided against it.

You can't have an unlikely block inside an alternative block because the
jmp's displacement doesn't get fixed up.  Keeping everything inline puts
an incorrect statically-predicted branch in program flow.

Most important, however, is that the SYSRET path is vastly less common
than the IRET path.  There is no easy way to proactively spot problems
in the IRET path, which means that conditions leading to a problem are
already far more likely to manifest as #DF, so there is very little
value in adding complexity to the SYSRET path in the first place.

> Somewhat similar for SETSSBSY, except there things get complicated by
> it raising #CP instead of setting EFLAGS.CF: Aiui it would require us
> to handle #CP on an IST stack in order to avoid #DF there.

Right, but having #CP as IST gives us far worse problems.

Being able to spot #CP vs #DF doesn't help usefully.  It's still some
arbitrary period of time after the damage was done.

Any nesting of #CP (including fault on IRET out) results in losing
program state and entering an infinite loop.

The cases which end up as #DF are properly fatal to the system, and we
at least get a clean crash out of it.

>> @@ -877,6 +886,14 @@ handle_ist_exception:
>>          movl  $UREGS_kernel_sizeof/8,%ecx
>>          movq  %rdi,%rsp
>>          rep   movsq
>> +
>> +        /* Switch Shadow Stacks */
>> +.macro ist_switch_shstk
>> +        rdsspq %rdi
>> +        clrssbsy (%rdi)
>> +        setssbsy
>> +.endm
> Could you extend the comment to mention the caveat that you point
> out in the description?

Ok.
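
Something like this perhaps (wording to be refined - the annotations are
new, the instructions are unchanged from the patch, and the MSR named is
the architectural IA32_PL0_SSP):

        rdsspq %rdi        /* %rdi = current SSP (IST shadow stack) */
        clrssbsy (%rdi)    /* drop the IST token's busy bit; SSP becomes 0 */
        /* Instruction boundary here with no active shadow stack. */
        setssbsy           /* claim the primary token; SSP <- IA32_PL0_SSP */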

~Andrew
Jan Beulich May 7, 2020, 4:15 p.m. UTC | #3
On 07.05.2020 17:50, Andrew Cooper wrote:
> On 07/05/2020 15:12, Jan Beulich wrote:
>> On 02.05.2020 00:58, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/x86_64/compat/entry.S
>>> +++ b/xen/arch/x86/x86_64/compat/entry.S
>>> @@ -198,7 +198,7 @@ ENTRY(cr4_pv32_restore)
>>>  
>>>  /* See lstar_enter for entry register state. */
>>>  ENTRY(cstar_enter)
>>> -        /* sti could live here when we don't switch page tables below. */
>>> +        ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
>> I don't see why you delete the comment here (or elsewhere). While
>> I recall you not really wanting them there, I still think they're
>> useful to have, and they shouldn't be deleted as a side effect of
>> an entirely unrelated change. Of course they need to live after
>> your insertions then.
> 
> Do you not remember Juergen's performance testing results concerning this
> comment?  The results were provably worse.
> 
> It is a useless comment.  Sure, it's technically accurate, but so are an
> arbitrarily large number of other comments about how we could permute
> the code.
> 
> It has already been concluded that we won't be making the suggested
> change.  Having a /* TODO - doing X will make the system slower */ isn't
> something we should have; it adds to the complexity of the code and
> tricks people into thinking that something should be done.

A separate patch is still the way to go then, with reference to
the claimed performance testing results.

>>> --- a/xen/arch/x86/x86_64/entry.S
>>> +++ b/xen/arch/x86/x86_64/entry.S
>>> @@ -194,6 +194,15 @@ restore_all_guest:
>>>          movq  8(%rsp),%rcx            # RIP
>>>          ja    iret_exit_to_guest
>>>  
>>> +        /* Clear the supervisor shadow stack token busy bit. */
>>> +.macro rag_clrssbsy
>>> +        push %rax
>>> +        rdsspq %rax
>>> +        clrssbsy (%rax)
>>> +        pop %rax
>>> +.endm
>>> +        ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
>> In principle you could get away without spilling %rax:
>>
>>         cmpl  $1,%ecx
>>         ja    iret_exit_to_guest
>>
>>         /* Clear the supervisor shadow stack token busy bit. */
>> .macro rag_clrssbsy
>>         rdsspq %rcx
>>         clrssbsy (%rcx)
>> .endm
>>         ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
>>         movq  8(%rsp),%rcx            # RIP
>>         cmpw  $FLAT_USER_CS32,16(%rsp)# CS
>>         movq  32(%rsp),%rsp           # RSP
>>         je    1f
>>         sysretq
>> 1:      sysretl
>>
>>         ALIGN
>> /* No special register assumptions. */
>> iret_exit_to_guest:
>>         movq  8(%rsp),%rcx            # RIP
>>         andl  $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),24(%rsp)
>>         ...
>>
>> Also - what about CLRSSBSY failing? It would seem easier to diagnose
>> this right here than when getting presumably #DF upon next entry into
>> Xen. At the very least I think it deserves a comment if an error case
>> does not get handled.
> 
> I did consider this, but ultimately decided against it.
> 
> You can't have an unlikely block inside an alternative block because the
> jmp's displacement doesn't get fixed up.

We do fix up unconditional JMP/CALL displacements; I don't
see why we couldn't also do so for conditional ones.

>  Keeping everything inline puts
> an incorrect statically-predicted branch in program flow.
> 
> Most important, however, is that the SYSRET path is vastly less common
> than the IRET path.  There is no easy way to proactively spot problems
> in the IRET path, which means that conditions leading to a problem are
> already far more likely to manifest as #DF, so there is very little
> value in adding complexity to the SYSRET path in the first place.

The SYSRET path being uncommon is a problem by itself imo, if
that's indeed the case. I'm sure I've suggested before that
we convert frames to TRAP_syscall ones whenever possible,
such that we wouldn't go the slower IRET path.

>> Somewhat similar for SETSSBSY, except there things get complicated by
>> it raising #CP instead of setting EFLAGS.CF: Aiui it would require us
>> to handle #CP on an IST stack in order to avoid #DF there.
> 
> Right, but having #CP as IST gives us far worse problems.
> 
> Being able to spot #CP vs #DF doesn't help usefully.  It's still some
> arbitrary period of time after the damage was done.
> 
> Any nesting of #CP (including fault on IRET out) results in losing
> program state and entering an infinite loop.
> 
> The cases which end up as #DF are properly fatal to the system, and we
> at least get a clean crash out of it.

May I suggest that all of this gets spelled out in at least
the description of the patch, so that it can be properly
understood (and, if need be, revisited) later on?

Jan
Andrew Cooper May 11, 2020, 9:45 p.m. UTC | #4
On 07/05/2020 17:15, Jan Beulich wrote:
>>>> --- a/xen/arch/x86/x86_64/entry.S
>>>> +++ b/xen/arch/x86/x86_64/entry.S
>>>> @@ -194,6 +194,15 @@ restore_all_guest:
>>>>          movq  8(%rsp),%rcx            # RIP
>>>>          ja    iret_exit_to_guest
>>>>  
>>>> +        /* Clear the supervisor shadow stack token busy bit. */
>>>> +.macro rag_clrssbsy
>>>> +        push %rax
>>>> +        rdsspq %rax
>>>> +        clrssbsy (%rax)
>>>> +        pop %rax
>>>> +.endm
>>>> +        ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
>>> In principle you could get away without spilling %rax:
>>>
>>>         cmpl  $1,%ecx
>>>         ja    iret_exit_to_guest
>>>
>>>         /* Clear the supervisor shadow stack token busy bit. */
>>> .macro rag_clrssbsy
>>>         rdsspq %rcx
>>>         clrssbsy (%rcx)
>>> .endm
>>>         ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
>>>         movq  8(%rsp),%rcx            # RIP
>>>         cmpw  $FLAT_USER_CS32,16(%rsp)# CS
>>>         movq  32(%rsp),%rsp           # RSP
>>>         je    1f
>>>         sysretq
>>> 1:      sysretl
>>>
>>>         ALIGN
>>> /* No special register assumptions. */
>>> iret_exit_to_guest:
>>>         movq  8(%rsp),%rcx            # RIP
>>>         andl  $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),24(%rsp)
>>>         ...
>>>
>>> Also - what about CLRSSBSY failing? It would seem easier to diagnose
>>> this right here than when getting presumably #DF upon next entry into
>>> Xen. At the very least I think it deserves a comment if an error case
>>> does not get handled.
>> I did consider this, but ultimately decided against it.
>>
>> You can't have an unlikely block inside an alternative block because the
>> jmp's displacement doesn't get fixed up.
> We do fix up unconditional JMP/CALL displacements; I don't
> see why we couldn't also do so for conditional ones.

Only for the first instruction in the block.

We do not decode the entire block of instructions and fix up each
displacement.

>
>>   Keeping everything inline puts
>> an incorrect statically-predicted branch in program flow.
>>
>> Most important, however, is that the SYSRET path is vastly less common
>> than the IRET path.  There is no easy way to proactively spot problems
>> in the IRET path, which means that conditions leading to a problem are
>> already far more likely to manifest as #DF, so there is very little
>> value in adding complexity to the SYSRET path in the first place.
> The SYSRET path being uncommon is a problem by itself imo, if
> that's indeed the case. I'm sure I've suggested before that
> we convert frames to TRAP_syscall ones whenever possible,
> such that we wouldn't go the slower IRET path.

It is not possible to convert any.

The opportunistic SYSRET logic in Linux loses you performance in
reality.  It's just that the extra conditionals are very highly predicted
and totally dominated by the ring transition cost.

You can create a synthetic test case where the opportunistic logic makes
a performance win, but the chances of encountering real world code where
TRAP_syscall is clear and %r11 and %rcx match flags/rip are about 1 in 2 ^ 128.

It is very much not worth the extra code and cycles taken to implement.
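
For concreteness, the check such logic would need is roughly the
following (hypothetical and not proposed - the label is made up, and the
UREGS_* offsets are the usual asm-offsets ones, taken here against a
pointer to the full frame rather than the adjusted %rsp):

        movq  UREGS_rcx(%rbx), %rax     # guest %rcx ...
        cmpq  UREGS_rip(%rbx), %rax     # ... must already equal saved RIP
        jne   .Lkeep_iret
        movq  UREGS_r11(%rbx), %rax     # guest %r11 ...
        cmpq  UREGS_eflags(%rbx), %rax  # ... must already equal saved RFLAGS
        jne   .Lkeep_iret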

>>> Somewhat similar for SETSSBSY, except there things get complicated by
>>> it raising #CP instead of setting EFLAGS.CF: Aiui it would require us
>>> to handle #CP on an IST stack in order to avoid #DF there.
>> Right, but having #CP as IST gives us far worse problems.
>>
>> Being able to spot #CP vs #DF doesn't help usefully.  It's still some
>> arbitrary period of time after the damage was done.
>>
>> Any nesting of #CP (including fault on IRET out) results in losing
>> program state and entering an infinite loop.
>>
>> The cases which end up as #DF are properly fatal to the system, and we
>> at least get a clean crash out of it.
> May I suggest that all of this gets spelled out in at least
> the description of the patch, so that it can be properly
> understood (and, if need be, revisited) later on?

Is this really the right patch to do that?

I do eventually plan to put a whole load of these kinds of details into
the hypervisor guide.

~Andrew
Jan Beulich May 12, 2020, 2:56 p.m. UTC | #5
On 11.05.2020 23:45, Andrew Cooper wrote:
> On 07/05/2020 17:15, Jan Beulich wrote:
>>>>> --- a/xen/arch/x86/x86_64/entry.S
>>>>> +++ b/xen/arch/x86/x86_64/entry.S
>>>>> @@ -194,6 +194,15 @@ restore_all_guest:
>>>>>          movq  8(%rsp),%rcx            # RIP
>>>>>          ja    iret_exit_to_guest
>>>>>  
>>>>> +        /* Clear the supervisor shadow stack token busy bit. */
>>>>> +.macro rag_clrssbsy
>>>>> +        push %rax
>>>>> +        rdsspq %rax
>>>>> +        clrssbsy (%rax)
>>>>> +        pop %rax
>>>>> +.endm
>>>>> +        ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
>>>> In principle you could get away without spilling %rax:
>>>>
>>>>         cmpl  $1,%ecx
>>>>         ja    iret_exit_to_guest
>>>>
>>>>         /* Clear the supervisor shadow stack token busy bit. */
>>>> .macro rag_clrssbsy
>>>>         rdsspq %rcx
>>>>         clrssbsy (%rcx)
>>>> .endm
>>>>         ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
>>>>         movq  8(%rsp),%rcx            # RIP
>>>>         cmpw  $FLAT_USER_CS32,16(%rsp)# CS
>>>>         movq  32(%rsp),%rsp           # RSP
>>>>         je    1f
>>>>         sysretq
>>>> 1:      sysretl
>>>>
>>>>         ALIGN
>>>> /* No special register assumptions. */
>>>> iret_exit_to_guest:
>>>>         movq  8(%rsp),%rcx            # RIP
>>>>         andl  $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),24(%rsp)
>>>>         ...
>>>>
>>>> Also - what about CLRSSBSY failing? It would seem easier to diagnose
>>>> this right here than when getting presumably #DF upon next entry into
>>>> Xen. At the very least I think it deserves a comment if an error case
>>>> does not get handled.
>>> I did consider this, but ultimately decided against it.
>>>
>>> You can't have an unlikely block inside an alternative block because the
>>> jmp's displacement doesn't get fixed up.
>> We do fix up unconditional JMP/CALL displacements; I don't
>> see why we couldn't also do so for conditional ones.
> 
> Only for the first instruction in the block.
> 
> We do not decode the entire block of instructions and fix up each
> displacement.

Right, but that's not overly difficult to overcome - simply split
the ALTERNATIVE in two.
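
I.e. something along these lines (illustrative only - it would further
require teaching the patching logic to fix up a Jcc at the start of a
replacement, and .Lclrssbsy_failed is a made-up label for whatever
diagnostic we would want):

.macro rag_clrssbsy_insns
        rdsspq %rcx
        clrssbsy (%rcx)
.endm
        ALTERNATIVE "", rag_clrssbsy_insns, X86_FEATURE_XEN_SHSTK
        ALTERNATIVE "", "jc .Lclrssbsy_failed", X86_FEATURE_XEN_SHSTK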

>>>   Keeping everything inline puts
>>> an incorrect statically-predicted branch in program flow.
>>>
>>> Most important, however, is that the SYSRET path is vastly less common
>>> than the IRET path.  There is no easy way to proactively spot problems
>>> in the IRET path, which means that conditions leading to a problem are
>>> already far more likely to manifest as #DF, so there is very little
>>> value in adding complexity to the SYSRET path in the first place.
>> The SYSRET path being uncommon is a problem by itself imo, if
>> that's indeed the case. I'm sure I've suggested before that
>> we convert frames to TRAP_syscall ones whenever possible,
>> such that we wouldn't go the slower IRET path.
> 
> It is not possible to convert any.
> 
> The opportunistic SYSRET logic in Linux loses you performance in
> reality.  It's just that the extra conditionals are very highly predicted
> and totally dominated by the ring transition cost.
> 
> You can create a synthetic test case where the opportunistic logic makes
> a performance win, but the chances of encountering real world code where
> TRAP_syscall is clear and %r11 and %rcx match flags/rip are about 1 in 2 ^ 128.
> 
> It is very much not worth the extra code and cycles taken to implement.

Oops, yes, for a moment I forgot this minor detail of %rcx/%r11.

>>>> Somewhat similar for SETSSBSY, except there things get complicated by
>>>> it raising #CP instead of setting EFLAGS.CF: Aiui it would require us
>>>> to handle #CP on an IST stack in order to avoid #DF there.
>>> Right, but having #CP as IST gives us far worse problems.
>>>
>>> Being able to spot #CP vs #DF doesn't help usefully.  It's still some
>>> arbitrary period of time after the damage was done.
>>>
>>> Any nesting of #CP (including fault on IRET out) results in losing
>>> program state and entering an infinite loop.
>>>
>>> The cases which end up as #DF are properly fatal to the system, and we
>>> at least get a clean crash out of it.
>> May I suggest that all of this gets spelled out in at least
>> the description of the patch, so that it can be properly
>> understood (and, if need be, revisited) later on?
> 
> Is this really the right patch to do that?
> 
> I do eventually plan to put a whole load of these kinds of details into
> the hypervisor guide.

Well, as you can see, having some of these considerations and
decisions spelled out would already have helped review here.
Whether this is exactly the right patch I'm not sure, but I'd
find it quite helpful if such was available at least for
cross referencing.

Jan

Patch

diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index 3cd375bd48..7816d0d4ac 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -198,7 +198,7 @@  ENTRY(cr4_pv32_restore)
 
 /* See lstar_enter for entry register state. */
 ENTRY(cstar_enter)
-        /* sti could live here when we don't switch page tables below. */
+        ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
         CR4_PV32_RESTORE
         movq  8(%rsp),%rax /* Restore %rax. */
         movq  $FLAT_USER_SS32, 8(%rsp) /* Assume a 64bit domain.  Compat handled lower. */
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 06da350ba0..91cd8f94fd 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -194,6 +194,15 @@  restore_all_guest:
         movq  8(%rsp),%rcx            # RIP
         ja    iret_exit_to_guest
 
+        /* Clear the supervisor shadow stack token busy bit. */
+.macro rag_clrssbsy
+        push %rax
+        rdsspq %rax
+        clrssbsy (%rax)
+        pop %rax
+.endm
+        ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
+
         cmpw  $FLAT_USER_CS32,16(%rsp)# CS
         movq  32(%rsp),%rsp           # RSP
         je    1f
@@ -226,7 +235,7 @@  iret_exit_to_guest:
  * %ss must be saved into the space left by the trampoline.
  */
 ENTRY(lstar_enter)
-        /* sti could live here when we don't switch page tables below. */
+        ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
         movq  8(%rsp),%rax /* Restore %rax. */
         movq  $FLAT_KERNEL_SS,8(%rsp)
         pushq %r11
@@ -877,6 +886,14 @@  handle_ist_exception:
         movl  $UREGS_kernel_sizeof/8,%ecx
         movq  %rdi,%rsp
         rep   movsq
+
+        /* Switch Shadow Stacks */
+.macro ist_switch_shstk
+        rdsspq %rdi
+        clrssbsy (%rdi)
+        setssbsy
+.endm
+        ALTERNATIVE "", ist_switch_shstk, X86_FEATURE_XEN_SHSTK
 1:
 #else
         ASSERT_CONTEXT_IS_XEN