diff mbox series

[v5,04/27] x86/fpu/xstate: Add XSAVES system states for shadow stack

Message ID 20181011151523.27101-5-yu-cheng.yu@intel.com (mailing list archive)
State New, archived
Headers show
Series Control Flow Enforcement: Shadow Stack | expand

Commit Message

Yu-cheng Yu Oct. 11, 2018, 3:15 p.m. UTC
Intel Control-flow Enforcement Technology (CET) introduces the
following MSRs into the XSAVES system states.

    IA32_U_CET (user-mode CET settings),
    IA32_PL3_SSP (user-mode shadow stack),
    IA32_PL0_SSP (kernel-mode shadow stack),
    IA32_PL1_SSP (ring-1 shadow stack),
    IA32_PL2_SSP (ring-2 shadow stack).

Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
---
 arch/x86/include/asm/fpu/types.h            | 22 +++++++++++++++++++++
 arch/x86/include/asm/fpu/xstate.h           |  4 +++-
 arch/x86/include/uapi/asm/processor-flags.h |  2 ++
 arch/x86/kernel/fpu/xstate.c                | 10 ++++++++++
 4 files changed, 37 insertions(+), 1 deletion(-)

Comments

Borislav Petkov Nov. 8, 2018, 6:40 p.m. UTC | #1
On Thu, Oct 11, 2018 at 08:15:00AM -0700, Yu-cheng Yu wrote:
> Intel Control-flow Enforcement Technology (CET) introduces the
> following MSRs into the XSAVES system states.
> 
>     IA32_U_CET (user-mode CET settings),
>     IA32_PL3_SSP (user-mode shadow stack),
>     IA32_PL0_SSP (kernel-mode shadow stack),
>     IA32_PL1_SSP (ring-1 shadow stack),
>     IA32_PL2_SSP (ring-2 shadow stack).

And?

That commit message got chopped off here, it seems.

> Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
> ---
>  arch/x86/include/asm/fpu/types.h            | 22 +++++++++++++++++++++
>  arch/x86/include/asm/fpu/xstate.h           |  4 +++-
>  arch/x86/include/uapi/asm/processor-flags.h |  2 ++
>  arch/x86/kernel/fpu/xstate.c                | 10 ++++++++++
>  4 files changed, 37 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
> index 202c53918ecf..e55d51d172f1 100644
> --- a/arch/x86/include/asm/fpu/types.h
> +++ b/arch/x86/include/asm/fpu/types.h
> @@ -114,6 +114,9 @@ enum xfeature {
>  	XFEATURE_Hi16_ZMM,
>  	XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
>  	XFEATURE_PKRU,
> +	XFEATURE_RESERVED,
> +	XFEATURE_SHSTK_USER,
> +	XFEATURE_SHSTK_KERNEL,
>  
>  	XFEATURE_MAX,
>  };
> @@ -128,6 +131,8 @@ enum xfeature {
>  #define XFEATURE_MASK_Hi16_ZMM		(1 << XFEATURE_Hi16_ZMM)
>  #define XFEATURE_MASK_PT		(1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
>  #define XFEATURE_MASK_PKRU		(1 << XFEATURE_PKRU)
> +#define XFEATURE_MASK_SHSTK_USER	(1 << XFEATURE_SHSTK_USER)
> +#define XFEATURE_MASK_SHSTK_KERNEL	(1 << XFEATURE_SHSTK_KERNEL)
>  
>  #define XFEATURE_MASK_FPSSE		(XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
>  #define XFEATURE_MASK_AVX512		(XFEATURE_MASK_OPMASK \
> @@ -229,6 +234,23 @@ struct pkru_state {
>  	u32				pad;
>  } __packed;
>  
> +/*
> + * State component 11 is Control flow Enforcement user states

Why the Camel-cased naming?

"Control" then "flow" then capitalized again "Enforcement".

Fix all occurrences pls, especially the user-visible strings.

> + */
> +struct cet_user_state {
> +	u64 u_cet;	/* user control flow settings */
> +	u64 user_ssp;	/* user shadow stack pointer */

Prefix both with "usr_" instead.

> +} __packed;
> +
> +/*
> + * State component 12 is Control flow Enforcement kernel states
> + */
> +struct cet_kernel_state {
> +	u64 kernel_ssp;	/* kernel shadow stack */
> +	u64 pl1_ssp;	/* ring-1 shadow stack */
> +	u64 pl2_ssp;	/* ring-2 shadow stack */

Just write "privilege level" everywhere - not "ring".

Btw, do you see how the type and the name of all those other fields in
that file are tabulated? Except yours...

> +} __packed;
> +
>  struct xstate_header {
>  	u64				xfeatures;
>  	u64				xcomp_bv;
> diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
> index d8e2ec99f635..18b60748a34d 100644
> --- a/arch/x86/include/asm/fpu/xstate.h
> +++ b/arch/x86/include/asm/fpu/xstate.h
> @@ -28,7 +28,9 @@
>  				  XFEATURE_MASK_Hi16_ZMM | \
>  				  XFEATURE_MASK_PKRU | \
>  				  XFEATURE_MASK_BNDREGS | \
> -				  XFEATURE_MASK_BNDCSR)
> +				  XFEATURE_MASK_BNDCSR | \
> +				  XFEATURE_MASK_SHSTK_USER | \
> +				  XFEATURE_MASK_SHSTK_KERNEL)
>  
>  #ifdef CONFIG_X86_64
>  #define REX_PREFIX	"0x48, "
> diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
> index bcba3c643e63..25311ec4b731 100644
> --- a/arch/x86/include/uapi/asm/processor-flags.h
> +++ b/arch/x86/include/uapi/asm/processor-flags.h
> @@ -130,6 +130,8 @@
>  #define X86_CR4_SMAP		_BITUL(X86_CR4_SMAP_BIT)
>  #define X86_CR4_PKE_BIT		22 /* enable Protection Keys support */
>  #define X86_CR4_PKE		_BITUL(X86_CR4_PKE_BIT)
> +#define X86_CR4_CET_BIT		23 /* enable Control flow Enforcement */
> +#define X86_CR4_CET		_BITUL(X86_CR4_CET_BIT)
>  
>  /*
>   * x86-64 Task Priority Register, CR8
> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> index 605ec6decf3e..ad36ea28bfd1 100644
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -35,6 +35,9 @@ static const char *xfeature_names[] =
>  	"Processor Trace (unused)"	,
>  	"Protection Keys User registers",
>  	"unknown xstate feature"	,
> +	"Control flow User registers"	,
> +	"Control flow Kernel registers"	,
> +	"unknown xstate feature"	,

So there are two "unknown xstate feature" array elems now...

>  static short xsave_cpuid_features[] __initdata = {
> @@ -48,6 +51,9 @@ static short xsave_cpuid_features[] __initdata = {
>  	X86_FEATURE_AVX512F,
>  	X86_FEATURE_INTEL_PT,
>  	X86_FEATURE_PKU,
> +	0,		   /* Unused */

What's that for?

> +	X86_FEATURE_SHSTK, /* XFEATURE_SHSTK_USER */
> +	X86_FEATURE_SHSTK, /* XFEATURE_SHSTK_KERNEL */
>  };
Yu-cheng Yu Nov. 8, 2018, 8:40 p.m. UTC | #2
On Thu, 2018-11-08 at 19:40 +0100, Borislav Petkov wrote:
> On Thu, Oct 11, 2018 at 08:15:00AM -0700, Yu-cheng Yu wrote:
> > [...] 
> > +/*
> > + * State component 11 is Control flow Enforcement user states
> 
> Why the Camel-cased naming?
> 
> "Control" then "flow" then capitalized again "Enforcement".
> 
> Fix all occurrences pls, especially the user-visible strings.

I will change it to "Control-flow Enforcement" everywhere.

> > + */
> > +struct cet_user_state {
> > +	u64 u_cet;	/* user control flow settings */
> > +	u64 user_ssp;	/* user shadow stack pointer */
> 
> Prefix both with "usr_" instead.

Ok.

> [...]
> 
> Just write "privilege level" everywhere - not "ring".
> 
> Btw, do you see how the type and the name of all those other fields in
> that file are tabulated? Except yours...

I will fix it.

[...] 
> > 
> > diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> > index 605ec6decf3e..ad36ea28bfd1 100644
> > --- a/arch/x86/kernel/fpu/xstate.c
> > +++ b/arch/x86/kernel/fpu/xstate.c
> > @@ -35,6 +35,9 @@ static const char *xfeature_names[] =
> >  	"Processor Trace (unused)"	,
> >  	"Protection Keys User registers",
> >  	"unknown xstate feature"	,
> > +	"Control flow User registers"	,
> > +	"Control flow Kernel registers"	,
> > +	"unknown xstate feature"	,
> 
> So there are two "unknown xstate feature" array elems now...
> 
> >  static short xsave_cpuid_features[] __initdata = {
> > @@ -48,6 +51,9 @@ static short xsave_cpuid_features[] __initdata = {
> >  	X86_FEATURE_AVX512F,
> >  	X86_FEATURE_INTEL_PT,
> >  	X86_FEATURE_PKU,
> > +	0,		   /* Unused */
> 
> What's that for?

In fpu_init_system_xstate(), we test and clear features that are not enabled.
There we depend on the order of these elements.  This is the tenth "unknown
xstate feature".

Yu-cheng
Andy Lutomirski Nov. 8, 2018, 8:46 p.m. UTC | #3
On Thu, Oct 11, 2018 at 8:20 AM Yu-cheng Yu <yu-cheng.yu@intel.com> wrote:
>
> Intel Control-flow Enforcement Technology (CET) introduces the
> following MSRs into the XSAVES system states.
>
>     IA32_U_CET (user-mode CET settings),
>     IA32_PL3_SSP (user-mode shadow stack),
>     IA32_PL0_SSP (kernel-mode shadow stack),
>     IA32_PL1_SSP (ring-1 shadow stack),
>     IA32_PL2_SSP (ring-2 shadow stack).
>
> Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
> ---
>  arch/x86/include/asm/fpu/types.h            | 22 +++++++++++++++++++++
>  arch/x86/include/asm/fpu/xstate.h           |  4 +++-
>  arch/x86/include/uapi/asm/processor-flags.h |  2 ++
>  arch/x86/kernel/fpu/xstate.c                | 10 ++++++++++
>  4 files changed, 37 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
> index 202c53918ecf..e55d51d172f1 100644
> --- a/arch/x86/include/asm/fpu/types.h
> +++ b/arch/x86/include/asm/fpu/types.h
> @@ -114,6 +114,9 @@ enum xfeature {
>         XFEATURE_Hi16_ZMM,
>         XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
>         XFEATURE_PKRU,
> +       XFEATURE_RESERVED,
> +       XFEATURE_SHSTK_USER,
> +       XFEATURE_SHSTK_KERNEL,
>
>         XFEATURE_MAX,
>  };
> @@ -128,6 +131,8 @@ enum xfeature {
>  #define XFEATURE_MASK_Hi16_ZMM         (1 << XFEATURE_Hi16_ZMM)
>  #define XFEATURE_MASK_PT               (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
>  #define XFEATURE_MASK_PKRU             (1 << XFEATURE_PKRU)
> +#define XFEATURE_MASK_SHSTK_USER       (1 << XFEATURE_SHSTK_USER)
> +#define XFEATURE_MASK_SHSTK_KERNEL     (1 << XFEATURE_SHSTK_KERNEL)
>
>  #define XFEATURE_MASK_FPSSE            (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
>  #define XFEATURE_MASK_AVX512           (XFEATURE_MASK_OPMASK \
> @@ -229,6 +234,23 @@ struct pkru_state {
>         u32                             pad;
>  } __packed;
>
> +/*
> + * State component 11 is Control flow Enforcement user states
> + */
> +struct cet_user_state {
> +       u64 u_cet;      /* user control flow settings */
> +       u64 user_ssp;   /* user shadow stack pointer */
> +} __packed;
> +
> +/*
> + * State component 12 is Control flow Enforcement kernel states
> + */
> +struct cet_kernel_state {
> +       u64 kernel_ssp; /* kernel shadow stack */
> +       u64 pl1_ssp;    /* ring-1 shadow stack */
> +       u64 pl2_ssp;    /* ring-2 shadow stack */
> +} __packed;
> +

Why are these __packed?  It seems like it'll generate bad code for no
obvious purpose.
Yu-cheng Yu Nov. 8, 2018, 9:01 p.m. UTC | #4
On Thu, 2018-11-08 at 12:46 -0800, Andy Lutomirski wrote:
> On Thu, Oct 11, 2018 at 8:20 AM Yu-cheng Yu <yu-cheng.yu@intel.com> wrote:
> > 
> > Intel Control-flow Enforcement Technology (CET) introduces the
> > following MSRs into the XSAVES system states.
> > 
> >     IA32_U_CET (user-mode CET settings),
> >     IA32_PL3_SSP (user-mode shadow stack),
> >     IA32_PL0_SSP (kernel-mode shadow stack),
> >     IA32_PL1_SSP (ring-1 shadow stack),
> >     IA32_PL2_SSP (ring-2 shadow stack).
> > 
> > Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
> > ---
> >  arch/x86/include/asm/fpu/types.h            | 22 +++++++++++++++++++++
> >  arch/x86/include/asm/fpu/xstate.h           |  4 +++-
> >  arch/x86/include/uapi/asm/processor-flags.h |  2 ++
> >  arch/x86/kernel/fpu/xstate.c                | 10 ++++++++++
> >  4 files changed, 37 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/include/asm/fpu/types.h
> > b/arch/x86/include/asm/fpu/types.h
> > index 202c53918ecf..e55d51d172f1 100644
> > --- a/arch/x86/include/asm/fpu/types.h
> > +++ b/arch/x86/include/asm/fpu/types.h
> > @@ -114,6 +114,9 @@ enum xfeature {
> >         XFEATURE_Hi16_ZMM,
> >         XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
> >         XFEATURE_PKRU,
> > +       XFEATURE_RESERVED,
> > +       XFEATURE_SHSTK_USER,
> > +       XFEATURE_SHSTK_KERNEL,
> > 
> >         XFEATURE_MAX,
> >  };
> > @@ -128,6 +131,8 @@ enum xfeature {
> >  #define XFEATURE_MASK_Hi16_ZMM         (1 << XFEATURE_Hi16_ZMM)
> >  #define XFEATURE_MASK_PT               (1 <<
> > XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
> >  #define XFEATURE_MASK_PKRU             (1 << XFEATURE_PKRU)
> > +#define XFEATURE_MASK_SHSTK_USER       (1 << XFEATURE_SHSTK_USER)
> > +#define XFEATURE_MASK_SHSTK_KERNEL     (1 << XFEATURE_SHSTK_KERNEL)
> > 
> >  #define XFEATURE_MASK_FPSSE            (XFEATURE_MASK_FP |
> > XFEATURE_MASK_SSE)
> >  #define XFEATURE_MASK_AVX512           (XFEATURE_MASK_OPMASK \
> > @@ -229,6 +234,23 @@ struct pkru_state {
> >         u32                             pad;
> >  } __packed;
> > 
> > +/*
> > + * State component 11 is Control flow Enforcement user states
> > + */
> > +struct cet_user_state {
> > +       u64 u_cet;      /* user control flow settings */
> > +       u64 user_ssp;   /* user shadow stack pointer */
> > +} __packed;
> > +
> > +/*
> > + * State component 12 is Control flow Enforcement kernel states
> > + */
> > +struct cet_kernel_state {
> > +       u64 kernel_ssp; /* kernel shadow stack */
> > +       u64 pl1_ssp;    /* ring-1 shadow stack */
> > +       u64 pl2_ssp;    /* ring-2 shadow stack */
> > +} __packed;
> > +
> 
> Why are these __packed?  It seems like it'll generate bad code for no
> obvious purpose.

That prevents any possibility that the compiler will insert padding, although in
64-bit kernel this should not happen to either struct.  Also all xstate
components here are packed.

Yu-cheng
Andy Lutomirski Nov. 8, 2018, 9:22 p.m. UTC | #5
On Thu, Nov 8, 2018 at 1:06 PM Yu-cheng Yu <yu-cheng.yu@intel.com> wrote:
>
> On Thu, 2018-11-08 at 12:46 -0800, Andy Lutomirski wrote:
> > On Thu, Oct 11, 2018 at 8:20 AM Yu-cheng Yu <yu-cheng.yu@intel.com> wrote:
> > >
> > > Intel Control-flow Enforcement Technology (CET) introduces the
> > > following MSRs into the XSAVES system states.
> > >
> > >     IA32_U_CET (user-mode CET settings),
> > >     IA32_PL3_SSP (user-mode shadow stack),
> > >     IA32_PL0_SSP (kernel-mode shadow stack),
> > >     IA32_PL1_SSP (ring-1 shadow stack),
> > >     IA32_PL2_SSP (ring-2 shadow stack).
> > >
> > > Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
> > > ---
> > >  arch/x86/include/asm/fpu/types.h            | 22 +++++++++++++++++++++
> > >  arch/x86/include/asm/fpu/xstate.h           |  4 +++-
> > >  arch/x86/include/uapi/asm/processor-flags.h |  2 ++
> > >  arch/x86/kernel/fpu/xstate.c                | 10 ++++++++++
> > >  4 files changed, 37 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/arch/x86/include/asm/fpu/types.h
> > > b/arch/x86/include/asm/fpu/types.h
> > > index 202c53918ecf..e55d51d172f1 100644
> > > --- a/arch/x86/include/asm/fpu/types.h
> > > +++ b/arch/x86/include/asm/fpu/types.h
> > > @@ -114,6 +114,9 @@ enum xfeature {
> > >         XFEATURE_Hi16_ZMM,
> > >         XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
> > >         XFEATURE_PKRU,
> > > +       XFEATURE_RESERVED,
> > > +       XFEATURE_SHSTK_USER,
> > > +       XFEATURE_SHSTK_KERNEL,
> > >
> > >         XFEATURE_MAX,
> > >  };
> > > @@ -128,6 +131,8 @@ enum xfeature {
> > >  #define XFEATURE_MASK_Hi16_ZMM         (1 << XFEATURE_Hi16_ZMM)
> > >  #define XFEATURE_MASK_PT               (1 <<
> > > XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
> > >  #define XFEATURE_MASK_PKRU             (1 << XFEATURE_PKRU)
> > > +#define XFEATURE_MASK_SHSTK_USER       (1 << XFEATURE_SHSTK_USER)
> > > +#define XFEATURE_MASK_SHSTK_KERNEL     (1 << XFEATURE_SHSTK_KERNEL)
> > >
> > >  #define XFEATURE_MASK_FPSSE            (XFEATURE_MASK_FP |
> > > XFEATURE_MASK_SSE)
> > >  #define XFEATURE_MASK_AVX512           (XFEATURE_MASK_OPMASK \
> > > @@ -229,6 +234,23 @@ struct pkru_state {
> > >         u32                             pad;
> > >  } __packed;
> > >
> > > +/*
> > > + * State component 11 is Control flow Enforcement user states
> > > + */
> > > +struct cet_user_state {
> > > +       u64 u_cet;      /* user control flow settings */
> > > +       u64 user_ssp;   /* user shadow stack pointer */
> > > +} __packed;
> > > +
> > > +/*
> > > + * State component 12 is Control flow Enforcement kernel states
> > > + */
> > > +struct cet_kernel_state {
> > > +       u64 kernel_ssp; /* kernel shadow stack */
> > > +       u64 pl1_ssp;    /* ring-1 shadow stack */
> > > +       u64 pl2_ssp;    /* ring-2 shadow stack */
> > > +} __packed;
> > > +
> >
> > Why are these __packed?  It seems like it'll generate bad code for no
> > obvious purpose.
>
> That prevents any possibility that the compiler will insert padding, although in
> 64-bit kernel this should not happen to either struct.  Also all xstate
> components here are packed.
>

They both seem like bugs, perhaps.  As I understand it, __packed
removes padding, but it also forces the compiler to expect the fields
to be unaligned even if they are actually aligned.
Cyrill Gorcunov Nov. 8, 2018, 9:31 p.m. UTC | #6
On Thu, Nov 08, 2018 at 01:22:54PM -0800, Andy Lutomirski wrote:
> > >
> > > Why are these __packed?  It seems like it'll generate bad code for no
> > > obvious purpose.
> >
> > That prevents any possibility that the compiler will insert padding, although in
> > 64-bit kernel this should not happen to either struct.  Also all xstate
> > components here are packed.
> >
> 
> They both seem like bugs, perhaps.  As I understand it, __packed
> removes padding, but it also forces the compiler to expect the fields
> to be unaligned even if they are actually aligned.

How is that? Andy, mind to point where you get that this
attribute forces compiler to make such assumption?
Dave Hansen Nov. 8, 2018, 9:48 p.m. UTC | #7
On 11/8/18 1:22 PM, Andy Lutomirski wrote:
>> +struct cet_kernel_state {
>> +       u64 kernel_ssp; /* kernel shadow stack */
>> +       u64 pl1_ssp;    /* ring-1 shadow stack */
>> +       u64 pl2_ssp;    /* ring-2 shadow stack */
>> +} __packed;
>> +
> Why are these __packed?  It seems like it'll generate bad code for no
> obvious purpose.

It's a hardware-defined in-memory structure.  Granted, we'd need a
really wonky compiler to make that anything *other* than a nicely-packed
24-byte structure, but the __packed makes it explicit.

It is probably a really useful long-term thing to stop using __packed
and start using "__hw_defined" or something that #defines down to __packed.
Matthew Wilcox Nov. 8, 2018, 10 p.m. UTC | #8
On Thu, Nov 08, 2018 at 01:48:54PM -0800, Dave Hansen wrote:
> On 11/8/18 1:22 PM, Andy Lutomirski wrote:
> >> +struct cet_kernel_state {
> >> +       u64 kernel_ssp; /* kernel shadow stack */
> >> +       u64 pl1_ssp;    /* ring-1 shadow stack */
> >> +       u64 pl2_ssp;    /* ring-2 shadow stack */
> >> +} __packed;
> >> +
> > Why are these __packed?  It seems like it'll generate bad code for no
> > obvious purpose.
> 
> It's a hardware-defined in-memory structure.  Granted, we'd need a
> really wonky compiler to make that anything *other* than a nicely-packed
> 24-byte structure, but the __packed makes it explicit.
> 
> It is probably a really useful long-term thing to stop using __packed
> and start using "__hw_defined" or something that #defines down to __packed.

packed doesn't mean "don't leave gaps".  It means:

'packed'
     The 'packed' attribute specifies that a variable or structure field
     should have the smallest possible alignment--one byte for a
     variable, and one bit for a field, unless you specify a larger
     value with the 'aligned' attribute.

So Andy's right.  It tells the compiler, "this struct will not be naturally aligned, it will be aligned to a 1-byte boundary".  Which is silly.  If we have

struct b {
	unsigned long x;
} __packed;

struct a {
	char c;
	struct b b;
};

we want struct b to start at offset 8, but with __packed, it will start
at offset 1.

Delete __packed.  It doesn't do what you think it does.
Andy Lutomirski Nov. 8, 2018, 10:01 p.m. UTC | #9
On Thu, Nov 8, 2018 at 1:31 PM Cyrill Gorcunov <gorcunov@gmail.com> wrote:
>
> On Thu, Nov 08, 2018 at 01:22:54PM -0800, Andy Lutomirski wrote:
> > > >
> > > > Why are these __packed?  It seems like it'll generate bad code for no
> > > > obvious purpose.
> > >
> > > That prevents any possibility that the compiler will insert padding, although in
> > > 64-bit kernel this should not happen to either struct.  Also all xstate
> > > components here are packed.
> > >
> >
> > They both seem like bugs, perhaps.  As I understand it, __packed
> > removes padding, but it also forces the compiler to expect the fields
> > to be unaligned even if they are actually aligned.
>
> How is that? Andy, mind to point where you get that this
> attribute forces compiler to make such assumption?

It's from memory.  But gcc seems to agree with me I compiled this:

struct foo {
    int x;
} __attribute__((packed));

int read_foo(struct foo *f)
{
    return f->x;
}

int align_of_foo_x(struct foo *f)
{
    return __alignof__(f->x);
}

Compiling with -O2 gives:

    .globl    read_foo
    .type    read_foo, @function
read_foo:
    movl    (%rdi), %eax
    ret
    .size    read_foo, .-read_foo

    .p2align 4,,15
    .globl    align_of_foo_x
    .type    align_of_foo_x, @function
align_of_foo_x:
    movl    $1, %eax
    ret
    .size    align_of_foo_x, .-align_of_foo_x

So gcc thinks that the x field is one-byte-aligned, but the code is
okay (at least in this instance) on x86.
Building for armv5 gives:

    .type    read_foo, %function
read_foo:
    @ args = 0, pretend = 0, frame = 0
    @ frame_needed = 0, uses_anonymous_args = 0
    @ link register save eliminated.
    ldrb    r3, [r0]    @ zero_extendqisi2
    ldrb    r1, [r0, #1]    @ zero_extendqisi2
    ldrb    r2, [r0, #2]    @ zero_extendqisi2
    orr    r3, r3, r1, lsl #8
    ldrb    r0, [r0, #3]    @ zero_extendqisi2
    orr    r3, r3, r2, lsl #16
    orr    r0, r3, r0, lsl #24
    bx    lr
    .size    read_foo, .-read_foo
    .align    2
    .global    align_of_foo_x
    .syntax unified
    .arm
    .fpu vfpv3-d16
    .type    align_of_foo_x, %function

So I'm pretty sure I'm right.
Cyrill Gorcunov Nov. 8, 2018, 10:18 p.m. UTC | #10
On Thu, Nov 08, 2018 at 02:01:42PM -0800, Andy Lutomirski wrote:
> > >
> > > They both seem like bugs, perhaps.  As I understand it, __packed
> > > removes padding, but it also forces the compiler to expect the fields
> > > to be unaligned even if they are actually aligned.
> >
> > How is that? Andy, mind to point where you get that this
> > attribute forces compiler to make such assumption?
> 
> It's from memory.  But gcc seems to agree with me I compiled this:
> 

Indeed, thanks!
Dave Hansen Nov. 8, 2018, 11:35 p.m. UTC | #11
On 11/8/18 2:00 PM, Matthew Wilcox wrote:
> struct a {
> 	char c;
> 	struct b b;
> };
> 
> we want struct b to start at offset 8, but with __packed, it will start
> at offset 1.

You're talking about how we want the struct laid out in memory if we
have control over the layout.  I'm talking about what happens if
something *else* tells us the layout, like a hardware specification
which is what is in play with the XSAVE instruction dictated layout
that's in question here.

What I'm concerned about is a structure like this:

struct foo {
        u32 i1;
        u64 i2;
};

If we leave that to natural alignment, we end up with a 16-byte
structure laid out like this:

	0-3	i1
	3-8	alignment gap
	8-15	i2

Which isn't what we want.  We want a 12-byte structure, laid out like this:

	0-3	i1
	4-11	i2

Which we get with:


struct foo {
        u32 i1;
        u64 i2;
} __packed;

Now, looking at Yu-cheng's specific example, it doesn't matter.  We've
got 64-bit types and natural 64-bit alignment.  Without __packed, we
need to look out for natural alignment screwing us up.  With __packed,
it just does what it *looks* like it does.
Borislav Petkov Nov. 8, 2018, 11:52 p.m. UTC | #12
On Thu, Nov 08, 2018 at 12:40:02PM -0800, Yu-cheng Yu wrote:
> In fpu_init_system_xstate(), we test and clear features that are not enabled.
> There we depend on the order of these elements.  This is the tenth "unknown
> xstate feature".

Aha, those are *reserved* bits - not unused, in XCR0.

Do an s/unused/reserved/g pls.

Now let's see, you have 0 for the 10th bit which happens to be

#define X86_FEATURE_FPU                 ( 0*32+ 0) /* Onboard FPU */

too. And if we look at the code:

        for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
                if (!boot_cpu_has(xsave_cpuid_features[i]))
                        xfeatures_mask_all &= ~BIT_ULL(i);

guess what happens if i == 10.

I know, the subsequent & SUPPORTED_XFEATURES_MASK saves you from the
#GP but that's still not good enough. The loop should not even call
boot_cpu_has() for reserved feature bits.

Thx.
Matthew Wilcox Nov. 9, 2018, 12:32 a.m. UTC | #13
On Thu, Nov 08, 2018 at 03:35:02PM -0800, Dave Hansen wrote:
> On 11/8/18 2:00 PM, Matthew Wilcox wrote:
> > struct a {
> > 	char c;
> > 	struct b b;
> > };
> > 
> > we want struct b to start at offset 8, but with __packed, it will start
> > at offset 1.
> 
> You're talking about how we want the struct laid out in memory if we
> have control over the layout.  I'm talking about what happens if
> something *else* tells us the layout, like a hardware specification
> which is what is in play with the XSAVE instruction dictated layout
> that's in question here.
> 
> What I'm concerned about is a structure like this:
> 
> struct foo {
>         u32 i1;
>         u64 i2;
> };
> 
> If we leave that to natural alignment, we end up with a 16-byte
> structure laid out like this:
> 
> 	0-3	i1
> 	3-8	alignment gap
> 	8-15	i2

I know you actually meant:

	0-3	i1
	4-7	pad
	8-15	i2

> Which isn't what we want.  We want a 12-byte structure, laid out like this:
> 
> 	0-3	i1
> 	4-11	i2
> 
> Which we get with:
> 
> struct foo {
>         u32 i1;
>         u64 i2;
> } __packed;

But we _also_ get pessimised accesses to i1 and i2.  Because gcc can't
rely on struct foo being aligned to a 4 or even 8 byte boundary (it
might be embedded in "struct a" from above).

> Now, looking at Yu-cheng's specific example, it doesn't matter.  We've
> got 64-bit types and natural 64-bit alignment.  Without __packed, we
> need to look out for natural alignment screwing us up.  With __packed,
> it just does what it *looks* like it does.

The question is whether Yu-cheng's struct is ever embedded in another
struct.  And if so, what does the hardware do?
Andy Lutomirski Nov. 9, 2018, 12:45 a.m. UTC | #14
On Thu, Nov 8, 2018 at 4:32 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Thu, Nov 08, 2018 at 03:35:02PM -0800, Dave Hansen wrote:
> > On 11/8/18 2:00 PM, Matthew Wilcox wrote:
> > > struct a {
> > >     char c;
> > >     struct b b;
> > > };
> > >
> > > we want struct b to start at offset 8, but with __packed, it will start
> > > at offset 1.
> >
> > You're talking about how we want the struct laid out in memory if we
> > have control over the layout.  I'm talking about what happens if
> > something *else* tells us the layout, like a hardware specification
> > which is what is in play with the XSAVE instruction dictated layout
> > that's in question here.
> >
> > What I'm concerned about is a structure like this:
> >
> > struct foo {
> >         u32 i1;
> >         u64 i2;
> > };
> >
> > If we leave that to natural alignment, we end up with a 16-byte
> > structure laid out like this:
> >
> >       0-3     i1
> >       3-8     alignment gap
> >       8-15    i2
>
> I know you actually meant:
>
>         0-3     i1
>         4-7     pad
>         8-15    i2
>
> > Which isn't what we want.  We want a 12-byte structure, laid out like this:
> >
> >       0-3     i1
> >       4-11    i2
> >
> > Which we get with:
> >
> > struct foo {
> >         u32 i1;
> >         u64 i2;
> > } __packed;
>
> But we _also_ get pessimised accesses to i1 and i2.  Because gcc can't
> rely on struct foo being aligned to a 4 or even 8 byte boundary (it
> might be embedded in "struct a" from above).
>

In the event we end up with a hardware structure that has
not-really-aligned elements, I suspect we can ask gcc for a new
extension to help.  Or maybe some hack like:

struct foo {
  u32 i1;
  struct {
    u64 i2;
  } __attribute__((packed));
};

would do the trick.
Dave Hansen Nov. 9, 2018, 5:13 p.m. UTC | #15
On 11/8/18 4:32 PM, Matthew Wilcox wrote:
>> Now, looking at Yu-cheng's specific example, it doesn't matter.  We've
>> got 64-bit types and natural 64-bit alignment.  Without __packed, we
>> need to look out for natural alignment screwing us up.  With __packed,
>> it just does what it *looks* like it does.
> The question is whether Yu-cheng's struct is ever embedded in another
> struct.  And if so, what does the hardware do?

It's not really.

+struct cet_user_state {
+	u64 u_cet;	/* user control flow settings */
+	u64 user_ssp;	/* user shadow stack pointer */
+} __packed;

This ends up embedded in 'struct fpu'.  The hardware tells us what the
sum of all the sizes of all the state components are, and also tells us
the offsets inside the larger buffer.

We double-check that the structure sizes exactly match the sizes that
the hardware tells us that the buffer pieces are via XCHECK_SZ().

But, later versions of the hardware have instructions that don't have
static offsets for the state components (when the XSAVES/XSAVEC
instructions are used).  So, for those, the structure embedding isn't
used at *all* since some state might not be present.
Matthew Wilcox Nov. 9, 2018, 5:17 p.m. UTC | #16
On Fri, Nov 09, 2018 at 09:13:32AM -0800, Dave Hansen wrote:
> On 11/8/18 4:32 PM, Matthew Wilcox wrote:
> >> Now, looking at Yu-cheng's specific example, it doesn't matter.  We've
> >> got 64-bit types and natural 64-bit alignment.  Without __packed, we
> >> need to look out for natural alignment screwing us up.  With __packed,
> >> it just does what it *looks* like it does.
> > The question is whether Yu-cheng's struct is ever embedded in another
> > struct.  And if so, what does the hardware do?
> 
> It's not really.
> 
> +struct cet_user_state {
> +	u64 u_cet;	/* user control flow settings */
> +	u64 user_ssp;	/* user shadow stack pointer */
> +} __packed;
> 
> This ends up embedded in 'struct fpu'.  The hardware tells us what the
> sum of all the sizes of all the state components are, and also tells us
> the offsets inside the larger buffer.
> 
> We double-check that the structure sizes exactly match the sizes that
> the hardware tells us that the buffer pieces are via XCHECK_SZ().
> 
> But, later versions of the hardware have instructions that don't have
> static offsets for the state components (when the XSAVES/XSAVEC
> instructions are used).  So, for those, the structure embedding isn't
> used at *all* since some state might not be present.

But *when present*, this structure is always aligned on an 8-byte
boundary, right?
Dave Hansen Nov. 9, 2018, 5:20 p.m. UTC | #17
On 11/9/18 9:17 AM, Matthew Wilcox wrote:
>> But, later versions of the hardware have instructions that don't have
>> static offsets for the state components (when the XSAVES/XSAVEC
>> instructions are used).  So, for those, the structure embedding isn't
>> used at *all* since some state might not be present.
> But *when present*, this structure is always aligned on an 8-byte
> boundary, right?

There's no guarantee of that.

There is an "aligned" attribute for each XSAVE state component, but I do
not believe it is set for anything yet.
Dave Hansen Nov. 9, 2018, 5:28 p.m. UTC | #18
On 11/9/18 9:20 AM, Dave Hansen wrote:
> On 11/9/18 9:17 AM, Matthew Wilcox wrote:
>>> But, later versions of the hardware have instructions that don't have
>>> static offsets for the state components (when the XSAVES/XSAVEC
>>> instructions are used).  So, for those, the structure embedding isn't
>>> used at *all* since some state might not be present.
>> But *when present*, this structure is always aligned on an 8-byte
>> boundary, right?

Practically, though, I think it ends up always being aligned on an
8-byte boundary.
Andy Lutomirski Nov. 11, 2018, 2:59 p.m. UTC | #19
> On Nov 11, 2018, at 3:31 AM, Pavel Machek <pavel@ucw.cz> wrote:
> 
> Hi!
> 
>>> +/*
>>> + * State component 12 is Control flow Enforcement kernel states
>>> + */
>>> +struct cet_kernel_state {
>>> +    u64 kernel_ssp;    /* kernel shadow stack */
>>> +    u64 pl1_ssp;    /* ring-1 shadow stack */
>>> +    u64 pl2_ssp;    /* ring-2 shadow stack */
>> 
>> Just write "privilege level" everywhere - not "ring".
> 
> Please just use word "ring". It is well estabilished terminology.
> 
> Which ring is priviledge level 1, given that we have SMM and
> virtualization support?

To the contrary: CPL, DPL, and RPL are very well defined terms in the architecture manuals. “PL” is privilege level. PL 1 is very well defined.

SMM is SMM, full stop (unless dual mode or whatever it’s called is on, but AFAIK no one uses it).  VMX non-root CPL 1 is *still* privilege level 1.

In contrast, the security community likes to call SMM “ring -1”, which is cute, but wrong from a systems programmer view. For example, SMM’s CPL can still range from 0-3.

> 
>                                    Pavel
> -- 
> (english) http://www.livejournal.com/~pavelmachek
> (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
diff mbox series

Patch

diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 202c53918ecf..e55d51d172f1 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -114,6 +114,9 @@  enum xfeature {
 	XFEATURE_Hi16_ZMM,
 	XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
 	XFEATURE_PKRU,
+	XFEATURE_RESERVED,
+	XFEATURE_SHSTK_USER,
+	XFEATURE_SHSTK_KERNEL,
 
 	XFEATURE_MAX,
 };
@@ -128,6 +131,8 @@  enum xfeature {
 #define XFEATURE_MASK_Hi16_ZMM		(1 << XFEATURE_Hi16_ZMM)
 #define XFEATURE_MASK_PT		(1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
 #define XFEATURE_MASK_PKRU		(1 << XFEATURE_PKRU)
+#define XFEATURE_MASK_SHSTK_USER	(1 << XFEATURE_SHSTK_USER)
+#define XFEATURE_MASK_SHSTK_KERNEL	(1 << XFEATURE_SHSTK_KERNEL)
 
 #define XFEATURE_MASK_FPSSE		(XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
 #define XFEATURE_MASK_AVX512		(XFEATURE_MASK_OPMASK \
@@ -229,6 +234,23 @@  struct pkru_state {
 	u32				pad;
 } __packed;
 
+/*
+ * State component 11 is Control flow Enforcement user states
+ */
+struct cet_user_state {
+	u64 u_cet;	/* user control flow settings */
+	u64 user_ssp;	/* user shadow stack pointer */
+} __packed;
+
+/*
+ * State component 12 is Control flow Enforcement kernel states
+ */
+struct cet_kernel_state {
+	u64 kernel_ssp;	/* kernel shadow stack */
+	u64 pl1_ssp;	/* ring-1 shadow stack */
+	u64 pl2_ssp;	/* ring-2 shadow stack */
+} __packed;
+
 struct xstate_header {
 	u64				xfeatures;
 	u64				xcomp_bv;
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index d8e2ec99f635..18b60748a34d 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -28,7 +28,9 @@ 
 				  XFEATURE_MASK_Hi16_ZMM | \
 				  XFEATURE_MASK_PKRU | \
 				  XFEATURE_MASK_BNDREGS | \
-				  XFEATURE_MASK_BNDCSR)
+				  XFEATURE_MASK_BNDCSR | \
+				  XFEATURE_MASK_SHSTK_USER | \
+				  XFEATURE_MASK_SHSTK_KERNEL)
 
 #ifdef CONFIG_X86_64
 #define REX_PREFIX	"0x48, "
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index bcba3c643e63..25311ec4b731 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -130,6 +130,8 @@ 
 #define X86_CR4_SMAP		_BITUL(X86_CR4_SMAP_BIT)
 #define X86_CR4_PKE_BIT		22 /* enable Protection Keys support */
 #define X86_CR4_PKE		_BITUL(X86_CR4_PKE_BIT)
+#define X86_CR4_CET_BIT		23 /* enable Control flow Enforcement */
+#define X86_CR4_CET		_BITUL(X86_CR4_CET_BIT)
 
 /*
  * x86-64 Task Priority Register, CR8
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 605ec6decf3e..ad36ea28bfd1 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -35,6 +35,9 @@  static const char *xfeature_names[] =
 	"Processor Trace (unused)"	,
 	"Protection Keys User registers",
 	"unknown xstate feature"	,
+	"Control flow User registers"	,
+	"Control flow Kernel registers"	,
+	"unknown xstate feature"	,
 };
 
 static short xsave_cpuid_features[] __initdata = {
@@ -48,6 +51,9 @@  static short xsave_cpuid_features[] __initdata = {
 	X86_FEATURE_AVX512F,
 	X86_FEATURE_INTEL_PT,
 	X86_FEATURE_PKU,
+	0,		   /* Unused */
+	X86_FEATURE_SHSTK, /* XFEATURE_SHSTK_USER */
+	X86_FEATURE_SHSTK, /* XFEATURE_SHSTK_KERNEL */
 };
 
 /*
@@ -312,6 +318,8 @@  static void __init print_xstate_features(void)
 	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
 	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
 	print_xstate_feature(XFEATURE_MASK_PKRU);
+	print_xstate_feature(XFEATURE_MASK_SHSTK_USER);
+	print_xstate_feature(XFEATURE_MASK_SHSTK_KERNEL);
 }
 
 /*
@@ -558,6 +566,8 @@  static void check_xstate_against_struct(int nr)
 	XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
 	XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
 	XCHECK_SZ(sz, nr, XFEATURE_PKRU,      struct pkru_state);
+	XCHECK_SZ(sz, nr, XFEATURE_SHSTK_USER,   struct cet_user_state);
+	XCHECK_SZ(sz, nr, XFEATURE_SHSTK_KERNEL, struct cet_kernel_state);
 
 	/*
 	 * Make *SURE* to add any feature numbers in below if