diff mbox

[RFC,v2,22/27] x86/cet/ibt: User-mode indirect branch tracking support

Message ID 20180710222639.8241-23-yu-cheng.yu@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yu, Yu-cheng July 10, 2018, 10:26 p.m. UTC
Add user-mode indirect branch tracking enabling/disabling
and supporting routines.

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
---
 arch/x86/include/asm/cet.h               |  8 +++
 arch/x86/include/asm/disabled-features.h |  8 ++-
 arch/x86/kernel/cet.c                    | 73 ++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c             | 20 ++++++-
 arch/x86/kernel/elf.c                    | 16 +++++-
 arch/x86/kernel/process.c                |  1 +
 6 files changed, 123 insertions(+), 3 deletions(-)

Comments

Dave Hansen July 11, 2018, 12:11 a.m. UTC | #1
Is this feature *integral* to shadow stacks?  Or, should it just be in a
different series?

> diff --git a/arch/x86/include/asm/cet.h b/arch/x86/include/asm/cet.h
> index d9ae3d86cdd7..71da2cccba16 100644
> --- a/arch/x86/include/asm/cet.h
> +++ b/arch/x86/include/asm/cet.h
> @@ -12,7 +12,10 @@ struct task_struct;
>  struct cet_status {
>  	unsigned long	shstk_base;
>  	unsigned long	shstk_size;
> +	unsigned long	ibt_bitmap_addr;
> +	unsigned long	ibt_bitmap_size;
>  	unsigned int	shstk_enabled:1;
> +	unsigned int	ibt_enabled:1;
>  };

Is there a reason we're not using pointers here?  This seems like the
kind of place that we probably want __user pointers.


> +static unsigned long ibt_mmap(unsigned long addr, unsigned long len)
> +{
> +	struct mm_struct *mm = current->mm;
> +	unsigned long populate;
> +
> +	down_write(&mm->mmap_sem);
> +	addr = do_mmap(NULL, addr, len, PROT_READ | PROT_WRITE,
> +		       MAP_ANONYMOUS | MAP_PRIVATE,
> +		       VM_DONTDUMP, 0, &populate, NULL);
> +	up_write(&mm->mmap_sem);
> +
> +	if (populate)
> +		mm_populate(addr, populate);
> +
> +	return addr;
> +}

We're going to have to start consolidating these at some point.  We have
at least three of them now, maybe more.

> +int cet_setup_ibt_bitmap(void)
> +{
> +	u64 r;
> +	unsigned long bitmap;
> +	unsigned long size;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_IBT))
> +		return -EOPNOTSUPP;
> +
> +	size = TASK_SIZE_MAX / PAGE_SIZE / BITS_PER_BYTE;

Just a note: this table is going to be gigantic on 5-level paging
systems, and userspace won't, by default use any of that extra address
space.  I think it ends up being a 512GB allocation in a 128TB address
space.

Is that a problem?

On 5-level paging systems, maybe we should just stick it up in the high
part of the address space.

> +	bitmap = ibt_mmap(0, size);
> +
> +	if (bitmap >= TASK_SIZE_MAX)
> +		return -ENOMEM;
> +
> +	bitmap &= PAGE_MASK;

We're page-aligning the result of an mmap()?  Why?

> +	rdmsrl(MSR_IA32_U_CET, r);
> +	r |= (MSR_IA32_CET_LEG_IW_EN | bitmap);
> +	wrmsrl(MSR_IA32_U_CET, r);

Comments, please.  What is this doing, logically?  Also, why are we
OR'ing the results into this MSR?  What are we trying to preserve?

> +	current->thread.cet.ibt_bitmap_addr = bitmap;
> +	current->thread.cet.ibt_bitmap_size = size;
> +	return 0;
> +}
> +
> +void cet_disable_ibt(void)
> +{
> +	u64 r;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_IBT))
> +		return;

Does this need a check for being already disabled?

> +	rdmsrl(MSR_IA32_U_CET, r);
> +	r &= ~(MSR_IA32_CET_ENDBR_EN | MSR_IA32_CET_LEG_IW_EN |
> +	       MSR_IA32_CET_NO_TRACK_EN);
> +	wrmsrl(MSR_IA32_U_CET, r);
> +	current->thread.cet.ibt_enabled = 0;
> +}

What's the locking for current->thread.cet?

> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 705467839ce8..c609c9ce5691 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -413,7 +413,8 @@ __setup("nopku", setup_disable_pku);
>  
>  static __always_inline void setup_cet(struct cpuinfo_x86 *c)
>  {
> -	if (cpu_feature_enabled(X86_FEATURE_SHSTK))
> +	if (cpu_feature_enabled(X86_FEATURE_SHSTK) ||
> +	    cpu_feature_enabled(X86_FEATURE_IBT))
>  		cr4_set_bits(X86_CR4_CET);
>  }
>  
> @@ -434,6 +435,23 @@ static __init int setup_disable_shstk(char *s)
>  __setup("no_cet_shstk", setup_disable_shstk);
>  #endif
>  
> +#ifdef CONFIG_X86_INTEL_BRANCH_TRACKING_USER
> +static __init int setup_disable_ibt(char *s)
> +{
> +	/* require an exact match without trailing characters */
> +	if (strlen(s))
> +		return 0;
> +
> +	if (!boot_cpu_has(X86_FEATURE_IBT))
> +		return 1;
> +
> +	setup_clear_cpu_cap(X86_FEATURE_IBT);
> +	pr_info("x86: 'no_cet_ibt' specified, disabling Branch Tracking\n");
> +	return 1;
> +}
> +__setup("no_cet_ibt", setup_disable_ibt);
> +#endif
>  /*
>   * Some CPU features depend on higher CPUID levels, which may not always
>   * be available due to CPUID level capping or broken virtualization
> diff --git a/arch/x86/kernel/elf.c b/arch/x86/kernel/elf.c
> index 233f6dad9c1f..42e08d3b573e 100644
> --- a/arch/x86/kernel/elf.c
> +++ b/arch/x86/kernel/elf.c
> @@ -15,6 +15,7 @@
>  #include <linux/fs.h>
>  #include <linux/uaccess.h>
>  #include <linux/string.h>
> +#include <linux/compat.h>
>  
>  /*
>   * The .note.gnu.property layout:
> @@ -222,7 +223,8 @@ int arch_setup_features(void *ehdr_p, void *phdr_p,
>  
>  	struct elf64_hdr *ehdr64 = ehdr_p;
>  
> -	if (!cpu_feature_enabled(X86_FEATURE_SHSTK))
> +	if (!cpu_feature_enabled(X86_FEATURE_SHSTK) &&
> +	    !cpu_feature_enabled(X86_FEATURE_IBT))
>  		return 0;
>  
>  	if (ehdr64->e_ident[EI_CLASS] == ELFCLASS64) {
> @@ -250,6 +252,9 @@ int arch_setup_features(void *ehdr_p, void *phdr_p,
>  	current->thread.cet.shstk_enabled = 0;
>  	current->thread.cet.shstk_base = 0;
>  	current->thread.cet.shstk_size = 0;
> +	current->thread.cet.ibt_enabled = 0;
> +	current->thread.cet.ibt_bitmap_addr = 0;
> +	current->thread.cet.ibt_bitmap_size = 0;
>  	if (cpu_feature_enabled(X86_FEATURE_SHSTK)) {
>  		if (shstk) {
>  			err = cet_setup_shstk();
> @@ -257,6 +262,15 @@ int arch_setup_features(void *ehdr_p, void *phdr_p,
>  				goto out;
>  		}
>  	}
> +
> +	if (cpu_feature_enabled(X86_FEATURE_IBT)) {
> +		if (ibt) {
> +			err = cet_setup_ibt();
> +			if (err < 0)
> +				goto out;
> +		}
> +	}

You introduced 'ibt' before it was used.  Please wait to introduce it
until you actually use it to make it easier to review.

Also, what's wrong with:

	if (cpu_feature_enabled(X86_FEATURE_IBT) && ibt) {
		...
	}

?
Jann Horn July 11, 2018, 9:07 p.m. UTC | #2
On Tue, Jul 10, 2018 at 3:31 PM Yu-cheng Yu <yu-cheng.yu@intel.com> wrote:
>
> Add user-mode indirect branch tracking enabling/disabling
> and supporting routines.
>
> Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
> Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
[...]
> diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c
> index 4eba7790c4e4..8bbd63e1a2ba 100644
> --- a/arch/x86/kernel/cet.c
> +++ b/arch/x86/kernel/cet.c
[...]
> +static unsigned long ibt_mmap(unsigned long addr, unsigned long len)
> +{
> +       struct mm_struct *mm = current->mm;
> +       unsigned long populate;
> +
> +       down_write(&mm->mmap_sem);
> +       addr = do_mmap(NULL, addr, len, PROT_READ | PROT_WRITE,
> +                      MAP_ANONYMOUS | MAP_PRIVATE,
> +                      VM_DONTDUMP, 0, &populate, NULL);
> +       up_write(&mm->mmap_sem);
> +
> +       if (populate)
> +               mm_populate(addr, populate);
> +
> +       return addr;
> +}

Is this thing going to stay writable? Will any process with an IBT
bitmap be able to disable protections by messing with the bitmap even
if the lock-out mode is active? If so, would it perhaps make sense to
forbid lock-out mode if an IBT bitmap is active, to make it clear that
effective lock-out is impossible in that state?
Yu, Yu-cheng July 11, 2018, 10:10 p.m. UTC | #3
On Tue, 2018-07-10 at 17:11 -0700, Dave Hansen wrote:
> Is this feature *integral* to shadow stacks?  Or, should it just be
> in a
> different series?

The whole CET series is mostly about SHSTK and only a minority for IBT.
IBT changes cannot be applied by itself without first applying SHSTK
changes.  Would the titles help, e.g. x86/cet/ibt, x86/cet/shstk, etc.?

> 
> > 
> > diff --git a/arch/x86/include/asm/cet.h
> > b/arch/x86/include/asm/cet.h
> > index d9ae3d86cdd7..71da2cccba16 100644
> > --- a/arch/x86/include/asm/cet.h
> > +++ b/arch/x86/include/asm/cet.h
> > @@ -12,7 +12,10 @@ struct task_struct;
> >  struct cet_status {
> >  	unsigned long	shstk_base;
> >  	unsigned long	shstk_size;
> > +	unsigned long	ibt_bitmap_addr;
> > +	unsigned long	ibt_bitmap_size;
> >  	unsigned int	shstk_enabled:1;
> > +	unsigned int	ibt_enabled:1;
> >  };
> Is there a reason we're not using pointers here?  This seems like the
> kind of place that we probably want __user pointers.

Yes, I will change that.

> 
> 
> > 
> > +static unsigned long ibt_mmap(unsigned long addr, unsigned long
> > len)
> > +{
> > +	struct mm_struct *mm = current->mm;
> > +	unsigned long populate;
> > +
> > +	down_write(&mm->mmap_sem);
> > +	addr = do_mmap(NULL, addr, len, PROT_READ | PROT_WRITE,
> > +		       MAP_ANONYMOUS | MAP_PRIVATE,
> > +		       VM_DONTDUMP, 0, &populate, NULL);
> > +	up_write(&mm->mmap_sem);
> > +
> > +	if (populate)
> > +		mm_populate(addr, populate);
> > +
> > +	return addr;
> > +}
> We're going to have to start consolidating these at some point.  We
> have
> at least three of them now, maybe more.

Maybe we can do the following in linux/mm.h?

+static inline unsigned long do_mmap_locked(addr, len, prot,
+					    flags, vm_flags)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long populate;
+
+	down_write(&mm->mmap_sem);
+	addr = do_mmap(NULL, addr, len, prot, flags, vm_flags,
+		       0, &populate, NULL);
+	up_write(&mm->mmap_sem);
+
+	if (populate)
+		mm_populate(addr, populate);
+
+	return addr;
+} 

> > 
> > +int cet_setup_ibt_bitmap(void)
> > +{
> > +	u64 r;
> > +	unsigned long bitmap;
> > +	unsigned long size;
> > +
> > +	if (!cpu_feature_enabled(X86_FEATURE_IBT))
> > +		return -EOPNOTSUPP;
> > +
> > +	size = TASK_SIZE_MAX / PAGE_SIZE / BITS_PER_BYTE;
> Just a note: this table is going to be gigantic on 5-level paging
> systems, and userspace won't, by default use any of that extra
> address
> space.  I think it ends up being a 512GB allocation in a 128TB
> address
> space.
> 
> Is that a problem?
>
> On 5-level paging systems, maybe we should just stick it up in the
> high
> part of the address space.

We do not know in advance if dlopen() needs to create the bitmap.  Do
we always reserve high address or force legacy libs to low address?

> 
> > 
> > +	bitmap = ibt_mmap(0, size);
> > +
> > +	if (bitmap >= TASK_SIZE_MAX)
> > +		return -ENOMEM;
> > +
> > +	bitmap &= PAGE_MASK;
> We're page-aligning the result of an mmap()?  Why?

This may not be necessary.  The lower bits of MSR_IA32_U_CET are
settings and not part of the bitmap address.  Is this safer?

> 
> > 
> > +	rdmsrl(MSR_IA32_U_CET, r);
> > +	r |= (MSR_IA32_CET_LEG_IW_EN | bitmap);
> > +	wrmsrl(MSR_IA32_U_CET, r);
> Comments, please.  What is this doing, logically?  Also, why are we
> OR'ing the results into this MSR?  What are we trying to preserve?

I will add comments.

> 
> > 
> > +	current->thread.cet.ibt_bitmap_addr = bitmap;
> > +	current->thread.cet.ibt_bitmap_size = size;
> > +	return 0;
> > +}
> > +
> > +void cet_disable_ibt(void)
> > +{
> > +	u64 r;
> > +
> > +	if (!cpu_feature_enabled(X86_FEATURE_IBT))
> > +		return;
> Does this need a check for being already disabled?

We need that.  We cannot write to those MSRs if the CPU does not
support it.

> 
> > 
> > +	rdmsrl(MSR_IA32_U_CET, r);
> > +	r &= ~(MSR_IA32_CET_ENDBR_EN | MSR_IA32_CET_LEG_IW_EN |
> > +	       MSR_IA32_CET_NO_TRACK_EN);
> > +	wrmsrl(MSR_IA32_U_CET, r);
> > +	current->thread.cet.ibt_enabled = 0;
> > +}
> What's the locking for current->thread.cet?

Now CET is not locked until the application calls ARCH_CET_LOCK.

> 
> > 
> > diff --git a/arch/x86/kernel/cpu/common.c
> > b/arch/x86/kernel/cpu/common.c
> > index 705467839ce8..c609c9ce5691 100644
> > --- a/arch/x86/kernel/cpu/common.c
> > +++ b/arch/x86/kernel/cpu/common.c
> > @@ -413,7 +413,8 @@ __setup("nopku", setup_disable_pku);
> >  
> >  static __always_inline void setup_cet(struct cpuinfo_x86 *c)
> >  {
> > -	if (cpu_feature_enabled(X86_FEATURE_SHSTK))
> > +	if (cpu_feature_enabled(X86_FEATURE_SHSTK) ||
> > +	    cpu_feature_enabled(X86_FEATURE_IBT))
> >  		cr4_set_bits(X86_CR4_CET);
> >  }
> >  
> > @@ -434,6 +435,23 @@ static __init int setup_disable_shstk(char *s)
> >  __setup("no_cet_shstk", setup_disable_shstk);
> >  #endif
> >  
> > +#ifdef CONFIG_X86_INTEL_BRANCH_TRACKING_USER
> > +static __init int setup_disable_ibt(char *s)
> > +{
> > +	/* require an exact match without trailing characters */
> > +	if (strlen(s))
> > +		return 0;
> > +
> > +	if (!boot_cpu_has(X86_FEATURE_IBT))
> > +		return 1;
> > +
> > +	setup_clear_cpu_cap(X86_FEATURE_IBT);
> > +	pr_info("x86: 'no_cet_ibt' specified, disabling Branch
> > Tracking\n");
> > +	return 1;
> > +}
> > +__setup("no_cet_ibt", setup_disable_ibt);
> > +#endif
> >  /*
> >   * Some CPU features depend on higher CPUID levels, which may not
> > always
> >   * be available due to CPUID level capping or broken
> > virtualization
> > diff --git a/arch/x86/kernel/elf.c b/arch/x86/kernel/elf.c
> > index 233f6dad9c1f..42e08d3b573e 100644
> > --- a/arch/x86/kernel/elf.c
> > +++ b/arch/x86/kernel/elf.c
> > @@ -15,6 +15,7 @@
> >  #include <linux/fs.h>
> >  #include <linux/uaccess.h>
> >  #include <linux/string.h>
> > +#include <linux/compat.h>
> >  
> >  /*
> >   * The .note.gnu.property layout:
> > @@ -222,7 +223,8 @@ int arch_setup_features(void *ehdr_p, void
> > *phdr_p,
> >  
> >  	struct elf64_hdr *ehdr64 = ehdr_p;
> >  
> > -	if (!cpu_feature_enabled(X86_FEATURE_SHSTK))
> > +	if (!cpu_feature_enabled(X86_FEATURE_SHSTK) &&
> > +	    !cpu_feature_enabled(X86_FEATURE_IBT))
> >  		return 0;
> >  
> >  	if (ehdr64->e_ident[EI_CLASS] == ELFCLASS64) {
> > @@ -250,6 +252,9 @@ int arch_setup_features(void *ehdr_p, void
> > *phdr_p,
> >  	current->thread.cet.shstk_enabled = 0;
> >  	current->thread.cet.shstk_base = 0;
> >  	current->thread.cet.shstk_size = 0;
> > +	current->thread.cet.ibt_enabled = 0;
> > +	current->thread.cet.ibt_bitmap_addr = 0;
> > +	current->thread.cet.ibt_bitmap_size = 0;
> >  	if (cpu_feature_enabled(X86_FEATURE_SHSTK)) {
> >  		if (shstk) {
> >  			err = cet_setup_shstk();
> > @@ -257,6 +262,15 @@ int arch_setup_features(void *ehdr_p, void
> > *phdr_p,
> >  				goto out;
> >  		}
> >  	}
> > +
> > +	if (cpu_feature_enabled(X86_FEATURE_IBT)) {
> > +		if (ibt) {
> > +			err = cet_setup_ibt();
> > +			if (err < 0)
> > +				goto out;
> > +		}
> > +	}
> You introduced 'ibt' before it was used.  Please wait to introduce it
> until you actually use it to make it easier to review.
> 
> Also, what's wrong with:
> 
> 	if (cpu_feature_enabled(X86_FEATURE_IBT) && ibt) {
> 		...
> 	}
> 
> ?

I will fix it.
Dave Hansen July 11, 2018, 10:40 p.m. UTC | #4
On 07/11/2018 03:10 PM, Yu-cheng Yu wrote:
> On Tue, 2018-07-10 at 17:11 -0700, Dave Hansen wrote:
>> Is this feature *integral* to shadow stacks?  Or, should it just be
>> in a
>> different series?
> 
> The whole CET series is mostly about SHSTK and only a minority for IBT.
> IBT changes cannot be applied by itself without first applying SHSTK
> changes.  Would the titles help, e.g. x86/cet/ibt, x86/cet/shstk, etc.?

That doesn't really answer what I asked, though.

Do shadow stacks *require* IBT?  Or, should we concentrate on merging
shadow stacks themselves first and then do IBT at a later time, in a
different patch series?

But, yes, better patch titles would help, although I'm not sure that's
quite the format that Ingo and Thomas prefer.

>>> +int cet_setup_ibt_bitmap(void)
>>> +{
>>> +	u64 r;
>>> +	unsigned long bitmap;
>>> +	unsigned long size;
>>> +
>>> +	if (!cpu_feature_enabled(X86_FEATURE_IBT))
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	size = TASK_SIZE_MAX / PAGE_SIZE / BITS_PER_BYTE;
>> Just a note: this table is going to be gigantic on 5-level paging
>> systems, and userspace won't, by default use any of that extra
>> address
>> space.  I think it ends up being a 512GB allocation in a 128TB
>> address
>> space.
>>
>> Is that a problem?
>> 
>> On 5-level paging systems, maybe we should just stick it up in the 
>> high part of the address space.
> 
> We do not know in advance if dlopen() needs to create the bitmap.  Do
> we always reserve high address or force legacy libs to low address?

Does it matter?  Does code ever get pointers to this area?  Might they
be depending on high address bits for the IBT being clear?


>>> +	bitmap = ibt_mmap(0, size);
>>> +
>>> +	if (bitmap >= TASK_SIZE_MAX)
>>> +		return -ENOMEM;
>>> +
>>> +	bitmap &= PAGE_MASK;
>> We're page-aligning the result of an mmap()?  Why?
> 
> This may not be necessary.  The lower bits of MSR_IA32_U_CET are
> settings and not part of the bitmap address.  Is this is safer?

No.  If we have mmap() returning non-page-aligned addresses, we have
bigger problems.  Worst-case, do

	WARN_ON_ONCE(bitmap & ~PAGE_MASK);

>>> +	current->thread.cet.ibt_bitmap_addr = bitmap;
>>> +	current->thread.cet.ibt_bitmap_size = size;
>>> +	return 0;
>>> +}
>>> +
>>> +void cet_disable_ibt(void)
>>> +{
>>> +	u64 r;
>>> +
>>> +	if (!cpu_feature_enabled(X86_FEATURE_IBT))
>>> +		return;
>> Does this need a check for being already disabled?
> 
> We need that.  We cannot write to those MSRs if the CPU does not
> support it.

No, I mean for code doing cet_disable_ibt() twice in a row.

>>> +	rdmsrl(MSR_IA32_U_CET, r);
>>> +	r &= ~(MSR_IA32_CET_ENDBR_EN | MSR_IA32_CET_LEG_IW_EN |
>>> +	       MSR_IA32_CET_NO_TRACK_EN);
>>> +	wrmsrl(MSR_IA32_U_CET, r);
>>> +	current->thread.cet.ibt_enabled = 0;
>>> +}
>> What's the locking for current->thread.cet?
> 
> Now CET is not locked until the application calls ARCH_CET_LOCK.

No, I mean what is the in-kernel locking for the current->thread.cet
data structure?  Is there none because it's only ever modified via
current->thread and it's entirely thread-local?
Yu, Yu-cheng July 11, 2018, 11 p.m. UTC | #5
On Wed, 2018-07-11 at 15:40 -0700, Dave Hansen wrote:
> On 07/11/2018 03:10 PM, Yu-cheng Yu wrote:
> > 
> > On Tue, 2018-07-10 at 17:11 -0700, Dave Hansen wrote:
> > > 
> > > Is this feature *integral* to shadow stacks?  Or, should it just
> > > be
> > > in a
> > > different series?
> > The whole CET series is mostly about SHSTK and only a minority for
> > IBT.
> > IBT changes cannot be applied by itself without first applying
> > SHSTK
> > changes.  Would the titles help, e.g. x86/cet/ibt, x86/cet/shstk,
> > etc.?
> That doesn't really answer what I asked, though.
> 
> Do shadow stacks *require* IBT?  Or, should we concentrate on merging
> shadow stacks themselves first and then do IBT at a later time, in a
> different patch series?
> 
> But, yes, better patch titles would help, although I'm not sure
> that's
> quite the format that Ingo and Thomas prefer.

Shadow stack does not require IBT, but they complement each other.  If
we can resolve the legacy bitmap, both features can be merged at the
same time.

> 
> > 
> > > 
> > > > 
> > > > +int cet_setup_ibt_bitmap(void)
> > > > +{
> > > > +	u64 r;
> > > > +	unsigned long bitmap;
> > > > +	unsigned long size;
> > > > +
> > > > +	if (!cpu_feature_enabled(X86_FEATURE_IBT))
> > > > +		return -EOPNOTSUPP;
> > > > +
> > > > +	size = TASK_SIZE_MAX / PAGE_SIZE / BITS_PER_BYTE;
> > > Just a note: this table is going to be gigantic on 5-level paging
> > > systems, and userspace won't, by default use any of that extra
> > > address
> > > space.  I think it ends up being a 512GB allocation in a 128TB
> > > address
> > > space.
> > > 
> > > Is that a problem?
> > > 
> > > On 5-level paging systems, maybe we should just stick it up in
> > > the 
> > > high part of the address space.
> > We do not know in advance if dlopen() needs to create the bitmap.
> >  Do
> > we always reserve high address or force legacy libs to low address?
> Does it matter?  Does code ever get pointers to this area?  Might
> they
> be depending on high address bits for the IBT being clear?

GLIBC does the bitmap setup.  It sets bits in there.
I thought you wanted a smaller bitmap?  One way is forcing legacy libs
to low address, or not having the bitmap at all, i.e. turn IBT off.

> 
> 
> > 
> > > 
> > > > 
> > > > +	bitmap = ibt_mmap(0, size);
> > > > +
> > > > +	if (bitmap >= TASK_SIZE_MAX)
> > > > +		return -ENOMEM;
> > > > +
> > > > +	bitmap &= PAGE_MASK;
> > > We're page-aligning the result of an mmap()?  Why?
> > This may not be necessary.  The lower bits of MSR_IA32_U_CET are
> > settings and not part of the bitmap address.  Is this safer?
> No.  If we have mmap() returning non-page-aligned addresses, we have
> bigger problems.  Worst-case, do
> 
> 	WARN_ON_ONCE(bitmap & ~PAGE_MASK);
> 

Ok.

> > 
> > > 
> > > > 
> > > > +	current->thread.cet.ibt_bitmap_addr = bitmap;
> > > > +	current->thread.cet.ibt_bitmap_size = size;
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +void cet_disable_ibt(void)
> > > > +{
> > > > +	u64 r;
> > > > +
> > > > +	if (!cpu_feature_enabled(X86_FEATURE_IBT))
> > > > +		return;
> > > Does this need a check for being already disabled?
> > We need that.  We cannot write to those MSRs if the CPU does not
> > support it.
> No, I mean for code doing cet_disable_ibt() twice in a row.

Got it.

> 
> > 
> > > 
> > > > 
> > > > +	rdmsrl(MSR_IA32_U_CET, r);
> > > > +	r &= ~(MSR_IA32_CET_ENDBR_EN | MSR_IA32_CET_LEG_IW_EN
> > > > |
> > > > +	       MSR_IA32_CET_NO_TRACK_EN);
> > > > +	wrmsrl(MSR_IA32_U_CET, r);
> > > > +	current->thread.cet.ibt_enabled = 0;
> > > > +}
> > > What's the locking for current->thread.cet?
> > Now CET is not locked until the application calls ARCH_CET_LOCK.
> No, I mean what is the in-kernel locking for the current->thread.cet
> data structure?  Is there none because it's only ever modified via
> current->thread and it's entirely thread-local?

Yes, that is the case.
Dave Hansen July 11, 2018, 11:16 p.m. UTC | #6
On 07/11/2018 04:00 PM, Yu-cheng Yu wrote:
> On Wed, 2018-07-11 at 15:40 -0700, Dave Hansen wrote:
>> On 07/11/2018 03:10 PM, Yu-cheng Yu wrote:
>>>
>>> On Tue, 2018-07-10 at 17:11 -0700, Dave Hansen wrote:
>>>>
>>>> Is this feature *integral* to shadow stacks?  Or, should it just
>>>> be
>>>> in a
>>>> different series?
>>> The whole CET series is mostly about SHSTK and only a minority for
>>> IBT.
>>> IBT changes cannot be applied by itself without first applying
>>> SHSTK
>>> changes.  Would the titles help, e.g. x86/cet/ibt, x86/cet/shstk,
>>> etc.?
>> That doesn't really answer what I asked, though.
>>
>> Do shadow stacks *require* IBT?  Or, should we concentrate on merging
>> shadow stacks themselves first and then do IBT at a later time, in a
>> different patch series?
>>
>> But, yes, better patch titles would help, although I'm not sure
>> that's
>> quite the format that Ingo and Thomas prefer.
> 
> Shadow stack does not require IBT, but they complement each other.  If
> we can resolve the legacy bitmap, both features can be merged at the
> same time.

As large as this patch set is, I'd really prefer to see you get shadow
stacks merged and then move on to IBT.  I say separate them.

> GLIBC does the bitmap setup.  It sets bits in there.
> I thought you wanted a smaller bitmap?  One way is forcing legacy libs
> to low address, or not having the bitmap at all, i.e. turn IBT off.

I'm concerned with two things:
1. the virtual address space consumption, especially the *default* case
   which will be apps using 4-level address space amounts, but having
   5-level-sized tables.
2. the driving a truck-sized hole in the address space limits

You can force legacy libs to low addresses, but you can't stop anyone
from putting code into a high address *later*, at least with the code we
have today.

>>>>> +	rdmsrl(MSR_IA32_U_CET, r);
>>>>> +	r &= ~(MSR_IA32_CET_ENDBR_EN | MSR_IA32_CET_LEG_IW_EN
>>>>> |
>>>>> +	       MSR_IA32_CET_NO_TRACK_EN);
>>>>> +	wrmsrl(MSR_IA32_U_CET, r);
>>>>> +	current->thread.cet.ibt_enabled = 0;
>>>>> +}
>>>> What's the locking for current->thread.cet?
>>> Now CET is not locked until the application calls ARCH_CET_LOCK.
>> No, I mean what is the in-kernel locking for the current->thread.cet
>> data structure?  Is there none because it's only ever modified via
>> current->thread and it's entirely thread-local?
> 
> Yes, that is the case.
Yu, Yu-cheng July 13, 2018, 5:56 p.m. UTC | #7
On Wed, 2018-07-11 at 16:16 -0700, Dave Hansen wrote:
> On 07/11/2018 04:00 PM, Yu-cheng Yu wrote:
> > 
> > On Wed, 2018-07-11 at 15:40 -0700, Dave Hansen wrote:
> > > 
> > > On 07/11/2018 03:10 PM, Yu-cheng Yu wrote:
> > > > 
> > > > 
> > > > On Tue, 2018-07-10 at 17:11 -0700, Dave Hansen wrote:
> > > > > 
> > > > > 
> > > > > Is this feature *integral* to shadow stacks?  Or, should it just
> > > > > be
> > > > > in a
> > > > > different series?
> > > > The whole CET series is mostly about SHSTK and only a minority for
> > > > IBT.
> > > > IBT changes cannot be applied by itself without first applying
> > > > SHSTK
> > > > changes.  Would the titles help, e.g. x86/cet/ibt, x86/cet/shstk,
> > > > etc.?
> > > That doesn't really answer what I asked, though.
> > > 
> > > Do shadow stacks *require* IBT?  Or, should we concentrate on merging
> > > shadow stacks themselves first and then do IBT at a later time, in a
> > > different patch series?
> > > 
> > > But, yes, better patch titles would help, although I'm not sure
> > > that's
> > > quite the format that Ingo and Thomas prefer.
> > Shadow stack does not require IBT, but they complement each other.  If
> > we can resolve the legacy bitmap, both features can be merged at the
> > same time.
> As large as this patch set is, I'd really prefer to see you get shadow
> stacks merged and then move on to IBT.  I say separate them.

Ok, separate them.

> 
> > 
> > GLIBC does the bitmap setup.  It sets bits in there.
> > I thought you wanted a smaller bitmap?  One way is forcing legacy libs
> > to low address, or not having the bitmap at all, i.e. turn IBT off.
> I'm concerned with two things:
> 1. the virtual address space consumption, especially the *default* case
>    which will be apps using 4-level address space amounts, but having
>    5-level-sized tables.
> 2. driving a truck-sized hole in the address space limits
> 
> You can force legacy libs to low addresses, but you can't stop anyone
> from putting code into a high address *later*, at least with the code we
> have today.

So we will always reserve a big space for all CET tasks?

Currently if an application does dlopen() a legacy lib, it will have only
partial IBT protection and no SHSTK.  Do we want to consider simply turning
off IBT in that case?

Yu-cheng
Dave Hansen July 13, 2018, 6:05 p.m. UTC | #8
On 07/13/2018 10:56 AM, Yu-cheng Yu wrote:
>>> GLIBC does the bitmap setup.  It sets bits in there.
>>> I thought you wanted a smaller bitmap?  One way is forcing legacy libs
>>> to low address, or not having the bitmap at all, i.e. turn IBT off.
>> I'm concerned with two things:
>> 1. the virtual address space consumption, especially the *default* case
>>    which will be apps using 4-level address space amounts, but having
>>    5-level-sized tables.
>> 2. driving a truck-sized hole in the address space limits
>>
>> You can force legacy libs to low addresses, but you can't stop anyone
>> from putting code into a high address *later*, at least with the code we
>> have today.
> So we will always reserve a big space for all CET tasks?

Yes.  You either hard-restrict the address space (which we can't do
currently) or you reserve a big space.

> Currently if an application does dlopen() a legacy lib, it will have only
> partial IBT protection and no SHSTK.  Do we want to consider simply turning
> off IBT in that case?

I don't know.  I honestly don't understand the threat model enough to
give you a good answer.  Is there background on this in the docs?
diff mbox

Patch

diff --git a/arch/x86/include/asm/cet.h b/arch/x86/include/asm/cet.h
index d9ae3d86cdd7..71da2cccba16 100644
--- a/arch/x86/include/asm/cet.h
+++ b/arch/x86/include/asm/cet.h
@@ -12,7 +12,10 @@  struct task_struct;
 struct cet_status {
 	unsigned long	shstk_base;
 	unsigned long	shstk_size;
+	unsigned long	ibt_bitmap_addr;
+	unsigned long	ibt_bitmap_size;
 	unsigned int	shstk_enabled:1;
+	unsigned int	ibt_enabled:1;
 };
 
 #ifdef CONFIG_X86_INTEL_CET
@@ -21,6 +24,9 @@  void cet_disable_shstk(void);
 void cet_disable_free_shstk(struct task_struct *p);
 int cet_restore_signal(unsigned long ssp);
 int cet_setup_signal(bool ia32, unsigned long rstor, unsigned long *new_ssp);
+int cet_setup_ibt(void);
+int cet_setup_ibt_bitmap(void);
+void cet_disable_ibt(void);
 #else
 static inline int cet_setup_shstk(void) { return 0; }
 static inline void cet_disable_shstk(void) {}
@@ -28,6 +34,8 @@  static inline void cet_disable_free_shstk(struct task_struct *p) {}
 static inline int cet_restore_signal(unsigned long ssp) { return 0; }
 static inline int cet_setup_signal(bool ia32, unsigned long rstor,
 				   unsigned long *new_ssp) { return 0; }
+static inline int cet_setup_ibt(void) { return 0; }
+static inline void cet_disable_ibt(void) {}
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 3624a11e5ba6..ce5bdaf0f1ff 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -62,6 +62,12 @@ 
 #define DISABLE_SHSTK	(1<<(X86_FEATURE_SHSTK & 31))
 #endif
 
+#ifdef CONFIG_X86_INTEL_BRANCH_TRACKING_USER
+#define DISABLE_IBT	0
+#else
+#define DISABLE_IBT	(1<<(X86_FEATURE_IBT & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -72,7 +78,7 @@ 
 #define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
-#define DISABLED_MASK7	(DISABLE_PTI)
+#define DISABLED_MASK7	(DISABLE_PTI|DISABLE_IBT)
 #define DISABLED_MASK8	0
 #define DISABLED_MASK9	(DISABLE_MPX)
 #define DISABLED_MASK10	0
diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c
index 4eba7790c4e4..8bbd63e1a2ba 100644
--- a/arch/x86/kernel/cet.c
+++ b/arch/x86/kernel/cet.c
@@ -12,6 +12,8 @@ 
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/sched/signal.h>
+#include <linux/vmalloc.h>
+#include <linux/bitops.h>
 #include <asm/msr.h>
 #include <asm/user.h>
 #include <asm/fpu/xstate.h>
@@ -241,3 +243,74 @@  int cet_setup_signal(bool ia32, unsigned long rstor_addr,
 	set_shstk_ptr(ssp);
 	return 0;
 }
+
+static unsigned long ibt_mmap(unsigned long addr, unsigned long len)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long populate;
+
+	down_write(&mm->mmap_sem);
+	addr = do_mmap(NULL, addr, len, PROT_READ | PROT_WRITE,
+		       MAP_ANONYMOUS | MAP_PRIVATE,
+		       VM_DONTDUMP, 0, &populate, NULL);
+	up_write(&mm->mmap_sem);
+
+	if (populate)
+		mm_populate(addr, populate);
+
+	return addr;
+}
+
+int cet_setup_ibt(void)
+{
+	u64 r;
+
+	if (!cpu_feature_enabled(X86_FEATURE_IBT))
+		return -EOPNOTSUPP;
+
+	rdmsrl(MSR_IA32_U_CET, r);
+	r |= (MSR_IA32_CET_ENDBR_EN | MSR_IA32_CET_NO_TRACK_EN);
+	wrmsrl(MSR_IA32_U_CET, r);
+	current->thread.cet.ibt_enabled = 1;
+	return 0;
+}
+
+int cet_setup_ibt_bitmap(void)
+{
+	u64 r;
+	unsigned long bitmap;
+	unsigned long size;
+
+	if (!cpu_feature_enabled(X86_FEATURE_IBT))
+		return -EOPNOTSUPP;
+
+	size = TASK_SIZE_MAX / PAGE_SIZE / BITS_PER_BYTE;
+	bitmap = ibt_mmap(0, size);
+
+	if (bitmap >= TASK_SIZE_MAX)
+		return -ENOMEM;
+
+	bitmap &= PAGE_MASK;
+
+	rdmsrl(MSR_IA32_U_CET, r);
+	r |= (MSR_IA32_CET_LEG_IW_EN | bitmap);
+	wrmsrl(MSR_IA32_U_CET, r);
+
+	current->thread.cet.ibt_bitmap_addr = bitmap;
+	current->thread.cet.ibt_bitmap_size = size;
+	return 0;
+}
+
+void cet_disable_ibt(void)
+{
+	u64 r;
+
+	if (!cpu_feature_enabled(X86_FEATURE_IBT))
+		return;
+
+	rdmsrl(MSR_IA32_U_CET, r);
+	r &= ~(MSR_IA32_CET_ENDBR_EN | MSR_IA32_CET_LEG_IW_EN |
+	       MSR_IA32_CET_NO_TRACK_EN);
+	wrmsrl(MSR_IA32_U_CET, r);
+	current->thread.cet.ibt_enabled = 0;
+}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 705467839ce8..c609c9ce5691 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -413,7 +413,8 @@  __setup("nopku", setup_disable_pku);
 
 static __always_inline void setup_cet(struct cpuinfo_x86 *c)
 {
-	if (cpu_feature_enabled(X86_FEATURE_SHSTK))
+	if (cpu_feature_enabled(X86_FEATURE_SHSTK) ||
+	    cpu_feature_enabled(X86_FEATURE_IBT))
 		cr4_set_bits(X86_CR4_CET);
 }
 
@@ -434,6 +435,23 @@  static __init int setup_disable_shstk(char *s)
 __setup("no_cet_shstk", setup_disable_shstk);
 #endif
 
+#ifdef CONFIG_X86_INTEL_BRANCH_TRACKING_USER
+static __init int setup_disable_ibt(char *s)
+{
+	/* require an exact match without trailing characters */
+	if (strlen(s))
+		return 0;
+
+	if (!boot_cpu_has(X86_FEATURE_IBT))
+		return 1;
+
+	setup_clear_cpu_cap(X86_FEATURE_IBT);
+	pr_info("x86: 'no_cet_ibt' specified, disabling Branch Tracking\n");
+	return 1;
+}
+__setup("no_cet_ibt", setup_disable_ibt);
+#endif
+
 /*
  * Some CPU features depend on higher CPUID levels, which may not always
  * be available due to CPUID level capping or broken virtualization
diff --git a/arch/x86/kernel/elf.c b/arch/x86/kernel/elf.c
index 233f6dad9c1f..42e08d3b573e 100644
--- a/arch/x86/kernel/elf.c
+++ b/arch/x86/kernel/elf.c
@@ -15,6 +15,7 @@ 
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/string.h>
+#include <linux/compat.h>
 
 /*
  * The .note.gnu.property layout:
@@ -222,7 +223,8 @@  int arch_setup_features(void *ehdr_p, void *phdr_p,
 
 	struct elf64_hdr *ehdr64 = ehdr_p;
 
-	if (!cpu_feature_enabled(X86_FEATURE_SHSTK))
+	if (!cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+	    !cpu_feature_enabled(X86_FEATURE_IBT))
 		return 0;
 
 	if (ehdr64->e_ident[EI_CLASS] == ELFCLASS64) {
@@ -250,6 +252,9 @@  int arch_setup_features(void *ehdr_p, void *phdr_p,
 	current->thread.cet.shstk_enabled = 0;
 	current->thread.cet.shstk_base = 0;
 	current->thread.cet.shstk_size = 0;
+	current->thread.cet.ibt_enabled = 0;
+	current->thread.cet.ibt_bitmap_addr = 0;
+	current->thread.cet.ibt_bitmap_size = 0;
 	if (cpu_feature_enabled(X86_FEATURE_SHSTK)) {
 		if (shstk) {
 			err = cet_setup_shstk();
@@ -257,6 +262,15 @@  int arch_setup_features(void *ehdr_p, void *phdr_p,
 				goto out;
 		}
 	}
+
+	if (cpu_feature_enabled(X86_FEATURE_IBT)) {
+		if (ibt) {
+			err = cet_setup_ibt();
+			if (err < 0)
+				goto out;
+		}
+	}
+
 out:
 	return err;
 }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b3b0b482983a..309ebb7f9d8d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -138,6 +138,7 @@  void flush_thread(void)
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 
 	cet_disable_shstk();
+	cet_disable_ibt();
 	fpu__clear(&tsk->thread.fpu);
 }