[RFCv2,09/10] x86/mm: Add userspace API to enable Linear Address Masking

Message ID 20220511022751.65540-11-kirill.shutemov@linux.intel.com (mailing list archive)
State New
Series Linear Address Masking enabling

Commit Message

Kirill A. Shutemov May 11, 2022, 2:27 a.m. UTC
Allow enabling Linear Address Masking via the ARCH_THREAD_FEATURE_ENABLE
arch_prctl(2).

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/kernel/process.c    | 21 +++++++++++++++-
 arch/x86/kernel/process.h    |  2 ++
 arch/x86/kernel/process_64.c | 46 ++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 1 deletion(-)
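
For readers following along, a minimal userspace sketch of the proposed
interface is below. It is not part of the patch: the option and feature
values are placeholders standing in for the definitions introduced earlier
in this series, which would normally come from the exported uapi headers.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef ARCH_THREAD_FEATURE_ENABLE
#define ARCH_THREAD_FEATURE_ENABLE	0x5001	/* placeholder value */
#endif
#ifndef X86_THREAD_LAM_U57
#define X86_THREAD_LAM_U57		0x2	/* placeholder value */
#endif

int main(void)
{
	uint64_t *p = malloc(sizeof(*p));
	long features;

	if (!p)
		return 1;

	/* Ask the kernel to enable LAM_U57 for this thread. */
	features = syscall(SYS_arch_prctl, ARCH_THREAD_FEATURE_ENABLE,
			   X86_THREAD_LAM_U57);
	if (features < 0) {
		perror("arch_prctl(ARCH_THREAD_FEATURE_ENABLE)");
		return 1;
	}

	/*
	 * With LAM_U57 active the CPU ignores bits 62:57 of user pointers,
	 * so a pointer carrying a tag in those bits still reaches the same
	 * memory. Without LAM this access would fault on a non-canonical
	 * address.
	 */
	uint64_t *tagged = (uint64_t *)((uint64_t)p | (0x2aULL << 57));
	*tagged = 42;

	printf("features=%#lx *p=%llu\n", features, (unsigned long long)*p);
	free(p);
	return 0;
}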

Comments

Peter Zijlstra May 11, 2022, 7:26 a.m. UTC | #1
On Wed, May 11, 2022 at 05:27:50AM +0300, Kirill A. Shutemov wrote:
> @@ -1013,8 +1017,23 @@ static long thread_feature_prctl(struct task_struct *task, int option,
>  
>  	/* Handle ARCH_THREAD_FEATURE_ENABLE */
>  
> +	if (features & (X86_THREAD_LAM_U48 | X86_THREAD_LAM_U57)) {
> +		long ret;
> +
> +		/* LAM is only available in long mode */
> +		if (in_32bit_syscall())
> +			return -EINVAL;

So what happens if userspace sets up a 32bit code entry in the LDT and
does the LAM thing as a 64bit syscall but then goes and runs 32bit code?

> +
> +		ret = enable_lam(task, features);
> +		if (ret)
> +			return ret;
> +	}
> +
>  	task->thread.features |= features;
>  out:
> +	/* Update CR3 to get LAM active */
> +	switch_mm(task->mm, task->mm, task);
> +
>  	return task->thread.features;
>  }
>
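
For reference, the setup Peter describes could look roughly like the sketch
below: a 64-bit process installs a 32-bit code entry in the LDT after having
enabled LAM. Nothing here is from the patch; the far transfer into the
segment (and the LAM arch_prctl itself) are only noted in comments.

#include <asm/ldt.h>		/* struct user_desc */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc desc;

	/* LAM would already be enabled via the new arch_prctl(2) here. */

	memset(&desc, 0, sizeof(desc));
	desc.entry_number   = 0;
	desc.base_addr      = 0;
	desc.limit          = 0xfffff;
	desc.seg_32bit      = 1;	/* 32-bit code segment */
	desc.contents       = 2;	/* MODIFY_LDT_CONTENTS_CODE */
	desc.limit_in_pages = 1;

	/* modify_ldt() func 1: write one LDT entry. */
	if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0) {
		perror("modify_ldt");
		return 1;
	}

	/*
	 * A far jump through selector (0 << 3) | 4 | 3 == 0x7 would now
	 * drop the thread into 32-bit compatibility mode; that jump is the
	 * part of the scenario left out here.
	 */
	printf("32-bit LDT code entry installed\n");
	return 0;
}
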
H.J. Lu May 11, 2022, 2:15 p.m. UTC | #2
On Tue, May 10, 2022 at 7:29 PM Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> Allow enabling Linear Address Masking via the ARCH_THREAD_FEATURE_ENABLE
> arch_prctl(2).
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  arch/x86/kernel/process.c    | 21 +++++++++++++++-
>  arch/x86/kernel/process.h    |  2 ++
>  arch/x86/kernel/process_64.c | 46 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 68 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index cb8fc28f2eae..911c24321312 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -46,6 +46,8 @@
>  #include <asm/proto.h>
>  #include <asm/frame.h>
>  #include <asm/unwind.h>
> +#include <asm/mmu_context.h>
> +#include <asm/compat.h>
>
>  #include "process.h"
>
> @@ -992,7 +994,9 @@ unsigned long __get_wchan(struct task_struct *p)
>  static long thread_feature_prctl(struct task_struct *task, int option,
>                                  unsigned long features)

Since this arch_prctl will also be used for CET, which supports
32-bit processes, shouldn't int, instead of long, be used?

>  {
> -       const unsigned long known_features = 0;
> +       const unsigned long known_features =
> +               X86_THREAD_LAM_U48 |
> +               X86_THREAD_LAM_U57;
>
>         if (features & ~known_features)
>                 return -EINVAL;
> @@ -1013,8 +1017,23 @@ static long thread_feature_prctl(struct task_struct *task, int option,
>
>         /* Handle ARCH_THREAD_FEATURE_ENABLE */
>
> +       if (features & (X86_THREAD_LAM_U48 | X86_THREAD_LAM_U57)) {
> +               long ret;
> +
> +               /* LAM is only available in long mode */
> +               if (in_32bit_syscall())
> +                       return -EINVAL;
> +
> +               ret = enable_lam(task, features);
> +               if (ret)
> +                       return ret;
> +       }
> +
>         task->thread.features |= features;
>  out:
> +       /* Update CR3 to get LAM active */
> +       switch_mm(task->mm, task->mm, task);
> +
>         return task->thread.features;
>  }
>
> diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h
> index 76b547b83232..b8fa0e599c6e 100644
> --- a/arch/x86/kernel/process.h
> +++ b/arch/x86/kernel/process.h
> @@ -4,6 +4,8 @@
>
>  #include <asm/spec-ctrl.h>
>
> +long enable_lam(struct task_struct *task, unsigned long features);
> +
>  void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
>
>  /*
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index e459253649be..a25c51da7005 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -729,6 +729,52 @@ void set_personality_ia32(bool x32)
>  }
>  EXPORT_SYMBOL_GPL(set_personality_ia32);
>
> +static bool lam_u48_allowed(void)
> +{
> +       struct mm_struct *mm = current->mm;
> +
> +       if (!full_va_allowed(mm))
> +               return true;
> +
> +       return find_vma(mm, DEFAULT_MAP_WINDOW) == NULL;
> +}
> +
> +long enable_lam(struct task_struct *task, unsigned long features)
> +{
> +       features |= task->thread.features;
> +
> +       /* LAM_U48 and LAM_U57 are mutually exclusive */
> +       if ((features & X86_THREAD_LAM_U48) && (features & X86_THREAD_LAM_U57))
> +               return -EINVAL;
> +
> +       if (!cpu_feature_enabled(X86_FEATURE_LAM))
> +               return -ENXIO;
> +
> +       if (mmap_write_lock_killable(task->mm))
> +               return -EINTR;
> +
> +       if ((features & X86_THREAD_LAM_U48) && !lam_u48_allowed()) {
> +               mmap_write_unlock(task->mm);
> +               return -EINVAL;
> +       }
> +
> +       /*
> +        * Record the most permissive (allowing the widest tags) LAM
> +        * mode in the mm context. It determines whether mappings above
> +        * 47 bits are allowed for the process.
> +        *
> +        * The mode is also used by a kernel thread when it does work
> +        * on behalf of the process (like async I/O, io_uring, etc.)
> +        */
> +       if (features & X86_THREAD_LAM_U48)
> +               current->mm->context.lam = LAM_U48;
> +       else if (current->mm->context.lam == LAM_NONE)
> +               current->mm->context.lam = LAM_U57;
> +
> +       mmap_write_unlock(task->mm);
> +       return 0;
> +}
> +
>  #ifdef CONFIG_CHECKPOINT_RESTORE
>  static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
>  {
> --
> 2.35.1
>
Thomas Gleixner May 12, 2022, 2:21 p.m. UTC | #3
On Wed, May 11 2022 at 07:15, H. J. Lu wrote:
>> @@ -992,7 +994,9 @@ unsigned long __get_wchan(struct task_struct *p)
>>  static long thread_feature_prctl(struct task_struct *task, int option,
>>                                  unsigned long features)
>
> Since this arch_prctl will also be used for CET, which supports
> 32-bit processes, shouldn't int, instead of long, be used?

This is kernel internal code and the compat syscall takes care of the
int to long conversion. So yes, that could use unsigned int, but it does
not matter.

Thanks,

        tglx
Thomas Gleixner May 12, 2022, 2:46 p.m. UTC | #4
On Wed, May 11 2022 at 09:26, Peter Zijlstra wrote:

> On Wed, May 11, 2022 at 05:27:50AM +0300, Kirill A. Shutemov wrote:
>> @@ -1013,8 +1017,23 @@ static long thread_feature_prctl(struct task_struct *task, int option,
>>  
>>  	/* Handle ARCH_THREAD_FEATURE_ENABLE */
>>  
>> +	if (features & (X86_THREAD_LAM_U48 | X86_THREAD_LAM_U57)) {
>> +		long ret;
>> +
>> +		/* LAM is only available in long mode */
>> +		if (in_32bit_syscall())
>> +			return -EINVAL;
>
> So what happens if userspace sets up a 32bit code entry in the LDT and
> does the LAM thing as a 64bit syscall but then goes and runs 32bit code?

AFAICS, nothing happens. The only requirements are CR4.PAE = 1,
IA32_EFER.LME = 1. Those are unaffected by user space running 32bit
code, no?

32bit code can't use 64bit pointers so it can't have metadata bits
set. But x32 can and is excluded by the above too.

So the whole muck must be conditional on CONFIG_X86_64=y and does not
need any other restrictions IMO.

Thanks,

        tglx

Patch

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cb8fc28f2eae..911c24321312 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -46,6 +46,8 @@ 
 #include <asm/proto.h>
 #include <asm/frame.h>
 #include <asm/unwind.h>
+#include <asm/mmu_context.h>
+#include <asm/compat.h>
 
 #include "process.h"
 
@@ -992,7 +994,9 @@ unsigned long __get_wchan(struct task_struct *p)
 static long thread_feature_prctl(struct task_struct *task, int option,
 				 unsigned long features)
 {
-	const unsigned long known_features = 0;
+	const unsigned long known_features =
+		X86_THREAD_LAM_U48 |
+		X86_THREAD_LAM_U57;
 
 	if (features & ~known_features)
 		return -EINVAL;
@@ -1013,8 +1017,23 @@ static long thread_feature_prctl(struct task_struct *task, int option,
 
 	/* Handle ARCH_THREAD_FEATURE_ENABLE */
 
+	if (features & (X86_THREAD_LAM_U48 | X86_THREAD_LAM_U57)) {
+		long ret;
+
+		/* LAM is only available in long mode */
+		if (in_32bit_syscall())
+			return -EINVAL;
+
+		ret = enable_lam(task, features);
+		if (ret)
+			return ret;
+	}
+
 	task->thread.features |= features;
 out:
+	/* Update CR3 to get LAM active */
+	switch_mm(task->mm, task->mm, task);
+
 	return task->thread.features;
 }
 
diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h
index 76b547b83232..b8fa0e599c6e 100644
--- a/arch/x86/kernel/process.h
+++ b/arch/x86/kernel/process.h
@@ -4,6 +4,8 @@ 
 
 #include <asm/spec-ctrl.h>
 
+long enable_lam(struct task_struct *task, unsigned long features);
+
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
 
 /*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e459253649be..a25c51da7005 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -729,6 +729,52 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
+static bool lam_u48_allowed(void)
+{
+	struct mm_struct *mm = current->mm;
+
+	if (!full_va_allowed(mm))
+		return true;
+
+	return find_vma(mm, DEFAULT_MAP_WINDOW) == NULL;
+}
+
+long enable_lam(struct task_struct *task, unsigned long features)
+{
+	features |= task->thread.features;
+
+	/* LAM_U48 and LAM_U57 are mutually exclusive */
+	if ((features & X86_THREAD_LAM_U48) && (features & X86_THREAD_LAM_U57))
+		return -EINVAL;
+
+	if (!cpu_feature_enabled(X86_FEATURE_LAM))
+		return -ENXIO;
+
+	if (mmap_write_lock_killable(task->mm))
+		return -EINTR;
+
+	if ((features & X86_THREAD_LAM_U48) && !lam_u48_allowed()) {
+		mmap_write_unlock(task->mm);
+		return -EINVAL;
+	}
+
+	/*
+	 * Record the most permissive (allowing the widest tags) LAM
+	 * mode in the mm context. It determines whether mappings above
+	 * 47 bits are allowed for the process.
+	 *
+	 * The mode is also used by a kernel thread when it does work
+	 * on behalf of the process (like async I/O, io_uring, etc.)
+	 */
+	if (features & X86_THREAD_LAM_U48)
+		current->mm->context.lam = LAM_U48;
+	else if (current->mm->context.lam == LAM_NONE)
+		current->mm->context.lam = LAM_U57;
+
+	mmap_write_unlock(task->mm);
+	return 0;
+}
+
 #ifdef CONFIG_CHECKPOINT_RESTORE
 static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
 {
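
The mutual exclusion enforced by enable_lam() above is visible from
userspace: once one LAM mode is enabled for a thread, requesting the other
fails. A short test sketch, again with placeholder values standing in for
the definitions from earlier patches in the series:

#include <errno.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef ARCH_THREAD_FEATURE_ENABLE
#define ARCH_THREAD_FEATURE_ENABLE	0x5001	/* placeholder value */
#endif
#ifndef X86_THREAD_LAM_U48
#define X86_THREAD_LAM_U48		0x1	/* placeholder value */
#endif
#ifndef X86_THREAD_LAM_U57
#define X86_THREAD_LAM_U57		0x2	/* placeholder value */
#endif

static long enable(unsigned long feature)
{
	return syscall(SYS_arch_prctl, ARCH_THREAD_FEATURE_ENABLE, feature);
}

int main(void)
{
	/* Succeeds on LAM-capable hardware, fails with ENXIO otherwise. */
	if (enable(X86_THREAD_LAM_U57) < 0)
		perror("LAM_U57");

	/*
	 * enable_lam() ORs the already enabled bits into the request, so
	 * LAM_U48 on top of LAM_U57 is seen as both bits set and rejected
	 * with EINVAL.
	 */
	if (enable(X86_THREAD_LAM_U48) < 0 && errno == EINVAL)
		printf("LAM_U48 after LAM_U57 rejected as expected\n");

	return 0;
}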