diff mbox series

[RFCv2,05/10] x86/mm: Provide untagged_addr() helper

Message ID 20220511022751.65540-7-kirill.shutemov@linux.intel.com (mailing list archive)
State New
Headers show
Series Linear Address Masking enabling | expand

Commit Message

kirill.shutemov@linux.intel.com May 11, 2022, 2:27 a.m. UTC
The helper is used by the core-mm to strip tag bits and get the address to
the canonical shape. It only handles userspace addresses.

For LAM, the address gets sanitized according to the thread features.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/page_32.h |  3 +++
 arch/x86/include/asm/page_64.h | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+)

Comments

Peter Zijlstra May 11, 2022, 7:21 a.m. UTC | #1
On Wed, May 11, 2022 at 05:27:46AM +0300, Kirill A. Shutemov wrote:
> +#define __untagged_addr(addr, n)	\
> +	((__force __typeof__(addr))sign_extend64((__force u64)(addr), n))
> +
> +#define untagged_addr(addr)	({					\
> +	u64 __addr = (__force u64)(addr);				\
> +	if (__addr >> 63 == 0) {					\
> +		if (current->thread.features & X86_THREAD_LAM_U57)	\
> +			__addr &= __untagged_addr(__addr, 56);		\
> +		else if (current->thread.features & X86_THREAD_LAM_U48)	\
> +			__addr &= __untagged_addr(__addr, 47);		\
> +	}								\
> +	(__force __typeof__(addr))__addr;				\
> +})

Assuming you got your bits in hardware order:

	u64 __addr = addr;
	if ((s64)__addr >= 0) {
		int lam = (current->thread.features >> X86_THREAD_LAM_U57) & 3;
		if (lam)
			__addr &= sign_extend64(__addr, 65 - 9*lam);
	}
	__addr;

has less branches on and should definitely result in better code (or I
need more morning juice).

> +
> +#define untagged_ptr(ptr)	({					\
> +	u64 __ptrval = (__force u64)(ptr);				\
> +	__ptrval = untagged_addr(__ptrval);				\
> +	(__force __typeof__(*(ptr)) *)__ptrval;				\
> +})
>  #endif	/* !__ASSEMBLY__ */
>  
>  #ifdef CONFIG_X86_VSYSCALL_EMULATION
> -- 
> 2.35.1
>
Peter Zijlstra May 11, 2022, 7:45 a.m. UTC | #2
On Wed, May 11, 2022 at 09:21:16AM +0200, Peter Zijlstra wrote:
> On Wed, May 11, 2022 at 05:27:46AM +0300, Kirill A. Shutemov wrote:
> > +#define __untagged_addr(addr, n)	\
> > +	((__force __typeof__(addr))sign_extend64((__force u64)(addr), n))
> > +
> > +#define untagged_addr(addr)	({					\
> > +	u64 __addr = (__force u64)(addr);				\
> > +	if (__addr >> 63 == 0) {					\
> > +		if (current->thread.features & X86_THREAD_LAM_U57)	\
> > +			__addr &= __untagged_addr(__addr, 56);		\
> > +		else if (current->thread.features & X86_THREAD_LAM_U48)	\
> > +			__addr &= __untagged_addr(__addr, 47);		\
> > +	}								\
> > +	(__force __typeof__(addr))__addr;				\
> > +})
> 
> Assuming you got your bits in hardware order:
> 
> 	u64 __addr = addr;
> 	if ((s64)__addr >= 0) {
> 		int lam = (current->thread.features >> X86_THREAD_LAM_U57) & 3;

That needs a _BIT suffix or something, same in the previous reply.

> 		if (lam)
> 			__addr &= sign_extend64(__addr, 65 - 9*lam);
> 	}
> 	__addr;
> 
> has less branches on and should definitely result in better code (or I
> need more morning juice).

I definitely needs more morning juice :-)
Thomas Gleixner May 12, 2022, 1:06 p.m. UTC | #3
On Wed, May 11 2022 at 05:27, Kirill A. Shutemov wrote:
> +#define __untagged_addr(addr, n)	\
> +	((__force __typeof__(addr))sign_extend64((__force u64)(addr), n))

How is this supposed to be correct? This sign extends based on bit 47
resp. 56, i.e. the topmost bit of the userspace address space for the
LAM mode.

So if that bit _is_ set, then the result has bit 48-63 resp. 57-63 set
as well. Not really what you want, right?

This has to mask out bit 48-62 resp. 57-62 and leave all other bits
alone.

> +#define untagged_addr(addr)	({					\
> +	u64 __addr = (__force u64)(addr);				\
> +	if (__addr >> 63 == 0) {					\
> +		if (current->thread.features & X86_THREAD_LAM_U57)	\
> +			__addr &= __untagged_addr(__addr, 56);		\
> +		else if (current->thread.features & X86_THREAD_LAM_U48)	\
> +			__addr &= __untagged_addr(__addr, 47);		\
> +	}								\
> +	(__force __typeof__(addr))__addr;				\
> +})

So this wants something like this:

#define untagged_addr(addr)	({			\
	u64 __addr = (__force u64)(addr);		\
							\
	__addr &= current->thread.lam_untag_mask;	\
	(__force __typeof__(addr))__addr;		\
})

No conditionals, fast _and_ correct. Setting this untag mask up once
when LAM is enabled is not rocket science.

Thanks,

        tglx
Peter Zijlstra May 12, 2022, 2:23 p.m. UTC | #4
On Thu, May 12, 2022 at 03:06:38PM +0200, Thomas Gleixner wrote:

> #define untagged_addr(addr)	({			\
> 	u64 __addr = (__force u64)(addr);		\
> 							\
> 	__addr &= current->thread.lam_untag_mask;	\
> 	(__force __typeof__(addr))__addr;		\
> })
> 
> No conditionals, fast _and_ correct. Setting this untag mask up once
> when LAM is enabled is not rocket science.

But that goes wrong if someone ever wants to untag a kernel address and
not use the result for access_ok().

I'd feel better about something like:

	s64 __addr = (addr);
	s64 __sign = __addr;

	__sign >>= 63;
	__sign &= lam_untag_mask;
	__addr &= lam_untag_mask;
	__addr |= __sign;

	__addr;

Which simply extends bit 63 downwards -- although possibly there's an
easier way to do that, this is pretty gross.
Thomas Gleixner May 12, 2022, 3:16 p.m. UTC | #5
On Thu, May 12 2022 at 16:23, Peter Zijlstra wrote:
> On Thu, May 12, 2022 at 03:06:38PM +0200, Thomas Gleixner wrote:
>
>> #define untagged_addr(addr)	({			\
>> 	u64 __addr = (__force u64)(addr);		\
>> 							\
>> 	__addr &= current->thread.lam_untag_mask;	\
>> 	(__force __typeof__(addr))__addr;		\
>> })
>> 
>> No conditionals, fast _and_ correct. Setting this untag mask up once
>> when LAM is enabled is not rocket science.
>
> But that goes wrong if someone ever wants to untag a kernel address and
> not use the result for access_ok().
>
> I'd feel better about something like:
>
> 	s64 __addr = (addr);
> 	s64 __sign = __addr;
>
> 	__sign >>= 63;
> 	__sign &= lam_untag_mask;

that needs to be

 	__sign &= ~lam_untag_mask;

> 	__addr &= lam_untag_mask;
> 	__addr |= __sign;
>
> 	__addr;
>
> Which simply extends bit 63 downwards -- although possibly there's an
> easier way to do that, this is pretty gross.

For the price of a conditional:

    __addr &= lam_untag_mask;
    if (__addr & BIT(63))
        __addr |= ~lam_untag_mask;

Now you have the choice between gross and ugly.

Thanks,

        tglx
Thomas Gleixner May 12, 2022, 11:14 p.m. UTC | #6
On Thu, May 12 2022 at 17:16, Thomas Gleixner wrote:
> On Thu, May 12 2022 at 16:23, Peter Zijlstra wrote:
>> On Thu, May 12, 2022 at 03:06:38PM +0200, Thomas Gleixner wrote:
>>
>>> #define untagged_addr(addr)	({			\
>>> 	u64 __addr = (__force u64)(addr);		\
>>> 							\
>>> 	__addr &= current->thread.lam_untag_mask;	\
>>> 	(__force __typeof__(addr))__addr;		\
>>> })
>>> 
>>> No conditionals, fast _and_ correct. Setting this untag mask up once
>>> when LAM is enabled is not rocket science.
>>
>> But that goes wrong if someone ever wants to untag a kernel address and
>> not use the result for access_ok().
>>
>> I'd feel better about something like:
>>
>> 	s64 __addr = (addr);
>> 	s64 __sign = __addr;
>>
>> 	__sign >>= 63;
>> 	__sign &= lam_untag_mask;
>
> that needs to be
>
>  	__sign &= ~lam_untag_mask;
>
>> 	__addr &= lam_untag_mask;
>> 	__addr |= __sign;
>>
>> 	__addr;
>>
>> Which simply extends bit 63 downwards -- although possibly there's an
>> easier way to do that, this is pretty gross.
>
> For the price of a conditional:
>
>     __addr &= lam_untag_mask;
>     if (__addr & BIT(63))
>         __addr |= ~lam_untag_mask;
>
> Now you have the choice between gross and ugly.

Though we can also replace your flavour of gross with a different
flavour of gross:

	s64 sign = (s64)(addr) >> 63;

	addr ^= sign;
	addr &= mask;
	addr ^= sign;

After twisting my brain around replacing gross by something differently
gross and coming up with the gem above I actually did compile the
variants and discovered that GCC compiles your flavour of gross exactly
to this:

        mov    %rdi,%rax
        sar    $0x3f,%rax
        xor    %rax,%rdi
        and    %rsi,%rdi
        xor    %rdi,%rax

I have to admit that compilers are sometimes pretty smart. I might have
to rethink my prejudice. :)

But then clang converts your flavour of 'gross' to:

     	mov    %rsi,%rax
     	mov    %rsi,%rcx
     	and    %rdi,%rax
     	sar    $0x3f,%rdi
     	not    %rcx
     	and    %rdi,%rcx
     	or     %rcx,%rax

and my explicit flavour to:

      	mov    %rdi,%rax
      	mov    %rdi,%rcx
      	sar    $0x3f,%rcx
      	xor    %rcx,%rax
      	and    %rsi,%rax
      	xor    %rcx,%rax

which is at least slightly less retarted, but still has a pointless mov
there. Note, that this was compiled in user space with noinline
functions. I did some inlined variants as well and clang still insists
on using an extra register for no obvious reason. This might be more
efficient in reality, but I haven't bothered to write a test which
might give an answer via perf.

The ugly with the conditional resolves for both compilers to:

       	mov    %rsi,%rax
       	mov    %rsi,%rcx
       	not    %rcx
       	or     %rdi,%rcx
       	and    %rdi,%rax
       	test   %rdi,%rdi
       	cmovs  %rcx,%rax

At least they agree on that one.

But whatever we chose, it's sad, that we need to have support for
interfaces which swallow any pointer (user or kernel) because otherwise
this really boils down to a single OR resp. AND operation plus the
according mov to retrieve the mask.

Thanks,

        tglx
David Laight May 13, 2022, 10:14 a.m. UTC | #7
From: Thomas Gleixner
> Sent: 13 May 2022 00:15
...
> But whatever we chose, it's sad, that we need to have support for
> interfaces which swallow any pointer (user or kernel) because otherwise
> this really boils down to a single OR resp. AND operation plus the
> according mov to retrieve the mask.

Are there any of those left?
Most will have gone with setfs(KERNEL_DS) removal.
Almost all code has to know whether an address is user
or kernel - the value can't be used because of architectures
that use the same address values in user and kernel.

How often do addresses actually need de-tagging?
Probably only code that is looking for page table
entries for virtual addresses?
How often does that happen for user addresses?

If the hardware is ignoring the bits then you don't
need to remove them before memory accesses.
That would include all userspace accesses.
Clearly access_ok() has to work with tagged addresses,
but that doesn't require the tag be masked off.
It can just check the transfer doesn't cross 1u<<63.
It (probably) just requires the fault handler to treat
non-canonical address faults as page faults.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
diff mbox series

Patch

diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index df42f8aa99e4..2d35059b90c1 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -15,6 +15,9 @@  extern unsigned long __phys_addr(unsigned long);
 #define __phys_addr_symbol(x)	__phys_addr(x)
 #define __phys_reloc_hide(x)	RELOC_HIDE((x), 0)
 
+#define untagged_addr(addr)	(addr)
+#define untagged_ptr(ptr)	(ptr)
+
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif /* CONFIG_FLATMEM */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index e9c86299b835..3a40c958b24a 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -7,6 +7,7 @@ 
 #ifndef __ASSEMBLY__
 #include <asm/cpufeatures.h>
 #include <asm/alternative.h>
+#include <uapi/asm/prctl.h>
 
 /* duplicated to the one in bootmem.h */
 extern unsigned long max_pfn;
@@ -90,6 +91,25 @@  static __always_inline unsigned long task_size_max(void)
 }
 #endif	/* CONFIG_X86_5LEVEL */
 
+#define __untagged_addr(addr, n)	\
+	((__force __typeof__(addr))sign_extend64((__force u64)(addr), n))
+
+#define untagged_addr(addr)	({					\
+	u64 __addr = (__force u64)(addr);				\
+	if (__addr >> 63 == 0) {					\
+		if (current->thread.features & X86_THREAD_LAM_U57)	\
+			__addr &= __untagged_addr(__addr, 56);		\
+		else if (current->thread.features & X86_THREAD_LAM_U48)	\
+			__addr &= __untagged_addr(__addr, 47);		\
+	}								\
+	(__force __typeof__(addr))__addr;				\
+})
+
+#define untagged_ptr(ptr)	({					\
+	u64 __ptrval = (__force u64)(ptr);				\
+	__ptrval = untagged_addr(__ptrval);				\
+	(__force __typeof__(*(ptr)) *)__ptrval;				\
+})
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION