[v2,3/7] arm64: Introduce uaccess_{disable,enable} functionality based on TTBR0_EL1
diff mbox

Message ID 1472828533-28197-4-git-send-email-catalin.marinas@arm.com
State New
Headers show

Commit Message

Catalin Marinas Sept. 2, 2016, 3:02 p.m. UTC
This patch adds the uaccess macros/functions to disable access to user
space by setting TTBR0_EL1 to a reserved zeroed page. Since the value
written to TTBR0_EL1 must be a physical address, for simplicity this
patch introduces a reserved_ttbr0 page at a constant offset from
swapper_pg_dir. The uaccess_disable code uses the ttbr1_el1 value
adjusted by the reserved_ttbr0 offset.

Enabling access to user is done by restoring TTBR0_EL1 with the value
from the struct thread_info ttbr0 variable. Interrupts must be disabled
during the uaccess_ttbr0_enable code to ensure the atomicity of the
thread_info.ttbr0 read and TTBR0_EL1 write.

Cc: Will Deacon <will.deacon@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/assembler.h      | 16 ++++++
 arch/arm64/include/asm/cpufeature.h     |  6 +++
 arch/arm64/include/asm/kernel-pgtable.h |  7 +++
 arch/arm64/include/asm/thread_info.h    |  3 ++
 arch/arm64/include/asm/uaccess.h        | 89 +++++++++++++++++++++++++++++++--
 arch/arm64/kernel/asm-offsets.c         |  3 ++
 arch/arm64/kernel/cpufeature.c          |  1 +
 arch/arm64/kernel/entry.S               |  4 --
 arch/arm64/kernel/head.S                |  6 +--
 arch/arm64/kernel/vmlinux.lds.S         |  5 ++
 10 files changed, 129 insertions(+), 11 deletions(-)

Comments

Mark Rutland Sept. 5, 2016, 5:20 p.m. UTC | #1
Hi Catalin,

On Fri, Sep 02, 2016 at 04:02:09PM +0100, Catalin Marinas wrote:
> This patch adds the uaccess macros/functions to disable access to user
> space by setting TTBR0_EL1 to a reserved zeroed page. Since the value
> written to TTBR0_EL1 must be a physical address, for simplicity this
> patch introduces a reserved_ttbr0 page at a constant offset from
> swapper_pg_dir. The uaccess_disable code uses the ttbr1_el1 value
> adjusted by the reserved_ttbr0 offset.
> 
> Enabling access to user is done by restoring TTBR0_EL1 with the value
> from the struct thread_info ttbr0 variable. Interrupts must be disabled
> during the uaccess_ttbr0_enable code to ensure the atomicity of the
> thread_info.ttbr0 read and TTBR0_EL1 write.

[...]

>  /*
> + * Return the current thread_info.
> + */
> +	.macro	get_thread_info, rd
> +	mrs	\rd, sp_el0
> +	.endm

It may be worth noting in the commit message that we had to factor this
out of entry.S, or doing that as a preparatory patch.

> +/*
>   * Errata workaround post TTBR0_EL1 update.
>   */
>  	.macro	post_ttbr0_update_workaround, ret = 0
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 7099f26e3702..023066d9bf7f 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -216,6 +216,12 @@ static inline bool system_supports_mixed_endian_el0(void)
>  	return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
>  }
>  
> +static inline bool system_supports_ttbr0_pan(void)
> +{
> +	return IS_ENABLED(CONFIG_ARM64_TTBR0_PAN) &&
> +		!cpus_have_cap(ARM64_HAS_PAN);
> +}
> +

Nit: s/supports/uses/ would be clearer.

[...]

> +#ifdef CONFIG_ARM64_TTBR0_PAN
> +#define RESERVED_TTBR0_SIZE	(PAGE_SIZE)
> +#else
> +#define RESERVED_TTBR0_SIZE	(0)
> +#endif

I was going to suggest that we use the empty_zero_page, which we can
address with an adrp, because I had forgotten that we need to generate
the *physical* address.

It would be good if we could have a description of why we need the new
reserved page somewhere in the code. I'm sure I won't be the only one
tripped up by this.

It would be possible to use the existing empty_zero_page, if we're happy
to have a MOVZ; MOVK; MOVK; MOVK sequence that we patch at boot-time.
That could be faster than an MRS on some implementations.

We don't (yet) have infrastructure for that, though.

[...]

> +static inline void uaccess_ttbr0_enable(void)
> +{
> +	unsigned long flags;
> +
> +	/*
> +	 * Disable interrupts to avoid preemption and potential saved
> +	 * TTBR0_EL1 updates between reading the variable and the MSR.
> +	 */
> +	local_irq_save(flags);
> +	write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
> +	isb();
> +	local_irq_restore(flags);
> +}

I don't follow what problem this actually protects us against. In the
case of preemption everything should be saved+restored transparently, or
things would go wrong as soon as we enable IRQs anyway.

Is this a hold-over from a percpu approach rather than the
current_thread_info() approach?

What am I missing?

> +#else
> +static inline void uaccess_ttbr0_disable(void)
> +{
> +}
> +
> +static inline void uaccess_ttbr0_enable(void)
> +{
> +}
> +#endif

I think that it's better to drop the ifdef and add:

	if (!IS_ENABLED(CONFIG_ARM64_TTBR0_PAN))
		return;

... at the start of each function. GCC should optimize the entire thing
away when not used, but we'll get compiler coverage regardless, and
therefore less breakage. All the symbols we required should exist
regardless.

[...]

>  	.macro	uaccess_enable, tmp1, tmp2
> +#ifdef CONFIG_ARM64_TTBR0_PAN
> +alternative_if_not ARM64_HAS_PAN
> +	save_and_disable_irq \tmp2		// avoid preemption
> +	uaccess_ttbr0_enable \tmp1
> +	restore_irq \tmp2
> +alternative_else
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +alternative_endif
> +#endif

How about something like:

	.macro alternative_endif_else_nop
	alternative_else
	.rept ((662b-661b) / 4)
	       nop
	.endr
	alternative_endif
	.endm

So for the above we could have:

	alternative_if_not ARM64_HAS_PAN
		save_and_disable_irq \tmp2
		uaccess_ttbr0_enable \tmp1
		restore_irq \tmp2
	alternative_endif_else_nop

I'll see about spinning a patch, or discovering why that happens to be
broken.

[...]

>  	 * tables again to remove any speculatively loaded cache lines.
>  	 */
>  	mov	x0, x25
> -	add	x1, x26, #SWAPPER_DIR_SIZE
> +	add	x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
>  	dmb	sy
>  	bl	__inval_cache_range
>  
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 659963d40bb4..fe393ccf9352 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -196,6 +196,11 @@ SECTIONS
>  	swapper_pg_dir = .;
>  	. += SWAPPER_DIR_SIZE;
>  
> +#ifdef CONFIG_ARM64_TTBR0_PAN
> +	reserved_ttbr0 = .;
> +	. += PAGE_SIZE;
> +#endif

Surely RESERVED_TTBR0_SIZE, as elsewhere?

Thanks,
Mark.
Catalin Marinas Sept. 6, 2016, 10:27 a.m. UTC | #2
On Mon, Sep 05, 2016 at 06:20:38PM +0100, Mark Rutland wrote:
> On Fri, Sep 02, 2016 at 04:02:09PM +0100, Catalin Marinas wrote:
> > +#ifdef CONFIG_ARM64_TTBR0_PAN
> > +#define RESERVED_TTBR0_SIZE	(PAGE_SIZE)
> > +#else
> > +#define RESERVED_TTBR0_SIZE	(0)
> > +#endif
> 
> I was going to suggest that we use the empty_zero_page, which we can
> address with an adrp, because I had forgotten that we need to generate
> the *physical* address.
> 
> It would be good if we could have a description of why we need the new
> reserved page somewhere in the code. I'm sure I won't be the only one
> tripped up by this.
> 
> It would be possible to use the existing empty_zero_page, if we're happy
> to have a MOVZ; MOVK; MOVK; MOVK sequence that we patch at boot-time.
> That could be faster than an MRS on some implementations.

I was trying to keep the number of instructions to a minimum in
preference to potentially slightly faster sequence (I haven't done any
benchmarks). On ARMv8.1+ implementations, we just end up with more nops.

We could also do an ldr from a PC-relative address, it's one instruction
and it may not be (significantly) slower than MRS + ADD.

> > +static inline void uaccess_ttbr0_enable(void)
> > +{
> > +	unsigned long flags;
> > +
> > +	/*
> > +	 * Disable interrupts to avoid preemption and potential saved
> > +	 * TTBR0_EL1 updates between reading the variable and the MSR.
> > +	 */
> > +	local_irq_save(flags);
> > +	write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
> > +	isb();
> > +	local_irq_restore(flags);
> > +}
> 
> I don't follow what problem this actually protects us against. In the
> case of preemption everything should be saved+restored transparently, or
> things would go wrong as soon as we enable IRQs anyway.
> 
> Is this a hold-over from a percpu approach rather than the
> current_thread_info() approach?

If we get preempted between reading current_thread_info()->ttbr0 and
writing TTBR0_EL1, a series of context switches could lead to the update
of the ASID part of ttbr0. The actual MSR would store an old ASID in
TTBR0_EL1.

> > +#else
> > +static inline void uaccess_ttbr0_disable(void)
> > +{
> > +}
> > +
> > +static inline void uaccess_ttbr0_enable(void)
> > +{
> > +}
> > +#endif
> 
> I think that it's better to drop the ifdef and add:
> 
> 	if (!IS_ENABLED(CONFIG_ARM64_TTBR0_PAN))
> 		return;
> 
> ... at the start of each function. GCC should optimize the entire thing
> away when not used, but we'll get compiler coverage regardless, and
> therefore less breakage. All the symbols we required should exist
> regardless.

The reason for this is that thread_info.ttbr0 is conditionally defined.
I don't think the compiler would ignore it.

> >  	.macro	uaccess_enable, tmp1, tmp2
> > +#ifdef CONFIG_ARM64_TTBR0_PAN
> > +alternative_if_not ARM64_HAS_PAN
> > +	save_and_disable_irq \tmp2		// avoid preemption
> > +	uaccess_ttbr0_enable \tmp1
> > +	restore_irq \tmp2
> > +alternative_else
> > +	nop
> > +	nop
> > +	nop
> > +	nop
> > +	nop
> > +	nop
> > +	nop
> > +alternative_endif
> > +#endif
> 
> How about something like:
> 
> 	.macro alternative_endif_else_nop
> 	alternative_else
> 	.rept ((662b-661b) / 4)
> 	       nop
> 	.endr
> 	alternative_endif
> 	.endm
> 
> So for the above we could have:
> 
> 	alternative_if_not ARM64_HAS_PAN
> 		save_and_disable_irq \tmp2
> 		uaccess_ttbr0_enable \tmp1
> 		restore_irq \tmp2
> 	alternative_endif_else_nop
> 
> I'll see about spinning a patch, or discovering why that happens to be
> broken.

This looks better. Minor comment, I would actually name the ending
statement alternative_else_nop_endif to match the order in which you'd
normally write them.

> >  	 * tables again to remove any speculatively loaded cache lines.
> >  	 */
> >  	mov	x0, x25
> > -	add	x1, x26, #SWAPPER_DIR_SIZE
> > +	add	x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
> >  	dmb	sy
> >  	bl	__inval_cache_range
> >  
> > diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> > index 659963d40bb4..fe393ccf9352 100644
> > --- a/arch/arm64/kernel/vmlinux.lds.S
> > +++ b/arch/arm64/kernel/vmlinux.lds.S
> > @@ -196,6 +196,11 @@ SECTIONS
> >  	swapper_pg_dir = .;
> >  	. += SWAPPER_DIR_SIZE;
> >  
> > +#ifdef CONFIG_ARM64_TTBR0_PAN
> > +	reserved_ttbr0 = .;
> > +	. += PAGE_SIZE;
> > +#endif
> 
> Surely RESERVED_TTBR0_SIZE, as elsewhere?

I'll try to move it somewhere where it can be included in vmlinux.lds.S
(I can probably include cpufeature.h directly).
Mark Rutland Sept. 6, 2016, 10:45 a.m. UTC | #3
On Tue, Sep 06, 2016 at 11:27:42AM +0100, Catalin Marinas wrote:
> On Mon, Sep 05, 2016 at 06:20:38PM +0100, Mark Rutland wrote:
> > On Fri, Sep 02, 2016 at 04:02:09PM +0100, Catalin Marinas wrote:
> > > +static inline void uaccess_ttbr0_enable(void)
> > > +{
> > > +	unsigned long flags;
> > > +
> > > +	/*
> > > +	 * Disable interrupts to avoid preemption and potential saved
> > > +	 * TTBR0_EL1 updates between reading the variable and the MSR.
> > > +	 */
> > > +	local_irq_save(flags);
> > > +	write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
> > > +	isb();
> > > +	local_irq_restore(flags);
> > > +}
> > 
> > I don't follow what problem this actually protects us against. In the
> > case of preemption everything should be saved+restored transparently, or
> > things would go wrong as soon as we enable IRQs anyway.
> > 
> > Is this a hold-over from a percpu approach rather than the
> > current_thread_info() approach?
> 
> If we get preempted between reading current_thread_info()->ttbr0 and
> writing TTBR0_EL1, a series of context switches could lead to the update
> of the ASID part of ttbr0. The actual MSR would store an old ASID in
> TTBR0_EL1.

Ah! Can you fold something about racing with an ASID update into the
description?

> > > +#else
> > > +static inline void uaccess_ttbr0_disable(void)
> > > +{
> > > +}
> > > +
> > > +static inline void uaccess_ttbr0_enable(void)
> > > +{
> > > +}
> > > +#endif
> > 
> > I think that it's better to drop the ifdef and add:
> > 
> > 	if (!IS_ENABLED(CONFIG_ARM64_TTBR0_PAN))
> > 		return;
> > 
> > ... at the start of each function. GCC should optimize the entire thing
> > away when not used, but we'll get compiler coverage regardless, and
> > therefore less breakage. All the symbols we required should exist
> > regardless.
> 
> The reason for this is that thread_info.ttbr0 is conditionally defined.
> I don't think the compiler would ignore it.

Good point; I missed that.

[...]

> > How about something like:
> > 
> > 	.macro alternative_endif_else_nop
> > 	alternative_else
> > 	.rept ((662b-661b) / 4)
> > 	       nop
> > 	.endr
> > 	alternative_endif
> > 	.endm
> > 
> > So for the above we could have:
> > 
> > 	alternative_if_not ARM64_HAS_PAN
> > 		save_and_disable_irq \tmp2
> > 		uaccess_ttbr0_enable \tmp1
> > 		restore_irq \tmp2
> > 	alternative_endif_else_nop
> > 
> > I'll see about spinning a patch, or discovering why that happens to be
> > broken.
> 
> This looks better. Minor comment, I would actually name the ending
> statement alternative_else_nop_endif to match the order in which you'd
> normally write them.

Completely agreed. I already made this change locally, immediately after
sending the suggestion. :)

> > >  	 * tables again to remove any speculatively loaded cache lines.
> > >  	 */
> > >  	mov	x0, x25
> > > -	add	x1, x26, #SWAPPER_DIR_SIZE
> > > +	add	x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
> > >  	dmb	sy
> > >  	bl	__inval_cache_range
> > >  
> > > diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> > > index 659963d40bb4..fe393ccf9352 100644
> > > --- a/arch/arm64/kernel/vmlinux.lds.S
> > > +++ b/arch/arm64/kernel/vmlinux.lds.S
> > > @@ -196,6 +196,11 @@ SECTIONS
> > >  	swapper_pg_dir = .;
> > >  	. += SWAPPER_DIR_SIZE;
> > >  
> > > +#ifdef CONFIG_ARM64_TTBR0_PAN
> > > +	reserved_ttbr0 = .;
> > > +	. += PAGE_SIZE;
> > > +#endif
> > 
> > Surely RESERVED_TTBR0_SIZE, as elsewhere?
> 
> I'll try to move it somewhere where it can be included in vmlinux.lds.S
> (I can probably include cpufeature.h directly).

Our vmlinux.lds.S already includes <asm/kernel-pagetable.h>, so I think
that should work already.

Thanks,
Mark.
Catalin Marinas Sept. 9, 2016, 5:15 p.m. UTC | #4
On Fri, Sep 02, 2016 at 04:02:09PM +0100, Catalin Marinas wrote:
>  /*
>   * User access enabling/disabling.
>   */
> +#ifdef CONFIG_ARM64_TTBR0_PAN
> +static inline void uaccess_ttbr0_disable(void)
> +{
> +	unsigned long ttbr;
> +
> +	/* reserved_ttbr0 placed at the end of swapper_pg_dir */
> +	ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE;
> +	write_sysreg(ttbr, ttbr0_el1);
> +	isb();
> +}
> +
> +static inline void uaccess_ttbr0_enable(void)
> +{
> +	unsigned long flags;
> +
> +	/*
> +	 * Disable interrupts to avoid preemption and potential saved
> +	 * TTBR0_EL1 updates between reading the variable and the MSR.
> +	 */
> +	local_irq_save(flags);
> +	write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
> +	isb();
> +	local_irq_restore(flags);
> +}

I followed up with the ARM architects on potential improvements to this
sequence. In summary, changing TCR_EL1.A1 is not guaranteed to have an
effect unless it is followed by TLBI. IOW, we can't use this bit for a
quick switch to the reserved ASID.

Setting TCR_EL1.EPD0 to 1 would work as long as it is followed by an
ASID change to a reserved one with no entries in the TLB. However, the
code sequence above (and the corresponding asm ones) would become even
more complex, so I don't think we gain anything.

Untested, using EPD0 (the assembly version would look sligtly better
than the C version but still a few instructions more than what we
currently have):

static inline void uaccess_ttbr0_disable(void)
{
	unsigned long ttbr;
	unsigned long tcr;

	/* disable TTBR0 page table walks */
	tcr = read_sysreg(tcr_el1);
	tcr |= TCR_ELD0
	write_sysreg(tcr, tcr_el1);
	isb();

	/* mask out the ASID bits (zero is a reserved ASID) */
	ttbr = read_sysreg(ttbr0_el1);
	ttbr &= ~ASID_MASK;
	write_sysreg(ttbr, ttbr0_el1);
	isb();
}

static inline void uaccess_ttbr0_enable(void)
{
	unsigned long flags;

	local_irq_save(flags);

	ttbr = read_sysreg(ttbr0_el1);
	ttbr |= current_thread_info()->asid;
	write_sysreg(ttbr, ttbr0_el1);
	isb();

	/* enable TTBR0 page table walks */
	tcr = read_sysreg(tcr_el1);
	tcr |= TCR_ELD0
	write_sysreg(tcr, tcr_el1);
	isb();

	local_irq_restore(flags);
}

The IRQ disabling for the above sequence is still required since we need
to guarantee the atomicity of the ASID read with the TTBR0_EL1 write.

We may be able to avoid current_thread_info()->asid *if* we find some
other per-CPU place to store the ASID (unused TTBR1_EL1 bits was
suggested, though not sure about the architecture requirements on those
bits being zero when TCR_EL1.A1 is 0). But even with these in place, the
requirement to have to ISBs and the additional TCR_EL1 read/write
doesn't give us anything better.

In conclusion, I propose that we stick to the current TTBR0_EL1 switch
as per these patches.
Ard Biesheuvel Sept. 11, 2016, 1:55 p.m. UTC | #5
On 6 September 2016 at 11:45, Mark Rutland <mark.rutland@arm.com> wrote:
> On Tue, Sep 06, 2016 at 11:27:42AM +0100, Catalin Marinas wrote:
>> On Mon, Sep 05, 2016 at 06:20:38PM +0100, Mark Rutland wrote:
>> > On Fri, Sep 02, 2016 at 04:02:09PM +0100, Catalin Marinas wrote:
>> > > +static inline void uaccess_ttbr0_enable(void)
>> > > +{
>> > > + unsigned long flags;
>> > > +
>> > > + /*
>> > > +  * Disable interrupts to avoid preemption and potential saved
>> > > +  * TTBR0_EL1 updates between reading the variable and the MSR.
>> > > +  */
>> > > + local_irq_save(flags);
>> > > + write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
>> > > + isb();
>> > > + local_irq_restore(flags);
>> > > +}
>> >
>> > I don't follow what problem this actually protects us against. In the
>> > case of preemption everything should be saved+restored transparently, or
>> > things would go wrong as soon as we enable IRQs anyway.
>> >
>> > Is this a hold-over from a percpu approach rather than the
>> > current_thread_info() approach?
>>
>> If we get preempted between reading current_thread_info()->ttbr0 and
>> writing TTBR0_EL1, a series of context switches could lead to the update
>> of the ASID part of ttbr0. The actual MSR would store an old ASID in
>> TTBR0_EL1.
>
> Ah! Can you fold something about racing with an ASID update into the
> description?
>
>> > > +#else
>> > > +static inline void uaccess_ttbr0_disable(void)
>> > > +{
>> > > +}
>> > > +
>> > > +static inline void uaccess_ttbr0_enable(void)
>> > > +{
>> > > +}
>> > > +#endif
>> >
>> > I think that it's better to drop the ifdef and add:
>> >
>> >     if (!IS_ENABLED(CONFIG_ARM64_TTBR0_PAN))
>> >             return;
>> >
>> > ... at the start of each function. GCC should optimize the entire thing
>> > away when not used, but we'll get compiler coverage regardless, and
>> > therefore less breakage. All the symbols we required should exist
>> > regardless.
>>
>> The reason for this is that thread_info.ttbr0 is conditionally defined.
>> I don't think the compiler would ignore it.
>
> Good point; I missed that.
>
> [...]
>
>> > How about something like:
>> >
>> >     .macro alternative_endif_else_nop
>> >     alternative_else
>> >     .rept ((662b-661b) / 4)
>> >            nop
>> >     .endr
>> >     alternative_endif
>> >     .endm
>> >
>> > So for the above we could have:
>> >
>> >     alternative_if_not ARM64_HAS_PAN
>> >             save_and_disable_irq \tmp2
>> >             uaccess_ttbr0_enable \tmp1
>> >             restore_irq \tmp2
>> >     alternative_endif_else_nop
>> >
>> > I'll see about spinning a patch, or discovering why that happens to be
>> > broken.
>>
>> This looks better. Minor comment, I would actually name the ending
>> statement alternative_else_nop_endif to match the order in which you'd
>> normally write them.
>
> Completely agreed. I already made this change locally, immediately after
> sending the suggestion. :)
>
>> > >    * tables again to remove any speculatively loaded cache lines.
>> > >    */
>> > >   mov     x0, x25
>> > > - add     x1, x26, #SWAPPER_DIR_SIZE
>> > > + add     x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
>> > >   dmb     sy
>> > >   bl      __inval_cache_range
>> > >
>> > > diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
>> > > index 659963d40bb4..fe393ccf9352 100644
>> > > --- a/arch/arm64/kernel/vmlinux.lds.S
>> > > +++ b/arch/arm64/kernel/vmlinux.lds.S
>> > > @@ -196,6 +196,11 @@ SECTIONS
>> > >   swapper_pg_dir = .;
>> > >   . += SWAPPER_DIR_SIZE;
>> > >
>> > > +#ifdef CONFIG_ARM64_TTBR0_PAN
>> > > + reserved_ttbr0 = .;
>> > > + . += PAGE_SIZE;
>> > > +#endif
>> >
>> > Surely RESERVED_TTBR0_SIZE, as elsewhere?
>>
>> I'll try to move it somewhere where it can be included in vmlinux.lds.S
>> (I can probably include cpufeature.h directly).
>

Do we really need another zero page? The ordinary zero page is already
statically allocated these days, so we could simply move it between
idmap_pg_dir[] and swapper_pg_dir[], and get all the changes in the
early boot code for free (given that it covers the range between the
start of idmap_pg_dir[] and the end of swapper_pg_dir[])

That way, we could refer to __pa(empty_zero_page) anywhere by reading
ttbr1_el1 and subtracting PAGE_SIZE
Catalin Marinas Sept. 12, 2016, 9:32 a.m. UTC | #6
On Sun, Sep 11, 2016 at 02:55:12PM +0100, Ard Biesheuvel wrote:
> On 6 September 2016 at 11:45, Mark Rutland <mark.rutland@arm.com> wrote:
> > On Tue, Sep 06, 2016 at 11:27:42AM +0100, Catalin Marinas wrote:
> >> On Mon, Sep 05, 2016 at 06:20:38PM +0100, Mark Rutland wrote:
> >> > On Fri, Sep 02, 2016 at 04:02:09PM +0100, Catalin Marinas wrote:
> >> > >    * tables again to remove any speculatively loaded cache lines.
> >> > >    */
> >> > >   mov     x0, x25
> >> > > - add     x1, x26, #SWAPPER_DIR_SIZE
> >> > > + add     x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
> >> > >   dmb     sy
> >> > >   bl      __inval_cache_range
> >> > >
> >> > > diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> >> > > index 659963d40bb4..fe393ccf9352 100644
> >> > > --- a/arch/arm64/kernel/vmlinux.lds.S
> >> > > +++ b/arch/arm64/kernel/vmlinux.lds.S
> >> > > @@ -196,6 +196,11 @@ SECTIONS
> >> > >   swapper_pg_dir = .;
> >> > >   . += SWAPPER_DIR_SIZE;
> >> > >
> >> > > +#ifdef CONFIG_ARM64_TTBR0_PAN
> >> > > + reserved_ttbr0 = .;
> >> > > + . += PAGE_SIZE;
> >> > > +#endif
> >> >
> >> > Surely RESERVED_TTBR0_SIZE, as elsewhere?
> >>
> >> I'll try to move it somewhere where it can be included in vmlinux.lds.S
> >> (I can probably include cpufeature.h directly).
> 
> Do we really need another zero page? The ordinary zero page is already
> statically allocated these days, so we could simply move it between
> idmap_pg_dir[] and swapper_pg_dir[], and get all the changes in the
> early boot code for free (given that it covers the range between the
> start of idmap_pg_dir[] and the end of swapper_pg_dir[])
> 
> That way, we could refer to __pa(empty_zero_page) anywhere by reading
> ttbr1_el1 and subtracting PAGE_SIZE

That's fine by me. I'll cherry-pick your patch and rebase this series on
top.

Patch
diff mbox

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index b16bbf1fb786..bf1c797052dd 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -41,6 +41,15 @@ 
 	msr	daifclr, #2
 	.endm
 
+	.macro	save_and_disable_irq, flags
+	mrs	\flags, daif
+	msr	daifset, #2
+	.endm
+
+	.macro	restore_irq, flags
+	msr	daif, \flags
+	.endm
+
 /*
  * Enable and disable debug exceptions.
  */
@@ -351,6 +360,13 @@  alternative_endif
 	.endm
 
 /*
+ * Return the current thread_info.
+ */
+	.macro	get_thread_info, rd
+	mrs	\rd, sp_el0
+	.endm
+
+/*
  * Errata workaround post TTBR0_EL1 update.
  */
 	.macro	post_ttbr0_update_workaround, ret = 0
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 7099f26e3702..023066d9bf7f 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -216,6 +216,12 @@  static inline bool system_supports_mixed_endian_el0(void)
 	return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
 }
 
+static inline bool system_supports_ttbr0_pan(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_TTBR0_PAN) &&
+		!cpus_have_cap(ARM64_HAS_PAN);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 7e51d1b57c0c..f825ffda9c52 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -19,6 +19,7 @@ 
 #ifndef __ASM_KERNEL_PGTABLE_H
 #define __ASM_KERNEL_PGTABLE_H
 
+#include <asm/pgtable.h>
 #include <asm/sparsemem.h>
 
 /*
@@ -54,6 +55,12 @@ 
 #define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
 #define IDMAP_DIR_SIZE		(IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
 
+#ifdef CONFIG_ARM64_TTBR0_PAN
+#define RESERVED_TTBR0_SIZE	(PAGE_SIZE)
+#else
+#define RESERVED_TTBR0_SIZE	(0)
+#endif
+
 /* Initial memory map size */
 #if ARM64_SWAPPER_USES_SECTION_MAPS
 #define SWAPPER_BLOCK_SHIFT	SECTION_SHIFT
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index abd64bd1f6d9..e4cff7307d28 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -47,6 +47,9 @@  typedef unsigned long mm_segment_t;
 struct thread_info {
 	unsigned long		flags;		/* low level flags */
 	mm_segment_t		addr_limit;	/* address limit */
+#ifdef CONFIG_ARM64_TTBR0_PAN
+	u64			ttbr0;		/* saved TTBR0_EL1 */
+#endif
 	struct task_struct	*task;		/* main task structure */
 	int			preempt_count;	/* 0 => preemptable, <0 => bug */
 	int			cpu;		/* cpu */
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index fde5f7a13030..3b2cc7787d5a 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -29,6 +29,7 @@ 
 
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/ptrace.h>
 #include <asm/sysreg.h>
 #include <asm/errno.h>
@@ -116,16 +117,56 @@  static inline void set_fs(mm_segment_t fs)
 /*
  * User access enabling/disabling.
  */
+#ifdef CONFIG_ARM64_TTBR0_PAN
+static inline void uaccess_ttbr0_disable(void)
+{
+	unsigned long ttbr;
+
+	/* reserved_ttbr0 placed at the end of swapper_pg_dir */
+	ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE;
+	write_sysreg(ttbr, ttbr0_el1);
+	isb();
+}
+
+static inline void uaccess_ttbr0_enable(void)
+{
+	unsigned long flags;
+
+	/*
+	 * Disable interrupts to avoid preemption and potential saved
+	 * TTBR0_EL1 updates between reading the variable and the MSR.
+	 */
+	local_irq_save(flags);
+	write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
+	isb();
+	local_irq_restore(flags);
+}
+#else
+static inline void uaccess_ttbr0_disable(void)
+{
+}
+
+static inline void uaccess_ttbr0_enable(void)
+{
+}
+#endif
+
 #define uaccess_disable(alt)						\
 do {									\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt,			\
-			CONFIG_ARM64_PAN));				\
+	if (system_supports_ttbr0_pan())				\
+		uaccess_ttbr0_disable();				\
+	else								\
+		asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt,		\
+				CONFIG_ARM64_PAN));			\
 } while (0)
 
 #define uaccess_enable(alt)						\
 do {									\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt,			\
-			CONFIG_ARM64_PAN));				\
+	if (system_supports_ttbr0_pan())				\
+		uaccess_ttbr0_enable();					\
+	else								\
+		asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt,		\
+				CONFIG_ARM64_PAN));			\
 } while (0)
 
 /*
@@ -338,11 +379,36 @@  extern __must_check long strnlen_user(const char __user *str, long n);
 
 #include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/kernel-pgtable.h>
 
 /*
  * User access enabling/disabling macros.
  */
+	.macro	uaccess_ttbr0_disable, tmp1
+	mrs	\tmp1, ttbr1_el1		// swapper_pg_dir
+	add	\tmp1, \tmp1, #SWAPPER_DIR_SIZE	// reserved_ttbr0 at the end of swapper_pg_dir
+	msr	ttbr0_el1, \tmp1		// set reserved TTBR0_EL1
+	isb
+	.endm
+
+	.macro	uaccess_ttbr0_enable, tmp1
+	get_thread_info \tmp1
+	ldr	\tmp1, [\tmp1, #TI_TTBR0]	// load saved TTBR0_EL1
+	msr	ttbr0_el1, \tmp1		// set the non-PAN TTBR0_EL1
+	isb
+	.endm
+
 	.macro	uaccess_disable, tmp1
+#ifdef CONFIG_ARM64_TTBR0_PAN
+alternative_if_not ARM64_HAS_PAN
+	uaccess_ttbr0_disable \tmp1
+alternative_else
+	nop
+	nop
+	nop
+	nop
+alternative_endif
+#endif
 alternative_if_not ARM64_ALT_PAN_NOT_UAO
 	nop
 alternative_else
@@ -351,6 +417,21 @@  alternative_endif
 	.endm
 
 	.macro	uaccess_enable, tmp1, tmp2
+#ifdef CONFIG_ARM64_TTBR0_PAN
+alternative_if_not ARM64_HAS_PAN
+	save_and_disable_irq \tmp2		// avoid preemption
+	uaccess_ttbr0_enable \tmp1
+	restore_irq \tmp2
+alternative_else
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+alternative_endif
+#endif
 alternative_if_not ARM64_ALT_PAN_NOT_UAO
 	nop
 alternative_else
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 05070b72fc28..0af4d9a6c10d 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -38,6 +38,9 @@  int main(void)
   DEFINE(TI_FLAGS,		offsetof(struct thread_info, flags));
   DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
   DEFINE(TI_ADDR_LIMIT,		offsetof(struct thread_info, addr_limit));
+#ifdef CONFIG_ARM64_TTBR0_PAN
+  DEFINE(TI_TTBR0,		offsetof(struct thread_info, ttbr0));
+#endif
   DEFINE(TI_TASK,		offsetof(struct thread_info, task));
   DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
   BLANK();
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 62272eac1352..fd0971afd142 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -45,6 +45,7 @@  unsigned int compat_elf_hwcap2 __read_mostly;
 #endif
 
 DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+EXPORT_SYMBOL(cpu_hwcaps);
 
 #define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
 	{						\
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 441420ca7d08..be1e3987c07a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -190,10 +190,6 @@  alternative_endif
 	eret					// return to kernel
 	.endm
 
-	.macro	get_thread_info, rd
-	mrs	\rd, sp_el0
-	.endm
-
 	.macro	irq_stack_entry
 	mov	x19, sp			// preserve the original sp
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 3e7b050e99dc..d4188396302f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -320,14 +320,14 @@  __create_page_tables:
 	 * dirty cache lines being evicted.
 	 */
 	mov	x0, x25
-	add	x1, x26, #SWAPPER_DIR_SIZE
+	add	x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
 	bl	__inval_cache_range
 
 	/*
 	 * Clear the idmap and swapper page tables.
 	 */
 	mov	x0, x25
-	add	x6, x26, #SWAPPER_DIR_SIZE
+	add	x6, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
 1:	stp	xzr, xzr, [x0], #16
 	stp	xzr, xzr, [x0], #16
 	stp	xzr, xzr, [x0], #16
@@ -406,7 +406,7 @@  __create_page_tables:
 	 * tables again to remove any speculatively loaded cache lines.
 	 */
 	mov	x0, x25
-	add	x1, x26, #SWAPPER_DIR_SIZE
+	add	x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
 	dmb	sy
 	bl	__inval_cache_range
 
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 659963d40bb4..fe393ccf9352 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -196,6 +196,11 @@  SECTIONS
 	swapper_pg_dir = .;
 	. += SWAPPER_DIR_SIZE;
 
+#ifdef CONFIG_ARM64_TTBR0_PAN
+	reserved_ttbr0 = .;
+	. += PAGE_SIZE;
+#endif
+
 	_end = .;
 
 	STABS_DEBUG