diff mbox series

[v2] arm64: Trap WFI executed in userspace

Message ID 20180810091939.2773-1-marc.zyngier@arm.com (mailing list archive)
State New, archived
Headers show
Series [v2] arm64: Trap WFI executed in userspace | expand

Commit Message

Marc Zyngier Aug. 10, 2018, 9:19 a.m. UTC
It recently came to light that userspace can execute WFI, and that
the arm64 kernel doesn't trap this event. This sounds rather benign,
but the kernel should decide when it wants to wait for an interrupt,
and not userspace.

Let's trap WFI and immediately return after having skipped the
instruction. This effectively makes WFI a rather expensive NOP.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/esr.h    |  4 ++++
 arch/arm64/include/asm/sysreg.h |  4 ++--
 arch/arm64/kernel/entry.S       |  1 +
 arch/arm64/kernel/traps.c       | 11 +++++++++++
 4 files changed, 18 insertions(+), 2 deletions(-)

Comments

Robin Murphy Aug. 10, 2018, 9:43 a.m. UTC | #1
On 10/08/18 10:19, Marc Zyngier wrote:
> It recently came to light that userspace can execute WFI, and that
> the arm64 kernel doesn trap this event. This sounds rather benign,

                         't

> but the kernel should decide when it wants to wait for an interrupt,
> and not userspace.
> 
> Let's trap WFI and immediately return after having skipped the
> instruction. This effectively makes WFI a rather expensive NOP.

...which is still a perfectly valid behaviour, given that it's a hint. 
That's fine by me :)

> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>   arch/arm64/include/asm/esr.h    |  4 ++++
>   arch/arm64/include/asm/sysreg.h |  4 ++--
>   arch/arm64/kernel/entry.S       |  1 +
>   arch/arm64/kernel/traps.c       | 11 +++++++++++
>   4 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
> index ce70c3ffb993..9a2b6cee4e2b 100644
> --- a/arch/arm64/include/asm/esr.h
> +++ b/arch/arm64/include/asm/esr.h
> @@ -137,6 +137,7 @@
>   #define ESR_ELx_CV		(UL(1) << 24)
>   #define ESR_ELx_COND_SHIFT	(20)
>   #define ESR_ELx_COND_MASK	(UL(0xF) << ESR_ELx_COND_SHIFT)
> +#define ESR_ELx_WFx_ISS_WFI	(UL(0) << 0)
>   #define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)
>   #define ESR_ELx_xVC_IMM_MASK	((1UL << 16) - 1)
>   
> @@ -148,6 +149,9 @@
>   #define DISR_EL1_ESR_MASK	(ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC)
>   
>   /* ESR value templates for specific events */
> +#define ESR_ELx_WFx_MASK	(ESR_ELx_EC_MASK | 1)

Super-nit: is it worth defining ESR_ELx_WFx_ISS_TI rather than 
open-coding 1?

Robin.

> +#define ESR_ELx_WFx_WFI_VAL	((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) |	\
> +				 ESR_ELx_WFx_ISS_WFI)
>   
>   /* BRK instruction trap from AArch64 state */
>   #define ESR_ELx_VAL_BRK64(imm)					\
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index e205ec8489e9..eeae15a3f265 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -487,12 +487,12 @@
>   
>   #define SCTLR_EL1_SET	(SCTLR_ELx_M    | SCTLR_ELx_C    | SCTLR_ELx_SA   |\
>   			 SCTLR_EL1_SA0  | SCTLR_EL1_SED  | SCTLR_ELx_I    |\
> -			 SCTLR_EL1_DZE  | SCTLR_EL1_UCT  | SCTLR_EL1_NTWI |\
> +			 SCTLR_EL1_DZE  | SCTLR_EL1_UCT  |		   \
>   			 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\
>   			 ENDIAN_SET_EL1 | SCTLR_EL1_UCI  | SCTLR_EL1_RES1)
>   #define SCTLR_EL1_CLEAR	(SCTLR_ELx_A   | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD    |\
>   			 SCTLR_EL1_UMA | SCTLR_ELx_WXN     | ENDIAN_CLEAR_EL1 |\
> -			 SCTLR_EL1_RES0)
> +			 SCTLR_EL1_NTWI | SCTLR_EL1_RES0)
>   
>   #if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff
>   #error "Inconsistent SCTLR_EL1 set/clear bits"
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 09dbea221a27..6c8e5dc695d2 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -665,6 +665,7 @@ el0_sync:
>   	cmp	x24, #ESR_ELx_EC_FP_EXC64	// FP/ASIMD exception
>   	b.eq	el0_fpsimd_exc
>   	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
> +	ccmp	x24, #ESR_ELx_EC_WFx, #4, ne
>   	b.eq	el0_sys
>   	cmp	x24, #ESR_ELx_EC_SP_ALIGN	// stack alignment exception
>   	b.eq	el0_sp_pc
> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> index 039e9ff379cc..9d6b655529a1 100644
> --- a/arch/arm64/kernel/traps.c
> +++ b/arch/arm64/kernel/traps.c
> @@ -496,6 +496,11 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
>   	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
>   }
>   
> +static void wfi_handler(unsigned int esr, struct pt_regs *regs)
> +{
> +	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
> +}
> +
>   struct sys64_hook {
>   	unsigned int esr_mask;
>   	unsigned int esr_val;
> @@ -526,6 +531,12 @@ static struct sys64_hook sys64_hooks[] = {
>   		.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ,
>   		.handler = cntfrq_read_handler,
>   	},
> +	{
> +		/* Trap WFI instructions executed in userspace */
> +		.esr_mask = ESR_ELx_WFx_MASK,
> +		.esr_val = ESR_ELx_WFx_WFI_VAL,
> +		.handler = wfi_handler,
> +	},
>   	{},
>   };
>   
>
Dave Martin Aug. 10, 2018, 10:06 a.m. UTC | #2
On Fri, Aug 10, 2018 at 10:43:13AM +0100, Robin Murphy wrote:
> On 10/08/18 10:19, Marc Zyngier wrote:
> >It recently came to light that userspace can execute WFI, and that
> >the arm64 kernel doesn trap this event. This sounds rather benign,
> 
>                         't
> 
> >but the kernel should decide when it wants to wait for an interrupt,
> >and not userspace.
> >
> >Let's trap WFI and immediately return after having skipped the
> >instruction. This effectively makes WFI a rather expensive NOP.
> 
> ...which is still a perfectly valid behaviour, given that it's a hint.
> That's fine by me :)

The only plausible use of WFI that I can see is in a polling or idle
loop of some description.  So turning WFI into a NOP will likely create
a userspace busy-wait that never explicitly blocks or yields, which may
be considered pathological behaviour.

If there is junk in some app store that does this it may lead to
power wastage and thermal throttling, damaging performance of the whole
system the rogue app runs on.

That kind of consequence is partly why I suggested breaking rogue apps
cleanly (via SIGILL) rather than breaking the system hosting those apps
in messier ways.  I accept that SIGILL is a step too far.

So, I continue to favour the sched_yield() approach over all.

Should this be discussed with the security folks?  It feels like we're
speculating about whether sched_yield() is a security concern.

If it is, could we not NOP or SIGILL WFI if sched_yield() is disallowed
by seccomp, and treat it as a sched_yield() otherwise?

[...]

Cheers
---Dave
Robin Murphy Aug. 10, 2018, 10:25 a.m. UTC | #3
On 10/08/18 11:06, Dave Martin wrote:
> On Fri, Aug 10, 2018 at 10:43:13AM +0100, Robin Murphy wrote:
>> On 10/08/18 10:19, Marc Zyngier wrote:
>>> It recently came to light that userspace can execute WFI, and that
>>> the arm64 kernel doesn trap this event. This sounds rather benign,
>>
>>                          't
>>
>>> but the kernel should decide when it wants to wait for an interrupt,
>>> and not userspace.
>>>
>>> Let's trap WFI and immediately return after having skipped the
>>> instruction. This effectively makes WFI a rather expensive NOP.
>>
>> ...which is still a perfectly valid behaviour, given that it's a hint.
>> That's fine by me :)
> 
> The only plausible use of WFI that I can see is in a polling or idle
> loop of some description.  So turning WFI into a NOP will likely create
> a userspace busy-wait that never explicitly blocks or yields, which may
> be considered pathological behaviour.

But that might be the case anyway, if the CPU doesn't have a WFI 
implementation, or has an "execute WFI as a NOP" chicken bit set for any 
reason.

> If there is junk is some app store that does this it may lead to
> power wastage and thermal throttling, damaging performance of the whole
> system the rogue app runs on.
> 
> That kind of consequence is partly why I suggested breaking rogue apps
> cleanly (via SIGILL) rather than breaking the system hosting those apps
> in messier ways.  I accept that SIGILL is a step to far.
> 
> So, I continue to favour the sched_yield() approach over all.
> 
> Should this be discussed with the security folks?  It feels like we're
> speculating about whether sched_yield() is a security concern.
> 
> If it is, could we not NOP or SIGILL WFI if sched_yield() is disallowed
> by seccomp, and treat it as a sched_yield() otherwise?

There's also the caveat in the trap itself (which I only remembered on a 
second look), where the exact definition says "..., if the instruction 
would otherwise have caused the PE to enter a low-power state." That 
implies that we may never trap on CPUs which ignore WFI, and even on 
those which don't it seems permissible for the CPU to not trap if it 
knows it's just about to respond to a pending interrupt. Thus the only 
behaviour we can consistently enforce with the trap is in fact NOP. Oh well.

Robin.
Marc Zyngier Aug. 10, 2018, 10:28 a.m. UTC | #4
On Fri, 10 Aug 2018 10:43:13 +0100,
Robin Murphy <robin.murphy@arm.com> wrote:
> 
> On 10/08/18 10:19, Marc Zyngier wrote:
> > It recently came to light that userspace can execute WFI, and that
> > the arm64 kernel doesn trap this event. This sounds rather benign,
> 
>                         't

Damn! Catalin had the same remark on v1, and I missed it.

> 
> > but the kernel should decide when it wants to wait for an interrupt,
> > and not userspace.
> > 
> > Let's trap WFI and immediately return after having skipped the
> > instruction. This effectively makes WFI a rather expensive NOP.
> 
> ...which is still a perfectly valid behaviour, given that it's a
> hint. That's fine by me :)
> 
> > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> > ---
> >   arch/arm64/include/asm/esr.h    |  4 ++++
> >   arch/arm64/include/asm/sysreg.h |  4 ++--
> >   arch/arm64/kernel/entry.S       |  1 +
> >   arch/arm64/kernel/traps.c       | 11 +++++++++++
> >   4 files changed, 18 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
> > index ce70c3ffb993..9a2b6cee4e2b 100644
> > --- a/arch/arm64/include/asm/esr.h
> > +++ b/arch/arm64/include/asm/esr.h
> > @@ -137,6 +137,7 @@
> >   #define ESR_ELx_CV		(UL(1) << 24)
> >   #define ESR_ELx_COND_SHIFT	(20)
> >   #define ESR_ELx_COND_MASK	(UL(0xF) << ESR_ELx_COND_SHIFT)
> > +#define ESR_ELx_WFx_ISS_WFI	(UL(0) << 0)
> >   #define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)
> >   #define ESR_ELx_xVC_IMM_MASK	((1UL << 16) - 1)
> >   @@ -148,6 +149,9 @@
> >   #define DISR_EL1_ESR_MASK	(ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC)
> >     /* ESR value templates for specific events */
> > +#define ESR_ELx_WFx_MASK	(ESR_ELx_EC_MASK | 1)
> 
> Super-nit: is it worth defining ESR_ELx_WFx_ISS_TI rather than
> open-coding 1?

Could do!

Thanks,

	M.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index ce70c3ffb993..9a2b6cee4e2b 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -137,6 +137,7 @@ 
 #define ESR_ELx_CV		(UL(1) << 24)
 #define ESR_ELx_COND_SHIFT	(20)
 #define ESR_ELx_COND_MASK	(UL(0xF) << ESR_ELx_COND_SHIFT)
+#define ESR_ELx_WFx_ISS_WFI	(UL(0) << 0)
 #define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)
 #define ESR_ELx_xVC_IMM_MASK	((1UL << 16) - 1)
 
@@ -148,6 +149,9 @@ 
 #define DISR_EL1_ESR_MASK	(ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC)
 
 /* ESR value templates for specific events */
+#define ESR_ELx_WFx_MASK	(ESR_ELx_EC_MASK | 1)
+#define ESR_ELx_WFx_WFI_VAL	((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) |	\
+				 ESR_ELx_WFx_ISS_WFI)
 
 /* BRK instruction trap from AArch64 state */
 #define ESR_ELx_VAL_BRK64(imm)					\
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index e205ec8489e9..eeae15a3f265 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -487,12 +487,12 @@ 
 
 #define SCTLR_EL1_SET	(SCTLR_ELx_M    | SCTLR_ELx_C    | SCTLR_ELx_SA   |\
 			 SCTLR_EL1_SA0  | SCTLR_EL1_SED  | SCTLR_ELx_I    |\
-			 SCTLR_EL1_DZE  | SCTLR_EL1_UCT  | SCTLR_EL1_NTWI |\
+			 SCTLR_EL1_DZE  | SCTLR_EL1_UCT  |		   \
 			 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\
 			 ENDIAN_SET_EL1 | SCTLR_EL1_UCI  | SCTLR_EL1_RES1)
 #define SCTLR_EL1_CLEAR	(SCTLR_ELx_A   | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD    |\
 			 SCTLR_EL1_UMA | SCTLR_ELx_WXN     | ENDIAN_CLEAR_EL1 |\
-			 SCTLR_EL1_RES0)
+			 SCTLR_EL1_NTWI | SCTLR_EL1_RES0)
 
 #if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff
 #error "Inconsistent SCTLR_EL1 set/clear bits"
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 09dbea221a27..6c8e5dc695d2 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -665,6 +665,7 @@  el0_sync:
 	cmp	x24, #ESR_ELx_EC_FP_EXC64	// FP/ASIMD exception
 	b.eq	el0_fpsimd_exc
 	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
+	ccmp	x24, #ESR_ELx_EC_WFx, #4, ne
 	b.eq	el0_sys
 	cmp	x24, #ESR_ELx_EC_SP_ALIGN	// stack alignment exception
 	b.eq	el0_sp_pc
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 039e9ff379cc..9d6b655529a1 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -496,6 +496,11 @@  static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
 	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
 
+static void wfi_handler(unsigned int esr, struct pt_regs *regs)
+{
+	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+}
+
 struct sys64_hook {
 	unsigned int esr_mask;
 	unsigned int esr_val;
@@ -526,6 +531,12 @@  static struct sys64_hook sys64_hooks[] = {
 		.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ,
 		.handler = cntfrq_read_handler,
 	},
+	{
+		/* Trap WFI instructions executed in userspace */
+		.esr_mask = ESR_ELx_WFx_MASK,
+		.esr_val = ESR_ELx_WFx_WFI_VAL,
+		.handler = wfi_handler,
+	},
 	{},
 };