[v2,4/5] arm64: KVM: Prevent speculative S1 PTW when restoring vcpu context
diff mbox series

Message ID 20191019095521.31722-5-maz@kernel.org
State Mainlined
Commit bd227553ad5077f21ddb382dcd910ba46181805a
Headers show
Series
  • arm64: KVM: Add workaround for errata 1319367 and 1319537
Related show

Commit Message

Marc Zyngier Oct. 19, 2019, 9:55 a.m. UTC
When handling erratum 1319367, we must ensure that the page table
walker cannot parse the S1 page tables while the guest is in an
inconsistent state. This is done as follows:

On guest entry:
- TCR_EL1.EPD{0,1} are set, ensuring that no PTW can occur
- all system registers are restored, except for TCR_EL1 and SCTLR_EL1
- stage-2 is restored
- SCTLR_EL1 and TCR_EL1 are restored

On guest exit:
- SCTLR_EL1.M and TCR_EL1.EPD{0,1} are set, ensuring that no PTW can occur
- stage-2 is disabled
- All host system registers are restored

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/hyp/switch.c    | 31 ++++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/sysreg-sr.c | 35 ++++++++++++++++++++++++++++++++--
 2 files changed, 64 insertions(+), 2 deletions(-)

Comments

James Morse Oct. 24, 2019, 4:10 p.m. UTC | #1
Hi Marc,

On 19/10/2019 10:55, Marc Zyngier wrote:
> When handling erratum 1319367, we must ensure that the page table
> walker cannot parse the S1 page tables while the guest is in an
> inconsistent state. This is done as follows:
> 
> On guest entry:
> - TCR_EL1.EPD{0,1} are set, ensuring that no PTW can occur
> - all system registers are restored, except for TCR_EL1 and SCTLR_EL1
> - stage-2 is restored
> - SCTLR_EL1 and TCR_EL1 are restored
> 
> On guest exit:
> - SCTLR_EL1.M and TCR_EL1.EPD{0,1} are set, ensuring that no PTW can occur
> - stage-2 is disabled
> - All host system registers are restored

Reviewed-by: James Morse <james.morse@arm.com>

(whitespace nit below)


> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 69e10b29cbd0..5765b17c38c7 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -118,6 +118,20 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
>  	}
>  
>  	write_sysreg(val, cptr_el2);
> +
> +	if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
> +		struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
> +
> +		isb();
> +		/*
> +		 * At this stage, and thanks to the above isb(), S2 is
> +		 * configured and enabled. We can now restore the guest's S1
> +		 * configuration: SCTLR, and only then TCR.
> +		 */

(note for my future self: because the guest may have had M=0 and rubbish in the TTBRs)

> +		write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
> +		isb();
> +		write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
> +	}
>  }
>  


> diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
> index 7ddbc849b580..fb97547bfa79 100644
> --- a/arch/arm64/kvm/hyp/sysreg-sr.c
> +++ b/arch/arm64/kvm/hyp/sysreg-sr.c
> @@ -117,12 +117,26 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
>  {
>  	write_sysreg(ctxt->sys_regs[MPIDR_EL1],		vmpidr_el2);
>  	write_sysreg(ctxt->sys_regs[CSSELR_EL1],	csselr_el1);
> -	write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
> +
> +	if (!cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
> +		write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
> +		write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
> +	} else	if (!ctxt->__hyp_running_vcpu) {
> +		/*
> +		 * Must only be done for guest registers, hence the context
> +		 * test. We'recoming from the host, so SCTLR.M is already

(Nit: We'recoming?)

> +		 * set. Pairs with __activate_traps_nvhe().
> +		 */
> +		write_sysreg_el1((ctxt->sys_regs[TCR_EL1] |
> +				  TCR_EPD1_MASK | TCR_EPD0_MASK),
> +				 SYS_TCR);
> +		isb();
> +	}



Thanks,

James
Marc Zyngier Oct. 26, 2019, 10:20 a.m. UTC | #2
On Thu, 24 Oct 2019 17:10:44 +0100,
James Morse <james.morse@arm.com> wrote:

Hi James,

> Hi Marc,
> 
> On 19/10/2019 10:55, Marc Zyngier wrote:
> > When handling erratum 1319367, we must ensure that the page table
> > walker cannot parse the S1 page tables while the guest is in an
> > inconsistent state. This is done as follows:
> > 
> > On guest entry:
> > - TCR_EL1.EPD{0,1} are set, ensuring that no PTW can occur
> > - all system registers are restored, except for TCR_EL1 and SCTLR_EL1
> > - stage-2 is restored
> > - SCTLR_EL1 and TCR_EL1 are restored
> > 
> > On guest exit:
> > - SCTLR_EL1.M and TCR_EL1.EPD{0,1} are set, ensuring that no PTW can occur
> > - stage-2 is disabled
> > - All host system registers are restored
> 
> Reviewed-by: James Morse <james.morse@arm.com>
> 
> (whitespace nit below)
> 
> 
> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > index 69e10b29cbd0..5765b17c38c7 100644
> > --- a/arch/arm64/kvm/hyp/switch.c
> > +++ b/arch/arm64/kvm/hyp/switch.c
> > @@ -118,6 +118,20 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
> >  	}
> >  
> >  	write_sysreg(val, cptr_el2);
> > +
> > +	if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
> > +		struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
> > +
> > +		isb();
> > +		/*
> > +		 * At this stage, and thanks to the above isb(), S2 is
> > +		 * configured and enabled. We can now restore the guest's S1
> > +		 * configuration: SCTLR, and only then TCR.
> > +		 */
> 
> (note for my future self: because the guest may have had M=0 and rubbish in the TTBRs)
> 
> > +		write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
> > +		isb();
> > +		write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
> > +	}
> >  }
> >  
> 
> 
> > diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
> > index 7ddbc849b580..fb97547bfa79 100644
> > --- a/arch/arm64/kvm/hyp/sysreg-sr.c
> > +++ b/arch/arm64/kvm/hyp/sysreg-sr.c
> > @@ -117,12 +117,26 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
> >  {
> >  	write_sysreg(ctxt->sys_regs[MPIDR_EL1],		vmpidr_el2);
> >  	write_sysreg(ctxt->sys_regs[CSSELR_EL1],	csselr_el1);
> > -	write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
> > +
> > +	if (!cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
> > +		write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
> > +		write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
> > +	} else	if (!ctxt->__hyp_running_vcpu) {
> > +		/*
> > +		 * Must only be done for guest registers, hence the context
> > +		 * test. We'recoming from the host, so SCTLR.M is already
> 
> (Nit: We'recoming?)

Well spotted, now fixed. And thanks for the reviewing, much appreciated.

Catalin, Will: given that this series conflicts with the workaround for
erratum 1542419, do you mind taking it via the arm64 tree?

To make things a bit simpler, I've updated the series with James' tags at
[1], and pushed out a resolution of the merge with arm64/for-next/core [2].

Thanks,

	M.

[1] git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git kvm-arm64/erratum-1319367
[2] git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git kvm-arm64/erratum-1319367-resolved
Catalin Marinas Oct. 28, 2019, 10:32 a.m. UTC | #3
On Sat, Oct 26, 2019 at 11:20:35AM +0100, Marc Zyngier wrote:
> Catalin, Will: given that this series conflicts with the workaround for
> erratum 1542419, do you mind taking it via the arm64 tree?

I assume you target 5.5 with this workaround.

I don't mind merging it but if you want to queue it, we already have a
stable for-next/neoverse-n1-stale-instr branch with 1542419 (I'll push a
fixup on top soon for a clang warning). The other issue is that we get a
conflict with mainline due to the tx2 erratum. If it gets too
complicated, I'll also merge for-next/fixes into for-next/core.
Marc Zyngier Oct. 28, 2019, 10:49 a.m. UTC | #4
On Mon, 28 Oct 2019 10:32:17 +0000,
Catalin Marinas <catalin.marinas@arm.com> wrote:

Hi Catalin,

> On Sat, Oct 26, 2019 at 11:20:35AM +0100, Marc Zyngier wrote:
> > Catalin, Will: given that this series conflicts with the workaround for
> > erratum 1542419, do you mind taking it via the arm64 tree?
> 
> I assume you target 5.5 with this workaround.

Absolutely. I'm happy to look at backports once this is in.

> I don't mind merging it but if you want to queue it, we already have
> a stable for-next/neoverse-n1-stale-instr branch with 1542419 (I'll
> push a fixup on top soon for a clang warning). The other issue is
> that we get a conflict with mainline due to the tx2 erratum. If it
> gets too complicated, I'll also merge for-next/fixes into
> for-next/core.

OK, let me have another look at providing a resolution that includes
all of the above. Worse case, you'll be able to pull the branch
directly.

Thanks,

	M.
Catalin Marinas Oct. 28, 2019, 11:06 a.m. UTC | #5
On Mon, Oct 28, 2019 at 10:49:37AM +0000, Marc Zyngier wrote:
> On Mon, 28 Oct 2019 10:32:17 +0000,
> Catalin Marinas <catalin.marinas@arm.com> wrote:
> > On Sat, Oct 26, 2019 at 11:20:35AM +0100, Marc Zyngier wrote:
> > > Catalin, Will: given that this series conflicts with the workaround for
> > > erratum 1542419, do you mind taking it via the arm64 tree?
[...]
> > I don't mind merging it but if you want to queue it, we already have
> > a stable for-next/neoverse-n1-stale-instr branch with 1542419 (I'll
> > push a fixup on top soon for a clang warning). The other issue is
> > that we get a conflict with mainline due to the tx2 erratum. If it
> > gets too complicated, I'll also merge for-next/fixes into
> > for-next/core.
> 
> OK, let me have another look at providing a resolution that includes
> all of the above. Worse case, you'll be able to pull the branch
> directly.

Don't worry about the resolution, I'll fix it up myself when merging
into for-next/core. The latter is not a stable branch, just an octopus
merge of various for-next/* topic branches.

Patch
diff mbox series

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 69e10b29cbd0..5765b17c38c7 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -118,6 +118,20 @@  static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
 	}
 
 	write_sysreg(val, cptr_el2);
+
+	if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+		struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+
+		isb();
+		/*
+		 * At this stage, and thanks to the above isb(), S2 is
+		 * configured and enabled. We can now restore the guest's S1
+		 * configuration: SCTLR, and only then TCR.
+		 */
+		write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
+		isb();
+		write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
+	}
 }
 
 static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
@@ -156,6 +170,23 @@  static void __hyp_text __deactivate_traps_nvhe(void)
 {
 	u64 mdcr_el2 = read_sysreg(mdcr_el2);
 
+	if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+		u64 val;
+
+		/*
+		 * Set the TCR and SCTLR registers in the exact opposite
+		 * sequence as __activate_traps_nvhe (first prevent walks,
+		 * then force the MMU on). A generous sprinkling of isb()
+		 * ensure that things happen in this exact order.
+		 */
+		val = read_sysreg_el1(SYS_TCR);
+		write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
+		isb();
+		val = read_sysreg_el1(SYS_SCTLR);
+		write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
+		isb();
+	}
+
 	__deactivate_traps_common();
 
 	mdcr_el2 &= MDCR_EL2_HPMN_MASK;
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 7ddbc849b580..fb97547bfa79 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -117,12 +117,26 @@  static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 {
 	write_sysreg(ctxt->sys_regs[MPIDR_EL1],		vmpidr_el2);
 	write_sysreg(ctxt->sys_regs[CSSELR_EL1],	csselr_el1);
-	write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
+
+	if (!cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+		write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
+		write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
+	} else	if (!ctxt->__hyp_running_vcpu) {
+		/*
+		 * Must only be done for guest registers, hence the context
+		 * test. We'recoming from the host, so SCTLR.M is already
+		 * set. Pairs with __activate_traps_nvhe().
+		 */
+		write_sysreg_el1((ctxt->sys_regs[TCR_EL1] |
+				  TCR_EPD1_MASK | TCR_EPD0_MASK),
+				 SYS_TCR);
+		isb();
+	}
+
 	write_sysreg(ctxt->sys_regs[ACTLR_EL1],		actlr_el1);
 	write_sysreg_el1(ctxt->sys_regs[CPACR_EL1],	SYS_CPACR);
 	write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1],	SYS_TTBR0);
 	write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1],	SYS_TTBR1);
-	write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
 	write_sysreg_el1(ctxt->sys_regs[ESR_EL1],	SYS_ESR);
 	write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1],	SYS_AFSR0);
 	write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1],	SYS_AFSR1);
@@ -135,6 +149,23 @@  static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 	write_sysreg(ctxt->sys_regs[PAR_EL1],		par_el1);
 	write_sysreg(ctxt->sys_regs[TPIDR_EL1],		tpidr_el1);
 
+	if (cpus_have_const_cap(ARM64_WORKAROUND_1319367) &&
+	    ctxt->__hyp_running_vcpu) {
+		/*
+		 * Must only be done for host registers, hence the context
+		 * test. Pairs with __deactivate_traps_nvhe().
+		 */
+		isb();
+		/*
+		 * At this stage, and thanks to the above isb(), S2 is
+		 * deconfigured and disabled. We can now restore the host's
+		 * S1 configuration: SCTLR, and only then TCR.
+		 */
+		write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	SYS_SCTLR);
+		isb();
+		write_sysreg_el1(ctxt->sys_regs[TCR_EL1],	SYS_TCR);
+	}
+
 	write_sysreg(ctxt->gp_regs.sp_el1,		sp_el1);
 	write_sysreg_el1(ctxt->gp_regs.elr_el1,		SYS_ELR);
 	write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR);