diff mbox series

[v4,06/36] KVM: arm64: nv: Handle CNTHCTL_EL2 specially

Message ID 20241009190019.3222687-7-maz@kernel.org (mailing list archive)
State New
Headers show
Series KVM: arm64: Add EL2 support to FEAT_S1PIE/S1POE | expand

Commit Message

Marc Zyngier Oct. 9, 2024, 6:59 p.m. UTC
Accessing CNTHCTL_EL2 is fraught with danger if running with
HCR_EL2.E2H=1: half of the bits are held in CNTKCTL_EL1, and
thus can be changed behind our back, while the rest lives
in the CNTHCTL_EL2 shadow copy that is memory-based.

Yes, this is a lot of fun!

Make sure that we merge the two on read access, while we can
write to CNTKCTL_EL1 in a more straightforward manner.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/sys_regs.c    | 28 ++++++++++++++++++++++++++++
 include/kvm/arm_arch_timer.h |  3 +++
 2 files changed, 31 insertions(+)

Comments

Alexandru Elisei Oct. 16, 2024, 9:37 a.m. UTC | #1
Hi Marc,

I'm planning to have a look at (some) of the patches, do you have a timeline for
merging the series? Just so I know what to prioritise.

On Wed, Oct 09, 2024 at 07:59:49PM +0100, Marc Zyngier wrote:
> Accessing CNTHCTL_EL2 is fraught with danger if running with
> HCR_EL2.E2H=1: half of the bits are held in CNTKCTL_EL1, and
> thus can be changed behind our back, while the rest lives
> in the CNTHCTL_EL2 shadow copy that is memory-based.
> 
> Yes, this is a lot of fun!
> 
> Make sure that we merge the two on read access, while we can
> write to CNTKCTL_EL1 in a more straightforward manner.
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  arch/arm64/kvm/sys_regs.c    | 28 ++++++++++++++++++++++++++++
>  include/kvm/arm_arch_timer.h |  3 +++
>  2 files changed, 31 insertions(+)
> 
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 3cd54656a8e2f..932d2fb7a52a0 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -157,6 +157,21 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>  		if (!is_hyp_ctxt(vcpu))
>  			goto memory_read;
>  
> +		/*
> +		 * CNTHCTL_EL2 requires some special treatment to
> +		 * account for the bits that can be set via CNTKCTL_EL1.
> +		 */
> +		switch (reg) {
> +		case CNTHCTL_EL2:
> +			if (vcpu_el2_e2h_is_set(vcpu)) {
> +				val = read_sysreg_el1(SYS_CNTKCTL);
> +				val &= CNTKCTL_VALID_BITS;
> +				val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS;
> +				return val;
> +			}
> +			break;
> +		}
> +
>  		/*
>  		 * If this register does not have an EL1 counterpart,
>  		 * then read the stored EL2 version.
> @@ -207,6 +222,19 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
>  		 */
>  		__vcpu_sys_reg(vcpu, reg) = val;
>  
> +		switch (reg) {
> +		case CNTHCTL_EL2:
> +			/*
> +			 * If E2H=0, CNHTCTL_EL2 is a pure shadow register.
> +			 * Otherwise, some of the bits are backed by
> +			 * CNTKCTL_EL1, while the rest is kept in memory.
> +			 * Yes, this is fun stuff.
> +			 */
> +			if (vcpu_el2_e2h_is_set(vcpu))
> +				write_sysreg_el1(val, SYS_CNTKCTL);

Sorry, but I just can't seem to get my head around why the RES0 bits aren't
cleared. Is KVM relying on the guest to implement Should-Be-Zero-or-Preserved,
as per the RES0 definition?

> +			return;
> +		}
> +
>  		/* No EL1 counterpart? We're done here.? */
>  		if (reg == el1r)
>  			return;
> diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
> index c819c5d16613b..fd650a8789b91 100644
> --- a/include/kvm/arm_arch_timer.h
> +++ b/include/kvm/arm_arch_timer.h
> @@ -147,6 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt);
>  void kvm_timer_cpu_up(void);
>  void kvm_timer_cpu_down(void);
>  
> +/* CNTKCTL_EL1 valid bits as of DDI0487J.a */
> +#define CNTKCTL_VALID_BITS	(BIT(17) | GENMASK_ULL(9, 0))

This does match CNTHCTL_EL2_VHE().

Thanks,
Alex

> +
>  static inline bool has_cntpoff(void)
>  {
>  	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
> -- 
> 2.39.2
>
Marc Zyngier Oct. 16, 2024, 11:29 a.m. UTC | #2
On Wed, 16 Oct 2024 10:37:18 +0100,
Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> 
> Hi Marc,
> 
> I'm planning to have a look at (some) of the patches, do you have a
> timeline for merging the series? Just so I know what to prioritise.

I want it merged yesterday. All of it.

> 
> On Wed, Oct 09, 2024 at 07:59:49PM +0100, Marc Zyngier wrote:
> > Accessing CNTHCTL_EL2 is fraught with danger if running with
> > HCR_EL2.E2H=1: half of the bits are held in CNTKCTL_EL1, and
> > thus can be changed behind our back, while the rest lives
> > in the CNTHCTL_EL2 shadow copy that is memory-based.
> > 
> > Yes, this is a lot of fun!
> > 
> > Make sure that we merge the two on read access, while we can
> > write to CNTKCTL_EL1 in a more straightforward manner.
> > 
> > Signed-off-by: Marc Zyngier <maz@kernel.org>
> > ---
> >  arch/arm64/kvm/sys_regs.c    | 28 ++++++++++++++++++++++++++++
> >  include/kvm/arm_arch_timer.h |  3 +++
> >  2 files changed, 31 insertions(+)
> > 
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 3cd54656a8e2f..932d2fb7a52a0 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -157,6 +157,21 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
> >  		if (!is_hyp_ctxt(vcpu))
> >  			goto memory_read;
> >  
> > +		/*
> > +		 * CNTHCTL_EL2 requires some special treatment to
> > +		 * account for the bits that can be set via CNTKCTL_EL1.
> > +		 */
> > +		switch (reg) {
> > +		case CNTHCTL_EL2:
> > +			if (vcpu_el2_e2h_is_set(vcpu)) {
> > +				val = read_sysreg_el1(SYS_CNTKCTL);
> > +				val &= CNTKCTL_VALID_BITS;
> > +				val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS;
> > +				return val;
> > +			}
> > +			break;
> > +		}
> > +
> >  		/*
> >  		 * If this register does not have an EL1 counterpart,
> >  		 * then read the stored EL2 version.
> > @@ -207,6 +222,19 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
> >  		 */
> >  		__vcpu_sys_reg(vcpu, reg) = val;
> >  
> > +		switch (reg) {
> > +		case CNTHCTL_EL2:
> > +			/*
> > +			 * If E2H=0, CNHTCTL_EL2 is a pure shadow register.
> > +			 * Otherwise, some of the bits are backed by
> > +			 * CNTKCTL_EL1, while the rest is kept in memory.
> > +			 * Yes, this is fun stuff.
> > +			 */
> > +			if (vcpu_el2_e2h_is_set(vcpu))
> > +				write_sysreg_el1(val, SYS_CNTKCTL);
> 
> Sorry, but I just can't seem to get my head around why the RES0 bits aren't
> cleared. Is KVM relying on the guest to implement Should-Be-Zero-or-Preserved,
> as per the RES0 definition?

KVM isn't relying on anything. And it isn't about the RES0 bits not
being cleared. It is about the HW not providing storage for some of
the CNTHCTL_EL2 bits when the guest is using CNTKCTL_EL1 as a proxy
for its own view of CNTHCTL_EL2.

Namely, bits outside of CNTKCTL_VALID_BITS are not guaranteed to be
stored until (IIRC) FEAT_NV2p1, which retrospectively fixes the
architecture by mandating that the relevant bits have dedicated
storage.

	M.
Alexandru Elisei Oct. 16, 2024, 1:19 p.m. UTC | #3
Hi,

On Wed, Oct 16, 2024 at 12:29:02PM +0100, Marc Zyngier wrote:
> On Wed, 16 Oct 2024 10:37:18 +0100,
> Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> > 
> > Hi Marc,
> > 
> > I'm planning to have a look at (some) of the patches, do you have a
> > timeline for merging the series? Just so I know what to prioritise.
> 
> I want it merged yesterday. All of it.
> 
> > 
> > On Wed, Oct 09, 2024 at 07:59:49PM +0100, Marc Zyngier wrote:
> > > Accessing CNTHCTL_EL2 is fraught with danger if running with
> > > HCR_EL2.E2H=1: half of the bits are held in CNTKCTL_EL1, and
> > > thus can be changed behind our back, while the rest lives
> > > in the CNTHCTL_EL2 shadow copy that is memory-based.
> > > 
> > > Yes, this is a lot of fun!
> > > 
> > > Make sure that we merge the two on read access, while we can
> > > write to CNTKCTL_EL1 in a more straightforward manner.
> > > 
> > > Signed-off-by: Marc Zyngier <maz@kernel.org>
> > > ---
> > >  arch/arm64/kvm/sys_regs.c    | 28 ++++++++++++++++++++++++++++
> > >  include/kvm/arm_arch_timer.h |  3 +++
> > >  2 files changed, 31 insertions(+)
> > > 
> > > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > > index 3cd54656a8e2f..932d2fb7a52a0 100644
> > > --- a/arch/arm64/kvm/sys_regs.c
> > > +++ b/arch/arm64/kvm/sys_regs.c
> > > @@ -157,6 +157,21 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
> > >  		if (!is_hyp_ctxt(vcpu))
> > >  			goto memory_read;
> > >  
> > > +		/*
> > > +		 * CNTHCTL_EL2 requires some special treatment to
> > > +		 * account for the bits that can be set via CNTKCTL_EL1.
> > > +		 */
> > > +		switch (reg) {
> > > +		case CNTHCTL_EL2:
> > > +			if (vcpu_el2_e2h_is_set(vcpu)) {
> > > +				val = read_sysreg_el1(SYS_CNTKCTL);
> > > +				val &= CNTKCTL_VALID_BITS;
> > > +				val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS;
> > > +				return val;
> > > +			}
> > > +			break;
> > > +		}
> > > +
> > >  		/*
> > >  		 * If this register does not have an EL1 counterpart,
> > >  		 * then read the stored EL2 version.
> > > @@ -207,6 +222,19 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
> > >  		 */
> > >  		__vcpu_sys_reg(vcpu, reg) = val;
> > >  
> > > +		switch (reg) {
> > > +		case CNTHCTL_EL2:
> > > +			/*
> > > +			 * If E2H=0, CNHTCTL_EL2 is a pure shadow register.
> > > +			 * Otherwise, some of the bits are backed by
> > > +			 * CNTKCTL_EL1, while the rest is kept in memory.
> > > +			 * Yes, this is fun stuff.
> > > +			 */
> > > +			if (vcpu_el2_e2h_is_set(vcpu))
> > > +				write_sysreg_el1(val, SYS_CNTKCTL);
> > 
> > Sorry, but I just can't seem to get my head around why the RES0 bits aren't
> > cleared. Is KVM relying on the guest to implement Should-Be-Zero-or-Preserved,
> > as per the RES0 definition?
> 
> KVM isn't relying on anything. And it isn't about the RES0 bits not
> being cleared. It is about the HW not providing storage for some of
> the CNTHCTL_EL2 bits when the guest is using CNTKCTL_EL1 as a proxy
> for its own view of CNTHCTL_EL2.
> 
> Namely, bits outside of CNTKCTL_VALID_BITS are not guaranteed to be
> stored until (IIRC) FEAT_NV2p1, which retrospectively fixes the
> architecture by mandating that the relevant bits have dedicated
> storage.

The definition for RES0 says:

'A bit that is RES0 in a context is reserved for possible future use in that
context. To preserve forward compatibility, software:
 * Must not rely on the bit reading as 0.
 * Must use an SBZP policy to write to the bit.'

where Should-Be-Zero-of-Preserved (SBZP):

'When writing this field, software must either write all 0s to this field or, if
the register is being restored from a previously read state, write the
previously read value to this field. If this is not done, then the result is
unpredictable.'

And what about the rest of the RES0 bits from CNTKCTL_EL1, those that are RES0
in both registers?

Thanks,
Alex
Marc Zyngier Oct. 16, 2024, 1:41 p.m. UTC | #4
On Wed, 16 Oct 2024 14:19:14 +0100,
Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> 
> > > > @@ -207,6 +222,19 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
> > > >  		 */
> > > >  		__vcpu_sys_reg(vcpu, reg) = val;
> > > >  
> > > > +		switch (reg) {
> > > > +		case CNTHCTL_EL2:
> > > > +			/*
> > > > +			 * If E2H=0, CNHTCTL_EL2 is a pure shadow register.
> > > > +			 * Otherwise, some of the bits are backed by
> > > > +			 * CNTKCTL_EL1, while the rest is kept in memory.
> > > > +			 * Yes, this is fun stuff.
> > > > +			 */
> > > > +			if (vcpu_el2_e2h_is_set(vcpu))
> > > > +				write_sysreg_el1(val, SYS_CNTKCTL);
> > > 
> > > Sorry, but I just can't seem to get my head around why the RES0 bits aren't
> > > cleared. Is KVM relying on the guest to implement Should-Be-Zero-or-Preserved,
> > > as per the RES0 definition?
> > 
> > KVM isn't relying on anything. And it isn't about the RES0 bits not
> > being cleared. It is about the HW not providing storage for some of
> > the CNTHCTL_EL2 bits when the guest is using CNTKCTL_EL1 as a proxy
> > for its own view of CNTHCTL_EL2.
> > 
> > Namely, bits outside of CNTKCTL_VALID_BITS are not guaranteed to be
> > stored until (IIRC) FEAT_NV2p1, which retrospectively fixes the
> > architecture by mandating that the relevant bits have dedicated
> > storage.
> 
> The definition for RES0 says:
> 
> 'A bit that is RES0 in a context is reserved for possible future use in that
> context. To preserve forward compatibility, software:
>  * Must not rely on the bit reading as 0.
>  * Must use an SBZP policy to write to the bit.'
> 
> where Should-Be-Zero-of-Preserved (SBZP):
> 
> 'When writing this field, software must either write all 0s to this field or, if
> the register is being restored from a previously read state, write the
> previously read value to this field. If this is not done, then the result is
> unpredictable.'

And? I can quote the ARM ARM too, but that's not going to lead us
anywhere if you don't explain why what you quote is related to the
problem at hand (hint, I don't think it is).

> And what about the rest of the RES0 bits from CNTKCTL_EL1, those that are RES0
> in both registers?

What about them *what*?

It would definitely help if you didn't write in riddles and actually
spell out what you mean. If you think this code is wrong, please
explain why you think it is wrong, and maybe we'll be able to make
some progress.


Thanks,

	M.
diff mbox series

Patch

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 3cd54656a8e2f..932d2fb7a52a0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -157,6 +157,21 @@  u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
 		if (!is_hyp_ctxt(vcpu))
 			goto memory_read;
 
+		/*
+		 * CNTHCTL_EL2 requires some special treatment to
+		 * account for the bits that can be set via CNTKCTL_EL1.
+		 */
+		switch (reg) {
+		case CNTHCTL_EL2:
+			if (vcpu_el2_e2h_is_set(vcpu)) {
+				val = read_sysreg_el1(SYS_CNTKCTL);
+				val &= CNTKCTL_VALID_BITS;
+				val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS;
+				return val;
+			}
+			break;
+		}
+
 		/*
 		 * If this register does not have an EL1 counterpart,
 		 * then read the stored EL2 version.
@@ -207,6 +222,19 @@  void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
 		 */
 		__vcpu_sys_reg(vcpu, reg) = val;
 
+		switch (reg) {
+		case CNTHCTL_EL2:
+			/*
+			 * If E2H=0, CNHTCTL_EL2 is a pure shadow register.
+			 * Otherwise, some of the bits are backed by
+			 * CNTKCTL_EL1, while the rest is kept in memory.
+			 * Yes, this is fun stuff.
+			 */
+			if (vcpu_el2_e2h_is_set(vcpu))
+				write_sysreg_el1(val, SYS_CNTKCTL);
+			return;
+		}
+
 		/* No EL1 counterpart? We're done here.? */
 		if (reg == el1r)
 			return;
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index c819c5d16613b..fd650a8789b91 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -147,6 +147,9 @@  u64 timer_get_cval(struct arch_timer_context *ctxt);
 void kvm_timer_cpu_up(void);
 void kvm_timer_cpu_down(void);
 
+/* CNTKCTL_EL1 valid bits as of DDI0487J.a */
+#define CNTKCTL_VALID_BITS	(BIT(17) | GENMASK_ULL(9, 0))
+
 static inline bool has_cntpoff(void)
 {
 	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));