[v5,36/69] KVM: arm64: nv: Filter out unsupported features from ID regs

Message ID 20211129200150.351436-37-maz@kernel.org (mailing list archive)
State New, archived
Series KVM: arm64: ARMv8.3/8.4 Nested Virtualization support

Commit Message

Marc Zyngier Nov. 29, 2021, 8:01 p.m. UTC
As there are a number of features that we either can't support,
or don't want to support right away with NV, let's add some
basic filtering so that we don't advertise silly things to the
EL2 guest.

Whilst we are at it, advertise ARMv8.4-TTL as well as ARMv8.5-GTG.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_nested.h |   6 ++
 arch/arm64/kvm/nested.c             | 152 ++++++++++++++++++++++++++++
 arch/arm64/kvm/sys_regs.c           |   4 +-
 arch/arm64/kvm/sys_regs.h           |   2 +
 4 files changed, 163 insertions(+), 1 deletion(-)

Comments

Ganapatrao Kulkarni Dec. 20, 2021, 7:26 a.m. UTC | #1
Hi Marc,

On 30-11-2021 01:31 am, Marc Zyngier wrote:
> As there is a number of features that we either can't support,
> or don't want to support right away with NV, let's add some
> basic filtering so that we don't advertize silly things to the
> EL2 guest.
> 
> Whilst we are at it, avertize ARMv8.4-TTL as well as ARMv8.5-GTG.
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>   arch/arm64/include/asm/kvm_nested.h |   6 ++
>   arch/arm64/kvm/nested.c             | 152 ++++++++++++++++++++++++++++
>   arch/arm64/kvm/sys_regs.c           |   4 +-
>   arch/arm64/kvm/sys_regs.h           |   2 +
>   4 files changed, 163 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
> index 07c15f51cf86..026ddaad972c 100644
> --- a/arch/arm64/include/asm/kvm_nested.h
> +++ b/arch/arm64/include/asm/kvm_nested.h
> @@ -67,4 +67,10 @@ extern bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg,
>   extern bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit);
>   extern bool forward_nv_traps(struct kvm_vcpu *vcpu);
>   
> +struct sys_reg_params;
> +struct sys_reg_desc;
> +
> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> +			  const struct sys_reg_desc *r);
> +
>   #endif /* __ARM64_KVM_NESTED_H */
> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> index 42a96c8d2adc..19b674983e13 100644
> --- a/arch/arm64/kvm/nested.c
> +++ b/arch/arm64/kvm/nested.c
> @@ -20,6 +20,10 @@
>   #include <linux/kvm_host.h>
>   
>   #include <asm/kvm_emulate.h>
> +#include <asm/kvm_nested.h>
> +#include <asm/sysreg.h>
> +
> +#include "sys_regs.h"
>   
>   /*
>    * Inject wfx to the virtual EL2 if this is not from the virtual EL2 and
> @@ -38,3 +42,151 @@ int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe)
>   
>   	return -EINVAL;
>   }
> +
> +/*
> + * Our emulated CPU doesn't support all the possible features. For the
> + * sake of simplicity (and probably mental sanity), wipe out a number
> + * of feature bits we don't intend to support for the time being.
> + * This list should get updated as new features get added to the NV
> + * support, and new extension to the architecture.
> + */
> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> +			  const struct sys_reg_desc *r)
> +{
> +	u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
> +			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
> +	u64 val, tmp;
> +
> +	if (!nested_virt_in_use(v))
> +		return;
> +
> +	val = p->regval;
> +
> +	switch (id) {
> +	case SYS_ID_AA64ISAR0_EL1:
> +		/* Support everything but O.S. and Range TLBIs */
> +		val &= ~(FEATURE(ID_AA64ISAR0_TLB)	|
> +			 GENMASK_ULL(27, 24)		|
> +			 GENMASK_ULL(3, 0));
> +		break;
> +
> +	case SYS_ID_AA64ISAR1_EL1:
> +		/* Support everything but PtrAuth and Spec Invalidation */
> +		val &= ~(GENMASK_ULL(63, 56)		|
> +			 FEATURE(ID_AA64ISAR1_SPECRES)	|
> +			 FEATURE(ID_AA64ISAR1_GPI)	|
> +			 FEATURE(ID_AA64ISAR1_GPA)	|
> +			 FEATURE(ID_AA64ISAR1_API)	|
> +			 FEATURE(ID_AA64ISAR1_APA));
> +		break;
> +
> +	case SYS_ID_AA64PFR0_EL1:
> +		/* No AMU, MPAM, S-EL2, RAS or SVE */
> +		val &= ~(GENMASK_ULL(55, 52)		|
> +			 FEATURE(ID_AA64PFR0_AMU)	|
> +			 FEATURE(ID_AA64PFR0_MPAM)	|
> +			 FEATURE(ID_AA64PFR0_SEL2)	|
> +			 FEATURE(ID_AA64PFR0_RAS)	|
> +			 FEATURE(ID_AA64PFR0_SVE)	|
> +			 FEATURE(ID_AA64PFR0_EL3)	|
> +			 FEATURE(ID_AA64PFR0_EL2));
> +		/* 64bit EL2/EL3 only */
> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL2), 0b0001);
> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL3), 0b0001);
> +		break;
> +
> +	case SYS_ID_AA64PFR1_EL1:
> +		/* Only support SSBS */
> +		val &= FEATURE(ID_AA64PFR1_SSBS);
> +		break;
> +
> +	case SYS_ID_AA64MMFR0_EL1:
> +		/* Hide ECV, FGT, ExS, Secure Memory */
> +		val &= ~(GENMASK_ULL(63, 43)			|
> +			 FEATURE(ID_AA64MMFR0_TGRAN4_2)		|
> +			 FEATURE(ID_AA64MMFR0_TGRAN16_2)	|
> +			 FEATURE(ID_AA64MMFR0_TGRAN64_2)	|
> +			 FEATURE(ID_AA64MMFR0_SNSMEM));
> +
> +		/* Disallow unsupported S2 page sizes */
> +		switch (PAGE_SIZE) {
> +		case SZ_64K:
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0001);
> +			fallthrough;
> +		case SZ_16K:
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0001);
> +			fallthrough;
> +		case SZ_4K:
> +			/* Support everything */
> +			break;
> +		}

It seems to me that the host hypervisor (L0) has to boot with a 4KB
page size to support all page sizes (4, 16 and 64KB) at L1. Is there
any specific reason for this restriction?

> +		/* Advertize supported S2 page sizes */
> +		switch (PAGE_SIZE) {
> +		case SZ_4K:
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0010);
> +			fallthrough;
> +		case SZ_16K:
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0010);
> +			fallthrough;
> +		case SZ_64K:
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN64_2), 0b0010);
> +			break;
> +		}
> +		/* Cap PARange to 40bits */

Any specific reason for the 40-bit cap?

> +		tmp = FIELD_GET(FEATURE(ID_AA64MMFR0_PARANGE), val);
> +		if (tmp > 0b0010) {
> +			val &= ~FEATURE(ID_AA64MMFR0_PARANGE);
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_PARANGE), 0b0010);
> +		}
> +		break;
> +
> +	case SYS_ID_AA64MMFR1_EL1:
> +		val &= (FEATURE(ID_AA64MMFR1_PAN)	|
> +			FEATURE(ID_AA64MMFR1_LOR)	|
> +			FEATURE(ID_AA64MMFR1_HPD)	|
> +			FEATURE(ID_AA64MMFR1_VHE)	|
> +			FEATURE(ID_AA64MMFR1_VMIDBITS));
> +		break;
> +
> +	case SYS_ID_AA64MMFR2_EL1:
> +		val &= ~(FEATURE(ID_AA64MMFR2_EVT)	|
> +			 FEATURE(ID_AA64MMFR2_BBM)	|
> +			 FEATURE(ID_AA64MMFR2_TTL)	|
> +			 GENMASK_ULL(47, 44)		|
> +			 FEATURE(ID_AA64MMFR2_ST)	|
> +			 FEATURE(ID_AA64MMFR2_CCIDX)	|
> +			 FEATURE(ID_AA64MMFR2_LVA));
> +
> +		/* Force TTL support */
> +		val |= FIELD_PREP(FEATURE(ID_AA64MMFR2_TTL), 0b0001);
> +		break;
> +
> +	case SYS_ID_AA64DFR0_EL1:
> +		/* Only limited support for PMU, Debug, BPs and WPs */
> +		val &= (FEATURE(ID_AA64DFR0_PMSVER)	|
> +			FEATURE(ID_AA64DFR0_WRPS)	|
> +			FEATURE(ID_AA64DFR0_BRPS)	|
> +			FEATURE(ID_AA64DFR0_DEBUGVER));
> +
> +		/* Cap PMU to ARMv8.1 */
> +		tmp = FIELD_GET(FEATURE(ID_AA64DFR0_PMUVER), val);
> +		if (tmp > 0b0100) {
> +			val &= ~FEATURE(ID_AA64DFR0_PMUVER);
> +			val |= FIELD_PREP(FEATURE(ID_AA64DFR0_PMUVER), 0b0100);
> +		}
> +		/* Cap Debug to ARMv8.1 */
> +		tmp = FIELD_GET(FEATURE(ID_AA64DFR0_DEBUGVER), val);
> +		if (tmp > 0b0111) {
> +			val &= ~FEATURE(ID_AA64DFR0_DEBUGVER);
> +			val |= FIELD_PREP(FEATURE(ID_AA64DFR0_DEBUGVER), 0b0111);
> +		}
> +		break;
> +
> +	default:
> +		/* Unknown register, just wipe it clean */
> +		val = 0;
> +		break;
> +	}
> +
> +	p->regval = val;
> +}
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 9deedd5a058f..19b33ccb61b8 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -1431,8 +1431,10 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
>   			  const struct sys_reg_desc *r)
>   {
>   	bool raz = sysreg_visible_as_raz(vcpu, r);
> +	bool ret = __access_id_reg(vcpu, p, r, raz);
>   
> -	return __access_id_reg(vcpu, p, r, raz);
> +	access_nested_id_reg(vcpu, p, r);
> +	return ret;
>   }
>   
>   static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
> diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
> index cc0cc95a0280..d260c26b1834 100644
> --- a/arch/arm64/kvm/sys_regs.h
> +++ b/arch/arm64/kvm/sys_regs.h
> @@ -201,4 +201,6 @@ const struct sys_reg_desc *find_reg_by_id(u64 id,
>   	CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)),	\
>   	Op2(sys_reg_Op2(reg))
>   
> +#define FEATURE(x)	(GENMASK_ULL(x##_SHIFT + 3, x##_SHIFT))
> +
>   #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */

Thanks,
Ganapat
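
As an aside on the mechanics: the FEATURE() helper added to sys_regs.h
above builds the 4-bit mask of a standard ID register field from its
_SHIFT definition. A minimal standalone sketch of how the patch uses it
(the SVE shift value is taken from asm/sysreg.h; cap_id_field() is a
hypothetical name for the clamping pattern the patch repeats for
PARange, PMUVer and DebugVer):

#include <linux/bits.h>
#include <linux/types.h>

#define ID_AA64PFR0_SVE_SHIFT	32
#define FEATURE(x)	(GENMASK_ULL(x##_SHIFT + 3, x##_SHIFT))

/*
 * FEATURE(ID_AA64PFR0_SVE) == GENMASK_ULL(35, 32); clearing those four
 * bits makes the register report "SVE not implemented" to the EL2 guest.
 */
static u64 hide_sve(u64 pfr0)
{
	return pfr0 & ~FEATURE(ID_AA64PFR0_SVE);
}

/*
 * The capping pattern: if a field advertises more than we can emulate,
 * clamp it to the highest supported value. Open-coded shifts, so the
 * field position doesn't have to be a compile-time constant.
 */
static u64 cap_id_field(u64 reg, unsigned int shift, u64 cap)
{
	u64 mask = GENMASK_ULL(shift + 3, shift);

	if (((reg & mask) >> shift) > cap)
		reg = (reg & ~mask) | (cap << shift);

	return reg;
}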
Marc Zyngier Dec. 20, 2021, 9:56 a.m. UTC | #2
On Mon, 20 Dec 2021 07:26:50 +0000,
Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
> 
> 
> Hi Marc,
> 
> On 30-11-2021 01:31 am, Marc Zyngier wrote:
> > As there is a number of features that we either can't support,
> > or don't want to support right away with NV, let's add some
> > basic filtering so that we don't advertize silly things to the
> > EL2 guest.
> > 
> > Whilst we are at it, avertize ARMv8.4-TTL as well as ARMv8.5-GTG.
> > 
> > Signed-off-by: Marc Zyngier <maz@kernel.org>
> > ---
> >   arch/arm64/include/asm/kvm_nested.h |   6 ++
> >   arch/arm64/kvm/nested.c             | 152 ++++++++++++++++++++++++++++
> >   arch/arm64/kvm/sys_regs.c           |   4 +-
> >   arch/arm64/kvm/sys_regs.h           |   2 +
> >   4 files changed, 163 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
> > index 07c15f51cf86..026ddaad972c 100644
> > --- a/arch/arm64/include/asm/kvm_nested.h
> > +++ b/arch/arm64/include/asm/kvm_nested.h
> > @@ -67,4 +67,10 @@ extern bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg,
> >   extern bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit);
> >   extern bool forward_nv_traps(struct kvm_vcpu *vcpu);
> >   +struct sys_reg_params;
> > +struct sys_reg_desc;
> > +
> > +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> > +			  const struct sys_reg_desc *r);
> > +
> >   #endif /* __ARM64_KVM_NESTED_H */
> > diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> > index 42a96c8d2adc..19b674983e13 100644
> > --- a/arch/arm64/kvm/nested.c
> > +++ b/arch/arm64/kvm/nested.c
> > @@ -20,6 +20,10 @@
> >   #include <linux/kvm_host.h>
> >     #include <asm/kvm_emulate.h>
> > +#include <asm/kvm_nested.h>
> > +#include <asm/sysreg.h>
> > +
> > +#include "sys_regs.h"
> >     /*
> >    * Inject wfx to the virtual EL2 if this is not from the virtual EL2 and
> > @@ -38,3 +42,151 @@ int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe)
> >     	return -EINVAL;
> >   }
> > +
> > +/*
> > + * Our emulated CPU doesn't support all the possible features. For the
> > + * sake of simplicity (and probably mental sanity), wipe out a number
> > + * of feature bits we don't intend to support for the time being.
> > + * This list should get updated as new features get added to the NV
> > + * support, and new extension to the architecture.
> > + */
> > +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> > +			  const struct sys_reg_desc *r)
> > +{
> > +	u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
> > +			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
> > +	u64 val, tmp;
> > +
> > +	if (!nested_virt_in_use(v))
> > +		return;
> > +
> > +	val = p->regval;
> > +
> > +	switch (id) {
> > +	case SYS_ID_AA64ISAR0_EL1:
> > +		/* Support everything but O.S. and Range TLBIs */
> > +		val &= ~(FEATURE(ID_AA64ISAR0_TLB)	|
> > +			 GENMASK_ULL(27, 24)		|
> > +			 GENMASK_ULL(3, 0));
> > +		break;
> > +
> > +	case SYS_ID_AA64ISAR1_EL1:
> > +		/* Support everything but PtrAuth and Spec Invalidation */
> > +		val &= ~(GENMASK_ULL(63, 56)		|
> > +			 FEATURE(ID_AA64ISAR1_SPECRES)	|
> > +			 FEATURE(ID_AA64ISAR1_GPI)	|
> > +			 FEATURE(ID_AA64ISAR1_GPA)	|
> > +			 FEATURE(ID_AA64ISAR1_API)	|
> > +			 FEATURE(ID_AA64ISAR1_APA));
> > +		break;
> > +
> > +	case SYS_ID_AA64PFR0_EL1:
> > +		/* No AMU, MPAM, S-EL2, RAS or SVE */
> > +		val &= ~(GENMASK_ULL(55, 52)		|
> > +			 FEATURE(ID_AA64PFR0_AMU)	|
> > +			 FEATURE(ID_AA64PFR0_MPAM)	|
> > +			 FEATURE(ID_AA64PFR0_SEL2)	|
> > +			 FEATURE(ID_AA64PFR0_RAS)	|
> > +			 FEATURE(ID_AA64PFR0_SVE)	|
> > +			 FEATURE(ID_AA64PFR0_EL3)	|
> > +			 FEATURE(ID_AA64PFR0_EL2));
> > +		/* 64bit EL2/EL3 only */
> > +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL2), 0b0001);
> > +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL3), 0b0001);
> > +		break;
> > +
> > +	case SYS_ID_AA64PFR1_EL1:
> > +		/* Only support SSBS */
> > +		val &= FEATURE(ID_AA64PFR1_SSBS);
> > +		break;
> > +
> > +	case SYS_ID_AA64MMFR0_EL1:
> > +		/* Hide ECV, FGT, ExS, Secure Memory */
> > +		val &= ~(GENMASK_ULL(63, 43)			|
> > +			 FEATURE(ID_AA64MMFR0_TGRAN4_2)		|
> > +			 FEATURE(ID_AA64MMFR0_TGRAN16_2)	|
> > +			 FEATURE(ID_AA64MMFR0_TGRAN64_2)	|
> > +			 FEATURE(ID_AA64MMFR0_SNSMEM));
> > +
> > +		/* Disallow unsupported S2 page sizes */
> > +		switch (PAGE_SIZE) {
> > +		case SZ_64K:
> > +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0001);
> > +			fallthrough;
> > +		case SZ_16K:
> > +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0001);
> > +			fallthrough;
> > +		case SZ_4K:
> > +			/* Support everything */
> > +			break;
> > +		}
> 
> It seems to me that Host hypervisor(L0) has to boot with 4KB page size
> to support all (4, 16 and 64KB) page sizes at L1, any specific reason
> for this restriction?

Well, yes.

If you have an L0 that has booted with (let's say) a 64kB page size,
how do you provide S2 mappings with 4kB granularity so that you can
implement the permissions that an L1 guest hypervisor can impose on its
own guest, given that KVM currently mandates S1 and S2 to use the same
page sizes?

You can't. That's why we tell the guest hypervisor how much we
support, and the guest hypervisor can decide to go ahead or not
depending on what it does.

If one day we can support S2 mappings that are smaller than the host's
page size, then we'll be able to advertise all page sizes. But I
wouldn't hold my breath for this to happen.
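
To make that concrete: a guest hypervisor consuming these registers
would check the stage-2 granule fields along these lines. This is only
a sketch; the encodings and bit positions come from the
ID_AA64MMFR0_EL1 description (FEAT_GTG) in the Arm ARM:

#include <linux/types.h>

/*
 * ID_AA64MMFR0_EL1.TGRAN4_2, bits [43:40]:
 *   0b0000: defer to the stage-1 TGRAN4 field
 *   0b0001: 4kB granule not supported at stage 2
 *   0b0010 and above: 4kB granule supported at stage 2
 */
static bool s2_supports_4k(u64 mmfr0)
{
	u64 tgran4_2 = (mmfr0 >> 40) & 0xf;
	u64 tgran4   = (mmfr0 >> 28) & 0xf;	/* [31:28]; 0b1111 == unsupported */

	if (tgran4_2 == 0b0000)
		return tgran4 != 0b1111;

	return tgran4_2 >= 0b0010;
}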

> 
> > +		/* Advertize supported S2 page sizes */
> > +		switch (PAGE_SIZE) {
> > +		case SZ_4K:
> > +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0010);
> > +			fallthrough;
> > +		case SZ_16K:
> > +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0010);
> > +			fallthrough;
> > +		case SZ_64K:
> > +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN64_2), 0b0010);
> > +			break;
> > +		}
> > +		/* Cap PARange to 40bits */
> 
> Any specific reasons for the 40 bit cap?

The only platform this currently runs on is a model, and 1TB of
address space is what it supports. At some point, this will require
userspace involvement to set it up, but we're not quite ready for that
either. And given that there is no HW, the urge for changing this is
extremely limited.
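
For reference, the PARange encodings (per the Arm ARM) map as follows,
which is why 0b0010 corresponds to the model's 1TB:

	PARange  PA bits  Address space
	0b0000   32       4GB
	0b0001   36       64GB
	0b0010   40       1TB
	0b0011   42       4TB
	0b0100   44       16TB
	0b0101   48       256TB
	0b0110   52       4PB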

	M.
Ganapatrao Kulkarni Dec. 21, 2021, 6:03 a.m. UTC | #3
On 20-12-2021 03:26 pm, Marc Zyngier wrote:
> On Mon, 20 Dec 2021 07:26:50 +0000,
> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>
>>
>> Hi Marc,
>>
>> On 30-11-2021 01:31 am, Marc Zyngier wrote:
>>> As there is a number of features that we either can't support,
>>> or don't want to support right away with NV, let's add some
>>> basic filtering so that we don't advertize silly things to the
>>> EL2 guest.
>>>
>>> Whilst we are at it, avertize ARMv8.4-TTL as well as ARMv8.5-GTG.
>>>
>>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>>> ---
>>>    arch/arm64/include/asm/kvm_nested.h |   6 ++
>>>    arch/arm64/kvm/nested.c             | 152 ++++++++++++++++++++++++++++
>>>    arch/arm64/kvm/sys_regs.c           |   4 +-
>>>    arch/arm64/kvm/sys_regs.h           |   2 +
>>>    4 files changed, 163 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
>>> index 07c15f51cf86..026ddaad972c 100644
>>> --- a/arch/arm64/include/asm/kvm_nested.h
>>> +++ b/arch/arm64/include/asm/kvm_nested.h
>>> @@ -67,4 +67,10 @@ extern bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg,
>>>    extern bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit);
>>>    extern bool forward_nv_traps(struct kvm_vcpu *vcpu);
>>>    +struct sys_reg_params;
>>> +struct sys_reg_desc;
>>> +
>>> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
>>> +			  const struct sys_reg_desc *r);
>>> +
>>>    #endif /* __ARM64_KVM_NESTED_H */
>>> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
>>> index 42a96c8d2adc..19b674983e13 100644
>>> --- a/arch/arm64/kvm/nested.c
>>> +++ b/arch/arm64/kvm/nested.c
>>> @@ -20,6 +20,10 @@
>>>    #include <linux/kvm_host.h>
>>>      #include <asm/kvm_emulate.h>
>>> +#include <asm/kvm_nested.h>
>>> +#include <asm/sysreg.h>
>>> +
>>> +#include "sys_regs.h"
>>>      /*
>>>     * Inject wfx to the virtual EL2 if this is not from the virtual EL2 and
>>> @@ -38,3 +42,151 @@ int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe)
>>>      	return -EINVAL;
>>>    }
>>> +
>>> +/*
>>> + * Our emulated CPU doesn't support all the possible features. For the
>>> + * sake of simplicity (and probably mental sanity), wipe out a number
>>> + * of feature bits we don't intend to support for the time being.
>>> + * This list should get updated as new features get added to the NV
>>> + * support, and new extension to the architecture.
>>> + */
>>> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
>>> +			  const struct sys_reg_desc *r)
>>> +{
>>> +	u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
>>> +			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
>>> +	u64 val, tmp;
>>> +
>>> +	if (!nested_virt_in_use(v))
>>> +		return;
>>> +
>>> +	val = p->regval;
>>> +
>>> +	switch (id) {
>>> +	case SYS_ID_AA64ISAR0_EL1:
>>> +		/* Support everything but O.S. and Range TLBIs */
>>> +		val &= ~(FEATURE(ID_AA64ISAR0_TLB)	|
>>> +			 GENMASK_ULL(27, 24)		|
>>> +			 GENMASK_ULL(3, 0));
>>> +		break;
>>> +
>>> +	case SYS_ID_AA64ISAR1_EL1:
>>> +		/* Support everything but PtrAuth and Spec Invalidation */
>>> +		val &= ~(GENMASK_ULL(63, 56)		|
>>> +			 FEATURE(ID_AA64ISAR1_SPECRES)	|
>>> +			 FEATURE(ID_AA64ISAR1_GPI)	|
>>> +			 FEATURE(ID_AA64ISAR1_GPA)	|
>>> +			 FEATURE(ID_AA64ISAR1_API)	|
>>> +			 FEATURE(ID_AA64ISAR1_APA));
>>> +		break;
>>> +
>>> +	case SYS_ID_AA64PFR0_EL1:
>>> +		/* No AMU, MPAM, S-EL2, RAS or SVE */
>>> +		val &= ~(GENMASK_ULL(55, 52)		|
>>> +			 FEATURE(ID_AA64PFR0_AMU)	|
>>> +			 FEATURE(ID_AA64PFR0_MPAM)	|
>>> +			 FEATURE(ID_AA64PFR0_SEL2)	|
>>> +			 FEATURE(ID_AA64PFR0_RAS)	|
>>> +			 FEATURE(ID_AA64PFR0_SVE)	|
>>> +			 FEATURE(ID_AA64PFR0_EL3)	|
>>> +			 FEATURE(ID_AA64PFR0_EL2));
>>> +		/* 64bit EL2/EL3 only */
>>> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL2), 0b0001);
>>> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL3), 0b0001);
>>> +		break;
>>> +
>>> +	case SYS_ID_AA64PFR1_EL1:
>>> +		/* Only support SSBS */
>>> +		val &= FEATURE(ID_AA64PFR1_SSBS);
>>> +		break;
>>> +
>>> +	case SYS_ID_AA64MMFR0_EL1:
>>> +		/* Hide ECV, FGT, ExS, Secure Memory */
>>> +		val &= ~(GENMASK_ULL(63, 43)			|
>>> +			 FEATURE(ID_AA64MMFR0_TGRAN4_2)		|
>>> +			 FEATURE(ID_AA64MMFR0_TGRAN16_2)	|
>>> +			 FEATURE(ID_AA64MMFR0_TGRAN64_2)	|
>>> +			 FEATURE(ID_AA64MMFR0_SNSMEM));
>>> +
>>> +		/* Disallow unsupported S2 page sizes */
>>> +		switch (PAGE_SIZE) {
>>> +		case SZ_64K:
>>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0001);
>>> +			fallthrough;
>>> +		case SZ_16K:
>>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0001);
>>> +			fallthrough;
>>> +		case SZ_4K:
>>> +			/* Support everything */
>>> +			break;
>>> +		}
>>
>> It seems to me that Host hypervisor(L0) has to boot with 4KB page size
>> to support all (4, 16 and 64KB) page sizes at L1, any specific reason
>> for this restriction?
> 
> Well, yes.
> 
> If you have a L0 that has booted with (let's say) 64kB page size, how
> do you provide S2 mappings with 4kB granularity so that you can
> implement the permissions that a L1 guest hypervisor can impose on its
> own guest, given that KVM currently mandates S1 and S2 to use the same
> page sizes?
> 
> You can't. That's why we tell the guest hypervisor how much we
> support, and the guest hypervisor can decide to go ahead or not
> depending on what it does.
> 
> If one day we can support S2 mappings that are smaller than the host
> page sizes, then we'll be able to allow to advertise all page sizes.
> But I wouldn't hold my breath for this to happen.

Thanks for the detailed explanation!
Can we put in a one-line comment that explains why this manipulation is
needed? It would be helpful to see a comment along the lines of: the S2
PAGE_SIZE has to be at least the size of the host PAGE_SIZE.

> 
>>
>>> +		/* Advertize supported S2 page sizes */
>>> +		switch (PAGE_SIZE) {
>>> +		case SZ_4K:
>>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0010);
>>> +			fallthrough;
>>> +		case SZ_16K:
>>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0010);
>>> +			fallthrough;
>>> +		case SZ_64K:
>>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN64_2), 0b0010);
>>> +			break;
>>> +		}
>>> +		/* Cap PARange to 40bits */
>>
>> Any specific reasons for the 40 bit cap?
> 
> The only platform this currently runs on is a model, and 1TB of
> address space is what it supports. At some point, this will require
> userspace involvement to set it up, but we're not quite ready for that
> either. And given that there is no HW, the urge for changing this is
> extremely limited.
Makes sense, thanks.

Please feel free to add.
Reviewed-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>

> 
> 	M.
> 

Thanks,
Ganapat
Marc Zyngier Dec. 21, 2021, 9:10 a.m. UTC | #4
On Tue, 21 Dec 2021 06:03:49 +0000,
Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
> 
> 
> 
> On 20-12-2021 03:26 pm, Marc Zyngier wrote:
> > On Mon, 20 Dec 2021 07:26:50 +0000,
> > Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
> >> 
> >> 
> >> Hi Marc,
> >> 
> >> On 30-11-2021 01:31 am, Marc Zyngier wrote:
> >>> As there is a number of features that we either can't support,
> >>> or don't want to support right away with NV, let's add some
> >>> basic filtering so that we don't advertize silly things to the
> >>> EL2 guest.
> >>> 
> >>> Whilst we are at it, avertize ARMv8.4-TTL as well as ARMv8.5-GTG.
> >>> 
> >>> Signed-off-by: Marc Zyngier <maz@kernel.org>
> >>> ---
> >>>    arch/arm64/include/asm/kvm_nested.h |   6 ++
> >>>    arch/arm64/kvm/nested.c             | 152 ++++++++++++++++++++++++++++
> >>>    arch/arm64/kvm/sys_regs.c           |   4 +-
> >>>    arch/arm64/kvm/sys_regs.h           |   2 +
> >>>    4 files changed, 163 insertions(+), 1 deletion(-)
> >>> 
> >>> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
> >>> index 07c15f51cf86..026ddaad972c 100644
> >>> --- a/arch/arm64/include/asm/kvm_nested.h
> >>> +++ b/arch/arm64/include/asm/kvm_nested.h
> >>> @@ -67,4 +67,10 @@ extern bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg,
> >>>    extern bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit);
> >>>    extern bool forward_nv_traps(struct kvm_vcpu *vcpu);
> >>>    +struct sys_reg_params;
> >>> +struct sys_reg_desc;
> >>> +
> >>> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> >>> +			  const struct sys_reg_desc *r);
> >>> +
> >>>    #endif /* __ARM64_KVM_NESTED_H */
> >>> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> >>> index 42a96c8d2adc..19b674983e13 100644
> >>> --- a/arch/arm64/kvm/nested.c
> >>> +++ b/arch/arm64/kvm/nested.c
> >>> @@ -20,6 +20,10 @@
> >>>    #include <linux/kvm_host.h>
> >>>      #include <asm/kvm_emulate.h>
> >>> +#include <asm/kvm_nested.h>
> >>> +#include <asm/sysreg.h>
> >>> +
> >>> +#include "sys_regs.h"
> >>>      /*
> >>>     * Inject wfx to the virtual EL2 if this is not from the virtual EL2 and
> >>> @@ -38,3 +42,151 @@ int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe)
> >>>      	return -EINVAL;
> >>>    }
> >>> +
> >>> +/*
> >>> + * Our emulated CPU doesn't support all the possible features. For the
> >>> + * sake of simplicity (and probably mental sanity), wipe out a number
> >>> + * of feature bits we don't intend to support for the time being.
> >>> + * This list should get updated as new features get added to the NV
> >>> + * support, and new extension to the architecture.
> >>> + */
> >>> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> >>> +			  const struct sys_reg_desc *r)
> >>> +{
> >>> +	u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
> >>> +			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
> >>> +	u64 val, tmp;
> >>> +
> >>> +	if (!nested_virt_in_use(v))
> >>> +		return;
> >>> +
> >>> +	val = p->regval;
> >>> +
> >>> +	switch (id) {
> >>> +	case SYS_ID_AA64ISAR0_EL1:
> >>> +		/* Support everything but O.S. and Range TLBIs */
> >>> +		val &= ~(FEATURE(ID_AA64ISAR0_TLB)	|
> >>> +			 GENMASK_ULL(27, 24)		|
> >>> +			 GENMASK_ULL(3, 0));
> >>> +		break;
> >>> +
> >>> +	case SYS_ID_AA64ISAR1_EL1:
> >>> +		/* Support everything but PtrAuth and Spec Invalidation */
> >>> +		val &= ~(GENMASK_ULL(63, 56)		|
> >>> +			 FEATURE(ID_AA64ISAR1_SPECRES)	|
> >>> +			 FEATURE(ID_AA64ISAR1_GPI)	|
> >>> +			 FEATURE(ID_AA64ISAR1_GPA)	|
> >>> +			 FEATURE(ID_AA64ISAR1_API)	|
> >>> +			 FEATURE(ID_AA64ISAR1_APA));
> >>> +		break;
> >>> +
> >>> +	case SYS_ID_AA64PFR0_EL1:
> >>> +		/* No AMU, MPAM, S-EL2, RAS or SVE */
> >>> +		val &= ~(GENMASK_ULL(55, 52)		|
> >>> +			 FEATURE(ID_AA64PFR0_AMU)	|
> >>> +			 FEATURE(ID_AA64PFR0_MPAM)	|
> >>> +			 FEATURE(ID_AA64PFR0_SEL2)	|
> >>> +			 FEATURE(ID_AA64PFR0_RAS)	|
> >>> +			 FEATURE(ID_AA64PFR0_SVE)	|
> >>> +			 FEATURE(ID_AA64PFR0_EL3)	|
> >>> +			 FEATURE(ID_AA64PFR0_EL2));
> >>> +		/* 64bit EL2/EL3 only */
> >>> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL2), 0b0001);
> >>> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL3), 0b0001);
> >>> +		break;
> >>> +
> >>> +	case SYS_ID_AA64PFR1_EL1:
> >>> +		/* Only support SSBS */
> >>> +		val &= FEATURE(ID_AA64PFR1_SSBS);
> >>> +		break;
> >>> +
> >>> +	case SYS_ID_AA64MMFR0_EL1:
> >>> +		/* Hide ECV, FGT, ExS, Secure Memory */
> >>> +		val &= ~(GENMASK_ULL(63, 43)			|
> >>> +			 FEATURE(ID_AA64MMFR0_TGRAN4_2)		|
> >>> +			 FEATURE(ID_AA64MMFR0_TGRAN16_2)	|
> >>> +			 FEATURE(ID_AA64MMFR0_TGRAN64_2)	|
> >>> +			 FEATURE(ID_AA64MMFR0_SNSMEM));
> >>> +
> >>> +		/* Disallow unsupported S2 page sizes */
> >>> +		switch (PAGE_SIZE) {
> >>> +		case SZ_64K:
> >>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0001);
> >>> +			fallthrough;
> >>> +		case SZ_16K:
> >>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0001);
> >>> +			fallthrough;
> >>> +		case SZ_4K:
> >>> +			/* Support everything */
> >>> +			break;
> >>> +		}
> >> 
> >> It seems to me that Host hypervisor(L0) has to boot with 4KB page size
> >> to support all (4, 16 and 64KB) page sizes at L1, any specific reason
> >> for this restriction?
> > 
> > Well, yes.
> > 
> > If you have a L0 that has booted with (let's say) 64kB page size, how
> > do you provide S2 mappings with 4kB granularity so that you can
> > implement the permissions that a L1 guest hypervisor can impose on its
> > own guest, given that KVM currently mandates S1 and S2 to use the same
> > page sizes?
> > 
> > You can't. That's why we tell the guest hypervisor how much we
> > support, and the guest hypervisor can decide to go ahead or not
> > depending on what it does.
> > 
> > If one day we can support S2 mappings that are smaller than the host
> > page sizes, then we'll be able to allow to advertise all page sizes.
> > But I wouldn't hold my breath for this to happen.
> 
> Thanks for the detailed explanation!.
> Can we put one line comment that explains why this manipulation?
> It would be helpful to see a comment like S2 PAGE_SIZE should be
> at-least the size of Host PAGE_SIZE?

Can do, but we need to get the terminology straight, because this is
very quickly becoming confusing. Something like:

diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 7c9dd1edf011..d35a947f5679 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -850,7 +850,12 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
 			/* Support everything */
 			break;
 		}
-		/* Advertize supported S2 page sizes */
+		/*
+		 * Since we can't support a guest S2 page size smaller than
+		 * the host's own page size (due to KVM only populating its
+		 * own S2 using the kernel's page size), advertise the
+		 * limitation using FEAT_GTG.
+		 */
 		switch (PAGE_SIZE) {
 		case SZ_4K:
 			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0010);

So not quite a one-line comment! ;-)

Ultimately, all there is to know is in the description of FEAT_GTG in
the ARMv8 ARM.
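
Put differently, after the two PAGE_SIZE switches quoted earlier, the
fields end up as follows depending on the host page size (0b0001 = not
supported at stage 2, 0b0010 = supported):

	Host PAGE_SIZE  TGRAN4_2  TGRAN16_2  TGRAN64_2
	4kB             0b0010    0b0010     0b0010
	16kB            0b0001    0b0010     0b0010
	64kB            0b0001    0b0001     0b0010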

Thanks,

	M.
Ganapatrao Kulkarni Dec. 21, 2021, 10:07 a.m. UTC | #5
On 21-12-2021 02:40 pm, Marc Zyngier wrote:
> On Tue, 21 Dec 2021 06:03:49 +0000,
> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>
>>
>>
>> On 20-12-2021 03:26 pm, Marc Zyngier wrote:
>>> On Mon, 20 Dec 2021 07:26:50 +0000,
>>> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>>>
>>>>
>>>> Hi Marc,
>>>>
>>>> On 30-11-2021 01:31 am, Marc Zyngier wrote:
>>>>> As there is a number of features that we either can't support,
>>>>> or don't want to support right away with NV, let's add some
>>>>> basic filtering so that we don't advertize silly things to the
>>>>> EL2 guest.
>>>>>
>>>>> Whilst we are at it, avertize ARMv8.4-TTL as well as ARMv8.5-GTG.
>>>>>
>>>>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>>>>> ---
>>>>>     arch/arm64/include/asm/kvm_nested.h |   6 ++
>>>>>     arch/arm64/kvm/nested.c             | 152 ++++++++++++++++++++++++++++
>>>>>     arch/arm64/kvm/sys_regs.c           |   4 +-
>>>>>     arch/arm64/kvm/sys_regs.h           |   2 +
>>>>>     4 files changed, 163 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
>>>>> index 07c15f51cf86..026ddaad972c 100644
>>>>> --- a/arch/arm64/include/asm/kvm_nested.h
>>>>> +++ b/arch/arm64/include/asm/kvm_nested.h
>>>>> @@ -67,4 +67,10 @@ extern bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg,
>>>>>     extern bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit);
>>>>>     extern bool forward_nv_traps(struct kvm_vcpu *vcpu);
>>>>>     +struct sys_reg_params;
>>>>> +struct sys_reg_desc;
>>>>> +
>>>>> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
>>>>> +			  const struct sys_reg_desc *r);
>>>>> +
>>>>>     #endif /* __ARM64_KVM_NESTED_H */
>>>>> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
>>>>> index 42a96c8d2adc..19b674983e13 100644
>>>>> --- a/arch/arm64/kvm/nested.c
>>>>> +++ b/arch/arm64/kvm/nested.c
>>>>> @@ -20,6 +20,10 @@
>>>>>     #include <linux/kvm_host.h>
>>>>>       #include <asm/kvm_emulate.h>
>>>>> +#include <asm/kvm_nested.h>
>>>>> +#include <asm/sysreg.h>
>>>>> +
>>>>> +#include "sys_regs.h"
>>>>>       /*
>>>>>      * Inject wfx to the virtual EL2 if this is not from the virtual EL2 and
>>>>> @@ -38,3 +42,151 @@ int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe)
>>>>>       	return -EINVAL;
>>>>>     }
>>>>> +
>>>>> +/*
>>>>> + * Our emulated CPU doesn't support all the possible features. For the
>>>>> + * sake of simplicity (and probably mental sanity), wipe out a number
>>>>> + * of feature bits we don't intend to support for the time being.
>>>>> + * This list should get updated as new features get added to the NV
>>>>> + * support, and new extension to the architecture.
>>>>> + */
>>>>> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
>>>>> +			  const struct sys_reg_desc *r)
>>>>> +{
>>>>> +	u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
>>>>> +			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
>>>>> +	u64 val, tmp;
>>>>> +
>>>>> +	if (!nested_virt_in_use(v))
>>>>> +		return;
>>>>> +
>>>>> +	val = p->regval;
>>>>> +
>>>>> +	switch (id) {
>>>>> +	case SYS_ID_AA64ISAR0_EL1:
>>>>> +		/* Support everything but O.S. and Range TLBIs */
>>>>> +		val &= ~(FEATURE(ID_AA64ISAR0_TLB)	|
>>>>> +			 GENMASK_ULL(27, 24)		|
>>>>> +			 GENMASK_ULL(3, 0));
>>>>> +		break;
>>>>> +
>>>>> +	case SYS_ID_AA64ISAR1_EL1:
>>>>> +		/* Support everything but PtrAuth and Spec Invalidation */
>>>>> +		val &= ~(GENMASK_ULL(63, 56)		|
>>>>> +			 FEATURE(ID_AA64ISAR1_SPECRES)	|
>>>>> +			 FEATURE(ID_AA64ISAR1_GPI)	|
>>>>> +			 FEATURE(ID_AA64ISAR1_GPA)	|
>>>>> +			 FEATURE(ID_AA64ISAR1_API)	|
>>>>> +			 FEATURE(ID_AA64ISAR1_APA));
>>>>> +		break;
>>>>> +
>>>>> +	case SYS_ID_AA64PFR0_EL1:
>>>>> +		/* No AMU, MPAM, S-EL2, RAS or SVE */
>>>>> +		val &= ~(GENMASK_ULL(55, 52)		|
>>>>> +			 FEATURE(ID_AA64PFR0_AMU)	|
>>>>> +			 FEATURE(ID_AA64PFR0_MPAM)	|
>>>>> +			 FEATURE(ID_AA64PFR0_SEL2)	|
>>>>> +			 FEATURE(ID_AA64PFR0_RAS)	|
>>>>> +			 FEATURE(ID_AA64PFR0_SVE)	|
>>>>> +			 FEATURE(ID_AA64PFR0_EL3)	|
>>>>> +			 FEATURE(ID_AA64PFR0_EL2));
>>>>> +		/* 64bit EL2/EL3 only */
>>>>> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL2), 0b0001);
>>>>> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL3), 0b0001);
>>>>> +		break;
>>>>> +
>>>>> +	case SYS_ID_AA64PFR1_EL1:
>>>>> +		/* Only support SSBS */
>>>>> +		val &= FEATURE(ID_AA64PFR1_SSBS);
>>>>> +		break;
>>>>> +
>>>>> +	case SYS_ID_AA64MMFR0_EL1:
>>>>> +		/* Hide ECV, FGT, ExS, Secure Memory */
>>>>> +		val &= ~(GENMASK_ULL(63, 43)			|
>>>>> +			 FEATURE(ID_AA64MMFR0_TGRAN4_2)		|
>>>>> +			 FEATURE(ID_AA64MMFR0_TGRAN16_2)	|
>>>>> +			 FEATURE(ID_AA64MMFR0_TGRAN64_2)	|
>>>>> +			 FEATURE(ID_AA64MMFR0_SNSMEM));
>>>>> +
>>>>> +		/* Disallow unsupported S2 page sizes */
>>>>> +		switch (PAGE_SIZE) {
>>>>> +		case SZ_64K:
>>>>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0001);
>>>>> +			fallthrough;
>>>>> +		case SZ_16K:
>>>>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0001);
>>>>> +			fallthrough;
>>>>> +		case SZ_4K:
>>>>> +			/* Support everything */
>>>>> +			break;
>>>>> +		}
>>>>
>>>> It seems to me that Host hypervisor(L0) has to boot with 4KB page size
>>>> to support all (4, 16 and 64KB) page sizes at L1, any specific reason
>>>> for this restriction?
>>>
>>> Well, yes.
>>>
>>> If you have a L0 that has booted with (let's say) 64kB page size, how
>>> do you provide S2 mappings with 4kB granularity so that you can
>>> implement the permissions that a L1 guest hypervisor can impose on its
>>> own guest, given that KVM currently mandates S1 and S2 to use the same
>>> page sizes?
>>>
>>> You can't. That's why we tell the guest hypervisor how much we
>>> support, and the guest hypervisor can decide to go ahead or not
>>> depending on what it does.
>>>
>>> If one day we can support S2 mappings that are smaller than the host
>>> page sizes, then we'll be able to allow to advertise all page sizes.
>>> But I wouldn't hold my breath for this to happen.
>>
>> Thanks for the detailed explanation!.
>> Can we put one line comment that explains why this manipulation?
>> It would be helpful to see a comment like S2 PAGE_SIZE should be
>> at-least the size of Host PAGE_SIZE?
> 
> Can do, but we need to get the terminology straight, because this is
> very quickly becoming confusing. Something like:
> 
> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> index 7c9dd1edf011..d35a947f5679 100644
> --- a/arch/arm64/kvm/nested.c
> +++ b/arch/arm64/kvm/nested.c
> @@ -850,7 +850,12 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
>   			/* Support everything */
>   			break;
>   		}
> -		/* Advertize supported S2 page sizes */
> +		/*
> +		 * Since we can't support a guest S2 page size smaller than
> +		 * the host's own page size (due to KVM only populating its
> +		 * own S2 using the kernel's page size), advertise the
> +		 * limitation using FEAT_GTG.

Thanks, this helps!
> +		 */
>   		switch (PAGE_SIZE) {
>   		case SZ_4K:
>   			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0010);
> 
> So not quite a one line comment! ;-)
Indeed, thanks.
> 
> Ultimately, all there is to know is in the description of FEAT_GTG in
> the ARMv8 ARM.
> 
> Thanks,
> 
> 	M.
> 

Thanks,
Ganapat
Ganapatrao Kulkarni Jan. 4, 2022, 10:24 a.m. UTC | #6
On 30-11-2021 01:31 am, Marc Zyngier wrote:
> As there is a number of features that we either can't support,
> or don't want to support right away with NV, let's add some
> basic filtering so that we don't advertize silly things to the
> EL2 guest.
> 
> Whilst we are at it, avertize ARMv8.4-TTL as well as ARMv8.5-GTG.
> 

Typo: advertize.

> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>   arch/arm64/include/asm/kvm_nested.h |   6 ++
>   arch/arm64/kvm/nested.c             | 152 ++++++++++++++++++++++++++++
>   arch/arm64/kvm/sys_regs.c           |   4 +-
>   arch/arm64/kvm/sys_regs.h           |   2 +
>   4 files changed, 163 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
> index 07c15f51cf86..026ddaad972c 100644
> --- a/arch/arm64/include/asm/kvm_nested.h
> +++ b/arch/arm64/include/asm/kvm_nested.h
> @@ -67,4 +67,10 @@ extern bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg,
>   extern bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit);
>   extern bool forward_nv_traps(struct kvm_vcpu *vcpu);
>   
> +struct sys_reg_params;
> +struct sys_reg_desc;
> +
> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> +			  const struct sys_reg_desc *r);
> +
>   #endif /* __ARM64_KVM_NESTED_H */
> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> index 42a96c8d2adc..19b674983e13 100644
> --- a/arch/arm64/kvm/nested.c
> +++ b/arch/arm64/kvm/nested.c
> @@ -20,6 +20,10 @@
>   #include <linux/kvm_host.h>
>   
>   #include <asm/kvm_emulate.h>
> +#include <asm/kvm_nested.h>
> +#include <asm/sysreg.h>
> +
> +#include "sys_regs.h"
>   
>   /*
>    * Inject wfx to the virtual EL2 if this is not from the virtual EL2 and
> @@ -38,3 +42,151 @@ int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe)
>   
>   	return -EINVAL;
>   }
> +
> +/*
> + * Our emulated CPU doesn't support all the possible features. For the
> + * sake of simplicity (and probably mental sanity), wipe out a number
> + * of feature bits we don't intend to support for the time being.
> + * This list should get updated as new features get added to the NV
> + * support, and new extension to the architecture.
> + */
> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> +			  const struct sys_reg_desc *r)
> +{
> +	u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
> +			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
> +	u64 val, tmp;
> +
> +	if (!nested_virt_in_use(v))
> +		return;
> +
> +	val = p->regval;
> +
> +	switch (id) {
> +	case SYS_ID_AA64ISAR0_EL1:
> +		/* Support everything but O.S. and Range TLBIs */
> +		val &= ~(FEATURE(ID_AA64ISAR0_TLB)	|
> +			 GENMASK_ULL(27, 24)		|
> +			 GENMASK_ULL(3, 0));
> +		break;
> +
> +	case SYS_ID_AA64ISAR1_EL1:
> +		/* Support everything but PtrAuth and Spec Invalidation */
> +		val &= ~(GENMASK_ULL(63, 56)		|
> +			 FEATURE(ID_AA64ISAR1_SPECRES)	|
> +			 FEATURE(ID_AA64ISAR1_GPI)	|
> +			 FEATURE(ID_AA64ISAR1_GPA)	|
> +			 FEATURE(ID_AA64ISAR1_API)	|
> +			 FEATURE(ID_AA64ISAR1_APA));
> +		break;
> +
> +	case SYS_ID_AA64PFR0_EL1:
> +		/* No AMU, MPAM, S-EL2, RAS or SVE */
> +		val &= ~(GENMASK_ULL(55, 52)		|
> +			 FEATURE(ID_AA64PFR0_AMU)	|
> +			 FEATURE(ID_AA64PFR0_MPAM)	|
> +			 FEATURE(ID_AA64PFR0_SEL2)	|
> +			 FEATURE(ID_AA64PFR0_RAS)	|
> +			 FEATURE(ID_AA64PFR0_SVE)	|
> +			 FEATURE(ID_AA64PFR0_EL3)	|
> +			 FEATURE(ID_AA64PFR0_EL2));
> +		/* 64bit EL2/EL3 only */
> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL2), 0b0001);
> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL3), 0b0001);
> +		break;
> +
> +	case SYS_ID_AA64PFR1_EL1:
> +		/* Only support SSBS */
> +		val &= FEATURE(ID_AA64PFR1_SSBS);
> +		break;
> +
> +	case SYS_ID_AA64MMFR0_EL1:
> +		/* Hide ECV, FGT, ExS, Secure Memory */
> +		val &= ~(GENMASK_ULL(63, 43)			|
> +			 FEATURE(ID_AA64MMFR0_TGRAN4_2)		|
> +			 FEATURE(ID_AA64MMFR0_TGRAN16_2)	|
> +			 FEATURE(ID_AA64MMFR0_TGRAN64_2)	|
> +			 FEATURE(ID_AA64MMFR0_SNSMEM));
> +
> +		/* Disallow unsupported S2 page sizes */
> +		switch (PAGE_SIZE) {
> +		case SZ_64K:
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0001);
> +			fallthrough;
> +		case SZ_16K:
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0001);
> +			fallthrough;
> +		case SZ_4K:
> +			/* Support everything */
> +			break;
> +		}
> +		/* Advertize supported S2 page sizes */
> +		switch (PAGE_SIZE) {
> +		case SZ_4K:
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0010);
> +			fallthrough;
> +		case SZ_16K:
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0010);
> +			fallthrough;
> +		case SZ_64K:
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN64_2), 0b0010);
> +			break;
> +		}
> +		/* Cap PARange to 40bits */
> +		tmp = FIELD_GET(FEATURE(ID_AA64MMFR0_PARANGE), val);
> +		if (tmp > 0b0010) {
> +			val &= ~FEATURE(ID_AA64MMFR0_PARANGE);
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_PARANGE), 0b0010);
> +		}
> +		break;
> +
> +	case SYS_ID_AA64MMFR1_EL1:
> +		val &= (FEATURE(ID_AA64MMFR1_PAN)	|
> +			FEATURE(ID_AA64MMFR1_LOR)	|
> +			FEATURE(ID_AA64MMFR1_HPD)	|
> +			FEATURE(ID_AA64MMFR1_VHE)	|
> +			FEATURE(ID_AA64MMFR1_VMIDBITS));
> +		break;
> +
> +	case SYS_ID_AA64MMFR2_EL1:
> +		val &= ~(FEATURE(ID_AA64MMFR2_EVT)	|
> +			 FEATURE(ID_AA64MMFR2_BBM)	|
> +			 FEATURE(ID_AA64MMFR2_TTL)	|
> +			 GENMASK_ULL(47, 44)		|
> +			 FEATURE(ID_AA64MMFR2_ST)	|
> +			 FEATURE(ID_AA64MMFR2_CCIDX)	|
> +			 FEATURE(ID_AA64MMFR2_LVA));
> +
> +		/* Force TTL support */
> +		val |= FIELD_PREP(FEATURE(ID_AA64MMFR2_TTL), 0b0001);
> +		break;
> +
> +	case SYS_ID_AA64DFR0_EL1:
> +		/* Only limited support for PMU, Debug, BPs and WPs */
> +		val &= (FEATURE(ID_AA64DFR0_PMSVER)	|
> +			FEATURE(ID_AA64DFR0_WRPS)	|
> +			FEATURE(ID_AA64DFR0_BRPS)	|
> +			FEATURE(ID_AA64DFR0_DEBUGVER));
> +
> +		/* Cap PMU to ARMv8.1 */
> +		tmp = FIELD_GET(FEATURE(ID_AA64DFR0_PMUVER), val);
> +		if (tmp > 0b0100) {
> +			val &= ~FEATURE(ID_AA64DFR0_PMUVER);
> +			val |= FIELD_PREP(FEATURE(ID_AA64DFR0_PMUVER), 0b0100);
> +		}
> +		/* Cap Debug to ARMv8.1 */
> +		tmp = FIELD_GET(FEATURE(ID_AA64DFR0_DEBUGVER), val);
> +		if (tmp > 0b0111) {
> +			val &= ~FEATURE(ID_AA64DFR0_DEBUGVER);
> +			val |= FIELD_PREP(FEATURE(ID_AA64DFR0_DEBUGVER), 0b0111);
> +		}
> +		break;
> +
> +	default:
> +		/* Unknown register, just wipe it clean */
> +		val = 0;
> +		break;
> +	}
> +
> +	p->regval = val;
> +}
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 9deedd5a058f..19b33ccb61b8 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -1431,8 +1431,10 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
>   			  const struct sys_reg_desc *r)
>   {
>   	bool raz = sysreg_visible_as_raz(vcpu, r);
> +	bool ret = __access_id_reg(vcpu, p, r, raz);
>   
> -	return __access_id_reg(vcpu, p, r, raz);
> +	access_nested_id_reg(vcpu, p, r);
> +	return ret;
>   }
>   
>   static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
> diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
> index cc0cc95a0280..d260c26b1834 100644
> --- a/arch/arm64/kvm/sys_regs.h
> +++ b/arch/arm64/kvm/sys_regs.h
> @@ -201,4 +201,6 @@ const struct sys_reg_desc *find_reg_by_id(u64 id,
>   	CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)),	\
>   	Op2(sys_reg_Op2(reg))
>   
> +#define FEATURE(x)	(GENMASK_ULL(x##_SHIFT + 3, x##_SHIFT))
> +
>   #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */

Thanks,
Ganapat
Ganapatrao Kulkarni Jan. 21, 2022, 11:33 a.m. UTC | #7
Hi Marc,

On 21-12-2021 02:40 pm, Marc Zyngier wrote:
> On Tue, 21 Dec 2021 06:03:49 +0000,
> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>
>>
>>
>> On 20-12-2021 03:26 pm, Marc Zyngier wrote:
>>> On Mon, 20 Dec 2021 07:26:50 +0000,
>>> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>>>
>>>>
>>>> Hi Marc,
>>>>
>>>> On 30-11-2021 01:31 am, Marc Zyngier wrote:
>>>>> As there is a number of features that we either can't support,
>>>>> or don't want to support right away with NV, let's add some
>>>>> basic filtering so that we don't advertize silly things to the
>>>>> EL2 guest.
>>>>>
>>>>> Whilst we are at it, avertize ARMv8.4-TTL as well as ARMv8.5-GTG.
>>>>>
>>>>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>>>>> ---
>>>>>     arch/arm64/include/asm/kvm_nested.h |   6 ++
>>>>>     arch/arm64/kvm/nested.c             | 152 ++++++++++++++++++++++++++++
>>>>>     arch/arm64/kvm/sys_regs.c           |   4 +-
>>>>>     arch/arm64/kvm/sys_regs.h           |   2 +
>>>>>     4 files changed, 163 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
>>>>> index 07c15f51cf86..026ddaad972c 100644
>>>>> --- a/arch/arm64/include/asm/kvm_nested.h
>>>>> +++ b/arch/arm64/include/asm/kvm_nested.h
>>>>> @@ -67,4 +67,10 @@ extern bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg,
>>>>>     extern bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit);
>>>>>     extern bool forward_nv_traps(struct kvm_vcpu *vcpu);
>>>>>     +struct sys_reg_params;
>>>>> +struct sys_reg_desc;
>>>>> +
>>>>> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
>>>>> +			  const struct sys_reg_desc *r);
>>>>> +
>>>>>     #endif /* __ARM64_KVM_NESTED_H */
>>>>> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
>>>>> index 42a96c8d2adc..19b674983e13 100644
>>>>> --- a/arch/arm64/kvm/nested.c
>>>>> +++ b/arch/arm64/kvm/nested.c
>>>>> @@ -20,6 +20,10 @@
>>>>>     #include <linux/kvm_host.h>
>>>>>       #include <asm/kvm_emulate.h>
>>>>> +#include <asm/kvm_nested.h>
>>>>> +#include <asm/sysreg.h>
>>>>> +
>>>>> +#include "sys_regs.h"
>>>>>       /*
>>>>>      * Inject wfx to the virtual EL2 if this is not from the virtual EL2 and
>>>>> @@ -38,3 +42,151 @@ int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe)
>>>>>       	return -EINVAL;
>>>>>     }
>>>>> +
>>>>> +/*
>>>>> + * Our emulated CPU doesn't support all the possible features. For the
>>>>> + * sake of simplicity (and probably mental sanity), wipe out a number
>>>>> + * of feature bits we don't intend to support for the time being.
>>>>> + * This list should get updated as new features get added to the NV
>>>>> + * support, and new extension to the architecture.
>>>>> + */
>>>>> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
>>>>> +			  const struct sys_reg_desc *r)
>>>>> +{
>>>>> +	u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
>>>>> +			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
>>>>> +	u64 val, tmp;
>>>>> +
>>>>> +	if (!nested_virt_in_use(v))
>>>>> +		return;
>>>>> +
>>>>> +	val = p->regval;
>>>>> +
>>>>> +	switch (id) {
>>>>> +	case SYS_ID_AA64ISAR0_EL1:
>>>>> +		/* Support everything but O.S. and Range TLBIs */
>>>>> +		val &= ~(FEATURE(ID_AA64ISAR0_TLB)	|
>>>>> +			 GENMASK_ULL(27, 24)		|
>>>>> +			 GENMASK_ULL(3, 0));
>>>>> +		break;
>>>>> +
>>>>> +	case SYS_ID_AA64ISAR1_EL1:
>>>>> +		/* Support everything but PtrAuth and Spec Invalidation */
>>>>> +		val &= ~(GENMASK_ULL(63, 56)		|
>>>>> +			 FEATURE(ID_AA64ISAR1_SPECRES)	|
>>>>> +			 FEATURE(ID_AA64ISAR1_GPI)	|
>>>>> +			 FEATURE(ID_AA64ISAR1_GPA)	|
>>>>> +			 FEATURE(ID_AA64ISAR1_API)	|
>>>>> +			 FEATURE(ID_AA64ISAR1_APA));
>>>>> +		break;
>>>>> +
>>>>> +	case SYS_ID_AA64PFR0_EL1:
>>>>> +		/* No AMU, MPAM, S-EL2, RAS or SVE */
>>>>> +		val &= ~(GENMASK_ULL(55, 52)		|
>>>>> +			 FEATURE(ID_AA64PFR0_AMU)	|
>>>>> +			 FEATURE(ID_AA64PFR0_MPAM)	|
>>>>> +			 FEATURE(ID_AA64PFR0_SEL2)	|
>>>>> +			 FEATURE(ID_AA64PFR0_RAS)	|
>>>>> +			 FEATURE(ID_AA64PFR0_SVE)	|
>>>>> +			 FEATURE(ID_AA64PFR0_EL3)	|
>>>>> +			 FEATURE(ID_AA64PFR0_EL2));
>>>>> +		/* 64bit EL2/EL3 only */
>>>>> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL2), 0b0001);
>>>>> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL3), 0b0001);
>>>>> +		break;
>>>>> +
>>>>> +	case SYS_ID_AA64PFR1_EL1:
>>>>> +		/* Only support SSBS */
>>>>> +		val &= FEATURE(ID_AA64PFR1_SSBS);
>>>>> +		break;
>>>>> +
>>>>> +	case SYS_ID_AA64MMFR0_EL1:
>>>>> +		/* Hide ECV, FGT, ExS, Secure Memory */
>>>>> +		val &= ~(GENMASK_ULL(63, 43)			|
>>>>> +			 FEATURE(ID_AA64MMFR0_TGRAN4_2)		|
>>>>> +			 FEATURE(ID_AA64MMFR0_TGRAN16_2)	|
>>>>> +			 FEATURE(ID_AA64MMFR0_TGRAN64_2)	|
>>>>> +			 FEATURE(ID_AA64MMFR0_SNSMEM));
>>>>> +
>>>>> +		/* Disallow unsupported S2 page sizes */
>>>>> +		switch (PAGE_SIZE) {
>>>>> +		case SZ_64K:
>>>>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0001);
>>>>> +			fallthrough;
>>>>> +		case SZ_16K:
>>>>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0001);
>>>>> +			fallthrough;
>>>>> +		case SZ_4K:
>>>>> +			/* Support everything */
>>>>> +			break;
>>>>> +		}
>>>>
>>>> It seems to me that Host hypervisor(L0) has to boot with 4KB page size
>>>> to support all (4, 16 and 64KB) page sizes at L1, any specific reason
>>>> for this restriction?
>>>
>>> Well, yes.
>>>
>>> If you have a L0 that has booted with (let's say) 64kB page size, how
>>> do you provide S2 mappings with 4kB granularity so that you can
>>> implement the permissions that a L1 guest hypervisor can impose on its
>>> own guest, given that KVM currently mandates S1 and S2 to use the same
>>> page sizes?
>>>
>>> You can't. That's why we tell the guest hypervisor how much we
>>> support, and the guest hypervisor can decide to go ahead or not
>>> depending on what it does.
>>>
>>> If one day we can support S2 mappings that are smaller than the host
>>> page sizes, then we'll be able to allow to advertise all page sizes.
>>> But I wouldn't hold my breath for this to happen.
>>
>> Thanks for the detailed explanation!.
>> Can we put one line comment that explains why this manipulation?
>> It would be helpful to see a comment like S2 PAGE_SIZE should be
>> at-least the size of Host PAGE_SIZE?
> 
> Can do, but we need to get the terminology straight, because this is
> very quickly becoming confusing. Something like:
> 
> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> index 7c9dd1edf011..d35a947f5679 100644
> --- a/arch/arm64/kvm/nested.c
> +++ b/arch/arm64/kvm/nested.c
> @@ -850,7 +850,12 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
>   			/* Support everything */
>   			break;
>   		}
> -		/* Advertize supported S2 page sizes */
> +		/*
> +		 * Since we can't support a guest S2 page size smaller than
> +		 * the host's own page size (due to KVM only populating its
> +		 * own S2 using the kernel's page size), advertise the
> +		 * limitation using FEAT_GTG.
> +		 */

I have tried booting L0 with a 4K page size and L1 with 64K, and with
this config the L2/nested VM boot hangs. I have tried L2 with both 4K
and 64K page sizes (though the S1 page size of L2 should not matter?).


>   		switch (PAGE_SIZE) {
>   		case SZ_4K:
>   			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0010);
> 
> So not quite a one line comment! ;-)
> 
> Ultimately, all there is to know is in the description of FEAT_GTG in
> the ARMv8 ARM.
> 
> Thanks,
> 
> 	M.
> 

Thanks,
Ganapat
Marc Zyngier Jan. 27, 2022, 1:04 p.m. UTC | #8
On Fri, 21 Jan 2022 11:33:30 +0000,
Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
> 
> Hi Marc,
> 
> On 21-12-2021 02:40 pm, Marc Zyngier wrote:
> > On Tue, 21 Dec 2021 06:03:49 +0000,
> > Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
> >> 
> >> 
> >> 
> >> On 20-12-2021 03:26 pm, Marc Zyngier wrote:
> >>> On Mon, 20 Dec 2021 07:26:50 +0000,
> >>> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
> >>>> 
> >>>> 
> >>>> Hi Marc,
> >>>> 
> >>>> On 30-11-2021 01:31 am, Marc Zyngier wrote:
> >>>>> As there is a number of features that we either can't support,
> >>>>> or don't want to support right away with NV, let's add some
> >>>>> basic filtering so that we don't advertize silly things to the
> >>>>> EL2 guest.
> >>>>> 
> >>>>> Whilst we are at it, avertize ARMv8.4-TTL as well as ARMv8.5-GTG.
> >>>>> 
> >>>>> Signed-off-by: Marc Zyngier <maz@kernel.org>
> >>>>> ---
> >>>>>     arch/arm64/include/asm/kvm_nested.h |   6 ++
> >>>>>     arch/arm64/kvm/nested.c             | 152 ++++++++++++++++++++++++++++
> >>>>>     arch/arm64/kvm/sys_regs.c           |   4 +-
> >>>>>     arch/arm64/kvm/sys_regs.h           |   2 +
> >>>>>     4 files changed, 163 insertions(+), 1 deletion(-)
> >>>>> 
> >>>>> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
> >>>>> index 07c15f51cf86..026ddaad972c 100644
> >>>>> --- a/arch/arm64/include/asm/kvm_nested.h
> >>>>> +++ b/arch/arm64/include/asm/kvm_nested.h
> >>>>> @@ -67,4 +67,10 @@ extern bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg,
> >>>>>     extern bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit);
> >>>>>     extern bool forward_nv_traps(struct kvm_vcpu *vcpu);
> >>>>>     +struct sys_reg_params;
> >>>>> +struct sys_reg_desc;
> >>>>> +
> >>>>> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> >>>>> +			  const struct sys_reg_desc *r);
> >>>>> +
> >>>>>     #endif /* __ARM64_KVM_NESTED_H */
> >>>>> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> >>>>> index 42a96c8d2adc..19b674983e13 100644
> >>>>> --- a/arch/arm64/kvm/nested.c
> >>>>> +++ b/arch/arm64/kvm/nested.c
> >>>>> @@ -20,6 +20,10 @@
> >>>>>     #include <linux/kvm_host.h>
> >>>>>       #include <asm/kvm_emulate.h>
> >>>>> +#include <asm/kvm_nested.h>
> >>>>> +#include <asm/sysreg.h>
> >>>>> +
> >>>>> +#include "sys_regs.h"
> >>>>>       /*
> >>>>>      * Inject wfx to the virtual EL2 if this is not from the virtual EL2 and
> >>>>> @@ -38,3 +42,151 @@ int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe)
> >>>>>       	return -EINVAL;
> >>>>>     }
> >>>>> +
> >>>>> +/*
> >>>>> + * Our emulated CPU doesn't support all the possible features. For the
> >>>>> + * sake of simplicity (and probably mental sanity), wipe out a number
> >>>>> + * of feature bits we don't intend to support for the time being.
> >>>>> + * This list should get updated as new features get added to the NV
> >>>>> + * support, and new extension to the architecture.
> >>>>> + */
> >>>>> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> >>>>> +			  const struct sys_reg_desc *r)
> >>>>> +{
> >>>>> +	u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
> >>>>> +			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
> >>>>> +	u64 val, tmp;
> >>>>> +
> >>>>> +	if (!nested_virt_in_use(v))
> >>>>> +		return;
> >>>>> +
> >>>>> +	val = p->regval;
> >>>>> +
> >>>>> +	switch (id) {
> >>>>> +	case SYS_ID_AA64ISAR0_EL1:
> >>>>> +		/* Support everything but O.S. and Range TLBIs */
> >>>>> +		val &= ~(FEATURE(ID_AA64ISAR0_TLB)	|
> >>>>> +			 GENMASK_ULL(27, 24)		|
> >>>>> +			 GENMASK_ULL(3, 0));
> >>>>> +		break;
> >>>>> +
> >>>>> +	case SYS_ID_AA64ISAR1_EL1:
> >>>>> +		/* Support everything but PtrAuth and Spec Invalidation */
> >>>>> +		val &= ~(GENMASK_ULL(63, 56)		|
> >>>>> +			 FEATURE(ID_AA64ISAR1_SPECRES)	|
> >>>>> +			 FEATURE(ID_AA64ISAR1_GPI)	|
> >>>>> +			 FEATURE(ID_AA64ISAR1_GPA)	|
> >>>>> +			 FEATURE(ID_AA64ISAR1_API)	|
> >>>>> +			 FEATURE(ID_AA64ISAR1_APA));
> >>>>> +		break;
> >>>>> +
> >>>>> +	case SYS_ID_AA64PFR0_EL1:
> >>>>> +		/* No AMU, MPAM, S-EL2, RAS or SVE */
> >>>>> +		val &= ~(GENMASK_ULL(55, 52)		|
> >>>>> +			 FEATURE(ID_AA64PFR0_AMU)	|
> >>>>> +			 FEATURE(ID_AA64PFR0_MPAM)	|
> >>>>> +			 FEATURE(ID_AA64PFR0_SEL2)	|
> >>>>> +			 FEATURE(ID_AA64PFR0_RAS)	|
> >>>>> +			 FEATURE(ID_AA64PFR0_SVE)	|
> >>>>> +			 FEATURE(ID_AA64PFR0_EL3)	|
> >>>>> +			 FEATURE(ID_AA64PFR0_EL2));
> >>>>> +		/* 64bit EL2/EL3 only */
> >>>>> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL2), 0b0001);
> >>>>> +		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL3), 0b0001);
> >>>>> +		break;
> >>>>> +
> >>>>> +	case SYS_ID_AA64PFR1_EL1:
> >>>>> +		/* Only support SSBS */
> >>>>> +		val &= FEATURE(ID_AA64PFR1_SSBS);
> >>>>> +		break;
> >>>>> +
> >>>>> +	case SYS_ID_AA64MMFR0_EL1:
> >>>>> +		/* Hide ECV, FGT, ExS, Secure Memory */
> >>>>> +		val &= ~(GENMASK_ULL(63, 43)			|
> >>>>> +			 FEATURE(ID_AA64MMFR0_TGRAN4_2)		|
> >>>>> +			 FEATURE(ID_AA64MMFR0_TGRAN16_2)	|
> >>>>> +			 FEATURE(ID_AA64MMFR0_TGRAN64_2)	|
> >>>>> +			 FEATURE(ID_AA64MMFR0_SNSMEM));
> >>>>> +
> >>>>> +		/* Disallow unsupported S2 page sizes */
> >>>>> +		switch (PAGE_SIZE) {
> >>>>> +		case SZ_64K:
> >>>>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0001);
> >>>>> +			fallthrough;
> >>>>> +		case SZ_16K:
> >>>>> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0001);
> >>>>> +			fallthrough;
> >>>>> +		case SZ_4K:
> >>>>> +			/* Support everything */
> >>>>> +			break;
> >>>>> +		}
> >>>> 
> >>>> It seems to me that the host hypervisor (L0) has to boot with a 4KB
> >>>> page size to support all (4, 16 and 64KB) page sizes at L1. Is there
> >>>> any specific reason for this restriction?
> >>> 
> >>> Well, yes.
> >>> 
> >>> If you have an L0 that has booted with (let's say) a 64kB page size,
> >>> how do you provide S2 mappings with 4kB granularity so that you can
> >>> implement the permissions that an L1 guest hypervisor can impose on
> >>> its own guest, given that KVM currently mandates that S1 and S2 use
> >>> the same page size?
> >>> 
> >>> You can't. That's why we tell the guest hypervisor how much we
> >>> support, and the guest hypervisor can decide to go ahead or not
> >>> depending on what it does.
> >>> 
> >>> If one day we can support S2 mappings that are smaller than the host
> >>> page size, then we'll be able to advertise all page sizes.
> >>> But I wouldn't hold my breath for this to happen.
> >> 
> >> Thanks for the detailed explanation!
> >> Could we add a one-line comment that explains why this manipulation is
> >> needed? It would be helpful to see a comment saying that the S2 page
> >> size must be at least the host PAGE_SIZE.
> > 
> > Can do, but we need to get the terminology straight, because this is
> > very quickly becoming confusing. Something like:
> > 
> > diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> > index 7c9dd1edf011..d35a947f5679 100644
> > --- a/arch/arm64/kvm/nested.c
> > +++ b/arch/arm64/kvm/nested.c
> > @@ -850,7 +850,12 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> >   			/* Support everything */
> >   			break;
> >   		}
> > -		/* Advertise supported S2 page sizes */
> > +		/*
> > +		 * Since we can't support a guest S2 page size smaller than
> > +		 * the host's own page size (due to KVM only populating its
> > +		 * own S2 using the kernel's page size), advertise the
> > +		 * limitation using FEAT_GTG.
> > +		 */
> 
> I have tried booting L0 with a 4K page size and L1 with 64K, and with
> this config the L2/nested VM boot hangs. I have tried L2 with both 4K
> and 64K page sizes (though the S1 page size of L2 should not matter?).

S1 shouldn't matter, but if that's what you are seeing, there is
obviously an issue trying to satisfy a translation fault in this
configuration, and we get stuck.

Could you please add some tracing and work out whether this is the
case? I bet this is an issue trying to combine the L1 S2 translation
(which will be 64kB aligned) with the faulting IPA address, but I
can't immediately pinpoint the bug.
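
Something like the below (just a sketch; kvm_handle_guest_abort() is
merely a convenient choke point, and the exact fields worth dumping
will depend on where the nested walk result gets combined with the
faulting address) would at least tell us whether we keep taking
translation faults on the same IPA:

	/*
	 * Debug aid, assuming placement in kvm_handle_guest_abort()
	 * right after fault_ipa is read via kvm_vcpu_get_fault_ipa():
	 * log every abort so that a livelock on a single IPA shows up
	 * immediately in the trace buffer.
	 */
	trace_printk("guest abort: fault_ipa = %#llx\n",
		     (unsigned long long)fault_ipa);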

Thanks,

	M.
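
For reference, the FEAT_GTG encodings the patch manipulates are meant
to be consumed by the guest hypervisor roughly as follows (a sketch
only: the helper name is made up, and FEATURE() is borrowed from the
patch's sys_regs.h for brevity — real guest code would define its own
masks). Per the architecture, a TGRANx_2 field of 0b0000 means "look
at the corresponding TGRANx field instead", 0b0001 means "not
supported at stage-2", and 0b0010 means "supported":

	#include <linux/bitfield.h>

	/* Hypothetical guest-side probe for a 16kB stage-2 granule */
	static bool can_use_16k_s2(u64 mmfr0)
	{
		u64 t16_2 = FIELD_GET(FEATURE(ID_AA64MMFR0_TGRAN16_2), mmfr0);

		if (t16_2 == 0b0000)	/* pre-FEAT_GTG: defer to the S1 field */
			return FIELD_GET(FEATURE(ID_AA64MMFR0_TGRAN16),
					 mmfr0) == 0b0001;

		return t16_2 >= 0b0010;
	}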
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 07c15f51cf86..026ddaad972c 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -67,4 +67,10 @@  extern bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg,
 extern bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit);
 extern bool forward_nv_traps(struct kvm_vcpu *vcpu);
 
+struct sys_reg_params;
+struct sys_reg_desc;
+
+void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
+			  const struct sys_reg_desc *r);
+
 #endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 42a96c8d2adc..19b674983e13 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -20,6 +20,10 @@ 
 #include <linux/kvm_host.h>
 
 #include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
+#include <asm/sysreg.h>
+
+#include "sys_regs.h"
 
 /*
  * Inject wfx to the virtual EL2 if this is not from the virtual EL2 and
@@ -38,3 +42,151 @@  int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe)
 
 	return -EINVAL;
 }
+
+/*
+ * Our emulated CPU doesn't support all the possible features. For the
+ * sake of simplicity (and probably mental sanity), wipe out a number
+ * of feature bits we don't intend to support for the time being.
+ * This list should get updated as new features get added to the NV
+ * support, and as new extensions are added to the architecture.
+ */
+void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
+			  const struct sys_reg_desc *r)
+{
+	u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
+			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
+	u64 val, tmp;
+
+	if (!nested_virt_in_use(v))
+		return;
+
+	val = p->regval;
+
+	switch (id) {
+	case SYS_ID_AA64ISAR0_EL1:
+		/* Support everything but O.S. and Range TLBIs */
+		val &= ~(FEATURE(ID_AA64ISAR0_TLB)	|
+			 GENMASK_ULL(27, 24)		|
+			 GENMASK_ULL(3, 0));
+		break;
+
+	case SYS_ID_AA64ISAR1_EL1:
+		/* Support everything but PtrAuth and Spec Invalidation */
+		val &= ~(GENMASK_ULL(63, 56)		|
+			 FEATURE(ID_AA64ISAR1_SPECRES)	|
+			 FEATURE(ID_AA64ISAR1_GPI)	|
+			 FEATURE(ID_AA64ISAR1_GPA)	|
+			 FEATURE(ID_AA64ISAR1_API)	|
+			 FEATURE(ID_AA64ISAR1_APA));
+		break;
+
+	case SYS_ID_AA64PFR0_EL1:
+		/* No AMU, MPAM, S-EL2, RAS or SVE */
+		val &= ~(GENMASK_ULL(55, 52)		|
+			 FEATURE(ID_AA64PFR0_AMU)	|
+			 FEATURE(ID_AA64PFR0_MPAM)	|
+			 FEATURE(ID_AA64PFR0_SEL2)	|
+			 FEATURE(ID_AA64PFR0_RAS)	|
+			 FEATURE(ID_AA64PFR0_SVE)	|
+			 FEATURE(ID_AA64PFR0_EL3)	|
+			 FEATURE(ID_AA64PFR0_EL2));
+		/* 64bit EL2/EL3 only */
+		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL2), 0b0001);
+		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL3), 0b0001);
+		break;
+
+	case SYS_ID_AA64PFR1_EL1:
+		/* Only support SSBS */
+		val &= FEATURE(ID_AA64PFR1_SSBS);
+		break;
+
+	case SYS_ID_AA64MMFR0_EL1:
+		/* Hide ECV, FGT, ExS, Secure Memory */
+		val &= ~(GENMASK_ULL(63, 43)			|
+			 FEATURE(ID_AA64MMFR0_TGRAN4_2)		|
+			 FEATURE(ID_AA64MMFR0_TGRAN16_2)	|
+			 FEATURE(ID_AA64MMFR0_TGRAN64_2)	|
+			 FEATURE(ID_AA64MMFR0_SNSMEM));
+
+		/* Disallow unsupported S2 page sizes */
+		switch (PAGE_SIZE) {
+		case SZ_64K:
+			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0001);
+			fallthrough;
+		case SZ_16K:
+			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0001);
+			fallthrough;
+		case SZ_4K:
+			/* Support everything */
+			break;
+		}
+		/* Advertise supported S2 page sizes */
+		switch (PAGE_SIZE) {
+		case SZ_4K:
+			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN4_2), 0b0010);
+			fallthrough;
+		case SZ_16K:
+			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN16_2), 0b0010);
+			fallthrough;
+		case SZ_64K:
+			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_TGRAN64_2), 0b0010);
+			break;
+		}
+		/* Cap PARange to 40bits */
+		tmp = FIELD_GET(FEATURE(ID_AA64MMFR0_PARANGE), val);
+		if (tmp > 0b0010) {
+			val &= ~FEATURE(ID_AA64MMFR0_PARANGE);
+			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_PARANGE), 0b0010);
+		}
+		break;
+
+	case SYS_ID_AA64MMFR1_EL1:
+		val &= (FEATURE(ID_AA64MMFR1_PAN)	|
+			FEATURE(ID_AA64MMFR1_LOR)	|
+			FEATURE(ID_AA64MMFR1_HPD)	|
+			FEATURE(ID_AA64MMFR1_VHE)	|
+			FEATURE(ID_AA64MMFR1_VMIDBITS));
+		break;
+
+	case SYS_ID_AA64MMFR2_EL1:
+		val &= ~(FEATURE(ID_AA64MMFR2_EVT)	|
+			 FEATURE(ID_AA64MMFR2_BBM)	|
+			 FEATURE(ID_AA64MMFR2_TTL)	|
+			 GENMASK_ULL(47, 44)		|
+			 FEATURE(ID_AA64MMFR2_ST)	|
+			 FEATURE(ID_AA64MMFR2_CCIDX)	|
+			 FEATURE(ID_AA64MMFR2_LVA));
+
+		/* Force TTL support */
+		val |= FIELD_PREP(FEATURE(ID_AA64MMFR2_TTL), 0b0001);
+		break;
+
+	case SYS_ID_AA64DFR0_EL1:
+		/* Only limited support for PMU, Debug, BPs and WPs */
+		val &= (FEATURE(ID_AA64DFR0_PMSVER)	|
+			FEATURE(ID_AA64DFR0_WRPS)	|
+			FEATURE(ID_AA64DFR0_BRPS)	|
+			FEATURE(ID_AA64DFR0_DEBUGVER));
+
+		/* Cap PMU to ARMv8.1 */
+		tmp = FIELD_GET(FEATURE(ID_AA64DFR0_PMUVER), val);
+		if (tmp > 0b0100) {
+			val &= ~FEATURE(ID_AA64DFR0_PMUVER);
+			val |= FIELD_PREP(FEATURE(ID_AA64DFR0_PMUVER), 0b0100);
+		}
+		/* Cap Debug to ARMv8.1 */
+		tmp = FIELD_GET(FEATURE(ID_AA64DFR0_DEBUGVER), val);
+		if (tmp > 0b0111) {
+			val &= ~FEATURE(ID_AA64DFR0_DEBUGVER);
+			val |= FIELD_PREP(FEATURE(ID_AA64DFR0_DEBUGVER), 0b0111);
+		}
+		break;
+
+	default:
+		/* Unknown register, just wipe it clean */
+		val = 0;
+		break;
+	}
+
+	p->regval = val;
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 9deedd5a058f..19b33ccb61b8 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1431,8 +1431,10 @@  static bool access_id_reg(struct kvm_vcpu *vcpu,
 			  const struct sys_reg_desc *r)
 {
 	bool raz = sysreg_visible_as_raz(vcpu, r);
+	bool ret = __access_id_reg(vcpu, p, r, raz);
 
-	return __access_id_reg(vcpu, p, r, raz);
+	access_nested_id_reg(vcpu, p, r);
+	return ret;
 }
 
 static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index cc0cc95a0280..d260c26b1834 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -201,4 +201,6 @@  const struct sys_reg_desc *find_reg_by_id(u64 id,
 	CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)),	\
 	Op2(sys_reg_Op2(reg))
 
+#define FEATURE(x)	(GENMASK_ULL(x##_SHIFT + 3, x##_SHIFT))
+
 #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
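
For completeness, FEATURE() simply builds the 4-bit mask of an ID
register field from its *_SHIFT definition (all the fields touched by
this patch are 4 bits wide), which is what lets the patch pair it
with the <linux/bitfield.h> accessors. Taking SVE, whose shift is 32,
as an example:

	/*
	 * FEATURE(ID_AA64PFR0_SVE) expands to
	 * GENMASK_ULL(ID_AA64PFR0_SVE_SHIFT + 3, ID_AA64PFR0_SVE_SHIFT),
	 * i.e. 0x0000000f00000000ULL, covering bits [35:32].
	 */
	u64 sve = FIELD_GET(FEATURE(ID_AA64PFR0_SVE), val);  /* extract */

	val &= ~FEATURE(ID_AA64PFR0_SVE);                    /* clear */
	val |= FIELD_PREP(FEATURE(ID_AA64PFR0_SVE), 0b0001); /* set to 1 */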