diff mbox series

[RFC,v3,03/29] KVM: arm64: Introduce struct id_reg_info

Message ID 20211117064359.2362060-4-reijiw@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Make CPU ID registers writable by userspace | expand

Commit Message

Reiji Watanabe Nov. 17, 2021, 6:43 a.m. UTC
This patch lays the groundwork to make ID registers writable.

Introduce struct id_reg_info for an ID register to manage the
register specific control of its value for the guest, and provide set
of functions commonly used for ID registers to make them writable.

The id_reg_info is used to do register specific initialization,
validation of the ID register and etc.  Not all ID registers must
have the id_reg_info. ID registers that don't have the id_reg_info
are handled in a common way that is applied to all ID registers.

At present, changing an ID register from userspace is allowed only
if the ID register has the id_reg_info, but that will be changed
by the following patches.

No ID register has the structure yet and the following patches
will add the id_reg_info for some ID registers.

Signed-off-by: Reiji Watanabe <reijiw@google.com>
---
 arch/arm64/include/asm/sysreg.h |   1 +
 arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
 2 files changed, 218 insertions(+), 9 deletions(-)

Comments

Eric Auger Nov. 18, 2021, 8:36 p.m. UTC | #1
Hi Reiji,

On 11/17/21 7:43 AM, Reiji Watanabe wrote:
> This patch lays the groundwork to make ID registers writable.
> 
> Introduce struct id_reg_info for an ID register to manage the
> register specific control of its value for the guest, and provide set
> of functions commonly used for ID registers to make them writable.
> 
> The id_reg_info is used to do register specific initialization,
> validation of the ID register and etc.  Not all ID registers must
> have the id_reg_info. ID registers that don't have the id_reg_info
> are handled in a common way that is applied to all ID registers.
> 
> At present, changing an ID register from userspace is allowed only
> if the ID register has the id_reg_info, but that will be changed
> by the following patches.
> 
> No ID register has the structure yet and the following patches
> will add the id_reg_info for some ID registers.
> 
> Signed-off-by: Reiji Watanabe <reijiw@google.com>
> ---
>  arch/arm64/include/asm/sysreg.h |   1 +
>  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
>  2 files changed, 218 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 16b3f1a1d468..597609f26331 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -1197,6 +1197,7 @@
>  #define ICH_VTR_TDS_MASK	(1 << ICH_VTR_TDS_SHIFT)
>  
>  #define ARM64_FEATURE_FIELD_BITS	4
> +#define ARM64_FEATURE_FIELD_MASK	((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
>  
>  /* Create a mask for the feature bits of the specified feature. */
>  #define ARM64_FEATURE_MASK(x)	(GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 5608d3410660..1552cd5581b7 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
>  		return read_zero(vcpu, p);
>  }
>  
> +/*
> + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> + * ftr_check_types of id_reg_info.
> + */
> +enum feature_check_type {
> +	FCT_LOWER_SAFE = 0,
> +	FCT_HIGHER_SAFE,
> +	FCT_HIGHER_OR_ZERO_SAFE,
> +	FCT_EXACT,
> +	FCT_EXACT_OR_ZERO_SAFE,
> +	FCT_IGNORE,	/* Don't check (any value is fine) */
Maybe you can remove the _SAFE suffix (EXACT does not have it).
s/EXACT/EQUAL ?
> +};
> +
> +static int arm64_check_feature_one(enum feature_check_type type, int val,
> +				   int limit)
> +{
> +	bool is_safe = false;
> +
> +	if (val == limit)
> +		return 0;
even if the type is unexpected?
> +
> +	switch (type) {
> +	case FCT_LOWER_SAFE:
> +		is_safe = (val <= limit);
> +		break;
> +	case FCT_HIGHER_OR_ZERO_SAFE:
> +		if (val == 0) {
> +			is_safe = true;
> +			break;
> +		}
> +		fallthrough;
> +	case FCT_HIGHER_SAFE:
> +		is_safe = (val >= limit);
> +		break;
> +	case FCT_EXACT:
> +		break;
> +	case FCT_EXACT_OR_ZERO_SAFE:
> +		is_safe = (val == 0);
> +		break;
> +	case FCT_IGNORE:
> +		is_safe = true;
> +		break;
> +	default:
> +		WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
> +		break;
> +	}
> +
> +	return is_safe ? 0 : -1;
> +}
> +
> +#define	FCT_TYPE_MASK		0x7
> +#define	FCT_TYPE_SHIFT		1
> +#define	FCT_SIGN_MASK		0x1
> +#define	FCT_SIGN_SHIFT		0
> +#define	FCT_TYPE(val)	((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
> +#define	FCT_SIGN(val)	((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
> +
> +#define	MAKE_FCT(shift, type, sign)				\
> +	((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |	\
> +	       (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
> +
> +/* For signed field */
> +#define	S_FCT(shift, type)	MAKE_FCT(shift, type, 1)
> +/* For unigned field */
> +#define	U_FCT(shift, type)	MAKE_FCT(shift, type, 0)
> +
> +/*
> + * @val and @lim are both a value of the ID register. The function checks
> + * if all features indicated in @val can be supported for guests on the host,
> + * which supports features indicated in @lim. @check_types indicates how> + * features in the ID register needs to be checked.
> + * See comments for id_reg_info's ftr_check_types field for more detail.
What about RES0 fields which may exist? add a comment to reassure about
the fact they are properly handled if there are?
> + */
> +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
> +{
> +	int i;
> +
> +	for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
> +		u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
> +		bool is_sign = FCT_SIGN(ftr_check);
> +		enum feature_check_type fctype = FCT_TYPE(ftr_check);
> +		int fval, flim, ret;
> +
> +		fval = cpuid_feature_extract_field(val, i, is_sign);
> +		flim = cpuid_feature_extract_field(lim, i, is_sign);
> +
> +		ret = arm64_check_feature_one(fctype, fval, flim);
> +		if (ret)
> +			return -E2BIG;
> +	}
> +	return 0;
> +}
> +
> +struct id_reg_info {
> +	u32	sys_reg;	/* Register ID */
use struct kernel-doc comments instead?
> +
> +	/*
> +	 * Limit value of the register for a vcpu. The value is the sanitized
> +	 * system value with bits cleared for unsupported features for the
> +	 * guest.
> +	 */
> +	u64	vcpu_limit_val;
> +
> +	/*
> +	 * The ftr_check_types is comprised of a set of 4 bits fields.
nit: s/bits field/bit field here and below
> +	 * Each 4 bits field is for a feature indicated by the same bits
> +	 * field of the ID register and indicates how the feature support
> +	 * for guests needs to be checked.
> +	 * The bit 0 indicates that the corresponding ID register field
> +	 * is signed(1) or unsigned(0).
> +	 * The bits [3:1] hold feature_check_type for the field.
> +	 * If all zero, all features in the ID register are treated as unsigned
> +	 * fields and checked based on Principles of the ID scheme for fields
> +	 * in ID registers (FCT_LOWER_SAFE of feature_check_type).
values set by the guest are checked against host ID field values
according to FCT_LOWER_SAFE test? You do not actually explicitly explain
what the check is about although this may be obvious for you?
> +	 */
> +	u64	ftr_check_types;
> +
> +	/* Initialization function of the id_reg_info */
> +	void (*init)(struct id_reg_info *id_reg);
> +
> +	/* Register specific validation function */
validation callback? it does not register anything. We have check
customization means already in ftr_check_types so it is difficult to
guess at that point why this cb is needed, all the more so it applies
after the ftr_checks.
> +	int (*validate)(struct kvm_vcpu *vcpu, const struct id_reg_info *id_reg,
> +			u64 val);
> +
> +	/* Return the reset value of the register for the vCPU */
> +	u64 (*get_reset_val)(struct kvm_vcpu *vcpu,
> +			     const struct id_reg_info *id_reg);
> +};
> +
> +static void id_reg_info_init(struct id_reg_info *id_reg)
> +{
> +	id_reg->vcpu_limit_val = read_sanitised_ftr_reg(id_reg->sys_reg);
> +	if (id_reg->init)
> +		id_reg->init(id_reg);
> +}
> +
> +/*
> + * An ID register that needs special handling to control the value for the
> + * guest must have its own id_reg_info in id_reg_info_table.
> + * (i.e. the reset value is different from the host's sanitized value,
> + * the value is affected by opt-in features, some fields needs specific
s/needs/need
> + * validation, etc.)
> + */
> +#define	GET_ID_REG_INFO(id)	(id_reg_info_table[IDREG_IDX(id)])
> +static struct id_reg_info *id_reg_info_table[KVM_ARM_ID_REG_MAX_NUM] = {};
> +
> +static int validate_id_reg(struct kvm_vcpu *vcpu,
> +			   const struct sys_reg_desc *rd, u64 val)
> +{
> +	u32 id = reg_to_encoding(rd);
> +	const struct id_reg_info *id_reg = GET_ID_REG_INFO(id);
> +	u64 limit, check_types;
> +	int err;
> +
> +	if (id_reg) {
> +		check_types = id_reg->ftr_check_types;
> +		limit = id_reg->vcpu_limit_val;
> +	} else {
> +		/* All fields are treated as unsigned and FCT_LOWER_SAFE */
> +		check_types = 0;
> +		limit = read_sanitised_ftr_reg(id);
> +	}
> +
> +	/* Check if the value indicates any feature that is not in the limit. */
> +	err = arm64_check_features(check_types, val, limit);
> +	if (err)
> +		return err;
> +
> +	if (id_reg && id_reg->validate)
> +		/* Run the ID register specific validity check. */
> +		err = id_reg->validate(vcpu, id_reg, val);
> +
> +	return err;
> +}
> +
>  /*
>   * ARMv8.1 mandates at least a trivial LORegion implementation, where all the
>   * RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0
> @@ -1183,11 +1358,19 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
>  static void reset_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
>  {
>  	u32 id = reg_to_encoding(rd);
> +	struct id_reg_info *id_reg;
> +	u64 val;
>  
>  	if (vcpu_has_reset_once(vcpu))
>  		return;
>  
> -	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(id)) = read_sanitised_ftr_reg(id);
> +	id_reg = GET_ID_REG_INFO(id);
> +	if (id_reg && id_reg->get_reset_val)
> +		val = id_reg->get_reset_val(vcpu, id_reg);
> +	else
> +		val = read_sanitised_ftr_reg(id);
> +
> +	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(id)) = val;
>  }
>  
>  static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
> @@ -1232,11 +1415,7 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
>  	return 0;
>  }
>  
> -/*
> - * cpufeature ID register user accessors
> - *
> - * We don't allow the effective value to be changed.
> - */
> +/* cpufeature ID register user accessors */
>  static int __get_id_reg(const struct kvm_vcpu *vcpu,
>  			const struct sys_reg_desc *rd, void __user *uaddr,
>  			bool raz)
> @@ -1247,11 +1426,12 @@ static int __get_id_reg(const struct kvm_vcpu *vcpu,
>  	return reg_to_user(uaddr, &val, id);
>  }
>  
> -static int __set_id_reg(const struct kvm_vcpu *vcpu,
> +static int __set_id_reg(struct kvm_vcpu *vcpu,
>  			const struct sys_reg_desc *rd, void __user *uaddr,
>  			bool raz)
>  {
>  	const u64 id = sys_reg_to_index(rd);
> +	u32 encoding = reg_to_encoding(rd);
>  	int err;
>  	u64 val;
>  
> @@ -1259,10 +1439,22 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu,
>  	if (err)
>  		return err;
>  
> -	/* This is what we mean by invariant: you can't change it. */
> -	if (val != read_id_reg(vcpu, rd, raz))
> +	/* Don't allow to change the reg unless the reg has id_reg_info */
> +	if (val != read_id_reg(vcpu, rd, raz) && !GET_ID_REG_INFO(encoding))
>  		return -EINVAL;
>  
> +	if (raz)
> +		return 0;
> +
> +	/* Don't allow to change the reg after the first KVM_RUN. */
> +	if (vcpu->arch.has_run_once)
> +		return -EINVAL;
> +
> +	err = validate_id_reg(vcpu, rd, val);
> +	if (err)
> +		return err;
> +
> +	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(encoding)) = val;
>  	return 0;
>  }
>  
> @@ -2826,6 +3018,20 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
>  	return write_demux_regids(uindices);
>  }
>  
> +static void id_reg_info_init_all(void)
> +{
> +	int i;
> +	struct id_reg_info *id_reg;
> +
> +	for (i = 0; i < ARRAY_SIZE(id_reg_info_table); i++) {
> +		id_reg = (struct id_reg_info *)id_reg_info_table[i];
> +		if (!id_reg)
> +			continue;
> +
> +		id_reg_info_init(id_reg);
> +	}
> +}
> +
>  void kvm_sys_reg_table_init(void)
>  {
>  	unsigned int i;
> @@ -2860,4 +3066,6 @@ void kvm_sys_reg_table_init(void)
>  			break;
>  	/* Clear all higher bits. */
>  	cache_levels &= (1 << (i*3))-1;
> +
> +	id_reg_info_init_all();
>  }
> 
Thanks

Eric
Reiji Watanabe Nov. 19, 2021, 4:47 a.m. UTC | #2
Hi Eric,

On Thu, Nov 18, 2021 at 12:36 PM Eric Auger <eauger@redhat.com> wrote:
>
> Hi Reiji,
>
> On 11/17/21 7:43 AM, Reiji Watanabe wrote:
> > This patch lays the groundwork to make ID registers writable.
> >
> > Introduce struct id_reg_info for an ID register to manage the
> > register specific control of its value for the guest, and provide set
> > of functions commonly used for ID registers to make them writable.
> >
> > The id_reg_info is used to do register specific initialization,
> > validation of the ID register and etc.  Not all ID registers must
> > have the id_reg_info. ID registers that don't have the id_reg_info
> > are handled in a common way that is applied to all ID registers.
> >
> > At present, changing an ID register from userspace is allowed only
> > if the ID register has the id_reg_info, but that will be changed
> > by the following patches.
> >
> > No ID register has the structure yet and the following patches
> > will add the id_reg_info for some ID registers.
> >
> > Signed-off-by: Reiji Watanabe <reijiw@google.com>
> > ---
> >  arch/arm64/include/asm/sysreg.h |   1 +
> >  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
> >  2 files changed, 218 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > index 16b3f1a1d468..597609f26331 100644
> > --- a/arch/arm64/include/asm/sysreg.h
> > +++ b/arch/arm64/include/asm/sysreg.h
> > @@ -1197,6 +1197,7 @@
> >  #define ICH_VTR_TDS_MASK     (1 << ICH_VTR_TDS_SHIFT)
> >
> >  #define ARM64_FEATURE_FIELD_BITS     4
> > +#define ARM64_FEATURE_FIELD_MASK     ((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
> >
> >  /* Create a mask for the feature bits of the specified feature. */
> >  #define ARM64_FEATURE_MASK(x)        (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 5608d3410660..1552cd5581b7 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
> >               return read_zero(vcpu, p);
> >  }
> >
> > +/*
> > + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> > + * ftr_check_types of id_reg_info.
> > + */
> > +enum feature_check_type {
> > +     FCT_LOWER_SAFE = 0,
> > +     FCT_HIGHER_SAFE,
> > +     FCT_HIGHER_OR_ZERO_SAFE,
> > +     FCT_EXACT,
> > +     FCT_EXACT_OR_ZERO_SAFE,
> > +     FCT_IGNORE,     /* Don't check (any value is fine) */
> Maybe you can remove the _SAFE suffix (EXACT does not have it).

I am inclined to keep 'SAFE' (otherwise, I am likely to forget
if lower is safe or not).

> s/EXACT/EQUAL ?

I will fix that FCT_EXACT to FCT_EQUAL_SAFE.

> > +};
> > +
> > +static int arm64_check_feature_one(enum feature_check_type type, int val,
> > +                                int limit)
> > +{
> > +     bool is_safe = false;
> > +
> > +     if (val == limit)
> > +             return 0;
> even if the type is unexpected?

I will remove it.

> > +
> > +     switch (type) {
> > +     case FCT_LOWER_SAFE:
> > +             is_safe = (val <= limit);
> > +             break;
> > +     case FCT_HIGHER_OR_ZERO_SAFE:
> > +             if (val == 0) {
> > +                     is_safe = true;
> > +                     break;
> > +             }
> > +             fallthrough;
> > +     case FCT_HIGHER_SAFE:
> > +             is_safe = (val >= limit);
> > +             break;
> > +     case FCT_EXACT:
> > +             break;
> > +     case FCT_EXACT_OR_ZERO_SAFE:
> > +             is_safe = (val == 0);
> > +             break;
> > +     case FCT_IGNORE:
> > +             is_safe = true;
> > +             break;
> > +     default:
> > +             WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
> > +             break;
> > +     }
> > +
> > +     return is_safe ? 0 : -1;
> > +}
> > +
> > +#define      FCT_TYPE_MASK           0x7
> > +#define      FCT_TYPE_SHIFT          1
> > +#define      FCT_SIGN_MASK           0x1
> > +#define      FCT_SIGN_SHIFT          0
> > +#define      FCT_TYPE(val)   ((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
> > +#define      FCT_SIGN(val)   ((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
> > +
> > +#define      MAKE_FCT(shift, type, sign)                             \
> > +     ((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |   \
> > +            (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
> > +
> > +/* For signed field */
> > +#define      S_FCT(shift, type)      MAKE_FCT(shift, type, 1)
> > +/* For unigned field */
> > +#define      U_FCT(shift, type)      MAKE_FCT(shift, type, 0)
> > +
> > +/*
> > + * @val and @lim are both a value of the ID register. The function checks
> > + * if all features indicated in @val can be supported for guests on the host,
> > + * which supports features indicated in @lim. @check_types indicates how> + * features in the ID register needs to be checked.
> > + * See comments for id_reg_info's ftr_check_types field for more detail.
> What about RES0 fields which may exist? add a comment to reassure about
> the fact they are properly handled if there are?

Any fields including RES0 should be checked based on check_types.
I will explicitly state that in the comment.

> > + */
> > +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
> > +{
> > +     int i;
> > +
> > +     for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
> > +             u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
> > +             bool is_sign = FCT_SIGN(ftr_check);
> > +             enum feature_check_type fctype = FCT_TYPE(ftr_check);
> > +             int fval, flim, ret;
> > +
> > +             fval = cpuid_feature_extract_field(val, i, is_sign);
> > +             flim = cpuid_feature_extract_field(lim, i, is_sign);
> > +
> > +             ret = arm64_check_feature_one(fctype, fval, flim);
> > +             if (ret)
> > +                     return -E2BIG;
> > +     }
> > +     return 0;
> > +}
> > +
> > +struct id_reg_info {
> > +     u32     sys_reg;        /* Register ID */
> use struct kernel-doc comments instead?
> > +
> > +     /*
> > +      * Limit value of the register for a vcpu. The value is the sanitized
> > +      * system value with bits cleared for unsupported features for the
> > +      * guest.
> > +      */
> > +     u64     vcpu_limit_val;
> > +
> > +     /*
> > +      * The ftr_check_types is comprised of a set of 4 bits fields.
> nit: s/bits field/bit field here and below

I will fix them.

> > +      * Each 4 bits field is for a feature indicated by the same bits
> > +      * field of the ID register and indicates how the feature support
> > +      * for guests needs to be checked.
> > +      * The bit 0 indicates that the corresponding ID register field
> > +      * is signed(1) or unsigned(0).
> > +      * The bits [3:1] hold feature_check_type for the field.
> > +      * If all zero, all features in the ID register are treated as unsigned
> > +      * fields and checked based on Principles of the ID scheme for fields
> > +      * in ID registers (FCT_LOWER_SAFE of feature_check_type).
> values set by the guest are checked against host ID field values
> according to FCT_LOWER_SAFE test? You do not actually explicitly explain
> what the check is about although this may be obvious for you?

How about this ?

        /*
         * The ftr_check_types is comprised of a set of 4 bit fields.
         * Each 4 bit field is for a feature indicated by the same bit field
         * of the ID register and indicates how the field needs to be checked
         * (by arm64_check_feature_one) against the host's ID field when
         * userspace tries to set the register.
         * The bit 0 indicates that the corresponding ID register field is
         * signed(1) or unsigned(0). The bits [3:1] hold feature_check_type
         * for the field (FCT_LOWER_SAFE == 0, etc).
         * e.g. for ID_AA64PFR0_EL1.SVE(bits [35:32]), bits[35:32] of
         * ftr_check_types for the register should be 0. It means the SVE
         * field is treated as an unsigned field, and userspace can set the
         * field to a equal or lower value than the host's ID field value.
         */

> > +      */
> > +     u64     ftr_check_types;
> > +
> > +     /* Initialization function of the id_reg_info */
> > +     void (*init)(struct id_reg_info *id_reg);
> > +
> > +     /* Register specific validation function */
> validation callback? it does not register anything. We have check
> customization means already in ftr_check_types so it is difficult to
> guess at that point why this cb is needed, all the more so it applies
> after the ftr_checks.

I am going to add the following comment. Does it look clear enough for you ?

        /*
         * This is an optional ID register specific validation function.
         * When userspace tries to set the ID register, arm64_check_features()
         * will check if the requested value indicates any features that cannot
         * be supported by KVM on the host.  But, some ID register fields need
         * a special checking and this function can be used for such fields.
         * e.g. KVM_CREATE_DEVICE must be used to configure GICv3 for a guest.
         * ID_AA64PFR0_EL1.GIC shouldn't be set to 1 unless GICv3 is configured.
         * The validation function for ID_AA64PFR0_EL1 could be used to check
         * the field is consistent with GICv3 configuration.
         */

> > +     int (*validate)(struct kvm_vcpu *vcpu, const struct id_reg_info *id_reg,
> > +                     u64 val);
> > +
> > +     /* Return the reset value of the register for the vCPU */
> > +     u64 (*get_reset_val)(struct kvm_vcpu *vcpu,
> > +                          const struct id_reg_info *id_reg);
> > +};
> > +
> > +static void id_reg_info_init(struct id_reg_info *id_reg)
> > +{
> > +     id_reg->vcpu_limit_val = read_sanitised_ftr_reg(id_reg->sys_reg);
> > +     if (id_reg->init)
> > +             id_reg->init(id_reg);
> > +}
> > +
> > +/*
> > + * An ID register that needs special handling to control the value for the
> > + * guest must have its own id_reg_info in id_reg_info_table.
> > + * (i.e. the reset value is different from the host's sanitized value,
> > + * the value is affected by opt-in features, some fields needs specific
> s/needs/need

I will fix it.

Thank you for your review !

Regards
Reiji
Marc Zyngier Nov. 21, 2021, 12:37 p.m. UTC | #3
On Wed, 17 Nov 2021 06:43:33 +0000,
Reiji Watanabe <reijiw@google.com> wrote:
> 
> This patch lays the groundwork to make ID registers writable.
> 
> Introduce struct id_reg_info for an ID register to manage the
> register specific control of its value for the guest, and provide set
> of functions commonly used for ID registers to make them writable.
> 
> The id_reg_info is used to do register specific initialization,
> validation of the ID register and etc.  Not all ID registers must
> have the id_reg_info. ID registers that don't have the id_reg_info
> are handled in a common way that is applied to all ID registers.
> 
> At present, changing an ID register from userspace is allowed only
> if the ID register has the id_reg_info, but that will be changed
> by the following patches.
> 
> No ID register has the structure yet and the following patches
> will add the id_reg_info for some ID registers.
> 
> Signed-off-by: Reiji Watanabe <reijiw@google.com>
> ---
>  arch/arm64/include/asm/sysreg.h |   1 +
>  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
>  2 files changed, 218 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 16b3f1a1d468..597609f26331 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -1197,6 +1197,7 @@
>  #define ICH_VTR_TDS_MASK	(1 << ICH_VTR_TDS_SHIFT)
>  
>  #define ARM64_FEATURE_FIELD_BITS	4
> +#define ARM64_FEATURE_FIELD_MASK	((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
>  
>  /* Create a mask for the feature bits of the specified feature. */
>  #define ARM64_FEATURE_MASK(x)	(GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 5608d3410660..1552cd5581b7 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
>  		return read_zero(vcpu, p);
>  }
>  
> +/*
> + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> + * ftr_check_types of id_reg_info.
> + */
> +enum feature_check_type {
> +	FCT_LOWER_SAFE = 0,
> +	FCT_HIGHER_SAFE,
> +	FCT_HIGHER_OR_ZERO_SAFE,
> +	FCT_EXACT,
> +	FCT_EXACT_OR_ZERO_SAFE,
> +	FCT_IGNORE,	/* Don't check (any value is fine) */
> +};
> +
> +static int arm64_check_feature_one(enum feature_check_type type, int val,
> +				   int limit)
> +{
> +	bool is_safe = false;
> +
> +	if (val == limit)
> +		return 0;
> +
> +	switch (type) {
> +	case FCT_LOWER_SAFE:
> +		is_safe = (val <= limit);
> +		break;
> +	case FCT_HIGHER_OR_ZERO_SAFE:
> +		if (val == 0) {
> +			is_safe = true;
> +			break;
> +		}
> +		fallthrough;
> +	case FCT_HIGHER_SAFE:
> +		is_safe = (val >= limit);
> +		break;
> +	case FCT_EXACT:
> +		break;
> +	case FCT_EXACT_OR_ZERO_SAFE:
> +		is_safe = (val == 0);
> +		break;
> +	case FCT_IGNORE:
> +		is_safe = true;
> +		break;
> +	default:
> +		WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
> +		break;
> +	}
> +
> +	return is_safe ? 0 : -1;
> +}
> +
> +#define	FCT_TYPE_MASK		0x7
> +#define	FCT_TYPE_SHIFT		1
> +#define	FCT_SIGN_MASK		0x1
> +#define	FCT_SIGN_SHIFT		0
> +#define	FCT_TYPE(val)	((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
> +#define	FCT_SIGN(val)	((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
> +
> +#define	MAKE_FCT(shift, type, sign)				\
> +	((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |	\
> +	       (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
> +
> +/* For signed field */
> +#define	S_FCT(shift, type)	MAKE_FCT(shift, type, 1)
> +/* For unigned field */
> +#define	U_FCT(shift, type)	MAKE_FCT(shift, type, 0)
> +
> +/*
> + * @val and @lim are both a value of the ID register. The function checks
> + * if all features indicated in @val can be supported for guests on the host,
> + * which supports features indicated in @lim. @check_types indicates how
> + * features in the ID register needs to be checked.
> + * See comments for id_reg_info's ftr_check_types field for more detail.
> + */
> +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
> +{
> +	int i;
> +
> +	for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
> +		u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
> +		bool is_sign = FCT_SIGN(ftr_check);
> +		enum feature_check_type fctype = FCT_TYPE(ftr_check);
> +		int fval, flim, ret;
> +
> +		fval = cpuid_feature_extract_field(val, i, is_sign);
> +		flim = cpuid_feature_extract_field(lim, i, is_sign);
> +
> +		ret = arm64_check_feature_one(fctype, fval, flim);
> +		if (ret)
> +			return -E2BIG;
> +	}
> +	return 0;
> +}

All this logic seems to reinvent what we already have in
arch/arm64/kernel/cpufeature.c. I'd rather we rely on it and maintain
a single idreg handling library.

Could you outline what is missing in the cpufeature code that requires
you to invent your own? I'm sure Suzuki could help here to make it
directly usable.

Thanks,

	M.
Marc Zyngier Nov. 21, 2021, 12:37 p.m. UTC | #4
On Fri, 19 Nov 2021 04:47:53 +0000,
Reiji Watanabe <reijiw@google.com> wrote:
> 
> I am going to add the following comment. Does it look clear enough for you ?
> 
>         /*
>          * This is an optional ID register specific validation function.
>          * When userspace tries to set the ID register, arm64_check_features()
>          * will check if the requested value indicates any features that cannot
>          * be supported by KVM on the host.  But, some ID register fields need
>          * a special checking and this function can be used for such fields.
>          * e.g. KVM_CREATE_DEVICE must be used to configure GICv3 for a guest.
>          * ID_AA64PFR0_EL1.GIC shouldn't be set to 1 unless GICv3 is configured.

There is no such requirement. GICv3 has a compatibility interface, and
although KVM doesn't make use of it, there is no reason not to expose
that GICv3 exists on the CPU even if not using it. Even more, this has
been the case forever, and making this change now would probably break
migration.

	M.
Reiji Watanabe Nov. 23, 2021, 12:56 a.m. UTC | #5
On Sun, Nov 21, 2021 at 4:37 AM Marc Zyngier <maz@kernel.org> wrote:
>
> On Fri, 19 Nov 2021 04:47:53 +0000,
> Reiji Watanabe <reijiw@google.com> wrote:
> >
> > I am going to add the following comment. Does it look clear enough for you ?
> >
> >         /*
> >          * This is an optional ID register specific validation function.
> >          * When userspace tries to set the ID register, arm64_check_features()
> >          * will check if the requested value indicates any features that cannot
> >          * be supported by KVM on the host.  But, some ID register fields need
> >          * a special checking and this function can be used for such fields.
> >          * e.g. KVM_CREATE_DEVICE must be used to configure GICv3 for a guest.
> >          * ID_AA64PFR0_EL1.GIC shouldn't be set to 1 unless GICv3 is configured.
>
> There is no such requirement. GICv3 has a compatibility interface, and
> although KVM doesn't make use of it, there is no reason not to expose
> that GICv3 exists on the CPU even if not using it. Even more, this has
> been the case forever, and making this change now would probably break
> migration.

Shockingly, I somehow misunderstood what read_id_reg() did for GICv3...
I will use a different example for it.

Thanks,
Reiji
Eric Auger Nov. 24, 2021, 6:22 p.m. UTC | #6
Hi Reiji,

On 11/19/21 5:47 AM, Reiji Watanabe wrote:
> Hi Eric,
> 
> On Thu, Nov 18, 2021 at 12:36 PM Eric Auger <eauger@redhat.com> wrote:
>>
>> Hi Reiji,
>>
>> On 11/17/21 7:43 AM, Reiji Watanabe wrote:
>>> This patch lays the groundwork to make ID registers writable.
>>>
>>> Introduce struct id_reg_info for an ID register to manage the
>>> register specific control of its value for the guest, and provide set
>>> of functions commonly used for ID registers to make them writable.
>>>
>>> The id_reg_info is used to do register specific initialization,
>>> validation of the ID register and etc.  Not all ID registers must
>>> have the id_reg_info. ID registers that don't have the id_reg_info
>>> are handled in a common way that is applied to all ID registers.
>>>
>>> At present, changing an ID register from userspace is allowed only
>>> if the ID register has the id_reg_info, but that will be changed
>>> by the following patches.
>>>
>>> No ID register has the structure yet and the following patches
>>> will add the id_reg_info for some ID registers.
>>>
>>> Signed-off-by: Reiji Watanabe <reijiw@google.com>
>>> ---
>>>  arch/arm64/include/asm/sysreg.h |   1 +
>>>  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
>>>  2 files changed, 218 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>>> index 16b3f1a1d468..597609f26331 100644
>>> --- a/arch/arm64/include/asm/sysreg.h
>>> +++ b/arch/arm64/include/asm/sysreg.h
>>> @@ -1197,6 +1197,7 @@
>>>  #define ICH_VTR_TDS_MASK     (1 << ICH_VTR_TDS_SHIFT)
>>>
>>>  #define ARM64_FEATURE_FIELD_BITS     4
>>> +#define ARM64_FEATURE_FIELD_MASK     ((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
>>>
>>>  /* Create a mask for the feature bits of the specified feature. */
>>>  #define ARM64_FEATURE_MASK(x)        (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
>>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>>> index 5608d3410660..1552cd5581b7 100644
>>> --- a/arch/arm64/kvm/sys_regs.c
>>> +++ b/arch/arm64/kvm/sys_regs.c
>>> @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
>>>               return read_zero(vcpu, p);
>>>  }
>>>
>>> +/*
>>> + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
>>> + * ftr_check_types of id_reg_info.
>>> + */
>>> +enum feature_check_type {
>>> +     FCT_LOWER_SAFE = 0,
>>> +     FCT_HIGHER_SAFE,
>>> +     FCT_HIGHER_OR_ZERO_SAFE,
>>> +     FCT_EXACT,
>>> +     FCT_EXACT_OR_ZERO_SAFE,
>>> +     FCT_IGNORE,     /* Don't check (any value is fine) */
>> Maybe you can remove the _SAFE suffix (EXACT does not have it).
> 
> I am inclined to keep 'SAFE' (otherwise, I am likely to forget
> if lower is safe or not).
> 
>> s/EXACT/EQUAL ?
> 
> I will fix that FCT_EXACT to FCT_EQUAL_SAFE.
> 
>>> +};
>>> +
>>> +static int arm64_check_feature_one(enum feature_check_type type, int val,
>>> +                                int limit)
>>> +{
>>> +     bool is_safe = false;
>>> +
>>> +     if (val == limit)
>>> +             return 0;
>> even if the type is unexpected?
> 
> I will remove it.
then you need to modify the handling of FCT_EXACT*.
> 
>>> +
>>> +     switch (type) {
>>> +     case FCT_LOWER_SAFE:
>>> +             is_safe = (val <= limit);
>>> +             break;
>>> +     case FCT_HIGHER_OR_ZERO_SAFE:
>>> +             if (val == 0) {
>>> +                     is_safe = true;
>>> +                     break;
>>> +             }
>>> +             fallthrough;
>>> +     case FCT_HIGHER_SAFE:
>>> +             is_safe = (val >= limit);
>>> +             break;
>>> +     case FCT_EXACT:
>>> +             break;
>>> +     case FCT_EXACT_OR_ZERO_SAFE:
>>> +             is_safe = (val == 0);
>>> +             break;
>>> +     case FCT_IGNORE:
>>> +             is_safe = true;
>>> +             break;
>>> +     default:
>>> +             WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
>>> +             break;
>>> +     }
>>> +
>>> +     return is_safe ? 0 : -1;
>>> +}
>>> +
>>> +#define      FCT_TYPE_MASK           0x7
>>> +#define      FCT_TYPE_SHIFT          1
>>> +#define      FCT_SIGN_MASK           0x1
>>> +#define      FCT_SIGN_SHIFT          0
>>> +#define      FCT_TYPE(val)   ((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
>>> +#define      FCT_SIGN(val)   ((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
>>> +
>>> +#define      MAKE_FCT(shift, type, sign)                             \
>>> +     ((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |   \
>>> +            (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
>>> +
>>> +/* For signed field */
>>> +#define      S_FCT(shift, type)      MAKE_FCT(shift, type, 1)
>>> +/* For unigned field */
>>> +#define      U_FCT(shift, type)      MAKE_FCT(shift, type, 0)
>>> +
>>> +/*
>>> + * @val and @lim are both a value of the ID register. The function checks
>>> + * if all features indicated in @val can be supported for guests on the host,
>>> + * which supports features indicated in @lim. @check_types indicates how> + * features in the ID register needs to be checked.
>>> + * See comments for id_reg_info's ftr_check_types field for more detail.
>> What about RES0 fields which may exist? add a comment to reassure about
>> the fact they are properly handled if there are?
> 
> Any fields including RES0 should be checked based on check_types.
> I will explicitly state that in the comment.
> 
>>> + */
>>> +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
>>> +{
>>> +     int i;
>>> +
>>> +     for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
>>> +             u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
>>> +             bool is_sign = FCT_SIGN(ftr_check);
>>> +             enum feature_check_type fctype = FCT_TYPE(ftr_check);
>>> +             int fval, flim, ret;
>>> +
>>> +             fval = cpuid_feature_extract_field(val, i, is_sign);
>>> +             flim = cpuid_feature_extract_field(lim, i, is_sign);
>>> +
>>> +             ret = arm64_check_feature_one(fctype, fval, flim);
>>> +             if (ret)
>>> +                     return -E2BIG;
>>> +     }
>>> +     return 0;
>>> +}
>>> +
>>> +struct id_reg_info {
>>> +     u32     sys_reg;        /* Register ID */
>> use struct kernel-doc comments instead?
>>> +
>>> +     /*
>>> +      * Limit value of the register for a vcpu. The value is the sanitized
>>> +      * system value with bits cleared for unsupported features for the
>>> +      * guest.
>>> +      */
>>> +     u64     vcpu_limit_val;
>>> +
>>> +     /*
>>> +      * The ftr_check_types is comprised of a set of 4 bits fields.
>> nit: s/bits field/bit field here and below
> 
> I will fix them.
> 
>>> +      * Each 4 bits field is for a feature indicated by the same bits
>>> +      * field of the ID register and indicates how the feature support
>>> +      * for guests needs to be checked.
>>> +      * The bit 0 indicates that the corresponding ID register field
>>> +      * is signed(1) or unsigned(0).
>>> +      * The bits [3:1] hold feature_check_type for the field.
>>> +      * If all zero, all features in the ID register are treated as unsigned
>>> +      * fields and checked based on Principles of the ID scheme for fields
>>> +      * in ID registers (FCT_LOWER_SAFE of feature_check_type).
>> values set by the guest are checked against host ID field values
>> according to FCT_LOWER_SAFE test? You do not actually explicitly explain
>> what the check is about although this may be obvious for you?
> 
> How about this ?
> 
>         /*
>          * The ftr_check_types is comprised of a set of 4 bit fields.
>          * Each 4 bit field is for a feature indicated by the same bit field
>          * of the ID register and indicates how the field needs to be checked
>          * (by arm64_check_feature_one) against the host's ID field when
>          * userspace tries to set the register.
>          * The bit 0 indicates that the corresponding ID register field is
>          * signed(1) or unsigned(0). The bits [3:1] hold feature_check_type
>          * for the field (FCT_LOWER_SAFE == 0, etc).
>          * e.g. for ID_AA64PFR0_EL1.SVE(bits [35:32]), bits[35:32] of
>          * ftr_check_types for the register should be 0. It means the SVE
>          * field is treated as an unsigned field, and userspace can set the
>          * field to a equal or lower value than the host's ID field value.
>          */
yep sounds clearer to me.
> 
>>> +      */
>>> +     u64     ftr_check_types;
>>> +
>>> +     /* Initialization function of the id_reg_info */
>>> +     void (*init)(struct id_reg_info *id_reg);
>>> +
>>> +     /* Register specific validation function */
>> validation callback? it does not register anything. We have check
>> customization means already in ftr_check_types so it is difficult to
>> guess at that point why this cb is needed, all the more so it applies
>> after the ftr_checks.
> 
> I am going to add the following comment. Does it look clear enough for you ?
> 
>         /*
>          * This is an optional ID register specific validation function.
>          * When userspace tries to set the ID register, arm64_check_features()
>          * will check if the requested value indicates any features that cannot
>          * be supported by KVM on the host.  But, some ID register fields need
>          * a special checking and this function can be used for such fields.
>          * e.g. KVM_CREATE_DEVICE must be used to configure GICv3 for a guest.
>          * ID_AA64PFR0_EL1.GIC shouldn't be set to 1 unless GICv3 is configured.
>          * The validation function for ID_AA64PFR0_EL1 could be used to check
>          * the field is consistent with GICv3 configuration.
>          */
> 
>>> +     int (*validate)(struct kvm_vcpu *vcpu, const struct id_reg_info *id_reg,
>>> +                     u64 val);
>>> +
>>> +     /* Return the reset value of the register for the vCPU */
>>> +     u64 (*get_reset_val)(struct kvm_vcpu *vcpu,
>>> +                          const struct id_reg_info *id_reg);
>>> +};
>>> +
>>> +static void id_reg_info_init(struct id_reg_info *id_reg)
>>> +{
>>> +     id_reg->vcpu_limit_val = read_sanitised_ftr_reg(id_reg->sys_reg);
>>> +     if (id_reg->init)
>>> +             id_reg->init(id_reg);
>>> +}
>>> +
>>> +/*
>>> + * An ID register that needs special handling to control the value for the
>>> + * guest must have its own id_reg_info in id_reg_info_table.
>>> + * (i.e. the reset value is different from the host's sanitized value,
>>> + * the value is affected by opt-in features, some fields needs specific
>> s/needs/need
> 
> I will fix it.
> 
> Thank you for your review !
> 
> Regards
> Reiji
> 

Thanks

Eric
Eric Auger Nov. 24, 2021, 9:07 p.m. UTC | #7
Hi Reiji,

On 11/17/21 7:43 AM, Reiji Watanabe wrote:
> This patch lays the groundwork to make ID registers writable.
> 
> Introduce struct id_reg_info for an ID register to manage the
> register specific control of its value for the guest, and provide set
> of functions commonly used for ID registers to make them writable.
> 
> The id_reg_info is used to do register specific initialization,
> validation of the ID register and etc.  Not all ID registers must
> have the id_reg_info. ID registers that don't have the id_reg_info
> are handled in a common way that is applied to all ID registers.
> 
> At present, changing an ID register from userspace is allowed only
> if the ID register has the id_reg_info, but that will be changed
> by the following patches.
> 
> No ID register has the structure yet and the following patches
> will add the id_reg_info for some ID registers.
> 
> Signed-off-by: Reiji Watanabe <reijiw@google.com>
> ---
>  arch/arm64/include/asm/sysreg.h |   1 +
>  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
>  2 files changed, 218 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 16b3f1a1d468..597609f26331 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -1197,6 +1197,7 @@
>  #define ICH_VTR_TDS_MASK	(1 << ICH_VTR_TDS_SHIFT)
>  
>  #define ARM64_FEATURE_FIELD_BITS	4
> +#define ARM64_FEATURE_FIELD_MASK	((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
>  
>  /* Create a mask for the feature bits of the specified feature. */
>  #define ARM64_FEATURE_MASK(x)	(GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 5608d3410660..1552cd5581b7 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
>  		return read_zero(vcpu, p);
>  }
>  
> +/*
> + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> + * ftr_check_types of id_reg_info.
> + */
> +enum feature_check_type {
> +	FCT_LOWER_SAFE = 0,
> +	FCT_HIGHER_SAFE,
> +	FCT_HIGHER_OR_ZERO_SAFE,
> +	FCT_EXACT,
> +	FCT_EXACT_OR_ZERO_SAFE,
> +	FCT_IGNORE,	/* Don't check (any value is fine) */
> +};
> +
> +static int arm64_check_feature_one(enum feature_check_type type, int val,
> +				   int limit)
> +{
> +	bool is_safe = false;
> +
> +	if (val == limit)
> +		return 0;
> +
> +	switch (type) {
> +	case FCT_LOWER_SAFE:
> +		is_safe = (val <= limit);
> +		break;
> +	case FCT_HIGHER_OR_ZERO_SAFE:
> +		if (val == 0) {
> +			is_safe = true;
> +			break;
> +		}
> +		fallthrough;
> +	case FCT_HIGHER_SAFE:
> +		is_safe = (val >= limit);
> +		break;
> +	case FCT_EXACT:
> +		break;
> +	case FCT_EXACT_OR_ZERO_SAFE:
> +		is_safe = (val == 0);
> +		break;
> +	case FCT_IGNORE:
> +		is_safe = true;
> +		break;
> +	default:
> +		WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
> +		break;
> +	}
> +
> +	return is_safe ? 0 : -1;
> +}
> +
> +#define	FCT_TYPE_MASK		0x7
> +#define	FCT_TYPE_SHIFT		1
> +#define	FCT_SIGN_MASK		0x1
> +#define	FCT_SIGN_SHIFT		0
> +#define	FCT_TYPE(val)	((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
> +#define	FCT_SIGN(val)	((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
> +
> +#define	MAKE_FCT(shift, type, sign)				\
> +	((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |	\
> +	       (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
> +
> +/* For signed field */
> +#define	S_FCT(shift, type)	MAKE_FCT(shift, type, 1)
> +/* For unigned field */
> +#define	U_FCT(shift, type)	MAKE_FCT(shift, type, 0)
> +
> +/*
> + * @val and @lim are both a value of the ID register. The function checks
> + * if all features indicated in @val can be supported for guests on the host,
> + * which supports features indicated in @lim. @check_types indicates how
> + * features in the ID register needs to be checked.
> + * See comments for id_reg_info's ftr_check_types field for more detail.
> + */
> +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
> +{
> +	int i;
> +
> +	for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
> +		u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
> +		bool is_sign = FCT_SIGN(ftr_check);
> +		enum feature_check_type fctype = FCT_TYPE(ftr_check);
> +		int fval, flim, ret;
> +
> +		fval = cpuid_feature_extract_field(val, i, is_sign);
> +		flim = cpuid_feature_extract_field(lim, i, is_sign);
> +
> +		ret = arm64_check_feature_one(fctype, fval, flim);
> +		if (ret)
> +			return -E2BIG;
nit: -EINVAL may be better because depending on the check type this may
not mean too big.

Eric
> +	}
> +	return 0;
> +}
> +
> +struct id_reg_info {
> +	u32	sys_reg;	/* Register ID */
> +
> +	/*
> +	 * Limit value of the register for a vcpu. The value is the sanitized
> +	 * system value with bits cleared for unsupported features for the
> +	 * guest.
> +	 */
> +	u64	vcpu_limit_val;
> +
> +	/*
> +	 * The ftr_check_types is comprised of a set of 4 bits fields.
> +	 * Each 4 bits field is for a feature indicated by the same bits
> +	 * field of the ID register and indicates how the feature support
> +	 * for guests needs to be checked.
> +	 * The bit 0 indicates that the corresponding ID register field
> +	 * is signed(1) or unsigned(0).
> +	 * The bits [3:1] hold feature_check_type for the field.
> +	 * If all zero, all features in the ID register are treated as unsigned
> +	 * fields and checked based on Principles of the ID scheme for fields
> +	 * in ID registers (FCT_LOWER_SAFE of feature_check_type).
> +	 */
> +	u64	ftr_check_types;
> +
> +	/* Initialization function of the id_reg_info */
> +	void (*init)(struct id_reg_info *id_reg);
> +
> +	/* Register specific validation function */
> +	int (*validate)(struct kvm_vcpu *vcpu, const struct id_reg_info *id_reg,
> +			u64 val);
> +
> +	/* Return the reset value of the register for the vCPU */
> +	u64 (*get_reset_val)(struct kvm_vcpu *vcpu,
> +			     const struct id_reg_info *id_reg);
> +};
> +
> +static void id_reg_info_init(struct id_reg_info *id_reg)
> +{
> +	id_reg->vcpu_limit_val = read_sanitised_ftr_reg(id_reg->sys_reg);
> +	if (id_reg->init)
> +		id_reg->init(id_reg);
> +}
> +
> +/*
> + * An ID register that needs special handling to control the value for the
> + * guest must have its own id_reg_info in id_reg_info_table.
> + * (i.e. the reset value is different from the host's sanitized value,
> + * the value is affected by opt-in features, some fields needs specific
> + * validation, etc.)
> + */
> +#define	GET_ID_REG_INFO(id)	(id_reg_info_table[IDREG_IDX(id)])
> +static struct id_reg_info *id_reg_info_table[KVM_ARM_ID_REG_MAX_NUM] = {};
> +
> +static int validate_id_reg(struct kvm_vcpu *vcpu,
> +			   const struct sys_reg_desc *rd, u64 val)
> +{
> +	u32 id = reg_to_encoding(rd);
> +	const struct id_reg_info *id_reg = GET_ID_REG_INFO(id);
> +	u64 limit, check_types;
> +	int err;
> +
> +	if (id_reg) {
> +		check_types = id_reg->ftr_check_types;
> +		limit = id_reg->vcpu_limit_val;
> +	} else {
> +		/* All fields are treated as unsigned and FCT_LOWER_SAFE */
> +		check_types = 0;
> +		limit = read_sanitised_ftr_reg(id);
> +	}
> +
> +	/* Check if the value indicates any feature that is not in the limit. */
> +	err = arm64_check_features(check_types, val, limit);
> +	if (err)
> +		return err;
> +
> +	if (id_reg && id_reg->validate)
> +		/* Run the ID register specific validity check. */
> +		err = id_reg->validate(vcpu, id_reg, val);
> +
> +	return err;
> +}
> +
>  /*
>   * ARMv8.1 mandates at least a trivial LORegion implementation, where all the
>   * RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0
> @@ -1183,11 +1358,19 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
>  static void reset_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
>  {
>  	u32 id = reg_to_encoding(rd);
> +	struct id_reg_info *id_reg;
> +	u64 val;
>  
>  	if (vcpu_has_reset_once(vcpu))
>  		return;
>  
> -	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(id)) = read_sanitised_ftr_reg(id);
> +	id_reg = GET_ID_REG_INFO(id);
> +	if (id_reg && id_reg->get_reset_val)
> +		val = id_reg->get_reset_val(vcpu, id_reg);
> +	else
> +		val = read_sanitised_ftr_reg(id);
> +
> +	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(id)) = val;
>  }
>  
>  static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
> @@ -1232,11 +1415,7 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
>  	return 0;
>  }
>  
> -/*
> - * cpufeature ID register user accessors
> - *
> - * We don't allow the effective value to be changed.
> - */
> +/* cpufeature ID register user accessors */
>  static int __get_id_reg(const struct kvm_vcpu *vcpu,
>  			const struct sys_reg_desc *rd, void __user *uaddr,
>  			bool raz)
> @@ -1247,11 +1426,12 @@ static int __get_id_reg(const struct kvm_vcpu *vcpu,
>  	return reg_to_user(uaddr, &val, id);
>  }
>  
> -static int __set_id_reg(const struct kvm_vcpu *vcpu,
> +static int __set_id_reg(struct kvm_vcpu *vcpu,
>  			const struct sys_reg_desc *rd, void __user *uaddr,
>  			bool raz)
>  {
>  	const u64 id = sys_reg_to_index(rd);
> +	u32 encoding = reg_to_encoding(rd);
>  	int err;
>  	u64 val;
>  
> @@ -1259,10 +1439,22 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu,
>  	if (err)
>  		return err;
>  
> -	/* This is what we mean by invariant: you can't change it. */
> -	if (val != read_id_reg(vcpu, rd, raz))
> +	/* Don't allow to change the reg unless the reg has id_reg_info */
> +	if (val != read_id_reg(vcpu, rd, raz) && !GET_ID_REG_INFO(encoding))
>  		return -EINVAL;
>  
> +	if (raz)
> +		return 0;
> +
> +	/* Don't allow to change the reg after the first KVM_RUN. */
> +	if (vcpu->arch.has_run_once)> +		return -EINVAL;
> +
> +	err = validate_id_reg(vcpu, rd, val);
> +	if (err)
> +		return err;
> +
> +	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(encoding)) = val;
>  	return 0;
>  }
>  
> @@ -2826,6 +3018,20 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
>  	return write_demux_regids(uindices);
>  }
>  
> +static void id_reg_info_init_all(void)
> +{
> +	int i;
> +	struct id_reg_info *id_reg;
> +
> +	for (i = 0; i < ARRAY_SIZE(id_reg_info_table); i++) {
> +		id_reg = (struct id_reg_info *)id_reg_info_table[i];
> +		if (!id_reg)
> +			continue;
> +
> +		id_reg_info_init(id_reg);
> +	}
> +}
> +
>  void kvm_sys_reg_table_init(void)
>  {
>  	unsigned int i;
> @@ -2860,4 +3066,6 @@ void kvm_sys_reg_table_init(void)
>  			break;
>  	/* Clear all higher bits. */
>  	cache_levels &= (1 << (i*3))-1;
> +
> +	id_reg_info_init_all();
>  }
>
Reiji Watanabe Nov. 25, 2021, 5:27 a.m. UTC | #8
On Sun, Nov 21, 2021 at 4:37 AM Marc Zyngier <maz@kernel.org> wrote:
>
> On Wed, 17 Nov 2021 06:43:33 +0000,
> Reiji Watanabe <reijiw@google.com> wrote:
> >
> > This patch lays the groundwork to make ID registers writable.
> >
> > Introduce struct id_reg_info for an ID register to manage the
> > register specific control of its value for the guest, and provide set
> > of functions commonly used for ID registers to make them writable.
> >
> > The id_reg_info is used to do register specific initialization,
> > validation of the ID register and etc.  Not all ID registers must
> > have the id_reg_info. ID registers that don't have the id_reg_info
> > are handled in a common way that is applied to all ID registers.
> >
> > At present, changing an ID register from userspace is allowed only
> > if the ID register has the id_reg_info, but that will be changed
> > by the following patches.
> >
> > No ID register has the structure yet and the following patches
> > will add the id_reg_info for some ID registers.
> >
> > Signed-off-by: Reiji Watanabe <reijiw@google.com>
> > ---
> >  arch/arm64/include/asm/sysreg.h |   1 +
> >  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
> >  2 files changed, 218 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > index 16b3f1a1d468..597609f26331 100644
> > --- a/arch/arm64/include/asm/sysreg.h
> > +++ b/arch/arm64/include/asm/sysreg.h
> > @@ -1197,6 +1197,7 @@
> >  #define ICH_VTR_TDS_MASK     (1 << ICH_VTR_TDS_SHIFT)
> >
> >  #define ARM64_FEATURE_FIELD_BITS     4
> > +#define ARM64_FEATURE_FIELD_MASK     ((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
> >
> >  /* Create a mask for the feature bits of the specified feature. */
> >  #define ARM64_FEATURE_MASK(x)        (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 5608d3410660..1552cd5581b7 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
> >               return read_zero(vcpu, p);
> >  }
> >
> > +/*
> > + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> > + * ftr_check_types of id_reg_info.
> > + */
> > +enum feature_check_type {
> > +     FCT_LOWER_SAFE = 0,
> > +     FCT_HIGHER_SAFE,
> > +     FCT_HIGHER_OR_ZERO_SAFE,
> > +     FCT_EXACT,
> > +     FCT_EXACT_OR_ZERO_SAFE,
> > +     FCT_IGNORE,     /* Don't check (any value is fine) */
> > +};
> > +
> > +static int arm64_check_feature_one(enum feature_check_type type, int val,
> > +                                int limit)
> > +{
> > +     bool is_safe = false;
> > +
> > +     if (val == limit)
> > +             return 0;
> > +
> > +     switch (type) {
> > +     case FCT_LOWER_SAFE:
> > +             is_safe = (val <= limit);
> > +             break;
> > +     case FCT_HIGHER_OR_ZERO_SAFE:
> > +             if (val == 0) {
> > +                     is_safe = true;
> > +                     break;
> > +             }
> > +             fallthrough;
> > +     case FCT_HIGHER_SAFE:
> > +             is_safe = (val >= limit);
> > +             break;
> > +     case FCT_EXACT:
> > +             break;
> > +     case FCT_EXACT_OR_ZERO_SAFE:
> > +             is_safe = (val == 0);
> > +             break;
> > +     case FCT_IGNORE:
> > +             is_safe = true;
> > +             break;
> > +     default:
> > +             WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
> > +             break;
> > +     }
> > +
> > +     return is_safe ? 0 : -1;
> > +}
> > +
> > +#define      FCT_TYPE_MASK           0x7
> > +#define      FCT_TYPE_SHIFT          1
> > +#define      FCT_SIGN_MASK           0x1
> > +#define      FCT_SIGN_SHIFT          0
> > +#define      FCT_TYPE(val)   ((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
> > +#define      FCT_SIGN(val)   ((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
> > +
> > +#define      MAKE_FCT(shift, type, sign)                             \
> > +     ((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |   \
> > +            (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
> > +
> > +/* For signed field */
> > +#define      S_FCT(shift, type)      MAKE_FCT(shift, type, 1)
> > +/* For unigned field */
> > +#define      U_FCT(shift, type)      MAKE_FCT(shift, type, 0)
> > +
> > +/*
> > + * @val and @lim are both a value of the ID register. The function checks
> > + * if all features indicated in @val can be supported for guests on the host,
> > + * which supports features indicated in @lim. @check_types indicates how
> > + * features in the ID register needs to be checked.
> > + * See comments for id_reg_info's ftr_check_types field for more detail.
> > + */
> > +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
> > +{
> > +     int i;
> > +
> > +     for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
> > +             u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
> > +             bool is_sign = FCT_SIGN(ftr_check);
> > +             enum feature_check_type fctype = FCT_TYPE(ftr_check);
> > +             int fval, flim, ret;
> > +
> > +             fval = cpuid_feature_extract_field(val, i, is_sign);
> > +             flim = cpuid_feature_extract_field(lim, i, is_sign);
> > +
> > +             ret = arm64_check_feature_one(fctype, fval, flim);
> > +             if (ret)
> > +                     return -E2BIG;
> > +     }
> > +     return 0;
> > +}
>
> All this logic seems to reinvent what we already have in
> arch/arm64/kernel/cpufeature.c. I'd rather we rely on it and maintain
> a single idreg handling library.
>
> Could you outline what is missing in the cpufeature code that requires
> you to invent your own? I'm sure Suzuki could help here to make it
> directly usable.

The issue is that there are some fields whose arm64_ftr_bits don't
match what (I think) I need.  However, looking into that option again,
it seems that the number of such fields are fewer than I originally
thought (I misunderstood some earlier).

They are just three fields below.  The common checking process can be
skipped for those fields (will restore ignore_mask field in id_reg_info
as I had in v1 patch, which is treated like FCT_IGNORE in the v3 patch),
and I will have their ID register specific validation function do
what I want to check into the fields.

 - AA64DFR0.DEBUGVER:
   Its .type is FTR_EXACT.
   I want to treat its .type as FTR_LOWER_SAFE for the check.

 - AA64DFR0.PMUVER:
   Its .sign is FTR_SIGNED and .type is FTR_EXACT.
   I want to treat its .sign as FTR_UNSIGNED and .type as
   FTR_LOWER_SAFE for the check.

 - DFR0.PERFMON:
   Its .sign is FTR_SIGNED (Its .type is FTR_LOWER_SAFE).
   I want to treat its .sign field as FTR_UNSIGNED for the check.

   (NOTE: For PMUVER and PERFMON, Arm ARM says "if the field value
    is not 0xf the field is treated as an unsigned value")

Thanks,
Reiji
Reiji Watanabe Nov. 25, 2021, 6:05 a.m. UTC | #9
Hi Eric,

On Wed, Nov 24, 2021 at 10:22 AM Eric Auger <eauger@redhat.com> wrote:
>
> Hi Reiji,
>
> On 11/19/21 5:47 AM, Reiji Watanabe wrote:
> > Hi Eric,
> >
> > On Thu, Nov 18, 2021 at 12:36 PM Eric Auger <eauger@redhat.com> wrote:
> >>
> >> Hi Reiji,
> >>
> >> On 11/17/21 7:43 AM, Reiji Watanabe wrote:
> >>> This patch lays the groundwork to make ID registers writable.
> >>>
> >>> Introduce struct id_reg_info for an ID register to manage the
> >>> register specific control of its value for the guest, and provide set
> >>> of functions commonly used for ID registers to make them writable.
> >>>
> >>> The id_reg_info is used to do register specific initialization,
> >>> validation of the ID register and etc.  Not all ID registers must
> >>> have the id_reg_info. ID registers that don't have the id_reg_info
> >>> are handled in a common way that is applied to all ID registers.
> >>>
> >>> At present, changing an ID register from userspace is allowed only
> >>> if the ID register has the id_reg_info, but that will be changed
> >>> by the following patches.
> >>>
> >>> No ID register has the structure yet and the following patches
> >>> will add the id_reg_info for some ID registers.
> >>>
> >>> Signed-off-by: Reiji Watanabe <reijiw@google.com>
> >>> ---
> >>>  arch/arm64/include/asm/sysreg.h |   1 +
> >>>  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
> >>>  2 files changed, 218 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> >>> index 16b3f1a1d468..597609f26331 100644
> >>> --- a/arch/arm64/include/asm/sysreg.h
> >>> +++ b/arch/arm64/include/asm/sysreg.h
> >>> @@ -1197,6 +1197,7 @@
> >>>  #define ICH_VTR_TDS_MASK     (1 << ICH_VTR_TDS_SHIFT)
> >>>
> >>>  #define ARM64_FEATURE_FIELD_BITS     4
> >>> +#define ARM64_FEATURE_FIELD_MASK     ((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
> >>>
> >>>  /* Create a mask for the feature bits of the specified feature. */
> >>>  #define ARM64_FEATURE_MASK(x)        (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> >>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> >>> index 5608d3410660..1552cd5581b7 100644
> >>> --- a/arch/arm64/kvm/sys_regs.c
> >>> +++ b/arch/arm64/kvm/sys_regs.c
> >>> @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
> >>>               return read_zero(vcpu, p);
> >>>  }
> >>>
> >>> +/*
> >>> + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> >>> + * ftr_check_types of id_reg_info.
> >>> + */
> >>> +enum feature_check_type {
> >>> +     FCT_LOWER_SAFE = 0,
> >>> +     FCT_HIGHER_SAFE,
> >>> +     FCT_HIGHER_OR_ZERO_SAFE,
> >>> +     FCT_EXACT,
> >>> +     FCT_EXACT_OR_ZERO_SAFE,
> >>> +     FCT_IGNORE,     /* Don't check (any value is fine) */
> >> Maybe you can remove the _SAFE suffix (EXACT does not have it).
> >
> > I am inclined to keep 'SAFE' (otherwise, I am likely to forget
> > if lower is safe or not).
> >
> >> s/EXACT/EQUAL ?
> >
> > I will fix that FCT_EXACT to FCT_EQUAL_SAFE.
> >
> >>> +};
> >>> +
> >>> +static int arm64_check_feature_one(enum feature_check_type type, int val,
> >>> +                                int limit)
> >>> +{
> >>> +     bool is_safe = false;
> >>> +
> >>> +     if (val == limit)
> >>> +             return 0;
> >> even if the type is unexpected?
> >
> > I will remove it.
> then you need to modify the handling of FCT_EXACT*.

Thank you for the comment. Yes, I understand.
That being said, I might probably make "val == limit" safe
unconditionally as I will move those implementations
to arch/arm64/kernel/cpufeature.c and utilize existing things
as much as I can.
(https://lore.kernel.org/linux-arm-kernel/CAAeT=FxwzRF0YZmmoEmq3xRHnhun-BCx_FeEQrOVLgzwseSy4w@mail.gmail.com/)


> >>> +
> >>> +     switch (type) {
> >>> +     case FCT_LOWER_SAFE:
> >>> +             is_safe = (val <= limit);
> >>> +             break;
> >>> +     case FCT_HIGHER_OR_ZERO_SAFE:
> >>> +             if (val == 0) {
> >>> +                     is_safe = true;
> >>> +                     break;
> >>> +             }
> >>> +             fallthrough;
> >>> +     case FCT_HIGHER_SAFE:
> >>> +             is_safe = (val >= limit);
> >>> +             break;
> >>> +     case FCT_EXACT:
> >>> +             break;
> >>> +     case FCT_EXACT_OR_ZERO_SAFE:
> >>> +             is_safe = (val == 0);
> >>> +             break;
> >>> +     case FCT_IGNORE:
> >>> +             is_safe = true;
> >>> +             break;
> >>> +     default:
> >>> +             WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
> >>> +             break;
> >>> +     }
> >>> +
> >>> +     return is_safe ? 0 : -1;
> >>> +}
> >>> +
> >>> +#define      FCT_TYPE_MASK           0x7
> >>> +#define      FCT_TYPE_SHIFT          1
> >>> +#define      FCT_SIGN_MASK           0x1
> >>> +#define      FCT_SIGN_SHIFT          0
> >>> +#define      FCT_TYPE(val)   ((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
> >>> +#define      FCT_SIGN(val)   ((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
> >>> +
> >>> +#define      MAKE_FCT(shift, type, sign)                             \
> >>> +     ((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |   \
> >>> +            (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
> >>> +
> >>> +/* For signed field */
> >>> +#define      S_FCT(shift, type)      MAKE_FCT(shift, type, 1)
> >>> +/* For unigned field */
> >>> +#define      U_FCT(shift, type)      MAKE_FCT(shift, type, 0)
> >>> +
> >>> +/*
> >>> + * @val and @lim are both a value of the ID register. The function checks
> >>> + * if all features indicated in @val can be supported for guests on the host,
> >>> + * which supports features indicated in @lim. @check_types indicates how> + * features in the ID register needs to be checked.
> >>> + * See comments for id_reg_info's ftr_check_types field for more detail.
> >> What about RES0 fields which may exist? add a comment to reassure about
> >> the fact they are properly handled if there are?
> >
> > Any fields including RES0 should be checked based on check_types.
> > I will explicitly state that in the comment.
> >
> >>> + */
> >>> +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
> >>> +{
> >>> +     int i;
> >>> +
> >>> +     for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
> >>> +             u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
> >>> +             bool is_sign = FCT_SIGN(ftr_check);
> >>> +             enum feature_check_type fctype = FCT_TYPE(ftr_check);
> >>> +             int fval, flim, ret;
> >>> +
> >>> +             fval = cpuid_feature_extract_field(val, i, is_sign);
> >>> +             flim = cpuid_feature_extract_field(lim, i, is_sign);
> >>> +
> >>> +             ret = arm64_check_feature_one(fctype, fval, flim);
> >>> +             if (ret)
> >>> +                     return -E2BIG;
> >>> +     }
> >>> +     return 0;
> >>> +}
> >>> +
> >>> +struct id_reg_info {
> >>> +     u32     sys_reg;        /* Register ID */
> >> use struct kernel-doc comments instead?
> >>> +
> >>> +     /*
> >>> +      * Limit value of the register for a vcpu. The value is the sanitized
> >>> +      * system value with bits cleared for unsupported features for the
> >>> +      * guest.
> >>> +      */
> >>> +     u64     vcpu_limit_val;
> >>> +
> >>> +     /*
> >>> +      * The ftr_check_types is comprised of a set of 4 bits fields.
> >> nit: s/bits field/bit field here and below
> >
> > I will fix them.
> >
> >>> +      * Each 4 bits field is for a feature indicated by the same bits
> >>> +      * field of the ID register and indicates how the feature support
> >>> +      * for guests needs to be checked.
> >>> +      * The bit 0 indicates that the corresponding ID register field
> >>> +      * is signed(1) or unsigned(0).
> >>> +      * The bits [3:1] hold feature_check_type for the field.
> >>> +      * If all zero, all features in the ID register are treated as unsigned
> >>> +      * fields and checked based on Principles of the ID scheme for fields
> >>> +      * in ID registers (FCT_LOWER_SAFE of feature_check_type).
> >> values set by the guest are checked against host ID field values
> >> according to FCT_LOWER_SAFE test? You do not actually explicitly explain
> >> what the check is about although this may be obvious for you?
> >
> > How about this ?
> >
> >         /*
> >          * The ftr_check_types is comprised of a set of 4 bit fields.
> >          * Each 4 bit field is for a feature indicated by the same bit field
> >          * of the ID register and indicates how the field needs to be checked
> >          * (by arm64_check_feature_one) against the host's ID field when
> >          * userspace tries to set the register.
> >          * The bit 0 indicates that the corresponding ID register field is
> >          * signed(1) or unsigned(0). The bits [3:1] hold feature_check_type
> >          * for the field (FCT_LOWER_SAFE == 0, etc).
> >          * e.g. for ID_AA64PFR0_EL1.SVE(bits [35:32]), bits[35:32] of
> >          * ftr_check_types for the register should be 0. It means the SVE
> >          * field is treated as an unsigned field, and userspace can set the
> >          * field to a equal or lower value than the host's ID field value.
> >          */
> yep sounds clearer to me.

Thank you for the review !
(ftr_check_types will be gone due to the same reason above though...)

Regards,
Reiji
Reiji Watanabe Nov. 25, 2021, 6:40 a.m. UTC | #10
Hi Eric,

On Wed, Nov 24, 2021 at 1:07 PM Eric Auger <eauger@redhat.com> wrote:
>
> Hi Reiji,
>
> On 11/17/21 7:43 AM, Reiji Watanabe wrote:
> > This patch lays the groundwork to make ID registers writable.
> >
> > Introduce struct id_reg_info for an ID register to manage the
> > register specific control of its value for the guest, and provide set
> > of functions commonly used for ID registers to make them writable.
> >
> > The id_reg_info is used to do register specific initialization,
> > validation of the ID register and etc.  Not all ID registers must
> > have the id_reg_info. ID registers that don't have the id_reg_info
> > are handled in a common way that is applied to all ID registers.
> >
> > At present, changing an ID register from userspace is allowed only
> > if the ID register has the id_reg_info, but that will be changed
> > by the following patches.
> >
> > No ID register has the structure yet and the following patches
> > will add the id_reg_info for some ID registers.
> >
> > Signed-off-by: Reiji Watanabe <reijiw@google.com>
> > ---
> >  arch/arm64/include/asm/sysreg.h |   1 +
> >  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
> >  2 files changed, 218 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > index 16b3f1a1d468..597609f26331 100644
> > --- a/arch/arm64/include/asm/sysreg.h
> > +++ b/arch/arm64/include/asm/sysreg.h
> > @@ -1197,6 +1197,7 @@
> >  #define ICH_VTR_TDS_MASK     (1 << ICH_VTR_TDS_SHIFT)
> >
> >  #define ARM64_FEATURE_FIELD_BITS     4
> > +#define ARM64_FEATURE_FIELD_MASK     ((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
> >
> >  /* Create a mask for the feature bits of the specified feature. */
> >  #define ARM64_FEATURE_MASK(x)        (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 5608d3410660..1552cd5581b7 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
> >               return read_zero(vcpu, p);
> >  }
> >
> > +/*
> > + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> > + * ftr_check_types of id_reg_info.
> > + */
> > +enum feature_check_type {
> > +     FCT_LOWER_SAFE = 0,
> > +     FCT_HIGHER_SAFE,
> > +     FCT_HIGHER_OR_ZERO_SAFE,
> > +     FCT_EXACT,
> > +     FCT_EXACT_OR_ZERO_SAFE,
> > +     FCT_IGNORE,     /* Don't check (any value is fine) */
> > +};
> > +
> > +static int arm64_check_feature_one(enum feature_check_type type, int val,
> > +                                int limit)
> > +{
> > +     bool is_safe = false;
> > +
> > +     if (val == limit)
> > +             return 0;
> > +
> > +     switch (type) {
> > +     case FCT_LOWER_SAFE:
> > +             is_safe = (val <= limit);
> > +             break;
> > +     case FCT_HIGHER_OR_ZERO_SAFE:
> > +             if (val == 0) {
> > +                     is_safe = true;
> > +                     break;
> > +             }
> > +             fallthrough;
> > +     case FCT_HIGHER_SAFE:
> > +             is_safe = (val >= limit);
> > +             break;
> > +     case FCT_EXACT:
> > +             break;
> > +     case FCT_EXACT_OR_ZERO_SAFE:
> > +             is_safe = (val == 0);
> > +             break;
> > +     case FCT_IGNORE:
> > +             is_safe = true;
> > +             break;
> > +     default:
> > +             WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
> > +             break;
> > +     }
> > +
> > +     return is_safe ? 0 : -1;
> > +}
> > +
> > +#define      FCT_TYPE_MASK           0x7
> > +#define      FCT_TYPE_SHIFT          1
> > +#define      FCT_SIGN_MASK           0x1
> > +#define      FCT_SIGN_SHIFT          0
> > +#define      FCT_TYPE(val)   ((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
> > +#define      FCT_SIGN(val)   ((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
> > +
> > +#define      MAKE_FCT(shift, type, sign)                             \
> > +     ((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |   \
> > +            (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
> > +
> > +/* For signed field */
> > +#define      S_FCT(shift, type)      MAKE_FCT(shift, type, 1)
> > +/* For unigned field */
> > +#define      U_FCT(shift, type)      MAKE_FCT(shift, type, 0)
> > +
> > +/*
> > + * @val and @lim are both a value of the ID register. The function checks
> > + * if all features indicated in @val can be supported for guests on the host,
> > + * which supports features indicated in @lim. @check_types indicates how
> > + * features in the ID register needs to be checked.
> > + * See comments for id_reg_info's ftr_check_types field for more detail.
> > + */
> > +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
> > +{
> > +     int i;
> > +
> > +     for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
> > +             u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
> > +             bool is_sign = FCT_SIGN(ftr_check);
> > +             enum feature_check_type fctype = FCT_TYPE(ftr_check);
> > +             int fval, flim, ret;
> > +
> > +             fval = cpuid_feature_extract_field(val, i, is_sign);
> > +             flim = cpuid_feature_extract_field(lim, i, is_sign);
> > +
> > +             ret = arm64_check_feature_one(fctype, fval, flim);
> > +             if (ret)
> > +                     return -E2BIG;
> nit: -EINVAL may be better because depending on the check type this may
> not mean too big.

Yes, that is correct.

This error case means that userspace tried to configure features
or a higher level of features that were not supported on the host.
In that sense, I chose -E2BIG.

I wanted to use an error code specific to this particular case, which
I think makes debugging userspace issue easier when KVM_SET_ONE_REG
fails, and I couldn't find other error codes that fit this case better.
So, I'm trying to avoid using -EINVAL, which is used for other failure
cases.

If you have any other suggested error code for this,
that would be very helpful:)

Thanks,
Reiji
Alexandru Elisei Dec. 1, 2021, 3:24 p.m. UTC | #11
Hi Reiji,

On Tue, Nov 16, 2021 at 10:43:33PM -0800, Reiji Watanabe wrote:
> This patch lays the groundwork to make ID registers writable.
> 
> Introduce struct id_reg_info for an ID register to manage the
> register specific control of its value for the guest, and provide set
> of functions commonly used for ID registers to make them writable.
> 
> The id_reg_info is used to do register specific initialization,
> validation of the ID register and etc.  Not all ID registers must
> have the id_reg_info. ID registers that don't have the id_reg_info
> are handled in a common way that is applied to all ID registers.
> 
> At present, changing an ID register from userspace is allowed only
> if the ID register has the id_reg_info, but that will be changed
> by the following patches.
> 
> No ID register has the structure yet and the following patches
> will add the id_reg_info for some ID registers.
> 
> Signed-off-by: Reiji Watanabe <reijiw@google.com>
> ---
>  arch/arm64/include/asm/sysreg.h |   1 +
>  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
>  2 files changed, 218 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 16b3f1a1d468..597609f26331 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -1197,6 +1197,7 @@
>  #define ICH_VTR_TDS_MASK	(1 << ICH_VTR_TDS_SHIFT)
>  
>  #define ARM64_FEATURE_FIELD_BITS	4
> +#define ARM64_FEATURE_FIELD_MASK	((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
>  
>  /* Create a mask for the feature bits of the specified feature. */
>  #define ARM64_FEATURE_MASK(x)	(GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 5608d3410660..1552cd5581b7 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
>  		return read_zero(vcpu, p);
>  }
>  
> +/*
> + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> + * ftr_check_types of id_reg_info.
> + */
> +enum feature_check_type {
> +	FCT_LOWER_SAFE = 0,
> +	FCT_HIGHER_SAFE,
> +	FCT_HIGHER_OR_ZERO_SAFE,
> +	FCT_EXACT,
> +	FCT_EXACT_OR_ZERO_SAFE,
> +	FCT_IGNORE,	/* Don't check (any value is fine) */
> +};
> +
> +static int arm64_check_feature_one(enum feature_check_type type, int val,
> +				   int limit)
> +{
> +	bool is_safe = false;
> +
> +	if (val == limit)
> +		return 0;
> +
> +	switch (type) {
> +	case FCT_LOWER_SAFE:
> +		is_safe = (val <= limit);
> +		break;
> +	case FCT_HIGHER_OR_ZERO_SAFE:
> +		if (val == 0) {
> +			is_safe = true;
> +			break;
> +		}
> +		fallthrough;
> +	case FCT_HIGHER_SAFE:
> +		is_safe = (val >= limit);
> +		break;
> +	case FCT_EXACT:
> +		break;
> +	case FCT_EXACT_OR_ZERO_SAFE:
> +		is_safe = (val == 0);
> +		break;
> +	case FCT_IGNORE:

What happens if the a new feature is added and the field has a particular
meaning? How are you going to deal with old userspace implementations that
use a value here which now is not allowed or it affects the guest?

Thanks,
Alex
Alexandru Elisei Dec. 1, 2021, 3:38 p.m. UTC | #12
Hi Reiji,

On Wed, Nov 24, 2021 at 09:27:32PM -0800, Reiji Watanabe wrote:
> On Sun, Nov 21, 2021 at 4:37 AM Marc Zyngier <maz@kernel.org> wrote:
> >
> > On Wed, 17 Nov 2021 06:43:33 +0000,
> > Reiji Watanabe <reijiw@google.com> wrote:
> > >
> > > This patch lays the groundwork to make ID registers writable.
> > >
> > > Introduce struct id_reg_info for an ID register to manage the
> > > register specific control of its value for the guest, and provide set
> > > of functions commonly used for ID registers to make them writable.
> > >
> > > The id_reg_info is used to do register specific initialization,
> > > validation of the ID register and etc.  Not all ID registers must
> > > have the id_reg_info. ID registers that don't have the id_reg_info
> > > are handled in a common way that is applied to all ID registers.
> > >
> > > At present, changing an ID register from userspace is allowed only
> > > if the ID register has the id_reg_info, but that will be changed
> > > by the following patches.
> > >
> > > No ID register has the structure yet and the following patches
> > > will add the id_reg_info for some ID registers.
> > >
> > > Signed-off-by: Reiji Watanabe <reijiw@google.com>
> > > ---
> > >  arch/arm64/include/asm/sysreg.h |   1 +
> > >  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
> > >  2 files changed, 218 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > > index 16b3f1a1d468..597609f26331 100644
> > > --- a/arch/arm64/include/asm/sysreg.h
> > > +++ b/arch/arm64/include/asm/sysreg.h
> > > @@ -1197,6 +1197,7 @@
> > >  #define ICH_VTR_TDS_MASK     (1 << ICH_VTR_TDS_SHIFT)
> > >
> > >  #define ARM64_FEATURE_FIELD_BITS     4
> > > +#define ARM64_FEATURE_FIELD_MASK     ((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
> > >
> > >  /* Create a mask for the feature bits of the specified feature. */
> > >  #define ARM64_FEATURE_MASK(x)        (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> > > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > > index 5608d3410660..1552cd5581b7 100644
> > > --- a/arch/arm64/kvm/sys_regs.c
> > > +++ b/arch/arm64/kvm/sys_regs.c
> > > @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
> > >               return read_zero(vcpu, p);
> > >  }
> > >
> > > +/*
> > > + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> > > + * ftr_check_types of id_reg_info.
> > > + */
> > > +enum feature_check_type {
> > > +     FCT_LOWER_SAFE = 0,
> > > +     FCT_HIGHER_SAFE,
> > > +     FCT_HIGHER_OR_ZERO_SAFE,
> > > +     FCT_EXACT,
> > > +     FCT_EXACT_OR_ZERO_SAFE,
> > > +     FCT_IGNORE,     /* Don't check (any value is fine) */
> > > +};
> > > +
> > > +static int arm64_check_feature_one(enum feature_check_type type, int val,
> > > +                                int limit)
> > > +{
> > > +     bool is_safe = false;
> > > +
> > > +     if (val == limit)
> > > +             return 0;
> > > +
> > > +     switch (type) {
> > > +     case FCT_LOWER_SAFE:
> > > +             is_safe = (val <= limit);
> > > +             break;
> > > +     case FCT_HIGHER_OR_ZERO_SAFE:
> > > +             if (val == 0) {
> > > +                     is_safe = true;
> > > +                     break;
> > > +             }
> > > +             fallthrough;
> > > +     case FCT_HIGHER_SAFE:
> > > +             is_safe = (val >= limit);
> > > +             break;
> > > +     case FCT_EXACT:
> > > +             break;
> > > +     case FCT_EXACT_OR_ZERO_SAFE:
> > > +             is_safe = (val == 0);
> > > +             break;
> > > +     case FCT_IGNORE:
> > > +             is_safe = true;
> > > +             break;
> > > +     default:
> > > +             WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
> > > +             break;
> > > +     }
> > > +
> > > +     return is_safe ? 0 : -1;
> > > +}
> > > +
> > > +#define      FCT_TYPE_MASK           0x7
> > > +#define      FCT_TYPE_SHIFT          1
> > > +#define      FCT_SIGN_MASK           0x1
> > > +#define      FCT_SIGN_SHIFT          0
> > > +#define      FCT_TYPE(val)   ((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
> > > +#define      FCT_SIGN(val)   ((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
> > > +
> > > +#define      MAKE_FCT(shift, type, sign)                             \
> > > +     ((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |   \
> > > +            (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
> > > +
> > > +/* For signed field */
> > > +#define      S_FCT(shift, type)      MAKE_FCT(shift, type, 1)
> > > +/* For unigned field */
> > > +#define      U_FCT(shift, type)      MAKE_FCT(shift, type, 0)
> > > +
> > > +/*
> > > + * @val and @lim are both a value of the ID register. The function checks
> > > + * if all features indicated in @val can be supported for guests on the host,
> > > + * which supports features indicated in @lim. @check_types indicates how
> > > + * features in the ID register needs to be checked.
> > > + * See comments for id_reg_info's ftr_check_types field for more detail.
> > > + */
> > > +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
> > > +{
> > > +     int i;
> > > +
> > > +     for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
> > > +             u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
> > > +             bool is_sign = FCT_SIGN(ftr_check);
> > > +             enum feature_check_type fctype = FCT_TYPE(ftr_check);
> > > +             int fval, flim, ret;
> > > +
> > > +             fval = cpuid_feature_extract_field(val, i, is_sign);
> > > +             flim = cpuid_feature_extract_field(lim, i, is_sign);
> > > +
> > > +             ret = arm64_check_feature_one(fctype, fval, flim);
> > > +             if (ret)
> > > +                     return -E2BIG;
> > > +     }
> > > +     return 0;
> > > +}
> >
> > All this logic seems to reinvent what we already have in
> > arch/arm64/kernel/cpufeature.c. I'd rather we rely on it and maintain
> > a single idreg handling library.
> >
> > Could you outline what is missing in the cpufeature code that requires
> > you to invent your own? I'm sure Suzuki could help here to make it
> > directly usable.
> 
> The issue is that there are some fields whose arm64_ftr_bits don't
> match what (I think) I need.  However, looking into that option again,
> it seems that the number of such fields are fewer than I originally
> thought (I misunderstood some earlier).
> 
> They are just three fields below.  The common checking process can be
> skipped for those fields (will restore ignore_mask field in id_reg_info
> as I had in v1 patch, which is treated like FCT_IGNORE in the v3 patch),
> and I will have their ID register specific validation function do
> what I want to check into the fields.
> 
>  - AA64DFR0.DEBUGVER:
>    Its .type is FTR_EXACT.
>    I want to treat its .type as FTR_LOWER_SAFE for the check.
> 
>  - AA64DFR0.PMUVER:
>    Its .sign is FTR_SIGNED and .type is FTR_EXACT.
>    I want to treat its .sign as FTR_UNSIGNED and .type as
>    FTR_LOWER_SAFE for the check.
> 
>  - DFR0.PERFMON:
>    Its .sign is FTR_SIGNED (Its .type is FTR_LOWER_SAFE).
>    I want to treat its .sign field as FTR_UNSIGNED for the check.
> 
>    (NOTE: For PMUVER and PERFMON, Arm ARM says "if the field value
>     is not 0xf the field is treated as an unsigned value")
> 

I don't think it's required that you use the same ID register field
definitions from cpufeature.c, you can create your own field definitions
for the KVM registers if they are different. But if you use the same
structs and field attributes from cpufeature.h, then you can reuse the
functions from cpufeature.c. I think that's what Marc was suggesting
(someone please correct me if I'm wrong).

The way Linux handles cpu features is already complicated, I think reusing
the same mechanism would be preferable from a maintenance and correctness
perspective. Unless you need something that is missing from cpu features
for which is unreasonable or impossible to add support.

Thanks,
Alex
Reiji Watanabe Dec. 2, 2021, 4:09 a.m. UTC | #13
Hi Alex,

On Wed, Dec 1, 2021 at 7:24 AM Alexandru Elisei
<alexandru.elisei@arm.com> wrote:
>
> Hi Reiji,
>
> On Tue, Nov 16, 2021 at 10:43:33PM -0800, Reiji Watanabe wrote:
> > This patch lays the groundwork to make ID registers writable.
> >
> > Introduce struct id_reg_info for an ID register to manage the
> > register specific control of its value for the guest, and provide set
> > of functions commonly used for ID registers to make them writable.
> >
> > The id_reg_info is used to do register specific initialization,
> > validation of the ID register and etc.  Not all ID registers must
> > have the id_reg_info. ID registers that don't have the id_reg_info
> > are handled in a common way that is applied to all ID registers.
> >
> > At present, changing an ID register from userspace is allowed only
> > if the ID register has the id_reg_info, but that will be changed
> > by the following patches.
> >
> > No ID register has the structure yet and the following patches
> > will add the id_reg_info for some ID registers.
> >
> > Signed-off-by: Reiji Watanabe <reijiw@google.com>
> > ---
> >  arch/arm64/include/asm/sysreg.h |   1 +
> >  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
> >  2 files changed, 218 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > index 16b3f1a1d468..597609f26331 100644
> > --- a/arch/arm64/include/asm/sysreg.h
> > +++ b/arch/arm64/include/asm/sysreg.h
> > @@ -1197,6 +1197,7 @@
> >  #define ICH_VTR_TDS_MASK     (1 << ICH_VTR_TDS_SHIFT)
> >
> >  #define ARM64_FEATURE_FIELD_BITS     4
> > +#define ARM64_FEATURE_FIELD_MASK     ((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
> >
> >  /* Create a mask for the feature bits of the specified feature. */
> >  #define ARM64_FEATURE_MASK(x)        (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 5608d3410660..1552cd5581b7 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
> >               return read_zero(vcpu, p);
> >  }
> >
> > +/*
> > + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> > + * ftr_check_types of id_reg_info.
> > + */
> > +enum feature_check_type {
> > +     FCT_LOWER_SAFE = 0,
> > +     FCT_HIGHER_SAFE,
> > +     FCT_HIGHER_OR_ZERO_SAFE,
> > +     FCT_EXACT,
> > +     FCT_EXACT_OR_ZERO_SAFE,
> > +     FCT_IGNORE,     /* Don't check (any value is fine) */
> > +};
> > +
> > +static int arm64_check_feature_one(enum feature_check_type type, int val,
> > +                                int limit)
> > +{
> > +     bool is_safe = false;
> > +
> > +     if (val == limit)
> > +             return 0;
> > +
> > +     switch (type) {
> > +     case FCT_LOWER_SAFE:
> > +             is_safe = (val <= limit);
> > +             break;
> > +     case FCT_HIGHER_OR_ZERO_SAFE:
> > +             if (val == 0) {
> > +                     is_safe = true;
> > +                     break;
> > +             }
> > +             fallthrough;
> > +     case FCT_HIGHER_SAFE:
> > +             is_safe = (val >= limit);
> > +             break;
> > +     case FCT_EXACT:
> > +             break;
> > +     case FCT_EXACT_OR_ZERO_SAFE:
> > +             is_safe = (val == 0);
> > +             break;
> > +     case FCT_IGNORE:
>
> What happens if the a new feature is added and the field has a particular
> meaning? How are you going to deal with old userspace implementations that
> use a value here which now is not allowed or it affects the guest?

With this v3 series, unless KVM is changed for the new field,
a new feature will be treated as lower safe (that's the default).
If the field won't fit any of those cases, FCT_IGNORE needs to be
used for the field, and the ID register specific validation function,
which will be registered in id_reg_info, needs to validate the field.

Old userspace implementation shouldn't be affected because the default
values (the values right after the first KVM_ARM_VCPU_INIT) for
ID registers won't be changed by this series (patch-9 changes
AA64DFR0.PMUVER/DFR0.PERFMON but it is due to a bug fix), and the
default value, which is basically same as @limit (or indicates
less or smaller level of features than @limit for features that
can be configured by KVM_ARM_VCPU_INIT, etc), is always allowed
by arm64_check_feature_one().

Having said that, arm64_check_feature_one() will be gone from the next
version, and the similar checking will be done by a new function in
arch/arm64/kernel/cpufeature.c that will use arm64_ftr_bits instead.

  https://lore.kernel.org/all/CAAeT=FxwzRF0YZmmoEmq3xRHnhun-BCx_FeEQrOVLgzwseSy4w@mail.gmail.com/

Unless KVM is changed for the new field, it will be validated based
on arm64_ftr_bits for the field.  If KVM needs to handle the field
differently, then we will have the new function ignore the field,
and will have the ID register specific validation function handle
the field.

Thanks,
Reiji
Reiji Watanabe Dec. 2, 2021, 4:32 a.m. UTC | #14
Hi Alex,

On Wed, Dec 1, 2021 at 7:39 AM Alexandru Elisei
<alexandru.elisei@arm.com> wrote:
>
> Hi Reiji,
>
> On Wed, Nov 24, 2021 at 09:27:32PM -0800, Reiji Watanabe wrote:
> > On Sun, Nov 21, 2021 at 4:37 AM Marc Zyngier <maz@kernel.org> wrote:
> > >
> > > On Wed, 17 Nov 2021 06:43:33 +0000,
> > > Reiji Watanabe <reijiw@google.com> wrote:
> > > >
> > > > This patch lays the groundwork to make ID registers writable.
> > > >
> > > > Introduce struct id_reg_info for an ID register to manage the
> > > > register specific control of its value for the guest, and provide set
> > > > of functions commonly used for ID registers to make them writable.
> > > >
> > > > The id_reg_info is used to do register specific initialization,
> > > > validation of the ID register and etc.  Not all ID registers must
> > > > have the id_reg_info. ID registers that don't have the id_reg_info
> > > > are handled in a common way that is applied to all ID registers.
> > > >
> > > > At present, changing an ID register from userspace is allowed only
> > > > if the ID register has the id_reg_info, but that will be changed
> > > > by the following patches.
> > > >
> > > > No ID register has the structure yet and the following patches
> > > > will add the id_reg_info for some ID registers.
> > > >
> > > > Signed-off-by: Reiji Watanabe <reijiw@google.com>
> > > > ---
> > > >  arch/arm64/include/asm/sysreg.h |   1 +
> > > >  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
> > > >  2 files changed, 218 insertions(+), 9 deletions(-)
> > > >
> > > > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > > > index 16b3f1a1d468..597609f26331 100644
> > > > --- a/arch/arm64/include/asm/sysreg.h
> > > > +++ b/arch/arm64/include/asm/sysreg.h
> > > > @@ -1197,6 +1197,7 @@
> > > >  #define ICH_VTR_TDS_MASK     (1 << ICH_VTR_TDS_SHIFT)
> > > >
> > > >  #define ARM64_FEATURE_FIELD_BITS     4
> > > > +#define ARM64_FEATURE_FIELD_MASK     ((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
> > > >
> > > >  /* Create a mask for the feature bits of the specified feature. */
> > > >  #define ARM64_FEATURE_MASK(x)        (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> > > > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > > > index 5608d3410660..1552cd5581b7 100644
> > > > --- a/arch/arm64/kvm/sys_regs.c
> > > > +++ b/arch/arm64/kvm/sys_regs.c
> > > > @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
> > > >               return read_zero(vcpu, p);
> > > >  }
> > > >
> > > > +/*
> > > > + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> > > > + * ftr_check_types of id_reg_info.
> > > > + */
> > > > +enum feature_check_type {
> > > > +     FCT_LOWER_SAFE = 0,
> > > > +     FCT_HIGHER_SAFE,
> > > > +     FCT_HIGHER_OR_ZERO_SAFE,
> > > > +     FCT_EXACT,
> > > > +     FCT_EXACT_OR_ZERO_SAFE,
> > > > +     FCT_IGNORE,     /* Don't check (any value is fine) */
> > > > +};
> > > > +
> > > > +static int arm64_check_feature_one(enum feature_check_type type, int val,
> > > > +                                int limit)
> > > > +{
> > > > +     bool is_safe = false;
> > > > +
> > > > +     if (val == limit)
> > > > +             return 0;
> > > > +
> > > > +     switch (type) {
> > > > +     case FCT_LOWER_SAFE:
> > > > +             is_safe = (val <= limit);
> > > > +             break;
> > > > +     case FCT_HIGHER_OR_ZERO_SAFE:
> > > > +             if (val == 0) {
> > > > +                     is_safe = true;
> > > > +                     break;
> > > > +             }
> > > > +             fallthrough;
> > > > +     case FCT_HIGHER_SAFE:
> > > > +             is_safe = (val >= limit);
> > > > +             break;
> > > > +     case FCT_EXACT:
> > > > +             break;
> > > > +     case FCT_EXACT_OR_ZERO_SAFE:
> > > > +             is_safe = (val == 0);
> > > > +             break;
> > > > +     case FCT_IGNORE:
> > > > +             is_safe = true;
> > > > +             break;
> > > > +     default:
> > > > +             WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
> > > > +             break;
> > > > +     }
> > > > +
> > > > +     return is_safe ? 0 : -1;
> > > > +}
> > > > +
> > > > +#define      FCT_TYPE_MASK           0x7
> > > > +#define      FCT_TYPE_SHIFT          1
> > > > +#define      FCT_SIGN_MASK           0x1
> > > > +#define      FCT_SIGN_SHIFT          0
> > > > +#define      FCT_TYPE(val)   ((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
> > > > +#define      FCT_SIGN(val)   ((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
> > > > +
> > > > +#define      MAKE_FCT(shift, type, sign)                             \
> > > > +     ((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |   \
> > > > +            (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
> > > > +
> > > > +/* For signed field */
> > > > +#define      S_FCT(shift, type)      MAKE_FCT(shift, type, 1)
> > > > +/* For unigned field */
> > > > +#define      U_FCT(shift, type)      MAKE_FCT(shift, type, 0)
> > > > +
> > > > +/*
> > > > + * @val and @lim are both a value of the ID register. The function checks
> > > > + * if all features indicated in @val can be supported for guests on the host,
> > > > + * which supports features indicated in @lim. @check_types indicates how
> > > > + * features in the ID register needs to be checked.
> > > > + * See comments for id_reg_info's ftr_check_types field for more detail.
> > > > + */
> > > > +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
> > > > +{
> > > > +     int i;
> > > > +
> > > > +     for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
> > > > +             u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
> > > > +             bool is_sign = FCT_SIGN(ftr_check);
> > > > +             enum feature_check_type fctype = FCT_TYPE(ftr_check);
> > > > +             int fval, flim, ret;
> > > > +
> > > > +             fval = cpuid_feature_extract_field(val, i, is_sign);
> > > > +             flim = cpuid_feature_extract_field(lim, i, is_sign);
> > > > +
> > > > +             ret = arm64_check_feature_one(fctype, fval, flim);
> > > > +             if (ret)
> > > > +                     return -E2BIG;
> > > > +     }
> > > > +     return 0;
> > > > +}
> > >
> > > All this logic seems to reinvent what we already have in
> > > arch/arm64/kernel/cpufeature.c. I'd rather we rely on it and maintain
> > > a single idreg handling library.
> > >
> > > Could you outline what is missing in the cpufeature code that requires
> > > you to invent your own? I'm sure Suzuki could help here to make it
> > > directly usable.
> >
> > The issue is that there are some fields whose arm64_ftr_bits don't
> > match what (I think) I need.  However, looking into that option again,
> > it seems that the number of such fields are fewer than I originally
> > thought (I misunderstood some earlier).
> >
> > They are just three fields below.  The common checking process can be
> > skipped for those fields (will restore ignore_mask field in id_reg_info
> > as I had in v1 patch, which is treated like FCT_IGNORE in the v3 patch),
> > and I will have their ID register specific validation function do
> > what I want to check into the fields.
> >
> >  - AA64DFR0.DEBUGVER:
> >    Its .type is FTR_EXACT.
> >    I want to treat its .type as FTR_LOWER_SAFE for the check.
> >
> >  - AA64DFR0.PMUVER:
> >    Its .sign is FTR_SIGNED and .type is FTR_EXACT.
> >    I want to treat its .sign as FTR_UNSIGNED and .type as
> >    FTR_LOWER_SAFE for the check.
> >
> >  - DFR0.PERFMON:
> >    Its .sign is FTR_SIGNED (Its .type is FTR_LOWER_SAFE).
> >    I want to treat its .sign field as FTR_UNSIGNED for the check.
> >
> >    (NOTE: For PMUVER and PERFMON, Arm ARM says "if the field value
> >     is not 0xf the field is treated as an unsigned value")
> >
>
> I don't think it's required that you use the same ID register field
> definitions from cpufeature.c, you can create your own field definitions
> for the KVM registers if they are different. But if you use the same
> structs and field attributes from cpufeature.h, then you can reuse the
> functions from cpufeature.c. I think that's what Marc was suggesting
> (someone please correct me if I'm wrong).
>
> The way Linux handles cpu features is already complicated, I think reusing
> the same mechanism would be preferable from a maintenance and correctness
> perspective. Unless you need something that is missing from cpu features
> for which is unreasonable or impossible to add support.

Thank you for your comments ! I will explore a way that is easy to
maintain for both cpufeature.c and KVM.  Since almost all ID register
fields can be handled in the same way as cpufeature.c, I will probably
use the same ID register fields though.

Thanks,
Reiji
Eric Auger Dec. 2, 2021, 12:51 p.m. UTC | #15
On 11/25/21 7:40 AM, Reiji Watanabe wrote:
> Hi Eric,
> 
> On Wed, Nov 24, 2021 at 1:07 PM Eric Auger <eauger@redhat.com> wrote:
>>
>> Hi Reiji,
>>
>> On 11/17/21 7:43 AM, Reiji Watanabe wrote:
>>> This patch lays the groundwork to make ID registers writable.
>>>
>>> Introduce struct id_reg_info for an ID register to manage the
>>> register specific control of its value for the guest, and provide set
>>> of functions commonly used for ID registers to make them writable.
>>>
>>> The id_reg_info is used to do register specific initialization,
>>> validation of the ID register and etc.  Not all ID registers must
>>> have the id_reg_info. ID registers that don't have the id_reg_info
>>> are handled in a common way that is applied to all ID registers.
>>>
>>> At present, changing an ID register from userspace is allowed only
>>> if the ID register has the id_reg_info, but that will be changed
>>> by the following patches.
>>>
>>> No ID register has the structure yet and the following patches
>>> will add the id_reg_info for some ID registers.
>>>
>>> Signed-off-by: Reiji Watanabe <reijiw@google.com>
>>> ---
>>>  arch/arm64/include/asm/sysreg.h |   1 +
>>>  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
>>>  2 files changed, 218 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>>> index 16b3f1a1d468..597609f26331 100644
>>> --- a/arch/arm64/include/asm/sysreg.h
>>> +++ b/arch/arm64/include/asm/sysreg.h
>>> @@ -1197,6 +1197,7 @@
>>>  #define ICH_VTR_TDS_MASK     (1 << ICH_VTR_TDS_SHIFT)
>>>
>>>  #define ARM64_FEATURE_FIELD_BITS     4
>>> +#define ARM64_FEATURE_FIELD_MASK     ((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
>>>
>>>  /* Create a mask for the feature bits of the specified feature. */
>>>  #define ARM64_FEATURE_MASK(x)        (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
>>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>>> index 5608d3410660..1552cd5581b7 100644
>>> --- a/arch/arm64/kvm/sys_regs.c
>>> +++ b/arch/arm64/kvm/sys_regs.c
>>> @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
>>>               return read_zero(vcpu, p);
>>>  }
>>>
>>> +/*
>>> + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
>>> + * ftr_check_types of id_reg_info.
>>> + */
>>> +enum feature_check_type {
>>> +     FCT_LOWER_SAFE = 0,
>>> +     FCT_HIGHER_SAFE,
>>> +     FCT_HIGHER_OR_ZERO_SAFE,
>>> +     FCT_EXACT,
>>> +     FCT_EXACT_OR_ZERO_SAFE,
>>> +     FCT_IGNORE,     /* Don't check (any value is fine) */
>>> +};
>>> +
>>> +static int arm64_check_feature_one(enum feature_check_type type, int val,
>>> +                                int limit)
>>> +{
>>> +     bool is_safe = false;
>>> +
>>> +     if (val == limit)
>>> +             return 0;
>>> +
>>> +     switch (type) {
>>> +     case FCT_LOWER_SAFE:
>>> +             is_safe = (val <= limit);
>>> +             break;
>>> +     case FCT_HIGHER_OR_ZERO_SAFE:
>>> +             if (val == 0) {
>>> +                     is_safe = true;
>>> +                     break;
>>> +             }
>>> +             fallthrough;
>>> +     case FCT_HIGHER_SAFE:
>>> +             is_safe = (val >= limit);
>>> +             break;
>>> +     case FCT_EXACT:
>>> +             break;
>>> +     case FCT_EXACT_OR_ZERO_SAFE:
>>> +             is_safe = (val == 0);
>>> +             break;
>>> +     case FCT_IGNORE:
>>> +             is_safe = true;
>>> +             break;
>>> +     default:
>>> +             WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
>>> +             break;
>>> +     }
>>> +
>>> +     return is_safe ? 0 : -1;
>>> +}
>>> +
>>> +#define      FCT_TYPE_MASK           0x7
>>> +#define      FCT_TYPE_SHIFT          1
>>> +#define      FCT_SIGN_MASK           0x1
>>> +#define      FCT_SIGN_SHIFT          0
>>> +#define      FCT_TYPE(val)   ((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
>>> +#define      FCT_SIGN(val)   ((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
>>> +
>>> +#define      MAKE_FCT(shift, type, sign)                             \
>>> +     ((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |   \
>>> +            (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
>>> +
>>> +/* For signed field */
>>> +#define      S_FCT(shift, type)      MAKE_FCT(shift, type, 1)
>>> +/* For unigned field */
>>> +#define      U_FCT(shift, type)      MAKE_FCT(shift, type, 0)
>>> +
>>> +/*
>>> + * @val and @lim are both a value of the ID register. The function checks
>>> + * if all features indicated in @val can be supported for guests on the host,
>>> + * which supports features indicated in @lim. @check_types indicates how
>>> + * features in the ID register needs to be checked.
>>> + * See comments for id_reg_info's ftr_check_types field for more detail.
>>> + */
>>> +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
>>> +{
>>> +     int i;
>>> +
>>> +     for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
>>> +             u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
>>> +             bool is_sign = FCT_SIGN(ftr_check);
>>> +             enum feature_check_type fctype = FCT_TYPE(ftr_check);
>>> +             int fval, flim, ret;
>>> +
>>> +             fval = cpuid_feature_extract_field(val, i, is_sign);
>>> +             flim = cpuid_feature_extract_field(lim, i, is_sign);
>>> +
>>> +             ret = arm64_check_feature_one(fctype, fval, flim);
>>> +             if (ret)
>>> +                     return -E2BIG;
>> nit: -EINVAL may be better because depending on the check type this may
>> not mean too big.
> 
> Yes, that is correct.
> 
> This error case means that userspace tried to configure features
> or a higher level of features that were not supported on the host.
> In that sense, I chose -E2BIG.
> 
> I wanted to use an error code specific to this particular case, which
> I think makes debugging userspace issue easier when KVM_SET_ONE_REG
> fails, and I couldn't find other error codes that fit this case better.
> So, I'm trying to avoid using -EINVAL, which is used for other failure
> cases.
> 
> If you have any other suggested error code for this,
> that would be very helpful:)

OK faire enought, that's a nit anyway

Eric
> 
> Thanks,
> Reiji
>
Eric Auger Dec. 2, 2021, 12:51 p.m. UTC | #16
Hi Reiji,

On 11/17/21 7:43 AM, Reiji Watanabe wrote:
> This patch lays the groundwork to make ID registers writable.
> 
> Introduce struct id_reg_info for an ID register to manage the
> register specific control of its value for the guest, and provide set
> of functions commonly used for ID registers to make them writable.
> 
> The id_reg_info is used to do register specific initialization,
> validation of the ID register and etc.  Not all ID registers must
> have the id_reg_info. ID registers that don't have the id_reg_info
> are handled in a common way that is applied to all ID registers.
> 
> At present, changing an ID register from userspace is allowed only
> if the ID register has the id_reg_info, but that will be changed
> by the following patches.
> 
> No ID register has the structure yet and the following patches
> will add the id_reg_info for some ID registers.
> 
> Signed-off-by: Reiji Watanabe <reijiw@google.com>
> ---
>  arch/arm64/include/asm/sysreg.h |   1 +
>  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
>  2 files changed, 218 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 16b3f1a1d468..597609f26331 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -1197,6 +1197,7 @@
>  #define ICH_VTR_TDS_MASK	(1 << ICH_VTR_TDS_SHIFT)
>  
>  #define ARM64_FEATURE_FIELD_BITS	4
> +#define ARM64_FEATURE_FIELD_MASK	((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
>  
>  /* Create a mask for the feature bits of the specified feature. */
>  #define ARM64_FEATURE_MASK(x)	(GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 5608d3410660..1552cd5581b7 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
>  		return read_zero(vcpu, p);
>  }
>  
> +/*
> + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> + * ftr_check_types of id_reg_info.
> + */
> +enum feature_check_type {
> +	FCT_LOWER_SAFE = 0,
> +	FCT_HIGHER_SAFE,
> +	FCT_HIGHER_OR_ZERO_SAFE,
> +	FCT_EXACT,
> +	FCT_EXACT_OR_ZERO_SAFE,
> +	FCT_IGNORE,	/* Don't check (any value is fine) */
> +};
> +
> +static int arm64_check_feature_one(enum feature_check_type type, int val,
> +				   int limit)
> +{
> +	bool is_safe = false;
> +
> +	if (val == limit)
> +		return 0;
> +
> +	switch (type) {
> +	case FCT_LOWER_SAFE:
> +		is_safe = (val <= limit);
> +		break;
> +	case FCT_HIGHER_OR_ZERO_SAFE:
> +		if (val == 0) {
> +			is_safe = true;
> +			break;
> +		}
> +		fallthrough;
> +	case FCT_HIGHER_SAFE:
> +		is_safe = (val >= limit);
> +		break;
> +	case FCT_EXACT:
> +		break;
> +	case FCT_EXACT_OR_ZERO_SAFE:
> +		is_safe = (val == 0);
> +		break;
> +	case FCT_IGNORE:
> +		is_safe = true;
> +		break;
> +	default:
> +		WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
> +		break;
> +	}
> +
> +	return is_safe ? 0 : -1;
> +}
> +
> +#define	FCT_TYPE_MASK		0x7
> +#define	FCT_TYPE_SHIFT		1
> +#define	FCT_SIGN_MASK		0x1
> +#define	FCT_SIGN_SHIFT		0
> +#define	FCT_TYPE(val)	((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
> +#define	FCT_SIGN(val)	((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
> +
> +#define	MAKE_FCT(shift, type, sign)				\
> +	((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |	\
> +	       (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
> +
> +/* For signed field */
> +#define	S_FCT(shift, type)	MAKE_FCT(shift, type, 1)
> +/* For unigned field */
> +#define	U_FCT(shift, type)	MAKE_FCT(shift, type, 0)
> +
> +/*
> + * @val and @lim are both a value of the ID register. The function checks
> + * if all features indicated in @val can be supported for guests on the host,
> + * which supports features indicated in @lim. @check_types indicates how
> + * features in the ID register needs to be checked.
> + * See comments for id_reg_info's ftr_check_types field for more detail.
> + */
> +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
> +{
> +	int i;
> +
> +	for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
> +		u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
> +		bool is_sign = FCT_SIGN(ftr_check);
> +		enum feature_check_type fctype = FCT_TYPE(ftr_check);
> +		int fval, flim, ret;
> +
> +		fval = cpuid_feature_extract_field(val, i, is_sign);
> +		flim = cpuid_feature_extract_field(lim, i, is_sign);
> +
> +		ret = arm64_check_feature_one(fctype, fval, flim);
> +		if (ret)
> +			return -E2BIG;
> +	}
> +	return 0;
> +}
> +
> +struct id_reg_info {
> +	u32	sys_reg;	/* Register ID */
> +
> +	/*
> +	 * Limit value of the register for a vcpu. The value is the sanitized
> +	 * system value with bits cleared for unsupported features for the
> +	 * guest.
> +	 */
> +	u64	vcpu_limit_val;
> +
> +	/*
> +	 * The ftr_check_types is comprised of a set of 4 bits fields.
> +	 * Each 4 bits field is for a feature indicated by the same bits
> +	 * field of the ID register and indicates how the feature support
> +	 * for guests needs to be checked.
> +	 * The bit 0 indicates that the corresponding ID register field
> +	 * is signed(1) or unsigned(0).
> +	 * The bits [3:1] hold feature_check_type for the field.
> +	 * If all zero, all features in the ID register are treated as unsigned
> +	 * fields and checked based on Principles of the ID scheme for fields
> +	 * in ID registers (FCT_LOWER_SAFE of feature_check_type).
> +	 */
> +	u64	ftr_check_types;
> +
> +	/* Initialization function of the id_reg_info */
> +	void (*init)(struct id_reg_info *id_reg);
> +
> +	/* Register specific validation function */
> +	int (*validate)(struct kvm_vcpu *vcpu, const struct id_reg_info *id_reg,
> +			u64 val);
> +
> +	/* Return the reset value of the register for the vCPU */
> +	u64 (*get_reset_val)(struct kvm_vcpu *vcpu,
> +			     const struct id_reg_info *id_reg);
It is unclear to me why we need 2 different callbacks, ie. init and
get_reset_val. ID_REGS can only be accessed from user space after the
vcpu reset, right? So couldn't we have a single cb instead of this
overwrite mechanism?

Thanks

Eric
> +};
> +
> +static void id_reg_info_init(struct id_reg_info *id_reg)
> +{
> +	id_reg->vcpu_limit_val = read_sanitised_ftr_reg(id_reg->sys_reg);
> +	if (id_reg->init)
> +		id_reg->init(id_reg);
> +}
> +
> +/*
> + * An ID register that needs special handling to control the value for the
> + * guest must have its own id_reg_info in id_reg_info_table.
> + * (i.e. the reset value is different from the host's sanitized value,
> + * the value is affected by opt-in features, some fields needs specific
> + * validation, etc.)
> + */
> +#define	GET_ID_REG_INFO(id)	(id_reg_info_table[IDREG_IDX(id)])
> +static struct id_reg_info *id_reg_info_table[KVM_ARM_ID_REG_MAX_NUM] = {};
> +
> +static int validate_id_reg(struct kvm_vcpu *vcpu,
> +			   const struct sys_reg_desc *rd, u64 val)
> +{
> +	u32 id = reg_to_encoding(rd);
> +	const struct id_reg_info *id_reg = GET_ID_REG_INFO(id);
> +	u64 limit, check_types;
> +	int err;
> +
> +	if (id_reg) {
> +		check_types = id_reg->ftr_check_types;
> +		limit = id_reg->vcpu_limit_val;
> +	} else {
> +		/* All fields are treated as unsigned and FCT_LOWER_SAFE */
> +		check_types = 0;
> +		limit = read_sanitised_ftr_reg(id);
> +	}
> +
> +	/* Check if the value indicates any feature that is not in the limit. */
> +	err = arm64_check_features(check_types, val, limit);
> +	if (err)
> +		return err;
> +
> +	if (id_reg && id_reg->validate)
> +		/* Run the ID register specific validity check. */
> +		err = id_reg->validate(vcpu, id_reg, val);
> +
> +	return err;
> +}
> +
>  /*
>   * ARMv8.1 mandates at least a trivial LORegion implementation, where all the
>   * RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0
> @@ -1183,11 +1358,19 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
>  static void reset_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
>  {
>  	u32 id = reg_to_encoding(rd);
> +	struct id_reg_info *id_reg;
> +	u64 val;
>  
>  	if (vcpu_has_reset_once(vcpu))
>  		return;
>  
> -	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(id)) = read_sanitised_ftr_reg(id);
> +	id_reg = GET_ID_REG_INFO(id);
> +	if (id_reg && id_reg->get_reset_val)
> +		val = id_reg->get_reset_val(vcpu, id_reg);
> +	else
> +		val = read_sanitised_ftr_reg(id);
> +
> +	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(id)) = val;
>  }
>  
>  static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
> @@ -1232,11 +1415,7 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
>  	return 0;
>  }
>  
> -/*
> - * cpufeature ID register user accessors
> - *
> - * We don't allow the effective value to be changed.
> - */
> +/* cpufeature ID register user accessors */
>  static int __get_id_reg(const struct kvm_vcpu *vcpu,
>  			const struct sys_reg_desc *rd, void __user *uaddr,
>  			bool raz)
> @@ -1247,11 +1426,12 @@ static int __get_id_reg(const struct kvm_vcpu *vcpu,
>  	return reg_to_user(uaddr, &val, id);
>  }
>  
> -static int __set_id_reg(const struct kvm_vcpu *vcpu,
> +static int __set_id_reg(struct kvm_vcpu *vcpu,
>  			const struct sys_reg_desc *rd, void __user *uaddr,
>  			bool raz)
>  {
>  	const u64 id = sys_reg_to_index(rd);
> +	u32 encoding = reg_to_encoding(rd);
>  	int err;
>  	u64 val;
>  
> @@ -1259,10 +1439,22 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu,
>  	if (err)
>  		return err;
>  
> -	/* This is what we mean by invariant: you can't change it. */
> -	if (val != read_id_reg(vcpu, rd, raz))
> +	/* Don't allow to change the reg unless the reg has id_reg_info */
> +	if (val != read_id_reg(vcpu, rd, raz) && !GET_ID_REG_INFO(encoding))
>  		return -EINVAL;
>  
> +	if (raz)
> +		return 0;
> +
> +	/* Don't allow to change the reg after the first KVM_RUN. */
> +	if (vcpu->arch.has_run_once)
> +		return -EINVAL;
> +
> +	err = validate_id_reg(vcpu, rd, val);
> +	if (err)
> +		return err;
> +
> +	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(encoding)) = val;
>  	return 0;
>  }
>  
> @@ -2826,6 +3018,20 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
>  	return write_demux_regids(uindices);
>  }
>  
> +static void id_reg_info_init_all(void)
> +{
> +	int i;
> +	struct id_reg_info *id_reg;
> +
> +	for (i = 0; i < ARRAY_SIZE(id_reg_info_table); i++) {
> +		id_reg = (struct id_reg_info *)id_reg_info_table[i];
> +		if (!id_reg)
> +			continue;
> +
> +		id_reg_info_init(id_reg);
> +	}
> +}
> +
>  void kvm_sys_reg_table_init(void)
>  {
>  	unsigned int i;
> @@ -2860,4 +3066,6 @@ void kvm_sys_reg_table_init(void)
>  			break;
>  	/* Clear all higher bits. */
>  	cache_levels &= (1 << (i*3))-1;
> +
> +	id_reg_info_init_all();
>  }
>
Reiji Watanabe Dec. 4, 2021, 4:35 a.m. UTC | #17
Hi Eric,

On Thu, Dec 2, 2021 at 4:51 AM Eric Auger <eauger@redhat.com> wrote:
>
> Hi Reiji,
>
> On 11/17/21 7:43 AM, Reiji Watanabe wrote:
> > This patch lays the groundwork to make ID registers writable.
> >
> > Introduce struct id_reg_info for an ID register to manage the
> > register specific control of its value for the guest, and provide set
> > of functions commonly used for ID registers to make them writable.
> >
> > The id_reg_info is used to do register specific initialization,
> > validation of the ID register and etc.  Not all ID registers must
> > have the id_reg_info. ID registers that don't have the id_reg_info
> > are handled in a common way that is applied to all ID registers.
> >
> > At present, changing an ID register from userspace is allowed only
> > if the ID register has the id_reg_info, but that will be changed
> > by the following patches.
> >
> > No ID register has the structure yet and the following patches
> > will add the id_reg_info for some ID registers.
> >
> > Signed-off-by: Reiji Watanabe <reijiw@google.com>
> > ---
> >  arch/arm64/include/asm/sysreg.h |   1 +
> >  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
> >  2 files changed, 218 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > index 16b3f1a1d468..597609f26331 100644
> > --- a/arch/arm64/include/asm/sysreg.h
> > +++ b/arch/arm64/include/asm/sysreg.h
> > @@ -1197,6 +1197,7 @@
> >  #define ICH_VTR_TDS_MASK     (1 << ICH_VTR_TDS_SHIFT)
> >
> >  #define ARM64_FEATURE_FIELD_BITS     4
> > +#define ARM64_FEATURE_FIELD_MASK     ((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
> >
> >  /* Create a mask for the feature bits of the specified feature. */
> >  #define ARM64_FEATURE_MASK(x)        (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 5608d3410660..1552cd5581b7 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
> >               return read_zero(vcpu, p);
> >  }
> >
> > +/*
> > + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
> > + * ftr_check_types of id_reg_info.
> > + */
> > +enum feature_check_type {
> > +     FCT_LOWER_SAFE = 0,
> > +     FCT_HIGHER_SAFE,
> > +     FCT_HIGHER_OR_ZERO_SAFE,
> > +     FCT_EXACT,
> > +     FCT_EXACT_OR_ZERO_SAFE,
> > +     FCT_IGNORE,     /* Don't check (any value is fine) */
> > +};
> > +
> > +static int arm64_check_feature_one(enum feature_check_type type, int val,
> > +                                int limit)
> > +{
> > +     bool is_safe = false;
> > +
> > +     if (val == limit)
> > +             return 0;
> > +
> > +     switch (type) {
> > +     case FCT_LOWER_SAFE:
> > +             is_safe = (val <= limit);
> > +             break;
> > +     case FCT_HIGHER_OR_ZERO_SAFE:
> > +             if (val == 0) {
> > +                     is_safe = true;
> > +                     break;
> > +             }
> > +             fallthrough;
> > +     case FCT_HIGHER_SAFE:
> > +             is_safe = (val >= limit);
> > +             break;
> > +     case FCT_EXACT:
> > +             break;
> > +     case FCT_EXACT_OR_ZERO_SAFE:
> > +             is_safe = (val == 0);
> > +             break;
> > +     case FCT_IGNORE:
> > +             is_safe = true;
> > +             break;
> > +     default:
> > +             WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
> > +             break;
> > +     }
> > +
> > +     return is_safe ? 0 : -1;
> > +}
> > +
> > +#define      FCT_TYPE_MASK           0x7
> > +#define      FCT_TYPE_SHIFT          1
> > +#define      FCT_SIGN_MASK           0x1
> > +#define      FCT_SIGN_SHIFT          0
> > +#define      FCT_TYPE(val)   ((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
> > +#define      FCT_SIGN(val)   ((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
> > +
> > +#define      MAKE_FCT(shift, type, sign)                             \
> > +     ((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |   \
> > +            (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
> > +
> > +/* For signed field */
> > +#define      S_FCT(shift, type)      MAKE_FCT(shift, type, 1)
> > +/* For unigned field */
> > +#define      U_FCT(shift, type)      MAKE_FCT(shift, type, 0)
> > +
> > +/*
> > + * @val and @lim are both a value of the ID register. The function checks
> > + * if all features indicated in @val can be supported for guests on the host,
> > + * which supports features indicated in @lim. @check_types indicates how
> > + * features in the ID register needs to be checked.
> > + * See comments for id_reg_info's ftr_check_types field for more detail.
> > + */
> > +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
> > +{
> > +     int i;
> > +
> > +     for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
> > +             u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
> > +             bool is_sign = FCT_SIGN(ftr_check);
> > +             enum feature_check_type fctype = FCT_TYPE(ftr_check);
> > +             int fval, flim, ret;
> > +
> > +             fval = cpuid_feature_extract_field(val, i, is_sign);
> > +             flim = cpuid_feature_extract_field(lim, i, is_sign);
> > +
> > +             ret = arm64_check_feature_one(fctype, fval, flim);
> > +             if (ret)
> > +                     return -E2BIG;
> > +     }
> > +     return 0;
> > +}
> > +
> > +struct id_reg_info {
> > +     u32     sys_reg;        /* Register ID */
> > +
> > +     /*
> > +      * Limit value of the register for a vcpu. The value is the sanitized
> > +      * system value with bits cleared for unsupported features for the
> > +      * guest.
> > +      */
> > +     u64     vcpu_limit_val;
> > +
> > +     /*
> > +      * The ftr_check_types is comprised of a set of 4 bits fields.
> > +      * Each 4 bits field is for a feature indicated by the same bits
> > +      * field of the ID register and indicates how the feature support
> > +      * for guests needs to be checked.
> > +      * The bit 0 indicates that the corresponding ID register field
> > +      * is signed(1) or unsigned(0).
> > +      * The bits [3:1] hold feature_check_type for the field.
> > +      * If all zero, all features in the ID register are treated as unsigned
> > +      * fields and checked based on Principles of the ID scheme for fields
> > +      * in ID registers (FCT_LOWER_SAFE of feature_check_type).
> > +      */
> > +     u64     ftr_check_types;
> > +
> > +     /* Initialization function of the id_reg_info */
> > +     void (*init)(struct id_reg_info *id_reg);
> > +
> > +     /* Register specific validation function */
> > +     int (*validate)(struct kvm_vcpu *vcpu, const struct id_reg_info *id_reg,
> > +                     u64 val);
> > +
> > +     /* Return the reset value of the register for the vCPU */
> > +     u64 (*get_reset_val)(struct kvm_vcpu *vcpu,
> > +                          const struct id_reg_info *id_reg);
> It is unclear to me why we need 2 different callbacks, ie. init and
> get_reset_val. ID_REGS can only be accessed from user space after the
> vcpu reset, right? So couldn't we have a single cb instead of this
> overwrite mechanism?

Thank you for the comment.

What the init() does needs to be done just once.
It initializes the id_reg_info itself (not for the ID register of vCPU).
And the data initialized by the init() is used not just for the
overwrite mechanism at the vcpu reset but for other purposes as well.

What the get_reset_val does needs to be done for every initial vCPU reset.
It provides the initial value for the vCPU, which depends on its feature
configuration that is configured by KVM_ARM_VCPU_INIT (or other APIs).

Of course there are other ways to achieve the same, and it's entirely
possible to have a single function though.  I just chose to use a
separate function for each of those two different purposes.

Thanks,
Reiji
Eric Auger Dec. 7, 2021, 9:36 a.m. UTC | #18
Hi Reiji,

On 12/4/21 5:35 AM, Reiji Watanabe wrote:
> Hi Eric,
> 
> On Thu, Dec 2, 2021 at 4:51 AM Eric Auger <eauger@redhat.com> wrote:
>>
>> Hi Reiji,
>>
>> On 11/17/21 7:43 AM, Reiji Watanabe wrote:
>>> This patch lays the groundwork to make ID registers writable.
>>>
>>> Introduce struct id_reg_info for an ID register to manage the
>>> register specific control of its value for the guest, and provide set
>>> of functions commonly used for ID registers to make them writable.
>>>
>>> The id_reg_info is used to do register specific initialization,
>>> validation of the ID register and etc.  Not all ID registers must
>>> have the id_reg_info. ID registers that don't have the id_reg_info
>>> are handled in a common way that is applied to all ID registers.
>>>
>>> At present, changing an ID register from userspace is allowed only
>>> if the ID register has the id_reg_info, but that will be changed
>>> by the following patches.
>>>
>>> No ID register has the structure yet and the following patches
>>> will add the id_reg_info for some ID registers.
>>>
>>> Signed-off-by: Reiji Watanabe <reijiw@google.com>
>>> ---
>>>  arch/arm64/include/asm/sysreg.h |   1 +
>>>  arch/arm64/kvm/sys_regs.c       | 226 ++++++++++++++++++++++++++++++--
>>>  2 files changed, 218 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>>> index 16b3f1a1d468..597609f26331 100644
>>> --- a/arch/arm64/include/asm/sysreg.h
>>> +++ b/arch/arm64/include/asm/sysreg.h
>>> @@ -1197,6 +1197,7 @@
>>>  #define ICH_VTR_TDS_MASK     (1 << ICH_VTR_TDS_SHIFT)
>>>
>>>  #define ARM64_FEATURE_FIELD_BITS     4
>>> +#define ARM64_FEATURE_FIELD_MASK     ((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
>>>
>>>  /* Create a mask for the feature bits of the specified feature. */
>>>  #define ARM64_FEATURE_MASK(x)        (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
>>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>>> index 5608d3410660..1552cd5581b7 100644
>>> --- a/arch/arm64/kvm/sys_regs.c
>>> +++ b/arch/arm64/kvm/sys_regs.c
>>> @@ -265,6 +265,181 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
>>>               return read_zero(vcpu, p);
>>>  }
>>>
>>> +/*
>>> + * A value for FCT_LOWER_SAFE must be zero and changing that will affect
>>> + * ftr_check_types of id_reg_info.
>>> + */
>>> +enum feature_check_type {
>>> +     FCT_LOWER_SAFE = 0,
>>> +     FCT_HIGHER_SAFE,
>>> +     FCT_HIGHER_OR_ZERO_SAFE,
>>> +     FCT_EXACT,
>>> +     FCT_EXACT_OR_ZERO_SAFE,
>>> +     FCT_IGNORE,     /* Don't check (any value is fine) */
>>> +};
>>> +
>>> +static int arm64_check_feature_one(enum feature_check_type type, int val,
>>> +                                int limit)
>>> +{
>>> +     bool is_safe = false;
>>> +
>>> +     if (val == limit)
>>> +             return 0;
>>> +
>>> +     switch (type) {
>>> +     case FCT_LOWER_SAFE:
>>> +             is_safe = (val <= limit);
>>> +             break;
>>> +     case FCT_HIGHER_OR_ZERO_SAFE:
>>> +             if (val == 0) {
>>> +                     is_safe = true;
>>> +                     break;
>>> +             }
>>> +             fallthrough;
>>> +     case FCT_HIGHER_SAFE:
>>> +             is_safe = (val >= limit);
>>> +             break;
>>> +     case FCT_EXACT:
>>> +             break;
>>> +     case FCT_EXACT_OR_ZERO_SAFE:
>>> +             is_safe = (val == 0);
>>> +             break;
>>> +     case FCT_IGNORE:
>>> +             is_safe = true;
>>> +             break;
>>> +     default:
>>> +             WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
>>> +             break;
>>> +     }
>>> +
>>> +     return is_safe ? 0 : -1;
>>> +}
>>> +
>>> +#define      FCT_TYPE_MASK           0x7
>>> +#define      FCT_TYPE_SHIFT          1
>>> +#define      FCT_SIGN_MASK           0x1
>>> +#define      FCT_SIGN_SHIFT          0
>>> +#define      FCT_TYPE(val)   ((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
>>> +#define      FCT_SIGN(val)   ((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
>>> +
>>> +#define      MAKE_FCT(shift, type, sign)                             \
>>> +     ((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |   \
>>> +            (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
>>> +
>>> +/* For signed field */
>>> +#define      S_FCT(shift, type)      MAKE_FCT(shift, type, 1)
>>> +/* For unigned field */
>>> +#define      U_FCT(shift, type)      MAKE_FCT(shift, type, 0)
>>> +
>>> +/*
>>> + * @val and @lim are both a value of the ID register. The function checks
>>> + * if all features indicated in @val can be supported for guests on the host,
>>> + * which supports features indicated in @lim. @check_types indicates how
>>> + * features in the ID register needs to be checked.
>>> + * See comments for id_reg_info's ftr_check_types field for more detail.
>>> + */
>>> +static int arm64_check_features(u64 check_types, u64 val, u64 lim)
>>> +{
>>> +     int i;
>>> +
>>> +     for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
>>> +             u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
>>> +             bool is_sign = FCT_SIGN(ftr_check);
>>> +             enum feature_check_type fctype = FCT_TYPE(ftr_check);
>>> +             int fval, flim, ret;
>>> +
>>> +             fval = cpuid_feature_extract_field(val, i, is_sign);
>>> +             flim = cpuid_feature_extract_field(lim, i, is_sign);
>>> +
>>> +             ret = arm64_check_feature_one(fctype, fval, flim);
>>> +             if (ret)
>>> +                     return -E2BIG;
>>> +     }
>>> +     return 0;
>>> +}
>>> +
>>> +struct id_reg_info {
>>> +     u32     sys_reg;        /* Register ID */
>>> +
>>> +     /*
>>> +      * Limit value of the register for a vcpu. The value is the sanitized
>>> +      * system value with bits cleared for unsupported features for the
>>> +      * guest.
>>> +      */
>>> +     u64     vcpu_limit_val;
>>> +
>>> +     /*
>>> +      * The ftr_check_types is comprised of a set of 4 bits fields.
>>> +      * Each 4 bits field is for a feature indicated by the same bits
>>> +      * field of the ID register and indicates how the feature support
>>> +      * for guests needs to be checked.
>>> +      * The bit 0 indicates that the corresponding ID register field
>>> +      * is signed(1) or unsigned(0).
>>> +      * The bits [3:1] hold feature_check_type for the field.
>>> +      * If all zero, all features in the ID register are treated as unsigned
>>> +      * fields and checked based on Principles of the ID scheme for fields
>>> +      * in ID registers (FCT_LOWER_SAFE of feature_check_type).
>>> +      */
>>> +     u64     ftr_check_types;
>>> +
>>> +     /* Initialization function of the id_reg_info */
>>> +     void (*init)(struct id_reg_info *id_reg);
>>> +
>>> +     /* Register specific validation function */
>>> +     int (*validate)(struct kvm_vcpu *vcpu, const struct id_reg_info *id_reg,
>>> +                     u64 val);
>>> +
>>> +     /* Return the reset value of the register for the vCPU */
>>> +     u64 (*get_reset_val)(struct kvm_vcpu *vcpu,
>>> +                          const struct id_reg_info *id_reg);
>> It is unclear to me why we need 2 different callbacks, ie. init and
>> get_reset_val. ID_REGS can only be accessed from user space after the
>> vcpu reset, right? So couldn't we have a single cb instead of this
>> overwrite mechanism?
> 
> Thank you for the comment.
> 
> What the init() does needs to be done just once.
> It initializes the id_reg_info itself (not for the ID register of vCPU).
> And the data initialized by the init() is used not just for the
> overwrite mechanism at the vcpu reset but for other purposes as well.
> 
> What the get_reset_val does needs to be done for every initial vCPU reset.
> It provides the initial value for the vCPU, which depends on its feature
> configuration that is configured by KVM_ARM_VCPU_INIT (or other APIs).
> 
> Of course there are other ways to achieve the same, and it's entirely
> possible to have a single function though.  I just chose to use a
> separate function for each of those two different purposes.

OK fair enough. Was thinking that maybe it would simplify the code if we
had a single 'reset" cb but up to you.

Thanks

Eric
> 
> Thanks,
> Reiji
>
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 16b3f1a1d468..597609f26331 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1197,6 +1197,7 @@ 
 #define ICH_VTR_TDS_MASK	(1 << ICH_VTR_TDS_SHIFT)
 
 #define ARM64_FEATURE_FIELD_BITS	4
+#define ARM64_FEATURE_FIELD_MASK	((1ull << ARM64_FEATURE_FIELD_BITS) - 1)
 
 /* Create a mask for the feature bits of the specified feature. */
 #define ARM64_FEATURE_MASK(x)	(GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5608d3410660..1552cd5581b7 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -265,6 +265,181 @@  static bool trap_raz_wi(struct kvm_vcpu *vcpu,
 		return read_zero(vcpu, p);
 }
 
+/*
+ * A value for FCT_LOWER_SAFE must be zero and changing that will affect
+ * ftr_check_types of id_reg_info.
+ */
+enum feature_check_type {
+	FCT_LOWER_SAFE = 0,
+	FCT_HIGHER_SAFE,
+	FCT_HIGHER_OR_ZERO_SAFE,
+	FCT_EXACT,
+	FCT_EXACT_OR_ZERO_SAFE,
+	FCT_IGNORE,	/* Don't check (any value is fine) */
+};
+
+static int arm64_check_feature_one(enum feature_check_type type, int val,
+				   int limit)
+{
+	bool is_safe = false;
+
+	if (val == limit)
+		return 0;
+
+	switch (type) {
+	case FCT_LOWER_SAFE:
+		is_safe = (val <= limit);
+		break;
+	case FCT_HIGHER_OR_ZERO_SAFE:
+		if (val == 0) {
+			is_safe = true;
+			break;
+		}
+		fallthrough;
+	case FCT_HIGHER_SAFE:
+		is_safe = (val >= limit);
+		break;
+	case FCT_EXACT:
+		break;
+	case FCT_EXACT_OR_ZERO_SAFE:
+		is_safe = (val == 0);
+		break;
+	case FCT_IGNORE:
+		is_safe = true;
+		break;
+	default:
+		WARN_ONCE(1, "Unexpected feature_check_type (%d)\n", type);
+		break;
+	}
+
+	return is_safe ? 0 : -1;
+}
+
+#define	FCT_TYPE_MASK		0x7
+#define	FCT_TYPE_SHIFT		1
+#define	FCT_SIGN_MASK		0x1
+#define	FCT_SIGN_SHIFT		0
+#define	FCT_TYPE(val)	((val >> FCT_TYPE_SHIFT) & FCT_TYPE_MASK)
+#define	FCT_SIGN(val)	((val >> FCT_SIGN_SHIFT) & FCT_SIGN_MASK)
+
+#define	MAKE_FCT(shift, type, sign)				\
+	((u64)((((type) & FCT_TYPE_MASK) << FCT_TYPE_SHIFT) |	\
+	       (((sign) & FCT_SIGN_MASK) << FCT_SIGN_SHIFT)) << (shift))
+
+/* For signed field */
+#define	S_FCT(shift, type)	MAKE_FCT(shift, type, 1)
+/* For unigned field */
+#define	U_FCT(shift, type)	MAKE_FCT(shift, type, 0)
+
+/*
+ * @val and @lim are both a value of the ID register. The function checks
+ * if all features indicated in @val can be supported for guests on the host,
+ * which supports features indicated in @lim. @check_types indicates how
+ * features in the ID register needs to be checked.
+ * See comments for id_reg_info's ftr_check_types field for more detail.
+ */
+static int arm64_check_features(u64 check_types, u64 val, u64 lim)
+{
+	int i;
+
+	for (i = 0; i < 64; i += ARM64_FEATURE_FIELD_BITS) {
+		u8 ftr_check = (check_types >> i) & ARM64_FEATURE_FIELD_MASK;
+		bool is_sign = FCT_SIGN(ftr_check);
+		enum feature_check_type fctype = FCT_TYPE(ftr_check);
+		int fval, flim, ret;
+
+		fval = cpuid_feature_extract_field(val, i, is_sign);
+		flim = cpuid_feature_extract_field(lim, i, is_sign);
+
+		ret = arm64_check_feature_one(fctype, fval, flim);
+		if (ret)
+			return -E2BIG;
+	}
+	return 0;
+}
+
+struct id_reg_info {
+	u32	sys_reg;	/* Register ID */
+
+	/*
+	 * Limit value of the register for a vcpu. The value is the sanitized
+	 * system value with bits cleared for unsupported features for the
+	 * guest.
+	 */
+	u64	vcpu_limit_val;
+
+	/*
+	 * The ftr_check_types is comprised of a set of 4 bits fields.
+	 * Each 4 bits field is for a feature indicated by the same bits
+	 * field of the ID register and indicates how the feature support
+	 * for guests needs to be checked.
+	 * The bit 0 indicates that the corresponding ID register field
+	 * is signed(1) or unsigned(0).
+	 * The bits [3:1] hold feature_check_type for the field.
+	 * If all zero, all features in the ID register are treated as unsigned
+	 * fields and checked based on Principles of the ID scheme for fields
+	 * in ID registers (FCT_LOWER_SAFE of feature_check_type).
+	 */
+	u64	ftr_check_types;
+
+	/* Initialization function of the id_reg_info */
+	void (*init)(struct id_reg_info *id_reg);
+
+	/* Register specific validation function */
+	int (*validate)(struct kvm_vcpu *vcpu, const struct id_reg_info *id_reg,
+			u64 val);
+
+	/* Return the reset value of the register for the vCPU */
+	u64 (*get_reset_val)(struct kvm_vcpu *vcpu,
+			     const struct id_reg_info *id_reg);
+};
+
+static void id_reg_info_init(struct id_reg_info *id_reg)
+{
+	id_reg->vcpu_limit_val = read_sanitised_ftr_reg(id_reg->sys_reg);
+	if (id_reg->init)
+		id_reg->init(id_reg);
+}
+
+/*
+ * An ID register that needs special handling to control the value for the
+ * guest must have its own id_reg_info in id_reg_info_table.
+ * (i.e. the reset value is different from the host's sanitized value,
+ * the value is affected by opt-in features, some fields needs specific
+ * validation, etc.)
+ */
+#define	GET_ID_REG_INFO(id)	(id_reg_info_table[IDREG_IDX(id)])
+static struct id_reg_info *id_reg_info_table[KVM_ARM_ID_REG_MAX_NUM] = {};
+
+static int validate_id_reg(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_desc *rd, u64 val)
+{
+	u32 id = reg_to_encoding(rd);
+	const struct id_reg_info *id_reg = GET_ID_REG_INFO(id);
+	u64 limit, check_types;
+	int err;
+
+	if (id_reg) {
+		check_types = id_reg->ftr_check_types;
+		limit = id_reg->vcpu_limit_val;
+	} else {
+		/* All fields are treated as unsigned and FCT_LOWER_SAFE */
+		check_types = 0;
+		limit = read_sanitised_ftr_reg(id);
+	}
+
+	/* Check if the value indicates any feature that is not in the limit. */
+	err = arm64_check_features(check_types, val, limit);
+	if (err)
+		return err;
+
+	if (id_reg && id_reg->validate)
+		/* Run the ID register specific validity check. */
+		err = id_reg->validate(vcpu, id_reg, val);
+
+	return err;
+}
+
 /*
  * ARMv8.1 mandates at least a trivial LORegion implementation, where all the
  * RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0
@@ -1183,11 +1358,19 @@  static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
 static void reset_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
 {
 	u32 id = reg_to_encoding(rd);
+	struct id_reg_info *id_reg;
+	u64 val;
 
 	if (vcpu_has_reset_once(vcpu))
 		return;
 
-	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(id)) = read_sanitised_ftr_reg(id);
+	id_reg = GET_ID_REG_INFO(id);
+	if (id_reg && id_reg->get_reset_val)
+		val = id_reg->get_reset_val(vcpu, id_reg);
+	else
+		val = read_sanitised_ftr_reg(id);
+
+	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(id)) = val;
 }
 
 static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
@@ -1232,11 +1415,7 @@  static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-/*
- * cpufeature ID register user accessors
- *
- * We don't allow the effective value to be changed.
- */
+/* cpufeature ID register user accessors */
 static int __get_id_reg(const struct kvm_vcpu *vcpu,
 			const struct sys_reg_desc *rd, void __user *uaddr,
 			bool raz)
@@ -1247,11 +1426,12 @@  static int __get_id_reg(const struct kvm_vcpu *vcpu,
 	return reg_to_user(uaddr, &val, id);
 }
 
-static int __set_id_reg(const struct kvm_vcpu *vcpu,
+static int __set_id_reg(struct kvm_vcpu *vcpu,
 			const struct sys_reg_desc *rd, void __user *uaddr,
 			bool raz)
 {
 	const u64 id = sys_reg_to_index(rd);
+	u32 encoding = reg_to_encoding(rd);
 	int err;
 	u64 val;
 
@@ -1259,10 +1439,22 @@  static int __set_id_reg(const struct kvm_vcpu *vcpu,
 	if (err)
 		return err;
 
-	/* This is what we mean by invariant: you can't change it. */
-	if (val != read_id_reg(vcpu, rd, raz))
+	/* Don't allow to change the reg unless the reg has id_reg_info */
+	if (val != read_id_reg(vcpu, rd, raz) && !GET_ID_REG_INFO(encoding))
 		return -EINVAL;
 
+	if (raz)
+		return 0;
+
+	/* Don't allow to change the reg after the first KVM_RUN. */
+	if (vcpu->arch.has_run_once)
+		return -EINVAL;
+
+	err = validate_id_reg(vcpu, rd, val);
+	if (err)
+		return err;
+
+	__vcpu_sys_reg(vcpu, IDREG_SYS_IDX(encoding)) = val;
 	return 0;
 }
 
@@ -2826,6 +3018,20 @@  int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 	return write_demux_regids(uindices);
 }
 
+static void id_reg_info_init_all(void)
+{
+	int i;
+	struct id_reg_info *id_reg;
+
+	for (i = 0; i < ARRAY_SIZE(id_reg_info_table); i++) {
+		id_reg = (struct id_reg_info *)id_reg_info_table[i];
+		if (!id_reg)
+			continue;
+
+		id_reg_info_init(id_reg);
+	}
+}
+
 void kvm_sys_reg_table_init(void)
 {
 	unsigned int i;
@@ -2860,4 +3066,6 @@  void kvm_sys_reg_table_init(void)
 			break;
 	/* Clear all higher bits. */
 	cache_levels &= (1 << (i*3))-1;
+
+	id_reg_info_init_all();
 }