diff mbox series

[v4,1/2] KVM: arm/arm64: Add save/restore support for firmware workaround state

Message ID 20190228234334.20456-2-andre.przywara@arm.com (mailing list archive)
State New, archived
Headers show
Series [v4,1/2] KVM: arm/arm64: Add save/restore support for firmware workaround state | expand

Commit Message

Andre Przywara Feb. 28, 2019, 11:43 p.m. UTC
KVM implements the firmware interface for mitigating cache speculation
vulnerabilities. Guests may use this interface to ensure mitigation is
active.
If we want to migrate such a guest to a host with a different support
level for those workarounds, migration might need to fail, to ensure that
critical guests don't loose their protection.

Introduce a way for userland to save and restore the workarounds state.
On restoring we do checks that make sure we don't downgrade our
mitigation level.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 arch/arm/include/asm/kvm_emulate.h   |  10 +++
 arch/arm/include/uapi/asm/kvm.h      |  10 +++
 arch/arm64/include/asm/kvm_emulate.h |  14 +++
 arch/arm64/include/uapi/asm/kvm.h    |   9 ++
 virt/kvm/arm/psci.c                  | 128 +++++++++++++++++++++++----
 5 files changed, 155 insertions(+), 16 deletions(-)

Comments

Steven Price March 1, 2019, 2:57 p.m. UTC | #1
On 28/02/2019 23:43, Andre Przywara wrote:
> KVM implements the firmware interface for mitigating cache speculation
> vulnerabilities. Guests may use this interface to ensure mitigation is
> active.
> If we want to migrate such a guest to a host with a different support
> level for those workarounds, migration might need to fail, to ensure that
> critical guests don't loose their protection.
> 
> Introduce a way for userland to save and restore the workarounds state.
> On restoring we do checks that make sure we don't downgrade our
> mitigation level.
> 
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  arch/arm/include/asm/kvm_emulate.h   |  10 +++
>  arch/arm/include/uapi/asm/kvm.h      |  10 +++
>  arch/arm64/include/asm/kvm_emulate.h |  14 +++
>  arch/arm64/include/uapi/asm/kvm.h    |   9 ++
>  virt/kvm/arm/psci.c                  | 128 +++++++++++++++++++++++----
>  5 files changed, 155 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
> index 8927cae7c966..663a02d7e6f4 100644
> --- a/arch/arm/include/asm/kvm_emulate.h
> +++ b/arch/arm/include/asm/kvm_emulate.h
> @@ -283,6 +283,16 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
>  	return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
>  }
>  
> +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
> +{
> +	return false;
> +}
> +
> +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
> +						      bool flag)
> +{
> +}
> +
>  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
>  {
>  	*vcpu_cpsr(vcpu) |= PSR_E_BIT;
> diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
> index 4602464ebdfb..ba4d2afe65e3 100644
> --- a/arch/arm/include/uapi/asm/kvm.h
> +++ b/arch/arm/include/uapi/asm/kvm.h
> @@ -214,6 +214,16 @@ struct kvm_vcpu_events {
>  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM | KVM_REG_SIZE_U64 | \
>  					 KVM_REG_ARM_FW | ((r) & 0xffff))
>  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_UNAFFECTED	2
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED	(1U << 4)
>  
>  /* Device Control API: ARM VGIC */
>  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index d3842791e1c4..c00c17c9adb6 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -348,6 +348,20 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
>  	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
>  }
>  
> +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
> +{
> +	return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
> +}
> +
> +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
> +						      bool flag)
> +{
> +	if (flag)
> +		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
> +	else
> +		vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
> +}
> +
>  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
>  {
>  	if (vcpu_mode_is_32bit(vcpu)) {
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index 97c3478ee6e7..367e96fe654e 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -225,6 +225,15 @@ struct kvm_vcpu_events {
>  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
>  					 KVM_REG_ARM_FW | ((r) & 0xffff))
>  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED     (1U << 4)
>  
>  /* Device Control API: ARM VGIC */
>  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
> diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
> index 9b73d3ad918a..e65664c09b12 100644
> --- a/virt/kvm/arm/psci.c
> +++ b/virt/kvm/arm/psci.c
> @@ -445,42 +445,97 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
>  
>  int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
>  {
> -	return 1;		/* PSCI version */
> +	return 3;		/* PSCI version and two workaround registers */
>  }
>  
>  int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
>  {
> -	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices))
> +	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
> +		return -EFAULT;
> +
> +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
> +		return -EFAULT;
> +
> +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
>  		return -EFAULT;
>  
>  	return 0;
>  }
>  
> +#define KVM_REG_FEATURE_LEVEL_WIDTH	4
> +#define KVM_REG_FEATURE_LEVEL_MASK	(BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
> +
> +/*
> + * Convert the workaround level into an easy-to-compare number, where higher
> + * values mean better protection.
> + */
> +static int get_kernel_wa_level(u64 regid)
> +{
> +	switch (regid) {
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> +		if (kvm_arm_harden_branch_predictor())
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
> +		else
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> +		switch (kvm_arm_have_ssbd()) {
> +		case KVM_SSBD_FORCE_DISABLE:
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
> +		case KVM_SSBD_KERNEL:
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL;
> +		case KVM_SSBD_FORCE_ENABLE:
> +		case KVM_SSBD_MITIGATED:
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED;
> +		case KVM_SSBD_UNKNOWN:
> +		default:
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>  {
> -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
> -		void __user *uaddr = (void __user *)(long)reg->addr;
> -		u64 val;
> +	void __user *uaddr = (void __user *)(long)reg->addr;
> +	u64 val;
>  
> +	switch (reg->id) {
> +	case KVM_REG_ARM_PSCI_VERSION:
>  		val = kvm_psci_version(vcpu, vcpu->kvm);
> -		if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
> -			return -EFAULT;
> -
> -		return 0;
> +		break;
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
> +		break;
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
> +		if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL &&
> +		    kvm_arm_get_vcpu_workaround_2_flag(vcpu))
> +			val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED;
> +		break;
> +	default:
> +		return -ENOENT;
>  	}
>  
> -	return -EINVAL;
> +	if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
> +		return -EFAULT;
> +
> +	return 0;
>  }
>  
>  int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>  {
> -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
> -		void __user *uaddr = (void __user *)(long)reg->addr;
> -		bool wants_02;
> -		u64 val;
> +	void __user *uaddr = (void __user *)(long)reg->addr;
> +	u64 val;
> +	int wa_level;
> +
> +	if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
> +		return -EFAULT;
>  
> -		if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
> -			return -EFAULT;
> +	switch (reg->id) {
> +	case KVM_REG_ARM_PSCI_VERSION:
> +	{
> +		bool wants_02;
>  
>  		wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
>  
> @@ -497,6 +552,47 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>  			vcpu->kvm->arch.psci_version = val;
>  			return 0;
>  		}
> +		break;
> +	}
> +
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> +		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
> +			return -EINVAL;
> +
> +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;

NIT: Because we now error out if any unknown bits are set, this line is
a no-op (there cannot be any bits outside the mask in val).

> +
> +		/* For now we only accept the very same workaround level. */
> +		if (get_kernel_wa_level(reg->id) != wa_level)
> +			return -EINVAL;
> +
> +		return 0;
> +
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> +		if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
> +			    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
> +			return -EINVAL;
> +
> +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;

This is fine because the _ENABLED flag is separate.

Steve
Eric Auger March 21, 2019, 12:54 p.m. UTC | #2
Hi Andre,

On 3/1/19 12:43 AM, Andre Przywara wrote:
> KVM implements the firmware interface for mitigating cache speculation
> vulnerabilities. Guests may use this interface to ensure mitigation is
> active.
> If we want to migrate such a guest to a host with a different support
> level for those workarounds, migration might need to fail, to ensure that
> critical guests don't loose their protection.
> 
> Introduce a way for userland to save and restore the workarounds state.
> On restoring we do checks that make sure we don't downgrade our
> mitigation level.
> 
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  arch/arm/include/asm/kvm_emulate.h   |  10 +++
>  arch/arm/include/uapi/asm/kvm.h      |  10 +++
>  arch/arm64/include/asm/kvm_emulate.h |  14 +++
>  arch/arm64/include/uapi/asm/kvm.h    |   9 ++
>  virt/kvm/arm/psci.c                  | 128 +++++++++++++++++++++++----
>  5 files changed, 155 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
> index 8927cae7c966..663a02d7e6f4 100644
> --- a/arch/arm/include/asm/kvm_emulate.h
> +++ b/arch/arm/include/asm/kvm_emulate.h
> @@ -283,6 +283,16 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
>  	return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
>  }
>  
> +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
> +{
> +	return false;
> +}
> +
> +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
> +						      bool flag)
> +{
> +}
> +
>  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
>  {
>  	*vcpu_cpsr(vcpu) |= PSR_E_BIT;
> diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
> index 4602464ebdfb..ba4d2afe65e3 100644
> --- a/arch/arm/include/uapi/asm/kvm.h
> +++ b/arch/arm/include/uapi/asm/kvm.h
> @@ -214,6 +214,16 @@ struct kvm_vcpu_events {
>  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM | KVM_REG_SIZE_U64 | \
>  					 KVM_REG_ARM_FW | ((r) & 0xffff))
>  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_UNAFFECTED	2
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1
Would be worth adding a comment saying that values are chosen so that
higher values mean better protection. Otherwise it looks strange
NOT_AVAIL/AVAIL/UNAFFECTED values are not the same for both workarounds.	
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED	(1U << 4)

>  
>  /* Device Control API: ARM VGIC */
>  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index d3842791e1c4..c00c17c9adb6 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -348,6 +348,20 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
>  	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
>  }
>  
> +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
> +{
> +	return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
> +}
> +
> +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
> +						      bool flag)
> +{
> +	if (flag)
> +		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
> +	else
> +		vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
> +}
> +
>  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
>  {
>  	if (vcpu_mode_is_32bit(vcpu)) {
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index 97c3478ee6e7..367e96fe654e 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -225,6 +225,15 @@ struct kvm_vcpu_events {
>  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
>  					 KVM_REG_ARM_FW | ((r) & 0xffff))
>  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED     (1U << 4)
>  
>  /* Device Control API: ARM VGIC */
>  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
> diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
> index 9b73d3ad918a..e65664c09b12 100644
> --- a/virt/kvm/arm/psci.c
> +++ b/virt/kvm/arm/psci.c
> @@ -445,42 +445,97 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
>  
>  int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
>  {
> -	return 1;		/* PSCI version */
> +	return 3;		/* PSCI version and two workaround registers */
>  }
>  
>  int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
>  {
> -	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices))
> +	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
> +		return -EFAULT;
> +
> +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
> +		return -EFAULT;
> +
> +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
>  		return -EFAULT;
Wouldn't it make sense to have a const array somewhere listing the FW
regs and putting KVM_REG_ARM_FW_REG[i]? Also kvm_arm_get_fw_num_regs
could return the ARRAY_SIZE.

vcpu arg is never used actually (not related to this patch).
>  
>  	return 0;
>  }
>  
> +#define KVM_REG_FEATURE_LEVEL_WIDTH	4
> +#define KVM_REG_FEATURE_LEVEL_MASK	(BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
> +
> +/*
> + * Convert the workaround level into an easy-to-compare number, where higher
> + * values mean better protection.
> + */
> +static int get_kernel_wa_level(u64 regid)
> +{
> +	switch (regid) {
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> +		if (kvm_arm_harden_branch_predictor())
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
> +		else
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> +		switch (kvm_arm_have_ssbd()) {
> +		case KVM_SSBD_FORCE_DISABLE:
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
> +		case KVM_SSBD_KERNEL:
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL;
> +		case KVM_SSBD_FORCE_ENABLE:
> +		case KVM_SSBD_MITIGATED:
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED;
> +		case KVM_SSBD_UNKNOWN:
> +		default:
> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN;
> +		}
> +	}
> +
> +	return 0;
I would rather return -EINVAL although the function is not called for
any invalid reg.
> +}
> +
>  int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>  {
> -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
> -		void __user *uaddr = (void __user *)(long)reg->addr;
> -		u64 val;
> +	void __user *uaddr = (void __user *)(long)reg->addr;
> +	u64 val;
>  
> +	switch (reg->id) {
> +	case KVM_REG_ARM_PSCI_VERSION:
>  		val = kvm_psci_version(vcpu, vcpu->kvm);
> -		if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
> -			return -EFAULT;
> -
> -		return 0;
> +		break;
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
Can get_kernel_wa_level return something outside of
KVM_REG_FEATURE_LEVEL_MASK?
> +		break;
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
same here
> +		if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL &&> +		    kvm_arm_get_vcpu_workaround_2_flag(vcpu))
nit: if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
kvm_arm_get_vcpu_workaround_2_flag(vcpu)).
> +			val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED;
> +		break;
> +	default:
> +		return -ENOENT;
>  	}
>  
> -	return -EINVAL;
> +	if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
> +		return -EFAULT;
> +
> +	return 0;
>  }
>  
>  int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>  {
> -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
> -		void __user *uaddr = (void __user *)(long)reg->addr;
> -		bool wants_02;
> -		u64 val;
> +	void __user *uaddr = (void __user *)(long)reg->addr;
> +	u64 val;
> +	int wa_level;
> +
> +	if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
> +		return -EFAULT;
>  
> -		if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
> -			return -EFAULT;
> +	switch (reg->id) {
> +	case KVM_REG_ARM_PSCI_VERSION:
> +	{
> +		bool wants_02;
>  
>  		wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
>  
> @@ -497,6 +552,47 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>  			vcpu->kvm->arch.psci_version = val;
>  			return 0;
>  		}
> +		break;
> +	}
> +
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> +		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
> +			return -EINVAL;
> +
> +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
not needed
> +
> +		/* For now we only accept the very same workaround level. */
> +		if (get_kernel_wa_level(reg->id) != wa_level)
> +			return -EINVAL;
> +
> +		return 0;
> +
> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> +		if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
> +			    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))> +			return -EINVAL;
> +
> +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
> +
> +		if (get_kernel_wa_level(reg->id) < wa_level)
> +			return -EINVAL;
> +
worth a comment?
> +		if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL)
> +			return 0;
> +
> +		switch (wa_level) {
> +		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
> +			kvm_arm_set_vcpu_workaround_2_flag(vcpu,
> +			    val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED);
> +			break;
> +		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED:
> +			kvm_arm_set_vcpu_workaround_2_flag(vcpu, true);
Looks strange to me we enable the flag when unaffected.

> +			break;
> +		}
> +
> +		return 0;
> +	default:
> +		return -ENOENT;
>  	}
>  
>  	return -EINVAL;
> 

Thanks

Eric
Andre Przywara March 21, 2019, 5:35 p.m. UTC | #3
On Thu, 21 Mar 2019 13:54:07 +0100
Auger Eric <eric.auger@redhat.com> wrote:

Hi Eric,

many thanks for looking at this!
I was about to prepare a new revision.

> Hi Andre,
> 
> On 3/1/19 12:43 AM, Andre Przywara wrote:
> > KVM implements the firmware interface for mitigating cache speculation
> > vulnerabilities. Guests may use this interface to ensure mitigation is
> > active.
> > If we want to migrate such a guest to a host with a different support
> > level for those workarounds, migration might need to fail, to ensure that
> > critical guests don't loose their protection.
> > 
> > Introduce a way for userland to save and restore the workarounds state.
> > On restoring we do checks that make sure we don't downgrade our
> > mitigation level.
> > 
> > Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> > ---
> >  arch/arm/include/asm/kvm_emulate.h   |  10 +++
> >  arch/arm/include/uapi/asm/kvm.h      |  10 +++
> >  arch/arm64/include/asm/kvm_emulate.h |  14 +++
> >  arch/arm64/include/uapi/asm/kvm.h    |   9 ++
> >  virt/kvm/arm/psci.c                  | 128 +++++++++++++++++++++++----
> >  5 files changed, 155 insertions(+), 16 deletions(-)
> > 
> > diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
> > index 8927cae7c966..663a02d7e6f4 100644
> > --- a/arch/arm/include/asm/kvm_emulate.h
> > +++ b/arch/arm/include/asm/kvm_emulate.h
> > @@ -283,6 +283,16 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
> >  	return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
> >  }
> >  
> > +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
> > +{
> > +	return false;
> > +}
> > +
> > +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
> > +						      bool flag)
> > +{
> > +}
> > +
> >  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
> >  {
> >  	*vcpu_cpsr(vcpu) |= PSR_E_BIT;
> > diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
> > index 4602464ebdfb..ba4d2afe65e3 100644
> > --- a/arch/arm/include/uapi/asm/kvm.h
> > +++ b/arch/arm/include/uapi/asm/kvm.h
> > @@ -214,6 +214,16 @@ struct kvm_vcpu_events {
> >  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM | KVM_REG_SIZE_U64 | \
> >  					 KVM_REG_ARM_FW | ((r) & 0xffff))
> >  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_UNAFFECTED	2
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1  
> Would be worth adding a comment saying that values are chosen so that
> higher values mean better protection. Otherwise it looks strange
> NOT_AVAIL/AVAIL/UNAFFECTED values are not the same for both workarounds.	
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED	(1U << 4)  
> 
> >  
> >  /* Device Control API: ARM VGIC */
> >  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
> > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> > index d3842791e1c4..c00c17c9adb6 100644
> > --- a/arch/arm64/include/asm/kvm_emulate.h
> > +++ b/arch/arm64/include/asm/kvm_emulate.h
> > @@ -348,6 +348,20 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
> >  	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
> >  }
> >  
> > +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
> > +{
> > +	return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
> > +}
> > +
> > +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
> > +						      bool flag)
> > +{
> > +	if (flag)
> > +		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
> > +	else
> > +		vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
> > +}
> > +
> >  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
> >  {
> >  	if (vcpu_mode_is_32bit(vcpu)) {
> > diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> > index 97c3478ee6e7..367e96fe654e 100644
> > --- a/arch/arm64/include/uapi/asm/kvm.h
> > +++ b/arch/arm64/include/uapi/asm/kvm.h
> > @@ -225,6 +225,15 @@ struct kvm_vcpu_events {
> >  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
> >  					 KVM_REG_ARM_FW | ((r) & 0xffff))
> >  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED     (1U << 4)
> >  
> >  /* Device Control API: ARM VGIC */
> >  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
> > diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
> > index 9b73d3ad918a..e65664c09b12 100644
> > --- a/virt/kvm/arm/psci.c
> > +++ b/virt/kvm/arm/psci.c
> > @@ -445,42 +445,97 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
> >  
> >  int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
> >  {
> > -	return 1;		/* PSCI version */
> > +	return 3;		/* PSCI version and two workaround registers */
> >  }
> >  
> >  int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
> >  {
> > -	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices))
> > +	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
> > +		return -EFAULT;
> > +
> > +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
> > +		return -EFAULT;
> > +
> > +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
> >  		return -EFAULT;  
> Wouldn't it make sense to have a const array somewhere listing the FW
> regs and putting KVM_REG_ARM_FW_REG[i]? Also kvm_arm_get_fw_num_regs
> could return the ARRAY_SIZE.

Yeah, makes sense. I was cowardly pushing this off to the one adding the next firmware register ;-)

> vcpu arg is never used actually (not related to this patch).
> >  
> >  	return 0;
> >  }
> >  
> > +#define KVM_REG_FEATURE_LEVEL_WIDTH	4
> > +#define KVM_REG_FEATURE_LEVEL_MASK	(BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
> > +
> > +/*
> > + * Convert the workaround level into an easy-to-compare number, where higher
> > + * values mean better protection.
> > + */
> > +static int get_kernel_wa_level(u64 regid)
> > +{
> > +	switch (regid) {
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> > +		if (kvm_arm_harden_branch_predictor())
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
> > +		else
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> > +		switch (kvm_arm_have_ssbd()) {
> > +		case KVM_SSBD_FORCE_DISABLE:
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
> > +		case KVM_SSBD_KERNEL:
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL;
> > +		case KVM_SSBD_FORCE_ENABLE:
> > +		case KVM_SSBD_MITIGATED:
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED;
> > +		case KVM_SSBD_UNKNOWN:
> > +		default:
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN;
> > +		}
> > +	}
> > +
> > +	return 0;  
> I would rather return -EINVAL although the function is not called for
> any invalid reg.
> > +}
> > +
> >  int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
> >  {
> > -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
> > -		void __user *uaddr = (void __user *)(long)reg->addr;
> > -		u64 val;
> > +	void __user *uaddr = (void __user *)(long)reg->addr;
> > +	u64 val;
> >  
> > +	switch (reg->id) {
> > +	case KVM_REG_ARM_PSCI_VERSION:
> >  		val = kvm_psci_version(vcpu, vcpu->kvm);
> > -		if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
> > -			return -EFAULT;
> > -
> > -		return 0;
> > +		break;
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> > +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;  
> Can get_kernel_wa_level return something outside of
> KVM_REG_FEATURE_LEVEL_MASK?
> > +		break;
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> > +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;  
> same here
> > +		if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL &&> +		    kvm_arm_get_vcpu_workaround_2_flag(vcpu))  
> nit: if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
> kvm_arm_get_vcpu_workaround_2_flag(vcpu)).

Yeah, might be better, since we operate in the KVM_REG_ARM_ namespace here.

> > +			val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED;
> > +		break;
> > +	default:
> > +		return -ENOENT;
> >  	}
> >  
> > -	return -EINVAL;
> > +	if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
> > +		return -EFAULT;
> > +
> > +	return 0;
> >  }
> >  
> >  int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
> >  {
> > -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
> > -		void __user *uaddr = (void __user *)(long)reg->addr;
> > -		bool wants_02;
> > -		u64 val;
> > +	void __user *uaddr = (void __user *)(long)reg->addr;
> > +	u64 val;
> > +	int wa_level;
> > +
> > +	if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
> > +		return -EFAULT;
> >  
> > -		if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
> > -			return -EFAULT;
> > +	switch (reg->id) {
> > +	case KVM_REG_ARM_PSCI_VERSION:
> > +	{
> > +		bool wants_02;
> >  
> >  		wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
> >  
> > @@ -497,6 +552,47 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
> >  			vcpu->kvm->arch.psci_version = val;
> >  			return 0;
> >  		}
> > +		break;
> > +	}
> > +
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> > +		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
> > +			return -EINVAL;
> > +
> > +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;  
> not needed

Yeah, I kept it in for symmetry reasons. I was also afraid that someone
changes the check above in the future.

> > +
> > +		/* For now we only accept the very same workaround level. */
> > +		if (get_kernel_wa_level(reg->id) != wa_level)
> > +			return -EINVAL;
> > +
> > +		return 0;
> > +
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> > +		if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
> > +			    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))> +			return -EINVAL;
> > +
> > +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
> > +
> > +		if (get_kernel_wa_level(reg->id) < wa_level)
> > +			return -EINVAL;
> > +  
> worth a comment?

True.

> > +		if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL)
> > +			return 0;
> > +
> > +		switch (wa_level) {
> > +		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
> > +			kvm_arm_set_vcpu_workaround_2_flag(vcpu,
> > +			    val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED);
> > +			break;
> > +		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED:
> > +			kvm_arm_set_vcpu_workaround_2_flag(vcpu, true);  
> Looks strange to me we enable the flag when unaffected.

UNAFFECTED is the *requested* level here, so the original kernel had it.
It means "always active or not needed". We get here if the local kernel
has a *switchable* workaround, so we have to make sure it's enabled, to
match the original workaround level.
Will add a comment before the switch.

Cheers,
Andre.


> 
> > +			break;
> > +		}
> > +
> > +		return 0;
> > +	default:
> > +		return -ENOENT;
> >  	}
> >  
> >  	return -EINVAL;
> >   
> 
> Thanks
> 
> Eric
Eric Auger March 21, 2019, 6:03 p.m. UTC | #4
Hi Andre,

On 3/21/19 6:35 PM, Andre Przywara wrote:
> On Thu, 21 Mar 2019 13:54:07 +0100
> Auger Eric <eric.auger@redhat.com> wrote:
> 
> Hi Eric,
> 
> many thanks for looking at this!
> I was about to prepare a new revision.
> 
>> Hi Andre,
>>
>> On 3/1/19 12:43 AM, Andre Przywara wrote:
>>> KVM implements the firmware interface for mitigating cache speculation
>>> vulnerabilities. Guests may use this interface to ensure mitigation is
>>> active.
>>> If we want to migrate such a guest to a host with a different support
>>> level for those workarounds, migration might need to fail, to ensure that
>>> critical guests don't loose their protection.
>>>
>>> Introduce a way for userland to save and restore the workarounds state.
>>> On restoring we do checks that make sure we don't downgrade our
>>> mitigation level.
>>>
>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>> ---
>>>  arch/arm/include/asm/kvm_emulate.h   |  10 +++
>>>  arch/arm/include/uapi/asm/kvm.h      |  10 +++
>>>  arch/arm64/include/asm/kvm_emulate.h |  14 +++
>>>  arch/arm64/include/uapi/asm/kvm.h    |   9 ++
>>>  virt/kvm/arm/psci.c                  | 128 +++++++++++++++++++++++----
>>>  5 files changed, 155 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
>>> index 8927cae7c966..663a02d7e6f4 100644
>>> --- a/arch/arm/include/asm/kvm_emulate.h
>>> +++ b/arch/arm/include/asm/kvm_emulate.h
>>> @@ -283,6 +283,16 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
>>>  	return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
>>>  }
>>>  
>>> +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
>>> +{
>>> +	return false;
>>> +}
>>> +
>>> +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
>>> +						      bool flag)
>>> +{
>>> +}
>>> +
>>>  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
>>>  {
>>>  	*vcpu_cpsr(vcpu) |= PSR_E_BIT;
>>> diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
>>> index 4602464ebdfb..ba4d2afe65e3 100644
>>> --- a/arch/arm/include/uapi/asm/kvm.h
>>> +++ b/arch/arm/include/uapi/asm/kvm.h
>>> @@ -214,6 +214,16 @@ struct kvm_vcpu_events {
>>>  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM | KVM_REG_SIZE_U64 | \
>>>  					 KVM_REG_ARM_FW | ((r) & 0xffff))
>>>  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_UNAFFECTED	2
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1  
>> Would be worth adding a comment saying that values are chosen so that
>> higher values mean better protection. Otherwise it looks strange
>> NOT_AVAIL/AVAIL/UNAFFECTED values are not the same for both workarounds.	
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED	(1U << 4)  
>>
>>>  
>>>  /* Device Control API: ARM VGIC */
>>>  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
>>> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
>>> index d3842791e1c4..c00c17c9adb6 100644
>>> --- a/arch/arm64/include/asm/kvm_emulate.h
>>> +++ b/arch/arm64/include/asm/kvm_emulate.h
>>> @@ -348,6 +348,20 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
>>>  	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
>>>  }
>>>  
>>> +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
>>> +{
>>> +	return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
>>> +}
>>> +
>>> +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
>>> +						      bool flag)
>>> +{
>>> +	if (flag)
>>> +		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
>>> +	else
>>> +		vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
>>> +}
>>> +
>>>  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
>>>  {
>>>  	if (vcpu_mode_is_32bit(vcpu)) {
>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
>>> index 97c3478ee6e7..367e96fe654e 100644
>>> --- a/arch/arm64/include/uapi/asm/kvm.h
>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>>> @@ -225,6 +225,15 @@ struct kvm_vcpu_events {
>>>  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
>>>  					 KVM_REG_ARM_FW | ((r) & 0xffff))
>>>  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED     (1U << 4)
>>>  
>>>  /* Device Control API: ARM VGIC */
>>>  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
>>> diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
>>> index 9b73d3ad918a..e65664c09b12 100644
>>> --- a/virt/kvm/arm/psci.c
>>> +++ b/virt/kvm/arm/psci.c
>>> @@ -445,42 +445,97 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
>>>  
>>>  int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
>>>  {
>>> -	return 1;		/* PSCI version */
>>> +	return 3;		/* PSCI version and two workaround registers */
>>>  }
>>>  
>>>  int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
>>>  {
>>> -	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices))
>>> +	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
>>> +		return -EFAULT;
>>> +
>>> +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
>>> +		return -EFAULT;
>>> +
>>> +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
>>>  		return -EFAULT;  
>> Wouldn't it make sense to have a const array somewhere listing the FW
>> regs and putting KVM_REG_ARM_FW_REG[i]? Also kvm_arm_get_fw_num_regs
>> could return the ARRAY_SIZE.
> 
> Yeah, makes sense. I was cowardly pushing this off to the one adding the next firmware register ;-)
> 
>> vcpu arg is never used actually (not related to this patch).
>>>  
>>>  	return 0;
>>>  }
>>>  
>>> +#define KVM_REG_FEATURE_LEVEL_WIDTH	4
>>> +#define KVM_REG_FEATURE_LEVEL_MASK	(BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
>>> +
>>> +/*
>>> + * Convert the workaround level into an easy-to-compare number, where higher
>>> + * values mean better protection.
>>> + */
>>> +static int get_kernel_wa_level(u64 regid)
>>> +{
>>> +	switch (regid) {
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
>>> +		if (kvm_arm_harden_branch_predictor())
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
>>> +		else
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
>>> +		switch (kvm_arm_have_ssbd()) {
>>> +		case KVM_SSBD_FORCE_DISABLE:
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
>>> +		case KVM_SSBD_KERNEL:
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL;
>>> +		case KVM_SSBD_FORCE_ENABLE:
>>> +		case KVM_SSBD_MITIGATED:
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED;
>>> +		case KVM_SSBD_UNKNOWN:
>>> +		default:
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN;
>>> +		}
>>> +	}
>>> +
>>> +	return 0;  
>> I would rather return -EINVAL although the function is not called for
>> any invalid reg.
>>> +}
>>> +
>>>  int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>>>  {
>>> -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
>>> -		void __user *uaddr = (void __user *)(long)reg->addr;
>>> -		u64 val;
>>> +	void __user *uaddr = (void __user *)(long)reg->addr;
>>> +	u64 val;
>>>  
>>> +	switch (reg->id) {
>>> +	case KVM_REG_ARM_PSCI_VERSION:
>>>  		val = kvm_psci_version(vcpu, vcpu->kvm);
>>> -		if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
>>> -			return -EFAULT;
>>> -
>>> -		return 0;
>>> +		break;
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
>>> +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;  
>> Can get_kernel_wa_level return something outside of
>> KVM_REG_FEATURE_LEVEL_MASK?
>>> +		break;
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
>>> +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;  
>> same here
>>> +		if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL &&> +		    kvm_arm_get_vcpu_workaround_2_flag(vcpu))  
>> nit: if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
>> kvm_arm_get_vcpu_workaround_2_flag(vcpu)).
> 
> Yeah, might be better, since we operate in the KVM_REG_ARM_ namespace here.
> 
>>> +			val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED;
>>> +		break;
>>> +	default:
>>> +		return -ENOENT;
>>>  	}
>>>  
>>> -	return -EINVAL;
>>> +	if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
>>> +		return -EFAULT;
>>> +
>>> +	return 0;
>>>  }
>>>  
>>>  int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>>>  {
>>> -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
>>> -		void __user *uaddr = (void __user *)(long)reg->addr;
>>> -		bool wants_02;
>>> -		u64 val;
>>> +	void __user *uaddr = (void __user *)(long)reg->addr;
>>> +	u64 val;
>>> +	int wa_level;
>>> +
>>> +	if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
>>> +		return -EFAULT;
>>>  
>>> -		if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
>>> -			return -EFAULT;
>>> +	switch (reg->id) {
>>> +	case KVM_REG_ARM_PSCI_VERSION:
>>> +	{
>>> +		bool wants_02;
>>>  
>>>  		wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
>>>  
>>> @@ -497,6 +552,47 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>>>  			vcpu->kvm->arch.psci_version = val;
>>>  			return 0;
>>>  		}
>>> +		break;
>>> +	}
>>> +
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
>>> +		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
>>> +			return -EINVAL;
>>> +
>>> +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;  
>> not needed
> 
> Yeah, I kept it in for symmetry reasons. I was also afraid that someone
> changes the check above in the future.
> 
>>> +
>>> +		/* For now we only accept the very same workaround level. */
>>> +		if (get_kernel_wa_level(reg->id) != wa_level)
>>> +			return -EINVAL;
>>> +
>>> +		return 0;
>>> +
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
>>> +		if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
>>> +			    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))> +			return -EINVAL;
>>> +
>>> +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
>>> +
>>> +		if (get_kernel_wa_level(reg->id) < wa_level)
>>> +			return -EINVAL;
>>> +  
>> worth a comment?
> 
> True.
> 
>>> +		if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL)
>>> +			return 0;
>>> +
>>> +		switch (wa_level) {
>>> +		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
>>> +			kvm_arm_set_vcpu_workaround_2_flag(vcpu,
>>> +			    val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED);
>>> +			break;
>>> +		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED:
>>> +			kvm_arm_set_vcpu_workaround_2_flag(vcpu, true);  
>> Looks strange to me we enable the flag when unaffected.
> 
> UNAFFECTED is the *requested* level here, so the original kernel had it.
> It means "always active or not needed". We get here if the local kernel
> has a *switchable* workaround, so we have to make sure it's enabled, to
> match the original workaround level.
> Will add a comment before the switch.
OK I get it now. Adding such kind of comments at main decision levels
would definitively help (me).

Thanks

Eric
> 
> Cheers,
> Andre.
> 
> 
>>
>>> +			break;
>>> +		}
>>> +
>>> +		return 0;
>>> +	default:
>>> +		return -ENOENT;
>>>  	}
>>>  
>>>  	return -EINVAL;
>>>   
>>
>> Thanks
>>
>> Eric
>
Andre Przywara April 15, 2019, 12:33 p.m. UTC | #5
On Thu, 21 Mar 2019 13:54:07 +0100
Auger Eric <eric.auger@redhat.com> wrote:

Hi Eric,

sorry, forgot to hit "Send" this morning ;-)

> On 3/1/19 12:43 AM, Andre Przywara wrote:
> > KVM implements the firmware interface for mitigating cache speculation
> > vulnerabilities. Guests may use this interface to ensure mitigation is
> > active.
> > If we want to migrate such a guest to a host with a different support
> > level for those workarounds, migration might need to fail, to ensure that
> > critical guests don't loose their protection.
> > 
> > Introduce a way for userland to save and restore the workarounds state.
> > On restoring we do checks that make sure we don't downgrade our
> > mitigation level.
> > 
> > Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> > ---
> >  arch/arm/include/asm/kvm_emulate.h   |  10 +++
> >  arch/arm/include/uapi/asm/kvm.h      |  10 +++
> >  arch/arm64/include/asm/kvm_emulate.h |  14 +++
> >  arch/arm64/include/uapi/asm/kvm.h    |   9 ++
> >  virt/kvm/arm/psci.c                  | 128 +++++++++++++++++++++++----
> >  5 files changed, 155 insertions(+), 16 deletions(-)
> > 
> > diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
> > index 8927cae7c966..663a02d7e6f4 100644
> > --- a/arch/arm/include/asm/kvm_emulate.h
> > +++ b/arch/arm/include/asm/kvm_emulate.h
> > @@ -283,6 +283,16 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
> >  	return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
> >  }
> >  
> > +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
> > +{
> > +	return false;
> > +}
> > +
> > +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
> > +						      bool flag)
> > +{
> > +}
> > +
> >  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
> >  {
> >  	*vcpu_cpsr(vcpu) |= PSR_E_BIT;
> > diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
> > index 4602464ebdfb..ba4d2afe65e3 100644
> > --- a/arch/arm/include/uapi/asm/kvm.h
> > +++ b/arch/arm/include/uapi/asm/kvm.h
> > @@ -214,6 +214,16 @@ struct kvm_vcpu_events {
> >  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM | KVM_REG_SIZE_U64 | \
> >  					 KVM_REG_ARM_FW | ((r) & 0xffff))
> >  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_UNAFFECTED	2
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1  
> Would be worth adding a comment saying that values are chosen so that
> higher values mean better protection. Otherwise it looks strange
> NOT_AVAIL/AVAIL/UNAFFECTED values are not the same for both workarounds.	
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED	(1U << 4)  
> 
> >  
> >  /* Device Control API: ARM VGIC */
> >  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
> > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> > index d3842791e1c4..c00c17c9adb6 100644
> > --- a/arch/arm64/include/asm/kvm_emulate.h
> > +++ b/arch/arm64/include/asm/kvm_emulate.h
> > @@ -348,6 +348,20 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
> >  	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
> >  }
> >  
> > +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
> > +{
> > +	return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
> > +}
> > +
> > +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
> > +						      bool flag)
> > +{
> > +	if (flag)
> > +		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
> > +	else
> > +		vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
> > +}
> > +
> >  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
> >  {
> >  	if (vcpu_mode_is_32bit(vcpu)) {
> > diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> > index 97c3478ee6e7..367e96fe654e 100644
> > --- a/arch/arm64/include/uapi/asm/kvm.h
> > +++ b/arch/arm64/include/uapi/asm/kvm.h
> > @@ -225,6 +225,15 @@ struct kvm_vcpu_events {
> >  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
> >  					 KVM_REG_ARM_FW | ((r) & 0xffff))
> >  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
> > +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED     (1U << 4)
> >  
> >  /* Device Control API: ARM VGIC */
> >  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
> > diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
> > index 9b73d3ad918a..e65664c09b12 100644
> > --- a/virt/kvm/arm/psci.c
> > +++ b/virt/kvm/arm/psci.c
> > @@ -445,42 +445,97 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
> >  
> >  int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
> >  {
> > -	return 1;		/* PSCI version */
> > +	return 3;		/* PSCI version and two workaround registers */
> >  }
> >  
> >  int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
> >  {
> > -	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices))
> > +	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
> > +		return -EFAULT;
> > +
> > +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
> > +		return -EFAULT;
> > +
> > +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
> >  		return -EFAULT;  
> Wouldn't it make sense to have a const array somewhere listing the FW
> regs and putting KVM_REG_ARM_FW_REG[i]? Also kvm_arm_get_fw_num_regs
> could return the ARRAY_SIZE.

Decided to cheekily leaving this exercise to the next user of the firmware register interface - or to yet another version of this series ;-)

> vcpu arg is never used actually (not related to this patch).

I think there is some sense in allowing per-VCPU values of firmware registers, we just don't use it at the moment, since we rely on homogeneous CPUs for most operations. But that's just a property of the currently existing firmware registers.

> >  
> >  	return 0;
> >  }
> >  
> > +#define KVM_REG_FEATURE_LEVEL_WIDTH	4
> > +#define KVM_REG_FEATURE_LEVEL_MASK	(BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
> > +
> > +/*
> > + * Convert the workaround level into an easy-to-compare number, where higher
> > + * values mean better protection.
> > + */
> > +static int get_kernel_wa_level(u64 regid)
> > +{
> > +	switch (regid) {
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> > +		if (kvm_arm_harden_branch_predictor())
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
> > +		else
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> > +		switch (kvm_arm_have_ssbd()) {
> > +		case KVM_SSBD_FORCE_DISABLE:
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
> > +		case KVM_SSBD_KERNEL:
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL;
> > +		case KVM_SSBD_FORCE_ENABLE:
> > +		case KVM_SSBD_MITIGATED:
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED;
> > +		case KVM_SSBD_UNKNOWN:
> > +		default:
> > +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN;
> > +		}
> > +	}
> > +
> > +	return 0;  
> I would rather return -EINVAL although the function is not called for
> any invalid reg.

Good point.

> > +}
> > +
> >  int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
> >  {
> > -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
> > -		void __user *uaddr = (void __user *)(long)reg->addr;
> > -		u64 val;
> > +	void __user *uaddr = (void __user *)(long)reg->addr;
> > +	u64 val;
> >  
> > +	switch (reg->id) {
> > +	case KVM_REG_ARM_PSCI_VERSION:
> >  		val = kvm_psci_version(vcpu, vcpu->kvm);
> > -		if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
> > -			return -EFAULT;
> > -
> > -		return 0;
> > +		break;
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> > +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;  
> Can get_kernel_wa_level return something outside of
> KVM_REG_FEATURE_LEVEL_MASK?

No, not at the moment. But I find it better to keep the mask, as this goes out to userland, so we want to make sure this is within the documented range.

Cheers,
Andre.


> > +		break;
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> > +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;  
> same here
> > +		if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL &&> +		    kvm_arm_get_vcpu_workaround_2_flag(vcpu))  
> nit: if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
> kvm_arm_get_vcpu_workaround_2_flag(vcpu)).
> > +			val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED;
> > +		break;
> > +	default:
> > +		return -ENOENT;
> >  	}
> >  
> > -	return -EINVAL;
> > +	if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
> > +		return -EFAULT;
> > +
> > +	return 0;
> >  }
> >  
> >  int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
> >  {
> > -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
> > -		void __user *uaddr = (void __user *)(long)reg->addr;
> > -		bool wants_02;
> > -		u64 val;
> > +	void __user *uaddr = (void __user *)(long)reg->addr;
> > +	u64 val;
> > +	int wa_level;
> > +
> > +	if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
> > +		return -EFAULT;
> >  
> > -		if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
> > -			return -EFAULT;
> > +	switch (reg->id) {
> > +	case KVM_REG_ARM_PSCI_VERSION:
> > +	{
> > +		bool wants_02;
> >  
> >  		wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
> >  
> > @@ -497,6 +552,47 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
> >  			vcpu->kvm->arch.psci_version = val;
> >  			return 0;
> >  		}
> > +		break;
> > +	}
> > +
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
> > +		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
> > +			return -EINVAL;
> > +
> > +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;  
> not needed
> > +
> > +		/* For now we only accept the very same workaround level. */
> > +		if (get_kernel_wa_level(reg->id) != wa_level)
> > +			return -EINVAL;
> > +
> > +		return 0;
> > +
> > +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
> > +		if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
> > +			    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))> +			return -EINVAL;
> > +
> > +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
> > +
> > +		if (get_kernel_wa_level(reg->id) < wa_level)
> > +			return -EINVAL;
> > +  
> worth a comment?
> > +		if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL)
> > +			return 0;
> > +
> > +		switch (wa_level) {
> > +		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
> > +			kvm_arm_set_vcpu_workaround_2_flag(vcpu,
> > +			    val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED);
> > +			break;
> > +		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED:
> > +			kvm_arm_set_vcpu_workaround_2_flag(vcpu, true);  
> Looks strange to me we enable the flag when unaffected.
> 
> > +			break;
> > +		}
> > +
> > +		return 0;
> > +	default:
> > +		return -ENOENT;
> >  	}
> >  
> >  	return -EINVAL;
> >   
> 
> Thanks
> 
> Eric
Eric Auger April 26, 2019, 3:07 p.m. UTC | #6
Hi Andre,

On 3/21/19 6:35 PM, Andre Przywara wrote:
> On Thu, 21 Mar 2019 13:54:07 +0100
> Auger Eric <eric.auger@redhat.com> wrote:
> 
> Hi Eric,
> 
> many thanks for looking at this!
> I was about to prepare a new revision.
> 
>> Hi Andre,
>>
>> On 3/1/19 12:43 AM, Andre Przywara wrote:
>>> KVM implements the firmware interface for mitigating cache speculation
>>> vulnerabilities. Guests may use this interface to ensure mitigation is
>>> active.
>>> If we want to migrate such a guest to a host with a different support
>>> level for those workarounds, migration might need to fail, to ensure that
>>> critical guests don't loose their protection.
>>>
>>> Introduce a way for userland to save and restore the workarounds state.
>>> On restoring we do checks that make sure we don't downgrade our
>>> mitigation level.
>>>
>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>> ---
>>>  arch/arm/include/asm/kvm_emulate.h   |  10 +++
>>>  arch/arm/include/uapi/asm/kvm.h      |  10 +++
>>>  arch/arm64/include/asm/kvm_emulate.h |  14 +++
>>>  arch/arm64/include/uapi/asm/kvm.h    |   9 ++
>>>  virt/kvm/arm/psci.c                  | 128 +++++++++++++++++++++++----
>>>  5 files changed, 155 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
>>> index 8927cae7c966..663a02d7e6f4 100644
>>> --- a/arch/arm/include/asm/kvm_emulate.h
>>> +++ b/arch/arm/include/asm/kvm_emulate.h
>>> @@ -283,6 +283,16 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
>>>  	return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
>>>  }
>>>  
>>> +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
>>> +{
>>> +	return false;
>>> +}
>>> +
>>> +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
>>> +						      bool flag)
>>> +{
>>> +}
>>> +
>>>  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
>>>  {
>>>  	*vcpu_cpsr(vcpu) |= PSR_E_BIT;
>>> diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
>>> index 4602464ebdfb..ba4d2afe65e3 100644
>>> --- a/arch/arm/include/uapi/asm/kvm.h
>>> +++ b/arch/arm/include/uapi/asm/kvm.h
>>> @@ -214,6 +214,16 @@ struct kvm_vcpu_events {
>>>  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM | KVM_REG_SIZE_U64 | \
>>>  					 KVM_REG_ARM_FW | ((r) & 0xffff))
>>>  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_UNAFFECTED	2
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1  
>> Would be worth adding a comment saying that values are chosen so that
>> higher values mean better protection. Otherwise it looks strange
>> NOT_AVAIL/AVAIL/UNAFFECTED values are not the same for both workarounds.	
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED	(1U << 4)  
>>
>>>  
>>>  /* Device Control API: ARM VGIC */
>>>  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
>>> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
>>> index d3842791e1c4..c00c17c9adb6 100644
>>> --- a/arch/arm64/include/asm/kvm_emulate.h
>>> +++ b/arch/arm64/include/asm/kvm_emulate.h
>>> @@ -348,6 +348,20 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
>>>  	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
>>>  }
>>>  
>>> +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
>>> +{
>>> +	return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
>>> +}
>>> +
>>> +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
>>> +						      bool flag)
>>> +{
>>> +	if (flag)
>>> +		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
>>> +	else
>>> +		vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
>>> +}
>>> +
>>>  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
>>>  {
>>>  	if (vcpu_mode_is_32bit(vcpu)) {
>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
>>> index 97c3478ee6e7..367e96fe654e 100644
>>> --- a/arch/arm64/include/uapi/asm/kvm.h
>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>>> @@ -225,6 +225,15 @@ struct kvm_vcpu_events {
>>>  #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
>>>  					 KVM_REG_ARM_FW | ((r) & 0xffff))
>>>  #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
>>> +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED     (1U << 4)
>>>  
>>>  /* Device Control API: ARM VGIC */
>>>  #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
>>> diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
>>> index 9b73d3ad918a..e65664c09b12 100644
>>> --- a/virt/kvm/arm/psci.c
>>> +++ b/virt/kvm/arm/psci.c
>>> @@ -445,42 +445,97 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
>>>  
>>>  int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
>>>  {
>>> -	return 1;		/* PSCI version */
>>> +	return 3;		/* PSCI version and two workaround registers */
>>>  }
>>>  
>>>  int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
>>>  {
>>> -	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices))
>>> +	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
>>> +		return -EFAULT;
>>> +
>>> +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
>>> +		return -EFAULT;
>>> +
>>> +	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
>>>  		return -EFAULT;  
>> Wouldn't it make sense to have a const array somewhere listing the FW
>> regs and putting KVM_REG_ARM_FW_REG[i]? Also kvm_arm_get_fw_num_regs
>> could return the ARRAY_SIZE.
> 
> Yeah, makes sense. I was cowardly pushing this off to the one adding the next firmware register ;-)

;-)
> 
>> vcpu arg is never used actually (not related to this patch).
>>>  
>>>  	return 0;
>>>  }
>>>  
>>> +#define KVM_REG_FEATURE_LEVEL_WIDTH	4
>>> +#define KVM_REG_FEATURE_LEVEL_MASK	(BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
>>> +
>>> +/*
>>> + * Convert the workaround level into an easy-to-compare number, where higher
>>> + * values mean better protection.
>>> + */
>>> +static int get_kernel_wa_level(u64 regid)
>>> +{
>>> +	switch (regid) {
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
>>> +		if (kvm_arm_harden_branch_predictor())
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
>>> +		else
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
>>> +		switch (kvm_arm_have_ssbd()) {
>>> +		case KVM_SSBD_FORCE_DISABLE:
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
>>> +		case KVM_SSBD_KERNEL:
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL;
>>> +		case KVM_SSBD_FORCE_ENABLE:
>>> +		case KVM_SSBD_MITIGATED:
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED;
>>> +		case KVM_SSBD_UNKNOWN:
>>> +		default:
>>> +			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN;
>>> +		}
>>> +	}
>>> +
>>> +	return 0;  
>> I would rather return -EINVAL although the function is not called for
>> any invalid reg.
>>> +}
>>> +
>>>  int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>>>  {
>>> -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
>>> -		void __user *uaddr = (void __user *)(long)reg->addr;
>>> -		u64 val;
>>> +	void __user *uaddr = (void __user *)(long)reg->addr;
>>> +	u64 val;
>>>  
>>> +	switch (reg->id) {
>>> +	case KVM_REG_ARM_PSCI_VERSION:
>>>  		val = kvm_psci_version(vcpu, vcpu->kvm);
>>> -		if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
>>> -			return -EFAULT;
>>> -
>>> -		return 0;
>>> +		break;
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
>>> +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;  
>> Can get_kernel_wa_level return something outside of
>> KVM_REG_FEATURE_LEVEL_MASK?
>>> +		break;
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
>>> +		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;  
>> same here
>>> +		if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL &&> +		    kvm_arm_get_vcpu_workaround_2_flag(vcpu))  
>> nit: if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
>> kvm_arm_get_vcpu_workaround_2_flag(vcpu)).
> 
> Yeah, might be better, since we operate in the KVM_REG_ARM_ namespace here.
> 
>>> +			val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED;
>>> +		break;
>>> +	default:
>>> +		return -ENOENT;
>>>  	}
>>>  
>>> -	return -EINVAL;
>>> +	if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
>>> +		return -EFAULT;
>>> +
>>> +	return 0;
>>>  }
>>>  
>>>  int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>>>  {
>>> -	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
>>> -		void __user *uaddr = (void __user *)(long)reg->addr;
>>> -		bool wants_02;
>>> -		u64 val;
>>> +	void __user *uaddr = (void __user *)(long)reg->addr;
>>> +	u64 val;
>>> +	int wa_level;
>>> +
>>> +	if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
>>> +		return -EFAULT;
>>>  
>>> -		if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
>>> -			return -EFAULT;
>>> +	switch (reg->id) {
>>> +	case KVM_REG_ARM_PSCI_VERSION:
>>> +	{
>>> +		bool wants_02;
>>>  
>>>  		wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
>>>  
>>> @@ -497,6 +552,47 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>>>  			vcpu->kvm->arch.psci_version = val;
>>>  			return 0;
>>>  		}
>>> +		break;
>>> +	}
>>> +
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
>>> +		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
>>> +			return -EINVAL;
>>> +
>>> +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;  
>> not needed
> 
> Yeah, I kept it in for symmetry reasons. I was also afraid that someone
> changes the check above in the future.
> 
>>> +
>>> +		/* For now we only accept the very same workaround level. */
>>> +		if (get_kernel_wa_level(reg->id) != wa_level)
>>> +			return -EINVAL;
>>> +
>>> +		return 0;
>>> +
>>> +	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
>>> +		if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
>>> +			    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))> +			return -EINVAL;
>>> +
>>> +		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
>>> +
>>> +		if (get_kernel_wa_level(reg->id) < wa_level)
>>> +			return -EINVAL;
>>> +  
>> worth a comment?
> 
> True.
> 
>>> +		if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL)
>>> +			return 0;
>>> +
>>> +		switch (wa_level) {
>>> +		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
>>> +			kvm_arm_set_vcpu_workaround_2_flag(vcpu,
>>> +			    val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED);
>>> +			break;
>>> +		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED:
>>> +			kvm_arm_set_vcpu_workaround_2_flag(vcpu, true);  
>> Looks strange to me we enable the flag when unaffected.
> 
> UNAFFECTED is the *requested* level here, so the original kernel had it.
> It means "always active or not needed". We get here if the local kernel
> has a *switchable* workaround, so we have to make sure it's enabled, to
> match the original workaround level.
> Will add a comment before the switch.

Yes that's clear now. I had hard time with the terminology to be honest.

Thanks

Eric
> 
> Cheers,
> Andre.
> 
> 
>>
>>> +			break;
>>> +		}
>>> +
>>> +		return 0;
>>> +	default:
>>> +		return -ENOENT;
>>>  	}
>>>  
>>>  	return -EINVAL;
>>>   
>>
>> Thanks
>>
>> Eric
>
diff mbox series

Patch

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 8927cae7c966..663a02d7e6f4 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -283,6 +283,16 @@  static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 	return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
 }
 
+static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
+static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
+						      bool flag)
+{
+}
+
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 {
 	*vcpu_cpsr(vcpu) |= PSR_E_BIT;
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 4602464ebdfb..ba4d2afe65e3 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -214,6 +214,16 @@  struct kvm_vcpu_events {
 #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM | KVM_REG_SIZE_U64 | \
 					 KVM_REG_ARM_FW | ((r) & 0xffff))
 #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_UNAFFECTED	2
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED	(1U << 4)
 
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index d3842791e1c4..c00c17c9adb6 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -348,6 +348,20 @@  static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
 }
 
+static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
+}
+
+static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
+						      bool flag)
+{
+	if (flag)
+		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
+	else
+		vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
+}
+
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 {
 	if (vcpu_mode_is_32bit(vcpu)) {
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 97c3478ee6e7..367e96fe654e 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -225,6 +225,15 @@  struct kvm_vcpu_events {
 #define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
 					 KVM_REG_ARM_FW | ((r) & 0xffff))
 #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1	KVM_REG_ARM_FW_REG(1)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL	0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL	1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL	0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN	1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL	2
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED	3
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED     (1U << 4)
 
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index 9b73d3ad918a..e65664c09b12 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -445,42 +445,97 @@  int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 
 int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
 {
-	return 1;		/* PSCI version */
+	return 3;		/* PSCI version and two workaround registers */
 }
 
 int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
-	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices))
+	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
+		return -EFAULT;
+
+	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
+		return -EFAULT;
+
+	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
 		return -EFAULT;
 
 	return 0;
 }
 
+#define KVM_REG_FEATURE_LEVEL_WIDTH	4
+#define KVM_REG_FEATURE_LEVEL_MASK	(BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
+
+/*
+ * Convert the workaround level into an easy-to-compare number, where higher
+ * values mean better protection.
+ */
+static int get_kernel_wa_level(u64 regid)
+{
+	switch (regid) {
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+		if (kvm_arm_harden_branch_predictor())
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
+		else
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+		switch (kvm_arm_have_ssbd()) {
+		case KVM_SSBD_FORCE_DISABLE:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
+		case KVM_SSBD_KERNEL:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL;
+		case KVM_SSBD_FORCE_ENABLE:
+		case KVM_SSBD_MITIGATED:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED;
+		case KVM_SSBD_UNKNOWN:
+		default:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN;
+		}
+	}
+
+	return 0;
+}
+
 int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
-	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
-		void __user *uaddr = (void __user *)(long)reg->addr;
-		u64 val;
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
 
+	switch (reg->id) {
+	case KVM_REG_ARM_PSCI_VERSION:
 		val = kvm_psci_version(vcpu, vcpu->kvm);
-		if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
-			return -EFAULT;
-
-		return 0;
+		break;
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+		break;
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+		if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL &&
+		    kvm_arm_get_vcpu_workaround_2_flag(vcpu))
+			val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED;
+		break;
+	default:
+		return -ENOENT;
 	}
 
-	return -EINVAL;
+	if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	return 0;
 }
 
 int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
-	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
-		void __user *uaddr = (void __user *)(long)reg->addr;
-		bool wants_02;
-		u64 val;
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int wa_level;
+
+	if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
 
-		if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
-			return -EFAULT;
+	switch (reg->id) {
+	case KVM_REG_ARM_PSCI_VERSION:
+	{
+		bool wants_02;
 
 		wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
 
@@ -497,6 +552,47 @@  int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 			vcpu->kvm->arch.psci_version = val;
 			return 0;
 		}
+		break;
+	}
+
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
+			return -EINVAL;
+
+		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
+
+		/* For now we only accept the very same workaround level. */
+		if (get_kernel_wa_level(reg->id) != wa_level)
+			return -EINVAL;
+
+		return 0;
+
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+		if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
+			    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
+			return -EINVAL;
+
+		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
+
+		if (get_kernel_wa_level(reg->id) < wa_level)
+			return -EINVAL;
+
+		if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL)
+			return 0;
+
+		switch (wa_level) {
+		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
+			kvm_arm_set_vcpu_workaround_2_flag(vcpu,
+			    val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED);
+			break;
+		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNAFFECTED:
+			kvm_arm_set_vcpu_workaround_2_flag(vcpu, true);
+			break;
+		}
+
+		return 0;
+	default:
+		return -ENOENT;
 	}
 
 	return -EINVAL;