[-v5.1] x86/kvm: Implement HWCR support
diff mbox series

Message ID 20190418122842.GF27160@zn.tnic
State New
Headers show
Series
  • [-v5.1] x86/kvm: Implement HWCR support
Related show

Commit Message

Boris Petkov April 18, 2019, 12:28 p.m. UTC
Hi all,

ok here's v5.1 with most of Sean's feedback addressed. The function
checking whether HWCR[18] is set, I've renamed to can_set_mci_status()
and left it to return bool because it really is used in boolean context,
answering the question "Can I set MCi_STATUS MSRs?"

And now it all looks simple and clean, just how I like it! :-)

Thx.

---
From: Borislav Petkov <bp@suse.de>

The hardware configuration register has some useful bits which can be
used by guests. Implement McStatusWrEn which can be used by guests when
injecting MCEs with the in-kernel mce-inject module.

For that, we need to set bit 18 - McStatusWrEn - first, before writing
the MCi_STATUS registers (otherwise we #GP).

Add the required machinery to do so.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Jim Mattson <jmattson@google.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: KVM <kvm@vger.kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/x86.c              | 33 +++++++++++++++++++++++++++------
 2 files changed, 30 insertions(+), 6 deletions(-)

Comments

Sean Christopherson April 18, 2019, 1:56 p.m. UTC | #1
On Thu, Apr 18, 2019 at 02:28:50PM +0200, Borislav Petkov wrote:
> Hi all,
> 
> ok here's v5.1 with most of Sean's feedback addressed. The function
> checking whether HWCR[18] is set, I've renamed to can_set_mci_status()
> and left it to return bool because it really is used in boolean context,
> answering the question "Can I set MCi_STATUS MSRs?"
> 
> And now it all looks simple and clean, just how I like it! :-)
> 
> Thx.
> 
> ---
> From: Borislav Petkov <bp@suse.de>
> 
> The hardware configuration register has some useful bits which can be
> used by guests. Implement McStatusWrEn which can be used by guests when
> injecting MCEs with the in-kernel mce-inject module.
> 
> For that, we need to set bit 18 - McStatusWrEn - first, before writing
> the MCi_STATUS registers (otherwise we #GP).
> 
> Add the required machinery to do so.
> 
> Signed-off-by: Borislav Petkov <bp@suse.de>
> Cc: Jim Mattson <jmattson@google.com>
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: KVM <kvm@vger.kernel.org>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: Tony Luck <tony.luck@intel.com>
> Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 +++
>  arch/x86/kvm/x86.c              | 33 +++++++++++++++++++++++++++------
>  2 files changed, 30 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 159b5988292f..541c431df806 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -780,6 +780,9 @@ struct kvm_vcpu_arch {
>  
>  	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
>  	bool l1tf_flush_l1d;
> +
> +	/* AMD MSRC001_0015 Hardware Configuration */
> +	u64 msr_hwcr;
>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 099b851dabaf..10f6acc6494c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2273,6 +2273,18 @@ static void kvmclock_sync_fn(struct work_struct *work)
>  					KVMCLOCK_SYNC_PERIOD);
>  }
>  
> +/*
> + * On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP.
> + */
> +static bool can_set_mci_status(struct kvm_vcpu *vcpu)
> +{
> +	/* McStatusWrEn enabled? */
> +	if (guest_cpuid_is_amd(vcpu))
> +		return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
> +
> +	return false;
> +}
> +
>  static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  {
>  	u64 mcg_cap = vcpu->arch.mcg_cap;
> @@ -2304,9 +2316,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  			if ((offset & 0x3) == 0 &&
>  			    data != 0 && (data | (1 << 10)) != ~(u64)0)
>  				return -1;
> -			if (!msr_info->host_initiated &&
> -				(offset & 0x3) == 1 && data != 0)
> -				return -1;
> +
> +			/* MCi_STATUS */
> +			if ((offset & 0x3) == 1 && !msr_info->host_initiated) {
> +				if (!can_set_mci_status(vcpu))

This doesn't allow writing '0' regardless of msr_hwcr.BIT(18), which was
previously supported.  And there's no need for multiple if statements.

> +					return -1;
> +			}
> +
>  			vcpu->arch.mce_banks[offset] = data;
>  			break;
>  		}
> @@ -2455,8 +2471,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		data &= ~(u64)0x40;	/* ignore flush filter disable */
>  		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
>  		data &= ~(u64)0x8;	/* ignore TLB cache disable */
> -		data &= ~(u64)0x40000;  /* ignore Mc status write enable */
> -		if (data != 0) {
> +
> +		/* Handle McStatusWrEn */
> +		if (data == BIT_ULL(18)) {
> +			vcpu->arch.msr_hwcr = data;
> +		} else if (data != 0) {
>  			vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
>  				    data);
>  			return 1;
> @@ -2730,7 +2749,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_K8_SYSCFG:
>  	case MSR_K8_TSEG_ADDR:
>  	case MSR_K8_TSEG_MASK:
> -	case MSR_K7_HWCR:
>  	case MSR_VM_HSAVE_PA:
>  	case MSR_K8_INT_PENDING_MSG:
>  	case MSR_AMD64_NB_CFG:
> @@ -2894,6 +2912,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_MISC_FEATURES_ENABLES:
>  		msr_info->data = vcpu->arch.msr_misc_features_enables;
>  		break;
> +	case MSR_K7_HWCR:
> +		msr_info->data = vcpu->arch.msr_hwcr;
> +		break;
>  	default:
>  		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
>  			return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
> -- 
> 2.21.0
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> Good mailing practices for 400: avoid top-posting and trim the reply.
Boris Petkov April 18, 2019, 2:19 p.m. UTC | #2
On Thu, Apr 18, 2019 at 06:56:06AM -0700, Sean Christopherson wrote:
> This doesn't allow writing '0' regardless of msr_hwcr.BIT(18), which was
> previously supported.

Restored the old conditional:

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 10f6acc6494c..f74f1280745b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2318,7 +2318,8 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 				return -1;
 
 			/* MCi_STATUS */
-			if ((offset & 0x3) == 1 && !msr_info->host_initiated) {
+			if (!msr_info->host_initiated &&
+			    (offset & 0x3) == 1 && data != 0) {
 				if (!can_set_mci_status(vcpu))
 					return -1;
 			}

> And there's no need for multiple if statements.

It is a bit more readable this way.

Actually, I'd break that if above into smaller if-statements with
flipped logic to make it even more readable:

	if (msr_info->host_initiated)
		goto set_msr;

	if (!(offset & 0x3))
		goto set_msr;

	if (!data)
		goto set_msr;

	if (!can_set_mci_status(vcpu))
		return -1;

set_msr:
	...


Anything else? Can I send v5.2 now?
Sean Christopherson April 18, 2019, 2:38 p.m. UTC | #3
On Thu, Apr 18, 2019 at 04:19:49PM +0200, Borislav Petkov wrote:
> On Thu, Apr 18, 2019 at 06:56:06AM -0700, Sean Christopherson wrote:
> > This doesn't allow writing '0' regardless of msr_hwcr.BIT(18), which was
> > previously supported.
> 
> Restored the old conditional:
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 10f6acc6494c..f74f1280745b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2318,7 +2318,8 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  				return -1;
>  
>  			/* MCi_STATUS */
> -			if ((offset & 0x3) == 1 && !msr_info->host_initiated) {
> +			if (!msr_info->host_initiated &&
> +			    (offset & 0x3) == 1 && data != 0) {
>  				if (!can_set_mci_status(vcpu))
>  					return -1;
>  			}
> 
> > And there's no need for multiple if statements.
> 
> It is a bit more readable this way.
> 
> Actually, I'd break that if above into smaller if-statements with
> flipped logic to make it even more readable:
> 
> 	if (msr_info->host_initiated)
> 		goto set_msr;
> 
> 	if (!(offset & 0x3))
> 		goto set_msr;
> 
> 	if (!data)
> 		goto set_msr;
> 
> 	if (!can_set_mci_status(vcpu))
> 		return -1;

At that point it probably makes sense to rework the whole block, as there
are existing issues in both the CTL and STATUS flows:

  - Fault cases should return '1', not '-1'.

  - host_initiated should also be checked in the CTL case, so that
    host-initiated writes skip the CTL value check as well.


My personal preference would be to combine the host_initiated and !data
checks for brevity, so something like:

	if (msr_info->host_initiated || !data)
		goto set_msr;

	offset = msr - MSR_IA32_MC0_CTL;
	if ((offset & 0x3) == 0 && (data | (1 << 10)) != ~(u64)0)
		return 1;
	if ((offset & 0x3) == 1 && !can_set_mci_status(vcpu))
		return 1;

set_msr:

> 
> set_msr:
> 	...
> 
> 
> Anything else? Can I send v5.2 now?
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> Good mailing practices for 400: avoid top-posting and trim the reply.
Paolo Bonzini April 18, 2019, 4:34 p.m. UTC | #4
On 18/04/19 14:28, Borislav Petkov wrote:
> The hardware configuration register has some useful bits which can be
> used by guests. Implement McStatusWrEn which can be used by guests when
> injecting MCEs with the in-kernel mce-inject module.
> 
> For that, we need to set bit 18 - McStatusWrEn - first, before writing
> the MCi_STATUS registers (otherwise we #GP).
> 
> Add the required machinery to do so.

Just one little thing missing:

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eb49d2cee68a..6e23343c6b36 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1172,6 +1172,8 @@ bool kvm_rdpmc(struct kvm_vcpu *vcpu)
 	MSR_MISC_FEATURES_ENABLES,
 	MSR_AMD64_VIRT_SPEC_CTRL,
 	MSR_IA32_POWER_CTL,
+
+	MSR_K7_HWCR,
 };

 static unsigned num_emulated_msrs;

Queued with this change.

Paolo

> 
> Signed-off-by: Borislav Petkov <bp@suse.de>
> Cc: Jim Mattson <jmattson@google.com>
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: KVM <kvm@vger.kernel.org>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: Tony Luck <tony.luck@intel.com>
> Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 +++
>  arch/x86/kvm/x86.c              | 33 +++++++++++++++++++++++++++------
>  2 files changed, 30 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 159b5988292f..541c431df806 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -780,6 +780,9 @@ struct kvm_vcpu_arch {
>  
>  	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
>  	bool l1tf_flush_l1d;
> +
> +	/* AMD MSRC001_0015 Hardware Configuration */
> +	u64 msr_hwcr;
>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 099b851dabaf..10f6acc6494c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2273,6 +2273,18 @@ static void kvmclock_sync_fn(struct work_struct *work)
>  					KVMCLOCK_SYNC_PERIOD);
>  }
>  
> +/*
> + * On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP.
> + */
> +static bool can_set_mci_status(struct kvm_vcpu *vcpu)
> +{
> +	/* McStatusWrEn enabled? */
> +	if (guest_cpuid_is_amd(vcpu))
> +		return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
> +
> +	return false;
> +}
> +
>  static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  {
>  	u64 mcg_cap = vcpu->arch.mcg_cap;
> @@ -2304,9 +2316,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  			if ((offset & 0x3) == 0 &&
>  			    data != 0 && (data | (1 << 10)) != ~(u64)0)
>  				return -1;
> -			if (!msr_info->host_initiated &&
> -				(offset & 0x3) == 1 && data != 0)
> -				return -1;
> +
> +			/* MCi_STATUS */
> +			if ((offset & 0x3) == 1 && !msr_info->host_initiated) {
> +				if (!can_set_mci_status(vcpu))
> +					return -1;
> +			}
> +
>  			vcpu->arch.mce_banks[offset] = data;
>  			break;
>  		}
> @@ -2455,8 +2471,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		data &= ~(u64)0x40;	/* ignore flush filter disable */
>  		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
>  		data &= ~(u64)0x8;	/* ignore TLB cache disable */
> -		data &= ~(u64)0x40000;  /* ignore Mc status write enable */
> -		if (data != 0) {
> +
> +		/* Handle McStatusWrEn */
> +		if (data == BIT_ULL(18)) {
> +			vcpu->arch.msr_hwcr = data;
> +		} else if (data != 0) {
>  			vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
>  				    data);
>  			return 1;
> @@ -2730,7 +2749,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_K8_SYSCFG:
>  	case MSR_K8_TSEG_ADDR:
>  	case MSR_K8_TSEG_MASK:
> -	case MSR_K7_HWCR:
>  	case MSR_VM_HSAVE_PA:
>  	case MSR_K8_INT_PENDING_MSG:
>  	case MSR_AMD64_NB_CFG:
> @@ -2894,6 +2912,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_MISC_FEATURES_ENABLES:
>  		msr_info->data = vcpu->arch.msr_misc_features_enables;
>  		break;
> +	case MSR_K7_HWCR:
> +		msr_info->data = vcpu->arch.msr_hwcr;
> +		break;
>  	default:
>  		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
>  			return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
> -- 
> 2.21.0

Queued, thanks.
Paolo Bonzini April 18, 2019, 4:36 p.m. UTC | #5
On 18/04/19 16:19, Borislav Petkov wrote:
> On Thu, Apr 18, 2019 at 06:56:06AM -0700, Sean Christopherson wrote:
>> This doesn't allow writing '0' regardless of msr_hwcr.BIT(18), which was
>> previously supported.
> 
> Restored the old conditional:
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 10f6acc6494c..f74f1280745b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2318,7 +2318,8 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  				return -1;
>  
>  			/* MCi_STATUS */
> -			if ((offset & 0x3) == 1 && !msr_info->host_initiated) {
> +			if (!msr_info->host_initiated &&
> +			    (offset & 0x3) == 1 && data != 0) {
>  				if (!can_set_mci_status(vcpu))
>  					return -1;
>  			}
> 
>> And there's no need for multiple if statements.
> 
> It is a bit more readable this way.
> 
> Actually, I'd break that if above into smaller if-statements with
> flipped logic to make it even more readable:
> 
> 	if (msr_info->host_initiated)
> 		goto set_msr;
> 
> 	if (!(offset & 0x3))
> 		goto set_msr;
> 
> 	if (!data)
> 		goto set_msr;
> 
> 	if (!can_set_mci_status(vcpu))
> 		return -1;
> 
> set_msr:
> 	...
> 
> 
> Anything else? Can I send v5.2 now?

No need, squashing this in before running away for vacation.

Paolo

Patch
diff mbox series

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 159b5988292f..541c431df806 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -780,6 +780,9 @@  struct kvm_vcpu_arch {
 
 	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
 	bool l1tf_flush_l1d;
+
+	/* AMD MSRC001_0015 Hardware Configuration */
+	u64 msr_hwcr;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 099b851dabaf..10f6acc6494c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2273,6 +2273,18 @@  static void kvmclock_sync_fn(struct work_struct *work)
 					KVMCLOCK_SYNC_PERIOD);
 }
 
+/*
+ * On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP.
+ */
+static bool can_set_mci_status(struct kvm_vcpu *vcpu)
+{
+	/* McStatusWrEn enabled? */
+	if (guest_cpuid_is_amd(vcpu))
+		return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
+
+	return false;
+}
+
 static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	u64 mcg_cap = vcpu->arch.mcg_cap;
@@ -2304,9 +2316,13 @@  static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			if ((offset & 0x3) == 0 &&
 			    data != 0 && (data | (1 << 10)) != ~(u64)0)
 				return -1;
-			if (!msr_info->host_initiated &&
-				(offset & 0x3) == 1 && data != 0)
-				return -1;
+
+			/* MCi_STATUS */
+			if ((offset & 0x3) == 1 && !msr_info->host_initiated) {
+				if (!can_set_mci_status(vcpu))
+					return -1;
+			}
+
 			vcpu->arch.mce_banks[offset] = data;
 			break;
 		}
@@ -2455,8 +2471,11 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		data &= ~(u64)0x40;	/* ignore flush filter disable */
 		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
 		data &= ~(u64)0x8;	/* ignore TLB cache disable */
-		data &= ~(u64)0x40000;  /* ignore Mc status write enable */
-		if (data != 0) {
+
+		/* Handle McStatusWrEn */
+		if (data == BIT_ULL(18)) {
+			vcpu->arch.msr_hwcr = data;
+		} else if (data != 0) {
 			vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
 				    data);
 			return 1;
@@ -2730,7 +2749,6 @@  int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_K8_SYSCFG:
 	case MSR_K8_TSEG_ADDR:
 	case MSR_K8_TSEG_MASK:
-	case MSR_K7_HWCR:
 	case MSR_VM_HSAVE_PA:
 	case MSR_K8_INT_PENDING_MSG:
 	case MSR_AMD64_NB_CFG:
@@ -2894,6 +2912,9 @@  int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_MISC_FEATURES_ENABLES:
 		msr_info->data = vcpu->arch.msr_misc_features_enables;
 		break;
+	case MSR_K7_HWCR:
+		msr_info->data = vcpu->arch.msr_hwcr;
+		break;
 	default:
 		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
 			return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);