diff mbox series

[-v5] x86/kvm: Implement HWCR support

Message ID 20190408090946.GA2315@zn.tnic (mailing list archive)
State New, archived
Headers show
Series [-v5] x86/kvm: Implement HWCR support | expand

Commit Message

Borislav Petkov April 8, 2019, 9:09 a.m. UTC
Hi all,

here's v5 which keeps the HWCR functionality in kvm/x86.c so that
emulation of AMD guests on Intel hw still can work.

--
From: Borislav Petkov <bp@suse.de>

The AMD hardware configuration register has some useful bits which can
be used by guests. Implement McStatusWrEn which can be used by guests
when injecting MCEs with the in-kernel mce-inject module.

For that, we need to set bit 18 - McStatusWrEn - first, before writing
the MCi_STATUS registers (otherwise we #GP).

Add the required machinery to do so.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Jim Mattson <jmattson@google.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: KVM <kvm@vger.kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/x86.c              | 45 ++++++++++++++++++++++++++++-----
 2 files changed, 42 insertions(+), 6 deletions(-)

Comments

Sean Christopherson April 8, 2019, 2:41 p.m. UTC | #1
On Mon, Apr 08, 2019 at 11:09:46AM +0200, Borislav Petkov wrote:
> Hi all,
> 
> here's v5 which keeps the HWCR functionality in kvm/x86.c so that
> emulation of AMD guests on Intel hw still can work.
> 
> --
> From: Borislav Petkov <bp@suse.de>
> 
> The AMD hardware configuration register has some useful bits which can
> be used by guests. Implement McStatusWrEn which can be used by guests
> when injecting MCEs with the in-kernel mce-inject module.
> 
> For that, we need to set bit 18 - McStatusWrEn - first, before writing
> the MCi_STATUS registers (otherwise we #GP).
> 
> Add the required machinery to do so.
> 
> Signed-off-by: Borislav Petkov <bp@suse.de>
> Cc: Jim Mattson <jmattson@google.com>
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: KVM <kvm@vger.kernel.org>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: Tony Luck <tony.luck@intel.com>
> Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 +++
>  arch/x86/kvm/x86.c              | 45 ++++++++++++++++++++++++++++-----
>  2 files changed, 42 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 159b5988292f..541c431df806 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -780,6 +780,9 @@ struct kvm_vcpu_arch {
>  
>  	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
>  	bool l1tf_flush_l1d;
> +
> +	/* AMD MSRC001_0015 Hardware Configuration */
> +	u64 msr_hwcr;
>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 099b851dabaf..e07f21c1d2d4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2273,6 +2273,30 @@ static void kvmclock_sync_fn(struct work_struct *work)
>  					KVMCLOCK_SYNC_PERIOD);
>  }
>  
> +/*
> + * On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP.
> + */
> +static bool __set_mci_status(struct kvm_vcpu *vcpu, struct msr_data *msr_info)

The name and return value semantics are confusing since kvm_set_msr*()
returns '1' to signal fault, '0' to signal success.  Most MSR helpers have
"valid" somewhere in the function name to avoid confusion regarding the
return value.

> +{
> +	if (guest_cpuid_is_amd(vcpu)) {
> +		struct msr_data tmp;
> +
> +		tmp.index = MSR_K7_HWCR;
> +
> +		if (kvm_get_msr_common(vcpu, &tmp))

No need to get through kvm_get_msr_common(), vcpu->arch.msr_hwcr can be
queried directly.  Going that route would likely eliminate the need for a
helper func, i.e. avoid the naming confusion and the comment below.

> +			return false;
> +
> +		/* McStatusWrEn enabled? */
> +		if (tmp.data & BIT_ULL(18))
> +			return true;
> +	}
> +
> +	if (msr_info->data != 0)

Irrevelant if this helper gets dropped, but why not "return !msr_info->data;"?

> +		return false;
> +
> +	return true;
> +}
> +
>  static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  {
>  	u64 mcg_cap = vcpu->arch.mcg_cap;
> @@ -2304,9 +2328,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  			if ((offset & 0x3) == 0 &&
>  			    data != 0 && (data | (1 << 10)) != ~(u64)0)
>  				return -1;
> -			if (!msr_info->host_initiated &&
> -				(offset & 0x3) == 1 && data != 0)
> -				return -1;
> +
> +			/* MCi_STATUS */
> +			if ((offset & 0x3) == 1 && !msr_info->host_initiated) {
> +				if (!__set_mci_status(vcpu, msr_info))
> +						return -1;
> +			}
> +
>  			vcpu->arch.mce_banks[offset] = data;
>  			break;
>  		}
> @@ -2455,8 +2483,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		data &= ~(u64)0x40;	/* ignore flush filter disable */
>  		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
>  		data &= ~(u64)0x8;	/* ignore TLB cache disable */
> -		data &= ~(u64)0x40000;  /* ignore Mc status write enable */
> -		if (data != 0) {
> +
> +		/* Handle McStatusWrEn */
> +		if (data == BIT_ULL(18)) {
> +			vcpu->arch.msr_hwcr = data;
> +		} else if (data != 0) {
>  			vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
>  				    data);
>  			return 1;
> @@ -2730,7 +2761,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_K8_SYSCFG:
>  	case MSR_K8_TSEG_ADDR:
>  	case MSR_K8_TSEG_MASK:
> -	case MSR_K7_HWCR:
>  	case MSR_VM_HSAVE_PA:
>  	case MSR_K8_INT_PENDING_MSG:
>  	case MSR_AMD64_NB_CFG:
> @@ -2894,6 +2924,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_MISC_FEATURES_ENABLES:
>  		msr_info->data = vcpu->arch.msr_misc_features_enables;
>  		break;
> +	case MSR_K7_HWCR:
> +		msr_info->data = vcpu->arch.msr_hwcr;
> +		break;
>  	default:
>  		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
>  			return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
> -- 
> 2.21.0
> 
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> Good mailing practices for 400: avoid top-posting and trim the reply.
Borislav Petkov April 8, 2019, 2:57 p.m. UTC | #2
On Mon, Apr 08, 2019 at 07:41:15AM -0700, Sean Christopherson wrote:
> > +{
> > +	if (guest_cpuid_is_amd(vcpu)) {
> > +		struct msr_data tmp;
> > +
> > +		tmp.index = MSR_K7_HWCR;
> > +
> > +		if (kvm_get_msr_common(vcpu, &tmp))
> 
> No need to get through kvm_get_msr_common(), vcpu->arch.msr_hwcr can be
> queried directly.  Going that route would likely eliminate the need for a
> helper func, i.e. avoid the naming confusion and the comment below.

Agree with every point except this: the helper function abstracts
the guest cpuid check and the rest of the handling nicely. If I put
everything in an already too big default: label in set_msr_mce(), then
the code there will become even more unreadable than it is.

Thx.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 159b5988292f..541c431df806 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -780,6 +780,9 @@  struct kvm_vcpu_arch {
 
 	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
 	bool l1tf_flush_l1d;
+
+	/* AMD MSRC001_0015 Hardware Configuration */
+	u64 msr_hwcr;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 099b851dabaf..e07f21c1d2d4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2273,6 +2273,30 @@  static void kvmclock_sync_fn(struct work_struct *work)
 					KVMCLOCK_SYNC_PERIOD);
 }
 
+/*
+ * On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP.
+ */
+static bool __set_mci_status(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	if (guest_cpuid_is_amd(vcpu)) {
+		struct msr_data tmp;
+
+		tmp.index = MSR_K7_HWCR;
+
+		if (kvm_get_msr_common(vcpu, &tmp))
+			return false;
+
+		/* McStatusWrEn enabled? */
+		if (tmp.data & BIT_ULL(18))
+			return true;
+	}
+
+	if (msr_info->data != 0)
+		return false;
+
+	return true;
+}
+
 static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	u64 mcg_cap = vcpu->arch.mcg_cap;
@@ -2304,9 +2328,13 @@  static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			if ((offset & 0x3) == 0 &&
 			    data != 0 && (data | (1 << 10)) != ~(u64)0)
 				return -1;
-			if (!msr_info->host_initiated &&
-				(offset & 0x3) == 1 && data != 0)
-				return -1;
+
+			/* MCi_STATUS */
+			if ((offset & 0x3) == 1 && !msr_info->host_initiated) {
+				if (!__set_mci_status(vcpu, msr_info))
+						return -1;
+			}
+
 			vcpu->arch.mce_banks[offset] = data;
 			break;
 		}
@@ -2455,8 +2483,11 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		data &= ~(u64)0x40;	/* ignore flush filter disable */
 		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
 		data &= ~(u64)0x8;	/* ignore TLB cache disable */
-		data &= ~(u64)0x40000;  /* ignore Mc status write enable */
-		if (data != 0) {
+
+		/* Handle McStatusWrEn */
+		if (data == BIT_ULL(18)) {
+			vcpu->arch.msr_hwcr = data;
+		} else if (data != 0) {
 			vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
 				    data);
 			return 1;
@@ -2730,7 +2761,6 @@  int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_K8_SYSCFG:
 	case MSR_K8_TSEG_ADDR:
 	case MSR_K8_TSEG_MASK:
-	case MSR_K7_HWCR:
 	case MSR_VM_HSAVE_PA:
 	case MSR_K8_INT_PENDING_MSG:
 	case MSR_AMD64_NB_CFG:
@@ -2894,6 +2924,9 @@  int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_MISC_FEATURES_ENABLES:
 		msr_info->data = vcpu->arch.msr_misc_features_enables;
 		break;
+	case MSR_K7_HWCR:
+		msr_info->data = vcpu->arch.msr_hwcr;
+		break;
 	default:
 		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
 			return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);