
[v4,5/9] KVM: x86: Untag LAM bits when applicable

Message ID 20230209024022.3371768-6-robert.hu@linux.intel.com (mailing list archive)
State New, archived
Series Linear Address Masking (LAM) KVM Enabling

Commit Message

Robert Hoo Feb. 9, 2023, 2:40 a.m. UTC
Define kvm_untagged_addr() per the LAM feature spec: address high bits are
sign-extended from the highest effective address bit.
Note that LAM_U48 and LA57 have some overlapping effective bits. This patch
emits a WARN() in that case.
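
For illustration, consider a made-up LAM_U57 user pointer, where metadata
occupies bits 62:57 and untagging sign-extends from bit 56:

	tagged  : 0x7e00000012345678	/* bits 62:57 = metadata, bit 56 = 0 */
	untagged: 0x0000000012345678	/* = __canonical_address(addr, 57) */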

Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
---
 arch/x86/kvm/vmx/vmx.c |  3 +++
 arch/x86/kvm/x86.c     |  4 ++++
 arch/x86/kvm/x86.h     | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 39 insertions(+)

Comments

Chao Gao Feb. 10, 2023, 3:04 p.m. UTC | #1
On Thu, Feb 09, 2023 at 10:40:18AM +0800, Robert Hoo wrote:
>Define kvm_untagged_addr() per the LAM feature spec: address high bits are
>sign-extended from the highest effective address bit.

>Note that LAM_U48 and LA57 have some overlapping effective bits. This patch
>emits a WARN() in that case.

Why emit a WARN()? Is the behavior undefined, or is it something KVM
cannot emulate?

>
>Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
>Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
>---
> arch/x86/kvm/vmx/vmx.c |  3 +++
> arch/x86/kvm/x86.c     |  4 ++++
> arch/x86/kvm/x86.h     | 32 ++++++++++++++++++++++++++++++++
> 3 files changed, 39 insertions(+)
>
>diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>index 66edd091f145..e4f14d1bdd2f 100644
>--- a/arch/x86/kvm/vmx/vmx.c
>+++ b/arch/x86/kvm/vmx/vmx.c
>@@ -2163,6 +2163,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> 		    (!msr_info->host_initiated &&
> 		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
> 			return 1;
>+
>+		data = kvm_untagged_addr(data, vcpu);

This is an MSR write, so LAM masking isn't performed on this write
according to the LAM spec. Am I misunderstanding something?

>+
> 		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
> 		    (data & MSR_IA32_BNDCFGS_RSVD))
> 			return 1;
>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index 312aea1854ae..1bdc8c0c80c0 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -1809,6 +1809,10 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
> 	case MSR_KERNEL_GS_BASE:
> 	case MSR_CSTAR:
> 	case MSR_LSTAR:
>+		/*
>+		 * The strict canonical checking still applies to MSR
>+		 * writing even when LAM is enabled.
>+		 */
> 		if (is_noncanonical_address(data, vcpu))

LAM spec says:

	Processors that support LAM continue to require the addresses written to
	control registers or MSRs be 57-bit canonical if the processor supports
	5-level paging or 48-bit canonical if it supports only 4-level paging

My understanding is that 57-bit canonical checking is performed if the
processor __supports__ 5-level paging. Then the is_noncanonical_address()
here is arguably wrong. Could you double-check and fix it?
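
Something like the below (untested; assuming guest_cpuid_has() with
X86_FEATURE_LA57 reflects what the vCPU model supports) would key the
check width off "supports" rather than "enabled":

	static inline u8 max_vaddr_bits(struct kvm_vcpu *vcpu)
	{
		/* Width follows what the vCPU supports, not CR4.LA57. */
		return guest_cpuid_has(vcpu, X86_FEATURE_LA57) ? 57 : 48;
	}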

> 			return 1;
> 		break;
>diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
>index 8ec5cc983062..7228895d4a6f 100644
>--- a/arch/x86/kvm/x86.h
>+++ b/arch/x86/kvm/x86.h
>@@ -201,6 +201,38 @@ static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
> 	return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
> }
> 
>+#ifdef CONFIG_X86_64

I don't get the purpose of the #ifdef. Shouldn't you check if the vcpu
is in 64-bit long mode?
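
E.g., an untested sketch using the existing is_64_bit_mode() helper, which
gates on the guest mode instead of the host kernel config:

	/* LAM untagging only applies to 64-bit linear addresses. */
	if (!is_64_bit_mode(vcpu))
		return addr;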

>+/* untag addr for guest, according to vCPU CR3 and CR4 settings */
>+static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu *vcpu)
>+{
>+	if (addr >> 63 == 0) {
>+		/* User pointers */
>+		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
>+			addr = __canonical_address(addr, 57);

braces are missing.

https://www.kernel.org/doc/html/latest/process/coding-style.html#placing-braces-and-spaces
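
i.e., since the else-if branch needs braces, every branch gets them:

	if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57) {
		addr = __canonical_address(addr, 57);
	} else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
		...
	}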

>+		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
>+			/*
>+			 * If the guest enabled 5-level paging and LAM_U48,
>+			 * bit 47 should be 0 and bits 48:56 contain metadata,
>+			 * although bits 47:56 are valid 5-level address bits.
>+			 * With LAM_U48 and 4-level paging, bit 47 is 0.
>+			 */
>+			WARN_ON(addr & _BITUL(47));
>+			addr = __canonical_address(addr, 48);
>+		}
>+	} else if (kvm_read_cr4(vcpu) & X86_CR4_LAM_SUP) { /* Supervisor pointers */
>+		if (kvm_read_cr4(vcpu) & X86_CR4_LA57)

use kvm_read_cr4_bits here to save potential VMCS_READs.
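
i.e., something like this (untested, with X86_CR4_LAM_SUP as defined
earlier in this series):

	} else if (kvm_read_cr4_bits(vcpu, X86_CR4_LAM_SUP)) {
		/* Supervisor pointers */
		if (kvm_read_cr4_bits(vcpu, X86_CR4_LA57))
			addr = __canonical_address(addr, 57);
		else
			addr = __canonical_address(addr, 48);
	}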

>+			addr = __canonical_address(addr, 57);
>+		else
>+			addr = __canonical_address(addr, 48);
>+	}
>+
>+	return addr;
>+}
>+#else
>+#define kvm_untagged_addr(addr, vcpu)	(addr)
>+#endif
>+
> static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
> 					gva_t gva, gfn_t gfn, unsigned access)
> {
>-- 
>2.31.1
>
Robert Hoo Feb. 11, 2023, 5:57 a.m. UTC | #2
On Fri, 2023-02-10 at 23:04 +0800, Chao Gao wrote:
> Why emit a WARN()? Is the behavior undefined, or is it something KVM
> cannot emulate?
> 
Oh, it should have been removed per Yuan's comment on the last version;
sorry, I forgot to.
Originally, I wrote a WARN_ON() (perhaps WARN_ON_ONCE() is better) here,
as an assertion that this should never happen in KVM. But now, considering
the emulation path, it is possible.

> > 
[...]
> 
> This is an MSR write, so LAM masking isn't performed on this write
> according to the LAM spec.

Right.

> Am I misunderstanding something?
> 
> > +
[...]
> > @@ -1809,6 +1809,10 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
> > 	case MSR_KERNEL_GS_BASE:
> > 	case MSR_CSTAR:
> > 	case MSR_LSTAR:
> > +		/*
> > +		 * The strict canonical checking still applies to MSR
> > +		 * writing even when LAM is enabled.
> > +		 */
> > 		if (is_noncanonical_address(data, vcpu))
> 
> LAM spec says:
> 
> 	Processors that support LAM continue to require the addresses
> 	written to control registers or MSRs be 57-bit canonical if the
> 	processor supports 5-level paging or 48-bit canonical if it
> 	supports only 4-level paging
> 
> My understanding is that 57-bit canonical checking is performed if the
> processor __supports__ 5-level paging. Then the is_noncanonical_address()
> here is arguably wrong. Could you double-check and fix it?
> 
Emm, the "supports" vs. "enabled" condition; you mean
vcpu_virt_addr_bits(), right?
{
	return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
}

This is worth double-checking. Let me check with Yu. Thanks.

> > 			return 1;
> > 		break;
> > diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> > index 8ec5cc983062..7228895d4a6f 100644
> > --- a/arch/x86/kvm/x86.h
> > +++ b/arch/x86/kvm/x86.h
> > @@ -201,6 +201,38 @@ static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
> > 	return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
> > }
> > 
> > +#ifdef CONFIG_X86_64
> 
> I don't get the purpose of the #ifdef. Shouldn't you check if the
> vcpu is in 64-bit long mode?
> 
Good question, thanks. It raises two questions for me:
1) can a 32-bit host/KVM support a 64-bit guest kernel?
2) if the guest is not in 64-bit mode, should the CPUID enumeration be
hidden? Or should KVM #GP on guest attempts to set the CR3/CR4 LAM bits?

> > +/* untag addr for guest, according to vCPU CR3 and CR4 settings */
> > +static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu *vcpu)
> > +{
> > +	if (addr >> 63 == 0) {
> > +		/* User pointers */
> > +		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
> > +			addr = __canonical_address(addr, 57);
> 
> braces are missing.

Careful review, thanks.
Curious that this escaped checkpatch.pl.
> 
> https://www.kernel.org/doc/html/latest/process/coding-style.html#placing-braces-and-spaces
> 
> > +		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
> > +			/*
> > +			 * If the guest enabled 5-level paging and LAM_U48,
> > +			 * bit 47 should be 0 and bits 48:56 contain metadata,
> > +			 * although bits 47:56 are valid 5-level address bits.
> > +			 * With LAM_U48 and 4-level paging, bit 47 is 0.
> > +			 */
> > +			WARN_ON(addr & _BITUL(47));
> > +			addr = __canonical_address(addr, 48);
> > +		}
> > +	} else if (kvm_read_cr4(vcpu) & X86_CR4_LAM_SUP) { /* Supervisor pointers */
> > +		if (kvm_read_cr4(vcpu) & X86_CR4_LA57)
> 
> use kvm_read_cr4_bits here to save potential VMCS_READs.

Right, thanks.
Binbin Wu Feb. 16, 2023, 6:37 a.m. UTC | #3
On 2/11/2023 1:57 PM, Robert Hoo wrote:
> On Fri, 2023-02-10 at 23:04 +0800, Chao Gao wrote:
>
>>> @@ -1809,6 +1809,10 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>>> 	case MSR_KERNEL_GS_BASE:
>>> 	case MSR_CSTAR:
>>> 	case MSR_LSTAR:
>>> +		/*
>>> +		 * The strict canonical checking still applies to MSR
>>> +		 * writing even when LAM is enabled.
>>> +		 */
>>> 		if (is_noncanonical_address(data, vcpu))
>> LAM spec says:
>>
>> 	Processors that support LAM continue to require the addresses
>> 	written to control registers or MSRs be 57-bit canonical if the
>> 	processor supports 5-level paging or 48-bit canonical if it
>> 	supports only 4-level paging
>>
>> My understanding is that 57-bit canonical checking is performed if the
>> processor __supports__ 5-level paging. Then the is_noncanonical_address()
>> here is arguably wrong. Could you double-check and fix it?
>>
> Emm, the "supports" vs. "enabled" condition; you mean
> vcpu_virt_addr_bits(), right?
> {
> 	return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
> }
>
> This is worth double-checking. Let me check with Yu. Thanks.

I checked the definition of canonical address checking in the Intel SDM.
It says: "In 64-bit mode, an address is considered to be in canonical
form if address bits 63 through to the most-significant implemented bit
by the microarchitecture are set to either all ones or all zeros".
So the bit width of the processor's canonical address check depends on
whether 5-level paging is __supported__, not on whether it is enabled.
And this description is consistent with the LAM spec.
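
For reference, the kernel's canonical-address helpers implement exactly
this sign-extension definition, along the lines of:

	/* Sign-extend from the most-significant implemented bit. */
	static inline u64 __canonical_address(u64 vaddr, u8 vaddr_bits)
	{
		return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
	}

	static inline bool __is_canonical_address(u64 vaddr, u8 vaddr_bits)
	{
		return __canonical_address(vaddr, vaddr_bits) == vaddr;
	}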

The current code for the canonical check is not strictly aligned with the
hardware behavior.
IMHO, the current code can still work properly because OSes running in
4-level paging mode lay out their virtual memory maps so that bits 63:47
are identical for canonical addresses (which is reasonable and necessary
for backward compatibility), even on processors that support 5-level
paging.
So for functionality, the current implementation should be OK.

I'd like to hear more opinions on whether we need to modify the code to
strictly align with the hardware behavior.

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 66edd091f145..e4f14d1bdd2f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2163,6 +2163,9 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		    (!msr_info->host_initiated &&
 		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
 			return 1;
+
+		data = kvm_untagged_addr(data, vcpu);
+
 		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
 		    (data & MSR_IA32_BNDCFGS_RSVD))
 			return 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 312aea1854ae..1bdc8c0c80c0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1809,6 +1809,10 @@  static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 	case MSR_KERNEL_GS_BASE:
 	case MSR_CSTAR:
 	case MSR_LSTAR:
+		/*
+		 * The strict canonical checking still applies to MSR
+		 * writing even when LAM is enabled.
+		 */
 		if (is_noncanonical_address(data, vcpu))
 			return 1;
 		break;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8ec5cc983062..7228895d4a6f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -201,6 +201,38 @@  static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
 	return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
 }
 
+#ifdef CONFIG_X86_64
+/* untag addr for guest, according to vCPU CR3 and CR4 settings */
+static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu *vcpu)
+{
+	if (addr >> 63 == 0) {
+		/* User pointers */
+		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
+			addr = __canonical_address(addr, 57);
+		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
+			/*
+			 * If the guest enabled 5-level paging and LAM_U48,
+			 * bit 47 should be 0 and bits 48:56 contain metadata,
+			 * although bits 47:56 are valid 5-level address bits.
+			 * With LAM_U48 and 4-level paging, bit 47 is 0.
+			 */
+			WARN_ON(addr & _BITUL(47));
+			addr = __canonical_address(addr, 48);
+		}
+	} else if (kvm_read_cr4(vcpu) & X86_CR4_LAM_SUP) { /* Supervisor pointers */
+		if (kvm_read_cr4(vcpu) & X86_CR4_LA57)
+			addr = __canonical_address(addr, 57);
+		else
+			addr = __canonical_address(addr, 48);
+	}
+
+	return addr;
+}
+#else
+#define kvm_untagged_addr(addr, vcpu)	(addr)
+#endif
+
 static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
 					gva_t gva, gfn_t gfn, unsigned access)
 {