diff mbox series

[v3,6/9] KVM: x86: Untag LAM bits when applicable

Message ID 20221209044557.1496580-7-robert.hu@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Linear Address Masking (LAM) KVM Enabling | expand

Commit Message

Robert Hoo Dec. 9, 2022, 4:45 a.m. UTC
Define kvm_untagged_addr() per the LAM feature spec: address high bits are
sign extended from the highest effective address bit.
Note that LAM_U48 and LA57 have some overlapping effective bits. This patch
emits a WARN() in that case.

Currently, the only applicable case where addresses with LAM bits are passed
down from the VM is the MPX MSRs.

Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
---
 arch/x86/kvm/vmx/vmx.c |  3 +++
 arch/x86/kvm/x86.c     |  5 +++++
 arch/x86/kvm/x86.h     | 37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+)

Comments

Yuan Yao Dec. 19, 2022, 7:32 a.m. UTC | #1
On Fri, Dec 09, 2022 at 12:45:54PM +0800, Robert Hoo wrote:
> Define kvm_untagged_addr() per LAM feature spec: Address high bits are sign
> extended, from highest effective address bit.
> Note that LAM_U48 and LA57 has some effective bits overlap. This patch
> gives a WARN() on that case.
>
> Now the only applicable possible case that addresses passed down from VM
> with LAM bits is those for MPX MSRs.
>
> Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
> Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
> ---
>  arch/x86/kvm/vmx/vmx.c |  3 +++
>  arch/x86/kvm/x86.c     |  5 +++++
>  arch/x86/kvm/x86.h     | 37 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 45 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9985dbb63e7b..16ddd3fcd3cb 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2134,6 +2134,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		    (!msr_info->host_initiated &&
>  		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
>  			return 1;
> +
> +		data = kvm_untagged_addr(data, vcpu);
> +
>  		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
>  		    (data & MSR_IA32_BNDCFGS_RSVD))
>  			return 1;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index eb1f2c20e19e..0a446b45e3d6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1812,6 +1812,11 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>  	case MSR_KERNEL_GS_BASE:
>  	case MSR_CSTAR:
>  	case MSR_LSTAR:
> +		/*
> +		 * LAM applies only addresses used for data accesses.

This is confusing, since MSR_KERNEL_GS_BASE is also used for data accesses.
How about adding the following:
The strict canonical check is still applied to MSR writes even when
LAM is enabled.
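For illustration only, here is a sketch of how that case in __kvm_set_msr()
could read with the comment reworded along these lines (editorial sketch of
the suggested wording, not the final patch):

	case MSR_KERNEL_GS_BASE:
	case MSR_CSTAR:
	case MSR_LSTAR:
		/*
		 * LAM applies only to addresses used for data accesses, but
		 * the strict canonical check is still applied to MSR writes
		 * even when LAM is enabled, so a tagged address should never
		 * reach here.
		 */
		if (is_noncanonical_address(data, vcpu))
			return 1;
		break;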

> +		 * Tagged address should never reach here.
> +		 * Strict canonical check still applies here.
> +		 */
>  		if (is_noncanonical_address(data, vcpu))
>  			return 1;
>  		break;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 6c1fbe27616f..f5a2a15783c6 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -195,11 +195,48 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
>  	return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
>  }
>
> +static inline u64 get_canonical(u64 la, u8 vaddr_bits)
> +{
> +	return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
> +}
> +
>  static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
>  {
>  	return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
>  }
>
> +#ifdef CONFIG_X86_64
> +/* untag addr for guest, according to vCPU CR3 and CR4 settings */
> +static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu *vcpu)
> +{
> +	if (addr >> 63 == 0) {
> +		/* User pointers */
> +		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
> +			addr = get_canonical(addr, 57);
> +		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
> +			/*
> +			 * If guest enabled 5-level paging and LAM_U48,
> +			 * bit 47 should be 0, bit 48:56 contains meta data
> +			 * although bit 47:56 are valid 5-level address

Still 48:56.

> +			 * bits.
> +			 * If LAM_U48 and 4-level paging, bit47 is 0.
> +			 */
> +			WARN_ON(addr & _BITUL(47));
> +			addr = get_canonical(addr, 48);
> +		}
> +	} else if (kvm_read_cr4(vcpu) & X86_CR4_LAM_SUP) { /* Supervisor pointers */
> +		if (kvm_read_cr4(vcpu) & X86_CR4_LA57)
> +			addr = get_canonical(addr, 57);
> +		else
> +			addr = get_canonical(addr, 48);
> +	}
> +
> +	return addr;
> +}
> +#else
> +#define kvm_untagged_addr(addr, vcpu)	(addr)
> +#endif
> +
>  static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
>  					gva_t gva, gfn_t gfn, unsigned access)
>  {
> --
> 2.31.1
>
Yuan Yao Dec. 19, 2022, 9:45 a.m. UTC | #2
On Fri, Dec 09, 2022 at 12:45:54PM +0800, Robert Hoo wrote:
> Define kvm_untagged_addr() per LAM feature spec: Address high bits are sign
> extended, from highest effective address bit.
> Note that LAM_U48 and LA57 has some effective bits overlap. This patch
> gives a WARN() on that case.
>
> Now the only applicable possible case that addresses passed down from VM
> with LAM bits is those for MPX MSRs.

What about the instruction emulation case, e.g. when KVM accesses a linear
address on behalf of the CPU? In that case kvm_untagged_addr() should also
be used to mask out the linear address, otherwise an unexpected #GP (or
other exception) will be injected into the guest.

Please see all callers of __is_canonical_address()

>
> Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
> Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
> ---
>  arch/x86/kvm/vmx/vmx.c |  3 +++
>  arch/x86/kvm/x86.c     |  5 +++++
>  arch/x86/kvm/x86.h     | 37 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 45 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9985dbb63e7b..16ddd3fcd3cb 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2134,6 +2134,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		    (!msr_info->host_initiated &&
>  		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
>  			return 1;
> +
> +		data = kvm_untagged_addr(data, vcpu);
> +
>  		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
>  		    (data & MSR_IA32_BNDCFGS_RSVD))
>  			return 1;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index eb1f2c20e19e..0a446b45e3d6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1812,6 +1812,11 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>  	case MSR_KERNEL_GS_BASE:
>  	case MSR_CSTAR:
>  	case MSR_LSTAR:
> +		/*
> +		 * LAM applies only addresses used for data accesses.
> +		 * Tagged address should never reach here.
> +		 * Strict canonical check still applies here.
> +		 */
>  		if (is_noncanonical_address(data, vcpu))
>  			return 1;
>  		break;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 6c1fbe27616f..f5a2a15783c6 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -195,11 +195,48 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
>  	return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
>  }
>
> +static inline u64 get_canonical(u64 la, u8 vaddr_bits)
> +{
> +	return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
> +}
> +
>  static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
>  {
>  	return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
>  }
>
> +#ifdef CONFIG_X86_64
> +/* untag addr for guest, according to vCPU CR3 and CR4 settings */
> +static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu *vcpu)
> +{
> +	if (addr >> 63 == 0) {
> +		/* User pointers */
> +		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
> +			addr = get_canonical(addr, 57);
> +		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
> +			/*
> +			 * If guest enabled 5-level paging and LAM_U48,
> +			 * bit 47 should be 0, bit 48:56 contains meta data
> +			 * although bit 47:56 are valid 5-level address
> +			 * bits.
> +			 * If LAM_U48 and 4-level paging, bit47 is 0.
> +			 */
> +			WARN_ON(addr & _BITUL(47));
> +			addr = get_canonical(addr, 48);
> +		}
> +	} else if (kvm_read_cr4(vcpu) & X86_CR4_LAM_SUP) { /* Supervisor pointers */
> +		if (kvm_read_cr4(vcpu) & X86_CR4_LA57)
> +			addr = get_canonical(addr, 57);
> +		else
> +			addr = get_canonical(addr, 48);
> +	}
> +
> +	return addr;
> +}
> +#else
> +#define kvm_untagged_addr(addr, vcpu)	(addr)
> +#endif
> +
>  static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
>  					gva_t gva, gfn_t gfn, unsigned access)
>  {
> --
> 2.31.1
>
Robert Hoo Dec. 20, 2022, 2:07 p.m. UTC | #3
On Mon, 2022-12-19 at 15:32 +0800, Yuan Yao wrote:
> On Fri, Dec 09, 2022 at 12:45:54PM +0800, Robert Hoo wrote:
> > Define kvm_untagged_addr() per LAM feature spec: Address high bits
> > are sign
> > extended, from highest effective address bit.
> > Note that LAM_U48 and LA57 has some effective bits overlap. This
> > patch
> > gives a WARN() on that case.
> > 
> > Now the only applicable possible case that addresses passed down
> > from VM
> > with LAM bits is those for MPX MSRs.
> > 
> > Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
> > Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
> > ---
> >  arch/x86/kvm/vmx/vmx.c |  3 +++
> >  arch/x86/kvm/x86.c     |  5 +++++
> >  arch/x86/kvm/x86.h     | 37 +++++++++++++++++++++++++++++++++++++
> >  3 files changed, 45 insertions(+)
> > 
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 9985dbb63e7b..16ddd3fcd3cb 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -2134,6 +2134,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu,
> > struct msr_data *msr_info)
> >  		    (!msr_info->host_initiated &&
> >  		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
> >  			return 1;
> > +
> > +		data = kvm_untagged_addr(data, vcpu);
> > +
> >  		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
> >  		    (data & MSR_IA32_BNDCFGS_RSVD))
> >  			return 1;
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index eb1f2c20e19e..0a446b45e3d6 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1812,6 +1812,11 @@ static int __kvm_set_msr(struct kvm_vcpu
> > *vcpu, u32 index, u64 data,
> >  	case MSR_KERNEL_GS_BASE:
> >  	case MSR_CSTAR:
> >  	case MSR_LSTAR:
> > +		/*
> > +		 * LAM applies only addresses used for data accesses.
> 
> Confused due to the MSR_KERNEL_GS_BASE also used for data accessing,
> how about add below:
> The strict canonical checking is sitll appplied to MSR writing even
> LAM is enabled.

OK

...
> > +#ifdef CONFIG_X86_64
> > +/* untag addr for guest, according to vCPU CR3 and CR4 settings */
> > +static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu
> > *vcpu)
> > +{
> > +	if (addr >> 63 == 0) {
> > +		/* User pointers */
> > +		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
> > +			addr = get_canonical(addr, 57);
> > +		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
> > +			/*
> > +			 * If guest enabled 5-level paging and LAM_U48,
> > +			 * bit 47 should be 0, bit 48:56 contains meta
> > data
> > +			 * although bit 47:56 are valid 5-level address
> 
> Still 48:56.

OK
Robert Hoo Dec. 20, 2022, 2:07 p.m. UTC | #4
On Mon, 2022-12-19 at 17:45 +0800, Yuan Yao wrote:
> On Fri, Dec 09, 2022 at 12:45:54PM +0800, Robert Hoo wrote:
> > Define kvm_untagged_addr() per LAM feature spec: Address high bits
> > are sign
> > extended, from highest effective address bit.
> > Note that LAM_U48 and LA57 has some effective bits overlap. This
> > patch
> > gives a WARN() on that case.
> > 
> > Now the only applicable possible case that addresses passed down
> > from VM
> > with LAM bits is those for MPX MSRs.
> 
> How about the instruction emulation case ? e.g. KVM on behalf of CPU
> to do linear address accessing ? In this case the kvm_untagged_addr()
> should also be used to mask out the linear address, otherwise
> unexpected
> #GP(or other exception) will be injected into guest.
> 
> Please see all callers of __is_canonical_address()
> 
Emm, I took a look at the callers; they appear to be for segment registers
and MSRs. Per the spec (ISE 10.4), processors that support LAM continue to
require that addresses written to control registers or MSRs be legacy
canonical. So, like the handling of the last point you commented on in this
patch, such situations need no changes, i.e. the legacy canonical check
still applies.
Yang, Weijiang Dec. 21, 2022, 12:35 a.m. UTC | #5
On 12/9/2022 12:45 PM, Robert Hoo wrote:
> Define kvm_untagged_addr() per LAM feature spec: Address high bits are sign
> extended, from highest effective address bit.
> Note that LAM_U48 and LA57 has some effective bits overlap. This patch
> gives a WARN() on that case.
>
> Now the only applicable possible case that addresses passed down from VM
> with LAM bits is those for MPX MSRs.
>
> Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
> Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
> ---
>   arch/x86/kvm/vmx/vmx.c |  3 +++
>   arch/x86/kvm/x86.c     |  5 +++++
>   arch/x86/kvm/x86.h     | 37 +++++++++++++++++++++++++++++++++++++
>   3 files changed, 45 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9985dbb63e7b..16ddd3fcd3cb 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2134,6 +2134,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   		    (!msr_info->host_initiated &&
>   		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
>   			return 1;
> +
> +		data = kvm_untagged_addr(data, vcpu);
> +
>   		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
>   		    (data & MSR_IA32_BNDCFGS_RSVD))
>   			return 1;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index eb1f2c20e19e..0a446b45e3d6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1812,6 +1812,11 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>   	case MSR_KERNEL_GS_BASE:
>   	case MSR_CSTAR:
>   	case MSR_LSTAR:
> +		/*
> +		 * LAM applies only addresses used for data accesses.
> +		 * Tagged address should never reach here.
> +		 * Strict canonical check still applies here.
> +		 */
>   		if (is_noncanonical_address(data, vcpu))
>   			return 1;
>   		break;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 6c1fbe27616f..f5a2a15783c6 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -195,11 +195,48 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
>   	return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
>   }
>   
> +static inline u64 get_canonical(u64 la, u8 vaddr_bits)
> +{
> +	return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
> +}
> +


There's already a helper for this calculation: __canonical_address(), and
it's used in KVM before setting MSR_IA32_SYSENTER_ESP/MSR_IA32_SYSENTER_EIP.


>   static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
>   {
>   	return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
>   }
>   
> +#ifdef CONFIG_X86_64
> +/* untag addr for guest, according to vCPU CR3 and CR4 settings */
> +static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu *vcpu)
> +{
> +	if (addr >> 63 == 0) {
> +		/* User pointers */
> +		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
> +			addr = get_canonical(addr, 57);
> +		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
> +			/*
> +			 * If guest enabled 5-level paging and LAM_U48,
> +			 * bit 47 should be 0, bit 48:56 contains meta data
> +			 * although bit 47:56 are valid 5-level address
> +			 * bits.
> +			 * If LAM_U48 and 4-level paging, bit47 is 0.
> +			 */
> +			WARN_ON(addr & _BITUL(47));
> +			addr = get_canonical(addr, 48);
> +		}
> +	} else if (kvm_read_cr4(vcpu) & X86_CR4_LAM_SUP) { /* Supervisor pointers */
> +		if (kvm_read_cr4(vcpu) & X86_CR4_LA57)
> +			addr = get_canonical(addr, 57);
> +		else
> +			addr = get_canonical(addr, 48);
> +	}
> +
> +	return addr;
> +}
> +#else
> +#define kvm_untagged_addr(addr, vcpu)	(addr)
> +#endif
> +
>   static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
>   					gva_t gva, gfn_t gfn, unsigned access)
>   {
Robert Hoo Dec. 21, 2022, 1:38 a.m. UTC | #6
On Wed, 2022-12-21 at 08:35 +0800, Yang, Weijiang wrote: 
> > +static inline u64 get_canonical(u64 la, u8 vaddr_bits)
> > +{
> > +	return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
> > +}
> > +
> 
> 
> There's already a helper for the calculation: __canonical_address(),
> and 
> it's used in KVM
> 
> before set MSR_IA32_SYSENTER_ESP/MSR_IA32_SYSENTER_EIP.
> 
Nice, thanks Weijiang.
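(Editorial sketch, not part of the series: reworked on top of the existing
__canonical_address() helper and dropping the local get_canonical(), the
untag helper could look roughly like the following. It keeps the same CR3/CR4
checks as the patch above; the X86_CR3_LAM_* and X86_CR4_LAM_SUP definitions
come from earlier patches in this series.)

	static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu *vcpu)
	{
		if (addr >> 63 == 0) {
			/* User pointers: CR3 selects LAM_U57 or LAM_U48 */
			if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
				addr = __canonical_address(addr, 57);
			else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48)
				addr = __canonical_address(addr, 48);
		} else if (kvm_read_cr4(vcpu) & X86_CR4_LAM_SUP) {
			/* Supervisor pointers: width follows CR4.LA57 */
			addr = __canonical_address(addr,
					kvm_read_cr4(vcpu) & X86_CR4_LA57 ? 57 : 48);
		}

		return addr;
	}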
Yuan Yao Dec. 21, 2022, 2:38 a.m. UTC | #7
On Tue, Dec 20, 2022 at 10:07:57PM +0800, Robert Hoo wrote:
> On Mon, 2022-12-19 at 17:45 +0800, Yuan Yao wrote:
> > On Fri, Dec 09, 2022 at 12:45:54PM +0800, Robert Hoo wrote:
> > > Define kvm_untagged_addr() per LAM feature spec: Address high bits
> > > are sign
> > > extended, from highest effective address bit.
> > > Note that LAM_U48 and LA57 has some effective bits overlap. This
> > > patch
> > > gives a WARN() on that case.
> > >
> > > Now the only applicable possible case that addresses passed down
> > > from VM
> > > with LAM bits is those for MPX MSRs.
> >
> > How about the instruction emulation case ? e.g. KVM on behalf of CPU
> > to do linear address accessing ? In this case the kvm_untagged_addr()
> > should also be used to mask out the linear address, otherwise
> > unexpected
> > #GP(or other exception) will be injected into guest.
> >
> > Please see all callers of __is_canonical_address()
> >
> Emm, I take a look at the callers, looks like they're segment registers
> and MSRs. Per spec (ISE 10.4): processors that support LAM continue to

__linearize() uses __is_canonical_address() for 64-bit mode, and this is
the code path for memory read/write emulation. What will happen if a
LAM-enabled guest applied metadata to the address and KVM emulates the
memory access for it?

> require the addresses written to control registers or MSRs be legacy
> canonical. So, like the handling on your last commented point on this
> patch, such situation needs no changes, i.e. legacy canonical still
> applied.
>
Yuan Yao Dec. 21, 2022, 2:55 a.m. UTC | #8
On Fri, Dec 09, 2022 at 12:45:54PM +0800, Robert Hoo wrote:
> Define kvm_untagged_addr() per LAM feature spec: Address high bits are sign
> extended, from highest effective address bit.
> Note that LAM_U48 and LA57 has some effective bits overlap. This patch
> gives a WARN() on that case.
>
> Now the only applicable possible case that addresses passed down from VM
> with LAM bits is those for MPX MSRs.
>
> Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
> Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
> ---
>  arch/x86/kvm/vmx/vmx.c |  3 +++
>  arch/x86/kvm/x86.c     |  5 +++++
>  arch/x86/kvm/x86.h     | 37 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 45 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9985dbb63e7b..16ddd3fcd3cb 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2134,6 +2134,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		    (!msr_info->host_initiated &&
>  		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
>  			return 1;
> +
> +		data = kvm_untagged_addr(data, vcpu);
> +
>  		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
>  		    (data & MSR_IA32_BNDCFGS_RSVD))
>  			return 1;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index eb1f2c20e19e..0a446b45e3d6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1812,6 +1812,11 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>  	case MSR_KERNEL_GS_BASE:
>  	case MSR_CSTAR:
>  	case MSR_LSTAR:
> +		/*
> +		 * LAM applies only addresses used for data accesses.
> +		 * Tagged address should never reach here.
> +		 * Strict canonical check still applies here.
> +		 */
>  		if (is_noncanonical_address(data, vcpu))
>  			return 1;
>  		break;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 6c1fbe27616f..f5a2a15783c6 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -195,11 +195,48 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
>  	return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
>  }
>
> +static inline u64 get_canonical(u64 la, u8 vaddr_bits)
> +{
> +	return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
> +}
> +
>  static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
>  {
>  	return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
>  }
>
> +#ifdef CONFIG_X86_64
> +/* untag addr for guest, according to vCPU CR3 and CR4 settings */
> +static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu *vcpu)
> +{
> +	if (addr >> 63 == 0) {
> +		/* User pointers */
> +		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
> +			addr = get_canonical(addr, 57);
> +		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
> +			/*
> +			 * If guest enabled 5-level paging and LAM_U48,
> +			 * bit 47 should be 0, bit 48:56 contains meta data
> +			 * although bit 47:56 are valid 5-level address
> +			 * bits.
> +			 * If LAM_U48 and 4-level paging, bit47 is 0.
> +			 */
> +			WARN_ON(addr & _BITUL(47));

IIUC, the LAM_U48 userspace canonical checking rule requires "bit 63 == bit 47 == 0"
before sign-extending the address.

If so, it looks like it's the guest's fault for not following the LAM canonical
checking rule. What's the behavior of such a violation on bare metal, #GP? The
behavior should be the same for the guest, instead of a WARN_ON() on the host;
the host does nothing wrong.

> +			addr = get_canonical(addr, 48);
> +		}
> +	} else if (kvm_read_cr4(vcpu) & X86_CR4_LAM_SUP) { /* Supervisor pointers */
> +		if (kvm_read_cr4(vcpu) & X86_CR4_LA57)
> +			addr = get_canonical(addr, 57);
> +		else
> +			addr = get_canonical(addr, 48);
> +	}
> +
> +	return addr;
> +}
> +#else
> +#define kvm_untagged_addr(addr, vcpu)	(addr)
> +#endif
> +
>  static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
>  					gva_t gva, gfn_t gfn, unsigned access)
>  {
> --
> 2.31.1
>
Yu Zhang Dec. 21, 2022, 8:02 a.m. UTC | #9
> Emm, I take a look at the callers, looks like they're segment registers
> and MSRs. Per spec (ISE 10.4): processors that support LAM continue to
> require the addresses written to control registers or MSRs be legacy
> canonical. So, like the handling on your last commented point on this
> patch, such situation needs no changes, i.e. legacy canonical still
> applied.
> 
Well, it's not about the control register or MSR emulation. It is about
the instruction decoder, which may encounter an instruction with a memory
operand with LAM bits occupied. 

B.R.
Yu
Yu Zhang Dec. 21, 2022, 8:14 a.m. UTC | #10
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9985dbb63e7b..16ddd3fcd3cb 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2134,6 +2134,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		    (!msr_info->host_initiated &&
>  		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
>  			return 1;
> +
> +		data = kvm_untagged_addr(data, vcpu);

Do we really need to go to the trouble of calling kvm_untagged_addr()
unconditionally? I mean, LAM may not be enabled by the guest, or may not
even be exposed to the guest at all.
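(A minimal sketch of the kind of guard being suggested; whether to key it
off guest CPUID, the CR3/CR4 LAM bits, or both is an open question here, and
the X86_FEATURE_LAM name is an assumption about how the series exposes the
feature to the guest.)

	/* Skip the untag walk entirely when the guest cannot use LAM. */
	if (guest_cpuid_has(vcpu, X86_FEATURE_LAM))
		data = kvm_untagged_addr(data, vcpu);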

> +
>  		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
>  		    (data & MSR_IA32_BNDCFGS_RSVD))
>  			return 1;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index eb1f2c20e19e..0a446b45e3d6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1812,6 +1812,11 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>  	case MSR_KERNEL_GS_BASE:
>  	case MSR_CSTAR:
>  	case MSR_LSTAR:
> +		/*
> +		 * LAM applies only addresses used for data accesses.
> +		 * Tagged address should never reach here.
> +		 * Strict canonical check still applies here.
> +		 */
>  		if (is_noncanonical_address(data, vcpu))
>  			return 1;
>  		break;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 6c1fbe27616f..f5a2a15783c6 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -195,11 +195,48 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
>  	return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
>  }
>  
> +static inline u64 get_canonical(u64 la, u8 vaddr_bits)
> +{
> +	return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
> +}
> +

We already have a __canonical_address() in linux, no need to re-invent
another one. :)

B.R.
Yu
Robert Hoo Dec. 21, 2022, 8:22 a.m. UTC | #11
On Wed, 2022-12-21 at 10:55 +0800, Yuan Yao wrote:
> > +#ifdef CONFIG_X86_64
> > +/* untag addr for guest, according to vCPU CR3 and CR4 settings */
> > +static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu
> > *vcpu)
> > +{
> > +	if (addr >> 63 == 0) {
> > +		/* User pointers */
> > +		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
> > +			addr = get_canonical(addr, 57);
> > +		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
> > +			/*
> > +			 * If guest enabled 5-level paging and LAM_U48,
> > +			 * bit 47 should be 0, bit 48:56 contains meta
> > data
> > +			 * although bit 47:56 are valid 5-level address
> > +			 * bits.
> > +			 * If LAM_U48 and 4-level paging, bit47 is 0.
> > +			 */
> > +			WARN_ON(addr & _BITUL(47));
> 
> IIUC, LAM_47 userspace canonical checking rule requests "bit 63 ==
> bit 47 == 0"
> before sign-extened the address.
> 
> if so looks it's guest's fault to not follow the LAM canonical
> checking rule,
> what's the behavior of such violation on bare metal, #GP ? 

The spec (ISE 10.2) doesn't mention a #GP for this case. IIUC, those
overlapping bits are zeroed.
The current native kernel LAM enabling only uses LAM_U57.

> The behavior
> shuld be same for guest instead of WARN_ON() on host, host does
> nothing wrong.
> 
Agreed that the host did nothing wrong. So remove the WARN_ON() and use a
pr_warn() instead? I meant to warn here that such a case happened.
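(Editorial sketch of the "modified canonicality check" being discussed, per
the thread's reading of ISE 10.2 for LAM_U48: bit 63 == bit 47 == 0 must hold
before the metadata bits are dropped. Whether a violation should end up as a
#GP is exactly the open question below; this only shows the check itself, and
the helper name is hypothetical.)

	/* True if addr satisfies the LAM_U48 modified canonicality rule. */
	static inline bool lam_u48_canonical(u64 addr)
	{
		return !(addr & BIT_ULL(63)) && !(addr & BIT_ULL(47));
	}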
Yu Zhang Dec. 21, 2022, 8:37 a.m. UTC | #12
On Wed, Dec 21, 2022 at 04:14:39PM +0800, Yu Zhang wrote:
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 9985dbb63e7b..16ddd3fcd3cb 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -2134,6 +2134,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  		    (!msr_info->host_initiated &&
> >  		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
> >  			return 1;
> > +
> > +		data = kvm_untagged_addr(data, vcpu);
> 
> Do we really need to take pains to trigger the kvm_untagged_addr()
> unconditionally? I mean, LAM may not be enabled by the guest or even
> not exposed to the guest at all.
> 

Ouch... I just realized that, unlike the paging mode, LAM can
be enabled per-thread instead of per-VM...

B.R.
Yu
Robert Hoo Dec. 21, 2022, 8:49 a.m. UTC | #13
On Wed, 2022-12-21 at 16:02 +0800, Yu Zhang wrote:
> > Emm, I take a look at the callers, looks like they're segment
> > registers
> > and MSRs. Per spec (ISE 10.4): processors that support LAM continue
> > to
> > require the addresses written to control registers or MSRs be
> > legacy
> > canonical. So, like the handling on your last commented point on
> > this
> > patch, such situation needs no changes, i.e. legacy canonical still
> > applied.
> > 
> 
> Well, it's not about the control register or MSR emulation. It is
> about
> the instruction decoder, which may encounter an instruction with a
> memory
> operand with LAM bits occupied. 
> 
OK, combining my reply to you and Yuan's comments here.
So you're talking about when KVM emulates an instruction, that
instruction accesses memory, and the address for that memory may be
LAM tagged.
I think instruction emulation and memory access should be separated,
and the LAM rules should apply in the memory access phase. But frankly
speaking, I haven't looked into such a case yet. Can you name an example
of such an emulated instruction? I can take a look, hoping that the
emulated memory access falls into the same code path as page fault
handling.

> B.R.
> Yu
Yuan Yao Dec. 21, 2022, 9:35 a.m. UTC | #14
On Wed, Dec 21, 2022 at 04:22:33PM +0800, Robert Hoo wrote:
> On Wed, 2022-12-21 at 10:55 +0800, Yuan Yao wrote:
> > > +#ifdef CONFIG_X86_64
> > > +/* untag addr for guest, according to vCPU CR3 and CR4 settings */
> > > +static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu
> > > *vcpu)
> > > +{
> > > +	if (addr >> 63 == 0) {
> > > +		/* User pointers */
> > > +		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
> > > +			addr = get_canonical(addr, 57);
> > > +		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
> > > +			/*
> > > +			 * If guest enabled 5-level paging and LAM_U48,
> > > +			 * bit 47 should be 0, bit 48:56 contains meta
> > > data
> > > +			 * although bit 47:56 are valid 5-level address
> > > +			 * bits.
> > > +			 * If LAM_U48 and 4-level paging, bit47 is 0.
> > > +			 */
> > > +			WARN_ON(addr & _BITUL(47));
> >
> > IIUC, LAM_47 userspace canonical checking rule requests "bit 63 ==
> > bit 47 == 0"
> > before sign-extened the address.
> >
> > if so looks it's guest's fault to not follow the LAM canonical
> > checking rule,
> > what's the behavior of such violation on bare metal, #GP ?
>
> Spec (ISE 10.2) doesn't mention a #GP for this case. IIUC, those
> overlap bits are zeroed.

I mean the behavior upon violation of the "bit 63 == bit 47 == 0" rule.
Yes, no words in ISE 10.2/3 describe the behavior of such a violation,
but do you know more details about this, or have you done experiments
on hardware/Simics?

> Current native Kernel LAM only enables LAM_U57.
>
> > The behavior
> > shuld be same for guest instead of WARN_ON() on host, host does
> > nothing wrong.
> >
> Agree that host did nothing wrong. So remove WARN_ON(), use a pr_warn()
> instead? I here meant to warn that such case happens.

I personally think that's not necessary. If no hardware behavior is defined
for such a violation, KVM doesn't need to warn about such a guest violation
on the host, as KVM already does for many other cases (e.g. a #GP is injected
for a restricted canonical violation, but without a WARN_ON).

>
>
Yu Zhang Dec. 21, 2022, 10:10 a.m. UTC | #15
On Wed, Dec 21, 2022 at 04:49:26PM +0800, Robert Hoo wrote:
> On Wed, 2022-12-21 at 16:02 +0800, Yu Zhang wrote:
> > > Emm, I take a look at the callers, looks like they're segment
> > > registers
> > > and MSRs. Per spec (ISE 10.4): processors that support LAM continue
> > > to
> > > require the addresses written to control registers or MSRs be
> > > legacy
> > > canonical. So, like the handling on your last commented point on
> > > this
> > > patch, such situation needs no changes, i.e. legacy canonical still
> > > applied.
> > > 
> > 
> > Well, it's not about the control register or MSR emulation. It is
> > about
> > the instruction decoder, which may encounter an instruction with a
> > memory
> > operand with LAM bits occupied. 
> > 
> OK, combine reply to you and Yuan's comments here.
> So you're talking about when KVM emulates an instruction, and that
> instruction is accessing memory, and the address for the memory can be
> LAM tagged.
> I think instruction emulation and memory access should be separated,
> and LAM rules should apply to memory access phase. But frankly
> speaking, I haven't looked into such case yet. Can you name an example
> of such emulated instruction? I can take a look, hoping that the
> emulation accessing memory falls into same code path as page fault
> handling.

I do not know the use cases of LAM. According to the spec, LAM does
not apply to instruction fetches, so the guest RIP and the target addresses
of instructions such as jump, call, etc. do not need special treatment.
But the spec does not say whether LAM can be used for MMIO addresses...

B.R.
Yu
Yu Zhang Dec. 21, 2022, 10:22 a.m. UTC | #16
> > >
> > > IIUC, LAM_47 userspace canonical checking rule requests "bit 63 ==
> > > bit 47 == 0"
> > > before sign-extened the address.
> > >
> > > if so looks it's guest's fault to not follow the LAM canonical
> > > checking rule,
> > > what's the behavior of such violation on bare metal, #GP ?
> >
> > Spec (ISE 10.2) doesn't mention a #GP for this case. IIUC, those
> > overlap bits are zeroed.
> 
> I mean the behavior of violation of "bit 63 == bit 47 == 0" rule,
> yes no words in ISE 10.2/3 describe the behavior of such violation
> case, but do you know more details of this or had some experiments
> on hardware/SIMIC ?

Yes, the ISE is vague. But I do believe a #GP will be generated for
such a violation, and KVM shall inject one if the guest does not follow the
requirement, because such a check is called (by the spec) a "modified
canonicality check".

Anyway, we'd better confirm with the spec owner instead of making
assumptions ourselves. :)

B.R.
Yu
Yuan Yao Dec. 21, 2022, 10:30 a.m. UTC | #17
On Wed, Dec 21, 2022 at 06:10:32PM +0800, Yu Zhang wrote:
> On Wed, Dec 21, 2022 at 04:49:26PM +0800, Robert Hoo wrote:
> > On Wed, 2022-12-21 at 16:02 +0800, Yu Zhang wrote:
> > > > Emm, I take a look at the callers, looks like they're segment
> > > > registers
> > > > and MSRs. Per spec (ISE 10.4): processors that support LAM continue
> > > > to
> > > > require the addresses written to control registers or MSRs be
> > > > legacy
> > > > canonical. So, like the handling on your last commented point on
> > > > this
> > > > patch, such situation needs no changes, i.e. legacy canonical still
> > > > applied.
> > > >
> > >
> > > Well, it's not about the control register or MSR emulation. It is
> > > about
> > > the instruction decoder, which may encounter an instruction with a
> > > memory
> > > operand with LAM bits occupied.
> > >
> > OK, combine reply to you and Yuan's comments here.
> > So you're talking about when KVM emulates an instruction, and that
> > instruction is accessing memory, and the address for the memory can be
> > LAM tagged.
> > I think instruction emulation and memory access should be separated,
> > and LAM rules should apply to memory access phase. But frankly
> > speaking, I haven't looked into such case yet. Can you name an example
> > of such emulated instruction? I can take a look, hoping that the
> > emulation accessing memory falls into same code path as page fault
> > handling.
>
> I do not know the usage case of LAM. According to the spec, LAM does
> not apply to instruction fetches, so guest rip and target addresses
> in instructions such as jump, call etc. do not need special treatment.
> But the spec does not say if LAM can be used to MMIO addresses...

MMIO accesses in the guest also go through GVAs, so any emulated
device MMIO access hits this case. KVM checks the GVA first, even in the TDP
case (where KVM already has the GPA in hand), before starting the "real"
access to the GPA:

segmented_read/write() -> linearize() -> __linearize()
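(A rough, purely illustrative sketch of where an untag step could sit in the
emulator's 64-bit linearize path if emulated data accesses are to honor LAM.
"ctxt_untag_addr()" is a hypothetical hook, and how to reach the vCPU LAM
state from the emulator context is exactly the plumbing left open here;
instruction fetches keep the strict check, since LAM does not apply to them.)

	case X86EMUL_MODE_PROT64:
		*linear = la;
		if (!fetch)
			la = ctxt_untag_addr(ctxt, la);	/* hypothetical hook */
		if (emul_is_noncanonical_address(la, ctxt))
			goto bad;
		break;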

>
> B.R.
> Yu
>
Yuan Yao Dec. 21, 2022, 10:33 a.m. UTC | #18
On Wed, Dec 21, 2022 at 06:22:47PM +0800, Yu Zhang wrote:
> > > >
> > > > IIUC, LAM_47 userspace canonical checking rule requests "bit 63 ==
> > > > bit 47 == 0"
> > > > before sign-extened the address.
> > > >
> > > > if so looks it's guest's fault to not follow the LAM canonical
> > > > checking rule,
> > > > what's the behavior of such violation on bare metal, #GP ?
> > >
> > > Spec (ISE 10.2) doesn't mention a #GP for this case. IIUC, those
> > > overlap bits are zeroed.
> >
> > I mean the behavior of violation of "bit 63 == bit 47 == 0" rule,
> > yes no words in ISE 10.2/3 describe the behavior of such violation
> > case, but do you know more details of this or had some experiments
> > on hardware/SIMIC ?
>
> Yes, the ISE is vague. But I do believe a #GP will be generated for
> such violation, and KVM shall inject one if guest does no follow the
> requirement, because such check is called(by the spec) as a "modified
> canonicality check".

Me too and that's why I had replies here :-)

>
> Anyway, we'd better confirm with the spec owner, instead of making
> assumptions by ourselves. :)

Agree!

>
> B.R.
> Yu
Yu Zhang Dec. 21, 2022, 12:40 p.m. UTC | #19
On Wed, Dec 21, 2022 at 06:30:31PM +0800, Yuan Yao wrote:
> On Wed, Dec 21, 2022 at 06:10:32PM +0800, Yu Zhang wrote:
> > On Wed, Dec 21, 2022 at 04:49:26PM +0800, Robert Hoo wrote:
> > > On Wed, 2022-12-21 at 16:02 +0800, Yu Zhang wrote:
> > > > > Emm, I take a look at the callers, looks like they're segment
> > > > > registers
> > > > > and MSRs. Per spec (ISE 10.4): processors that support LAM continue
> > > > > to
> > > > > require the addresses written to control registers or MSRs be
> > > > > legacy
> > > > > canonical. So, like the handling on your last commented point on
> > > > > this
> > > > > patch, such situation needs no changes, i.e. legacy canonical still
> > > > > applied.
> > > > >
> > > >
> > > > Well, it's not about the control register or MSR emulation. It is
> > > > about
> > > > the instruction decoder, which may encounter an instruction with a
> > > > memory
> > > > operand with LAM bits occupied.
> > > >
> > > OK, combine reply to you and Yuan's comments here.
> > > So you're talking about when KVM emulates an instruction, and that
> > > instruction is accessing memory, and the address for the memory can be
> > > LAM tagged.
> > > I think instruction emulation and memory access should be separated,
> > > and LAM rules should apply to memory access phase. But frankly
> > > speaking, I haven't looked into such case yet. Can you name an example
> > > of such emulated instruction? I can take a look, hoping that the
> > > emulation accessing memory falls into same code path as page fault
> > > handling.
> >
> > I do not know the usage case of LAM. According to the spec, LAM does
> > not apply to instruction fetches, so guest rip and target addresses
> > in instructions such as jump, call etc. do not need special treatment.
> > But the spec does not say if LAM can be used to MMIO addresses...
> 
> The MMIO accessing in guest is also via GVA, so any emulated
> device MMIO accessing hits this case. KVM checks GVA firstly even in TDP

Yes. And sorry, I meant the spec does not say LAM cannot be used
for MMIO addresses.

> case(which KVM already has GPA in hand) before start to "real"
> accessing the GPA:
> 
> segmented_read/write() -> linearize() -> __linearize()
> 

B.R.
Yu
Yu Zhang Dec. 22, 2022, 8:21 a.m. UTC | #20
> > > > >
> > > > > Well, it's not about the control register or MSR emulation. It is
> > > > > about
> > > > > the instruction decoder, which may encounter an instruction with a
> > > > > memory
> > > > > operand with LAM bits occupied.
> > > > >
> > > > OK, combine reply to you and Yuan's comments here.
> > > > So you're talking about when KVM emulates an instruction, and that
> > > > instruction is accessing memory, and the address for the memory can be
> > > > LAM tagged.
> > > > I think instruction emulation and memory access should be separated,
> > > > and LAM rules should apply to memory access phase. But frankly
> > > > speaking, I haven't looked into such case yet. Can you name an example
> > > > of such emulated instruction? I can take a look, hoping that the
> > > > emulation accessing memory falls into same code path as page fault
> > > > handling.
> > >
> > > I do not know the usage case of LAM. According to the spec, LAM does
> > > not apply to instruction fetches, so guest rip and target addresses
> > > in instructions such as jump, call etc. do not need special treatment.
> > > But the spec does not say if LAM can be used to MMIO addresses...
> > 
> > The MMIO accessing in guest is also via GVA, so any emulated
> > device MMIO accessing hits this case. KVM checks GVA firstly even in TDP
> 
> Yes. And sorry, I meant the spec does not say LAM can not be used
> to MMIO addresses.
> 
BTW, it is not just about MMIO. Normal memory addresses can also go through
linearize(), e.g. the memory operands of I/O instructions, though I still have
no idea whether this could be one of LAM's use cases.

B.R.
Yu
Yuan Yao Dec. 23, 2022, 2:36 a.m. UTC | #21
On Thu, Dec 22, 2022 at 04:21:32PM +0800, Yu Zhang wrote:
> > > > > >
> > > > > > Well, it's not about the control register or MSR emulation. It is
> > > > > > about
> > > > > > the instruction decoder, which may encounter an instruction with a
> > > > > > memory
> > > > > > operand with LAM bits occupied.
> > > > > >
> > > > > OK, combine reply to you and Yuan's comments here.
> > > > > So you're talking about when KVM emulates an instruction, and that
> > > > > instruction is accessing memory, and the address for the memory can be
> > > > > LAM tagged.
> > > > > I think instruction emulation and memory access should be separated,
> > > > > and LAM rules should apply to memory access phase. But frankly
> > > > > speaking, I haven't looked into such case yet. Can you name an example
> > > > > of such emulated instruction? I can take a look, hoping that the
> > > > > emulation accessing memory falls into same code path as page fault
> > > > > handling.
> > > >
> > > > I do not know the usage case of LAM. According to the spec, LAM does
> > > > not apply to instruction fetches, so guest rip and target addresses
> > > > in instructions such as jump, call etc. do not need special treatment.
> > > > But the spec does not say if LAM can be used to MMIO addresses...
> > >
> > > The MMIO accessing in guest is also via GVA, so any emulated
> > > device MMIO accessing hits this case. KVM checks GVA firstly even in TDP
> >
> > Yes. And sorry, I meant the spec does not say LAM can not be used
> > to MMIO addresses.
> >
> BTW, it is not just about MMIO. Normal memory address can also trigger the
> linearize(), e.g., memory operand of io instructions, though I still have
> no idea if this could be one of the usage cases of LAM.

Yes, you are right, emulated normal memory accesses should also be
considered.

Emm... to me, the INS/OUTS instruction family should be part of the
LAM use cases, but there is no explicit description of this in the ISE...

>
> B.R.
> Yu
Robert Hoo Dec. 23, 2022, 3:55 a.m. UTC | #22
On Fri, 2022-12-23 at 10:36 +0800, Yuan Yao wrote:
> On Thu, Dec 22, 2022 at 04:21:32PM +0800, Yu Zhang wrote:
> > > > > > > 
> > > > > > > Well, it's not about the control register or MSR
> > > > > > > emulation. It is
> > > > > > > about
> > > > > > > the instruction decoder, which may encounter an
> > > > > > > instruction with a
> > > > > > > memory
> > > > > > > operand with LAM bits occupied.
> > > > > > > 
> > > > > > 
> > > > > > OK, combine reply to you and Yuan's comments here.
> > > > > > So you're talking about when KVM emulates an instruction,
> > > > > > and that
> > > > > > instruction is accessing memory, and the address for the
> > > > > > memory can be
> > > > > > LAM tagged.
> > > > > > I think instruction emulation and memory access should be
> > > > > > separated,
> > > > > > and LAM rules should apply to memory access phase. But
> > > > > > frankly
> > > > > > speaking, I haven't looked into such case yet. Can you name
> > > > > > an example
> > > > > > of such emulated instruction? I can take a look, hoping
> > > > > > that the
> > > > > > emulation accessing memory falls into same code path as
> > > > > > page fault
> > > > > > handling.
> > > > > 
> > > > > I do not know the usage case of LAM. According to the spec,
> > > > > LAM does
> > > > > not apply to instruction fetches, so guest rip and target
> > > > > addresses
> > > > > in instructions such as jump, call etc. do not need special
> > > > > treatment.
> > > > > But the spec does not say if LAM can be used to MMIO
> > > > > addresses...
> > > > 
> > > > The MMIO accessing in guest is also via GVA, so any emulated
> > > > device MMIO accessing hits this case. KVM checks GVA firstly
> > > > even in TDP
> > > 
> > > Yes. And sorry, I meant the spec does not say LAM can not be used
> > > to MMIO addresses.
> > > 
> > 
> > BTW, it is not just about MMIO. Normal memory address can also
> > trigger the
> > linearize(), e.g., memory operand of io instructions, though I
> > still have
> > no idea if this could be one of the usage cases of LAM.
> 
> Yes you are right, the emulated normal memory accessing should also
> be
> considered.
> 
> Emm... to me I think the IOS/OUTS instruction family should be part
> of
> LAM usage case, but yet no such explicity description about this in
> ISE...
> 
Which instructions are emulated by KVM now? I don't think KVM will
emulate everything that would otherwise #UD.
For callers on the page fault handling path, the address has already been
untagged by HW before the exit to KVM.
Binbin Wu Dec. 28, 2022, 8:32 a.m. UTC | #23
On 12/9/2022 12:45 PM, Robert Hoo wrote
> +#ifdef CONFIG_X86_64
> +/* untag addr for guest, according to vCPU CR3 and CR4 settings */
> +static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu *vcpu)
> +{
> +	if (addr >> 63 == 0) {
> +		/* User pointers */
> +		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
> +			addr = get_canonical(addr, 57);

According to the spec, LAM_U57/LAM_SUP also perform a modified
canonicality check.

Why is the check only done for LAM_U48, but not for the LAM_U57 and LAM_SUP
cases?


> +		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
> +			/*
> +			 * If guest enabled 5-level paging and LAM_U48,
> +			 * bit 47 should be 0, bit 48:56 contains meta data
> +			 * although bit 47:56 are valid 5-level address
> +			 * bits.
> +			 * If LAM_U48 and 4-level paging, bit47 is 0.
> +			 */
> +			WARN_ON(addr & _BITUL(47));
> +			addr = get_canonical(addr, 48);
> +		}
> +	} else if (kvm_read_cr4(vcpu) & X86_CR4_LAM_SUP) { /* Supervisor pointers */
> +		if (kvm_read_cr4(vcpu) & X86_CR4_LA57)
> +			addr = get_canonical(addr, 57);
> +		else
> +			addr = get_canonical(addr, 48);
> +	}
> +
> +	return addr;
> +}
> +#else
> +#define kvm_untagged_addr(addr, vcpu)	(addr)
> +#endif
> +
>   static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
>   					gva_t gva, gfn_t gfn, unsigned access)
>   {
Robert Hoo Dec. 29, 2022, 12:41 a.m. UTC | #24
On Wed, 2022-12-28 at 16:32 +0800, Binbin Wu wrote:
> On 12/9/2022 12:45 PM, Robert Hoo wrote
> > +#ifdef CONFIG_X86_64
> > +/* untag addr for guest, according to vCPU CR3 and CR4 settings */
> > +static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu
> > *vcpu)
> > +{
> > +	if (addr >> 63 == 0) {
> > +		/* User pointers */
> > +		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
> > +			addr = get_canonical(addr, 57);
> 
> According to the spec, LAM_U57/LAM_SUP also performs a modified 
> canonicality check.
> 
> Why the check only be done for LAM_U48, but not for LAM_U57 and
> LAM_SUP 
> cases?
> 
Doesn't this check for LAM_U57?
The else-if branch below checks LAM_U48,
and the outer else-if branch below checks CR4.LAM_SUP.
> 
> > +		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
> > +			/*
> > +			 * If guest enabled 5-level paging and LAM_U48,
> > +			 * bit 47 should be 0, bit 48:56 contains meta
> > data
> > +			 * although bit 47:56 are valid 5-level address
> > +			 * bits.
> > +			 * If LAM_U48 and 4-level paging, bit47 is 0.
> > +			 */
> > +			WARN_ON(addr & _BITUL(47));
> > +			addr = get_canonical(addr, 48);
> > +		}
> > +	} else if (kvm_read_cr4(vcpu) & X86_CR4_LAM_SUP) { /*
> > Supervisor pointers */
> > +		if (kvm_read_cr4(vcpu) & X86_CR4_LA57)
> > +			addr = get_canonical(addr, 57);
> > +		else
> > +			addr = get_canonical(addr, 48);
> > +	}
> > +
> > +	return addr;
> > +}
...
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9985dbb63e7b..16ddd3fcd3cb 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2134,6 +2134,9 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		    (!msr_info->host_initiated &&
 		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
 			return 1;
+
+		data = kvm_untagged_addr(data, vcpu);
+
 		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
 		    (data & MSR_IA32_BNDCFGS_RSVD))
 			return 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eb1f2c20e19e..0a446b45e3d6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1812,6 +1812,11 @@  static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 	case MSR_KERNEL_GS_BASE:
 	case MSR_CSTAR:
 	case MSR_LSTAR:
+		/*
+		 * LAM applies only addresses used for data accesses.
+		 * Tagged address should never reach here.
+		 * Strict canonical check still applies here.
+		 */
 		if (is_noncanonical_address(data, vcpu))
 			return 1;
 		break;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 6c1fbe27616f..f5a2a15783c6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -195,11 +195,48 @@  static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
 	return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
 }
 
+static inline u64 get_canonical(u64 la, u8 vaddr_bits)
+{
+	return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
 static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
 {
 	return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
 }
 
+#ifdef CONFIG_X86_64
+/* untag addr for guest, according to vCPU CR3 and CR4 settings */
+static inline u64 kvm_untagged_addr(u64 addr, struct kvm_vcpu *vcpu)
+{
+	if (addr >> 63 == 0) {
+		/* User pointers */
+		if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U57)
+			addr = get_canonical(addr, 57);
+		else if (kvm_read_cr3(vcpu) & X86_CR3_LAM_U48) {
+			/*
+			 * If guest enabled 5-level paging and LAM_U48,
+			 * bit 47 should be 0, bit 48:56 contains meta data
+			 * although bit 47:56 are valid 5-level address
+			 * bits.
+			 * If LAM_U48 and 4-level paging, bit47 is 0.
+			 */
+			WARN_ON(addr & _BITUL(47));
+			addr = get_canonical(addr, 48);
+		}
+	} else if (kvm_read_cr4(vcpu) & X86_CR4_LAM_SUP) { /* Supervisor pointers */
+		if (kvm_read_cr4(vcpu) & X86_CR4_LA57)
+			addr = get_canonical(addr, 57);
+		else
+			addr = get_canonical(addr, 48);
+	}
+
+	return addr;
+}
+#else
+#define kvm_untagged_addr(addr, vcpu)	(addr)
+#endif
+
 static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
 					gva_t gva, gfn_t gfn, unsigned access)
 {