diff mbox series

[5/6] KVM: VMX: Always intercept accesses to unsupported "extended" x2APIC regs

Message ID 20230107011025.565472-6-seanjc@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: x2APIC reserved bits/regs fixes | expand

Commit Message

Sean Christopherson Jan. 7, 2023, 1:10 a.m. UTC
Don't clear the "read" bits for x2APIC registers above SELF_IPI (APIC regs
0x400 - 0xff0, MSRs 0x840 - 0x8ff).  KVM doesn't emulate registers in that
space (there are a smattering of AMD-only extensions) and so should
intercept reads in order to inject #GP.  When APICv is fully enabled,
Intel hardware doesn't validate the registers on RDMSR and instead blindly
retrieves data from the vAPIC page, i.e. it's software's responsibility to
intercept reads to non-existent MSRs.

Fixes: 8d14695f9542 ("x86, apicv: add virtual x2apic support")
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/vmx.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

Comments

Maxim Levitsky Jan. 8, 2023, 6:07 p.m. UTC | #1
On Sat, 2023-01-07 at 01:10 +0000, Sean Christopherson wrote:
> Don't clear the "read" bits for x2APIC registers above SELF_IPI (APIC regs
> 0x400 - 0xff0, MSRs 0x840 - 0x8ff).  KVM doesn't emulate registers in that
> space (there are a smattering of AMD-only extensions) and so should
> intercept reads in order to inject #GP.  When APICv is fully enabled,
> Intel hardware doesn't validate the registers on RDMSR and instead blindly
> retrieves data from the vAPIC page, i.e. it's software's responsibility to
> intercept reads to non-existent MSRs.
> 
> Fixes: 8d14695f9542 ("x86, apicv: add virtual x2apic support")
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 38 ++++++++++++++++++++------------------
>  1 file changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index c788aa382611..82c61c16f8f5 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4018,26 +4018,17 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
>  		vmx_set_msr_bitmap_write(msr_bitmap, msr);
>  }
>  
> -static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode)
> -{
> -	unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
> -	unsigned long read_intercept;
> -	int msr;
> -
> -	read_intercept = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
> -
> -	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
> -		unsigned int read_idx = msr / BITS_PER_LONG;
> -		unsigned int write_idx = read_idx + (0x800 / sizeof(long));
> -
> -		msr_bitmap[read_idx] = read_intercept;
> -		msr_bitmap[write_idx] = ~0ul;
> -	}
> -}
> -
>  static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
>  {
> +	/*
> +	 * x2APIC indices for 64-bit accesses into the RDMSR and WRMSR halves
> +	 * of the MSR bitmap.  KVM emulates APIC registers up through 0x3f0,
> +	 * i.e. MSR 0x83f, and so only needs to dynamically manipulate 64 bits.
> +	 */
The above comment is better to be placed down below, near the actual write,
otherwise it is confusing.

> +	const int read_idx = APIC_BASE_MSR / BITS_PER_LONG_LONG;
> +	const int write_idx = read_idx + (0x800 / sizeof(u64));
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	u64 *msr_bitmap = (u64 *)vmx->vmcs01.msr_bitmap;
>  	u8 mode;
>  
>  	if (!cpu_has_vmx_msr_bitmap())
> @@ -4058,7 +4049,18 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
>  
>  	vmx->x2apic_msr_bitmap_mode = mode;
>  
> -	vmx_reset_x2apic_msrs(vcpu, mode);
> +	/*
> +	 * Reset the bitmap for MSRs 0x800 - 0x83f.  Leave AMD's uber-extended
> +	 * registers (0x840 and above) intercepted, KVM doesn't support them.

I don't think AMD calls them uber-extended. Just extended.

From a quick glance, these could have beeing very useful for VFIO passthrough of INT-X interrupts,
removing the need to mask the interrupt on per PCI device basis - instead you can just leave
the IRQ pending in ISR, while using SEOI and IER to ignore this pending bit for host.

I understand that the days of INT-X are long gone (and especially days of shared IRQ lines...)
and every sane device uses MSI/-X instead, but still.


> +	 * Intercept all writes by default and poke holes as needed.  Pass
> +	 * through all reads by default in x2APIC+APICv mode, as all registers
> +	 * except the current timer count are passed through for read.
> +	 */
> +	if (mode & MSR_BITMAP_MODE_X2APIC_APICV)
> +		msr_bitmap[read_idx] = 0;
> +	else
> +		msr_bitmap[read_idx] = ~0ull;
> +	msr_bitmap[write_idx] = ~0ull;
>  
>  	/*
>  	 * TPR reads and writes can be virtualized even if virtual interrupt

Other than the note about the comment,

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>


Best regards,
	Maxim Levitsky
Sean Christopherson Jan. 9, 2023, 4:32 p.m. UTC | #2
On Sun, Jan 08, 2023, Maxim Levitsky wrote:
> On Sat, 2023-01-07 at 01:10 +0000, Sean Christopherson wrote:
> >  static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
> >  {
> > +	/*
> > +	 * x2APIC indices for 64-bit accesses into the RDMSR and WRMSR halves
> > +	 * of the MSR bitmap.  KVM emulates APIC registers up through 0x3f0,
> > +	 * i.e. MSR 0x83f, and so only needs to dynamically manipulate 64 bits.
> > +	 */
> The above comment is better to be placed down below, near the actual write,
> otherwise it is confusing.

Can you elaborate on why it's confusing?  The intent of this specific comment is
to capture why the index calculations use BITS_PER_LONG_LONG and sizeof(u64).

> > +	const int read_idx = APIC_BASE_MSR / BITS_PER_LONG_LONG;
> > +	const int write_idx = read_idx + (0x800 / sizeof(u64));
> >  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +	u64 *msr_bitmap = (u64 *)vmx->vmcs01.msr_bitmap;
> >  	u8 mode;
> >  
> >  	if (!cpu_has_vmx_msr_bitmap())
> > @@ -4058,7 +4049,18 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
> >  
> >  	vmx->x2apic_msr_bitmap_mode = mode;
> >  
> > -	vmx_reset_x2apic_msrs(vcpu, mode);
> > +	/*
> > +	 * Reset the bitmap for MSRs 0x800 - 0x83f.  Leave AMD's uber-extended
> > +	 * registers (0x840 and above) intercepted, KVM doesn't support them.
> 
> I don't think AMD calls them uber-extended. Just extended.

Yeah, I took some creative liberaties.  I want to avoid confusion with the more
common use of Extended APIC (x2APIC).
Jim Mattson Jan. 9, 2023, 5:25 p.m. UTC | #3
On Fri, Jan 6, 2023 at 5:10 PM Sean Christopherson <seanjc@google.com> wrote:
>
> Don't clear the "read" bits for x2APIC registers above SELF_IPI (APIC regs

Odd use of quotation marks in the shortlog  and here.

> 0x400 - 0xff0, MSRs 0x840 - 0x8ff).  KVM doesn't emulate registers in that
> space (there are a smattering of AMD-only extensions) and so should
> intercept reads in order to inject #GP.  When APICv is fully enabled,
> Intel hardware doesn't validate the registers on RDMSR and instead blindly
> retrieves data from the vAPIC page, i.e. it's software's responsibility to
> intercept reads to non-existent MSRs.
>
> Fixes: 8d14695f9542 ("x86, apicv: add virtual x2apic support")
> Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c788aa382611..82c61c16f8f5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4018,26 +4018,17 @@  void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 		vmx_set_msr_bitmap_write(msr_bitmap, msr);
 }
 
-static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode)
-{
-	unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
-	unsigned long read_intercept;
-	int msr;
-
-	read_intercept = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
-
-	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
-		unsigned int read_idx = msr / BITS_PER_LONG;
-		unsigned int write_idx = read_idx + (0x800 / sizeof(long));
-
-		msr_bitmap[read_idx] = read_intercept;
-		msr_bitmap[write_idx] = ~0ul;
-	}
-}
-
 static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * x2APIC indices for 64-bit accesses into the RDMSR and WRMSR halves
+	 * of the MSR bitmap.  KVM emulates APIC registers up through 0x3f0,
+	 * i.e. MSR 0x83f, and so only needs to dynamically manipulate 64 bits.
+	 */
+	const int read_idx = APIC_BASE_MSR / BITS_PER_LONG_LONG;
+	const int write_idx = read_idx + (0x800 / sizeof(u64));
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u64 *msr_bitmap = (u64 *)vmx->vmcs01.msr_bitmap;
 	u8 mode;
 
 	if (!cpu_has_vmx_msr_bitmap())
@@ -4058,7 +4049,18 @@  static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
 
 	vmx->x2apic_msr_bitmap_mode = mode;
 
-	vmx_reset_x2apic_msrs(vcpu, mode);
+	/*
+	 * Reset the bitmap for MSRs 0x800 - 0x83f.  Leave AMD's uber-extended
+	 * registers (0x840 and above) intercepted, KVM doesn't support them.
+	 * Intercept all writes by default and poke holes as needed.  Pass
+	 * through all reads by default in x2APIC+APICv mode, as all registers
+	 * except the current timer count are passed through for read.
+	 */
+	if (mode & MSR_BITMAP_MODE_X2APIC_APICV)
+		msr_bitmap[read_idx] = 0;
+	else
+		msr_bitmap[read_idx] = ~0ull;
+	msr_bitmap[write_idx] = ~0ull;
 
 	/*
 	 * TPR reads and writes can be virtualized even if virtual interrupt