diff mbox series

[3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

Message ID 20200402155554.27705-4-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show
Series x86: KVM: VMX: Add basic split-lock #AC handling | expand

Commit Message

Sean Christopherson April 2, 2020, 3:55 p.m. UTC
From: Xiaoyao Li <xiaoyao.li@intel.com>

Two types #AC can be generated in Intel CPUs:
 1. legacy alignment check #AC
 2. split lock #AC

Reflect #AC back into the guest if the guest has legacy alignment checks
enabled or if SLD is disabled.  If SLD is enabled, treat the guest like
a host userspace application by calling handle_user_split_lock().  If
the #AC is handled (SLD disabled and TIF_SLD set), then simply resume
the guest.  If the #AC isn't handled, i.e. host is sld_fatal, then
forward the #AC to the userspace VMM, similar to sending SIGBUS.

Suggested-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

Comments

Thomas Gleixner April 2, 2020, 5:19 p.m. UTC | #1
Sean Christopherson <sean.j.christopherson@intel.com> writes:
> @@ -4623,6 +4623,12 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
>  	return 1;
>  }
>  
> +static inline bool guest_cpu_alignment_check_enabled(struct kvm_vcpu *vcpu)

I used a different function name intentionally so the check for 'guest
want's split lock #AC' can go there as well once it's sorted.

> +{
> +	return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
> +	       (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
> +}
> +
>  static int handle_exception_nmi(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -4688,9 +4694,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
>  		return handle_rmode_exception(vcpu, ex_no, error_code);
>  
>  	switch (ex_no) {
> -	case AC_VECTOR:
> -		kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> -		return 1;
>  	case DB_VECTOR:
>  		dr6 = vmcs_readl(EXIT_QUALIFICATION);
>  		if (!(vcpu->guest_debug &
> @@ -4719,6 +4722,27 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
>  		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
>  		kvm_run->debug.arch.exception = ex_no;
>  		break;
> +	case AC_VECTOR:
> +		/*
> +		 * Reflect #AC to the guest if it's expecting the #AC, i.e. has
> +		 * legacy alignment check enabled.  Pre-check host split lock
> +		 * turned on to avoid the VMREADs needed to check legacy #AC,
> +		 * i.e. reflect the #AC if the only possible source is legacy
> +		 * alignment checks.
> +		 */
> +		if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ||

I think the right thing to do here is to make this really independent of
that feature, i.e. inject the exception if

 (CPL==3 && CR0.AM && EFLAGS.AC) || (FUTURE && (GUEST_TEST_CTRL & SLD))

iow. when its really clear that the guest asked for it. If there is an
actual #AC with SLD disabled and !(CPL==3 && CR0.AM && EFLAGS.AC) then
something is badly wrong and the thing should just die. That's why I
separated handle_guest_split_lock() and tell about that case.

Thanks,

        tglx
Sean Christopherson April 2, 2020, 5:40 p.m. UTC | #2
On Thu, Apr 02, 2020 at 07:19:44PM +0200, Thomas Gleixner wrote:
> Sean Christopherson <sean.j.christopherson@intel.com> writes:
> > @@ -4623,6 +4623,12 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
> >  	return 1;
> >  }
> >  
> > +static inline bool guest_cpu_alignment_check_enabled(struct kvm_vcpu *vcpu)
> 
> I used a different function name intentionally so the check for 'guest
> want's split lock #AC' can go there as well once it's sorted.

Heh, IIRC, I advised Xiaoyao to do the opposite so that the injection logic
in the #AC case statement was more or less complete without having to dive
into the helper, e.g. the resulting code looks like this once split-lock is
exposed to the guest:

	if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ||
	    guest_cpu_alignment_check_enabled(vcpu) ||
	    guest_cpu_sld_on(vmx)) {
		kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
		return 1;
	}

> > +{
> > +	return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
> > +	       (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
> > +}
> > +
> >  static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> >  {
> >  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > @@ -4688,9 +4694,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> >  		return handle_rmode_exception(vcpu, ex_no, error_code);
> >  
> >  	switch (ex_no) {
> > -	case AC_VECTOR:
> > -		kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> > -		return 1;
> >  	case DB_VECTOR:
> >  		dr6 = vmcs_readl(EXIT_QUALIFICATION);
> >  		if (!(vcpu->guest_debug &
> > @@ -4719,6 +4722,27 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> >  		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
> >  		kvm_run->debug.arch.exception = ex_no;
> >  		break;
> > +	case AC_VECTOR:
> > +		/*
> > +		 * Reflect #AC to the guest if it's expecting the #AC, i.e. has
> > +		 * legacy alignment check enabled.  Pre-check host split lock
> > +		 * turned on to avoid the VMREADs needed to check legacy #AC,
> > +		 * i.e. reflect the #AC if the only possible source is legacy
> > +		 * alignment checks.
> > +		 */
> > +		if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ||
> 
> I think the right thing to do here is to make this really independent of
> that feature, i.e. inject the exception if
> 
>  (CPL==3 && CR0.AM && EFLAGS.AC) || (FUTURE && (GUEST_TEST_CTRL & SLD))
> 
> iow. when its really clear that the guest asked for it. If there is an
> actual #AC with SLD disabled and !(CPL==3 && CR0.AM && EFLAGS.AC) then
> something is badly wrong and the thing should just die. That's why I
> separated handle_guest_split_lock() and tell about that case.

That puts KVM in a weird spot if/when intercepting #AC is no longer
necessary, e.g. "if" future CPUs happen to gain a feature that traps into
the hypervisor (KVM) if a potential near-infinite ucode loop is detected.

The only reason KVM intercepts #AC (before split-lock) is to prevent a
malicious guest from executing a DoS attack on the host by putting the #AC
handler in ring 3.  Current CPUs will get stuck in ucode vectoring #AC
faults more or less indefinitely, e.g. long enough to trigger watchdogs in
the host.

Injecting #AC if and only if KVM is 100% certain the guest wants the #AC
would lead to divergent behavior if KVM chose to not intercept #AC, e.g.
some theoretical unknown #AC source would conditionally result in exits to
userspace depending on whether or not KVM wanted to intercept #AC for
other reasons.

That's why we went with the approach of reflecting #AC unless KVM detected
that the #AC was host-induced.
Thomas Gleixner April 2, 2020, 8:07 p.m. UTC | #3
Sean,

Sean Christopherson <sean.j.christopherson@intel.com> writes:
> On Thu, Apr 02, 2020 at 07:19:44PM +0200, Thomas Gleixner wrote:
>> Sean Christopherson <sean.j.christopherson@intel.com> writes:
>> > +	case AC_VECTOR:
>> > +		/*
>> > +		 * Reflect #AC to the guest if it's expecting the #AC, i.e. has
>> > +		 * legacy alignment check enabled.  Pre-check host split lock
>> > +		 * turned on to avoid the VMREADs needed to check legacy #AC,
>> > +		 * i.e. reflect the #AC if the only possible source is legacy
>> > +		 * alignment checks.
>> > +		 */
>> > +		if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ||
>> 
>> I think the right thing to do here is to make this really independent of
>> that feature, i.e. inject the exception if
>> 
>>  (CPL==3 && CR0.AM && EFLAGS.AC) || (FUTURE && (GUEST_TEST_CTRL & SLD))
>> 
>> iow. when its really clear that the guest asked for it. If there is an
>> actual #AC with SLD disabled and !(CPL==3 && CR0.AM && EFLAGS.AC) then
>> something is badly wrong and the thing should just die. That's why I
>> separated handle_guest_split_lock() and tell about that case.
>
> That puts KVM in a weird spot if/when intercepting #AC is no longer
> necessary, e.g. "if" future CPUs happen to gain a feature that traps into
> the hypervisor (KVM) if a potential near-infinite ucode loop is detected.
>
> The only reason KVM intercepts #AC (before split-lock) is to prevent a
> malicious guest from executing a DoS attack on the host by putting the #AC
> handler in ring 3.  Current CPUs will get stuck in ucode vectoring #AC
> faults more or less indefinitely, e.g. long enough to trigger watchdogs in
> the host.

Which is thankfully well documented in the VMX code and the
corresponding chapter in the SDM. 

> Injecting #AC if and only if KVM is 100% certain the guest wants the #AC
> would lead to divergent behavior if KVM chose to not intercept #AC, e.g.

AFAICT, #AC is not really something which is performance relevant, but I
might obviously be uninformed on that.

Assumed it is not, then there is neither a hard requirement nor a real
incentive to give up on intercepting #AC even when future CPUs have a
fix for the above wreckage.

> some theoretical unknown #AC source would conditionally result in exits to
> userspace depending on whether or not KVM wanted to intercept #AC for
> other reasons.

I'd rather like to know when there is an unknown #AC source instead of
letting the guest silently swallow it.

TBH, the more I learn about this, the more I tend to just give up on
this whole split lock stuff in its current form and wait until HW folks
provide something which is actually usable:

   - Per thread
   - Properly distinguishable from a regular #AC via error code

OTOH, that means I won't be able to use it before retirement. Oh well.

Thanks,

        tglx
Andy Lutomirski April 2, 2020, 8:36 p.m. UTC | #4
> On Apr 2, 2020, at 1:07 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> 

> 
> 
> TBH, the more I learn about this, the more I tend to just give up on
> this whole split lock stuff in its current form and wait until HW folks
> provide something which is actually usable:
> 
>   - Per thread
>   - Properly distinguishable from a regular #AC via error code

Why the latter?  I would argue that #AC from CPL3 with EFLAGS.AC set is almost by construction not a split lock. In particular, if you meet these conditions, how exactly can you do a split lock without simultaneously triggering an alignment check?  (Maybe CMPXCHG16B?

> 
> OTOH, that means I won't be able to use it before retirement. Oh well.
> 
> Thanks,
> 
>        tglx
Peter Zijlstra April 2, 2020, 8:48 p.m. UTC | #5
On Thu, Apr 02, 2020 at 10:07:07PM +0200, Thomas Gleixner wrote:

If we're doing wish lists, I have one more ;-)

> TBH, the more I learn about this, the more I tend to just give up on
> this whole split lock stuff in its current form and wait until HW folks
> provide something which is actually usable:
> 
>    - Per thread
>    - Properly distinguishable from a regular #AC via error code

   - is an actual alignment check.

That is, disallow all unaligned LOCK prefixed ops, not just those that
happen to straddle a cacheline.
Sean Christopherson April 2, 2020, 8:51 p.m. UTC | #6
On Thu, Apr 02, 2020 at 10:07:07PM +0200, Thomas Gleixner wrote:
> Sean Christopherson <sean.j.christopherson@intel.com> writes:
> > On Thu, Apr 02, 2020 at 07:19:44PM +0200, Thomas Gleixner wrote:
> >> Sean Christopherson <sean.j.christopherson@intel.com> writes:
> > That puts KVM in a weird spot if/when intercepting #AC is no longer
> > necessary, e.g. "if" future CPUs happen to gain a feature that traps into
> > the hypervisor (KVM) if a potential near-infinite ucode loop is detected.
> >
> > The only reason KVM intercepts #AC (before split-lock) is to prevent a
> > malicious guest from executing a DoS attack on the host by putting the #AC
> > handler in ring 3.  Current CPUs will get stuck in ucode vectoring #AC
> > faults more or less indefinitely, e.g. long enough to trigger watchdogs in
> > the host.
> 
> Which is thankfully well documented in the VMX code and the
> corresponding chapter in the SDM. 
> 
> > Injecting #AC if and only if KVM is 100% certain the guest wants the #AC
> > would lead to divergent behavior if KVM chose to not intercept #AC, e.g.
> 
> AFAICT, #AC is not really something which is performance relevant, but I
> might obviously be uninformed on that.
> 
> Assumed it is not, then there is neither a hard requirement nor a real
> incentive to give up on intercepting #AC even when future CPUs have a
> fix for the above wreckage.

Agreed that there's no hard requirement, but general speaking, the less KVM
needs to poke into the guest the better.

> > some theoretical unknown #AC source would conditionally result in exits to
> > userspace depending on whether or not KVM wanted to intercept #AC for
> > other reasons.
> 
> I'd rather like to know when there is an unknown #AC source instead of
> letting the guest silently swallow it.

Trying to prevent the guest from squashing a spurious fault is a fools
errand.   For example, with nested virtualization, the correct behavior
from an architectural perspective is to forward exceptions from L2 (the
nested VM) to L1 (the direct VM) that L1 wants to intercept.  E.g. if L1
wants to intercept #AC faults that happen in L2, then KVM reflects all #AC
faults into L1 as VM-Exits without ever reaching this code.

Nested virt aside, detecting spurious #AC and a few other exceptions is
mostly feasible, but for many exceptions it's flat out impossible.

Anyways, this particular case isn't a sticking point, i.e. I'd be ok with
exiting to userspace on a spurious #AC, I just don't see the value in doing
so.  Returning KVM_EXIT_EXCEPTION doesn't necessarily equate to throwing up
a red flag, e.g. from a kernel perspective you'd still be relying on the
userspace VMM to report the error in a sane manner.  I think at one point
Xiaoyao had a WARN_ON for a spurious #AC, but it was removed because the
odds of a false positive due to some funky corner case seemed higher than
detecting a CPU bug.

> TBH, the more I learn about this, the more I tend to just give up on
> this whole split lock stuff in its current form and wait until HW folks
> provide something which is actually usable:
> 
>    - Per thread
>    - Properly distinguishable from a regular #AC via error code
> 
> OTOH, that means I won't be able to use it before retirement. Oh well.
> 
> Thanks,
> 
>         tglx
Thomas Gleixner April 2, 2020, 10:27 p.m. UTC | #7
Sean,

Sean Christopherson <sean.j.christopherson@intel.com> writes:
> On Thu, Apr 02, 2020 at 10:07:07PM +0200, Thomas Gleixner wrote:
>> Sean Christopherson <sean.j.christopherson@intel.com> writes:
>> AFAICT, #AC is not really something which is performance relevant, but I
>> might obviously be uninformed on that.
>> 
>> Assumed it is not, then there is neither a hard requirement nor a real
>> incentive to give up on intercepting #AC even when future CPUs have a
>> fix for the above wreckage.
>
> Agreed that there's no hard requirement, but general speaking, the less KVM
> needs to poke into the guest the better.

Fair enough.

>> > some theoretical unknown #AC source would conditionally result in exits to
>> > userspace depending on whether or not KVM wanted to intercept #AC for
>> > other reasons.
>> 
>> I'd rather like to know when there is an unknown #AC source instead of
>> letting the guest silently swallow it.
>
> Trying to prevent the guest from squashing a spurious fault is a fools
> errand.   For example, with nested virtualization, the correct behavior
> from an architectural perspective is to forward exceptions from L2 (the
> nested VM) to L1 (the direct VM) that L1 wants to intercept.  E.g. if L1
> wants to intercept #AC faults that happen in L2, then KVM reflects all #AC
> faults into L1 as VM-Exits without ever reaching this code.

Which means L1 should have some handling for that case at least those L1
hypervisors which we can fix. If we want to go there.

> Anyways, this particular case isn't a sticking point, i.e. I'd be ok with
> exiting to userspace on a spurious #AC, I just don't see the value in doing
> so.  Returning KVM_EXIT_EXCEPTION doesn't necessarily equate to throwing up
> a red flag, e.g. from a kernel perspective you'd still be relying on the
> userspace VMM to report the error in a sane manner.  I think at one point
> Xiaoyao had a WARN_ON for a spurious #AC, but it was removed because the
> odds of a false positive due to some funky corner case seemed higher than
> detecting a CPU bug.

Agreed. Relying on the user space side to crash and burn the guest is
probably wishful thinking. So the right thing might be to just kill it
right at the spot.

But coming back to the above discussion:

    if (!cpu_has(SLD) || guest_wants_regular_ac()) {
    	kvm_queue_exception_e();
        return 1;
    }

vs.

    if (guest_wants_regular_ac()) {
    	kvm_queue_exception_e();
        return 1;
    }

The only situation where this makes a difference is when the CPU does
not support SLD. If it does the thing became ambiguous today.

With my previous attempt to bring sanity into this by not setting the
feature flag when SLD is disabled at the command line, the check is
consistent.

But the detection of unaware hypervisors with the module scan brings us
into a situation where we have sld_state == sld_off and the feature flag
being set because we can't undo it anymore.

So now you load VMWare which disables SLD, but the feature bit stays and
then when you unload it and load VMX afterwards then you have exactly
the same situation as with the feature check removed. Consistency gone.

So with that we might want to go back to the sld_state check instead of
the cpu feature check, but that does not make it more consistent:

  As I just verified, it's possible to load the vmware module parallel
  to the KVM/VMX one.

So either we deal with it in some way or just decide that SLD and HV
modules which do not have the MOD_INFO(sld_safe) magic cannot be loaded
when SLD is enabled on the host. I'm fine with the latter :)

What a mess.

Thanks,

        tglx
Nadav Amit April 2, 2020, 10:40 p.m. UTC | #8
> On Apr 2, 2020, at 3:27 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> 
>  As I just verified, it's possible to load the vmware module parallel
>  to the KVM/VMX one.
> 
> So either we deal with it in some way or just decide that SLD and HV
> modules which do not have the MOD_INFO(sld_safe) magic cannot be loaded
> when SLD is enabled on the host. I'm fine with the latter :)
> 
> What a mess.

[ +Doug ]

Just to communicate the information that was given to me: we do intend to
fix the SLD issue in VMware and if needed to release a minor version that
addresses it. Having said that, there are other hypervisors, such as
virtualbox or jailhouse, which would have a similar issue.
Thomas Gleixner April 2, 2020, 11:03 p.m. UTC | #9
Nadav Amit <namit@vmware.com> writes:
> Just to communicate the information that was given to me: we do intend to
> fix the SLD issue in VMware and if needed to release a minor version that
> addresses it. Having said that, there are other hypervisors, such as
> virtualbox or jailhouse, which would have a similar issue.

I'm well aware of that and even if VMWare fixes this, this still will
trip up users which fail to install updates for one reason or the other
and leave them puzzled. Maybe we just should not care at all.

Despite that I might have mentioned it before: What a mess ...

Thanks,

        tglx
Steven Rostedt April 2, 2020, 11:08 p.m. UTC | #10
On Thu, 2 Apr 2020 22:40:03 +0000
Nadav Amit <namit@vmware.com> wrote:

> > On Apr 2, 2020, at 3:27 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > 
> >  As I just verified, it's possible to load the vmware module parallel
> >  to the KVM/VMX one.
> > 
> > So either we deal with it in some way or just decide that SLD and HV
> > modules which do not have the MOD_INFO(sld_safe) magic cannot be loaded
> > when SLD is enabled on the host. I'm fine with the latter :)
> > 
> > What a mess.  
> 
> [ +Doug ]
> 
> Just to communicate the information that was given to me: we do intend to
> fix the SLD issue in VMware and if needed to release a minor version that
> addresses it. Having said that, there are other hypervisors, such as
> virtualbox or jailhouse, which would have a similar issue.

If we go the approach of not letting VM modules load if it doesn't have the
sld_safe flag set, how is this different than a VM module not loading due
to kabi breakage?

If we prevent it from loading (and keeping from having to go into this
inconsistent state that Thomas described), it would encourage people to get
the latest modules, and the maintainers of said modules motivation to
update them.

-- Steve
Kenneth Crudup April 2, 2020, 11:16 p.m. UTC | #11
On Thu, 2 Apr 2020, Steven Rostedt wrote:

> If we go the approach of not letting VM modules load if it doesn't have the
> sld_safe flag set, how is this different than a VM module not loading due
> to kabi breakage?

Why not a compromise: if such a module is attempted to be loaded, print up
a message saying something akin to "turn the parameter 'split_lock_detect'
off" as we reject loading it- and if we see that we've booted with it off
just splat a WARN_ON() if someone tries to load such modules?

	-Kenny
Jim Mattson April 2, 2020, 11:18 p.m. UTC | #12
On Thu, Apr 2, 2020 at 4:16 PM Kenneth R. Crudup <kenny@panix.com> wrote:
>
>
> On Thu, 2 Apr 2020, Steven Rostedt wrote:
>
> > If we go the approach of not letting VM modules load if it doesn't have the
> > sld_safe flag set, how is this different than a VM module not loading due
> > to kabi breakage?
>
> Why not a compromise: if such a module is attempted to be loaded, print up
> a message saying something akin to "turn the parameter 'split_lock_detect'
> off" as we reject loading it- and if we see that we've booted with it off
> just splat a WARN_ON() if someone tries to load such modules?

What modules are we talking about? I thought we were discussing L1
hypervisors, which are just binary blobs. The only modules at the L0
level are kvm and kvm_intel.
Thomas Gleixner April 3, 2020, 12:16 p.m. UTC | #13
Jim,

Jim Mattson <jmattson@google.com> writes:
> On Thu, Apr 2, 2020 at 4:16 PM Kenneth R. Crudup <kenny@panix.com> wrote:
>> On Thu, 2 Apr 2020, Steven Rostedt wrote:
>>
>> > If we go the approach of not letting VM modules load if it doesn't have the
>> > sld_safe flag set, how is this different than a VM module not loading due
>> > to kabi breakage?
>>
>> Why not a compromise: if such a module is attempted to be loaded, print up
>> a message saying something akin to "turn the parameter 'split_lock_detect'
>> off" as we reject loading it- and if we see that we've booted with it off
>> just splat a WARN_ON() if someone tries to load such modules?
>
> What modules are we talking about? I thought we were discussing L1
> hypervisors, which are just binary blobs. The only modules at the L0
> level are kvm and kvm_intel.

Maybe in your world, but VmWare (which got this started), VirtualBox,
Jailhouse and who knows what else _are_ L0 hypervisors. Otherwise we
wouldn't have that conversation at all.

Thanks,

        tglx
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 458e684dfbdc..a96cfda0a5b9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4623,6 +4623,12 @@  static int handle_machine_check(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static inline bool guest_cpu_alignment_check_enabled(struct kvm_vcpu *vcpu)
+{
+	return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
+	       (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
+}
+
 static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4688,9 +4694,6 @@  static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 		return handle_rmode_exception(vcpu, ex_no, error_code);
 
 	switch (ex_no) {
-	case AC_VECTOR:
-		kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
-		return 1;
 	case DB_VECTOR:
 		dr6 = vmcs_readl(EXIT_QUALIFICATION);
 		if (!(vcpu->guest_debug &
@@ -4719,6 +4722,27 @@  static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
 		kvm_run->debug.arch.exception = ex_no;
 		break;
+	case AC_VECTOR:
+		/*
+		 * Reflect #AC to the guest if it's expecting the #AC, i.e. has
+		 * legacy alignment check enabled.  Pre-check host split lock
+		 * turned on to avoid the VMREADs needed to check legacy #AC,
+		 * i.e. reflect the #AC if the only possible source is legacy
+		 * alignment checks.
+		 */
+		if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ||
+		    guest_cpu_alignment_check_enabled(vcpu)) {
+			kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
+			return 1;
+		}
+
+		/*
+		 * Forward the #AC to userspace if kernel policy does not allow
+		 * temporarily disabling split lock detection.
+		 */
+		if (handle_user_split_lock(kvm_rip_read(vcpu)))
+			return 1;
+		fallthrough;
 	default:
 		kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
 		kvm_run->ex.exception = ex_no;