diff mbox series

KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits

Message ID 20241121065039.183716-1-zijie.wei@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits | expand

Commit Message

wzj Nov. 21, 2024, 6:50 a.m. UTC
Address performance issues caused by a vector being reused by a
non-IOAPIC source.

commit 0fc5a36dd6b3
("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
addressed the issues related to EOI and IOAPIC reconfiguration races.
However, it has introduced some performance concerns:

Configuring IOAPIC interrupts while an interrupt request (IRQ) is
already in service can unintentionally trigger a VM exit for other
interrupts that normally do not require one, due to the settings of
`ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
runtime, this issue persists, continuing to adversely affect
performance.

Simple Fix Proposal:
A straightforward solution is to record the vector that is pending at
the time of injection. Then, upon the next guest exit, clean up the
ioapic_handled_vectors corresponding to the vector number that was
pending. This ensures that interrupts are properly handled and prevents
performance issues.

Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/ioapic.c           | 11 +++++++++--
 arch/x86/kvm/vmx/vmx.c          | 10 ++++++++++
 3 files changed, 20 insertions(+), 2 deletions(-)

Comments

Sean Christopherson Dec. 17, 2024, 11:04 p.m. UTC | #1
On Thu, Nov 21, 2024, weizijie wrote:
> Address performance issues caused by a vector being reused by a
> non-IOAPIC source.
> 
> commit 0fc5a36dd6b3
> ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
> addressed the issues related to EOI and IOAPIC reconfiguration races.
> However, it has introduced some performance concerns:
> 
> Configuring IOAPIC interrupts while an interrupt request (IRQ) is
> already in service can unintentionally trigger a VM exit for other
> interrupts that normally do not require one, due to the settings of
> `ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
> runtime, this issue persists, continuing to adversely affect
> performance.
> 
> Simple Fix Proposal:
> A straightforward solution is to record the vector that is pending at
> the time of injection. Then, upon the next guest exit, clean up the
> ioapic_handled_vectors corresponding to the vector number that was
> pending. This ensures that interrupts are properly handled and prevents
> performance issues.
> 
> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>

Your SoB should be last, and assuming Xuyun is a co-author, they need to be
credited via Co-developed-by.  See Documentation/process/submitting-patches.rst
for details.

> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/ioapic.c           | 11 +++++++++--
>  arch/x86/kvm/vmx/vmx.c          | 10 ++++++++++
>  3 files changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index e159e44a6a1b..b008c933d2ab 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1041,6 +1041,7 @@ struct kvm_vcpu_arch {
>  #if IS_ENABLED(CONFIG_HYPERV)
>  	hpa_t hv_root_tdp;
>  #endif
> +	DECLARE_BITMAP(ioapic_pending_vectors, 256);
>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
> index 995eb5054360..6f5a88dc63da 100644
> --- a/arch/x86/kvm/ioapic.c
> +++ b/arch/x86/kvm/ioapic.c
> @@ -284,6 +284,8 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)

The split IRQ chip mode should have the same enhancement.

>  	spin_lock(&ioapic->lock);
>  
> +	bitmap_zero(vcpu->arch.ioapic_pending_vectors, 256);

Rather than use a bitmap, what does performance look like if this is a single u8
that tracks the highest in-service IRQ at the time of the last scan?  And then
when that IRQ is EOI'd, do a full KVM_REQ_SCAN_IOAPIC instead of
KVM_REQ_LOAD_EOI_EXITMAP?  Having multiple interrupts in-flight at the time of
scan should be quite rare.

I like the idea, but burning 32 bytes for an edge case of an edge case seems
unnecessary.
 
> +
>  	/* Make sure we see any missing RTC EOI */
>  	if (test_bit(vcpu->vcpu_id, dest_map->map))
>  		__set_bit(dest_map->vectors[vcpu->vcpu_id],
> @@ -297,10 +299,15 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
>  			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
>  
>  			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
> -						e->fields.dest_id, dm) ||
> -			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
> +						e->fields.dest_id, dm))
> +				__set_bit(e->fields.vector,
> +					  ioapic_handled_vectors);
> +			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
>  				__set_bit(e->fields.vector,
>  					  ioapic_handled_vectors);
> +				__set_bit(e->fields.vector,
> +					  vcpu->arch.ioapic_pending_vectors);
> +			}
>  		}
>  	}
>  	spin_unlock(&ioapic->lock);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 0f008f5ef6f0..572e6f9b8602 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5710,6 +5710,16 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
>  
>  	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
>  	kvm_apic_set_eoi_accelerated(vcpu, vector);
> +
> +	/* When there are instances where ioapic_handled_vectors is
> +	 * set due to pending interrupts, clean up the record and the
> +	 * corresponding bit after the interrupt is completed.
> +	 */
> +	if (test_bit(vector, vcpu->arch.ioapic_pending_vectors)) {

This belongs in common code, probably kvm_ioapic_send_eoi().

> +		clear_bit(vector, vcpu->arch.ioapic_pending_vectors);
> +		clear_bit(vector, vcpu->arch.ioapic_handled_vectors);
> +		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
> +	}
>  	return 1;
>  }
>  
> -- 
> 2.43.5
>
wzj Dec. 27, 2024, 7:30 a.m. UTC | #2
On December 18, 2024, Sean Christopherson wrote:
> On Thu, Nov 21, 2024, weizijie wrote:
>> Address performance issues caused by a vector being reused by a
>> non-IOAPIC source.
>>
>> commit 0fc5a36dd6b3
>> ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
>> addressed the issues related to EOI and IOAPIC reconfiguration races.
>> However, it has introduced some performance concerns:
>>
>> Configuring IOAPIC interrupts while an interrupt request (IRQ) is
>> already in service can unintentionally trigger a VM exit for other
>> interrupts that normally do not require one, due to the settings of
>> `ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
>> runtime, this issue persists, continuing to adversely affect
>> performance.
>>
>> Simple Fix Proposal:
>> A straightforward solution is to record the vector that is pending at
>> the time of injection. Then, upon the next guest exit, clean up the
>> ioapic_handled_vectors corresponding to the vector number that was
>> pending. This ensures that interrupts are properly handled and prevents
>> performance issues.
>>
>> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
>> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Your SoB should be last, and assuming Xuyun is a co-author, they need to be
> credited via Co-developed-by.  See Documentation/process/submitting-patches.rst
> for details.

I'm sincerely apologize, that was my oversight. I will add 
Co-developed-by and move my SoB to the end.

>
>> ---
>>   arch/x86/include/asm/kvm_host.h |  1 +
>>   arch/x86/kvm/ioapic.c           | 11 +++++++++--
>>   arch/x86/kvm/vmx/vmx.c          | 10 ++++++++++
>>   3 files changed, 20 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index e159e44a6a1b..b008c933d2ab 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -1041,6 +1041,7 @@ struct kvm_vcpu_arch {
>>   #if IS_ENABLED(CONFIG_HYPERV)
>>   	hpa_t hv_root_tdp;
>>   #endif
>> +	DECLARE_BITMAP(ioapic_pending_vectors, 256);
>>   };
>>   
>>   struct kvm_lpage_info {
>> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
>> index 995eb5054360..6f5a88dc63da 100644
>> --- a/arch/x86/kvm/ioapic.c
>> +++ b/arch/x86/kvm/ioapic.c
>> @@ -284,6 +284,8 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
> The split IRQ chip mode should have the same enhancement.

You are absolutely right; the split IRQ has the same issue.

We will apply the same enhancement here as will.

>
>>   	spin_lock(&ioapic->lock);
>>   
>> +	bitmap_zero(vcpu->arch.ioapic_pending_vectors, 256);
> Rather than use a bitmap, what does performance look like if this is a single u8
> that tracks the highest in-service IRQ at the time of the last scan?  And then
> when that IRQ is EOI'd, do a full KVM_REQ_SCAN_IOAPIC instead of
> KVM_REQ_LOAD_EOI_EXITMAP?  Having multiple interrupts in-flight at the time of
> scan should be quite rare.
>
> I like the idea, but burning 32 bytes for an edge case of an edge case seems
> unnecessary.

This is truly an excellent modification suggestion. Your comprehensive 
consideration is impressive. Using just a u8 to record highest 
in-service IRQ and only redoing a full KVM_REQ_SCAN_IOAPIC when the 
recorded IRQ is EOI'd not only avoids impacting other interrupts that 
should cause a vm exit, but also saves space.

>   
>> +
>>   	/* Make sure we see any missing RTC EOI */
>>   	if (test_bit(vcpu->vcpu_id, dest_map->map))
>>   		__set_bit(dest_map->vectors[vcpu->vcpu_id],
>> @@ -297,10 +299,15 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
>>   			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
>>   
>>   			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
>> -						e->fields.dest_id, dm) ||
>> -			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
>> +						e->fields.dest_id, dm))
>> +				__set_bit(e->fields.vector,
>> +					  ioapic_handled_vectors);
>> +			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
>>   				__set_bit(e->fields.vector,
>>   					  ioapic_handled_vectors);
>> +				__set_bit(e->fields.vector,
>> +					  vcpu->arch.ioapic_pending_vectors);
>> +			}
>>   		}
>>   	}
>>   	spin_unlock(&ioapic->lock);
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index 0f008f5ef6f0..572e6f9b8602 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -5710,6 +5710,16 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
>>   
>>   	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
>>   	kvm_apic_set_eoi_accelerated(vcpu, vector);
>> +
>> +	/* When there are instances where ioapic_handled_vectors is
>> +	 * set due to pending interrupts, clean up the record and the
>> +	 * corresponding bit after the interrupt is completed.
>> +	 */
>> +	if (test_bit(vector, vcpu->arch.ioapic_pending_vectors)) {
> This belongs in common code, probably kvm_ioapic_send_eoi().
We make the code on the common path as simple as possible.
>> +		clear_bit(vector, vcpu->arch.ioapic_pending_vectors);
>> +		clear_bit(vector, vcpu->arch.ioapic_handled_vectors);
>> +		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
>> +	}
>>   	return 1;
>>   }
>>   
KVM: x86: ioapic: Optimize EOI handling to reduce
  unnecessary VM exits

Address performance issues caused by a vector being reused by a
non-IOAPIC source.

commit 0fc5a36dd6b3
("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
addressed the issues related to EOI and IOAPIC reconfiguration races.
However, it has introduced some performance concerns:

Configuring IOAPIC interrupts while an interrupt request (IRQ) is
already in service can unintentionally trigger a VM exit for other
interrupts that normally do not require one, due to the settings of
`ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
runtime, this issue persists, continuing to adversely affect
performance.

Simple Fix Proposal:
A straightforward solution is to record highest in-service IRQ that
is pending at the time of the last scan. Then, upon the next guest
exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
the ioapic occurs only when the recorded vector is EOI'd, and
subsequently, the extra bit in the eoi_exit_bitmap are cleared,
avoiding unnecessary VM exits.

Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
---
v1 -> v2

* Move my SoB to the end and add Co-developed-by for Xuyun

* Use a u8 type to record a pending IRQ during the ioapic scan process

* Made the same changes for the split IRQ chip mode

arch/x86/include/asm/kvm_host.h |  1 +
  arch/x86/kvm/ioapic.c           | 10 ++++++++--
  arch/x86/kvm/irq_comm.c         |  9 +++++++--
  arch/x86/kvm/vmx/vmx.c          |  9 +++++++++
  4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h 
b/arch/x86/include/asm/kvm_host.h
index e159e44a6a1b..f84a4881afa4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1041,6 +1041,7 @@ struct kvm_vcpu_arch {
  #if IS_ENABLED(CONFIG_HYPERV)
         hpa_t hv_root_tdp;
  #endif
+       u8 last_pending_vector;
  };

  struct kvm_lpage_info {
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 995eb5054360..40252a800897 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, 
ulong *ioapic_handled_vectors)
                         u16 dm = 
kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);

                         if (kvm_apic_match_dest(vcpu, NULL, 
APIC_DEST_NOSHORT,
- e->fields.dest_id, dm) ||
-                           kvm_apic_pending_eoi(vcpu, e->fields.vector))
+ e->fields.dest_id, dm))
                                 __set_bit(e->fields.vector,
                                           ioapic_handled_vectors);
+                       else if (kvm_apic_pending_eoi(vcpu, 
e->fields.vector)) {
+                               __set_bit(e->fields.vector,
+                                         ioapic_handled_vectors);
+                               vcpu->arch.last_pending_vector = 
e->fields.vector >
+ vcpu->arch.last_pending_vector ? e->fields.vector :
+ vcpu->arch.last_pending_vector;
+                       }
                 }
         }
         spin_unlock(&ioapic->lock);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8136695f7b96..1d23c52576e1 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,

                         if (irq.trig_mode &&
                             (kvm_apic_match_dest(vcpu, NULL, 
APIC_DEST_NOSHORT,
-                                                irq.dest_id, 
irq.dest_mode) ||
-                            kvm_apic_pending_eoi(vcpu, irq.vector)))
+                                                irq.dest_id, 
irq.dest_mode)))
                                 __set_bit(irq.vector, 
ioapic_handled_vectors);
+                       else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
+                               __set_bit(irq.vector, 
ioapic_handled_vectors);
+                               vcpu->arch.last_pending_vector = 
irq.vector >
+ vcpu->arch.last_pending_vector ? irq.vector :
+ vcpu->arch.last_pending_vector;
+                       }
                 }
         }
         srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 893366e53732..cd0db1496ce7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5702,6 +5702,15 @@ static int handle_apic_eoi_induced(struct 
kvm_vcpu *vcpu)

         /* EOI-induced VM exit is trap-like and thus no need to adjust 
IP */
         kvm_apic_set_eoi_accelerated(vcpu, vector);
+
+       /* When there are instances where ioapic_handled_vectors is
+        * set due to pending interrupts, clean up the record and do
+        * a full KVM_REQ_SCAN_IOAPIC.
+        */
+       if (vcpu->arch.last_pending_vector == vector) {
+               vcpu->arch.last_pending_vector = 0;
+               kvm_make_request(KVM_REQ_SCAN_IOAPIC, vcpu);
+       }
         return 1;
  }

>> -- 
>> 2.43.5
>>
Sean Christopherson Feb. 11, 2025, 7:45 p.m. UTC | #3
On Fri, Dec 27, 2024, wzj wrote:
> On December 18, 2024, Sean Christopherson wrote:
> > On Thu, Nov 21, 2024, weizijie wrote:
> We make the code on the common path as simple as possible.
> > > +		clear_bit(vector, vcpu->arch.ioapic_pending_vectors);
> > > +		clear_bit(vector, vcpu->arch.ioapic_handled_vectors);
> > > +		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
> > > +	}
> > >   	return 1;
> > >   }
> KVM: x86: ioapic: Optimize EOI handling to reduce
>  unnecessary VM exits
> 
> Address performance issues caused by a vector being reused by a
> non-IOAPIC source.
> 
> commit 0fc5a36dd6b3
> ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
> addressed the issues related to EOI and IOAPIC reconfiguration races.
> However, it has introduced some performance concerns:
> 
> Configuring IOAPIC interrupts while an interrupt request (IRQ) is
> already in service can unintentionally trigger a VM exit for other
> interrupts that normally do not require one, due to the settings of
> `ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
> runtime, this issue persists, continuing to adversely affect
> performance.
> 
> Simple Fix Proposal:
> A straightforward solution is to record highest in-service IRQ that
> is pending at the time of the last scan. Then, upon the next guest
> exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
> the ioapic occurs only when the recorded vector is EOI'd, and
> subsequently, the extra bit in the eoi_exit_bitmap are cleared,
> avoiding unnecessary VM exits.
> 
> Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
> ---

This is whitespace damaged beyond what I am willing to massage and apply, and
honestly even beyond what I am willing to review.  Please fix your setup and
resend.

> v1 -> v2
> 
> * Move my SoB to the end and add Co-developed-by for Xuyun
> 
> * Use a u8 type to record a pending IRQ during the ioapic scan process
> 
> * Made the same changes for the split IRQ chip mode
> 
> arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/ioapic.c           | 10 ++++++++--
>  arch/x86/kvm/irq_comm.c         |  9 +++++++--
>  arch/x86/kvm/vmx/vmx.c          |  9 +++++++++
>  4 files changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index e159e44a6a1b..f84a4881afa4 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1041,6 +1041,7 @@ struct kvm_vcpu_arch {
>  #if IS_ENABLED(CONFIG_HYPERV)
>         hpa_t hv_root_tdp;
>  #endif
> +       u8 last_pending_vector;
>  };
> 
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
> index 995eb5054360..40252a800897 100644
> --- a/arch/x86/kvm/ioapic.c
> +++ b/arch/x86/kvm/ioapic.c
> @@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
> ulong *ioapic_handled_vectors)
>                         u16 dm =
> kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
> 
>                         if (kvm_apic_match_dest(vcpu, NULL,
> APIC_DEST_NOSHORT,
> - e->fields.dest_id, dm) ||
> -                           kvm_apic_pending_eoi(vcpu, e->fields.vector))
> + e->fields.dest_id, dm))
>                                 __set_bit(e->fields.vector,
>                                           ioapic_handled_vectors);
> +                       else if (kvm_apic_pending_eoi(vcpu,
> e->fields.vector)) {
> +                               __set_bit(e->fields.vector,
> +                                         ioapic_handled_vectors);
> +                               vcpu->arch.last_pending_vector =
> e->fields.vector >
> + vcpu->arch.last_pending_vector ? e->fields.vector :
> + vcpu->arch.last_pending_vector;
> +                       }
>                 }
>         }
>         spin_unlock(&ioapic->lock);
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 8136695f7b96..1d23c52576e1 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
> 
>                         if (irq.trig_mode &&
>                             (kvm_apic_match_dest(vcpu, NULL,
> APIC_DEST_NOSHORT,
> -                                                irq.dest_id, irq.dest_mode)
> ||
> -                            kvm_apic_pending_eoi(vcpu, irq.vector)))
> +                                                irq.dest_id,
> irq.dest_mode)))
>                                 __set_bit(irq.vector,
> ioapic_handled_vectors);
> +                       else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
> +                               __set_bit(irq.vector,
> ioapic_handled_vectors);
> +                               vcpu->arch.last_pending_vector = irq.vector
> >
> + vcpu->arch.last_pending_vector ? irq.vector :
> + vcpu->arch.last_pending_vector;
> +                       }
>                 }
>         }
>         srcu_read_unlock(&kvm->irq_srcu, idx);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 893366e53732..cd0db1496ce7 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5702,6 +5702,15 @@ static int handle_apic_eoi_induced(struct kvm_vcpu
> *vcpu)
> 
>         /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
>         kvm_apic_set_eoi_accelerated(vcpu, vector);
> +
> +       /* When there are instances where ioapic_handled_vectors is
> +        * set due to pending interrupts, clean up the record and do
> +        * a full KVM_REQ_SCAN_IOAPIC.
> +        */
> +       if (vcpu->arch.last_pending_vector == vector) {
> +               vcpu->arch.last_pending_vector = 0;
> +               kvm_make_request(KVM_REQ_SCAN_IOAPIC, vcpu);
> +       }
>         return 1;
>  }
> 
> > > -- 
> > > 2.43.5
> > >
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e159e44a6a1b..b008c933d2ab 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1041,6 +1041,7 @@  struct kvm_vcpu_arch {
 #if IS_ENABLED(CONFIG_HYPERV)
 	hpa_t hv_root_tdp;
 #endif
+	DECLARE_BITMAP(ioapic_pending_vectors, 256);
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 995eb5054360..6f5a88dc63da 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -284,6 +284,8 @@  void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 
 	spin_lock(&ioapic->lock);
 
+	bitmap_zero(vcpu->arch.ioapic_pending_vectors, 256);
+
 	/* Make sure we see any missing RTC EOI */
 	if (test_bit(vcpu->vcpu_id, dest_map->map))
 		__set_bit(dest_map->vectors[vcpu->vcpu_id],
@@ -297,10 +299,15 @@  void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
 
 			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						e->fields.dest_id, dm) ||
-			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
+						e->fields.dest_id, dm))
+				__set_bit(e->fields.vector,
+					  ioapic_handled_vectors);
+			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
 				__set_bit(e->fields.vector,
 					  ioapic_handled_vectors);
+				__set_bit(e->fields.vector,
+					  vcpu->arch.ioapic_pending_vectors);
+			}
 		}
 	}
 	spin_unlock(&ioapic->lock);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0f008f5ef6f0..572e6f9b8602 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5710,6 +5710,16 @@  static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
 
 	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
 	kvm_apic_set_eoi_accelerated(vcpu, vector);
+
+	/* When there are instances where ioapic_handled_vectors is
+	 * set due to pending interrupts, clean up the record and the
+	 * corresponding bit after the interrupt is completed.
+	 */
+	if (test_bit(vector, vcpu->arch.ioapic_pending_vectors)) {
+		clear_bit(vector, vcpu->arch.ioapic_pending_vectors);
+		clear_bit(vector, vcpu->arch.ioapic_handled_vectors);
+		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
+	}
 	return 1;
 }