@@ -657,26 +657,32 @@ static u8 count_vectors(void *bitmap)
bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr)
{
- u32 *__pir = (void *)pir;
+ unsigned long pir_vals[NR_PIR_WORDS];
+ u32 *__pir = (void *)pir_vals;
u32 i, vec;
- u32 pir_val, irr_val, prev_irr_val;
+ u32 irr_val, prev_irr_val;
int max_updated_irr;
max_updated_irr = -1;
*max_irr = -1;
+ for (i = 0; i < NR_PIR_WORDS; i++) {
+ pir_vals[i] = READ_ONCE(pir[i]);
+ if (!pir_vals[i])
+ continue;
+
+ pir_vals[i] = xchg(&pir[i], 0);
+ }
+
for (i = vec = 0; i <= 7; i++, vec += 32) {
u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);
irr_val = READ_ONCE(*p_irr);
- pir_val = READ_ONCE(__pir[i]);
-
- if (pir_val) {
- pir_val = xchg(&__pir[i], 0);
+ if (__pir[i]) {
prev_irr_val = irr_val;
do {
- irr_val = prev_irr_val | pir_val;
+ irr_val = prev_irr_val | __pir[i];
} while (prev_irr_val != irr_val &&
!try_cmpxchg(p_irr, &prev_irr_val, irr_val));
Process the PIR at the natural kernel width, i.e. in 64-bit chunks on 64-bit kernels, so that the worst case of having a posted IRQ in each chunk of the vIRR only requires 4 loads and xchgs from/to the PIR, not 8. Deliberately use a "continue" to skip empty entries so that the code is a carbon copy of handle_pending_pir(), in anticipation of deduplicating KVM and posted MSI logic. Suggested-by: Jim Mattson <jmattson@google.com> Signed-off-by: Sean Christopherson <seanjc@google.com> --- arch/x86/kvm/lapic.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-)