@@ -15,6 +15,7 @@
#include <linux/cpumask.h>
#include <linux/irq_work.h>
#include <linux/irq.h>
+#include <linux/local_lock.h>
#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -1612,6 +1613,12 @@ extern bool kvm_has_bus_lock_exit;
/* maximum vcpu-id */
unsigned int kvm_max_vcpu_ids(void);
+/* per cpu vcpu bitmask, protected by kvm_pcpu_mask_lock */
+DECLARE_PER_CPU(local_lock_t, kvm_pcpu_mask_lock);
+extern unsigned long __percpu *kvm_pcpu_vcpu_mask;
+#define KVM_VCPU_MASK_SZ \
+ (sizeof(*kvm_pcpu_vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS))
+
extern u64 kvm_mce_cap_supported;
/*
@@ -320,7 +320,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
bool mask_before, mask_after;
union kvm_ioapic_redirect_entry *e;
int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode;
- DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+ unsigned long *vcpu_bitmap;
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
@@ -384,6 +384,10 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
irq.shorthand = APIC_DEST_NOSHORT;
irq.dest_id = e->fields.dest_id;
irq.msi_redir_hint = false;
+
+ local_lock(&kvm_pcpu_mask_lock);
+
+ vcpu_bitmap = this_cpu_ptr(kvm_pcpu_vcpu_mask);
bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
vcpu_bitmap);
@@ -403,6 +407,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
}
kvm_make_scan_ioapic_request_mask(ioapic->kvm,
vcpu_bitmap);
+
+ local_unlock(&kvm_pcpu_mask_lock);
} else {
kvm_make_scan_ioapic_request(ioapic->kvm);
}
@@ -47,7 +47,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
{
int i, r = -1;
struct kvm_vcpu *vcpu, *lowest = NULL;
- unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ unsigned long *dest_vcpu_bitmap;
unsigned int dest_vcpus = 0;
if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
@@ -59,7 +59,10 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
irq->delivery_mode = APIC_DM_FIXED;
}
- memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+ local_lock(&kvm_pcpu_mask_lock);
+ dest_vcpu_bitmap = this_cpu_ptr(kvm_pcpu_vcpu_mask);
+
+ memset(dest_vcpu_bitmap, 0, KVM_VCPU_MASK_SZ);
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!kvm_apic_present(vcpu))
@@ -93,6 +96,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
lowest = kvm_get_vcpu(kvm, idx);
}
+ local_unlock(&kvm_pcpu_mask_lock);
+
if (lowest)
r = kvm_apic_set_irq(lowest, irq, dest_map);
@@ -215,6 +215,10 @@ unsigned int kvm_max_vcpu_ids(void)
}
EXPORT_SYMBOL_GPL(kvm_max_vcpu_ids);
+DEFINE_PER_CPU(local_lock_t, kvm_pcpu_mask_lock) =
+ INIT_LOCAL_LOCK(kvm_pcpu_mask_lock);
+unsigned long __percpu *kvm_pcpu_vcpu_mask;
+
/*
* Restoring the host value for MSRs that are only consumed when running in
* usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
@@ -11247,9 +11251,16 @@ int kvm_arch_hardware_setup(void *opaque)
if (boot_cpu_has(X86_FEATURE_XSAVES))
rdmsrl(MSR_IA32_XSS, host_xss);
+ kvm_pcpu_vcpu_mask = __alloc_percpu(KVM_VCPU_MASK_SZ,
+ sizeof(unsigned long));
+ if (!kvm_pcpu_vcpu_mask) {
+ r = -ENOMEM;
+ goto err;
+ }
+
r = ops->hardware_setup();
if (r != 0)
- return r;
+ goto err;
memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
kvm_ops_static_call_update();
@@ -11277,11 +11288,16 @@ int kvm_arch_hardware_setup(void *opaque)
kvm_init_msr_list();
return 0;
+
+ err:
+ free_percpu(kvm_pcpu_vcpu_mask);
+ return r;
}
void kvm_arch_hardware_unsetup(void)
{
static_call(kvm_x86_hardware_unsetup)();
+ free_percpu(kvm_pcpu_vcpu_mask);
}
int kvm_arch_check_processor_compat(void *opaque)
In order to support a high number of vcpus per guest, don't use an on
stack vcpu bitmask. As the currently used bitmask is not used in
functions subject to recursion, it is fairly easy to replace it with a
percpu bitmask.

Allocate this bitmask dynamically in order to support a boot time
specified maximum number of vcpus in the future.

Protect the bitmask with a local lock while it is being used in order
to avoid double usage in case we'd switch cpus.

Note that this doesn't apply to the vcpu bitmasks used in hyperv.c, as
there the maximum number of vcpus is architecturally limited to 4096
and those bitmasks can remain on the stack.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- use local_lock() instead of preempt_disable() (Paolo Bonzini)
V3:
- drop hyperv.c related changes (Eduardo Habkost)
---
 arch/x86/include/asm/kvm_host.h |  7 +++++++
 arch/x86/kvm/ioapic.c           |  8 +++++++-
 arch/x86/kvm/irq_comm.c         |  9 +++++++--
 arch/x86/kvm/x86.c              | 18 +++++++++++++++++-
 4 files changed, 38 insertions(+), 4 deletions(-)
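
For readers unfamiliar with the pattern, the per cpu bitmask is used
like this (a minimal sketch distilled from the hunks above, not
additional patch content; error paths and the surrounding KVM logic
are omitted):

	unsigned long *vcpu_bitmap;

	/* Claim this cpu's bitmask; excludes other users on this cpu. */
	local_lock(&kvm_pcpu_mask_lock);

	vcpu_bitmap = this_cpu_ptr(kvm_pcpu_vcpu_mask);
	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);

	/* ... fill and consume the destination vcpu set here ... */

	local_unlock(&kvm_pcpu_mask_lock);

On a non-RT kernel local_lock() boils down to preempt_disable(), while
on PREEMPT_RT it takes a per cpu spinlock; in both cases the task
cannot migrate to another cpu and no second user on the same cpu can
clobber the bitmask while the lock is held.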