Message ID | 20180516152131.30689-6-vkuznets@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
2018-05-16 17:21+0200, Vitaly Kuznetsov: > Hyper-V style PV TLB flush hypercalls inmplementation will use this API. > To avoid memory allocation in CONFIG_CPUMASK_OFFSTACK case add > cpumask_var_t argument. > > Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> > --- > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > -bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) > +bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, > + unsigned long *vcpu_bitmap, cpumask_var_t tmp) > { > int i, cpu, me; > - cpumask_var_t cpus; > - bool called; > struct kvm_vcpu *vcpu; > - > - zalloc_cpumask_var(&cpus, GFP_ATOMIC); > + bool called; > > me = get_cpu(); > + Two optimizations come to mind: First is to use for_each_set_bit instead of kvm_for_each_vcpu to improve the sparse case. > kvm_for_each_vcpu(i, vcpu, kvm) { > + if (!test_bit(i, vcpu_bitmap)) And the second is to pass vcpu_bitmap = NULL instead of building the bitmap with all VCPUs. Doesn't looks too good in the end, though: #define kvm_for_each_vcpu_bitmap(idx, vcpup, kvm, bitmap, len) \ for (idx = (bitmap ? find_first_bit(bitmap, len) : 0); \ idx < len && (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ bitmap ? find_next_bit(bitmap, len, idx + 1) : idx++)
Radim Krčmář <rkrcmar@redhat.com> writes: > 2018-05-16 17:21+0200, Vitaly Kuznetsov: >> Hyper-V style PV TLB flush hypercalls inmplementation will use this API. >> To avoid memory allocation in CONFIG_CPUMASK_OFFSTACK case add >> cpumask_var_t argument. >> >> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> >> --- >> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c >> -bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) >> +bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, >> + unsigned long *vcpu_bitmap, cpumask_var_t tmp) >> { >> int i, cpu, me; >> - cpumask_var_t cpus; >> - bool called; >> struct kvm_vcpu *vcpu; >> - >> - zalloc_cpumask_var(&cpus, GFP_ATOMIC); >> + bool called; >> >> me = get_cpu(); >> + > > Two optimizations come to mind: First is to use for_each_set_bit instead > of kvm_for_each_vcpu to improve the sparse case. > I think I had such version at some point but then for some reason I decided I'm re-implementing kvm_for_each_vcpu for no particular reason. >> kvm_for_each_vcpu(i, vcpu, kvm) { >> + if (!test_bit(i, vcpu_bitmap)) > > And the second is to pass vcpu_bitmap = NULL instead of building the > bitmap with all VCPUs. Doesn't looks too good in the end, though: > > #define kvm_for_each_vcpu_bitmap(idx, vcpup, kvm, bitmap, len) \ > for (idx = (bitmap ? find_first_bit(bitmap, len) : 0); \ > idx < len && (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ > bitmap ? find_next_bit(bitmap, len, idx + 1) : idx++) I'll take a try, thanks!
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6d6e79c59e68..14e710d639c7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -730,6 +730,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); + +bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, + unsigned long *vcpu_bitmap, cpumask_var_t tmp); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); long kvm_arch_dev_ioctl(struct file *filp, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c7b2e927f699..b125d94307d2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -203,29 +203,47 @@ static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait) return true; } -bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) +bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, + unsigned long *vcpu_bitmap, cpumask_var_t tmp) { int i, cpu, me; - cpumask_var_t cpus; - bool called; struct kvm_vcpu *vcpu; - - zalloc_cpumask_var(&cpus, GFP_ATOMIC); + bool called; me = get_cpu(); + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!test_bit(i, vcpu_bitmap)) + continue; + kvm_make_request(req, vcpu); cpu = vcpu->cpu; if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu)) continue; - if (cpus != NULL && cpu != -1 && cpu != me && + if (tmp != NULL && cpu != -1 && cpu != me && kvm_request_needs_ipi(vcpu, req)) - __cpumask_set_cpu(cpu, cpus); + __cpumask_set_cpu(cpu, tmp); } - called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT)); + + called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT)); put_cpu(); + + return called; +} + +bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) +{ + cpumask_var_t cpus; + bool called; + static unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] + = {[0 ... 
BITS_TO_LONGS(KVM_MAX_VCPUS)-1] = ULONG_MAX}; + + zalloc_cpumask_var(&cpus, GFP_ATOMIC); + + called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap, cpus); + free_cpumask_var(cpus); return called; }
Hyper-V style PV TLB flush hypercalls implementation will use this API. To avoid memory allocation in CONFIG_CPUMASK_OFFSTACK case add cpumask_var_t argument. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> --- include/linux/kvm_host.h | 3 +++ virt/kvm/kvm_main.c | 34 ++++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 8 deletions(-)