Message ID | 20090827012955.208915957@localhost.localdomain (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 08/27/2009 04:20 AM, Marcelo Tosatti wrote: > Split KVM_REQ_KICKED in two bits: KVM_VCPU_KICKED, to indicate > whether a vcpu has been IPI'ed, and KVM_VCPU_GUEST_MODE, in a separate > vcpu_state variable. > > Unify remote requests with kvm_vcpu_kick. > > Synchronous requests wait on KVM_VCPU_GUEST_MODE, via wait_on_bit/wake_up_bit. > > I did miss guest_mode. > + unsigned long vcpu_state; > Why not bool guest_mode? Saves two atomics per exit.
On 08/27/2009 04:20 AM, Marcelo Tosatti wrote: > +} > + > +void kvm_vcpu_ipi(struct kvm_vcpu *vcpu) > +{ > + int me; > + int cpu = vcpu->cpu; > > me = get_cpu(); > - if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) > - if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) > - smp_send_reschedule(cpu); > + if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { > + if (test_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state)) { > + if (!test_and_set_bit(KVM_VCPU_KICKED, > + &vcpu->vcpu_state)) > + smp_send_reschedule(cpu); > + } > + } > put_cpu(); > } > > @@ -168,6 +176,30 @@ static bool make_all_cpus_request(struct > return called; > } > > +static int kvm_req_wait(void *unused) > +{ > + cpu_relax(); > + return 0; > +} > + > +static void kvm_vcpu_request(struct kvm_vcpu *vcpu, unsigned int req) > +{ > + set_bit(req, &vcpu->requests); > + barrier(); > + kvm_vcpu_ipi(vcpu); > + wait_on_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE, kvm_req_wait, > + TASK_UNINTERRUPTIBLE); > +} > + > +static void kvm_vcpus_request(struct kvm *kvm, unsigned int req) > +{ > + int i; > + struct kvm_vcpu *vcpu; > + > + kvm_for_each_vcpu(i, vcpu, kvm) > + kvm_vcpu_request(vcpu, req); > +} > Gleb notes there are two problems here: instead of using a multicast IPI, you're sending multiple unicast IPIs. Second, you're serializing the waiting. It would be better to batch-send the IPIs, then batch-wait for results.
On Thu, Aug 27, 2009 at 11:15:23AM +0300, Avi Kivity wrote: > On 08/27/2009 04:20 AM, Marcelo Tosatti wrote: >> Split KVM_REQ_KICKED in two bits: KVM_VCPU_KICKED, to indicate >> whether a vcpu has been IPI'ed, and KVM_VCPU_GUEST_MODE, in a separate >> vcpu_state variable. >> >> Unify remote requests with kvm_vcpu_kick. >> >> Synchronous requests wait on KVM_VCPU_GUEST_MODE, via wait_on_bit/wake_up_bit. >> >> > > I did miss guest_mode. > >> + unsigned long vcpu_state; >> > > Why not bool guest_mode? Saves two atomics per exit. It must be atomic since GUEST_MODE / VCPU_KICKED bits are manipulated by multiple CPU's. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Aug 27, 2009 at 11:25:17AM +0300, Avi Kivity wrote: > On 08/27/2009 04:20 AM, Marcelo Tosatti wrote: > >> +} >> + >> +void kvm_vcpu_ipi(struct kvm_vcpu *vcpu) >> +{ >> + int me; >> + int cpu = vcpu->cpu; >> >> me = get_cpu(); >> - if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) >> - if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) >> - smp_send_reschedule(cpu); >> + if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { >> + if (test_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state)) { >> + if (!test_and_set_bit(KVM_VCPU_KICKED, >> + &vcpu->vcpu_state)) >> + smp_send_reschedule(cpu); >> + } >> + } >> put_cpu(); >> } >> >> @@ -168,6 +176,30 @@ static bool make_all_cpus_request(struct >> return called; >> } >> >> +static int kvm_req_wait(void *unused) >> +{ >> + cpu_relax(); >> + return 0; >> +} >> + >> +static void kvm_vcpu_request(struct kvm_vcpu *vcpu, unsigned int req) >> +{ >> + set_bit(req, &vcpu->requests); >> + barrier(); >> + kvm_vcpu_ipi(vcpu); >> + wait_on_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE, kvm_req_wait, >> + TASK_UNINTERRUPTIBLE); >> +} >> + >> +static void kvm_vcpus_request(struct kvm *kvm, unsigned int req) >> +{ >> + int i; >> + struct kvm_vcpu *vcpu; >> + >> + kvm_for_each_vcpu(i, vcpu, kvm) >> + kvm_vcpu_request(vcpu, req); >> +} >> > > Gleb notes there are two problems here: instead of using a multicast > IPI, you're sending multiple unicast IPIs. Second, you're serializing > the waiting. It would be better to batch-send the IPIs, then batch-wait > for results. Right. Playing with multiple variants of batched send/wait but so far haven't been able to see significant improvements for REQ_FLUSH/REQ_RELOAD. Batched send will probably be more visible in guest IPI emulation. Note however that even with multiple unicast IPIs this change collapses kvm_vcpu_kick with the remote requests, so you decrease the number of IPI's. Was hoping to include these changes incrementally?
void kvm_vcpus_request(struct kvm *kvm, unsigned int req) { - int i; + int i, me, cpu; struct kvm_vcpu *vcpu; + cpumask_var_t wait_cpus, kick_cpus; + + if (alloc_cpumask_var(&wait_cpus, GFP_ATOMIC)) + cpumask_clear(wait_cpus); + + if (alloc_cpumask_var(&kick_cpus, GFP_ATOMIC)) + cpumask_clear(kick_cpus); + + me = get_cpu(); + kvm_for_each_vcpu(i, vcpu, kvm) { + set_bit(req, &vcpu->requests); + barrier(); + cpu = vcpu->cpu; + if (test_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state)) { + if (cpu != -1 && cpu != me) { + if (wait_cpus != NULL) + cpumask_set_cpu(cpu, wait_cpus); + if (kick_cpus != NULL) + if (!test_and_set_bit(KVM_VCPU_KICKED, + &vcpu->vcpu_state)) + cpumask_set_cpu(cpu, kick_cpus); + } + } + } + if (unlikely(kick_cpus == NULL)) + smp_call_function_many(cpu_online_mask, ack_flush, + NULL, 1); + else if (!cpumask_empty(kick_cpus)) + smp_send_reschedule_many(kick_cpus); kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vcpu_request(vcpu, req); + if (cpumask_test_cpu(vcpu->cpu, wait_cpus)) + if (test_bit(req, &vcpu->requests)) + wait_on_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE, + kvm_req_wait, TASK_UNINTERRUPTIBLE); + put_cpu(); + + free_cpumask_var(wait_cpus); + free_cpumask_var(kick_cpus); } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08/27/2009 03:45 PM, Marcelo Tosatti wrote: >> Why not bool guest_mode? Saves two atomics per exit. >> > It must be atomic since GUEST_MODE / VCPU_KICKED bits are manipulated by > multiple CPU's. > bools are atomic.
On Thu, Aug 27, 2009 at 04:24:58PM +0300, Avi Kivity wrote: > On 08/27/2009 03:45 PM, Marcelo Tosatti wrote: >>> Why not bool guest_mode? Saves two atomics per exit. >>> >> It must be atomic since GUEST_MODE / VCPU_KICKED bits are manipulated by >> multiple CPU's. >> > > bools are atomic. OK. - VCPU_KICKED requires test_and_set. GUEST_MODE/VCPU_KICKED accesses must not be reordered. (OK, could have GUEST_MODE in a bool even so, but its easier to read by keeping them together, at least to me). - Its easier to cacheline align with longs rather than bools? - From testing it seems the LOCK prefix is not heavy, as long as its cpu local (probably due to 7.1.4 Effects of a LOCK Operation on Internal Processor Caches?). BTW, 7.1.2.2 Software Controlled Bus Locking Software should access semaphores (shared memory used for signalling between multiple processors) using identical addresses and operand lengths. For example, if one processor accesses a semaphore using a word access, other processors should not access the semaphore using a byte access. The bit operations use 32-bit access, but the vcpu->requests check in vcpu_enter_guest uses 64-bit access. Is that safe? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08/27/2009 05:07 PM, Marcelo Tosatti wrote: > On Thu, Aug 27, 2009 at 04:24:58PM +0300, Avi Kivity wrote: > >> On 08/27/2009 03:45 PM, Marcelo Tosatti wrote: >> >>>> Why not bool guest_mode? Saves two atomics per exit. >>>> >>>> >>> It must be atomic since GUEST_MODE / VCPU_KICKED bits are manipulated by >>> multiple CPU's. >>> >>> >> bools are atomic. >> > OK. > > - VCPU_KICKED requires test_and_set. GUEST_MODE/VCPU_KICKED accesses > must not be reordered. > Why do we need both, btw? Set your vcpu->requests bit, if guest_mode is true, clear it and IPI. So guest_mode=false means, we might be in guest mode but if so we're due for a kick anyway. > (OK, could have GUEST_MODE in a bool even so, but its easier to read > by keeping them together, at least to me). > > - Its easier to cacheline align with longs rather than bools? > To cacheline align we need to pack everything important at the front of the structure. > - From testing it seems the LOCK prefix is not heavy, as long as its > cpu local (probably due to 7.1.4 Effects of a LOCK Operation on > Internal Processor Caches?). > Yes, in newer processors atomics are not nearly as expensive as they used to be. > BTW, > > 7.1.2.2 Software Controlled Bus Locking > > Software should access semaphores (shared memory used for signalling > between multiple processors) using identical addresses and operand > lengths. For example, if one processor accesses a semaphore using a word > access, other processors should not access the semaphore using a byte > access. > > The bit operations use 32-bit access, but the vcpu->requests check in > vcpu_enter_guest uses 64-bit access. > That's true, and bitops sometimes even uses byte operations. > Is that safe? > My guess yes, but not efficient.
On 08/28/2009 10:06 AM, Avi Kivity wrote: > > Why do we need both, btw? Set your vcpu->requests bit, if guest_mode > is true, clear it and IPI. So guest_mode=false means, we might be in > guest mode but if so we're due for a kick anyway. No, this introduces a race.
Index: kvm/arch/x86/kvm/x86.c =================================================================== --- kvm.orig/arch/x86/kvm/x86.c +++ kvm/arch/x86/kvm/x86.c @@ -3586,11 +3586,14 @@ static int vcpu_enter_guest(struct kvm_v local_irq_disable(); - clear_bit(KVM_REQ_KICK, &vcpu->requests); - smp_mb__after_clear_bit(); + set_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state); + barrier(); if (vcpu->requests || need_resched() || signal_pending(current)) { - set_bit(KVM_REQ_KICK, &vcpu->requests); + clear_bit(KVM_VCPU_KICKED, &vcpu->vcpu_state); + clear_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state); + smp_mb__after_clear_bit(); + wake_up_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE); local_irq_enable(); preempt_enable(); r = 1; @@ -3642,7 +3645,10 @@ static int vcpu_enter_guest(struct kvm_v set_debugreg(vcpu->arch.host_dr6, 6); set_debugreg(vcpu->arch.host_dr7, 7); - set_bit(KVM_REQ_KICK, &vcpu->requests); + clear_bit(KVM_VCPU_KICKED, &vcpu->vcpu_state); + clear_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state); + smp_mb__after_clear_bit(); + wake_up_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE); local_irq_enable(); ++vcpu->stat.exits; Index: kvm/include/linux/kvm_host.h =================================================================== --- kvm.orig/include/linux/kvm_host.h +++ kvm/include/linux/kvm_host.h @@ -42,6 +42,9 @@ #define KVM_USERSPACE_IRQ_SOURCE_ID 0 +#define KVM_VCPU_GUEST_MODE 0 +#define KVM_VCPU_KICKED 1 + struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; @@ -83,6 +86,7 @@ struct kvm_vcpu { int cpu; struct kvm_run *run; unsigned long requests; + unsigned long vcpu_state; unsigned long guest_debug; int fpu_active; int guest_fpu_loaded; @@ -362,6 +366,7 @@ void kvm_arch_sync_events(struct kvm *kv int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); +void kvm_vcpu_ipi(struct kvm_vcpu *vcpu); int kvm_is_mmio_pfn(pfn_t pfn); Index: kvm/virt/kvm/kvm_main.c 
=================================================================== --- kvm.orig/virt/kvm/kvm_main.c +++ kvm/virt/kvm/kvm_main.c @@ -119,18 +119,26 @@ void vcpu_put(struct kvm_vcpu *vcpu) void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { - int me; - int cpu = vcpu->cpu; - if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); ++vcpu->stat.halt_wakeup; } + kvm_vcpu_ipi(vcpu); +} + +void kvm_vcpu_ipi(struct kvm_vcpu *vcpu) +{ + int me; + int cpu = vcpu->cpu; me = get_cpu(); - if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) - if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) - smp_send_reschedule(cpu); + if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { + if (test_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state)) { + if (!test_and_set_bit(KVM_VCPU_KICKED, + &vcpu->vcpu_state)) + smp_send_reschedule(cpu); + } + } put_cpu(); } @@ -168,6 +176,30 @@ static bool make_all_cpus_request(struct return called; } +static int kvm_req_wait(void *unused) +{ + cpu_relax(); + return 0; +} + +static void kvm_vcpu_request(struct kvm_vcpu *vcpu, unsigned int req) +{ + set_bit(req, &vcpu->requests); + barrier(); + kvm_vcpu_ipi(vcpu); + wait_on_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE, kvm_req_wait, + TASK_UNINTERRUPTIBLE); +} + +static void kvm_vcpus_request(struct kvm *kvm, unsigned int req) +{ + int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vcpu_request(vcpu, req); +} + void kvm_flush_remote_tlbs(struct kvm *kvm) { if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) Index: kvm/arch/ia64/kvm/kvm-ia64.c =================================================================== --- kvm.orig/arch/ia64/kvm/kvm-ia64.c +++ kvm/arch/ia64/kvm/kvm-ia64.c @@ -655,12 +655,13 @@ again: host_ctx = kvm_get_host_context(vcpu); guest_ctx = kvm_get_guest_context(vcpu); - clear_bit(KVM_REQ_KICK, &vcpu->requests); - r = kvm_vcpu_pre_transition(vcpu); if (r < 0) goto vcpu_run_fail; + set_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state); + barrier(); + 
up_read(&vcpu->kvm->slots_lock); kvm_guest_enter(); @@ -672,7 +673,10 @@ again: kvm_vcpu_post_transition(vcpu); vcpu->arch.launched = 1; - set_bit(KVM_REQ_KICK, &vcpu->requests); + clear_bit(KVM_VCPU_KICKED, &vcpu->vcpu_state); + clear_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state); + smp_mb__after_clear_bit(); + wake_up_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE); local_irq_enable(); /*
Split KVM_REQ_KICKED in two bits: KVM_VCPU_KICKED, to indicate whether a vcpu has been IPI'ed, and KVM_VCPU_GUEST_MODE, in a separate vcpu_state variable. Unify remote requests with kvm_vcpu_kick. Synchronous requests wait on KVM_VCPU_GUEST_MODE, via wait_on_bit/wake_up_bit. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>