
[2/5] KVM: reintroduce guest mode bit and unify remote request code

Message ID 20090827012955.208915957@localhost.localdomain (mailing list archive)
State New, archived

Commit Message

Marcelo Tosatti Aug. 27, 2009, 1:20 a.m. UTC
Split KVM_REQ_KICK into two bits, kept in a separate vcpu_state
variable: KVM_VCPU_KICKED, to indicate whether a vcpu has been IPI'ed,
and KVM_VCPU_GUEST_MODE, to indicate whether it is in guest mode.

Unify remote requests with kvm_vcpu_kick.

Synchronous requests wait on KVM_VCPU_GUEST_MODE, via wait_on_bit/wake_up_bit.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
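
In outline, the handshake the patch implements is the following (a minimal
user-space C11 sketch; the bit names mirror the patch, while the struct,
the helpers, and the atomics standing in for the kernel bitops and for
wait_on_bit/wake_up_bit are illustrative only):

#include <stdatomic.h>
#include <stdbool.h>

#define KVM_VCPU_GUEST_MODE	0
#define KVM_VCPU_KICKED		1

struct vcpu {
	atomic_ulong vcpu_state;	/* GUEST_MODE and KICKED bits */
	atomic_ulong requests;		/* pending remote requests */
};

/* Requester side, as in kvm_vcpu_request()/kvm_vcpu_ipi(). */
static void request(struct vcpu *v, unsigned int req)
{
	atomic_fetch_or(&v->requests, 1UL << req);
	/* IPI only if the vcpu is in guest mode, and at most once. */
	if (atomic_load(&v->vcpu_state) & (1UL << KVM_VCPU_GUEST_MODE)) {
		unsigned long kicked = 1UL << KVM_VCPU_KICKED;
		if (!(atomic_fetch_or(&v->vcpu_state, kicked) & kicked))
			;	/* smp_send_reschedule(vcpu->cpu) here */
	}
	/* Synchronous: wait until the vcpu has left guest mode. */
	while (atomic_load(&v->vcpu_state) & (1UL << KVM_VCPU_GUEST_MODE))
		;	/* wait_on_bit(..., KVM_VCPU_GUEST_MODE, ...) here */
}

/* vcpu side, around guest entry, as in vcpu_enter_guest(). */
static bool try_enter_guest(struct vcpu *v)
{
	atomic_fetch_or(&v->vcpu_state, 1UL << KVM_VCPU_GUEST_MODE);
	if (atomic_load(&v->requests)) {	/* bail out before entry */
		atomic_fetch_and(&v->vcpu_state,
				 ~((1UL << KVM_VCPU_KICKED) |
				   (1UL << KVM_VCPU_GUEST_MODE)));
		return false;			/* wake_up_bit() here */
	}
	return true;				/* run the guest */
}

The same clear-and-wake sequence runs again after the guest exits, which
is what a synchronous requester actually waits for.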

Comments

Avi Kivity Aug. 27, 2009, 8:15 a.m. UTC | #1
On 08/27/2009 04:20 AM, Marcelo Tosatti wrote:
> Split KVM_REQ_KICK into two bits, kept in a separate vcpu_state
> variable: KVM_VCPU_KICKED, to indicate whether a vcpu has been IPI'ed,
> and KVM_VCPU_GUEST_MODE, to indicate whether it is in guest mode.
>
> Unify remote requests with kvm_vcpu_kick.
>
> Synchronous requests wait on KVM_VCPU_GUEST_MODE, via wait_on_bit/wake_up_bit.
>
>    

I did miss guest_mode.

> +	unsigned long vcpu_state;
>    

Why not bool guest_mode?  Saves two atomics per exit.
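
(For reference, what is being suggested would look roughly like this; a
sketch with an illustrative struct name, the point being that guest-mode
tracking becomes plain stores instead of two lock-prefixed bitops per
entry/exit cycle:)

#include <stdbool.h>

struct kvm_vcpu_sketch {
	bool guest_mode;		/* replaces the GUEST_MODE bit */
};

static void guest_entry(struct kvm_vcpu_sketch *vcpu)
{
	vcpu->guest_mode = true;	/* plain store, no LOCK prefix */
}

static void guest_exit(struct kvm_vcpu_sketch *vcpu)
{
	vcpu->guest_mode = false;	/* plain store, no LOCK prefix */
}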
Avi Kivity Aug. 27, 2009, 8:25 a.m. UTC | #2
On 08/27/2009 04:20 AM, Marcelo Tosatti wrote:

> +}
> +
> +void kvm_vcpu_ipi(struct kvm_vcpu *vcpu)
> +{
> +	int me;
> +	int cpu = vcpu->cpu;
>
>   	me = get_cpu();
> -	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
> -		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
> -			smp_send_reschedule(cpu);
> +	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
> +		if (test_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state)) {
> +			if (!test_and_set_bit(KVM_VCPU_KICKED,
> +					&vcpu->vcpu_state))
> +				smp_send_reschedule(cpu);
> +		}
> +	}
>   	put_cpu();
>   }
>
> @@ -168,6 +176,30 @@ static bool make_all_cpus_request(struct
>   	return called;
>   }
>
> +static int kvm_req_wait(void *unused)
> +{
> +	cpu_relax();
> +	return 0;
> +}
> +
> +static void kvm_vcpu_request(struct kvm_vcpu *vcpu, unsigned int req)
> +{
> +	set_bit(req, &vcpu->requests);
> +	barrier();
> +	kvm_vcpu_ipi(vcpu);
> +	wait_on_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE, kvm_req_wait,
> +		    TASK_UNINTERRUPTIBLE);
> +}
> +
> +static void kvm_vcpus_request(struct kvm *kvm, unsigned int req)
> +{
> +	int i;
> +	struct kvm_vcpu *vcpu;
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm)
> +		kvm_vcpu_request(vcpu, req);
> +}
>    

Gleb notes there are two problems here: first, instead of using a multicast
IPI, you're sending multiple unicast IPIs.  Second, you're serializing
the waiting.  It would be better to batch-send the IPIs, then batch-wait
for results.
Marcelo Tosatti Aug. 27, 2009, 12:45 p.m. UTC | #3
On Thu, Aug 27, 2009 at 11:15:23AM +0300, Avi Kivity wrote:
> On 08/27/2009 04:20 AM, Marcelo Tosatti wrote:
>> Split KVM_REQ_KICK into two bits, kept in a separate vcpu_state
>> variable: KVM_VCPU_KICKED, to indicate whether a vcpu has been IPI'ed,
>> and KVM_VCPU_GUEST_MODE, to indicate whether it is in guest mode.
>>
>> Unify remote requests with kvm_vcpu_kick.
>>
>> Synchronous requests wait on KVM_VCPU_GUEST_MODE, via wait_on_bit/wake_up_bit.
>>
>>    
>
> I did miss guest_mode.
>
>> +	unsigned long vcpu_state;
>>    
>
> Why not bool guest_mode?  Saves two atomics per exit.

It must be atomic since the GUEST_MODE / VCPU_KICKED bits are manipulated by
multiple CPUs.

Marcelo Tosatti Aug. 27, 2009, 12:58 p.m. UTC | #4
On Thu, Aug 27, 2009 at 11:25:17AM +0300, Avi Kivity wrote:
> On 08/27/2009 04:20 AM, Marcelo Tosatti wrote:
>
>> +}
>> +
>> +void kvm_vcpu_ipi(struct kvm_vcpu *vcpu)
>> +{
>> +	int me;
>> +	int cpu = vcpu->cpu;
>>
>>   	me = get_cpu();
>> -	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
>> -		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
>> -			smp_send_reschedule(cpu);
>> +	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
>> +		if (test_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state)) {
>> +			if (!test_and_set_bit(KVM_VCPU_KICKED,
>> +					&vcpu->vcpu_state))
>> +				smp_send_reschedule(cpu);
>> +		}
>> +	}
>>   	put_cpu();
>>   }
>>
>> @@ -168,6 +176,30 @@ static bool make_all_cpus_request(struct
>>   	return called;
>>   }
>>
>> +static int kvm_req_wait(void *unused)
>> +{
>> +	cpu_relax();
>> +	return 0;
>> +}
>> +
>> +static void kvm_vcpu_request(struct kvm_vcpu *vcpu, unsigned int req)
>> +{
>> +	set_bit(req, &vcpu->requests);
>> +	barrier();
>> +	kvm_vcpu_ipi(vcpu);
>> +	wait_on_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE, kvm_req_wait,
>> +		    TASK_UNINTERRUPTIBLE);
>> +}
>> +
>> +static void kvm_vcpus_request(struct kvm *kvm, unsigned int req)
>> +{
>> +	int i;
>> +	struct kvm_vcpu *vcpu;
>> +
>> +	kvm_for_each_vcpu(i, vcpu, kvm)
>> +		kvm_vcpu_request(vcpu, req);
>> +}
>>    
>
> Gleb notes there are two problems here: first, instead of using a multicast
> IPI, you're sending multiple unicast IPIs.  Second, you're serializing
> the waiting.  It would be better to batch-send the IPIs, then batch-wait
> for results.

Right. I've been playing with several variants of batched send/wait,
but so far haven't been able to see significant improvements for
REQ_FLUSH/REQ_RELOAD.

Batched send will probably be more visible in guest IPI emulation.

Note however that even with multiple unicast IPIs this change collapses
kvm_vcpu_kick into the remote request path, so the number of IPIs
decreases.

Would it be OK to include these changes incrementally?

 void kvm_vcpus_request(struct kvm *kvm, unsigned int req)
 {
-	int i;
+	int i, me, cpu;
 	struct kvm_vcpu *vcpu;
+	cpumask_var_t wait_cpus, kick_cpus;
+
+	if (alloc_cpumask_var(&wait_cpus, GFP_ATOMIC))
+		cpumask_clear(wait_cpus);
+
+	if (alloc_cpumask_var(&kick_cpus, GFP_ATOMIC))
+		cpumask_clear(kick_cpus);
+
+	me = get_cpu();
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		set_bit(req, &vcpu->requests);
+		barrier();
+		cpu = vcpu->cpu;
+		if (test_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state)) {
+			if (cpu != -1 && cpu != me) {
+				if (wait_cpus != NULL)
+					cpumask_set_cpu(cpu, wait_cpus);
+				if (kick_cpus != NULL)
+					if (!test_and_set_bit(KVM_VCPU_KICKED,
+						      &vcpu->vcpu_state))
+						cpumask_set_cpu(cpu, kick_cpus);
+			}
+		}
+	}
+	if (unlikely(kick_cpus == NULL))
+		smp_call_function_many(cpu_online_mask, ack_flush,
+				       NULL, 1);
+	else if (!cpumask_empty(kick_cpus))
+		smp_send_reschedule_many(kick_cpus);
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
-		kvm_vcpu_request(vcpu, req);
+		if (wait_cpus != NULL && cpumask_test_cpu(vcpu->cpu, wait_cpus))
+			if (test_bit(req, &vcpu->requests))
+				wait_on_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE,
+					    kvm_req_wait, TASK_UNINTERRUPTIBLE);
+	put_cpu();
+
+	free_cpumask_var(wait_cpus);
+	free_cpumask_var(kick_cpus);
 }


Avi Kivity Aug. 27, 2009, 1:24 p.m. UTC | #5
On 08/27/2009 03:45 PM, Marcelo Tosatti wrote:
>> Why not bool guest_mode?  Saves two atomics per exit.
>>      
> It must be atomic since the GUEST_MODE / VCPU_KICKED bits are manipulated by
> multiple CPUs.
>    

bools are atomic.
Marcelo Tosatti Aug. 27, 2009, 2:07 p.m. UTC | #6
On Thu, Aug 27, 2009 at 04:24:58PM +0300, Avi Kivity wrote:
> On 08/27/2009 03:45 PM, Marcelo Tosatti wrote:
>>> Why not bool guest_mode?  Saves two atomics per exit.
>>>      
>> It must be atomic since the GUEST_MODE / VCPU_KICKED bits are manipulated by
>> multiple CPUs.
>>    
>
> bools are atomic.

OK.

- VCPU_KICKED requires test_and_set. GUEST_MODE/VCPU_KICKED accesses
  must not be reordered.

(OK, GUEST_MODE could live in a bool even so, but it's easier to read
by keeping them together, at least to me).

- It's easier to cacheline-align with longs than with bools?

- From testing it seems the LOCK prefix is not heavy, as long as it's
  cpu-local (probably due to 7.1.4, "Effects of a LOCK Operation on
  Internal Processor Caches"?).

BTW, 

7.1.2.2 Software Controlled Bus Locking

Software should access semaphores (shared memory used for signalling
between multiple processors) using identical addresses and operand
lengths. For example, if one processor accesses a semaphore using a word
access, other processors should not access the semaphore using a byte
access.

The bit operations use 32-bit access, but the vcpu->requests check in
vcpu_enter_guest uses 64-bit access.

Is that safe?
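
To illustrate the question, the access pattern at issue is roughly the
following (a user-space toy model with illustrative names, using the GCC
__atomic builtins to stand in for the kernel's locked bitops; not the
actual kernel code):

#include <stdint.h>

union requests_word {
	uint64_t whole;		/* the 64-bit "if (vcpu->requests)" read */
	uint32_t half[2];	/* granule the 32-bit locked bitop touches */
};

static union requests_word requests;

static void set_req_bit(unsigned int nr)	/* models set_bit() */
{
	/* one CPU: lock-prefixed 32-bit read-modify-write */
	__atomic_fetch_or(&requests.half[nr / 32],
			  UINT32_C(1) << (nr % 32), __ATOMIC_SEQ_CST);
}

static int req_pending(void)			/* models the entry check */
{
	/* another CPU: plain 64-bit load of the same memory */
	return requests.whole != 0;
}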

Avi Kivity Aug. 28, 2009, 7:06 a.m. UTC | #7
On 08/27/2009 05:07 PM, Marcelo Tosatti wrote:
> On Thu, Aug 27, 2009 at 04:24:58PM +0300, Avi Kivity wrote:
>    
>> On 08/27/2009 03:45 PM, Marcelo Tosatti wrote:
>>      
>>>> Why not bool guest_mode?  Saves two atomics per exit.
>>>>
>>>>          
>>> It must be atomic since the GUEST_MODE / VCPU_KICKED bits are manipulated by
>>> multiple CPUs.
>>>
>>>        
>> bools are atomic.
>>      
> OK.
>
> - VCPU_KICKED requires test_and_set. GUEST_MODE/VCPU_KICKED accesses
>    must not be reordered.
>    

Why do we need both, btw?  Set your vcpu->requests bit; if guest_mode is 
true, clear it and IPI.  So guest_mode == false means we might be in guest 
mode, but if so we're due for a kick anyway.

> (OK, GUEST_MODE could live in a bool even so, but it's easier to read
> by keeping them together, at least to me).
>
> - It's easier to cacheline-align with longs than with bools?
>    

To cacheline align we need to pack everything important at the front of 
the structure.

> - From testing it seems the LOCK prefix is not heavy, as long as it's
>    cpu-local (probably due to 7.1.4, "Effects of a LOCK Operation on
>    Internal Processor Caches"?).
>    

Yes, in newer processors atomics are not nearly as expensive as they 
used to be.

> BTW,
>
> 7.1.2.2 Software Controlled Bus Locking
>
> Software should access semaphores (shared memory used for signalling
> between multiple processors) using identical addresses and operand
> lengths. For example, if one processor accesses a semaphore using a word
> access, other processors should not access the semaphore using a byte
> access.
>
> The bit operations use 32-bit access, but the vcpu->requests check in
> vcpu_enter_guest uses 64-bit access.
>    

That's true, and bitops sometimes even use byte operations.

> Is that safe?
>    

My guess is yes, but it's not efficient.
Avi Kivity Aug. 28, 2009, 7:22 a.m. UTC | #8
On 08/28/2009 10:06 AM, Avi Kivity wrote:
>
> Why do we need both, btw?  Set your vcpu->requests bit, if guest_mode 
> is true, clear it and IPI.  So guest_mode=false means, we might be in 
> guest mode but if so we're due for a kick anyway.

No, this introduces a race.
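
For reference, the single-flag variant under discussion would be roughly
the following, reusing the user-space sketch from the commit message
above (atomics stand in for the kernel bitops; the clear-and-test is one
reading of "if guest_mode is true, clear it and IPI"):

static void request_single_flag(struct vcpu *v, unsigned int req)
{
	atomic_fetch_or(&v->requests, 1UL << req);
	/* clear guest_mode; IPI only if it was set */
	if (atomic_fetch_and(&v->vcpu_state,
			     ~(1UL << KVM_VCPU_GUEST_MODE)) &
	    (1UL << KVM_VCPU_GUEST_MODE))
		;	/* smp_send_reschedule(vcpu->cpu) here */
	/* guest_mode == false now means: out of guest mode, or still in
	 * guest mode but already due for a kick */
}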

Patch

Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -3586,11 +3586,14 @@  static int vcpu_enter_guest(struct kvm_v
 
 	local_irq_disable();
 
-	clear_bit(KVM_REQ_KICK, &vcpu->requests);
-	smp_mb__after_clear_bit();
+	set_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state);
+	barrier();
 
 	if (vcpu->requests || need_resched() || signal_pending(current)) {
-		set_bit(KVM_REQ_KICK, &vcpu->requests);
+		clear_bit(KVM_VCPU_KICKED, &vcpu->vcpu_state);
+		clear_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state);
+		smp_mb__after_clear_bit();
+		wake_up_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE);
 		local_irq_enable();
 		preempt_enable();
 		r = 1;
@@ -3642,7 +3645,10 @@  static int vcpu_enter_guest(struct kvm_v
 	set_debugreg(vcpu->arch.host_dr6, 6);
 	set_debugreg(vcpu->arch.host_dr7, 7);
 
-	set_bit(KVM_REQ_KICK, &vcpu->requests);
+	clear_bit(KVM_VCPU_KICKED, &vcpu->vcpu_state);
+	clear_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE);
 	local_irq_enable();
 
 	++vcpu->stat.exits;
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -42,6 +42,9 @@ 
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
+#define KVM_VCPU_GUEST_MODE        0
+#define KVM_VCPU_KICKED            1
+
 struct kvm;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
@@ -83,6 +86,7 @@  struct kvm_vcpu {
 	int   cpu;
 	struct kvm_run *run;
 	unsigned long requests;
+	unsigned long vcpu_state;
 	unsigned long guest_debug;
 	int fpu_active;
 	int guest_fpu_loaded;
@@ -362,6 +366,7 @@  void kvm_arch_sync_events(struct kvm *kv
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+void kvm_vcpu_ipi(struct kvm_vcpu *vcpu);
 
 int kvm_is_mmio_pfn(pfn_t pfn);
 
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -119,18 +119,26 @@  void vcpu_put(struct kvm_vcpu *vcpu)
 
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int me;
-	int cpu = vcpu->cpu;
-
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
 		++vcpu->stat.halt_wakeup;
 	}
+	kvm_vcpu_ipi(vcpu);
+}
+
+void kvm_vcpu_ipi(struct kvm_vcpu *vcpu)
+{
+	int me;
+	int cpu = vcpu->cpu;
 
 	me = get_cpu();
-	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
-		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
-			smp_send_reschedule(cpu);
+	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
+		if (test_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state)) {
+			if (!test_and_set_bit(KVM_VCPU_KICKED,
+					      &vcpu->vcpu_state))
+				smp_send_reschedule(cpu);
+		}
+	}
 	put_cpu();
 }
 
@@ -168,6 +176,30 @@  static bool make_all_cpus_request(struct
 	return called;
 }
 
+static int kvm_req_wait(void *unused)
+{
+	cpu_relax();
+	return 0;
+}
+
+static void kvm_vcpu_request(struct kvm_vcpu *vcpu, unsigned int req)
+{
+	set_bit(req, &vcpu->requests);
+	barrier();
+	kvm_vcpu_ipi(vcpu);
+	wait_on_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE, kvm_req_wait,
+		    TASK_UNINTERRUPTIBLE);
+}
+
+static void kvm_vcpus_request(struct kvm *kvm, unsigned int req)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_vcpu_request(vcpu, req);
+}
+
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
 	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
Index: kvm/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm/arch/ia64/kvm/kvm-ia64.c
@@ -655,12 +655,13 @@  again:
 	host_ctx = kvm_get_host_context(vcpu);
 	guest_ctx = kvm_get_guest_context(vcpu);
 
-	clear_bit(KVM_REQ_KICK, &vcpu->requests);
-
 	r = kvm_vcpu_pre_transition(vcpu);
 	if (r < 0)
 		goto vcpu_run_fail;
 
+	set_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state);
+	barrier();
+
 	up_read(&vcpu->kvm->slots_lock);
 	kvm_guest_enter();
 
@@ -672,7 +673,10 @@  again:
 	kvm_vcpu_post_transition(vcpu);
 
 	vcpu->arch.launched = 1;
-	set_bit(KVM_REQ_KICK, &vcpu->requests);
+	clear_bit(KVM_VCPU_KICKED, &vcpu->vcpu_state);
+	clear_bit(KVM_VCPU_GUEST_MODE, &vcpu->vcpu_state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&vcpu->vcpu_state, KVM_VCPU_GUEST_MODE);
 	local_irq_enable();
 
 	/*