
[V2,RFC,2/3] kvm: Handle yield_to failure return code for potential undercommit case

Message ID 20121029140702.15448.56932.sendpatchset@codeblue (mailing list archive)
State New, archived

Commit Message

Raghavendra K T Oct. 29, 2012, 2:07 p.m. UTC
From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>

Also we do not update last boosted vcpu in failure cases.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
---

 virt/kvm/kvm_main.c |   21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)



Comments

Avi Kivity Oct. 31, 2012, 12:38 p.m. UTC | #1
On 10/29/2012 04:07 PM, Raghavendra K T wrote:
> From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
> 
> Also we do not update last boosted vcpu in failure cases.
> 
>  #endif
> +
>  void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>  {
>  	struct kvm *kvm = me->kvm;
> @@ -1727,11 +1727,12 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>  				continue;
>  			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
>  				continue;
> -			if (kvm_vcpu_yield_to(vcpu)) {
> +
> +			yielded = kvm_vcpu_yield_to(vcpu);
> +			if (yielded > 0)
>  				kvm->last_boosted_vcpu = i;
> -				yielded = 1;
> +			if (yielded)
>  				break;
> -			}
>  		}

If yielded == -ESRCH, should we not try to yield to another vcpu?
Raghavendra K T Oct. 31, 2012, 12:41 p.m. UTC | #2
On 10/31/2012 06:08 PM, Avi Kivity wrote:
> On 10/29/2012 04:07 PM, Raghavendra K T wrote:
>> From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
>>
>> Also we do not update last boosted vcpu in failure cases.
>>
>>   #endif
>> +
>>   void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>>   {
>>   	struct kvm *kvm = me->kvm;
>> @@ -1727,11 +1727,12 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>>   				continue;
>>   			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
>>   				continue;
>> -			if (kvm_vcpu_yield_to(vcpu)) {
>> +
>> +			yielded = kvm_vcpu_yield_to(vcpu);
>> +			if (yielded > 0)
>>   				kvm->last_boosted_vcpu = i;
>> -				yielded = 1;
>> +			if (yielded)
>>   				break;
>> -			}
>>   		}
>
> If yielded == -ESRCH, should we not try to yield to another vcpu?
>

  Yes. The plan is to abort the iteration, since it means we are mostly
undercommitted.


Raghavendra K T Oct. 31, 2012, 1:15 p.m. UTC | #3
On 10/31/2012 06:11 PM, Raghavendra K T wrote:
> On 10/31/2012 06:08 PM, Avi Kivity wrote:
>> On 10/29/2012 04:07 PM, Raghavendra K T wrote:
>>> From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
>>>
>>> Also we do not update last boosted vcpu in failure cases.
>>>
>>>   #endif
>>> +
>>>   void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>>>   {
>>>       struct kvm *kvm = me->kvm;
>>> @@ -1727,11 +1727,12 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>>>                   continue;
>>>               if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
>>>                   continue;
>>> -            if (kvm_vcpu_yield_to(vcpu)) {
>>> +
>>> +            yielded = kvm_vcpu_yield_to(vcpu);
>>> +            if (yielded > 0)
>>>                   kvm->last_boosted_vcpu = i;
>>> -                yielded = 1;
>>> +            if (yielded)
>>>                   break;
>>> -            }
>>>           }
>>
>> If yielded == -ESRCH, should we not try to yield to another vcpu?
>>
>
>   Yes. plan is to abort the iteration. since it means we are mostly
> undercommitted.

Sorry if that was ambiguous. I wanted to say we do not want to continue
yielding to another vcpu.

Avi Kivity Oct. 31, 2012, 1:41 p.m. UTC | #4
On 10/31/2012 03:15 PM, Raghavendra K T wrote:
> On 10/31/2012 06:11 PM, Raghavendra K T wrote:
>> On 10/31/2012 06:08 PM, Avi Kivity wrote:
>>> On 10/29/2012 04:07 PM, Raghavendra K T wrote:
>>>> From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
>>>>
>>>> Also we do not update last boosted vcpu in failure cases.
>>>>
>>>>   #endif
>>>> +
>>>>   void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>>>>   {
>>>>       struct kvm *kvm = me->kvm;
>>>> @@ -1727,11 +1727,12 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>>>>                   continue;
>>>>               if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
>>>>                   continue;
>>>> -            if (kvm_vcpu_yield_to(vcpu)) {
>>>> +
>>>> +            yielded = kvm_vcpu_yield_to(vcpu);
>>>> +            if (yielded > 0)
>>>>                   kvm->last_boosted_vcpu = i;
>>>> -                yielded = 1;
>>>> +            if (yielded)
>>>>                   break;
>>>> -            }
>>>>           }
>>>
>>> If yielded == -ESRCH, should we not try to yield to another vcpu?
>>>
>>
>>   Yes. plan is to abort the iteration. since it means we are mostly
>> undercommitted.
> 
> Sorry if it was ambiguous. I wanted to say we do not want to continue
> yield to another vcpu..
> 


Why not?  We found that this particular vcpu is running and therefore
likely not a lock holder.  That says nothing about other vcpus.  The
next in line might be runnable-but-not-running on another runqueue.
Raghavendra K T Oct. 31, 2012, 5:06 p.m. UTC | #5
On 10/31/2012 07:11 PM, Avi Kivity wrote:
> On 10/31/2012 03:15 PM, Raghavendra K T wrote:
>> On 10/31/2012 06:11 PM, Raghavendra K T wrote:
>>> On 10/31/2012 06:08 PM, Avi Kivity wrote:
>>>> On 10/29/2012 04:07 PM, Raghavendra K T wrote:
>>>>> From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
>>>>>
>>>>> Also we do not update last boosted vcpu in failure cases.
>>>>>
>>>>>    #endif
>>>>> +
>>>>>    void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>>>>>    {
>>>>>        struct kvm *kvm = me->kvm;
>>>>> @@ -1727,11 +1727,12 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>>>>>                    continue;
>>>>>                if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
>>>>>                    continue;
>>>>> -            if (kvm_vcpu_yield_to(vcpu)) {
>>>>> +
>>>>> +            yielded = kvm_vcpu_yield_to(vcpu);
>>>>> +            if (yielded > 0)
>>>>>                    kvm->last_boosted_vcpu = i;
>>>>> -                yielded = 1;
>>>>> +            if (yielded)
>>>>>                    break;
>>>>> -            }
>>>>>            }
>>>>
>>>> If yielded == -ESRCH, should we not try to yield to another vcpu?
>>>>
>>>
>>>    Yes. plan is to abort the iteration. since it means we are mostly
>>> undercommitted.
>>
>> Sorry if it was ambiguous. I wanted to say we do not want to continue
>> yield to another vcpu..
>>
>
>
> Why not?  We found that this particular vcpu is running and therefore
> likely not a lock holder.  That says nothing about other vcpus.  The
> next in line might be runnable-but-not-running on another runqueue.

Agreed that the next in line might be runnable-but-not-running.  But here
we are optimistic that this is not the case, and we save time by
returning instead of iterating further, on the assumption that we are
mostly in the undercommitted case and each vcpu has a dedicated cpu.

Probably an alternative here is to look for, say, 2-3 successive
failures before breaking out?

Raghavendra K T Nov. 7, 2012, 10:25 a.m. UTC | #6
* Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com> [2012-10-31 22:36:25]:

> On 10/31/2012 07:11 PM, Avi Kivity wrote:
> >On 10/31/2012 03:15 PM, Raghavendra K T wrote:
> >>On 10/31/2012 06:11 PM, Raghavendra K T wrote:
> >>>On 10/31/2012 06:08 PM, Avi Kivity wrote:
> >>>>On 10/29/2012 04:07 PM, Raghavendra K T wrote:
> >>>>>From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
> >>>>>
> >>>>>Also we do not update last boosted vcpu in failure cases.
> >>>>>
> >>>>>   #endif
> >>>>>+
> >>>>>   void kvm_vcpu_on_spin(struct kvm_vcpu *me)
> >>>>>   {
> >>>>>       struct kvm *kvm = me->kvm;
> >>>>>@@ -1727,11 +1727,12 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
> >>>>>                   continue;
> >>>>>               if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
> >>>>>                   continue;
> >>>>>-            if (kvm_vcpu_yield_to(vcpu)) {
> >>>>>+
> >>>>>+            yielded = kvm_vcpu_yield_to(vcpu);
> >>>>>+            if (yielded > 0)
> >>>>>                   kvm->last_boosted_vcpu = i;
> >>>>>-                yielded = 1;
> >>>>>+            if (yielded)
> >>>>>                   break;
> >>>>>-            }
> >>>>>           }
> >>>>
> >>>>If yielded == -ESRCH, should we not try to yield to another vcpu?
> >>>>
> >>>
> >>>   Yes. plan is to abort the iteration. since it means we are mostly
> >>>undercommitted.
> >>
> >>Sorry if it was ambiguous. I wanted to say we do not want to continue
> >>yield to another vcpu..
> >>
> >
> >
> >Why not?  We found that this particular vcpu is running and therefore
> >likely not a lock holder.  That says nothing about other vcpus.  The
> >next in line might be runnable-but-not-running on another runqueue.
> 
> Agree that next in the line might be runnable-not-running.  But here we
> are optimistic that, that is not the case and we save time by
> returning back instead of iterating, thinking we are mostly in
> undercommitted case and each vcpu has dedicated cpu.
> 
> Probably an alternative we have here is to look for say 2-3 successive
> failures before breaking out?

Hi Avi,

I tried the idea of bailing out only on successive failures too (hunk
below). Results are as follows.

base = 3.7.0-rc1 
A = base +  patch 1 +  patch 2 (original series except patch 3)
B = A + bail out on successive failures patch (hunk below)

% improvement w.r.t. base kernel, 32 vcpu guest on a 32 core PLE machine

                          A               B
kernbench_1x         1.83959         0.95158
kernbench_2x         5.58283         8.31783

ebizzy_1x            144.96959       147.47995
ebizzy_2x            -11.52278       -4.52835
ebizzy_3x            -7.59141        -5.17241

dbench_1x            87.46935        61.14888
dbench_2x            -7.16749        -4.17130
dbench_3x            -0.34115        -3.18740


IMO, the original patch would have affected moderate overcommit more
when the average runqueue length is less than two but we are still
overcommitted.

With this patch, though we may get a little less improvement for 1x
overcommit, the probability of it affecting the moderate overcommit
situation reduces drastically.

If we consider cases with the following average run queue lengths, the
corresponding probability of us (wrongly) treating them as the
undercommitted case, with rough/simple math, is as follows (correct me
if something is wrong here):

(In precise terms it should be the probability of hitting a source and
target runqueue of length one, while we are actually overcommitted, for
two successive trials, noting that the source of the yield_to remains
the same.)

(Probability = p^3, where p is the probability that we hit a cpu with
rq length = 1. I have left the number of cpus out of the calculation
here, though it does have an effect.)
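
(For concreteness -- this is my own simplifying assumption, not something
stated in the patch: suppose every cpu's run queue has length either 1 or 2.
Then an average length L means a fraction

	p = 2 - L

of the cpus have an rq of length 1, which reproduces the numbers in the
table below:

	L = 1.25  ->  p = 3/4,  p^3 = 27/64
	L = 1.50  ->  p = 1/2,  p^3 = 1/8
	L = 1.75  ->  p = 1/4,  p^3 = 1/64 )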

average     Probability
rq length
---------------------------------
1.25       27/64  Note: we are only slightly overcommitted here, so hopefully it does not affect much.
1.5        1/8
1.75       1/64

For the original patch it was p^2, hence 9/16, 1/4 and 1/16 respectively.
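
To double-check those fractions, here is a tiny user-space snippet (purely
illustrative, my own, not kernel code) that re-derives them under the same
assumption that run queue lengths are only ever 1 or 2:

#include <stdio.h>

int main(void)
{
	double avg[] = { 1.25, 1.5, 1.75 };
	int i;

	for (i = 0; i < 3; i++) {
		/* p = probability of hitting a cpu with rq length 1,
		 * assuming rq lengths are only ever 1 or 2 */
		double p = 2.0 - avg[i];

		/* original patch bails after 1 failure: p^2
		 * (source rq of length 1 + one failed target);
		 * successive-failure version bails after 2: p^3
		 * (same source + two failed targets) */
		printf("avg %.2f: p^2 = %.6f, p^3 = %.6f\n",
		       avg[i], p * p, p * p * p);
	}
	return 0;
}

For the 1.25 case this prints 0.5625 (9/16) vs 0.421875 (27/64), and so on
down the table.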

I think continuing to yield_to beyond this would put us back at square
one, unnecessarily wasting time.

Please let me know if you have any comments on the idea of bailing out after successive failures.

(Side note: a dynamic PLE window built on top of this logic is already
done, and I will be posting it with results in a separate patch.)

Changed hunk:
---8<---
@@ -1697,12 +1696,14 @@ bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 	return eligible;
 }
 #endif
+
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {
 	struct kvm *kvm = me->kvm;
 	struct kvm_vcpu *vcpu;
 	int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
 	int yielded = 0;
+	int try = 2;
 	int pass;
 	int i;
 
@@ -1714,7 +1715,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 	 * VCPU is holding the lock that we need and will release it.
 	 * We approximate round-robin by starting at the last boosted VCPU.
 	 */
-	for (pass = 0; pass < 2 && !yielded; pass++) {
+	for (pass = 0; pass < 2 && !yielded && try; pass++) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (!pass && i <= last_boosted_vcpu) {
 				i = last_boosted_vcpu;
@@ -1727,10 +1728,15 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;
-			if (kvm_vcpu_yield_to(vcpu)) {
+
+			yielded = kvm_vcpu_yield_to(vcpu);
+			if (yielded > 0) {
 				kvm->last_boosted_vcpu = i;
-				yielded = 1;
 				break;
+			} else if (yielded < 0) {
+				try--;
+				if (!try)
+					break;
 			}
 		}
 	}


Patch

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index be70035..e376434 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1639,6 +1639,7 @@  bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
 	struct pid *pid;
 	struct task_struct *task = NULL;
+	bool ret = false;
 
 	rcu_read_lock();
 	pid = rcu_dereference(target->pid);
@@ -1646,17 +1647,15 @@  bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 		task = get_pid_task(target->pid, PIDTYPE_PID);
 	rcu_read_unlock();
 	if (!task)
-		return false;
+		return ret;
 	if (task->flags & PF_VCPU) {
 		put_task_struct(task);
-		return false;
-	}
-	if (yield_to(task, 1)) {
-		put_task_struct(task);
-		return true;
+		return ret;
 	}
+	ret = yield_to(task, 1);
 	put_task_struct(task);
-	return false;
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
 
@@ -1697,6 +1696,7 @@  bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 	return eligible;
 }
 #endif
+
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {
 	struct kvm *kvm = me->kvm;
@@ -1727,11 +1727,12 @@  void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;
-			if (kvm_vcpu_yield_to(vcpu)) {
+
+			yielded = kvm_vcpu_yield_to(vcpu);
+			if (yielded > 0)
 				kvm->last_boosted_vcpu = i;
-				yielded = 1;
+			if (yielded)
 				break;
-			}
 		}
 	}
 	kvm_vcpu_set_in_spin_loop(me, false);