diff mbox series

[v6,2/5] KVM: x86: Dirty quota-based throttling of vcpus

Message ID 20220915101049.187325-3-shivam.kumar1@nutanix.com (mailing list archive)
State New, archived
Headers show
Series KVM: Dirty quota-based throttling | expand

Commit Message

Shivam Kumar Sept. 15, 2022, 10:10 a.m. UTC
Exit to userspace whenever the dirty quota is exhausted (i.e. dirty count
equals/exceeds dirty quota) to request more dirty quota.

Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com>
Suggested-by: Manish Mishra <manish.mishra@nutanix.com>
Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com>
---
 arch/x86/kvm/mmu/spte.c | 4 ++--
 arch/x86/kvm/vmx/vmx.c  | 3 +++
 arch/x86/kvm/x86.c      | 9 +++++++++
 3 files changed, 14 insertions(+), 2 deletions(-)

Comments

Sean Christopherson Oct. 7, 2022, 7:30 p.m. UTC | #1
On Thu, Sep 15, 2022, Shivam Kumar wrote:
> index c9b49a09e6b5..140a5511ed45 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5749,6 +5749,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
>  		 */
>  		if (__xfer_to_guest_mode_work_pending())
>  			return 1;
> +
> +		if (!kvm_vcpu_check_dirty_quota(vcpu))

A dedicated helper is no longer necessary, this can _test_ the event, a la
KVM_REQ_EVENT above:

		if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu))
			return 1;

> +			return 0;
>  	}
>  
>  	return 1;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 43a6a7efc6ec..8447e70b26f5 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10379,6 +10379,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  			r = 0;
>  			goto out;
>  		}
> +		if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) {
> +			struct kvm_run *run = vcpu->run;
> +

> +			run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
> +			run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied;
> +			run->dirty_quota_exit.quota = vcpu->dirty_quota;

As mentioned in patch 1, the code snippet I suggested is bad.  With a request,
there's no need to snapshot the quota prior to making the request.  If userspace
changes the quota, then using the new quota is perfectly ok since KVM is still
providing sane data.  If userspace lowers the quota, an exit will still occur as
expected.  If userspace disables or increases the quota to the point where no exit
is necessary, then userspace can't expect an exit and won't even be aware that KVM
temporarily detected an exhausted quota.

And all of this belongs in a common KVM helper.  Name isn't perfect, but it aligns
with kvm_check_request().

#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;

	run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
	run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied;
	run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota);

	/*
	 * Re-check the quota and exit if and only if the vCPU still exceeds its
	 * quota.  If userspace increases (or disables entirely) the quota, then
	 * no exit is required as KVM is still honoring its ABI, e.g. userspace
	 * won't even be aware that KVM temporarily detected an exhausted quota.
	 */
	return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; 
}
#endif

And then arch usage is simply something like:

		if (kvm_check_dirty_quota_request(vcpu)) {
			r = 0;
			goto out;
		}
Shivam Kumar Oct. 9, 2022, 7:05 p.m. UTC | #2
On 08/10/22 1:00 am, Sean Christopherson wrote:
> On Thu, Sep 15, 2022, Shivam Kumar wrote:
>> index c9b49a09e6b5..140a5511ed45 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -5749,6 +5749,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
>>   		 */
>>   		if (__xfer_to_guest_mode_work_pending())
>>   			return 1;
>> +
>> +		if (!kvm_vcpu_check_dirty_quota(vcpu))
> 
> A dedicated helper is no longer necessary, this can _test_ the event, a la
> KVM_REQ_EVENT above:
> 
> 		if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu))
> 			return 1;
> 
>> +			return 0;
>>   	}
>>   
>>   	return 1;
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 43a6a7efc6ec..8447e70b26f5 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -10379,6 +10379,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>   			r = 0;
>>   			goto out;
>>   		}
>> +		if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) {
>> +			struct kvm_run *run = vcpu->run;
>> +
> 
>> +			run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
>> +			run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied;
>> +			run->dirty_quota_exit.quota = vcpu->dirty_quota;
> 
> As mentioned in patch 1, the code snippet I suggested is bad.  With a request,
> there's no need to snapshot the quota prior to making the request.  If userspace
> changes the quota, then using the new quota is perfectly ok since KVM is still
> providing sane data.  If userspace lowers the quota, an exit will still occur as
> expected.  If userspace disables or increases the quota to the point where no exit
> is necessary, then userspace can't expect an exit and won't even be aware that KVM
> temporarily detected an exhausted quota.
> 
> And all of this belongs in a common KVM helper.  Name isn't perfect, but it aligns
> with kvm_check_request().
> 
> #ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
> static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu)
> {
> 	struct kvm_run *run = vcpu->run;
> 
> 	run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
> 	run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied;
> 	run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota);
> 
> 	/*
> 	 * Re-check the quota and exit if and only if the vCPU still exceeds its
> 	 * quota.  If userspace increases (or disables entirely) the quota, then
> 	 * no exit is required as KVM is still honoring its ABI, e.g. userspace
> 	 * won't even be aware that KVM temporarily detected an exhausted quota.
> 	 */
> 	return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota;
> }
> #endif
> 
> And then arch usage is simply something like:
> 
> 		if (kvm_check_dirty_quota_request(vcpu)) {
> 			r = 0;
> 			goto out;
> 		}
If we are not even checking for request KVM_REQ_DIRTY_QUOTA_EXIT, what's 
the use of kvm_make_request in patch 1?
Shivam Kumar Oct. 9, 2022, 7:17 p.m. UTC | #3
On 08/10/22 1:00 am, Sean Christopherson wrote:
> On Thu, Sep 15, 2022, Shivam Kumar wrote:
>> index c9b49a09e6b5..140a5511ed45 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -5749,6 +5749,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
>>   		 */
>>   		if (__xfer_to_guest_mode_work_pending())
>>   			return 1;
>> +
>> +		if (!kvm_vcpu_check_dirty_quota(vcpu))
> 
> A dedicated helper is no longer necessary, this can _test_ the event, a la
> KVM_REQ_EVENT above:
> 
> 		if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu))
> 			return 1;
Sure, thanks.
> 
>> +			return 0;
>>   	}
>>   
>>   	return 1;
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 43a6a7efc6ec..8447e70b26f5 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -10379,6 +10379,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>   			r = 0;
>>   			goto out;
>>   		}
>> +		if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) {
>> +			struct kvm_run *run = vcpu->run;
>> +
> 
>> +			run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
>> +			run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied;
>> +			run->dirty_quota_exit.quota = vcpu->dirty_quota;
> 
> As mentioned in patch 1, the code snippet I suggested is bad.  With a request,
> there's no need to snapshot the quota prior to making the request.  If userspace
> changes the quota, then using the new quota is perfectly ok since KVM is still
> providing sane data.  If userspace lowers the quota, an exit will still occur as
> expected.  If userspace disables or increases the quota to the point where no exit
> is necessary, then userspace can't expect an exit and won't even be aware that KVM
> temporarily detected an exhausted quota.
> 
> And all of this belongs in a common KVM helper.  Name isn't perfect, but it aligns
> with kvm_check_request().
> 
> #ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
> static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu)
> {
> 	struct kvm_run *run = vcpu->run;
> 
> 	run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
> 	run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied;
> 	run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota);
> 
> 	/*
> 	 * Re-check the quota and exit if and only if the vCPU still exceeds its
> 	 * quota.  If userspace increases (or disables entirely) the quota, then
> 	 * no exit is required as KVM is still honoring its ABI, e.g. userspace
> 	 * won't even be aware that KVM temporarily detected an exhausted quota.
> 	 */
> 	return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota;
> }
> #endif
> 
> And then arch usage is simply something like:
> 
> 		if (kvm_check_dirty_quota_request(vcpu)) {
> 			r = 0;
> 			goto out;
> 		}
Shivam Kumar Oct. 18, 2022, 5:43 p.m. UTC | #4
>>> @@ -10379,6 +10379,15 @@ static int vcpu_enter_guest(struct kvm_vcpu 
>>> *vcpu)
>>>               r = 0;
>>>               goto out;
>>>           }
>>> +        if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) {
>>> +            struct kvm_run *run = vcpu->run;
>>> +
>>
>>> +            run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
>>> +            run->dirty_quota_exit.count = 
>>> vcpu->stat.generic.pages_dirtied;
>>> +            run->dirty_quota_exit.quota = vcpu->dirty_quota;
>>
>> As mentioned in patch 1, the code snippet I suggested is bad.  
>> With a request,
>> there's no need to snapshot the quota prior to making the request.  If 
>> userspace
>> changes the quota, then using the new quota is perfectly ok since KVM 
>> is still
>> providing sane data.  If userspace lowers the quota, an exit will 
>> still occur as
>> expected.  If userspace disables or increases the quota to the point 
>> where no exit
>> is necessary, then userspace can't expect an exit and won't even be 
>> aware that KVM
>> temporarily detected an exhausted quota.
>>
>> And all of this belongs in a common KVM helper.  Name isn't perfect, 
>> but it aligns
>> with kvm_check_request().
>>
>> #ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
>> static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu)
>> {
>>     struct kvm_run *run = vcpu->run;
>>
>>     run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
>>     run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied;
>>     run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota);
>>
>>     /*
>>      * Re-check the quota and exit if and only if the vCPU still 
>> exceeds its
>>      * quota.  If userspace increases (or disables entirely) the 
>> quota, then
>>      * no exit is required as KVM is still honoring its ABI, e.g. 
>> userspace
>>      * won't even be aware that KVM temporarily detected an exhausted 
>> quota.
>>      */
>>     return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota;
>> }
>> #endif
>>
>> And then arch usage is simply something like:
>>
>>         if (kvm_check_dirty_quota_request(vcpu)) {
>>             r = 0;
>>             goto out;
>>         }
> If we are not even checking for request KVM_REQ_DIRTY_QUOTA_EXIT, what's 
> the use of kvm_make_request in patch 1?
Ok, so we don't need to explicitly check for request here because we 
are checking the quota directly but we do need to make the request when 
the quota is exhausted so as to guarantee that we enter the if block "if 
(kvm_request_pending(vcpu))".

Please let me know if my understanding is correct or if I am missing 
something.

Also, should I add ifdef here:

	#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
	if (kvm_check_dirty_quota_request(vcpu)) {
		r = 0;
		goto out;
	}
	#endif

Or should I bring the ifdef inside kvm_check_dirty_quota_request and 
return false if not defined.

static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
	struct kvm_run *run = vcpu->run;

	run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
	run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied;
	run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota);

	/*
	 * Re-check the quota and exit if and only if the vCPU still exceeds its
	 * quota.  If userspace increases (or disables entirely) the quota, then
	 * no exit is required as KVM is still honoring its ABI, e.g. userspace
	 * won't even be aware that KVM temporarily detected an exhausted quota.
	 */
	return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota;
#else
	return false;
#endif
}



Thanks a lot for the reviews so far. Looking forward to your reply.
Sean Christopherson Oct. 19, 2022, 3:42 p.m. UTC | #5
On Tue, Oct 18, 2022, Shivam Kumar wrote:
> > > #ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
> > > static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu)
> > > {
> > >     struct kvm_run *run = vcpu->run;
> > > 
> > >     run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
> > >     run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied;
> > >     run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota);
> > > 
> > >     /*
> > >      * Re-check the quota and exit if and only if the vCPU still
> > > exceeds its
> > >      * quota.  If userspace increases (or disables entirely) the
> > > quota, then
> > >      * no exit is required as KVM is still honoring its ABI, e.g.
> > > userspace
> > >      * won't even be aware that KVM temporarily detected an
> > > exhausted quota.
> > >      */
> > >     return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota;
> > > }
> > > #endif
> > > 
> > > And then arch usage is simply something like:
> > > 
> > >         if (kvm_check_dirty_quota_request(vcpu)) {
> > >             r = 0;
> > >             goto out;
> > >         }
> > If we are not even checking for request KVM_REQ_DIRTY_QUOTA_EXIT, what's
> > the use of kvm_make_request in patch 1?
> Ok, so we don't need to explicitely check for request here because we are
> checking the quota directly but we do need to make the request when the
> quota is exhausted so as to guarantee that we enter the if block "if
> (kvm_request_pending(vcpu))".
> 
> Please let me know if my understanding is correct or if I am missing
> something.

The request needs to be explicitly checked, I simply unintentionally omitted that
code in the above snippet.  kvm_check_request() also _clears_ the request, which
is very important as not clearing the request will prevent KVM from entering the
guest.

The intent is to check KVM_REQ_DIRTY_QUOTA_EXIT in kvm_check_dirty_quota_request().

> 
> Also, should I add ifdef here:
> 
> 	#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
> 	if (kvm_check_dirty_quota_request(vcpu)) {
> 		r = 0;
> 		goto out;
> 	}
> 	#endif

No need for an #ifdef in the caller, the call is from arch code and architectures
that don't "select HAVE_KVM_DIRTY_QUOTA" shouldn't be checking for a dirty quota
exit.
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 2e08b2a45361..c0ed35abbf2d 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -228,9 +228,9 @@  bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		  "spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level,
 		  get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level));
 
-	if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
+	if (spte & PT_WRITABLE_MASK) {
 		/* Enforced by kvm_mmu_hugepage_adjust. */
-		WARN_ON(level > PG_LEVEL_4K);
+		WARN_ON(level > PG_LEVEL_4K && kvm_slot_dirty_track_enabled(slot));
 		mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
 	}
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c9b49a09e6b5..140a5511ed45 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5749,6 +5749,9 @@  static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 		 */
 		if (__xfer_to_guest_mode_work_pending())
 			return 1;
+
+		if (!kvm_vcpu_check_dirty_quota(vcpu))
+			return 0;
 	}
 
 	return 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 43a6a7efc6ec..8447e70b26f5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10379,6 +10379,15 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 0;
 			goto out;
 		}
+		if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) {
+			struct kvm_run *run = vcpu->run;
+
+			run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
+			run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied;
+			run->dirty_quota_exit.quota = vcpu->dirty_quota;
+			r = 0;
+			goto out;
+		}
 
 		/*
 		 * KVM_REQ_HV_STIMER has to be processed after