Message ID | 20221113170507.208810-3-shivam.kumar1@nutanix.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: Dirty quota-based throttling | expand |
On Sun, Nov 13, 2022 at 05:05:08PM +0000, Shivam Kumar wrote: > Exit to userspace whenever the dirty quota is exhausted (i.e. dirty count > equals/exceeds dirty quota) to request more dirty quota. > > Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com> > Suggested-by: Manish Mishra <manish.mishra@nutanix.com> > Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com> > Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com> > Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com> > --- > arch/x86/kvm/mmu/spte.c | 4 ++-- > arch/x86/kvm/vmx/vmx.c | 3 +++ > arch/x86/kvm/x86.c | 28 ++++++++++++++++++++++++++++ > 3 files changed, 33 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c > index 2e08b2a45361..c0ed35abbf2d 100644 > --- a/arch/x86/kvm/mmu/spte.c > +++ b/arch/x86/kvm/mmu/spte.c > @@ -228,9 +228,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, > "spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level, > get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level)); > > - if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) { > + if (spte & PT_WRITABLE_MASK) { > /* Enforced by kvm_mmu_hugepage_adjust. */ > - WARN_ON(level > PG_LEVEL_4K); > + WARN_ON(level > PG_LEVEL_4K && kvm_slot_dirty_track_enabled(slot)); > mark_page_dirty_in_slot(vcpu->kvm, slot, gfn); > } > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index 63247c57c72c..cc130999eddf 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -5745,6 +5745,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) > */ > if (__xfer_to_guest_mode_work_pending()) > return 1; > + > + if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) > + return 1; Any reason for this check? Is this quota related to the invalid guest state? Sorry if I missed anything here. 
> } > > return 1; > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index ecea83f0da49..1a960fbb51f4 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -10494,6 +10494,30 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) > } > EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); > > +static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu) > +{ > +#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA > + struct kvm_run *run; > + > + if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) { > + run = vcpu->run; > + run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; > + run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; > + run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota); > + > + /* > + * Re-check the quota and exit if and only if the vCPU still > + * exceeds its quota. If userspace increases (or disables > + * entirely) the quota, then no exit is required as KVM is > + * still honoring its ABI, e.g. userspace won't even be aware > + * that KVM temporarily detected an exhausted quota. > + */ > + return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; Would it be better to check before updating the vcpu->run?
On 15/11/22 5:46 am, Yunhong Jiang wrote: > On Sun, Nov 13, 2022 at 05:05:08PM +0000, Shivam Kumar wrote: >> Exit to userspace whenever the dirty quota is exhausted (i.e. dirty count >> equals/exceeds dirty quota) to request more dirty quota. >> >> Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com> >> Suggested-by: Manish Mishra <manish.mishra@nutanix.com> >> Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com> >> Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com> >> Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com> >> --- >> arch/x86/kvm/mmu/spte.c | 4 ++-- >> arch/x86/kvm/vmx/vmx.c | 3 +++ >> arch/x86/kvm/x86.c | 28 ++++++++++++++++++++++++++++ >> 3 files changed, 33 insertions(+), 2 deletions(-) >> >> diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c >> index 2e08b2a45361..c0ed35abbf2d 100644 >> --- a/arch/x86/kvm/mmu/spte.c >> +++ b/arch/x86/kvm/mmu/spte.c >> @@ -228,9 +228,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, >> "spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level, >> get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level)); >> >> - if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) { >> + if (spte & PT_WRITABLE_MASK) { >> /* Enforced by kvm_mmu_hugepage_adjust. */ >> - WARN_ON(level > PG_LEVEL_4K); >> + WARN_ON(level > PG_LEVEL_4K && kvm_slot_dirty_track_enabled(slot)); >> mark_page_dirty_in_slot(vcpu->kvm, slot, gfn); >> } >> >> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c >> index 63247c57c72c..cc130999eddf 100644 >> --- a/arch/x86/kvm/vmx/vmx.c >> +++ b/arch/x86/kvm/vmx/vmx.c >> @@ -5745,6 +5745,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) >> */ >> if (__xfer_to_guest_mode_work_pending()) >> return 1; >> + >> + if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) >> + return 1; > Any reason for this check? Is this quota related to the invalid > guest state? Sorry if I missed anything here. 
Quoting Sean: "And thinking more about silly edge cases, VMX's big emulation loop for invalid guest state when unrestricted guest is disabled should probably explicitly check the dirty quota. Again, I doubt it matters to anyone's use case, but it is treated as a full run loop for things like pending signals, it'd be good to be consistent." Please see v4 for details. Thanks. > >> } >> >> return 1; >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index ecea83f0da49..1a960fbb51f4 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -10494,6 +10494,30 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) >> } >> EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); >> >> +static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu) >> +{ >> +#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA >> + struct kvm_run *run; >> + >> + if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) { >> + run = vcpu->run; >> + run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; >> + run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; >> + run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota); >> + >> + /* >> + * Re-check the quota and exit if and only if the vCPU still >> + * exceeds its quota. If userspace increases (or disables >> + * entirely) the quota, then no exit is required as KVM is >> + * still honoring its ABI, e.g. userspace won't even be aware >> + * that KVM temporarily detected an exhausted quota. >> + */ >> + return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; > Would it be better to check before updating the vcpu->run? The reason for checking it at the last moment is to avoid invalid exits to userspace as much as possible. Thanks and regards, Shivam
On Tue, Nov 15, 2022 at 10:25:31AM +0530, Shivam Kumar wrote: > > > On 15/11/22 5:46 am, Yunhong Jiang wrote: > > On Sun, Nov 13, 2022 at 05:05:08PM +0000, Shivam Kumar wrote: > > > Exit to userspace whenever the dirty quota is exhausted (i.e. dirty count > > > equals/exceeds dirty quota) to request more dirty quota. > > > > > > Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com> > > > Suggested-by: Manish Mishra <manish.mishra@nutanix.com> > > > Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com> > > > Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com> > > > Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com> > > > --- > > > arch/x86/kvm/mmu/spte.c | 4 ++-- > > > arch/x86/kvm/vmx/vmx.c | 3 +++ > > > arch/x86/kvm/x86.c | 28 ++++++++++++++++++++++++++++ > > > 3 files changed, 33 insertions(+), 2 deletions(-) > > > > > > diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c > > > index 2e08b2a45361..c0ed35abbf2d 100644 > > > --- a/arch/x86/kvm/mmu/spte.c > > > +++ b/arch/x86/kvm/mmu/spte.c > > > @@ -228,9 +228,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, > > > "spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level, > > > get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level)); > > > - if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) { > > > + if (spte & PT_WRITABLE_MASK) { > > > /* Enforced by kvm_mmu_hugepage_adjust. 
*/ > > > - WARN_ON(level > PG_LEVEL_4K); > > > + WARN_ON(level > PG_LEVEL_4K && kvm_slot_dirty_track_enabled(slot)); > > > mark_page_dirty_in_slot(vcpu->kvm, slot, gfn); > > > } > > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > > > index 63247c57c72c..cc130999eddf 100644 > > > --- a/arch/x86/kvm/vmx/vmx.c > > > +++ b/arch/x86/kvm/vmx/vmx.c > > > @@ -5745,6 +5745,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) > > > */ > > > if (__xfer_to_guest_mode_work_pending()) > > > return 1; > > > + > > > + if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) > > > + return 1; > > Any reason for this check? Is this quota related to the invalid > > guest state? Sorry if I missed anything here. > Quoting Sean: > "And thinking more about silly edge cases, VMX's big emulation loop for > invalid > guest state when unrestricted guest is disabled should probably explicitly > check > the dirty quota. Again, I doubt it matters to anyone's use case, but it is > treated > as a full run loop for things like pending signals, it'd be good to be > consistent." > > Please see v4 for details. Thanks. Thank you for the sharing. 
> > > > > } > > > return 1; > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > > index ecea83f0da49..1a960fbb51f4 100644 > > > --- a/arch/x86/kvm/x86.c > > > +++ b/arch/x86/kvm/x86.c > > > @@ -10494,6 +10494,30 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) > > > } > > > EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); > > > +static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu) > > > +{ > > > +#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA > > > + struct kvm_run *run; > > > + > > > + if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) { > > > + run = vcpu->run; > > > + run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; > > > + run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; > > > + run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota); > > > + > > > + /* > > > + * Re-check the quota and exit if and only if the vCPU still > > > + * exceeds its quota. If userspace increases (or disables > > > + * entirely) the quota, then no exit is required as KVM is > > > + * still honoring its ABI, e.g. userspace won't even be aware > > > + * that KVM temporarily detected an exhausted quota. > > > + */ > > > + return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; > > Would it be better to check before updating the vcpu->run? > The reason for checking it at the last moment is to avoid invalid exits to > userspace as much as possible. So if the userspace increases the quota, then the above vcpu->run change just leaves some garbage information on vcpu->run and the exit_reason is misleading. Possibly it's ok since this information will not be used anymore. Not sure how critical is the time spent on the vcpu->run update.
On 15/11/22 12:15 pm, Yunhong Jiang wrote: > On Tue, Nov 15, 2022 at 10:25:31AM +0530, Shivam Kumar wrote: >> >> >> On 15/11/22 5:46 am, Yunhong Jiang wrote: >>> On Sun, Nov 13, 2022 at 05:05:08PM +0000, Shivam Kumar wrote: >>>> Exit to userspace whenever the dirty quota is exhausted (i.e. dirty count >>>> equals/exceeds dirty quota) to request more dirty quota. >>>> >>>> Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com> >>>> Suggested-by: Manish Mishra <manish.mishra@nutanix.com> >>>> Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com> >>>> Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com> >>>> Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com> >>>> --- >>>> arch/x86/kvm/mmu/spte.c | 4 ++-- >>>> arch/x86/kvm/vmx/vmx.c | 3 +++ >>>> arch/x86/kvm/x86.c | 28 ++++++++++++++++++++++++++++ >>>> 3 files changed, 33 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c >>>> index 2e08b2a45361..c0ed35abbf2d 100644 >>>> --- a/arch/x86/kvm/mmu/spte.c >>>> +++ b/arch/x86/kvm/mmu/spte.c >>>> @@ -228,9 +228,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, >>>> "spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level, >>>> get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level)); >>>> - if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) { >>>> + if (spte & PT_WRITABLE_MASK) { >>>> /* Enforced by kvm_mmu_hugepage_adjust. 
*/ >>>> - WARN_ON(level > PG_LEVEL_4K); >>>> + WARN_ON(level > PG_LEVEL_4K && kvm_slot_dirty_track_enabled(slot)); >>>> mark_page_dirty_in_slot(vcpu->kvm, slot, gfn); >>>> } >>>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c >>>> index 63247c57c72c..cc130999eddf 100644 >>>> --- a/arch/x86/kvm/vmx/vmx.c >>>> +++ b/arch/x86/kvm/vmx/vmx.c >>>> @@ -5745,6 +5745,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) >>>> */ >>>> if (__xfer_to_guest_mode_work_pending()) >>>> return 1; >>>> + >>>> + if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) >>>> + return 1; >>> Any reason for this check? Is this quota related to the invalid >>> guest state? Sorry if I missed anything here. >> Quoting Sean: >> "And thinking more about silly edge cases, VMX's big emulation loop for >> invalid >> guest state when unrestricted guest is disabled should probably explicitly >> check >> the dirty quota. Again, I doubt it matters to anyone's use case, but it is >> treated >> as a full run loop for things like pending signals, it'd be good to be >> consistent." >> >> Please see v4 for details. Thanks. > Thank you for the sharing. 
>>> >>>> } >>>> return 1; >>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>>> index ecea83f0da49..1a960fbb51f4 100644 >>>> --- a/arch/x86/kvm/x86.c >>>> +++ b/arch/x86/kvm/x86.c >>>> @@ -10494,6 +10494,30 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) >>>> } >>>> EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); >>>> +static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu) >>>> +{ >>>> +#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA >>>> + struct kvm_run *run; >>>> + >>>> + if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) { >>>> + run = vcpu->run; >>>> + run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; >>>> + run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; >>>> + run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota); >>>> + >>>> + /* >>>> + * Re-check the quota and exit if and only if the vCPU still >>>> + * exceeds its quota. If userspace increases (or disables >>>> + * entirely) the quota, then no exit is required as KVM is >>>> + * still honoring its ABI, e.g. userspace won't even be aware >>>> + * that KVM temporarily detected an exhausted quota. >>>> + */ >>>> + return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; >>> Would it be better to check before updating the vcpu->run? >> The reason for checking it at the last moment is to avoid invalid exits to >> userspace as much as possible. > > So if the userspace increases the quota, then the above vcpu->run change just > leaves some garbage information on vcpu->run and the exit_reason is > misleading. Possibly it's ok since this information will not be used anymore. > > Not sure how critical is the time spent on the vcpu->run update. IMO the time spent in the update might not be very significant but the garbage value is harmless. Thanks, Shivam
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 2e08b2a45361..c0ed35abbf2d 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -228,9 +228,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, "spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level, get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level)); - if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) { + if (spte & PT_WRITABLE_MASK) { /* Enforced by kvm_mmu_hugepage_adjust. */ - WARN_ON(level > PG_LEVEL_4K); + WARN_ON(level > PG_LEVEL_4K && kvm_slot_dirty_track_enabled(slot)); mark_page_dirty_in_slot(vcpu->kvm, slot, gfn); } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 63247c57c72c..cc130999eddf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5745,6 +5745,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) */ if (__xfer_to_guest_mode_work_pending()) return 1; + + if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) + return 1; } return 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ecea83f0da49..1a960fbb51f4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10494,6 +10494,30 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); +static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA + struct kvm_run *run; + + if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) { + run = vcpu->run; + run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; + run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; + run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota); + + /* + * Re-check the quota and exit if and only if the vCPU still + * exceeds its quota. If userspace increases (or disables + * entirely) the quota, then no exit is required as KVM is + * still honoring its ABI, e.g. 
userspace won't even be aware + * that KVM temporarily detected an exhausted quota. + */ + return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; + } +#endif + return false; +} + /* * Called within kvm->srcu read side. * Returns 1 to let vcpu_run() continue the guest execution loop without @@ -10625,6 +10649,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_dirty_quota_request(vcpu)) { + r = 0; + goto out; + } /* * KVM_REQ_HV_STIMER has to be processed after