Message ID | 20230225204758.17726-3-shivam.kumar1@nutanix.com (mailing list archive)
---|---
State | New, archived
Series | KVM: Dirty quota-based throttling
On Sat, Feb 25, 2023 at 08:47:59PM +0000, Shivam Kumar wrote:
> Call update_dirty_quota whenever a page is marked dirty with the
> appropriate arch-specific page size. Process the KVM request
> KVM_REQ_DIRTY_QUOTA_EXIT (raised by update_dirty_quota) to exit to
> userspace with exit reason KVM_EXIT_DIRTY_QUOTA_EXHAUSTED.
>
> Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com>
> Suggested-by: Manish Mishra <manish.mishra@nutanix.com>
> Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
> Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
> Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com>
> ---
>  arch/x86/kvm/Kconfig       |  1 +
>  arch/x86/kvm/mmu/mmu.c     |  8 +++++++-
>  arch/x86/kvm/mmu/spte.c    |  3 +++
>  arch/x86/kvm/mmu/tdp_mmu.c |  3 +++
>  arch/x86/kvm/vmx/vmx.c     |  5 +++++
>  arch/x86/kvm/x86.c         | 16 ++++++++++++++++
>  arch/x86/kvm/xen.c         | 12 +++++++++++-
>  7 files changed, 46 insertions(+), 2 deletions(-)
>
> [...]
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index c8ebe542c565..e0c8348ecdf1 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3323,8 +3323,14 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>  	if (!try_cmpxchg64(sptep, &old_spte, new_spte))
>  		return false;
>
> -	if (is_writable_pte(new_spte) && !is_writable_pte(old_spte))
> +	if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) {
> +#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
> +		struct kvm_mmu_page *sp = sptep_to_sp(sptep);
> +
> +		update_dirty_quota(vcpu->kvm, (1L << SPTE_LEVEL_SHIFT(sp->role.level)));
> +#endif
>  		mark_page_dirty_in_slot(vcpu->kvm, fault->slot, fault->gfn);

Is it possible to call update_dirty_quota() from mark_page_dirty_in_slot()?
Then other architectures could be covered as well.

> +	}
>
>  	return true;
>  }
>
> [...]
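For context, Yuan's suggestion would fold the quota update into the generic helper in virt/kvm/kvm_main.c so that every architecture calling it is covered. A minimal sketch of that idea, simplified from the real helper (the dirty-ring path and sanity checks are elided) and not part of the posted series:

#include <linux/kvm_host.h>

/*
 * Sketch only: charge the quota in the one place all architectures
 * already call when they dirty a page.
 */
void mark_page_dirty_in_slot(struct kvm *kvm,
			     const struct kvm_memory_slot *memslot,
			     gfn_t gfn)
{
#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
	/*
	 * PAGE_SIZE is an assumption: this gfn-based interface carries
	 * no page-size information, so the huge-page sizes the x86 MMU
	 * callers pass today would be lost.
	 */
	update_dirty_quota(kvm, PAGE_SIZE);
#endif
	if (memslot && kvm_slot_dirty_track_enabled(memslot))
		set_bit_le(gfn - memslot->base_gfn, memslot->dirty_bitmap);
}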
On 28/02/23 7:01 am, Yuan Yao wrote:
>> [...]
>> +#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
>> +		struct kvm_mmu_page *sp = sptep_to_sp(sptep);
>> +
>> +		update_dirty_quota(vcpu->kvm, (1L << SPTE_LEVEL_SHIFT(sp->role.level)));
>> +#endif
>>  		mark_page_dirty_in_slot(vcpu->kvm, fault->slot, fault->gfn);
>
> Is it possible to call update_dirty_quota() from mark_page_dirty_in_slot()?
> Then other architectures could be covered as well.

As Marc commented on the first patch of this patchset, mark_page_dirty_in_slot
can be called multiple times for the same page, e.g. in the case of PML for
nested guests. If bitmap-based dirty tracking is not enabled, we might not be
able to handle those cases without adding an extra parameter to
mark_page_dirty_in_slot (one that tells us whether a dirty quota update is
required). Thanks.

Thanks,
Shivam
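The extra-parameter variant Shivam describes might look roughly like this (the parameter name and call-site choices are illustrative, not from the posted series):

#include <linux/kvm_host.h>

/*
 * Sketch only: let callers opt out of the quota charge when they may
 * re-mark a page that is already dirty, so the same write is not
 * counted against the quota twice.
 */
void mark_page_dirty_in_slot(struct kvm *kvm,
			     const struct kvm_memory_slot *memslot,
			     gfn_t gfn, bool update_quota)
{
#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
	/* e.g. PML for nested guests would pass update_quota = false. */
	if (update_quota)
		update_dirty_quota(kvm, PAGE_SIZE);
#endif
	if (memslot && kvm_slot_dirty_track_enabled(memslot))
		set_bit_le(gfn - memslot->base_gfn, memslot->dirty_bitmap);
}

A fault path would then pass true, e.g. mark_page_dirty_in_slot(vcpu->kvm, fault->slot, fault->gfn, true), while a path that can legitimately re-dirty the same page would pass false.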
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 8e578311ca9d..8621a9512572 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -48,6 +48,7 @@ config KVM
 	select KVM_VFIO
 	select SRCU
 	select INTERVAL_TREE
+	select HAVE_KVM_DIRTY_QUOTA
 	select HAVE_KVM_PM_NOTIFIER if PM
 	select KVM_GENERIC_HARDWARE_ENABLING
 	help
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c8ebe542c565..e0c8348ecdf1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3323,8 +3323,14 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 	if (!try_cmpxchg64(sptep, &old_spte, new_spte))
 		return false;
 
-	if (is_writable_pte(new_spte) && !is_writable_pte(old_spte))
+	if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) {
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+		struct kvm_mmu_page *sp = sptep_to_sp(sptep);
+
+		update_dirty_quota(vcpu->kvm, (1L << SPTE_LEVEL_SHIFT(sp->role.level)));
+#endif
 		mark_page_dirty_in_slot(vcpu->kvm, fault->slot, fault->gfn);
+	}
 
 	return true;
 }
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index c15bfca3ed15..15f4f1d97ce9 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -243,6 +243,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
 		/* Enforced by kvm_mmu_hugepage_adjust. */
 		WARN_ON(level > PG_LEVEL_4K);
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+		update_dirty_quota(vcpu->kvm, (1L << SPTE_LEVEL_SHIFT(level)));
+#endif
 		mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
 	}
 
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 7c25dbf32ecc..4bf98e96343d 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -358,6 +358,9 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
 
 	if ((!is_writable_pte(old_spte) || pfn_changed) &&
 	    is_writable_pte(new_spte)) {
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+		update_dirty_quota(kvm, (1L << SPTE_LEVEL_SHIFT(level)));
+#endif
 		slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
 		mark_page_dirty_in_slot(kvm, slot, gfn);
 	}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bcac3efcde41..da4c6342a647 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5861,6 +5861,11 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 		 */
 		if (__xfer_to_guest_mode_work_pending())
 			return 1;
+
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+		if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu))
+			return 1;
+#endif
 	}
 
 	return 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7713420abab0..1733be829197 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3092,6 +3092,9 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 
 	guest_hv_clock->version = ++vcpu->hv_clock.version;
 
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+	update_dirty_quota(v->kvm, PAGE_SIZE);
+#endif
 	mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
 	read_unlock_irqrestore(&gpc->lock, flags);
 
@@ -3566,6 +3569,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 out:
 	user_access_end();
 dirty:
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+	update_dirty_quota(vcpu->kvm, PAGE_SIZE);
+#endif
 	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
@@ -4815,6 +4821,9 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
 		vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
 
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+	update_dirty_quota(vcpu->kvm, PAGE_SIZE);
+#endif
 	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
@@ -10514,6 +10523,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 0;
 			goto out;
 		}
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+		if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
+			r = 0;
+			goto out;
+		}
+#endif
 
 		/*
 		 * KVM_REQ_HV_STIMER has to be processed after
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 40edf4d1974c..00a3ac438539 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -435,9 +435,16 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 
 	read_unlock_irqrestore(&gpc1->lock, flags);
 
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+	update_dirty_quota(v->kvm, PAGE_SIZE);
+#endif
 	mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT);
-	if (user_len2)
+	if (user_len2) {
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+		update_dirty_quota(v->kvm, PAGE_SIZE);
+#endif
 		mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT);
+	}
 }
 
 void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
@@ -549,6 +556,9 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
 	if (v->arch.xen.upcall_vector)
 		kvm_xen_inject_vcpu_vector(v);
 
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+	update_dirty_quota(v->kvm, PAGE_SIZE);
+#endif
 	mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
 }