Message ID | 20220915101049.187325-3-shivam.kumar1@nutanix.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: Dirty quota-based throttling | expand |
On Thu, Sep 15, 2022, Shivam Kumar wrote: > index c9b49a09e6b5..140a5511ed45 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -5749,6 +5749,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) > */ > if (__xfer_to_guest_mode_work_pending()) > return 1; > + > + if (!kvm_vcpu_check_dirty_quota(vcpu)) A dedicated helper is no longer necessary, this can _test_ the event, a la KVM_REQ_EVENT above: if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) return 1; > + return 0; > } > > return 1; > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 43a6a7efc6ec..8447e70b26f5 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -10379,6 +10379,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > r = 0; > goto out; > } > + if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) { > + struct kvm_run *run = vcpu->run; > + > + run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; > + run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; > + run->dirty_quota_exit.quota = vcpu->dirty_quota; As mentioned in patch 1, the code snippet I suggested is bad. With a request, there's no need to snapshot the quota prior to making the request. If userspace changes the quota, then using the new quota is perfectly ok since KVM is still providing sane data. If userspace lowers the quota, an exit will still occur as expected. If userspace disables or increases the quota to the point where no exit is necessary, then userspace can't expect an exit and won't even be aware that KVM temporarily detected an exhausted quota. And all of this belongs in a common KVM helper. Name isn't perfect, but it aligns with kvm_check_request(). 
#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota); /* * Re-check the quota and exit if and only if the vCPU still exceeds its * quota. If userspace increases (or disables entirely) the quota, then * no exit is required as KVM is still honoring its ABI, e.g. userspace * won't even be aware that KVM temporarily detected an exhausted quota. */ return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; } #endif And then arch usage is simply something like: if (kvm_check_dirty_quota_request(vcpu)) { r = 0; goto out; }
On 08/10/22 1:00 am, Sean Christopherson wrote: > On Thu, Sep 15, 2022, Shivam Kumar wrote: >> index c9b49a09e6b5..140a5511ed45 100644 >> --- a/arch/x86/kvm/vmx/vmx.c >> +++ b/arch/x86/kvm/vmx/vmx.c >> @@ -5749,6 +5749,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) >> */ >> if (__xfer_to_guest_mode_work_pending()) >> return 1; >> + >> + if (!kvm_vcpu_check_dirty_quota(vcpu)) > > A dedicated helper is no longer necessary, this can _test_ the event, a la > KVM_REQ_EVENT above: > > if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) > return 1; > >> + return 0; >> } >> >> return 1; >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index 43a6a7efc6ec..8447e70b26f5 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -10379,6 +10379,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) >> r = 0; >> goto out; >> } >> + if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) { >> + struct kvm_run *run = vcpu->run; >> + > >> + run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; >> + run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; >> + run->dirty_quota_exit.quota = vcpu->dirty_quota; > > As mentioned in patch 1, the code code snippet I suggested is bad. With a request, > there's no need to snapshot the quota prior to making the request. If userspace > changes the quota, then using the new quota is perfectly ok since KVM is still > providing sane data. If userspace lowers the quota, an exit will still ocurr as > expected. If userspace disables or increases the quota to the point where no exit > is necessary, then userspace can't expect and exit and won't even be aware that KVM > temporarily detected an exhausted quota. > > And all of this belongs in a common KVM helper. Name isn't pefect, but it aligns > with kvm_check_request(). 
> > #ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA > static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu) > { > struct kvm_run *run = vcpu->run; > > run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; > run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; > run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota); > > /* > * Re-check the quota and exit if and only if the vCPU still exceeds its > * quota. If userspace increases (or disables entirely) the quota, then > * no exit is required as KVM is still honoring its ABI, e.g. userspace > * won't even be aware that KVM temporarily detected an exhausted quota. > */ > return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; > } > #endif > > And then arch usage is simply something like: > > if (kvm_check_dirty_quota_request(vcpu)) { > r = 0; > goto out; > } If we are not even checking for request KVM_REQ_DIRTY_QUOTA_EXIT, what's the use of kvm_make_request in patch 1?
On 08/10/22 1:00 am, Sean Christopherson wrote: > On Thu, Sep 15, 2022, Shivam Kumar wrote: >> index c9b49a09e6b5..140a5511ed45 100644 >> --- a/arch/x86/kvm/vmx/vmx.c >> +++ b/arch/x86/kvm/vmx/vmx.c >> @@ -5749,6 +5749,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) >> */ >> if (__xfer_to_guest_mode_work_pending()) >> return 1; >> + >> + if (!kvm_vcpu_check_dirty_quota(vcpu)) > > A dedicated helper is no longer necessary, this can _test_ the event, a la > KVM_REQ_EVENT above: > > if (kvm_test_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) > return 1; Sure, thanks. > >> + return 0; >> } >> >> return 1; >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index 43a6a7efc6ec..8447e70b26f5 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -10379,6 +10379,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) >> r = 0; >> goto out; >> } >> + if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) { >> + struct kvm_run *run = vcpu->run; >> + > >> + run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; >> + run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; >> + run->dirty_quota_exit.quota = vcpu->dirty_quota; > > As mentioned in patch 1, the code code snippet I suggested is bad. With a request, > there's no need to snapshot the quota prior to making the request. If userspace > changes the quota, then using the new quota is perfectly ok since KVM is still > providing sane data. If userspace lowers the quota, an exit will still ocurr as > expected. If userspace disables or increases the quota to the point where no exit > is necessary, then userspace can't expect and exit and won't even be aware that KVM > temporarily detected an exhausted quota. > > And all of this belongs in a common KVM helper. Name isn't pefect, but it aligns > with kvm_check_request(). 
> > #ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA > static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu) > { > struct kvm_run *run = vcpu->run; > > run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; > run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; > run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota); > > /* > * Re-check the quota and exit if and only if the vCPU still exceeds its > * quota. If userspace increases (or disables entirely) the quota, then > * no exit is required as KVM is still honoring its ABI, e.g. userspace > * won't even be aware that KVM temporarily detected an exhausted quota. > */ > return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; > } > #endif > > And then arch usage is simply something like: > > if (kvm_check_dirty_quota_request(vcpu)) { > r = 0; > goto out; > }
>>> @@ -10379,6 +10379,15 @@ static int vcpu_enter_guest(struct kvm_vcpu >>> *vcpu) >>> r = 0; >>> goto out; >>> } >>> + if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) { >>> + struct kvm_run *run = vcpu->run; >>> + >> >>> + run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; >>> + run->dirty_quota_exit.count = >>> vcpu->stat.generic.pages_dirtied; >>> + run->dirty_quota_exit.quota = vcpu->dirty_quota; >> >> As mentioned in patch 1, the code code snippet I suggested is bad. >> With a request, >> there's no need to snapshot the quota prior to making the request. If >> userspace >> changes the quota, then using the new quota is perfectly ok since KVM >> is still >> providing sane data. If userspace lowers the quota, an exit will >> still ocurr as >> expected. If userspace disables or increases the quota to the point >> where no exit >> is necessary, then userspace can't expect and exit and won't even be >> aware that KVM >> temporarily detected an exhausted quota. >> >> And all of this belongs in a common KVM helper. Name isn't pefect, >> but it aligns >> with kvm_check_request(). >> >> #ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA >> static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu) >> { >> struct kvm_run *run = vcpu->run; >> >> run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; >> run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; >> run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota); >> >> /* >> * Re-check the quota and exit if and only if the vCPU still >> exceeds its >> * quota. If userspace increases (or disables entirely) the >> quota, then >> * no exit is required as KVM is still honoring its ABI, e.g. >> userspace >> * won't even be aware that KVM temporarily detected an exhausted >> quota. 
>> */ >> return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; >> } >> #endif >> >> And then arch usage is simply something like: >> >> if (kvm_check_dirty_quota_request(vcpu)) { >> r = 0; >> goto out; >> } > If we are not even checking for request KVM_REQ_DIRTY_QUOTA_EXIT, what's > the use of kvm_make_request in patch 1? Ok, so we don't need to explicitely check for request here because we are checking the quota directly but we do need to make the request when the quota is exhausted so as to guarantee that we enter the if block "if (kvm_request_pending(vcpu))". Please let me know if my understanding is correct or if I am missing something. Also, should I add ifdef here: #ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA if (kvm_check_dirty_quota_request(vcpu)) { r = 0; goto out; } #endif Or should I bring the ifdef inside kvm_check_dirty_quota_request and return false if not defined. static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu) { #ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA struct kvm_run *run = vcpu->run; run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota); /* * Re-check the quota and exit if and only if the vCPU still exceeds its * quota. If userspace increases (or disables entirely) the quota, then * no exit is required as KVM is still honoring its ABI, e.g. userspace * won't even be aware that KVM temporarily detected an exhausted quota. */ return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; #else return false; #endif } Thanks a lot for the reviews so far. Looking forward to your reply.
On Tue, Oct 18, 2022, Shivam Kumar wrote: > > > #ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA > > > static inline bool kvm_check_dirty_quota_request(struct kvm_vcpu *vcpu) > > > { > > > struct kvm_run *run = vcpu->run; > > > > > > run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; > > > run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; > > > run->dirty_quota_exit.quota = READ_ONCE(run->dirty_quota); > > > > > > /* > > > * Re-check the quota and exit if and only if the vCPU still > > > exceeds its > > > * quota. If userspace increases (or disables entirely) the > > > quota, then > > > * no exit is required as KVM is still honoring its ABI, e.g. > > > userspace > > > * won't even be aware that KVM temporarily detected an > > > exhausted quota. > > > */ > > > return run->dirty_quota_exit.count >= run->dirty_quota_exit.quota; > > > } > > > #endif > > > > > > And then arch usage is simply something like: > > > > > > if (kvm_check_dirty_quota_request(vcpu)) { > > > r = 0; > > > goto out; > > > } > > If we are not even checking for request KVM_REQ_DIRTY_QUOTA_EXIT, what's > > the use of kvm_make_request in patch 1? > Ok, so we don't need to explicitely check for request here because we are > checking the quota directly but we do need to make the request when the > quota is exhausted so as to guarantee that we enter the if block "if > (kvm_request_pending(vcpu))". > > Please let me know if my understanding is correct or if I am missing > something. The request needs to be explicitly checked, I simply unintentionally omitted that code in the above snippet. kvm_check_request() also _clears_ the request, which is very important as not clearing the request will prevent KVM from entering the guest. The intent is to check KVM_REQ_DIRTY_QUOTA_EXIT in kvm_check_dirty_quota_request(). 
> > Also, should I add ifdef here: > > #ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA > if (kvm_check_dirty_quota_request(vcpu)) { > r = 0; > goto out; > } > #endif No need for an #ifdef in the caller, the call is from arch code and architectures that don't "select HAVE_KVM_DIRTY_QUOTA" shouldn't be checking for a dirty quota exit.
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 2e08b2a45361..c0ed35abbf2d 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -228,9 +228,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, "spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level, get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level)); - if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) { + if (spte & PT_WRITABLE_MASK) { /* Enforced by kvm_mmu_hugepage_adjust. */ - WARN_ON(level > PG_LEVEL_4K); + WARN_ON(level > PG_LEVEL_4K && kvm_slot_dirty_track_enabled(slot)); mark_page_dirty_in_slot(vcpu->kvm, slot, gfn); } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c9b49a09e6b5..140a5511ed45 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5749,6 +5749,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) */ if (__xfer_to_guest_mode_work_pending()) return 1; + + if (!kvm_vcpu_check_dirty_quota(vcpu)) + return 0; } return 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 43a6a7efc6ec..8447e70b26f5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10379,6 +10379,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) { + struct kvm_run *run = vcpu->run; + + run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; + run->dirty_quota_exit.count = vcpu->stat.generic.pages_dirtied; + run->dirty_quota_exit.quota = vcpu->dirty_quota; + r = 0; + goto out; + } /* * KVM_REQ_HV_STIMER has to be processed after