Message ID | 20230911021637.1941096-6-stevensd@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: allow mapping non-refcounted pages | expand |
У пн, 2023-09-11 у 11:16 +0900, David Stevens пише: > From: David Stevens <stevensd@chromium.org> > > Migrate functions which need access to is_refcounted_page to > __kvm_follow_pfn. The functions which need this are __kvm_faultin_pfn > and reexecute_instruction. The former requires replacing the async > in/out parameter with FOLL_NOWAIT parameter and the KVM_PFN_ERR_NEEDS_IO > return value. Handling non-refcounted pages is complicated, so it will > be done in a followup. The latter is a straightforward refactor. > > APIC related callers do not need to migrate because KVM controls the > memslot, so it will always be regular memory. Prefetch related callers > do not need to be migrated because atomic gfn_to_pfn calls can never > make it to hva_to_pfn_remapped. > > Signed-off-by: David Stevens <stevensd@chromium.org> > --- > arch/x86/kvm/mmu/mmu.c | 43 ++++++++++++++++++++++++++++++++---------- > arch/x86/kvm/x86.c | 12 ++++++++++-- > 2 files changed, 43 insertions(+), 12 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index e1d011c67cc6..e1eca26215e2 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -4254,7 +4254,14 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) > static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > { > struct kvm_memory_slot *slot = fault->slot; > - bool async; > + struct kvm_follow_pfn foll = { > + .slot = slot, > + .gfn = fault->gfn, > + .flags = fault->write ? FOLL_WRITE : 0, > + .try_map_writable = true, > + .guarded_by_mmu_notifier = true, > + .allow_non_refcounted_struct_page = false, > + }; > > /* > * Retry the page fault if the gfn hit a memslot that is being deleted > @@ -4283,12 +4290,20 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault > return RET_PF_EMULATE; > } > > - async = false; > - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async, > - fault->write, &fault->map_writable, > - &fault->hva); > - if (!async) > - return RET_PF_CONTINUE; /* *pfn has correct page already */ > + foll.flags |= FOLL_NOWAIT; > + fault->pfn = __kvm_follow_pfn(&foll); > + > + if (!is_error_noslot_pfn(fault->pfn)) > + goto success; Unrelated but I can't say I like the 'is_error_noslot_pfn()' name, I wish it was called something like is_valid_pfn(). > + > + /* > + * If __kvm_follow_pfn() failed because I/O is needed to fault in the > + * page, then either set up an asynchronous #PF to do the I/O, or if > + * doing an async #PF isn't possible, retry __kvm_follow_pfn() with > + * I/O allowed. All other failures are fatal, i.e. retrying won't help. > + */ > + if (fault->pfn != KVM_PFN_ERR_NEEDS_IO) > + return RET_PF_CONTINUE; > > if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) { > trace_kvm_try_async_get_page(fault->addr, fault->gfn); > @@ -4306,9 +4321,17 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault > * to wait for IO. Note, gup always bails if it is unable to quickly > * get a page and a fatal signal, i.e. SIGKILL, is pending. > */ > - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL, > - fault->write, &fault->map_writable, > - &fault->hva); > + foll.flags |= FOLL_INTERRUPTIBLE; > + foll.flags &= ~FOLL_NOWAIT; > + fault->pfn = __kvm_follow_pfn(&foll); > + > + if (!is_error_noslot_pfn(fault->pfn)) > + goto success; > + > + return RET_PF_CONTINUE; > +success: > + fault->hva = foll.hva; > + fault->map_writable = foll.writable; > return RET_PF_CONTINUE; > } > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 6c9c81e82e65..2011a7e47296 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -8556,6 +8556,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > { > gpa_t gpa = cr2_or_gpa; > kvm_pfn_t pfn; > + struct kvm_follow_pfn foll; > > if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) > return false; > @@ -8585,7 +8586,13 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > * retry instruction -> write #PF -> emulation fail -> retry > * instruction -> ... > */ > - pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); > + foll = (struct kvm_follow_pfn) { > + .slot = gfn_to_memslot(vcpu->kvm, gpa_to_gfn(gpa)), > + .gfn = gpa_to_gfn(gpa), > + .flags = FOLL_WRITE, > + .allow_non_refcounted_struct_page = true, > + }; > + pfn = __kvm_follow_pfn(&foll); > > /* > * If the instruction failed on the error pfn, it can not be fixed, > @@ -8594,7 +8601,8 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > if (is_error_noslot_pfn(pfn)) > return false; > > - kvm_release_pfn_clean(pfn); > + if (foll.is_refcounted_page) > + kvm_release_page_clean(pfn_to_page(pfn)); > > /* The instructions are well-emulated on direct mmu. */ > if (vcpu->arch.mmu->root_role.direct) { Overall looks good, I might have missed something. Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com> Best regards, Maxim Levitsky
On Tue, Oct 03, 2023, Maxim Levitsky wrote: > У пн, 2023-09-11 у 11:16 +0900, David Stevens пише: > > From: David Stevens <stevensd@chromium.org> > > @@ -4283,12 +4290,20 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault > > return RET_PF_EMULATE; > > } > > > > - async = false; > > - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async, > > - fault->write, &fault->map_writable, > > - &fault->hva); > > - if (!async) > > - return RET_PF_CONTINUE; /* *pfn has correct page already */ > > + foll.flags |= FOLL_NOWAIT; > > + fault->pfn = __kvm_follow_pfn(&foll); > > + > > + if (!is_error_noslot_pfn(fault->pfn)) > > + goto success; > Unrelated but I can't say I like the 'is_error_noslot_pfn()' name, > I wish it was called something like is_valid_pfn(). I don't love the name either, but is_valid_pfn() would be extremely confusing because the kernel already provides pfn_valid() to identify pfns/pages that are managed by the kernel. Trying to shove "guest" somewhere in the name also gets confusing because it's a host pfn, not a guest pfn.
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e1d011c67cc6..e1eca26215e2 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4254,7 +4254,14 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_memory_slot *slot = fault->slot; - bool async; + struct kvm_follow_pfn foll = { + .slot = slot, + .gfn = fault->gfn, + .flags = fault->write ? FOLL_WRITE : 0, + .try_map_writable = true, + .guarded_by_mmu_notifier = true, + .allow_non_refcounted_struct_page = false, + }; /* * Retry the page fault if the gfn hit a memslot that is being deleted @@ -4283,12 +4290,20 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault return RET_PF_EMULATE; } - async = false; - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async, - fault->write, &fault->map_writable, - &fault->hva); - if (!async) - return RET_PF_CONTINUE; /* *pfn has correct page already */ + foll.flags |= FOLL_NOWAIT; + fault->pfn = __kvm_follow_pfn(&foll); + + if (!is_error_noslot_pfn(fault->pfn)) + goto success; + + /* + * If __kvm_follow_pfn() failed because I/O is needed to fault in the + * page, then either set up an asynchronous #PF to do the I/O, or if + * doing an async #PF isn't possible, retry __kvm_follow_pfn() with + * I/O allowed. All other failures are fatal, i.e. retrying won't help. + */ + if (fault->pfn != KVM_PFN_ERR_NEEDS_IO) + return RET_PF_CONTINUE; if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) { trace_kvm_try_async_get_page(fault->addr, fault->gfn); @@ -4306,9 +4321,17 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault * to wait for IO. Note, gup always bails if it is unable to quickly * get a page and a fatal signal, i.e. SIGKILL, is pending. */ - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL, - fault->write, &fault->map_writable, - &fault->hva); + foll.flags |= FOLL_INTERRUPTIBLE; + foll.flags &= ~FOLL_NOWAIT; + fault->pfn = __kvm_follow_pfn(&foll); + + if (!is_error_noslot_pfn(fault->pfn)) + goto success; + + return RET_PF_CONTINUE; +success: + fault->hva = foll.hva; + fault->map_writable = foll.writable; return RET_PF_CONTINUE; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c9c81e82e65..2011a7e47296 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8556,6 +8556,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, { gpa_t gpa = cr2_or_gpa; kvm_pfn_t pfn; + struct kvm_follow_pfn foll; if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) return false; @@ -8585,7 +8586,13 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * retry instruction -> write #PF -> emulation fail -> retry * instruction -> ... */ - pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); + foll = (struct kvm_follow_pfn) { + .slot = gfn_to_memslot(vcpu->kvm, gpa_to_gfn(gpa)), + .gfn = gpa_to_gfn(gpa), + .flags = FOLL_WRITE, + .allow_non_refcounted_struct_page = true, + }; + pfn = __kvm_follow_pfn(&foll); /* * If the instruction failed on the error pfn, it can not be fixed, @@ -8594,7 +8601,8 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, if (is_error_noslot_pfn(pfn)) return false; - kvm_release_pfn_clean(pfn); + if (foll.is_refcounted_page) + kvm_release_page_clean(pfn_to_page(pfn)); /* The instructions are well-emulated on direct mmu. */ if (vcpu->arch.mmu->root_role.direct) {