diff mbox series

[v2,25/27] KVM: x86/mmu: Drop @slot param from exported/external page-track APIs

Message ID 20230311002258.852397-26-seanjc@google.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/gvt: KVM: KVMGT fixes and page-track cleanups | expand

Commit Message

Sean Christopherson March 11, 2023, 12:22 a.m. UTC
Refactor KVM's exported/external page-track, a.k.a. write-track, APIs
to take only the gfn and do the required memslot lookup in KVM proper.
Forcing users of the APIs to get the memslot unnecessarily bleeds
KVM internals into KVMGT and complicates usage of the APIs.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/kvm_page_track.h |  8 +--
 arch/x86/kvm/mmu/mmu.c                |  4 +-
 arch/x86/kvm/mmu/page_track.c         | 86 ++++++++++++++++++++-------
 arch/x86/kvm/mmu/page_track.h         |  5 ++
 drivers/gpu/drm/i915/gvt/kvmgt.c      | 37 +++---------
 5 files changed, 82 insertions(+), 58 deletions(-)

Comments

Yan Zhao March 17, 2023, 8:28 a.m. UTC | #1
On Fri, Mar 10, 2023 at 04:22:56PM -0800, Sean Christopherson wrote:
...
> +int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
> +{
> +	struct kvm_memory_slot *slot;
> +	int idx;
> +
> +	idx = srcu_read_lock(&kvm->srcu);
> +
> +	slot = gfn_to_memslot(kvm, gfn);
> +	if (!slot) {
> +		srcu_read_unlock(&kvm->srcu, idx);
> +		return -EINVAL;
> +	}
> +
Also fail if slot->flags & KVM_MEMSLOT_INVALID is true?
There should exist a window for external users to see an invalid slot
when a slot is about to get deleted/moved.
(It happens before MOVE is rejected in kvm_arch_prepare_memory_region()).

> +	write_lock(&kvm->mmu_lock);
> +	__kvm_write_track_add_gfn(kvm, slot, gfn);
> +	write_unlock(&kvm->mmu_lock);
> +
> +	srcu_read_unlock(&kvm->srcu, idx);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
> +
> +/*
> + * remove the guest page from the tracking pool which stops the interception
> + * of corresponding access on that page.
> + *
> + * @kvm: the guest instance we are interested in.
> + * @gfn: the guest page.
> + */
> +int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
> +{
> +	struct kvm_memory_slot *slot;
> +	int idx;
> +
> +	idx = srcu_read_lock(&kvm->srcu);
> +
> +	slot = gfn_to_memslot(kvm, gfn);
> +	if (!slot) {
> +		srcu_read_unlock(&kvm->srcu, idx);
> +		return -EINVAL;
> +	}
> +
Ditto.

> +	write_lock(&kvm->mmu_lock);
> +	__kvm_write_track_remove_gfn(kvm, slot, gfn);
> +	write_unlock(&kvm->mmu_lock);
> +
> +	srcu_read_unlock(&kvm->srcu, idx);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
Yan Zhao March 23, 2023, 8:50 a.m. UTC | #2
On Fri, Mar 17, 2023 at 04:28:56PM +0800, Yan Zhao wrote:
> On Fri, Mar 10, 2023 at 04:22:56PM -0800, Sean Christopherson wrote:
> ...
> > +int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
> > +{
> > +	struct kvm_memory_slot *slot;
> > +	int idx;
> > +
> > +	idx = srcu_read_lock(&kvm->srcu);
> > +
> > +	slot = gfn_to_memslot(kvm, gfn);
> > +	if (!slot) {
> > +		srcu_read_unlock(&kvm->srcu, idx);
> > +		return -EINVAL;
> > +	}
> > +
> Also fail if slot->flags & KVM_MEMSLOT_INVALID is true?
> There should exist a window for external users to see an invalid slot
> when a slot is about to get deleted/moved.
> (It happens before MOVE is rejected in kvm_arch_prepare_memory_region()).

Or using
        if (!kvm_is_visible_memslot(slot)) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

> 
> > +	write_lock(&kvm->mmu_lock);
> > +	__kvm_write_track_add_gfn(kvm, slot, gfn);
> > +	write_unlock(&kvm->mmu_lock);
> > +
> > +	srcu_read_unlock(&kvm->srcu, idx);
> > +
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
> > +
> > +/*
> > + * remove the guest page from the tracking pool which stops the interception
> > + * of corresponding access on that page.
> > + *
> > + * @kvm: the guest instance we are interested in.
> > + * @gfn: the guest page.
> > + */
> > +int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
> > +{
> > +	struct kvm_memory_slot *slot;
> > +	int idx;
> > +
> > +	idx = srcu_read_lock(&kvm->srcu);
> > +
> > +	slot = gfn_to_memslot(kvm, gfn);
> > +	if (!slot) {
> > +		srcu_read_unlock(&kvm->srcu, idx);
> > +		return -EINVAL;
> > +	}
> > +
> Ditto.
> 
> > +	write_lock(&kvm->mmu_lock);
> > +	__kvm_write_track_remove_gfn(kvm, slot, gfn);
> > +	write_unlock(&kvm->mmu_lock);
> > +
> > +	srcu_read_unlock(&kvm->srcu, idx);
> > +
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
>
Sean Christopherson May 3, 2023, 11:16 p.m. UTC | #3
Finally getting back to this series...

On Thu, Mar 23, 2023, Yan Zhao wrote:
> On Fri, Mar 17, 2023 at 04:28:56PM +0800, Yan Zhao wrote:
> > On Fri, Mar 10, 2023 at 04:22:56PM -0800, Sean Christopherson wrote:
> > ...
> > > +int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
> > > +{
> > > +	struct kvm_memory_slot *slot;
> > > +	int idx;
> > > +
> > > +	idx = srcu_read_lock(&kvm->srcu);
> > > +
> > > +	slot = gfn_to_memslot(kvm, gfn);
> > > +	if (!slot) {
> > > +		srcu_read_unlock(&kvm->srcu, idx);
> > > +		return -EINVAL;
> > > +	}
> > > +
> > Also fail if slot->flags & KVM_MEMSLOT_INVALID is true?
> > There should exist a window for external users to see an invalid slot
> > when a slot is about to get deleted/moved.
> > (It happens before MOVE is rejected in kvm_arch_prepare_memory_region()).
> 
> Or using
>         if (!kvm_is_visible_memslot(slot)) {
> 		srcu_read_unlock(&kvm->srcu, idx);
> 		return -EINVAL;
> 	}

Hrm.  If the DELETE/MOVE succeeds, then the funky accounting is ok (by the end
of the series) as the tracking disappears on DELETE, KVMGT will reject MOVE, and
KVM proper zaps SPTEs and resets accounting on MOVE (account_shadowed() runs under
mmu_lock and thus ensures all previous SPTEs are zapped before the "flush" from
kvm_arch_flush_shadow_memslot() can run).

If kvm_prepare_memory_region() fails though...

Ah, KVM itself is safe because of the aforementioned kvm_arch_flush_shadow_memslot().
Any accounting done on a temporarily invalid memslot will be unwound when the SPTEs
are zapped.  So for KVM, ignoring invalid memslots is correct _and necessary_.
We could clean that up by having account_shadowed() use the @slot from the fault,
which would close the window where the fault starts with a valid memslot but then
sees an invalid memslot when accounting a new shadow page.  But I don't think there
is a bug there.

Right, and DELETE can't actually fail in the current code base, and we've established
that MOVE can't possibly work.  So even if this is problematic in theory, there are
no _unknown_ bugs, and the known bugs are fixed by the end of the series.

And at the end of the series, KVMGT drops its tracking only when the DELETE is
committed.  So I _think_ allowing external trackers to add and remove gfns for
write-tracking in an invalid slot is actually desirable/correct.  I'm pretty sure
removal should be allowed as that can lead to dangling write-protection in a
rollback scenario.   And I can't think of anything that will break (in the kernel)
if write-tracking a gfn in an invalid slot is allowed, so I don't see any harm in
allowing the extremely theoretical case of KVMGT shadowing a gfn in a to-be-deleted
memslot _and_ the deletion being rolled back.
Yan Zhao May 4, 2023, 2:17 a.m. UTC | #4
On Wed, May 03, 2023 at 04:16:10PM -0700, Sean Christopherson wrote:
> Finally getting back to this series...
> 
> On Thu, Mar 23, 2023, Yan Zhao wrote:
> > On Fri, Mar 17, 2023 at 04:28:56PM +0800, Yan Zhao wrote:
> > > On Fri, Mar 10, 2023 at 04:22:56PM -0800, Sean Christopherson wrote:
> > > ...
> > > > +int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
> > > > +{
> > > > +	struct kvm_memory_slot *slot;
> > > > +	int idx;
> > > > +
> > > > +	idx = srcu_read_lock(&kvm->srcu);
> > > > +
> > > > +	slot = gfn_to_memslot(kvm, gfn);
> > > > +	if (!slot) {
> > > > +		srcu_read_unlock(&kvm->srcu, idx);
> > > > +		return -EINVAL;
> > > > +	}
> > > > +
> > > Also fail if slot->flags & KVM_MEMSLOT_INVALID is true?
> > > There should exist a window for external users to see an invalid slot
> > > when a slot is about to get deleted/moved.
> > > (It happens before MOVE is rejected in kvm_arch_prepare_memory_region()).
> > 
> > Or using
> >         if (!kvm_is_visible_memslot(slot)) {
> > 		srcu_read_unlock(&kvm->srcu, idx);
> > 		return -EINVAL;
> > 	}
> 
> Hrm.  If the DELETE/MOVE succeeds, then the funky accounting is ok (by the end
> of the series) as the tracking disappears on DELETE, KVMGT will reject MOVE, and
> KVM proper zaps SPTEs and resets accounting on MOVE (account_shadowed() runs under
> mmu_lock and thus ensures all previous SPTEs are zapped before the "flush" from
> kvm_arch_flush_shadow_memslot() can run).
> 
> If kvm_prepare_memory_region() fails though...
> 
> Ah, KVM itself is safe because of the aforementioned kvm_arch_flush_shadow_memslot().
> Any accounting done on a temporarily invalid memslot will be unwound when the SPTEs
> are zapped.  So for KVM, ignoring invalid memslots is correct _and necessary_.
> We could clean that up by having account_shadowed() use the @slot from the fault,
> which would close the window where the fault starts with a valid memslot but then
> sees an invalid memslot when accounting a new shadow page.  But I don't think there
> is a bug there.
> 
> Right, and DELETE can't actually fail in the current code base, and we've established
> that MOVE can't possibly work.  So even if this is problematic in theory, there are
> no _unknown_ bugs, and the known bugs are fixed by the end of the series.
> 
> And at the end of the series, KVMGT drops its tracking only when the DELETE is
> committed.  So I _think_ allowing external trackers to add and remove gfns for
> write-tracking in an invalid slot is actually desirable/correct.  I'm pretty sure
> removal should be allowed as that can lead to dangling write-protection in a
> rollback scenario.   And I can't think of anything that will break (in the kernel)
> if write-tracking a gfn in an invalid slot is allowed, so I don't see any harm in
> allowing the extremely theoretical case of KVMGT shadowing a gfn in a to-be-deleted
> memslot _and_ the deletion being rolled back.
Yes, you are right!
I previously thought that
invalid_slot->arch.gfn_write_track and old->arch.gfn_write_track are
pointing to different places. But I'm wrong.
Yes, allowing INVALID slot here is more desired for deletion rolling-back.
Yan Zhao May 8, 2023, 1:15 a.m. UTC | #5
On Thu, May 04, 2023 at 10:17:20AM +0800, Yan Zhao wrote:
> On Wed, May 03, 2023 at 04:16:10PM -0700, Sean Christopherson wrote:
> > Finally getting back to this series...
> > 
> > On Thu, Mar 23, 2023, Yan Zhao wrote:
> > > On Fri, Mar 17, 2023 at 04:28:56PM +0800, Yan Zhao wrote:
> > > > On Fri, Mar 10, 2023 at 04:22:56PM -0800, Sean Christopherson wrote:
> > > > ...
> > > > > +int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
> > > > > +{
> > > > > +	struct kvm_memory_slot *slot;
> > > > > +	int idx;
> > > > > +
> > > > > +	idx = srcu_read_lock(&kvm->srcu);
> > > > > +
> > > > > +	slot = gfn_to_memslot(kvm, gfn);
> > > > > +	if (!slot) {
> > > > > +		srcu_read_unlock(&kvm->srcu, idx);
> > > > > +		return -EINVAL;
> > > > > +	}
> > > > > +
> > > > Also fail if slot->flags & KVM_MEMSLOT_INVALID is true?
> > > > There should exist a window for external users to see an invalid slot
> > > > when a slot is about to get deleted/moved.
> > > > (It happens before MOVE is rejected in kvm_arch_prepare_memory_region()).
> > > 
> > > Or using
> > >         if (!kvm_is_visible_memslot(slot)) {
> > > 		srcu_read_unlock(&kvm->srcu, idx);
> > > 		return -EINVAL;
> > > 	}
> > 
Hi Sean,
After more thoughts, do you think checking KVM internal memslot is necessary?

slot = gfn_to_memslot(kvm, gfn);
if (!slot || slot->id >= KVM_USER_MEM_SLOTS) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
}

Do we allow write tracking to APIC access page when APIC-write VM exit
is not desired?

Thanks
Yan

> > Hrm.  If the DELETE/MOVE succeeds, then the funky accounting is ok (by the end
> > of the series) as the tracking disappears on DELETE, KVMGT will reject MOVE, and
> > KVM proper zaps SPTEs and resets accounting on MOVE (account_shadowed() runs under
> > mmu_lock and thus ensures all previous SPTEs are zapped before the "flush" from
> > kvm_arch_flush_shadow_memslot() can run).
> > 
> > If kvm_prepare_memory_region() fails though...
> > 
> > Ah, KVM itself is safe because of the aforementioned kvm_arch_flush_shadow_memslot().
> > Any accounting done on a temporarily invalid memslot will be unwound when the SPTEs
> > are zapped.  So for KVM, ignoring invalid memslots is correct _and necessary_.
> > We could clean that up by having account_shadowed() use the @slot from the fault,
> > which would close the window where the fault starts with a valid memslot but then
> > sees an invalid memslot when accounting a new shadow page.  But I don't think there
> > is a bug there.
> > 
> > Right, and DELETE can't actually fail in the current code base, and we've established
> > that MOVE can't possibly work.  So even if this is problematic in theory, there are
> > no _unknown_ bugs, and the known bugs are fixed by the end of the series.
> > 
> > And at the end of the series, KVMGT drops its tracking only when the DELETE is
> > committed.  So I _think_ allowing external trackers to add and remove gfns for
> > write-tracking in an invalid slot is actually desirable/correct.  I'm pretty sure
> > removal should be allowed as that can lead to dangling write-protection in a
> > rollback scenario.   And I can't think of anything that will break (in the kernel)
> > if write-tracking a gfn in an invalid slot is allowed, so I don't see any harm in
> > allowing the extremely theoretical case of KVMGT shadowing a gfn in a to-be-deleted
> > memslot _and_ the deletion being rolled back.
> Yes, you are right!
> I previously thought that
> invalid_slot->arch.gfn_write_track and old->arch.gfn_write_track are
> pointing to different places. But I'm wrong.
> Yes, allowing INVALID slot here is more desired for deletion rolling-back.
>
Sean Christopherson May 11, 2023, 10:39 p.m. UTC | #6
On Mon, May 08, 2023, Yan Zhao wrote:
> On Thu, May 04, 2023 at 10:17:20AM +0800, Yan Zhao wrote:
> > On Wed, May 03, 2023 at 04:16:10PM -0700, Sean Christopherson wrote:
> > > Finally getting back to this series...
> > > 
> > > On Thu, Mar 23, 2023, Yan Zhao wrote:
> > > > On Fri, Mar 17, 2023 at 04:28:56PM +0800, Yan Zhao wrote:
> > > > > On Fri, Mar 10, 2023 at 04:22:56PM -0800, Sean Christopherson wrote:
> > > > > ...
> > > > > > +int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
> > > > > > +{
> > > > > > +	struct kvm_memory_slot *slot;
> > > > > > +	int idx;
> > > > > > +
> > > > > > +	idx = srcu_read_lock(&kvm->srcu);
> > > > > > +
> > > > > > +	slot = gfn_to_memslot(kvm, gfn);
> > > > > > +	if (!slot) {
> > > > > > +		srcu_read_unlock(&kvm->srcu, idx);
> > > > > > +		return -EINVAL;
> > > > > > +	}
> > > > > > +
> > > > > Also fail if slot->flags & KVM_MEMSLOT_INVALID is true?
> > > > > There should exist a window for external users to see an invalid slot
> > > > > when a slot is about to get deleted/moved.
> > > > > (It happens before MOVE is rejected in kvm_arch_prepare_memory_region()).
> > > > 
> > > > Or using
> > > >         if (!kvm_is_visible_memslot(slot)) {
> > > > 		srcu_read_unlock(&kvm->srcu, idx);
> > > > 		return -EINVAL;
> > > > 	}
> > > 
> Hi Sean,
> After more thoughts, do you think checking KVM internal memslot is necessary?

I don't think it's necessary per se, but I also can't think of any reason to allow
it.

> slot = gfn_to_memslot(kvm, gfn);
> if (!slot || slot->id >= KVM_USER_MEM_SLOTS) {
> 		srcu_read_unlock(&kvm->srcu, idx);
> 		return -EINVAL;
> }
> 
> Do we allow write tracking to APIC access page when APIC-write VM exit
> is not desired?

Allow?  Yes.
 
But KVM doesn't use write-tracking for anything APICv related, e.g. to disable
APICv, KVM instead zaps the SPTEs for the APIC access page and on page fault goes
straight to MMIO emulation.

Theoretically, the guest could create an intermediate PTE in the APIC access page
and AFAICT KVM would shadow the access and write-protect the APIC access page.
But that's benign as the resulting emulation would be handled just like emulated
APIC MMIO.

FWIW, the other internal memslots, TSS and identity mapped page tables, are used
if and only if paging is disabled in the guest, i.e. there are no guest PTEs for
KVM to shadow (and paging must be enabled to enable VMX, so nested EPT is also
ruled out).  So this is theoretically possible only for the APIC access page.
That changes with KVMGT, but that again should not be problematic.  KVM will
emulate in response to the write-protected page and things go on.  E.g. it's
arguably much weirder that the guest can read/write the identity mapped page
tables that are used for EPT without unrestricted guest.

There's no sane reason to allow creating PTEs in the APIC page, but I'm also not
all that motivated to "fix" things.   account_shadowed() isn't expected to fail,
so KVM would need to check further up the stack, e.g. in walk_addr_generic() by
open coding a form of kvm_vcpu_gfn_to_hva_prot().

I _think_ that's the only place KVM would need to add a check, as KVM already
checks that the root, i.e. CR3, is in a "visible" memslot.  I suppose KVM could
just synthesize triple fault, like it does for the root/CR3 case, but I don't
like making up behavior.

In other words, I'm not opposed to disallowing write-tracking internal memslots,
but I can't think of anything that will break, and so for me personally at least,
the ROI isn't sufficient to justify writing tests and dealing with any fallout.
Yan Zhao May 12, 2023, 2:58 a.m. UTC | #7
> > Hi Sean,
> > After more thoughts, do you think checking KVM internal memslot is necessary?
> 
> I don't think it's necessary per se, but I also can't think of any reason to allow
> it.
> 
> > slot = gfn_to_memslot(kvm, gfn);
> > if (!slot || slot->id >= KVM_USER_MEM_SLOTS) {
> > 		srcu_read_unlock(&kvm->srcu, idx);
> > 		return -EINVAL;
> > }
> > 
> > Do we allow write tracking to APIC access page when APIC-write VM exit
> > is not desired?
> 
> Allow?  Yes.
>  
> But KVM doesn't use write-tracking for anything APICv related, e.g. to disable
> APICv, KVM instead zaps the SPTEs for the APIC access page and on page fault goes
> straight to MMIO emulation.
> 
> Theoretically, the guest could create an intermediate PTE in the APIC access page
> and AFAICT KVM would shadow the access and write-protect the APIC access page.
> But that's benign as the resulting emulation would be handled just like emulated
> APIC MMIO.
> 
> FWIW, the other internal memslots, TSS and identity mapped page tables, are used
> if and only if paging is disabled in the guest, i.e. there are no guest PTEs for
> KVM to shadow (and paging must be enabled to enable VMX, so nested EPT is also
> ruled out).  So this is theoretically possible only for the APIC access page.
> That changes with KVMGT, but that again should not be problematic.  KVM will
> emulate in response to the write-protected page and things go on.  E.g. it's
> arguably much weirder that the guest can read/write the identity mapped page
> tables that are used for EPT without unrestricted guest.
> 
> There's no sane reason to allow creating PTEs in the APIC page, but I'm also not
> all that motivated to "fix" things.   account_shadowed() isn't expected to fail,
> so KVM would need to check further up the stack, e.g. in walk_addr_generic() by
> open coding a form of kvm_vcpu_gfn_to_hva_prot().
> 
> I _think_ that's the only place KVM would need to add a check, as KVM already
> checks that the root, i.e. CR3, is in a "visible" memslot.  I suppose KVM could
> just synthesize triple fault, like it does for the root/CR3 case, but I don't
> like making up behavior.
> 
> In other words, I'm not opposed to disallowing write-tracking internal memslots,
> but I can't think of anything that will break, and so for me personally at least,
> the ROI isn't sufficient to justify writing tests and dealing with any fallout.

It makes sense. Thanks for the explanation.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index 20055064793a..415537ce45b4 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -43,11 +43,6 @@  struct kvm_page_track_notifier_node {
 				    struct kvm_page_track_notifier_node *node);
 };
 
-void kvm_write_track_add_gfn(struct kvm *kvm,
-			     struct kvm_memory_slot *slot, gfn_t gfn);
-void kvm_write_track_remove_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
-				gfn_t gfn);
-
 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
 enum pg_level kvm_page_track_max_mapping_level(struct kvm *kvm, gfn_t gfn,
 					       enum pg_level max_level);
@@ -58,6 +53,9 @@  kvm_page_track_register_notifier(struct kvm *kvm,
 void
 kvm_page_track_unregister_notifier(struct kvm *kvm,
 				   struct kvm_page_track_notifier_node *n);
+
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn);
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn);
 #endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
 
 #endif
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3d1aad44c2ec..cf59b44de912 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -820,7 +820,7 @@  static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 	/* the non-leaf shadow pages are keeping readonly. */
 	if (sp->role.level > PG_LEVEL_4K)
-		return kvm_write_track_add_gfn(kvm, slot, gfn);
+		return __kvm_write_track_add_gfn(kvm, slot, gfn);
 
 	kvm_mmu_gfn_disallow_lpage(slot, gfn);
 
@@ -866,7 +866,7 @@  static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	slots = kvm_memslots_for_spte_role(kvm, sp->role);
 	slot = __gfn_to_memslot(slots, gfn);
 	if (sp->role.level > PG_LEVEL_4K)
-		return kvm_write_track_remove_gfn(kvm, slot, gfn);
+		return __kvm_write_track_remove_gfn(kvm, slot, gfn);
 
 	kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 327e73be62d6..69b6431b394b 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -74,16 +74,8 @@  static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
 	slot->arch.gfn_write_track[index] += count;
 }
 
-/*
- * add guest page to the tracking pool so that corresponding access on that
- * page will be intercepted.
- *
- * @kvm: the guest instance we are interested in.
- * @slot: the @gfn belongs to.
- * @gfn: the guest page.
- */
-void kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
-			     gfn_t gfn)
+void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+			       gfn_t gfn)
 {
 	lockdep_assert_held_write(&kvm->mmu_lock);
 
@@ -104,18 +96,9 @@  void kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 	if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
 		kvm_flush_remote_tlbs(kvm);
 }
-EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
 
-/*
- * remove the guest page from the tracking pool which stops the interception
- * of corresponding access on that page.
- *
- * @kvm: the guest instance we are interested in.
- * @slot: the @gfn belongs to.
- * @gfn: the guest page.
- */
-void kvm_write_track_remove_gfn(struct kvm *kvm,
-				struct kvm_memory_slot *slot, gfn_t gfn)
+void __kvm_write_track_remove_gfn(struct kvm *kvm,
+				  struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	lockdep_assert_held_write(&kvm->mmu_lock);
 
@@ -133,7 +116,6 @@  void kvm_write_track_remove_gfn(struct kvm *kvm,
 	 */
 	kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
-EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
 
 /*
  * check if the corresponding access on the specified guest page is tracked.
@@ -274,4 +256,64 @@  enum pg_level kvm_page_track_max_mapping_level(struct kvm *kvm, gfn_t gfn,
 	return max_level;
 }
 EXPORT_SYMBOL_GPL(kvm_page_track_max_mapping_level);
+
+/*
+ * add guest page to the tracking pool so that corresponding access on that
+ * page will be intercepted.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @gfn: the guest page.
+ */
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+
+	slot = gfn_to_memslot(kvm, gfn);
+	if (!slot) {
+		srcu_read_unlock(&kvm->srcu, idx);
+		return -EINVAL;
+	}
+
+	write_lock(&kvm->mmu_lock);
+	__kvm_write_track_add_gfn(kvm, slot, gfn);
+	write_unlock(&kvm->mmu_lock);
+
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
+
+/*
+ * remove the guest page from the tracking pool which stops the interception
+ * of corresponding access on that page.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @gfn: the guest page.
+ */
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+
+	slot = gfn_to_memslot(kvm, gfn);
+	if (!slot) {
+		srcu_read_unlock(&kvm->srcu, idx);
+		return -EINVAL;
+	}
+
+	write_lock(&kvm->mmu_lock);
+	__kvm_write_track_remove_gfn(kvm, slot, gfn);
+	write_unlock(&kvm->mmu_lock);
+
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
 #endif
diff --git a/arch/x86/kvm/mmu/page_track.h b/arch/x86/kvm/mmu/page_track.h
index 50d3278e8c69..62f98c6c5af3 100644
--- a/arch/x86/kvm/mmu/page_track.h
+++ b/arch/x86/kvm/mmu/page_track.h
@@ -15,6 +15,11 @@  int kvm_page_track_create_memslot(struct kvm *kvm,
 				  struct kvm_memory_slot *slot,
 				  unsigned long npages);
 
+void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+			       gfn_t gfn);
+void __kvm_write_track_remove_gfn(struct kvm *kvm,
+				  struct kvm_memory_slot *slot, gfn_t gfn);
+
 bool kvm_gfn_is_write_tracked(struct kvm *kvm,
 			      const struct kvm_memory_slot *slot, gfn_t gfn);
 
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index e5a18d92030b..898f1f1d308d 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1560,9 +1560,7 @@  static struct mdev_driver intel_vgpu_mdev_driver = {
 
 int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
 {
-	struct kvm *kvm = info->vfio_device.kvm;
-	struct kvm_memory_slot *slot;
-	int idx;
+	int r;
 
 	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
 		return -ESRCH;
@@ -1570,18 +1568,9 @@  int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
 	if (kvmgt_gfn_is_write_protected(info, gfn))
 		return 0;
 
-	idx = srcu_read_lock(&kvm->srcu);
-	slot = gfn_to_memslot(kvm, gfn);
-	if (!slot) {
-		srcu_read_unlock(&kvm->srcu, idx);
-		return -EINVAL;
-	}
-
-	write_lock(&kvm->mmu_lock);
-	kvm_write_track_add_gfn(kvm, slot, gfn);
-	write_unlock(&kvm->mmu_lock);
-
-	srcu_read_unlock(&kvm->srcu, idx);
+	r = kvm_write_track_add_gfn(info->vfio_device.kvm, gfn);
+	if (r)
+		return r;
 
 	kvmgt_protect_table_add(info, gfn);
 	return 0;
@@ -1589,9 +1578,7 @@  int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
 
 int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
 {
-	struct kvm *kvm = info->vfio_device.kvm;
-	struct kvm_memory_slot *slot;
-	int idx;
+	int r;
 
 	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
 		return -ESRCH;
@@ -1599,17 +1586,9 @@  int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
 	if (!kvmgt_gfn_is_write_protected(info, gfn))
 		return 0;
 
-	idx = srcu_read_lock(&kvm->srcu);
-	slot = gfn_to_memslot(kvm, gfn);
-	if (!slot) {
-		srcu_read_unlock(&kvm->srcu, idx);
-		return -EINVAL;
-	}
-
-	write_lock(&kvm->mmu_lock);
-	kvm_write_track_remove_gfn(kvm, slot, gfn);
-	write_unlock(&kvm->mmu_lock);
-	srcu_read_unlock(&kvm->srcu, idx);
+	r = kvm_write_track_remove_gfn(info->vfio_device.kvm, gfn);
+	if (r)
+		return r;
 
 	kvmgt_protect_table_del(info, gfn);
 	return 0;