diff mbox series

[v5,05/14] KVM: arm64: Add a helper to tear down unlinked stage-2 subtrees

Message ID 20221107215644.1895162-6-oliver.upton@linux.dev (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Parallel stage-2 fault handling | expand

Commit Message

Oliver Upton Nov. 7, 2022, 9:56 p.m. UTC
A subsequent change to KVM will move the tear down of an unlinked
stage-2 subtree out of the critical path of the break-before-make
sequence.

Introduce a new helper for tearing down unlinked stage-2 subtrees.
Leverage the existing stage-2 free walkers to do so, with a deep call
into __kvm_pgtable_walk() as the subtree is no longer reachable from the
root.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 arch/arm64/include/asm/kvm_pgtable.h | 11 +++++++++++
 arch/arm64/kvm/hyp/pgtable.c         | 23 +++++++++++++++++++++++
 2 files changed, 34 insertions(+)

Comments

Ben Gardon Nov. 9, 2022, 10:23 p.m. UTC | #1
On Mon, Nov 7, 2022 at 1:57 PM Oliver Upton <oliver.upton@linux.dev> wrote:
>
> A subsequent change to KVM will move the tear down of an unlinked
> stage-2 subtree out of the critical path of the break-before-make
> sequence.
>
> Introduce a new helper for tearing down unlinked stage-2 subtrees.
> Leverage the existing stage-2 free walkers to do so, with a deep call
> into __kvm_pgtable_walk() as the subtree is no longer reachable from the
> root.
>
> Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
> ---
>  arch/arm64/include/asm/kvm_pgtable.h | 11 +++++++++++
>  arch/arm64/kvm/hyp/pgtable.c         | 23 +++++++++++++++++++++++
>  2 files changed, 34 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index a752793482cb..93b1feeaebab 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -333,6 +333,17 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
>   */
>  void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
>
> +/**
> + * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
> + * @mm_ops:    Memory management callbacks.
> + * @pgtable:   Unlinked stage-2 paging structure to be freed.
> + * @level:     Level of the stage-2 paging structure to be freed.
> + *
> + * The page-table is assumed to be unreachable by any hardware walkers prior to
> + * freeing and therefore no TLB invalidation is performed.
> + */
> +void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
> +
>  /**
>   * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
>   * @pgt:       Page-table structure initialised by kvm_pgtable_stage2_init*().
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 93989b750a26..363a5cce7e1a 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -1203,3 +1203,26 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
>         pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
>         pgt->pgd = NULL;
>  }
> +
> +void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
> +{
> +       kvm_pte_t *ptep = (kvm_pte_t *)pgtable;
> +       struct kvm_pgtable_walker walker = {
> +               .cb     = stage2_free_walker,
> +               .flags  = KVM_PGTABLE_WALK_LEAF |
> +                         KVM_PGTABLE_WALK_TABLE_POST,
> +       };
> +       struct kvm_pgtable_walk_data data = {
> +               .walker = &walker,
> +
> +               /*
> +                * At this point the IPA really doesn't matter, as the page
> +                * table being traversed has already been removed from the stage
> +                * 2. Set an appropriate range to cover the entire page table.
> +                */
> +               .addr   = 0,
> +               .end    = kvm_granule_size(level),
> +       };
> +
> +       WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level));
> +}

Will this callback be able to yield? In my experience, if processing a
large teardown (i.e. level >=3 / maps 512G region) it's possible to
hit scheduler tick warnings.


> --
> 2.38.1.431.g37b22c650d-goog
>
Oliver Upton Nov. 9, 2022, 10:54 p.m. UTC | #2
On Wed, Nov 09, 2022 at 02:23:33PM -0800, Ben Gardon wrote:
> On Mon, Nov 7, 2022 at 1:57 PM Oliver Upton <oliver.upton@linux.dev> wrote:
> >
> > A subsequent change to KVM will move the tear down of an unlinked
> > stage-2 subtree out of the critical path of the break-before-make
> > sequence.
> >
> > Introduce a new helper for tearing down unlinked stage-2 subtrees.
> > Leverage the existing stage-2 free walkers to do so, with a deep call
> > into __kvm_pgtable_walk() as the subtree is no longer reachable from the
> > root.
> >
> > Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
> > ---
> >  arch/arm64/include/asm/kvm_pgtable.h | 11 +++++++++++
> >  arch/arm64/kvm/hyp/pgtable.c         | 23 +++++++++++++++++++++++
> >  2 files changed, 34 insertions(+)
> >
> > diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> > index a752793482cb..93b1feeaebab 100644
> > --- a/arch/arm64/include/asm/kvm_pgtable.h
> > +++ b/arch/arm64/include/asm/kvm_pgtable.h
> > @@ -333,6 +333,17 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
> >   */
> >  void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
> >
> > +/**
> > + * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
> > + * @mm_ops:    Memory management callbacks.
> > + * @pgtable:   Unlinked stage-2 paging structure to be freed.
> > + * @level:     Level of the stage-2 paging structure to be freed.
> > + *
> > + * The page-table is assumed to be unreachable by any hardware walkers prior to
> > + * freeing and therefore no TLB invalidation is performed.
> > + */
> > +void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
> > +
> >  /**
> >   * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
> >   * @pgt:       Page-table structure initialised by kvm_pgtable_stage2_init*().
> > diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> > index 93989b750a26..363a5cce7e1a 100644
> > --- a/arch/arm64/kvm/hyp/pgtable.c
> > +++ b/arch/arm64/kvm/hyp/pgtable.c
> > @@ -1203,3 +1203,26 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
> >         pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
> >         pgt->pgd = NULL;
> >  }
> > +
> > +void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
> > +{
> > +       kvm_pte_t *ptep = (kvm_pte_t *)pgtable;
> > +       struct kvm_pgtable_walker walker = {
> > +               .cb     = stage2_free_walker,
> > +               .flags  = KVM_PGTABLE_WALK_LEAF |
> > +                         KVM_PGTABLE_WALK_TABLE_POST,
> > +       };
> > +       struct kvm_pgtable_walk_data data = {
> > +               .walker = &walker,
> > +
> > +               /*
> > +                * At this point the IPA really doesn't matter, as the page
> > +                * table being traversed has already been removed from the stage
> > +                * 2. Set an appropriate range to cover the entire page table.
> > +                */
> > +               .addr   = 0,
> > +               .end    = kvm_granule_size(level),
> > +       };
> > +
> > +       WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level));
> > +}
> 
> Will this callback be able to yield? In my experience, if processing a
> large teardown (i.e. level >=3 / maps 512G region) it's possible to
> hit scheduler tick warnings.

No, but this is a pretty obvious problem with all of our table walkers,
which led to commit 5994bc9e05c2 ("KVM: arm64: Limit
stage2_apply_range() batch size to largest block").

We're lucky in that the largest supported granule across all page table
sizes is 1GB (no true 5-level paging yet), so it may not be too
horrendous.

But yeah, it is on the list of things to fix :)

--
Thanks,
Oliver
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index a752793482cb..93b1feeaebab 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -333,6 +333,17 @@  int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
  */
 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
 
+/**
+ * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
+ * @mm_ops:	Memory management callbacks.
+ * @pgtable:	Unlinked stage-2 paging structure to be freed.
+ * @level:	Level of the stage-2 paging structure to be freed.
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior to
+ * freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+
 /**
  * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 93989b750a26..363a5cce7e1a 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1203,3 +1203,26 @@  void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
 	pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
 	pgt->pgd = NULL;
 }
+
+void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
+{
+	kvm_pte_t *ptep = (kvm_pte_t *)pgtable;
+	struct kvm_pgtable_walker walker = {
+		.cb	= stage2_free_walker,
+		.flags	= KVM_PGTABLE_WALK_LEAF |
+			  KVM_PGTABLE_WALK_TABLE_POST,
+	};
+	struct kvm_pgtable_walk_data data = {
+		.walker	= &walker,
+
+		/*
+		 * At this point the IPA really doesn't matter, as the page
+		 * table being traversed has already been removed from the stage
+		 * 2. Set an appropriate range to cover the entire page table.
+		 */
+		.addr	= 0,
+		.end	= kvm_granule_size(level),
+	};
+
+	WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level));
+}