Message ID: 20211201170411.1561936-7-qperret@google.com (mailing list archive)
State: New, archived
Series: KVM: arm64: Introduce kvm_share_hyp()
On Wed, 1 Dec 2021 at 17:04, 'Quentin Perret' via kernel-team
<kernel-team@android.com> wrote:
>
> From: Will Deacon <will@kernel.org>
>
> Implement kvm_pgtable_hyp_unmap() which can be used to remove hypervisor
> stage-1 mappings at EL2.
>
> Signed-off-by: Will Deacon <will@kernel.org>
> Signed-off-by: Quentin Perret <qperret@google.com>
> ---
>  arch/arm64/include/asm/kvm_pgtable.h | 21 ++++++++++
>  arch/arm64/kvm/hyp/pgtable.c         | 63 ++++++++++++++++++++++++++++
>  2 files changed, 84 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index 027783829584..9d076f36401d 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -251,6 +251,27 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
>  int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
>  			enum kvm_pgtable_prot prot);
>
> +/**
> + * kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
> + * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
> + * @addr:	Virtual address from which to remove the mapping.
> + * @size:	Size of the mapping.
> + *
> + * The offset of @addr within a page is ignored, @size is rounded-up to
> + * the next page boundary and @phys is rounded-down to the previous page
> + * boundary.
> + *
> + * TLB invalidation is performed for each page-table entry cleared during the
> + * unmapping operation and the reference count for the page-table page
> + * containing the cleared entry is decremented, with unreferenced pages being
> + * freed. The unmapping operation will stop early if it encounters either an
> + * invalid page-table entry or a valid block mapping which maps beyond the range
> + * being unmapped.

How is the caller expected to break up the block mapping? Why not
handle that within this function?

> + *
> + * Return: Number of bytes unmapped, which may be 0.
> + */
> +u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
> +
>  /**
>   * kvm_get_vtcr() - Helper to construct VTCR_EL2
>   * @mmfr0:	Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
[...]
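A note on the return-value contract in the doc-comment above:
kvm_pgtable_hyp_unmap() may stop early, so a caller can detect a partial
unmap by comparing the result against the requested size. A minimal
sketch, assuming page-aligned @addr and @size; hyp_unmap_range() is a
hypothetical helper name, not an existing function:

#include <asm/kvm_pgtable.h>

static int hyp_unmap_range(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	/* Returns the number of bytes actually unmapped, which may be 0. */
	u64 unmapped = kvm_pgtable_hyp_unmap(pgt, addr, size);

	/* A shortfall means an invalid PTE or an oversized block mapping. */
	return (unmapped == size) ? 0 : -EINVAL;
}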
Hi Andrew,

On Tuesday 07 Dec 2021 at 14:47:14 (+0000), Andrew Walbran wrote:
> On Wed, 1 Dec 2021 at 17:04, 'Quentin Perret' via kernel-team
> <kernel-team@android.com> wrote:

[...]

> > + * freed. The unmapping operation will stop early if it encounters either an
> > + * invalid page-table entry or a valid block mapping which maps beyond the range
> > + * being unmapped.
>
> How is the caller expected to break up the block mapping? Why not
> handle that within this function?

We don't really use block mappings for the hyp stage-1, and haven't for
pretty much forever (see the loop in pkvm_create_mappings_locked() for
ex), so handling it here would be somewhat unnecessary complexity.
Handling this in the pgtable code itself (which I assume would mean
proactively re-mapping the rest of the range with page-granularity
mappings or something along those lines) is tricky because of BBM and
concurrency, so I'd rather avoid handling same-level aborts at EL2 and
all that mess unless we have a good reason. Is there a use-case where
you think that'd be needed?

Cheers,
Quentin
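To make the point concrete: mapping the hyp stage-1 one page per call,
as the loop Quentin mentions does, means the table only ever holds
page-granularity leaves, so the unmap walker can never hit a block
mapping that extends beyond the requested range. A rough sketch of that
pattern follows; map_range_by_page() is an illustrative name, not the
actual pkvm_create_mappings_locked() code:

#include <asm/kvm_pgtable.h>

static int map_range_by_page(struct kvm_pgtable *pgt, u64 addr, u64 size,
			     u64 phys, enum kvm_pgtable_prot prot)
{
	u64 offset;
	int ret;

	/* One PAGE_SIZE mapping per call, so no block leaves are created. */
	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		ret = kvm_pgtable_hyp_map(pgt, addr + offset, PAGE_SIZE,
					  phys + offset, prot);
		if (ret)
			return ret;
	}

	return 0;
}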
On Wed, 8 Dec 2021 at 09:51, Quentin Perret <qperret@google.com> wrote:
>
> Hi Andrew,
>
> On Tuesday 07 Dec 2021 at 14:47:14 (+0000), Andrew Walbran wrote:

[...]

> > How is the caller expected to break up the block mapping? Why not
> > handle that within this function?
>
> We don't really use block mappings for the hyp stage-1, and haven't for
> pretty much forever (see the loop in pkvm_create_mappings_locked() for
> ex), so handling it here would be somewhat unnecessary complexity.
> Handling this in the pgtable code itself (which I assume would mean
> proactively re-mapping the rest of the range with page-granularity
> mappings or something along those lines) is tricky because of BBM and
> concurrency, so I'd rather avoid handling same-level aborts at EL2 and
> all that mess unless we have a good reason. Is there a use-case where
> you think that'd be needed?

Aha, I didn't realise that block mappings aren't used for the hyp
stage-1, but it makes sense in that case. How about adding a note to
the function comment here explaining that reasoning? Otherwise it
sounds like the caller has to handle it somehow.
Hey Andrew,

On Wednesday 08 Dec 2021 at 14:40:54 (+0000), Andrew Walbran wrote:
> Aha, I didn't realise that block mappings aren't used for the hyp
> stage-1, but it makes sense in that case. How about adding a note to
> the function comment here explaining that reasoning? Otherwise it
> sounds like the caller has to handle it somehow.

Well, in fact the caller does have to handle it if it decided to use
block mappings. We just decided not to use them in pkvm for now for
simplicity, but I guess that might change at some point. The page-table
library is meant to be architecturally compliant (it can create block
mappings or not, ...), but the decision of which mappings to create is
left to the caller, and the handling logic belongs there. At least
that's my view of it.

So, I actually like the comment the way it is: it describes clearly
what the function does and what the caller should expect. I have a
bunch of changes for v4 queued locally, so I'll send it now with that
comment left untouched, but please shout if you have an idea on how to
make it better.

Cheers,
Quentin
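For illustration of what that caller-side handling could look like in
principle, here is a heavily simplified sketch. It deliberately ignores
the break-before-make and concurrency issues Quentin raises (the whole
block is transiently unmapped), and hyp_unmap_in_block() with all of
its parameters is hypothetical; the caller is assumed to have recorded
the block's base, size, physical address and prot when it created the
mapping:

#include <asm/kvm_pgtable.h>

static int hyp_unmap_in_block(struct kvm_pgtable *pgt,
			      u64 block_addr, u64 block_size, u64 block_phys,
			      enum kvm_pgtable_prot prot,
			      u64 addr, u64 size)
{
	u64 va;
	int ret;

	/*
	 * Unmapping the whole block succeeds: the walker only stops early
	 * on block mappings that extend beyond the requested range.
	 */
	if (kvm_pgtable_hyp_unmap(pgt, block_addr, block_size) != block_size)
		return -EINVAL;

	/* Re-map, page by page, everything outside [addr, addr + size). */
	for (va = block_addr; va < block_addr + block_size; va += PAGE_SIZE) {
		if (va >= addr && va < addr + size)
			continue;

		ret = kvm_pgtable_hyp_map(pgt, va, PAGE_SIZE,
					  block_phys + (va - block_addr), prot);
		if (ret)
			return ret;
	}

	return 0;
}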
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 027783829584..9d076f36401d 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -251,6 +251,27 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
 int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
 			enum kvm_pgtable_prot prot);
 
+/**
+ * kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
+ * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
+ * @addr:	Virtual address from which to remove the mapping.
+ * @size:	Size of the mapping.
+ *
+ * The offset of @addr within a page is ignored, @size is rounded-up to
+ * the next page boundary and @phys is rounded-down to the previous page
+ * boundary.
+ *
+ * TLB invalidation is performed for each page-table entry cleared during the
+ * unmapping operation and the reference count for the page-table page
+ * containing the cleared entry is decremented, with unreferenced pages being
+ * freed. The unmapping operation will stop early if it encounters either an
+ * invalid page-table entry or a valid block mapping which maps beyond the range
+ * being unmapped.
+ *
+ * Return: Number of bytes unmapped, which may be 0.
+ */
+u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+
 /**
  * kvm_get_vtcr() - Helper to construct VTCR_EL2
  * @mmfr0:	Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 768a58835153..6ad4cb2d6947 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -463,6 +463,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
 	return ret;
 }
 
+struct hyp_unmap_data {
+	u64				unmapped;
+	struct kvm_pgtable_mm_ops	*mm_ops;
+};
+
+static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+			    enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+	kvm_pte_t pte = *ptep, *childp = NULL;
+	u64 granule = kvm_granule_size(level);
+	struct hyp_unmap_data *data = arg;
+	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+
+	if (!kvm_pte_valid(pte))
+		return -EINVAL;
+
+	if (kvm_pte_table(pte, level)) {
+		childp = kvm_pte_follow(pte, mm_ops);
+
+		if (mm_ops->page_count(childp) != 1)
+			return 0;
+
+		kvm_clear_pte(ptep);
+		dsb(ishst);
+		__tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
+	} else {
+		if (end - addr < granule)
+			return -EINVAL;
+
+		kvm_clear_pte(ptep);
+		dsb(ishst);
+		__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
+		data->unmapped += granule;
+	}
+
+	dsb(ish);
+	isb();
+	mm_ops->put_page(ptep);
+
+	if (childp)
+		mm_ops->put_page(childp);
+
+	return 0;
+}
+
+u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+	struct hyp_unmap_data unmap_data = {
+		.mm_ops	= pgt->mm_ops,
+	};
+	struct kvm_pgtable_walker walker = {
+		.cb	= hyp_unmap_walker,
+		.arg	= &unmap_data,
+		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+	};
+
+	if (!pgt->mm_ops->page_count)
+		return 0;
+
+	kvm_pgtable_walk(pgt, addr, size, &walker);
+	return unmap_data.unmapped;
+}
+
 int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
 			 struct kvm_pgtable_mm_ops *mm_ops)
 {