Message ID | 20240827084549.45731-5-sebastianene@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64: ptdump: View the second stage page-tables | expand |
Hi Seb, Thanks for the respin. On Tue, Aug 27, 2024 at 08:45:47AM +0000, Sebastian Ene wrote: > While arch/*/mem/ptdump handles the kernel pagetable dumping code, > introduce KVM/ptdump to show the guest stage-2 pagetables. The > separation is necessary because most of the definitions from the > stage-2 pagetable reside in the KVM path and we will be invoking > functionality specific to KVM. > > When a guest is created, register a new file entry under the guest > debugfs dir which allows userspace to show the contents of the guest > stage-2 pagetables when accessed. > > Signed-off-by: Sebastian Ene <sebastianene@google.com> I only have some nits, otherwise: Reviewed-by: Vincent Donnefort <vdonnefort@google.com> > --- > arch/arm64/include/asm/kvm_host.h | 6 + > arch/arm64/kvm/Makefile | 1 + > arch/arm64/kvm/arm.c | 1 + > arch/arm64/kvm/ptdump.c | 247 ++++++++++++++++++++++++++++++ > 4 files changed, 255 insertions(+) > create mode 100644 arch/arm64/kvm/ptdump.c > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index a33f5996ca9f..4acd589f086b 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -1473,4 +1473,10 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); > (pa + pi + pa3) == 1; \ > }) > > +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS > +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); > +#else > +static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {} > +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */ > + > #endif /* __ARM64_KVM_HOST_H__ */ > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile > index 86a629aaf0a1..e4233b323a73 100644 > --- a/arch/arm64/kvm/Makefile > +++ b/arch/arm64/kvm/Makefile > @@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ > > kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o > kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o > +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o > > always-y := hyp_constants.h hyp-constants.s > > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > index 9bef7638342e..b9fd928d3477 100644 > --- a/arch/arm64/kvm/arm.c > +++ b/arch/arm64/kvm/arm.c > @@ -228,6 +228,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) > void kvm_arch_create_vm_debugfs(struct kvm *kvm) > { > kvm_sys_regs_create_debugfs(kvm); > + kvm_s2_ptdump_create_debugfs(kvm); > } > > static void kvm_destroy_mpidr_data(struct kvm *kvm) > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c > new file mode 100644 > index 000000000000..e72a928d4445 > --- /dev/null > +++ b/arch/arm64/kvm/ptdump.c > @@ -0,0 +1,247 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Debug helper used to dump the stage-2 pagetables of the system and their > + * associated permissions. > + * > + * Copyright (C) Google, 2024 > + * Author: Sebastian Ene <sebastianene@google.com> > + */ > +#include <linux/debugfs.h> > +#include <linux/kvm_host.h> > +#include <linux/seq_file.h> > + > +#include <asm/kvm_pgtable.h> > +#include <asm/kvm_host.h> nit: I believe you wanted to follow the alphabetical order, if that is the case, kvm_host.h then kvm_pgtable.h > +#include <asm/ptdump.h> > + > + nit: don't think double empty are a rule, I would remove it. > +#define MARKERS_LEN (2) nit: The brackets are not necessary for MARKERS_LEN. > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1) > + > +struct kvm_ptdump_guest_state { > + struct kvm *kvm; > + struct ptdump_pg_state parser_state; > + struct addr_marker ipa_marker[MARKERS_LEN]; > + struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS]; > + struct ptdump_range range[MARKERS_LEN]; > +}; > + > +static const struct ptdump_prot_bits stage2_pte_bits[] = { > + { > + .mask = PTE_VALID, > + .val = PTE_VALID, > + .set = " ", > + .clear = "F", This is effectively never used because an invalid PTE is 0 and note_page() won't print it. This probably can be removed? > + }, { > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, > + .set = "R", > + .clear = " ", > + }, { > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, > + .set = "W", > + .clear = " ", > + }, { > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID, > + .val = PTE_VALID, > + .set = " ", > + .clear = "X", > + }, { > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, > + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, > + .set = "AF", > + .clear = " ", > + }, { > + .mask = PTE_TABLE_BIT | PTE_VALID, > + .val = PTE_VALID, > + .set = "BLK", > + .clear = " ", > + }, > +}; > + > +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx, > + enum kvm_pgtable_walk_flags visit) > +{ > + struct ptdump_pg_state *st = ctx->arg; > + struct ptdump_state *pt_st = &st->ptdump; > + > + note_page(pt_st, ctx->addr, ctx->level, ctx->old); > + > + return 0; > +} > + > +static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl) > +{ > + u32 i; > + u64 mask; > + > + if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL)) > + return -EINVAL; > + > + mask = 0; > + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++) > + mask |= stage2_pte_bits[i].mask; > + > + for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) { > + snprintf(level[i].name, sizeof(level[i].name), "%d", i); %u, i being unsigned. > + > + level[i].num = ARRAY_SIZE(stage2_pte_bits); > + level[i].bits = stage2_pte_bits; > + level[i].mask = mask; > + } > + > + return 0; > +} > + > +static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm *kvm) > +{ > + struct kvm_ptdump_guest_state *st; > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; > + struct kvm_pgtable *pgtable = mmu->pgt; > + int ret; > + > + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT); > + if (!st) > + return ERR_PTR(-ENOMEM); > + > + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level); > + if (ret) { > + kfree(st); > + return ERR_PTR(ret); > + } > + > + st->ipa_marker[0].name = "Guest IPA"; > + st->ipa_marker[1].start_address = BIT(pgtable->ia_bits); > + st->range[0].end = BIT(pgtable->ia_bits); > + > + st->kvm = kvm; > + st->parser_state = (struct ptdump_pg_state) { > + .marker = &st->ipa_marker[0], > + .level = -1, > + .pg_level = &st->level[0], > + .ptdump.range = &st->range[0], > + .start_address = 0, > + }; > + > + return st; > +} > + > +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused) > +{ > + int ret; > + struct kvm_ptdump_guest_state *st = m->private; > + struct kvm *kvm = st->kvm; > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; > + struct ptdump_pg_state *parser_state = &st->parser_state; > + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) { > + .cb = kvm_ptdump_visitor, > + .arg = parser_state, > + .flags = KVM_PGTABLE_WALK_LEAF, > + }; > + > + parser_state->seq = m; > + > + write_lock(&kvm->mmu_lock); > + ret = kvm_pgtable_walk(mmu->pgt, 0, BIT(mmu->pgt->ia_bits), &walker); > + write_unlock(&kvm->mmu_lock); > + > + return ret; > +} > + > +static int kvm_ptdump_guest_open(struct inode *m, struct file *file) > +{ > + struct kvm *kvm = m->i_private; > + struct kvm_ptdump_guest_state *st; > + int ret; > + > + if (!kvm_get_kvm_safe(kvm)) > + return -ENOENT; > + > + st = kvm_ptdump_parser_create(kvm); > + if (IS_ERR(st)) { > + ret = PTR_ERR(st); > + goto free_with_kvm_ref; > + } > + > + ret = single_open(file, kvm_ptdump_guest_show, st); > + if (!ret) > + return 0; > + > + kfree(st); > +free_with_kvm_ref: nit: I believe kfree understands IS_ERR() so you could have a simple "err:" label covering all the error path. > + kvm_put_kvm(kvm); > + return ret; > +} > + > +static int kvm_ptdump_guest_close(struct inode *m, struct file *file) > +{ > + struct kvm *kvm = m->i_private; > + void *st = ((struct seq_file *)file->private_data)->private; > + > + kfree(st); > + kvm_put_kvm(kvm); > + > + return single_release(m, file); > +} > + > +static const struct file_operations kvm_ptdump_guest_fops = { > + .open = kvm_ptdump_guest_open, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = kvm_ptdump_guest_close, > +}; > + > +static int kvm_pgtable_debugfs_show(struct seq_file *m, void *unused) > +{ > + const struct file *file = m->file; > + struct kvm_pgtable *pgtable = m->private; > + > + if (!strcmp(file_dentry(file)->d_iname, "ipa_range")) > + seq_printf(m, "%2u\n", pgtable->ia_bits); > + else if (!strcmp(file_dentry(file)->d_iname, "stage2_levels")) > + seq_printf(m, "%1d\n", KVM_PGTABLE_LAST_LEVEL - pgtable->start_level + 1); nit: KVM_PGTABLE_MAX_LEVELS - pgtable->start_level ? > + return 0; > +} > + > +static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file) > +{ > + struct kvm *kvm = m->i_private; > + struct kvm_pgtable *pgtable; > + int ret; > + > + if (!kvm_get_kvm_safe(kvm)) > + return -ENOENT; > + > + pgtable = kvm->arch.mmu.pgt; > + > + ret = single_open(file, kvm_pgtable_debugfs_show, pgtable); > + if (ret < 0) > + kvm_put_kvm(kvm); > + return ret; > +} > + > +static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file) > +{ > + struct kvm *kvm = m->i_private; > + > + kvm_put_kvm(kvm); > + return single_release(m, file); > +} > + > +static const struct file_operations kvm_pgtable_debugfs_fops = { > + .open = kvm_pgtable_debugfs_open, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = kvm_pgtable_debugfs_close, > +}; > + > +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) > +{ > + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry, > + kvm, &kvm_ptdump_guest_fops); > + debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm, > + &kvm_pgtable_debugfs_fops); > + debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry, > + kvm, &kvm_pgtable_debugfs_fops); > +} > -- > 2.46.0.295.g3b9ea8a38a-goog >
On Fri, 30 Aug 2024 11:24:53 +0100, Vincent Donnefort <vdonnefort@google.com> wrote: > > Hi Seb, > > Thanks for the respin. > > On Tue, Aug 27, 2024 at 08:45:47AM +0000, Sebastian Ene wrote: > > While arch/*/mem/ptdump handles the kernel pagetable dumping code, > > introduce KVM/ptdump to show the guest stage-2 pagetables. The > > separation is necessary because most of the definitions from the > > stage-2 pagetable reside in the KVM path and we will be invoking > > functionality specific to KVM. > > > > When a guest is created, register a new file entry under the guest > > debugfs dir which allows userspace to show the contents of the guest > > stage-2 pagetables when accessed. > > > > Signed-off-by: Sebastian Ene <sebastianene@google.com> > > I only have some nits, otherwise: > > Reviewed-by: Vincent Donnefort <vdonnefort@google.com> > > > --- > > arch/arm64/include/asm/kvm_host.h | 6 + > > arch/arm64/kvm/Makefile | 1 + > > arch/arm64/kvm/arm.c | 1 + > > arch/arm64/kvm/ptdump.c | 247 ++++++++++++++++++++++++++++++ > > 4 files changed, 255 insertions(+) > > create mode 100644 arch/arm64/kvm/ptdump.c > > > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > > index a33f5996ca9f..4acd589f086b 100644 > > --- a/arch/arm64/include/asm/kvm_host.h > > +++ b/arch/arm64/include/asm/kvm_host.h > > @@ -1473,4 +1473,10 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); > > (pa + pi + pa3) == 1; \ > > }) > > > > +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS > > +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); > > +#else > > +static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {} > > +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */ > > + > > #endif /* __ARM64_KVM_HOST_H__ */ > > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile > > index 86a629aaf0a1..e4233b323a73 100644 > > --- a/arch/arm64/kvm/Makefile > > +++ b/arch/arm64/kvm/Makefile > > @@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ > > > > kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o > > kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o > > +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o > > > > always-y := hyp_constants.h hyp-constants.s > > > > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > > index 9bef7638342e..b9fd928d3477 100644 > > --- a/arch/arm64/kvm/arm.c > > +++ b/arch/arm64/kvm/arm.c > > @@ -228,6 +228,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) > > void kvm_arch_create_vm_debugfs(struct kvm *kvm) > > { > > kvm_sys_regs_create_debugfs(kvm); > > + kvm_s2_ptdump_create_debugfs(kvm); > > } > > > > static void kvm_destroy_mpidr_data(struct kvm *kvm) > > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c > > new file mode 100644 > > index 000000000000..e72a928d4445 > > --- /dev/null > > +++ b/arch/arm64/kvm/ptdump.c > > @@ -0,0 +1,247 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* > > + * Debug helper used to dump the stage-2 pagetables of the system and their > > + * associated permissions. > > + * > > + * Copyright (C) Google, 2024 > > + * Author: Sebastian Ene <sebastianene@google.com> > > + */ > > +#include <linux/debugfs.h> > > +#include <linux/kvm_host.h> > > +#include <linux/seq_file.h> > > + > > +#include <asm/kvm_pgtable.h> > > +#include <asm/kvm_host.h> > > nit: I believe you wanted to follow the alphabetical order, if that is the case, > kvm_host.h then kvm_pgtable.h > > > +#include <asm/ptdump.h> > > + > > + > > nit: don't think double empty are a rule, I would remove it. > > > +#define MARKERS_LEN (2) > > nit: The brackets are not necessary for MARKERS_LEN. > > > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1) > > + > > +struct kvm_ptdump_guest_state { > > + struct kvm *kvm; > > + struct ptdump_pg_state parser_state; > > + struct addr_marker ipa_marker[MARKERS_LEN]; > > + struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS]; > > + struct ptdump_range range[MARKERS_LEN]; > > +}; > > + > > +static const struct ptdump_prot_bits stage2_pte_bits[] = { > > + { > > + .mask = PTE_VALID, > > + .val = PTE_VALID, > > + .set = " ", > > + .clear = "F", > > This is effectively never used because an invalid PTE is 0 and note_page() won't > print it. This probably can be removed? Yeah, I can't see how we are going to trigger that one given that PTE_VALID must be set, and that we only print something if the bit is clear. Seb? > > > + }, { > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, > > + .set = "R", > > + .clear = " ", > > + }, { > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, > > + .set = "W", > > + .clear = " ", > > + }, { > > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID, > > + .val = PTE_VALID, > > + .set = " ", > > + .clear = "X", > > + }, { > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, > > + .set = "AF", > > + .clear = " ", > > + }, { > > + .mask = PTE_TABLE_BIT | PTE_VALID, > > + .val = PTE_VALID, > > + .set = "BLK", > > + .clear = " ", > > + }, > > +}; > > + > > +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx, > > + enum kvm_pgtable_walk_flags visit) > > +{ > > + struct ptdump_pg_state *st = ctx->arg; > > + struct ptdump_state *pt_st = &st->ptdump; > > + > > + note_page(pt_st, ctx->addr, ctx->level, ctx->old); > > + > > + return 0; > > +} > > + > > +static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl) > > +{ > > + u32 i; > > + u64 mask; > > + > > + if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL)) > > + return -EINVAL; > > + > > + mask = 0; > > + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++) > > + mask |= stage2_pte_bits[i].mask; > > + > > + for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) { > > + snprintf(level[i].name, sizeof(level[i].name), "%d", i); > > %u, i being unsigned. > > > + > > + level[i].num = ARRAY_SIZE(stage2_pte_bits); > > + level[i].bits = stage2_pte_bits; > > + level[i].mask = mask; > > + } > > + > > + return 0; > > +} > > + > > +static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm *kvm) > > +{ > > + struct kvm_ptdump_guest_state *st; > > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; > > + struct kvm_pgtable *pgtable = mmu->pgt; > > + int ret; > > + > > + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT); > > + if (!st) > > + return ERR_PTR(-ENOMEM); > > + > > + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level); > > + if (ret) { > > + kfree(st); > > + return ERR_PTR(ret); > > + } > > + > > + st->ipa_marker[0].name = "Guest IPA"; > > + st->ipa_marker[1].start_address = BIT(pgtable->ia_bits); > > + st->range[0].end = BIT(pgtable->ia_bits); > > + > > + st->kvm = kvm; > > + st->parser_state = (struct ptdump_pg_state) { > > + .marker = &st->ipa_marker[0], > > + .level = -1, > > + .pg_level = &st->level[0], > > + .ptdump.range = &st->range[0], > > + .start_address = 0, > > + }; > > + > > + return st; > > +} > > + > > +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused) > > +{ > > + int ret; > > + struct kvm_ptdump_guest_state *st = m->private; > > + struct kvm *kvm = st->kvm; > > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; > > + struct ptdump_pg_state *parser_state = &st->parser_state; > > + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) { > > + .cb = kvm_ptdump_visitor, > > + .arg = parser_state, > > + .flags = KVM_PGTABLE_WALK_LEAF, > > + }; > > + > > + parser_state->seq = m; > > + > > + write_lock(&kvm->mmu_lock); > > + ret = kvm_pgtable_walk(mmu->pgt, 0, BIT(mmu->pgt->ia_bits), &walker); > > + write_unlock(&kvm->mmu_lock); > > + > > + return ret; > > +} > > + > > +static int kvm_ptdump_guest_open(struct inode *m, struct file *file) > > +{ > > + struct kvm *kvm = m->i_private; > > + struct kvm_ptdump_guest_state *st; > > + int ret; > > + > > + if (!kvm_get_kvm_safe(kvm)) > > + return -ENOENT; > > + > > + st = kvm_ptdump_parser_create(kvm); > > + if (IS_ERR(st)) { > > + ret = PTR_ERR(st); > > + goto free_with_kvm_ref; > > + } > > + > > + ret = single_open(file, kvm_ptdump_guest_show, st); > > + if (!ret) > > + return 0; > > + > > + kfree(st); > > +free_with_kvm_ref: > > nit: I believe kfree understands IS_ERR() so you could have a simple "err:" > label covering all the error path. I couldn't find such handling in kfree(). Could you point be to it? > > > + kvm_put_kvm(kvm); > > + return ret; > > +} > > + > > +static int kvm_ptdump_guest_close(struct inode *m, struct file *file) > > +{ > > + struct kvm *kvm = m->i_private; > > + void *st = ((struct seq_file *)file->private_data)->private; > > + > > + kfree(st); > > + kvm_put_kvm(kvm); > > + > > + return single_release(m, file); > > +} > > + > > +static const struct file_operations kvm_ptdump_guest_fops = { > > + .open = kvm_ptdump_guest_open, > > + .read = seq_read, > > + .llseek = seq_lseek, > > + .release = kvm_ptdump_guest_close, > > +}; > > + > > +static int kvm_pgtable_debugfs_show(struct seq_file *m, void *unused) > > +{ > > + const struct file *file = m->file; > > + struct kvm_pgtable *pgtable = m->private; > > + > > + if (!strcmp(file_dentry(file)->d_iname, "ipa_range")) I really dislike this sort of construct, and I'd rather we pick the correct callback by construction rather than relying on a string comparison. See below for a suggestion. > > + seq_printf(m, "%2u\n", pgtable->ia_bits); > > + else if (!strcmp(file_dentry(file)->d_iname, "stage2_levels")) > > + seq_printf(m, "%1d\n", KVM_PGTABLE_LAST_LEVEL - pgtable->start_level + 1); > > nit: KVM_PGTABLE_MAX_LEVELS - pgtable->start_level ? > > > + return 0; > > +} > > + > > +static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file) > > +{ > > + struct kvm *kvm = m->i_private; > > + struct kvm_pgtable *pgtable; > > + int ret; > > + > > + if (!kvm_get_kvm_safe(kvm)) > > + return -ENOENT; > > + > > + pgtable = kvm->arch.mmu.pgt; > > + > > + ret = single_open(file, kvm_pgtable_debugfs_show, pgtable); > > + if (ret < 0) > > + kvm_put_kvm(kvm); > > + return ret; > > +} > > + > > +static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file) > > +{ > > + struct kvm *kvm = m->i_private; > > + > > + kvm_put_kvm(kvm); > > + return single_release(m, file); > > +} > > + > > +static const struct file_operations kvm_pgtable_debugfs_fops = { > > + .open = kvm_pgtable_debugfs_open, > > + .read = seq_read, > > + .llseek = seq_lseek, > > + .release = kvm_pgtable_debugfs_close, > > +}; > > + > > +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) > > +{ > > + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry, > > + kvm, &kvm_ptdump_guest_fops); > > + debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm, > > + &kvm_pgtable_debugfs_fops); > > + debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry, > > + kvm, &kvm_pgtable_debugfs_fops); > > +} I'd expect something like this instead: diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c index e72a928d4445..c11ea355aa51 100644 --- a/arch/arm64/kvm/ptdump.c +++ b/arch/arm64/kvm/ptdump.c @@ -192,19 +191,24 @@ static const struct file_operations kvm_ptdump_guest_fops = { .release = kvm_ptdump_guest_close, }; -static int kvm_pgtable_debugfs_show(struct seq_file *m, void *unused) +static int kvm_pgtable_range_show(struct seq_file *m, void *unused) +{ + struct kvm_pgtable *pgtable = m->private; + + seq_printf(m, "%2u\n", pgtable->ia_bits); + return 0; +} + +static int kvm_pgtable_levels_show(struct seq_file *m, void *unused) { - const struct file *file = m->file; struct kvm_pgtable *pgtable = m->private; - if (!strcmp(file_dentry(file)->d_iname, "ipa_range")) - seq_printf(m, "%2u\n", pgtable->ia_bits); - else if (!strcmp(file_dentry(file)->d_iname, "stage2_levels")) - seq_printf(m, "%1d\n", KVM_PGTABLE_LAST_LEVEL - pgtable->start_level + 1); + seq_printf(m, "%1d\n", KVM_PGTABLE_LAST_LEVEL - pgtable->start_level + 1); return 0; } -static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file) +static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file, + int (*show)(struct seq_file *, void *)) { struct kvm *kvm = m->i_private; struct kvm_pgtable *pgtable; @@ -215,12 +219,22 @@ static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file) pgtable = kvm->arch.mmu.pgt; - ret = single_open(file, kvm_pgtable_debugfs_show, pgtable); + ret = single_open(file, show, pgtable); if (ret < 0) kvm_put_kvm(kvm); return ret; } +static int kvm_pgtable_range_open(struct inode *m, struct file *file) +{ + return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_range_show); +} + +static int kvm_pgtable_levels_open(struct inode *m, struct file *file) +{ + return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_levels_show); +} + static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file) { struct kvm *kvm = m->i_private; @@ -229,8 +243,15 @@ static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file) return single_release(m, file); } -static const struct file_operations kvm_pgtable_debugfs_fops = { - .open = kvm_pgtable_debugfs_open, +static const struct file_operations kvm_pgtable_range_fops = { + .open = kvm_pgtable_range_open, + .read = seq_read, + .llseek = seq_lseek, + .release = kvm_pgtable_debugfs_close, +}; + +static const struct file_operations kvm_pgtable_levels_fops = { + .open = kvm_pgtable_levels_open, .read = seq_read, .llseek = seq_lseek, .release = kvm_pgtable_debugfs_close, @@ -241,7 +262,7 @@ void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry, kvm, &kvm_ptdump_guest_fops); debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm, - &kvm_pgtable_debugfs_fops); + &kvm_pgtable_range_fops); debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry, - kvm, &kvm_pgtable_debugfs_fops); + kvm, &kvm_pgtable_levels_fops); } Thanks, M.
On Fri, Aug 30, 2024 at 03:11:34PM +0100, Marc Zyngier wrote: > On Fri, 30 Aug 2024 11:24:53 +0100, > Vincent Donnefort <vdonnefort@google.com> wrote: > > > > Hi Seb, > > > > Thanks for the respin. > > > > On Tue, Aug 27, 2024 at 08:45:47AM +0000, Sebastian Ene wrote: > > > While arch/*/mem/ptdump handles the kernel pagetable dumping code, > > > introduce KVM/ptdump to show the guest stage-2 pagetables. The > > > separation is necessary because most of the definitions from the > > > stage-2 pagetable reside in the KVM path and we will be invoking > > > functionality specific to KVM. > > > > > > When a guest is created, register a new file entry under the guest > > > debugfs dir which allows userspace to show the contents of the guest > > > stage-2 pagetables when accessed. > > > > > > Signed-off-by: Sebastian Ene <sebastianene@google.com> > > > > I only have some nits, otherwise: > > > > Reviewed-by: Vincent Donnefort <vdonnefort@google.com> > > > > > --- > > > arch/arm64/include/asm/kvm_host.h | 6 + > > > arch/arm64/kvm/Makefile | 1 + > > > arch/arm64/kvm/arm.c | 1 + > > > arch/arm64/kvm/ptdump.c | 247 ++++++++++++++++++++++++++++++ > > > 4 files changed, 255 insertions(+) > > > create mode 100644 arch/arm64/kvm/ptdump.c > > > > > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > > > index a33f5996ca9f..4acd589f086b 100644 > > > --- a/arch/arm64/include/asm/kvm_host.h > > > +++ b/arch/arm64/include/asm/kvm_host.h > > > @@ -1473,4 +1473,10 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); > > > (pa + pi + pa3) == 1; \ > > > }) > > > > > > +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS > > > +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); > > > +#else > > > +static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {} > > > +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */ > > > + > > > #endif /* __ARM64_KVM_HOST_H__ */ > > > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile > > > index 86a629aaf0a1..e4233b323a73 100644 > > > --- a/arch/arm64/kvm/Makefile > > > +++ b/arch/arm64/kvm/Makefile > > > @@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ > > > > > > kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o > > > kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o > > > +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o > > > > > > always-y := hyp_constants.h hyp-constants.s > > > > > > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > > > index 9bef7638342e..b9fd928d3477 100644 > > > --- a/arch/arm64/kvm/arm.c > > > +++ b/arch/arm64/kvm/arm.c > > > @@ -228,6 +228,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) > > > void kvm_arch_create_vm_debugfs(struct kvm *kvm) > > > { > > > kvm_sys_regs_create_debugfs(kvm); > > > + kvm_s2_ptdump_create_debugfs(kvm); > > > } > > > > > > static void kvm_destroy_mpidr_data(struct kvm *kvm) > > > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c > > > new file mode 100644 > > > index 000000000000..e72a928d4445 > > > --- /dev/null > > > +++ b/arch/arm64/kvm/ptdump.c > > > @@ -0,0 +1,247 @@ > > > +// SPDX-License-Identifier: GPL-2.0-only > > > +/* > > > + * Debug helper used to dump the stage-2 pagetables of the system and their > > > + * associated permissions. > > > + * > > > + * Copyright (C) Google, 2024 > > > + * Author: Sebastian Ene <sebastianene@google.com> > > > + */ > > > +#include <linux/debugfs.h> > > > +#include <linux/kvm_host.h> > > > +#include <linux/seq_file.h> > > > + > > > +#include <asm/kvm_pgtable.h> > > > +#include <asm/kvm_host.h> > > > > nit: I believe you wanted to follow the alphabetical order, if that is the case, > > kvm_host.h then kvm_pgtable.h > > > > > +#include <asm/ptdump.h> > > > + > > > + > > > > nit: don't think double empty are a rule, I would remove it. > > > > > +#define MARKERS_LEN (2) > > > > nit: The brackets are not necessary for MARKERS_LEN. > > > > > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1) > > > + > > > +struct kvm_ptdump_guest_state { > > > + struct kvm *kvm; > > > + struct ptdump_pg_state parser_state; > > > + struct addr_marker ipa_marker[MARKERS_LEN]; > > > + struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS]; > > > + struct ptdump_range range[MARKERS_LEN]; > > > +}; > > > + > > > +static const struct ptdump_prot_bits stage2_pte_bits[] = { > > > + { > > > + .mask = PTE_VALID, > > > + .val = PTE_VALID, > > > + .set = " ", > > > + .clear = "F", > > > > This is effectively never used because an invalid PTE is 0 and note_page() won't > > print it. This probably can be removed? > > Yeah, I can't see how we are going to trigger that one given that > PTE_VALID must be set, and that we only print something if the bit is > clear. > > Seb? When (pte_prot & mask == val) we print .set (which is empty), otherwise we print .clear (This is from dump_prot). Invalid non-zero PTEs should be printed in this case. > > > > > > + }, { > > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, > > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, > > > + .set = "R", > > > + .clear = " ", > > > + }, { > > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, > > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, > > > + .set = "W", > > > + .clear = " ", > > > + }, { > > > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID, > > > + .val = PTE_VALID, > > > + .set = " ", > > > + .clear = "X", > > > + }, { > > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, > > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, > > > + .set = "AF", > > > + .clear = " ", > > > + }, { > > > + .mask = PTE_TABLE_BIT | PTE_VALID, > > > + .val = PTE_VALID, > > > + .set = "BLK", > > > + .clear = " ", > > > + }, > > > +}; > > > + > > > +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx, > > > + enum kvm_pgtable_walk_flags visit) > > > +{ > > > + struct ptdump_pg_state *st = ctx->arg; > > > + struct ptdump_state *pt_st = &st->ptdump; > > > + > > > + note_page(pt_st, ctx->addr, ctx->level, ctx->old); > > > + > > > + return 0; > > > +} > > > + > > > +static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl) > > > +{ > > > + u32 i; > > > + u64 mask; > > > + > > > + if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL)) > > > + return -EINVAL; > > > + > > > + mask = 0; > > > + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++) > > > + mask |= stage2_pte_bits[i].mask; > > > + > > > + for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) { > > > + snprintf(level[i].name, sizeof(level[i].name), "%d", i); > > > > %u, i being unsigned. > > > > > + > > > + level[i].num = ARRAY_SIZE(stage2_pte_bits); > > > + level[i].bits = stage2_pte_bits; > > > + level[i].mask = mask; > > > + } > > > + > > > + return 0; > > > +} > > > + > > > +static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm *kvm) > > > +{ > > > + struct kvm_ptdump_guest_state *st; > > > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; > > > + struct kvm_pgtable *pgtable = mmu->pgt; > > > + int ret; > > > + > > > + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT); > > > + if (!st) > > > + return ERR_PTR(-ENOMEM); > > > + > > > + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level); > > > + if (ret) { > > > + kfree(st); > > > + return ERR_PTR(ret); > > > + } > > > + > > > + st->ipa_marker[0].name = "Guest IPA"; > > > + st->ipa_marker[1].start_address = BIT(pgtable->ia_bits); > > > + st->range[0].end = BIT(pgtable->ia_bits); > > > + > > > + st->kvm = kvm; > > > + st->parser_state = (struct ptdump_pg_state) { > > > + .marker = &st->ipa_marker[0], > > > + .level = -1, > > > + .pg_level = &st->level[0], > > > + .ptdump.range = &st->range[0], > > > + .start_address = 0, > > > + }; > > > + > > > + return st; > > > +} > > > + > > > +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused) > > > +{ > > > + int ret; > > > + struct kvm_ptdump_guest_state *st = m->private; > > > + struct kvm *kvm = st->kvm; > > > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; > > > + struct ptdump_pg_state *parser_state = &st->parser_state; > > > + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) { > > > + .cb = kvm_ptdump_visitor, > > > + .arg = parser_state, > > > + .flags = KVM_PGTABLE_WALK_LEAF, > > > + }; > > > + > > > + parser_state->seq = m; > > > + > > > + write_lock(&kvm->mmu_lock); > > > + ret = kvm_pgtable_walk(mmu->pgt, 0, BIT(mmu->pgt->ia_bits), &walker); > > > + write_unlock(&kvm->mmu_lock); > > > + > > > + return ret; > > > +} > > > + > > > +static int kvm_ptdump_guest_open(struct inode *m, struct file *file) > > > +{ > > > + struct kvm *kvm = m->i_private; > > > + struct kvm_ptdump_guest_state *st; > > > + int ret; > > > + > > > + if (!kvm_get_kvm_safe(kvm)) > > > + return -ENOENT; > > > + > > > + st = kvm_ptdump_parser_create(kvm); > > > + if (IS_ERR(st)) { > > > + ret = PTR_ERR(st); > > > + goto free_with_kvm_ref; > > > + } > > > + > > > + ret = single_open(file, kvm_ptdump_guest_show, st); > > > + if (!ret) > > > + return 0; > > > + > > > + kfree(st); > > > +free_with_kvm_ref: > > > > nit: I believe kfree understands IS_ERR() so you could have a simple "err:" > > label covering all the error path. > > I couldn't find such handling in kfree(). Could you point be to it? > > > > > > + kvm_put_kvm(kvm); > > > + return ret; > > > +} > > > + > > > +static int kvm_ptdump_guest_close(struct inode *m, struct file *file) > > > +{ > > > + struct kvm *kvm = m->i_private; > > > + void *st = ((struct seq_file *)file->private_data)->private; > > > + > > > + kfree(st); > > > + kvm_put_kvm(kvm); > > > + > > > + return single_release(m, file); > > > +} > > > + > > > +static const struct file_operations kvm_ptdump_guest_fops = { > > > + .open = kvm_ptdump_guest_open, > > > + .read = seq_read, > > > + .llseek = seq_lseek, > > > + .release = kvm_ptdump_guest_close, > > > +}; > > > + > > > +static int kvm_pgtable_debugfs_show(struct seq_file *m, void *unused) > > > +{ > > > + const struct file *file = m->file; > > > + struct kvm_pgtable *pgtable = m->private; > > > + > > > + if (!strcmp(file_dentry(file)->d_iname, "ipa_range")) > > I really dislike this sort of construct, and I'd rather we pick the > correct callback by construction rather than relying on a string > comparison. See below for a suggestion. > Thanks for the feedback and for the suggestion. Let me try to re-write this as indicated. > > > + seq_printf(m, "%2u\n", pgtable->ia_bits); > > > + else if (!strcmp(file_dentry(file)->d_iname, "stage2_levels")) > > > + seq_printf(m, "%1d\n", KVM_PGTABLE_LAST_LEVEL - pgtable->start_level + 1); > > > > nit: KVM_PGTABLE_MAX_LEVELS - pgtable->start_level ? > > > > > + return 0; > > > +} > > > + > > > +static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file) > > > +{ > > > + struct kvm *kvm = m->i_private; > > > + struct kvm_pgtable *pgtable; > > > + int ret; > > > + > > > + if (!kvm_get_kvm_safe(kvm)) > > > + return -ENOENT; > > > + > > > + pgtable = kvm->arch.mmu.pgt; > > > + > > > + ret = single_open(file, kvm_pgtable_debugfs_show, pgtable); > > > + if (ret < 0) > > > + kvm_put_kvm(kvm); > > > + return ret; > > > +} > > > + > > > +static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file) > > > +{ > > > + struct kvm *kvm = m->i_private; > > > + > > > + kvm_put_kvm(kvm); > > > + return single_release(m, file); > > > +} > > > + > > > +static const struct file_operations kvm_pgtable_debugfs_fops = { > > > + .open = kvm_pgtable_debugfs_open, > > > + .read = seq_read, > > > + .llseek = seq_lseek, > > > + .release = kvm_pgtable_debugfs_close, > > > +}; > > > + > > > +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) > > > +{ > > > + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry, > > > + kvm, &kvm_ptdump_guest_fops); > > > + debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm, > > > + &kvm_pgtable_debugfs_fops); > > > + debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry, > > > + kvm, &kvm_pgtable_debugfs_fops); > > > +} > > I'd expect something like this instead: > > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c > index e72a928d4445..c11ea355aa51 100644 > --- a/arch/arm64/kvm/ptdump.c > +++ b/arch/arm64/kvm/ptdump.c > @@ -192,19 +191,24 @@ static const struct file_operations kvm_ptdump_guest_fops = { > .release = kvm_ptdump_guest_close, > }; > > -static int kvm_pgtable_debugfs_show(struct seq_file *m, void *unused) > +static int kvm_pgtable_range_show(struct seq_file *m, void *unused) > +{ > + struct kvm_pgtable *pgtable = m->private; > + > + seq_printf(m, "%2u\n", pgtable->ia_bits); > + return 0; > +} > + > +static int kvm_pgtable_levels_show(struct seq_file *m, void *unused) > { > - const struct file *file = m->file; > struct kvm_pgtable *pgtable = m->private; > > - if (!strcmp(file_dentry(file)->d_iname, "ipa_range")) > - seq_printf(m, "%2u\n", pgtable->ia_bits); > - else if (!strcmp(file_dentry(file)->d_iname, "stage2_levels")) > - seq_printf(m, "%1d\n", KVM_PGTABLE_LAST_LEVEL - pgtable->start_level + 1); > + seq_printf(m, "%1d\n", KVM_PGTABLE_LAST_LEVEL - pgtable->start_level + 1); > return 0; > } > > -static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file) > +static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file, > + int (*show)(struct seq_file *, void *)) > { > struct kvm *kvm = m->i_private; > struct kvm_pgtable *pgtable; > @@ -215,12 +219,22 @@ static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file) > > pgtable = kvm->arch.mmu.pgt; > > - ret = single_open(file, kvm_pgtable_debugfs_show, pgtable); > + ret = single_open(file, show, pgtable); > if (ret < 0) > kvm_put_kvm(kvm); > return ret; > } > > +static int kvm_pgtable_range_open(struct inode *m, struct file *file) > +{ > + return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_range_show); > +} > + > +static int kvm_pgtable_levels_open(struct inode *m, struct file *file) > +{ > + return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_levels_show); > +} > + > static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file) > { > struct kvm *kvm = m->i_private; > @@ -229,8 +243,15 @@ static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file) > return single_release(m, file); > } > > -static const struct file_operations kvm_pgtable_debugfs_fops = { > - .open = kvm_pgtable_debugfs_open, > +static const struct file_operations kvm_pgtable_range_fops = { > + .open = kvm_pgtable_range_open, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = kvm_pgtable_debugfs_close, > +}; > + > +static const struct file_operations kvm_pgtable_levels_fops = { > + .open = kvm_pgtable_levels_open, > .read = seq_read, > .llseek = seq_lseek, > .release = kvm_pgtable_debugfs_close, > @@ -241,7 +262,7 @@ void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) > debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry, > kvm, &kvm_ptdump_guest_fops); > debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm, > - &kvm_pgtable_debugfs_fops); > + &kvm_pgtable_range_fops); > debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry, > - kvm, &kvm_pgtable_debugfs_fops); > + kvm, &kvm_pgtable_levels_fops); > } > > > Thanks, > > M. > Thanks, Seb > -- > Without deviation from the norm, progress is not possible.
On Fri, Aug 30, 2024 at 11:24:53AM +0100, Vincent Donnefort wrote: > Hi Seb, > > Thanks for the respin. > > On Tue, Aug 27, 2024 at 08:45:47AM +0000, Sebastian Ene wrote: > > While arch/*/mem/ptdump handles the kernel pagetable dumping code, > > introduce KVM/ptdump to show the guest stage-2 pagetables. The > > separation is necessary because most of the definitions from the > > stage-2 pagetable reside in the KVM path and we will be invoking > > functionality specific to KVM. > > > > When a guest is created, register a new file entry under the guest > > debugfs dir which allows userspace to show the contents of the guest > > stage-2 pagetables when accessed. > > > > Signed-off-by: Sebastian Ene <sebastianene@google.com> > > I only have some nits, otherwise: Hello Vincent, > > Reviewed-by: Vincent Donnefort <vdonnefort@google.com> > Thanks for giving me consistent feedback on the series. I will incorporate your latest suggestions in my patch series and add the tag. > > --- > > arch/arm64/include/asm/kvm_host.h | 6 + > > arch/arm64/kvm/Makefile | 1 + > > arch/arm64/kvm/arm.c | 1 + > > arch/arm64/kvm/ptdump.c | 247 ++++++++++++++++++++++++++++++ > > 4 files changed, 255 insertions(+) > > create mode 100644 arch/arm64/kvm/ptdump.c > > > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > > index a33f5996ca9f..4acd589f086b 100644 > > --- a/arch/arm64/include/asm/kvm_host.h > > +++ b/arch/arm64/include/asm/kvm_host.h > > @@ -1473,4 +1473,10 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); > > (pa + pi + pa3) == 1; \ > > }) > > > > +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS > > +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); > > +#else > > +static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {} > > +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */ > > + > > #endif /* __ARM64_KVM_HOST_H__ */ > > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile > > index 86a629aaf0a1..e4233b323a73 100644 > > --- a/arch/arm64/kvm/Makefile > > +++ b/arch/arm64/kvm/Makefile > > @@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ > > > > kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o > > kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o > > +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o > > > > always-y := hyp_constants.h hyp-constants.s > > > > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > > index 9bef7638342e..b9fd928d3477 100644 > > --- a/arch/arm64/kvm/arm.c > > +++ b/arch/arm64/kvm/arm.c > > @@ -228,6 +228,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) > > void kvm_arch_create_vm_debugfs(struct kvm *kvm) > > { > > kvm_sys_regs_create_debugfs(kvm); > > + kvm_s2_ptdump_create_debugfs(kvm); > > } > > > > static void kvm_destroy_mpidr_data(struct kvm *kvm) > > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c > > new file mode 100644 > > index 000000000000..e72a928d4445 > > --- /dev/null > > +++ b/arch/arm64/kvm/ptdump.c > > @@ -0,0 +1,247 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* > > + * Debug helper used to dump the stage-2 pagetables of the system and their > > + * associated permissions. > > + * > > + * Copyright (C) Google, 2024 > > + * Author: Sebastian Ene <sebastianene@google.com> > > + */ > > +#include <linux/debugfs.h> > > +#include <linux/kvm_host.h> > > +#include <linux/seq_file.h> > > + > > +#include <asm/kvm_pgtable.h> > > +#include <asm/kvm_host.h> > > nit: I believe you wanted to follow the alphabetical order, if that is the case, > kvm_host.h then kvm_pgtable.h > > > +#include <asm/ptdump.h> > > + > > + > > nit: don't think double empty are a rule, I would remove it. > Ack. > > +#define MARKERS_LEN (2) > > nit: The brackets are not necessary for MARKERS_LEN. > > > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1) > > + > > +struct kvm_ptdump_guest_state { > > + struct kvm *kvm; > > + struct ptdump_pg_state parser_state; > > + struct addr_marker ipa_marker[MARKERS_LEN]; > > + struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS]; > > + struct ptdump_range range[MARKERS_LEN]; > > +}; > > + > > +static const struct ptdump_prot_bits stage2_pte_bits[] = { > > + { > > + .mask = PTE_VALID, > > + .val = PTE_VALID, > > + .set = " ", > > + .clear = "F", > > This is effectively never used because an invalid PTE is 0 and note_page() won't > print it. This probably can be removed? > Please see my previous reply to this. I would keep it around as it should print out non-zero invalid PTEs. > > + }, { > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, > > + .set = "R", > > + .clear = " ", > > + }, { > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, > > + .set = "W", > > + .clear = " ", > > + }, { > > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID, > > + .val = PTE_VALID, > > + .set = " ", > > + .clear = "X", > > + }, { > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, > > + .set = "AF", > > + .clear = " ", > > + }, { > > + .mask = PTE_TABLE_BIT | PTE_VALID, > > + .val = PTE_VALID, > > + .set = "BLK", > > + .clear = " ", > > + }, > > +}; > > + > > +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx, > > + enum kvm_pgtable_walk_flags visit) > > +{ > > + struct ptdump_pg_state *st = ctx->arg; > > + struct ptdump_state *pt_st = &st->ptdump; > > + > > + note_page(pt_st, ctx->addr, ctx->level, ctx->old); > > + > > + return 0; > > +} > > + > > +static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl) > > +{ > > + u32 i; > > + u64 mask; > > + > > + if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL)) > > + return -EINVAL; > > + > > + mask = 0; > > + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++) > > + mask |= stage2_pte_bits[i].mask; > > + > > + for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) { > > + snprintf(level[i].name, sizeof(level[i].name), "%d", i); > > %u, i being unsigned. Ack. > > > + > > + level[i].num = ARRAY_SIZE(stage2_pte_bits); > > + level[i].bits = stage2_pte_bits; > > + level[i].mask = mask; > > + } > > + > > + return 0; > > +} > > + > > +static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm *kvm) > > +{ > > + struct kvm_ptdump_guest_state *st; > > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; > > + struct kvm_pgtable *pgtable = mmu->pgt; > > + int ret; > > + > > + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT); > > + if (!st) > > + return ERR_PTR(-ENOMEM); > > + > > + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level); > > + if (ret) { > > + kfree(st); > > + return ERR_PTR(ret); > > + } > > + > > + st->ipa_marker[0].name = "Guest IPA"; > > + st->ipa_marker[1].start_address = BIT(pgtable->ia_bits); > > + st->range[0].end = BIT(pgtable->ia_bits); > > + > > + st->kvm = kvm; > > + st->parser_state = (struct ptdump_pg_state) { > > + .marker = &st->ipa_marker[0], > > + .level = -1, > > + .pg_level = &st->level[0], > > + .ptdump.range = &st->range[0], > > + .start_address = 0, > > + }; > > + > > + return st; > > +} > > + > > +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused) > > +{ > > + int ret; > > + struct kvm_ptdump_guest_state *st = m->private; > > + struct kvm *kvm = st->kvm; > > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; > > + struct ptdump_pg_state *parser_state = &st->parser_state; > > + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) { > > + .cb = kvm_ptdump_visitor, > > + .arg = parser_state, > > + .flags = KVM_PGTABLE_WALK_LEAF, > > + }; > > + > > + parser_state->seq = m; > > + > > + write_lock(&kvm->mmu_lock); > > + ret = kvm_pgtable_walk(mmu->pgt, 0, BIT(mmu->pgt->ia_bits), &walker); > > + write_unlock(&kvm->mmu_lock); > > + > > + return ret; > > +} > > + > > +static int kvm_ptdump_guest_open(struct inode *m, struct file *file) > > +{ > > + struct kvm *kvm = m->i_private; > > + struct kvm_ptdump_guest_state *st; > > + int ret; > > + > > + if (!kvm_get_kvm_safe(kvm)) > > + return -ENOENT; > > + > > + st = kvm_ptdump_parser_create(kvm); > > + if (IS_ERR(st)) { > > + ret = PTR_ERR(st); > > + goto free_with_kvm_ref; > > + } > > + > > + ret = single_open(file, kvm_ptdump_guest_show, st); > > + if (!ret) > > + return 0; > > + > > + kfree(st); > > +free_with_kvm_ref: > > nit: I believe kfree understands IS_ERR() so you could have a simple "err:" > label covering all the error path. > > > + kvm_put_kvm(kvm); > > + return ret; > > +} > > + > > +static int kvm_ptdump_guest_close(struct inode *m, struct file *file) > > +{ > > + struct kvm *kvm = m->i_private; > > + void *st = ((struct seq_file *)file->private_data)->private; > > + > > + kfree(st); > > + kvm_put_kvm(kvm); > > + > > + return single_release(m, file); > > +} > > + > > +static const struct file_operations kvm_ptdump_guest_fops = { > > + .open = kvm_ptdump_guest_open, > > + .read = seq_read, > > + .llseek = seq_lseek, > > + .release = kvm_ptdump_guest_close, > > +}; > > + > > +static int kvm_pgtable_debugfs_show(struct seq_file *m, void *unused) > > +{ > > + const struct file *file = m->file; > > + struct kvm_pgtable *pgtable = m->private; > > + > > + if (!strcmp(file_dentry(file)->d_iname, "ipa_range")) > > + seq_printf(m, "%2u\n", pgtable->ia_bits); > > + else if (!strcmp(file_dentry(file)->d_iname, "stage2_levels")) > > + seq_printf(m, "%1d\n", KVM_PGTABLE_LAST_LEVEL - pgtable->start_level + 1); > > nit: KVM_PGTABLE_MAX_LEVELS - pgtable->start_level ? Yes, we can use this one. > > > + return 0; > > +} > > + > > +static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file) > > +{ > > + struct kvm *kvm = m->i_private; > > + struct kvm_pgtable *pgtable; > > + int ret; > > + > > + if (!kvm_get_kvm_safe(kvm)) > > + return -ENOENT; > > + > > + pgtable = kvm->arch.mmu.pgt; > > + > > + ret = single_open(file, kvm_pgtable_debugfs_show, pgtable); > > + if (ret < 0) > > + kvm_put_kvm(kvm); > > + return ret; > > +} > > + > > +static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file) > > +{ > > + struct kvm *kvm = m->i_private; > > + > > + kvm_put_kvm(kvm); > > + return single_release(m, file); > > +} > > + > > +static const struct file_operations kvm_pgtable_debugfs_fops = { > > + .open = kvm_pgtable_debugfs_open, > > + .read = seq_read, > > + .llseek = seq_lseek, > > + .release = kvm_pgtable_debugfs_close, > > +}; > > + > > +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) > > +{ > > + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry, > > + kvm, &kvm_ptdump_guest_fops); > > + debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm, > > + &kvm_pgtable_debugfs_fops); > > + debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry, > > + kvm, &kvm_pgtable_debugfs_fops); > > +} > > -- > > 2.46.0.295.g3b9ea8a38a-goog > >
[...] > > > +static int kvm_ptdump_guest_open(struct inode *m, struct file *file) > > > +{ > > > + struct kvm *kvm = m->i_private; > > > + struct kvm_ptdump_guest_state *st; > > > + int ret; > > > + > > > + if (!kvm_get_kvm_safe(kvm)) > > > + return -ENOENT; > > > + > > > + st = kvm_ptdump_parser_create(kvm); > > > + if (IS_ERR(st)) { > > > + ret = PTR_ERR(st); > > > + goto free_with_kvm_ref; > > > + } > > > + > > > + ret = single_open(file, kvm_ptdump_guest_show, st); > > > + if (!ret) > > > + return 0; > > > + > > > + kfree(st); > > > +free_with_kvm_ref: > > > > nit: I believe kfree understands IS_ERR() so you could have a simple "err:" > > label covering all the error path. > > I couldn't find such handling in kfree(). Could you point be to it? My aplogies, I was confused by the DEFINE_FREE(kfree ...) for __free(). kfree() only checks for null ptr. Although, I wonder if the naming "free_with_kvm_ref" isn't an artifact from previous code? Nothing is freeed here. So perhaps err_with_kvm_ref? which could be shorten as this is the only label? [...]
On Mon, Sep 02, 2024 at 12:13:12PM +0100, Vincent Donnefort wrote: > [...] > > > > > +static int kvm_ptdump_guest_open(struct inode *m, struct file *file) > > > > +{ > > > > + struct kvm *kvm = m->i_private; > > > > + struct kvm_ptdump_guest_state *st; > > > > + int ret; > > > > + > > > > + if (!kvm_get_kvm_safe(kvm)) > > > > + return -ENOENT; > > > > + > > > > + st = kvm_ptdump_parser_create(kvm); > > > > + if (IS_ERR(st)) { > > > > + ret = PTR_ERR(st); > > > > + goto free_with_kvm_ref; > > > > + } > > > > + > > > > + ret = single_open(file, kvm_ptdump_guest_show, st); > > > > + if (!ret) > > > > + return 0; > > > > + > > > > + kfree(st); > > > > +free_with_kvm_ref: > > > > > > nit: I believe kfree understands IS_ERR() so you could have a simple "err:" > > > label covering all the error path. > > > > I couldn't find such handling in kfree(). Could you point be to it? > > My aplogies, I was confused by the DEFINE_FREE(kfree ...) for __free(). kfree() > only checks for null ptr. > > Although, I wonder if the naming "free_with_kvm_ref" isn't an artifact from > previous code? Nothing is freeed here. So perhaps err_with_kvm_ref? which could > be shorten as this is the only label? > > [...] Yes, I guess that works better. Thanks for checking, Seb
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a33f5996ca9f..4acd589f086b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1473,4 +1473,10 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); (pa + pi + pa3) == 1; \ }) +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); +#else +static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {} +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */ + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 86a629aaf0a1..e4233b323a73 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o always-y := hyp_constants.h hyp-constants.s diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9bef7638342e..b9fd928d3477 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -228,6 +228,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) void kvm_arch_create_vm_debugfs(struct kvm *kvm) { kvm_sys_regs_create_debugfs(kvm); + kvm_s2_ptdump_create_debugfs(kvm); } static void kvm_destroy_mpidr_data(struct kvm *kvm) diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c new file mode 100644 index 000000000000..e72a928d4445 --- /dev/null +++ b/arch/arm64/kvm/ptdump.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Debug helper used to dump the stage-2 pagetables of the system and their + * associated permissions. + * + * Copyright (C) Google, 2024 + * Author: Sebastian Ene <sebastianene@google.com> + */ +#include <linux/debugfs.h> +#include <linux/kvm_host.h> +#include <linux/seq_file.h> + +#include <asm/kvm_pgtable.h> +#include <asm/kvm_host.h> +#include <asm/ptdump.h> + + +#define MARKERS_LEN (2) +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1) + +struct kvm_ptdump_guest_state { + struct kvm *kvm; + struct ptdump_pg_state parser_state; + struct addr_marker ipa_marker[MARKERS_LEN]; + struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS]; + struct ptdump_range range[MARKERS_LEN]; +}; + +static const struct ptdump_prot_bits stage2_pte_bits[] = { + { + .mask = PTE_VALID, + .val = PTE_VALID, + .set = " ", + .clear = "F", + }, { + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, + .set = "R", + .clear = " ", + }, { + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, + .set = "W", + .clear = " ", + }, { + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID, + .val = PTE_VALID, + .set = " ", + .clear = "X", + }, { + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, + .set = "AF", + .clear = " ", + }, { + .mask = PTE_TABLE_BIT | PTE_VALID, + .val = PTE_VALID, + .set = "BLK", + .clear = " ", + }, +}; + +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) +{ + struct ptdump_pg_state *st = ctx->arg; + struct ptdump_state *pt_st = &st->ptdump; + + note_page(pt_st, ctx->addr, ctx->level, ctx->old); + + return 0; +} + +static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl) +{ + u32 i; + u64 mask; + + if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL)) + return -EINVAL; + + mask = 0; + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++) + mask |= stage2_pte_bits[i].mask; + + for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) { + snprintf(level[i].name, sizeof(level[i].name), "%d", i); + + level[i].num = ARRAY_SIZE(stage2_pte_bits); + level[i].bits = stage2_pte_bits; + level[i].mask = mask; + } + + return 0; +} + +static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm *kvm) +{ + struct kvm_ptdump_guest_state *st; + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; + struct kvm_pgtable *pgtable = mmu->pgt; + int ret; + + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT); + if (!st) + return ERR_PTR(-ENOMEM); + + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level); + if (ret) { + kfree(st); + return ERR_PTR(ret); + } + + st->ipa_marker[0].name = "Guest IPA"; + st->ipa_marker[1].start_address = BIT(pgtable->ia_bits); + st->range[0].end = BIT(pgtable->ia_bits); + + st->kvm = kvm; + st->parser_state = (struct ptdump_pg_state) { + .marker = &st->ipa_marker[0], + .level = -1, + .pg_level = &st->level[0], + .ptdump.range = &st->range[0], + .start_address = 0, + }; + + return st; +} + +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused) +{ + int ret; + struct kvm_ptdump_guest_state *st = m->private; + struct kvm *kvm = st->kvm; + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; + struct ptdump_pg_state *parser_state = &st->parser_state; + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) { + .cb = kvm_ptdump_visitor, + .arg = parser_state, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + parser_state->seq = m; + + write_lock(&kvm->mmu_lock); + ret = kvm_pgtable_walk(mmu->pgt, 0, BIT(mmu->pgt->ia_bits), &walker); + write_unlock(&kvm->mmu_lock); + + return ret; +} + +static int kvm_ptdump_guest_open(struct inode *m, struct file *file) +{ + struct kvm *kvm = m->i_private; + struct kvm_ptdump_guest_state *st; + int ret; + + if (!kvm_get_kvm_safe(kvm)) + return -ENOENT; + + st = kvm_ptdump_parser_create(kvm); + if (IS_ERR(st)) { + ret = PTR_ERR(st); + goto free_with_kvm_ref; + } + + ret = single_open(file, kvm_ptdump_guest_show, st); + if (!ret) + return 0; + + kfree(st); +free_with_kvm_ref: + kvm_put_kvm(kvm); + return ret; +} + +static int kvm_ptdump_guest_close(struct inode *m, struct file *file) +{ + struct kvm *kvm = m->i_private; + void *st = ((struct seq_file *)file->private_data)->private; + + kfree(st); + kvm_put_kvm(kvm); + + return single_release(m, file); +} + +static const struct file_operations kvm_ptdump_guest_fops = { + .open = kvm_ptdump_guest_open, + .read = seq_read, + .llseek = seq_lseek, + .release = kvm_ptdump_guest_close, +}; + +static int kvm_pgtable_debugfs_show(struct seq_file *m, void *unused) +{ + const struct file *file = m->file; + struct kvm_pgtable *pgtable = m->private; + + if (!strcmp(file_dentry(file)->d_iname, "ipa_range")) + seq_printf(m, "%2u\n", pgtable->ia_bits); + else if (!strcmp(file_dentry(file)->d_iname, "stage2_levels")) + seq_printf(m, "%1d\n", KVM_PGTABLE_LAST_LEVEL - pgtable->start_level + 1); + return 0; +} + +static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file) +{ + struct kvm *kvm = m->i_private; + struct kvm_pgtable *pgtable; + int ret; + + if (!kvm_get_kvm_safe(kvm)) + return -ENOENT; + + pgtable = kvm->arch.mmu.pgt; + + ret = single_open(file, kvm_pgtable_debugfs_show, pgtable); + if (ret < 0) + kvm_put_kvm(kvm); + return ret; +} + +static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file) +{ + struct kvm *kvm = m->i_private; + + kvm_put_kvm(kvm); + return single_release(m, file); +} + +static const struct file_operations kvm_pgtable_debugfs_fops = { + .open = kvm_pgtable_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = kvm_pgtable_debugfs_close, +}; + +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) +{ + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry, + kvm, &kvm_ptdump_guest_fops); + debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm, + &kvm_pgtable_debugfs_fops); + debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry, + kvm, &kvm_pgtable_debugfs_fops); +}
While arch/*/mem/ptdump handles the kernel pagetable dumping code, introduce KVM/ptdump to show the guest stage-2 pagetables. The separation is necessary because most of the definitions from the stage-2 pagetable reside in the KVM path and we will be invoking functionality specific to KVM. When a guest is created, register a new file entry under the guest debugfs dir which allows userspace to show the contents of the guest stage-2 pagetables when accessed. Signed-off-by: Sebastian Ene <sebastianene@google.com> --- arch/arm64/include/asm/kvm_host.h | 6 + arch/arm64/kvm/Makefile | 1 + arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/ptdump.c | 247 ++++++++++++++++++++++++++++++ 4 files changed, 255 insertions(+) create mode 100644 arch/arm64/kvm/ptdump.c