Message ID | 20231019144032.2943044-13-sebastianene@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64: ptdump: View the second stage page-tables | expand |
On Thu, Oct 19, 2023 at 02:40:33PM +0000, Sebastian Ene wrote: > Register a debugfs file on guest creation to be able to view their > second translation tables with ptdump. This assumes that the host is in > control of the guest stage-2 and has direct access to the pagetables. What about pKVM? The walker you wrote for the host stage-2 should be reusable in that case? > > Signed-off-by: Sebastian Ene <sebastianene@google.com> > --- > arch/arm64/include/asm/ptdump.h | 21 +++++++-- > arch/arm64/kvm/mmu.c | 3 ++ > arch/arm64/mm/ptdump.c | 84 +++++++++++++++++++++++++++++++++ > arch/arm64/mm/ptdump_debugfs.c | 5 +- > 4 files changed, 108 insertions(+), 5 deletions(-) > > diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h > index 35b883524462..be86244d532b 100644 > --- a/arch/arm64/include/asm/ptdump.h > +++ b/arch/arm64/include/asm/ptdump.h > @@ -5,6 +5,8 @@ > #ifndef __ASM_PTDUMP_H > #define __ASM_PTDUMP_H > > +#include <asm/kvm_pgtable.h> > + > #ifdef CONFIG_PTDUMP_CORE > > #include <linux/mm_types.h> > @@ -30,14 +32,27 @@ struct ptdump_info { > void ptdump_walk(struct seq_file *s, struct ptdump_info *info); > #ifdef CONFIG_PTDUMP_DEBUGFS > #define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64 > -void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); > +struct dentry *ptdump_debugfs_register(struct ptdump_info *info, > + const char *name); > #else > -static inline void ptdump_debugfs_register(struct ptdump_info *info, > - const char *name) { } > +static inline struct dentry *ptdump_debugfs_register(struct ptdump_info *info, > + const char *name) > +{ > + return NULL; > +} > #endif > void ptdump_check_wx(void); > #endif /* CONFIG_PTDUMP_CORE */ > > +#ifdef CONFIG_NVHE_EL2_PTDUMP_DEBUGFS > +void ptdump_register_guest_stage2(struct kvm_pgtable *pgt, void *lock); > +void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt); > +#else > +static inline void ptdump_register_guest_stage2(struct kvm_pgtable *pgt, > + void 
*lock) { } > +static inline void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt) { } > +#endif /* CONFIG_NVHE_EL2_PTDUMP_DEBUGFS */ I believe this should be compatible with VHE as well, that option should be renamed. > + > #ifdef CONFIG_DEBUG_WX > #define debug_checkwx() ptdump_check_wx() > #else > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > index 482280fe22d7..e47988dba34d 100644 > --- a/arch/arm64/kvm/mmu.c > +++ b/arch/arm64/kvm/mmu.c > @@ -11,6 +11,7 @@ > #include <linux/sched/signal.h> > #include <trace/events/kvm.h> > #include <asm/pgalloc.h> > +#include <asm/ptdump.h> > #include <asm/cacheflush.h> > #include <asm/kvm_arm.h> > #include <asm/kvm_mmu.h> > @@ -908,6 +909,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t > if (err) > goto out_free_pgtable; > > + ptdump_register_guest_stage2(pgt, &kvm->mmu_lock); > mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); > if (!mmu->last_vcpu_ran) { > err = -ENOMEM; > @@ -1021,6 +1023,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) > write_unlock(&kvm->mmu_lock); > > if (pgt) { > + ptdump_unregister_guest_stage2(pgt); > kvm_pgtable_stage2_destroy(pgt); > kfree(pgt); > } > diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c > index 4687840dcb69..facfb15468f5 100644 > --- a/arch/arm64/mm/ptdump.c > +++ b/arch/arm64/mm/ptdump.c > @@ -26,6 +26,7 @@ > #include <asm/ptdump.h> > #include <asm/kvm_pkvm.h> > #include <asm/kvm_pgtable.h> > +#include <asm/kvm_host.h> > > > enum address_markers_idx { > @@ -543,6 +544,22 @@ void ptdump_check_wx(void) > #ifdef CONFIG_NVHE_EL2_PTDUMP_DEBUGFS > static struct ptdump_info stage2_kernel_ptdump_info; > > +#define GUEST_NAME_LEN (32U) > + > +struct ptdump_registered_guest { > + struct list_head reg_list; > + struct ptdump_info info; > + struct mm_struct mem; > + struct kvm_pgtable_snapshot snapshot; > + struct dentry *dentry; > + rwlock_t *lock; > + char reg_name[GUEST_NAME_LEN]; > +}; > + > +static 
LIST_HEAD(ptdump_guest_list); > +static DEFINE_MUTEX(ptdump_list_lock); > +static u16 guest_no; This is not robust enough: if one VM starts, followed by 65535 others which are then killed, guest_no overflows. The next number is 0, which is already taken. Linux has an ID allocator to solve this problem, but I don't think it is necessary anyway. This should simply reuse the struct kvm->debugfs_dentry. Also, most of the information contained in ptdump_registered_guest can probably be found in struct kvm. The debugfs file should then probably simply take struct kvm as its private argument. > + > static phys_addr_t ptdump_host_pa(void *addr) > { > return __pa(addr); > @@ -740,6 +757,73 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info) > > kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker); > } [...]
On Fri, Oct 20, 2023 at 09:40:06AM +0100, Vincent Donnefort wrote: > On Thu, Oct 19, 2023 at 02:40:33PM +0000, Sebastian Ene wrote: > > Register a debugfs file on guest creation to be able to view their > > second translation tables with ptdump. This assumes that the host is in > > control of the guest stage-2 and has direct access to the pagetables. > > What about pKVM? The walker you wrote for the host stage-2 should be > reusable in that case? > Yes, when pKVM will be ready upstream the walker which duplicates the pagetables for the host will be re-used for the guests. We will have to add a separate HVC for this which receives as an argument the guest vmid. > > > > Signed-off-by: Sebastian Ene <sebastianene@google.com> > > --- > > arch/arm64/include/asm/ptdump.h | 21 +++++++-- > > arch/arm64/kvm/mmu.c | 3 ++ > > arch/arm64/mm/ptdump.c | 84 +++++++++++++++++++++++++++++++++ > > arch/arm64/mm/ptdump_debugfs.c | 5 +- > > 4 files changed, 108 insertions(+), 5 deletions(-) > > > > diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h > > index 35b883524462..be86244d532b 100644 > > --- a/arch/arm64/include/asm/ptdump.h > > +++ b/arch/arm64/include/asm/ptdump.h > > @@ -5,6 +5,8 @@ > > #ifndef __ASM_PTDUMP_H > > #define __ASM_PTDUMP_H > > > > +#include <asm/kvm_pgtable.h> > > + > > #ifdef CONFIG_PTDUMP_CORE > > > > #include <linux/mm_types.h> > > @@ -30,14 +32,27 @@ struct ptdump_info { > > void ptdump_walk(struct seq_file *s, struct ptdump_info *info); > > #ifdef CONFIG_PTDUMP_DEBUGFS > > #define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64 > > -void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); > > +struct dentry *ptdump_debugfs_register(struct ptdump_info *info, > > + const char *name); > > #else > > -static inline void ptdump_debugfs_register(struct ptdump_info *info, > > - const char *name) { } > > +static inline struct dentry *ptdump_debugfs_register(struct ptdump_info *info, > > + const char *name) > > +{ > > + 
return NULL; > > +} > > #endif > > void ptdump_check_wx(void); > > #endif /* CONFIG_PTDUMP_CORE */ > > > > +#ifdef CONFIG_NVHE_EL2_PTDUMP_DEBUGFS > > +void ptdump_register_guest_stage2(struct kvm_pgtable *pgt, void *lock); > > +void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt); > > +#else > > +static inline void ptdump_register_guest_stage2(struct kvm_pgtable *pgt, > > + void *lock) { } > > +static inline void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt) { } > > +#endif /* CONFIG_NVHE_EL2_PTDUMP_DEBUGFS */ > > I believe this should be compatible with VHE as well, that option should be > renamed. > Good point, I will rename this. > > + > > #ifdef CONFIG_DEBUG_WX > > #define debug_checkwx() ptdump_check_wx() > > #else > > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > > index 482280fe22d7..e47988dba34d 100644 > > --- a/arch/arm64/kvm/mmu.c > > +++ b/arch/arm64/kvm/mmu.c > > @@ -11,6 +11,7 @@ > > #include <linux/sched/signal.h> > > #include <trace/events/kvm.h> > > #include <asm/pgalloc.h> > > +#include <asm/ptdump.h> > > #include <asm/cacheflush.h> > > #include <asm/kvm_arm.h> > > #include <asm/kvm_mmu.h> > > @@ -908,6 +909,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t > > if (err) > > goto out_free_pgtable; > > > > + ptdump_register_guest_stage2(pgt, &kvm->mmu_lock); > > mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); > > if (!mmu->last_vcpu_ran) { > > err = -ENOMEM; > > @@ -1021,6 +1023,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) > > write_unlock(&kvm->mmu_lock); > > > > if (pgt) { > > + ptdump_unregister_guest_stage2(pgt); > > kvm_pgtable_stage2_destroy(pgt); > > kfree(pgt); > > } > > diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c > > index 4687840dcb69..facfb15468f5 100644 > > --- a/arch/arm64/mm/ptdump.c > > +++ b/arch/arm64/mm/ptdump.c > > @@ -26,6 +26,7 @@ > > #include <asm/ptdump.h> > > #include <asm/kvm_pkvm.h> > > #include <asm/kvm_pgtable.h> > 
> +#include <asm/kvm_host.h> > > > > > > enum address_markers_idx { > > @@ -543,6 +544,22 @@ void ptdump_check_wx(void) > > #ifdef CONFIG_NVHE_EL2_PTDUMP_DEBUGFS > > static struct ptdump_info stage2_kernel_ptdump_info; > > > > +#define GUEST_NAME_LEN (32U) > > + > > +struct ptdump_registered_guest { > > + struct list_head reg_list; > > + struct ptdump_info info; > > + struct mm_struct mem; > > + struct kvm_pgtable_snapshot snapshot; > > + struct dentry *dentry; > > + rwlock_t *lock; > > + char reg_name[GUEST_NAME_LEN]; > > +}; > > + > > +static LIST_HEAD(ptdump_guest_list); > > +static DEFINE_MUTEX(ptdump_list_lock); > > +static u16 guest_no; > > This is not robust enough: if one VM starts, followed by 65535 others which are then killed, > guest_no overflows. The next number is 0, which is already taken. > Yes, I guess this should be improved. In the case you described we won't register any debugfs file because of the name clash. > Linux has an ID allocator to solve this problem, but I don't think it is > necessary anyway. This should simply reuse the struct kvm->debugfs_dentry. > > Also, most of the information contained in ptdump_registered_guest can probably > be found in struct kvm. The debugfs file should then probably simply take struct kvm > as its private argument. > I would prefer to keep it as a separate struct here, as it gives some flexibility if we need to extend it for pKVM guest support. I think we can drop the struct mm_struct from here. Thanks, Sebastian > > + > > static phys_addr_t ptdump_host_pa(void *addr) > > { > > return __pa(addr); > > @@ -740,6 +757,73 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info) > > > > kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker); > > } > > [...]
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 35b883524462..be86244d532b 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -5,6 +5,8 @@ #ifndef __ASM_PTDUMP_H #define __ASM_PTDUMP_H +#include <asm/kvm_pgtable.h> + #ifdef CONFIG_PTDUMP_CORE #include <linux/mm_types.h> @@ -30,14 +32,27 @@ struct ptdump_info { void ptdump_walk(struct seq_file *s, struct ptdump_info *info); #ifdef CONFIG_PTDUMP_DEBUGFS #define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64 -void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); +struct dentry *ptdump_debugfs_register(struct ptdump_info *info, + const char *name); #else -static inline void ptdump_debugfs_register(struct ptdump_info *info, - const char *name) { } +static inline struct dentry *ptdump_debugfs_register(struct ptdump_info *info, + const char *name) +{ + return NULL; +} #endif void ptdump_check_wx(void); #endif /* CONFIG_PTDUMP_CORE */ +#ifdef CONFIG_NVHE_EL2_PTDUMP_DEBUGFS +void ptdump_register_guest_stage2(struct kvm_pgtable *pgt, void *lock); +void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt); +#else +static inline void ptdump_register_guest_stage2(struct kvm_pgtable *pgt, + void *lock) { } +static inline void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt) { } +#endif /* CONFIG_NVHE_EL2_PTDUMP_DEBUGFS */ + #ifdef CONFIG_DEBUG_WX #define debug_checkwx() ptdump_check_wx() #else diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 482280fe22d7..e47988dba34d 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -11,6 +11,7 @@ #include <linux/sched/signal.h> #include <trace/events/kvm.h> #include <asm/pgalloc.h> +#include <asm/ptdump.h> #include <asm/cacheflush.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> @@ -908,6 +909,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t if (err) goto out_free_pgtable; + ptdump_register_guest_stage2(pgt, 
&kvm->mmu_lock); mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); if (!mmu->last_vcpu_ran) { err = -ENOMEM; @@ -1021,6 +1023,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) write_unlock(&kvm->mmu_lock); if (pgt) { + ptdump_unregister_guest_stage2(pgt); kvm_pgtable_stage2_destroy(pgt); kfree(pgt); } diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 4687840dcb69..facfb15468f5 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -26,6 +26,7 @@ #include <asm/ptdump.h> #include <asm/kvm_pkvm.h> #include <asm/kvm_pgtable.h> +#include <asm/kvm_host.h> enum address_markers_idx { @@ -543,6 +544,22 @@ void ptdump_check_wx(void) #ifdef CONFIG_NVHE_EL2_PTDUMP_DEBUGFS static struct ptdump_info stage2_kernel_ptdump_info; +#define GUEST_NAME_LEN (32U) + +struct ptdump_registered_guest { + struct list_head reg_list; + struct ptdump_info info; + struct mm_struct mem; + struct kvm_pgtable_snapshot snapshot; + struct dentry *dentry; + rwlock_t *lock; + char reg_name[GUEST_NAME_LEN]; +}; + +static LIST_HEAD(ptdump_guest_list); +static DEFINE_MUTEX(ptdump_list_lock); +static u16 guest_no; + static phys_addr_t ptdump_host_pa(void *addr) { return __pa(addr); @@ -740,6 +757,73 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info) kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker); } + +static void guest_stage2_ptdump_walk(struct seq_file *s, + struct ptdump_info *info) +{ + struct kvm_pgtable_snapshot *snapshot = info->priv; + struct ptdump_registered_guest *guest; + + guest = container_of(snapshot, struct ptdump_registered_guest, + snapshot); + read_lock(guest->lock); + stage2_ptdump_walk(s, info); + read_unlock(guest->lock); +} + +void ptdump_register_guest_stage2(struct kvm_pgtable *pgt, void *lock) +{ + struct ptdump_registered_guest *guest; + struct dentry *d; + + if (pgt == NULL || lock == NULL) + return; + + guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL); + if (!guest) + return; 
+ + memcpy(&guest->snapshot.pgtable, pgt, sizeof(struct kvm_pgtable)); + guest->info = (struct ptdump_info) { + .ptdump_walk = guest_stage2_ptdump_walk, + .priv = &guest->snapshot + }; + + mutex_init(&guest->info.file_lock); + guest->lock = lock; + mutex_lock(&ptdump_list_lock); + snprintf(guest->reg_name, GUEST_NAME_LEN, + "%u_guest_stage2_page_tables", guest_no++); + d = ptdump_debugfs_register(&guest->info, guest->reg_name); + if (!d) { + mutex_unlock(&ptdump_list_lock); + goto free_entry; + } + + guest->dentry = d; + list_add(&guest->reg_list, &ptdump_guest_list); + mutex_unlock(&ptdump_list_lock); + return; + +free_entry: + kfree(guest); +} + +void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt) +{ + struct ptdump_registered_guest *guest; + + mutex_lock(&ptdump_list_lock); + list_for_each_entry(guest, &ptdump_guest_list, reg_list) { + if (guest->snapshot.pgtable.pgd == pgt->pgd) { + list_del(&guest->reg_list); + debugfs_remove(guest->dentry); + kfree(guest); + break; + } + } + mutex_unlock(&ptdump_list_lock); +} #endif /* CONFIG_NVHE_EL2_PTDUMP_DEBUGFS */ static void __init ptdump_register_host_stage2(void) diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c index 14619452dd8d..356753e27dee 100644 --- a/arch/arm64/mm/ptdump_debugfs.c +++ b/arch/arm64/mm/ptdump_debugfs.c @@ -49,7 +49,8 @@ static const struct file_operations ptdump_fops = { .release = ptdump_release, }; -void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name) +struct dentry *ptdump_debugfs_register(struct ptdump_info *info, + const char *name) { - debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); + return debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); }
Register a debugfs file on guest creation so that the guest's stage-2 translation tables can be viewed with ptdump. This assumes that the host is in control of the guest stage-2 and has direct access to the page tables. Signed-off-by: Sebastian Ene <sebastianene@google.com> --- arch/arm64/include/asm/ptdump.h | 21 +++++++-- arch/arm64/kvm/mmu.c | 3 ++ arch/arm64/mm/ptdump.c | 84 +++++++++++++++++++++++++++++++++ arch/arm64/mm/ptdump_debugfs.c | 5 +- 4 files changed, 108 insertions(+), 5 deletions(-)