Message ID | 20231115171639.2852644-12-sebastianene@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64: ptdump: View the second stage page-tables | expand |
On Wed, Nov 15, 2023 at 05:16:40PM +0000, Sebastian Ene wrote: > +struct ptdump_registered_guest { > + struct list_head reg_list; > + struct ptdump_info info; > + struct kvm_pgtable_snapshot snapshot; > + rwlock_t *lock; > +}; Why can't we just store a pointer directly to struct kvm in ::private? Also, shouldn't you take a reference on struct kvm when the file is opened to protect against VM teardown? > +static LIST_HEAD(ptdump_guest_list); > +static DEFINE_MUTEX(ptdump_list_lock); What is the list for? > static phys_addr_t ptdump_host_pa(void *addr) > { > return __pa(addr); > @@ -757,6 +768,63 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info) > kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker); > } > > +static void guest_stage2_ptdump_walk(struct seq_file *s, > + struct ptdump_info *info) > +{ > + struct ptdump_info_file_priv *f_priv = > + container_of(info, struct ptdump_info_file_priv, info); > + struct ptdump_registered_guest *guest = info->priv; > + > + f_priv->file_priv = &guest->snapshot; > + > + read_lock(guest->lock); > + stage2_ptdump_walk(s, info); > + read_unlock(guest->lock); Taking the mmu lock for read allows other table walkers to add new mappings and adjust the granularity of existing ones. Should this instead take the mmu lock for write? > +} > + > +int ptdump_register_guest_stage2(struct kvm *kvm) > +{ > + struct ptdump_registered_guest *guest; > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; > + struct kvm_pgtable *pgt = mmu->pgt; > + > + guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL); You want GFP_KERNEL_ACCOUNT here.
On Wed, Nov 22, 2023 at 11:35:57PM +0000, Oliver Upton wrote: > On Wed, Nov 15, 2023 at 05:16:40PM +0000, Sebastian Ene wrote: > > +struct ptdump_registered_guest { > > + struct list_head reg_list; > > + struct ptdump_info info; > > + struct kvm_pgtable_snapshot snapshot; > > + rwlock_t *lock; > > +}; > > Why can't we just store a pointer directly to struct kvm in ::private? I don't think it will work unless we expect a struct kvm_pgtable in stage2_ptdump_walk file_priv field. I think it is a good ideea and will simplify things a little bit dropping kvm_pgtable_snapshot from here. The current code has some fileds that are reduntant (the priv pointers) because I also made this to work with protected guests where we can't access their pagetables directly. > Also, shouldn't you take a reference on struct kvm when the file is > opened to protect against VM teardown? > I am not sure about the need could you please elaborate a bit ? On VM teardown we expect ptdump_unregister_guest_stage2 to be invoked. > > +static LIST_HEAD(ptdump_guest_list); > > +static DEFINE_MUTEX(ptdump_list_lock); > > What is the list for? > I am keeping a list of registered guests with ptdump and the lock should protect the list against concurent VM teardowns. > > static phys_addr_t ptdump_host_pa(void *addr) > > { > > return __pa(addr); > > @@ -757,6 +768,63 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info) > > kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker); > > } > > > > +static void guest_stage2_ptdump_walk(struct seq_file *s, > > + struct ptdump_info *info) > > +{ > > + struct ptdump_info_file_priv *f_priv = > > + container_of(info, struct ptdump_info_file_priv, info); > > + struct ptdump_registered_guest *guest = info->priv; > > + > > + f_priv->file_priv = &guest->snapshot; > > + > > + read_lock(guest->lock); > > + stage2_ptdump_walk(s, info); > > + read_unlock(guest->lock); > > Taking the mmu lock for read allows other table walkers to add new > mappings and adjust the granularity of existing ones. Should this > instead take the mmu lock for write? > Thanks for pointing our, this is exactly what I was trying to avoid, so yes I should use the write mmu lock in this case. > > +} > > + > > +int ptdump_register_guest_stage2(struct kvm *kvm) > > +{ > > + struct ptdump_registered_guest *guest; > > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; > > + struct kvm_pgtable *pgt = mmu->pgt; > > + > > + guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL); > > You want GFP_KERNEL_ACCOUNT here. > Right, thanks this is because it is an untrusted allocation triggered from userspace. > -- > Thanks, > Oliver Thank you, Seb
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index de5a5a0c0ecf..21b281715d27 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -5,6 +5,8 @@ #ifndef __ASM_PTDUMP_H #define __ASM_PTDUMP_H +#include <asm/kvm_pgtable.h> + #ifdef CONFIG_PTDUMP_CORE #include <linux/mm_types.h> @@ -23,6 +25,7 @@ struct ptdump_info { int (*ptdump_prepare_walk)(void *file_priv); void (*ptdump_end_walk)(void *file_priv); size_t mc_len; + void *priv; }; void ptdump_walk(struct seq_file *s, struct ptdump_info *info); @@ -48,8 +51,12 @@ void ptdump_check_wx(void); #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS void ptdump_register_host_stage2(void); +int ptdump_register_guest_stage2(struct kvm *kvm); +void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt); #else static inline void ptdump_register_host_stage2(void) { } +static inline int ptdump_register_guest_stage2(struct kvm *kvm) { return 0; } +static inline void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt) { } #endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */ #ifdef CONFIG_DEBUG_WX diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 8725291cb00a..555d022f8ad9 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -13,6 +13,7 @@ #include <asm/kvm_asm.h> #include <asm/kvm_arm.h> #include <asm/kvm_emulate.h> +#include <asm/ptdump.h> #include "trace.h" @@ -342,3 +343,8 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu) vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE); vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE); } + +int kvm_arch_create_vm_debugfs(struct kvm *kvm) +{ + return ptdump_register_guest_stage2(kvm); +} diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index d87c8fcc4c24..da45050596e6 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -11,6 +11,7 @@ #include <linux/sched/signal.h> #include <trace/events/kvm.h> #include <asm/pgalloc.h> +#include <asm/ptdump.h> #include <asm/cacheflush.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> @@ -1021,6 +1022,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) write_unlock(&kvm->mmu_lock); if (pgt) { + ptdump_unregister_guest_stage2(pgt); kvm_pgtable_stage2_destroy(pgt); kfree(pgt); } diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index ffb87237078b..741764cff105 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -27,6 +27,7 @@ #include <asm/kvm_pkvm.h> #include <asm/kvm_pgtable.h> #include <asm/kvm_host.h> +#include <asm/kvm_mmu.h> enum address_markers_idx { @@ -519,6 +520,16 @@ void ptdump_check_wx(void) #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS static struct ptdump_info stage2_kernel_ptdump_info; +struct ptdump_registered_guest { + struct list_head reg_list; + struct ptdump_info info; + struct kvm_pgtable_snapshot snapshot; + rwlock_t *lock; +}; + +static LIST_HEAD(ptdump_guest_list); +static DEFINE_MUTEX(ptdump_list_lock); + static phys_addr_t ptdump_host_pa(void *addr) { return __pa(addr); @@ -757,6 +768,63 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info) kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker); } +static void guest_stage2_ptdump_walk(struct seq_file *s, + struct ptdump_info *info) +{ + struct ptdump_info_file_priv *f_priv = + container_of(info, struct ptdump_info_file_priv, info); + struct ptdump_registered_guest *guest = info->priv; + + f_priv->file_priv = &guest->snapshot; + + read_lock(guest->lock); + stage2_ptdump_walk(s, info); + read_unlock(guest->lock); +} + +int ptdump_register_guest_stage2(struct kvm *kvm) +{ + struct ptdump_registered_guest *guest; + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; + struct kvm_pgtable *pgt = mmu->pgt; + + guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL); + if (!guest) + return -ENOMEM; + + memcpy(&guest->snapshot.pgtable, pgt, sizeof(struct kvm_pgtable)); + guest->info = (struct ptdump_info) { + .ptdump_walk = guest_stage2_ptdump_walk, + }; + + guest->info.priv = guest; + guest->lock = &kvm->mmu_lock; + mutex_lock(&ptdump_list_lock); + + ptdump_debugfs_kvm_register(&guest->info, "stage2_page_tables", + kvm->debugfs_dentry); + + list_add(&guest->reg_list, &ptdump_guest_list); + mutex_unlock(&ptdump_list_lock); + + return 0; +} + +void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt) +{ + struct ptdump_registered_guest *guest; + + mutex_lock(&ptdump_list_lock); + list_for_each_entry(guest, &ptdump_guest_list, reg_list) { + if (guest->snapshot.pgtable.pgd == pgt->pgd) { + list_del(&guest->reg_list); + kfree(guest); + break; + } + } + mutex_unlock(&ptdump_list_lock); +} + void ptdump_register_host_stage2(void) { if (!is_protected_kvm_enabled())
Register a debugfs file on guest creation to be able to view their second translation tables with ptdump. This assumes that the host is in control of the guest stage-2 and has direct access to the pagetables. Signed-off-by: Sebastian Ene <sebastianene@google.com> --- arch/arm64/include/asm/ptdump.h | 7 ++++ arch/arm64/kvm/debug.c | 6 +++ arch/arm64/kvm/mmu.c | 2 + arch/arm64/mm/ptdump.c | 68 +++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+)