Message ID | 20240313042118.230397-1-rmclure@linux.ibm.com (mailing list archive) |
---|---|
Headers | show |
Series | Support page table check PowerPC | expand |
Hi, Le 13/03/2024 à 05:21, Rohan McLure a écrit : > Replace occurrences of p{u,m,4}d_is_leaf with p{u,m,4}_leaf, as the > latter is the name given to checking that a higher-level entry in > multi-level paging contains a page translation entry (pte) throughout > all other archs. There's already an equivalent commit in mm-stable, that will likely go into v6.9: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git/commit/?h=mm-stable&id=bd18b688220c7225fb50498dabd9f9d0c9988e67 > > Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu> > Signed-off-by: Rohan McLure <rmclure@linux.ibm.com> > --- > v9: No longer required in order to implement page table check, just a > refactor. > v10: Fix more occurances, and just delete p{u,m,4}_is_leaf() stubs as > equivalent p{u,m,4}_leaf() stubs already exist. > --- > arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++++---- > arch/powerpc/include/asm/pgtable.h | 24 -------------------- > arch/powerpc/kvm/book3s_64_mmu_radix.c | 12 +++++----- > arch/powerpc/mm/book3s64/radix_pgtable.c | 14 ++++++------ > arch/powerpc/mm/pgtable.c | 6 ++--- > arch/powerpc/mm/pgtable_64.c | 6 ++--- > arch/powerpc/xmon/xmon.c | 6 ++--- > 7 files changed, 26 insertions(+), 52 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h > index 62c43d3d80ec..382724c5e872 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h > @@ -1443,16 +1443,14 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va > /* > * Like pmd_huge() and pmd_large(), but works regardless of config options > */ > -#define pmd_is_leaf pmd_is_leaf > -#define pmd_leaf pmd_is_leaf > -static inline bool pmd_is_leaf(pmd_t pmd) > +#define pmd_leaf pmd_leaf > +static inline bool pmd_leaf(pmd_t pmd) > { > return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); > } > > -#define pud_is_leaf pud_is_leaf > -#define pud_leaf pud_is_leaf > 
-static inline bool pud_is_leaf(pud_t pud) > +#define pud_leaf pud_leaf > +static inline bool pud_leaf(pud_t pud) > { > return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); > } > diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h > index 9224f23065ff..0c0ffbe7a3b5 100644 > --- a/arch/powerpc/include/asm/pgtable.h > +++ b/arch/powerpc/include/asm/pgtable.h > @@ -180,30 +180,6 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p) > } > #endif > > -#ifndef pmd_is_leaf > -#define pmd_is_leaf pmd_is_leaf > -static inline bool pmd_is_leaf(pmd_t pmd) > -{ > - return false; > -} > -#endif > - > -#ifndef pud_is_leaf > -#define pud_is_leaf pud_is_leaf > -static inline bool pud_is_leaf(pud_t pud) > -{ > - return false; > -} > -#endif > - > -#ifndef p4d_is_leaf > -#define p4d_is_leaf p4d_is_leaf > -static inline bool p4d_is_leaf(p4d_t p4d) > -{ > - return false; > -} > -#endif > - > #define pmd_pgtable pmd_pgtable > static inline pgtable_t pmd_pgtable(pmd_t pmd) > { > diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c > index 4a1abb9f7c05..408d98f8a514 100644 > --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c > +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c > @@ -503,7 +503,7 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full, > for (im = 0; im < PTRS_PER_PMD; ++im, ++p) { > if (!pmd_present(*p)) > continue; > - if (pmd_is_leaf(*p)) { > + if (pmd_leaf(*p)) { > if (full) { > pmd_clear(p); > } else { > @@ -532,7 +532,7 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud, > for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) { > if (!pud_present(*p)) > continue; > - if (pud_is_leaf(*p)) { > + if (pud_leaf(*p)) { > pud_clear(p); > } else { > pmd_t *pmd; > @@ -635,12 +635,12 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > new_pud = pud_alloc_one(kvm->mm, gpa); > > pmd = NULL; > - if (pud && pud_present(*pud) && !pud_is_leaf(*pud)) > + if (pud && 
pud_present(*pud) && !pud_leaf(*pud)) > pmd = pmd_offset(pud, gpa); > else if (level <= 1) > new_pmd = kvmppc_pmd_alloc(); > > - if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd))) > + if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_leaf(*pmd))) > new_ptep = kvmppc_pte_alloc(); > > /* Check if we might have been invalidated; let the guest retry if so */ > @@ -658,7 +658,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > new_pud = NULL; > } > pud = pud_offset(p4d, gpa); > - if (pud_is_leaf(*pud)) { > + if (pud_leaf(*pud)) { > unsigned long hgpa = gpa & PUD_MASK; > > /* Check if we raced and someone else has set the same thing */ > @@ -709,7 +709,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > new_pmd = NULL; > } > pmd = pmd_offset(pud, gpa); > - if (pmd_is_leaf(*pmd)) { > + if (pmd_leaf(*pmd)) { > unsigned long lgpa = gpa & PMD_MASK; > > /* Check if we raced and someone else has set the same thing */ > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c > index e16e2fd104c5..46fa46ce6526 100644 > --- a/arch/powerpc/mm/book3s64/radix_pgtable.c > +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c > @@ -204,14 +204,14 @@ static void radix__change_memory_range(unsigned long start, unsigned long end, > pudp = pud_alloc(&init_mm, p4dp, idx); > if (!pudp) > continue; > - if (pud_is_leaf(*pudp)) { > + if (pud_leaf(*pudp)) { > ptep = (pte_t *)pudp; > goto update_the_pte; > } > pmdp = pmd_alloc(&init_mm, pudp, idx); > if (!pmdp) > continue; > - if (pmd_is_leaf(*pmdp)) { > + if (pmd_leaf(*pmdp)) { > ptep = pmdp_ptep(pmdp); > goto update_the_pte; > } > @@ -767,7 +767,7 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, > if (!pmd_present(*pmd)) > continue; > > - if (pmd_is_leaf(*pmd)) { > + if (pmd_leaf(*pmd)) { > if (IS_ALIGNED(addr, PMD_SIZE) && > IS_ALIGNED(next, PMD_SIZE)) { > if (!direct) > @@ -807,7 +807,7 @@ static void __meminit 
remove_pud_table(pud_t *pud_start, unsigned long addr, > if (!pud_present(*pud)) > continue; > > - if (pud_is_leaf(*pud)) { > + if (pud_leaf(*pud)) { > if (!IS_ALIGNED(addr, PUD_SIZE) || > !IS_ALIGNED(next, PUD_SIZE)) { > WARN_ONCE(1, "%s: unaligned range\n", __func__); > @@ -845,7 +845,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct, > if (!p4d_present(*p4d)) > continue; > > - if (p4d_is_leaf(*p4d)) { > + if (p4d_leaf(*p4d)) { > if (!IS_ALIGNED(addr, P4D_SIZE) || > !IS_ALIGNED(next, P4D_SIZE)) { > WARN_ONCE(1, "%s: unaligned range\n", __func__); > @@ -1540,7 +1540,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) > > int pud_clear_huge(pud_t *pud) > { > - if (pud_is_leaf(*pud)) { > + if (pud_leaf(*pud)) { > pud_clear(pud); > return 1; > } > @@ -1587,7 +1587,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) > > int pmd_clear_huge(pmd_t *pmd) > { > - if (pmd_is_leaf(*pmd)) { > + if (pmd_leaf(*pmd)) { > pmd_clear(pmd); > return 1; > } > diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c > index a04ae4449a02..e8e0289d7ab0 100644 > --- a/arch/powerpc/mm/pgtable.c > +++ b/arch/powerpc/mm/pgtable.c > @@ -413,7 +413,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, > if (p4d_none(p4d)) > return NULL; > > - if (p4d_is_leaf(p4d)) { > + if (p4d_leaf(p4d)) { > ret_pte = (pte_t *)p4dp; > goto out; > } > @@ -435,7 +435,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, > if (pud_none(pud)) > return NULL; > > - if (pud_is_leaf(pud)) { > + if (pud_leaf(pud)) { > ret_pte = (pte_t *)pudp; > goto out; > } > @@ -474,7 +474,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, > goto out; > } > > - if (pmd_is_leaf(pmd)) { > + if (pmd_leaf(pmd)) { > ret_pte = (pte_t *)pmdp; > goto out; > } > diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c > index 5ac1fd30341b..0604c80dae66 100644 > --- a/arch/powerpc/mm/pgtable_64.c > +++ b/arch/powerpc/mm/pgtable_64.c > @@ 
-100,7 +100,7 @@ EXPORT_SYMBOL(__pte_frag_size_shift); > /* 4 level page table */ > struct page *p4d_page(p4d_t p4d) > { > - if (p4d_is_leaf(p4d)) { > + if (p4d_leaf(p4d)) { > if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) > VM_WARN_ON(!p4d_huge(p4d)); > return pte_page(p4d_pte(p4d)); > @@ -111,7 +111,7 @@ struct page *p4d_page(p4d_t p4d) > > struct page *pud_page(pud_t pud) > { > - if (pud_is_leaf(pud)) { > + if (pud_leaf(pud)) { > if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) > VM_WARN_ON(!pud_huge(pud)); > return pte_page(pud_pte(pud)); > @@ -125,7 +125,7 @@ struct page *pud_page(pud_t pud) > */ > struct page *pmd_page(pmd_t pmd) > { > - if (pmd_is_leaf(pmd)) { > + if (pmd_leaf(pmd)) { > /* > * vmalloc_to_page may be called on any vmap address (not only > * vmalloc), and it uses pmd_page() etc., when huge vmap is > diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c > index c85fa3f0dd3b..d79d6633f333 100644 > --- a/arch/powerpc/xmon/xmon.c > +++ b/arch/powerpc/xmon/xmon.c > @@ -3340,7 +3340,7 @@ static void show_pte(unsigned long addr) > return; > } > > - if (p4d_is_leaf(*p4dp)) { > + if (p4d_leaf(*p4dp)) { > format_pte(p4dp, p4d_val(*p4dp)); > return; > } > @@ -3354,7 +3354,7 @@ static void show_pte(unsigned long addr) > return; > } > > - if (pud_is_leaf(*pudp)) { > + if (pud_leaf(*pudp)) { > format_pte(pudp, pud_val(*pudp)); > return; > } > @@ -3368,7 +3368,7 @@ static void show_pte(unsigned long addr) > return; > } > > - if (pmd_is_leaf(*pmdp)) { > + if (pmd_leaf(*pmdp)) { > format_pte(pmdp, pmd_val(*pmdp)); > return; > }
Le 13/03/2024 à 05:21, Rohan McLure a écrit : > Page table checking depends on architectures providing an > implementation of p{te,md,ud}_user_accessible_page. With > refactorisations made on powerpc/mm, the pte_access_permitted() and > similar methods verify whether a userland page is accessible with the > required permissions. > > Since page table checking is the only user of > p{te,md,ud}_user_accessible_page(), implement these for all platforms, > using some of the same preliminary checks taken by pte_access_permitted() > on that platform. > > Since Commit 8e9bd41e4ce1 ("powerpc/nohash: Replace pte_user() by pte_read()") > pte_user() is no longer required to be present on all platforms as it > may be equivalent to or implied by pte_read(). Hence implementations are > specialised. > > Signed-off-by: Rohan McLure <rmclure@linux.ibm.com> > --- > v9: New implementation > v10: Let book3s/64 use pte_user(), but otherwise default other platforms > to using the address provided with the call to infer whether it is a > user page or not. 
pmd/pud variants will warn on all other platforms, as > they should not be used for user page mappings > --- > arch/powerpc/include/asm/book3s/64/pgtable.h | 19 ++++++++++++++ > arch/powerpc/include/asm/pgtable.h | 26 ++++++++++++++++++++ > 2 files changed, 45 insertions(+) > > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h > index 382724c5e872..ca765331e21d 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h > @@ -538,6 +538,12 @@ static inline bool pte_access_permitted(pte_t pte, bool write) > return arch_pte_access_permitted(pte_val(pte), write, 0); > } > > +#define pte_user_accessible_page pte_user_accessible_page > +static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr) > +{ > + return pte_present(pte) && pte_user(pte); > +} > + > /* > * Conversion functions: convert a page and protection to a page entry, > * and a page entry and page directory to the page they refer to. > @@ -881,6 +887,7 @@ static inline int pud_present(pud_t pud) > > extern struct page *pud_page(pud_t pud); > extern struct page *pmd_page(pmd_t pmd); > + Garbage ? > static inline pte_t pud_pte(pud_t pud) > { > return __pte_raw(pud_raw(pud)); > @@ -926,6 +933,12 @@ static inline bool pud_access_permitted(pud_t pud, bool write) > return pte_access_permitted(pud_pte(pud), write); > } > > +#define pud_user_accessible_page pud_user_accessible_page > +static inline bool pud_user_accessible_page(pud_t pud, unsigned long addr) > +{ > + return pte_user_accessible_page(pud_pte(pud), addr); > +} > + If I understand what is done on arm64, you should first check pud_leaf(). Then this function could be common to all powerpc platforms, only pte_user_accessible_page() would be platform specific. 
> #define __p4d_raw(x) ((p4d_t) { __pgd_raw(x) }) > static inline __be64 p4d_raw(p4d_t x) > { > @@ -1091,6 +1104,12 @@ static inline bool pmd_access_permitted(pmd_t pmd, bool write) > return pte_access_permitted(pmd_pte(pmd), write); > } > > +#define pmd_user_accessible_page pmd_user_accessible_page > +static inline bool pmd_user_accessible_page(pmd_t pmd, unsigned long addr) > +{ > + return pte_user_accessible_page(pmd_pte(pmd), addr); > +} Same, pmd_leaf() should be checked. > + > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); > extern pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot); > diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h > index 13f661831333..3741a63fb82e 100644 > --- a/arch/powerpc/include/asm/pgtable.h > +++ b/arch/powerpc/include/asm/pgtable.h > @@ -227,6 +227,32 @@ static inline int pud_pfn(pud_t pud) > } > #endif > > +#ifndef pte_user_accessible_page > +#define pte_user_accessible_page pte_user_accessible_page > +static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr) > +{ > + return pte_present(pte) && !is_kernel_addr(addr); > +} > +#endif I would prefer to see one version in asm/book3s/32/pgtable.h and one in asm/nohash/pgtable.h and then avoid this game with ifdefs. > + > +#ifndef pmd_user_accessible_page > +#define pmd_user_accessible_page pmd_user_accessible_page > +static inline bool pmd_user_accessible_page(pmd_t pmd, unsigned long addr) > +{ > + WARN_ONCE(1, "pmd: platform does not use pmd entries directly"); > + return false; > +} > +#endif Also check pmd_leaf() and this function on all platforms. > + > +#ifndef pud_user_accessible_page > +#define pud_user_accessible_page pud_user_accessible_page > +static inline bool pud_user_accessible_page(pud_t pud, unsigned long addr) > +{ > + WARN_ONCE(1, "pud: platform does not use pud entries directly"); > + return false; > +} Also check pud_leaf() and this function on all platforms. 
> +#endif > + > #endif /* __ASSEMBLY__ */ > > #endif /* _ASM_POWERPC_PGTABLE_H */
Le 13/03/2024 à 05:21, Rohan McLure a écrit : > In the new set_ptes() API, set_pte_at() (a special case of set_ptes()) > is intended to be instrumented by the page table check facility. There > are however several other routines that constitute the API for setting > page table entries, including set_pmd_at() among others. Such routines > are themselves implemented in terms of set_ptes(). > > A future patch providing support for page table checking on powerpc > must take care to avoid duplicate calls to > page_table_check_p{te,md,ud}_set(). Allow for assignment of pte entries > without instrumentation through the set_pte_at_unchecked() routine > introduced in this patch. > > Cause API-facing routines that call set_pte_at() to instead call > set_pte_at_unchecked(), which will remain uninstrumented by page > table check. set_ptes() is itself implemented by calls to > __set_pte_at(), so this eliminates redundant code. > > Also prefer set_pte_at_unchecked() in early-boot usages which should not be > instrumented. > > Signed-off-by: Rohan McLure <rmclure@linux.ibm.com> > --- > v9: New patch > v10: don't reuse __set_pte_at(), as that will not apply filters. Instead > use new set_pte_at_unchecked(). Are filters needed at all in those use cases ? 
> --- > arch/powerpc/include/asm/pgtable.h | 2 ++ > arch/powerpc/mm/book3s64/hash_pgtable.c | 2 +- > arch/powerpc/mm/book3s64/pgtable.c | 6 +++--- > arch/powerpc/mm/book3s64/radix_pgtable.c | 8 ++++---- > arch/powerpc/mm/nohash/book3e_pgtable.c | 2 +- > arch/powerpc/mm/pgtable.c | 7 +++++++ > arch/powerpc/mm/pgtable_32.c | 2 +- > 7 files changed, 19 insertions(+), 10 deletions(-) > > diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h > index 3741a63fb82e..6ff1d8cfa216 100644 > --- a/arch/powerpc/include/asm/pgtable.h > +++ b/arch/powerpc/include/asm/pgtable.h > @@ -44,6 +44,8 @@ struct mm_struct; > void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, > pte_t pte, unsigned int nr); > #define set_ptes set_ptes > +void set_pte_at_unchecked(struct mm_struct *mm, unsigned long addr, > + pte_t *ptep, pte_t pte); > #define update_mmu_cache(vma, addr, ptep) \ > update_mmu_cache_range(NULL, vma, addr, ptep, 1) > > diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c > index 988948d69bc1..871472f99a01 100644 > --- a/arch/powerpc/mm/book3s64/hash_pgtable.c > +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c > @@ -165,7 +165,7 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > ptep = pte_alloc_kernel(pmdp, ea); > if (!ptep) > return -ENOMEM; > - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot)); > + set_pte_at_unchecked(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot)); > } else { > /* > * If the mm subsystem is not fully up, we cannot create a > diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c > index 3438ab72c346..25082ab6018b 100644 > --- a/arch/powerpc/mm/book3s64/pgtable.c > +++ b/arch/powerpc/mm/book3s64/pgtable.c > @@ -116,7 +116,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, > WARN_ON(!(pmd_large(pmd))); > #endif > trace_hugepage_set_pmd(addr, pmd_val(pmd)); > - return set_pte_at(mm, 
addr, pmdp_ptep(pmdp), pmd_pte(pmd)); > + return set_pte_at_unchecked(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); > } > > void set_pud_at(struct mm_struct *mm, unsigned long addr, > @@ -133,7 +133,7 @@ void set_pud_at(struct mm_struct *mm, unsigned long addr, > WARN_ON(!(pud_large(pud))); > #endif > trace_hugepage_set_pud(addr, pud_val(pud)); > - return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud)); > + return set_pte_at_unchecked(mm, addr, pudp_ptep(pudp), pud_pte(pud)); > } > > static void do_serialize(void *arg) > @@ -539,7 +539,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, > if (radix_enabled()) > return radix__ptep_modify_prot_commit(vma, addr, > ptep, old_pte, pte); > - set_pte_at(vma->vm_mm, addr, ptep, pte); > + set_pte_at_unchecked(vma->vm_mm, addr, ptep, pte); > } > > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c > index 46fa46ce6526..c661e42bb2f1 100644 > --- a/arch/powerpc/mm/book3s64/radix_pgtable.c > +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c > @@ -109,7 +109,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, > ptep = pte_offset_kernel(pmdp, ea); > > set_the_pte: > - set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); > + set_pte_at_unchecked(&init_mm, ea, ptep, pfn_pte(pfn, flags)); > asm volatile("ptesync": : :"memory"); > return 0; > } > @@ -1522,7 +1522,7 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, > (atomic_read(&mm->context.copros) > 0)) > radix__flush_tlb_page(vma, addr); > > - set_pte_at(mm, addr, ptep, pte); > + set_pte_at_unchecked(mm, addr, ptep, pte); > } > > int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) > @@ -1533,7 +1533,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) > if (!radix_enabled()) > return 0; > > - set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud); > + set_pte_at_unchecked(&init_mm, 0 /* radix unused */, 
ptep, new_pud); > > return 1; > } > @@ -1580,7 +1580,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) > if (!radix_enabled()) > return 0; > > - set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd); > + set_pte_at_unchecked(&init_mm, 0 /* radix unused */, ptep, new_pmd); > > return 1; > } > diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c > index 1c5e4ecbebeb..10d487b2b991 100644 > --- a/arch/powerpc/mm/nohash/book3e_pgtable.c > +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c > @@ -111,7 +111,7 @@ int __ref map_kernel_page(unsigned long ea, phys_addr_t pa, pgprot_t prot) > } > ptep = pte_offset_kernel(pmdp, ea); > } > - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot)); > + set_pte_at_unchecked(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot)); > > smp_wmb(); > return 0; > diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c > index e8e0289d7ab0..352679cf2684 100644 > --- a/arch/powerpc/mm/pgtable.c > +++ b/arch/powerpc/mm/pgtable.c > @@ -227,6 +227,13 @@ void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, > } > } > > +void set_pte_at_unchecked(struct mm_struct *mm, unsigned long addr, > + pte_t *ptep, pte_t pte) > +{ No need of the VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); which is in set_ptes() ? 
> + pte = set_pte_filter(pte, addr); > + __set_pte_at(mm, addr, ptep, pte, 0); > +} > + > void unmap_kernel_page(unsigned long va) > { > pmd_t *pmdp = pmd_off_k(va); > diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c > index 4be97b4a44f9..a5a26faf91ec 100644 > --- a/arch/powerpc/mm/pgtable_32.c > +++ b/arch/powerpc/mm/pgtable_32.c > @@ -89,7 +89,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) > * hash table > */ > BUG_ON((pte_present(*pg) | pte_hashpte(*pg)) && pgprot_val(prot)); > - set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, prot)); > + set_pte_at_unchecked(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, prot)); > } > smp_wmb(); > return err;
On Wed, 2024-03-13 at 11:30 +0000, Christophe Leroy wrote: > > > Le 13/03/2024 à 05:21, Rohan McLure a écrit : > > In the new set_ptes() API, set_pte_at() (a special case of > > set_ptes()) > > is intended to be instrumented by the page table check facility. > > There > > are however several other routines that constitute the API for > > setting > > page table entries, including set_pmd_at() among others. Such > > routines > > are themselves implemented in terms of set_ptes_at(). > > > > A future patch providing support for page table checking on powerpc > > must take care to avoid duplicate calls to > > page_table_check_p{te,md,ud}_set(). Allow for assignment of pte > > entries > > without instrumentation through the set_pte_at_unchecked() routine > > introduced in this patch. > > > > Cause API-facing routines that call set_pte_at() to instead call > > set_pte_at_unchecked(), which will remain uninstrumented by page > > table check. set_ptes() is itself implemented by calls to > > __set_pte_at(), so this eliminates redundant code. > > > > Also prefer set_pte_at_unchecked() in early-boot usages which > > should not be > > instrumented. > > > > Signed-off-by: Rohan McLure <rmclure@linux.ibm.com> > > --- > > v9: New patch > > v10: don't reuse __set_pte_at(), as that will not apply filters. > > Instead > > use new set_pte_at_unchecked(). > > Are filters needed at all in those usecases ? I'm just retaining the original semantics of these calls. I think another patch can replace this call with __set_pte_at() if filters are deemed unnecessary. 
> > > --- > > arch/powerpc/include/asm/pgtable.h | 2 ++ > > arch/powerpc/mm/book3s64/hash_pgtable.c | 2 +- > > arch/powerpc/mm/book3s64/pgtable.c | 6 +++--- > > arch/powerpc/mm/book3s64/radix_pgtable.c | 8 ++++---- > > arch/powerpc/mm/nohash/book3e_pgtable.c | 2 +- > > arch/powerpc/mm/pgtable.c | 7 +++++++ > > arch/powerpc/mm/pgtable_32.c | 2 +- > > 7 files changed, 19 insertions(+), 10 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/pgtable.h > > b/arch/powerpc/include/asm/pgtable.h > > index 3741a63fb82e..6ff1d8cfa216 100644 > > --- a/arch/powerpc/include/asm/pgtable.h > > +++ b/arch/powerpc/include/asm/pgtable.h > > @@ -44,6 +44,8 @@ struct mm_struct; > > void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t > > *ptep, > > pte_t pte, unsigned int nr); > > #define set_ptes set_ptes > > +void set_pte_at_unchecked(struct mm_struct *mm, unsigned long > > addr, > > + pte_t *ptep, pte_t pte); > > #define update_mmu_cache(vma, addr, ptep) \ > > update_mmu_cache_range(NULL, vma, addr, ptep, 1) > > > > diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c > > b/arch/powerpc/mm/book3s64/hash_pgtable.c > > index 988948d69bc1..871472f99a01 100644 > > --- a/arch/powerpc/mm/book3s64/hash_pgtable.c > > +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c > > @@ -165,7 +165,7 @@ int hash__map_kernel_page(unsigned long ea, > > unsigned long pa, pgprot_t prot) > > ptep = pte_alloc_kernel(pmdp, ea); > > if (!ptep) > > return -ENOMEM; > > - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> > > PAGE_SHIFT, prot)); > > + set_pte_at_unchecked(&init_mm, ea, ptep, > > pfn_pte(pa >> PAGE_SHIFT, prot)); > > } else { > > /* > > * If the mm subsystem is not fully up, we cannot > > create a > > diff --git a/arch/powerpc/mm/book3s64/pgtable.c > > b/arch/powerpc/mm/book3s64/pgtable.c > > index 3438ab72c346..25082ab6018b 100644 > > --- a/arch/powerpc/mm/book3s64/pgtable.c > > +++ b/arch/powerpc/mm/book3s64/pgtable.c > > @@ -116,7 +116,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned > 
> long addr, > > WARN_ON(!(pmd_large(pmd))); > > #endif > > trace_hugepage_set_pmd(addr, pmd_val(pmd)); > > - return set_pte_at(mm, addr, pmdp_ptep(pmdp), > > pmd_pte(pmd)); > > + return set_pte_at_unchecked(mm, addr, pmdp_ptep(pmdp), > > pmd_pte(pmd)); > > } > > > > void set_pud_at(struct mm_struct *mm, unsigned long addr, > > @@ -133,7 +133,7 @@ void set_pud_at(struct mm_struct *mm, unsigned > > long addr, > > WARN_ON(!(pud_large(pud))); > > #endif > > trace_hugepage_set_pud(addr, pud_val(pud)); > > - return set_pte_at(mm, addr, pudp_ptep(pudp), > > pud_pte(pud)); > > + return set_pte_at_unchecked(mm, addr, pudp_ptep(pudp), > > pud_pte(pud)); > > } > > > > static void do_serialize(void *arg) > > @@ -539,7 +539,7 @@ void ptep_modify_prot_commit(struct > > vm_area_struct *vma, unsigned long addr, > > if (radix_enabled()) > > return radix__ptep_modify_prot_commit(vma, addr, > > ptep, > > old_pte, pte); > > - set_pte_at(vma->vm_mm, addr, ptep, pte); > > + set_pte_at_unchecked(vma->vm_mm, addr, ptep, pte); > > } > > > > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c > > b/arch/powerpc/mm/book3s64/radix_pgtable.c > > index 46fa46ce6526..c661e42bb2f1 100644 > > --- a/arch/powerpc/mm/book3s64/radix_pgtable.c > > +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c > > @@ -109,7 +109,7 @@ static int early_map_kernel_page(unsigned long > > ea, unsigned long pa, > > ptep = pte_offset_kernel(pmdp, ea); > > > > set_the_pte: > > - set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); > > + set_pte_at_unchecked(&init_mm, ea, ptep, pfn_pte(pfn, > > flags)); > > asm volatile("ptesync": : :"memory"); > > return 0; > > } > > @@ -1522,7 +1522,7 @@ void radix__ptep_modify_prot_commit(struct > > vm_area_struct *vma, > > (atomic_read(&mm->context.copros) > 0)) > > radix__flush_tlb_page(vma, addr); > > > > - set_pte_at(mm, addr, ptep, pte); > > + set_pte_at_unchecked(mm, addr, ptep, pte); > > } > > > > int pud_set_huge(pud_t *pud, phys_addr_t addr, 
pgprot_t prot) > > @@ -1533,7 +1533,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t > > addr, pgprot_t prot) > > if (!radix_enabled()) > > return 0; > > > > - set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud); > > + set_pte_at_unchecked(&init_mm, 0 /* radix unused */, ptep, > > new_pud); > > > > return 1; > > } > > @@ -1580,7 +1580,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t > > addr, pgprot_t prot) > > if (!radix_enabled()) > > return 0; > > > > - set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd); > > + set_pte_at_unchecked(&init_mm, 0 /* radix unused */, ptep, > > new_pmd); > > > > return 1; > > } > > diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c > > b/arch/powerpc/mm/nohash/book3e_pgtable.c > > index 1c5e4ecbebeb..10d487b2b991 100644 > > --- a/arch/powerpc/mm/nohash/book3e_pgtable.c > > +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c > > @@ -111,7 +111,7 @@ int __ref map_kernel_page(unsigned long ea, > > phys_addr_t pa, pgprot_t prot) > > } > > ptep = pte_offset_kernel(pmdp, ea); > > } > > - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, > > prot)); > > + set_pte_at_unchecked(&init_mm, ea, ptep, pfn_pte(pa >> > > PAGE_SHIFT, prot)); > > > > smp_wmb(); > > return 0; > > diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c > > index e8e0289d7ab0..352679cf2684 100644 > > --- a/arch/powerpc/mm/pgtable.c > > +++ b/arch/powerpc/mm/pgtable.c > > @@ -227,6 +227,13 @@ void set_ptes(struct mm_struct *mm, unsigned > > long addr, pte_t *ptep, > > } > > } > > > > +void set_pte_at_unchecked(struct mm_struct *mm, unsigned long > > addr, > > + pte_t *ptep, pte_t pte) > > +{ > > No need of the > > VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); > > which is in set_ptes() ? Good spot, I'll include that check in this routine. 
> > > + pte = set_pte_filter(pte, addr); > > + __set_pte_at(mm, addr, ptep, pte, 0); > > +} > > + > > void unmap_kernel_page(unsigned long va) > > { > > pmd_t *pmdp = pmd_off_k(va); > > diff --git a/arch/powerpc/mm/pgtable_32.c > > b/arch/powerpc/mm/pgtable_32.c > > index 4be97b4a44f9..a5a26faf91ec 100644 > > --- a/arch/powerpc/mm/pgtable_32.c > > +++ b/arch/powerpc/mm/pgtable_32.c > > @@ -89,7 +89,7 @@ int __ref map_kernel_page(unsigned long va, > > phys_addr_t pa, pgprot_t prot) > > * hash table > > */ > > BUG_ON((pte_present(*pg) | pte_hashpte(*pg)) && > > pgprot_val(prot)); > > - set_pte_at(&init_mm, va, pg, pfn_pte(pa >> > > PAGE_SHIFT, prot)); > > + set_pte_at_unchecked(&init_mm, va, pg, pfn_pte(pa > > >> PAGE_SHIFT, prot)); > > } > > smp_wmb(); > > return err;