@@ -29,9 +29,8 @@ void copy_user_highpage(struct page *to, struct page *from,
void copy_highpage(struct page *to, struct page *from);
#define __HAVE_ARCH_COPY_HIGHPAGE
-struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
- unsigned long vaddr);
-#define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio
+#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
+ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false);
void tag_clear_highpage(struct page *to);
#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
@@ -1065,6 +1065,9 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
#endif /* CONFIG_ARM64_MTE */
+#define __HAVE_ARCH_CALC_VMA_GFP
+gfp_t arch_calc_vma_gfp(struct vm_area_struct *vma, gfp_t gfp);
+
/*
* On AArch64, the cache coherency is handled via the set_pte_at() function.
*/
@@ -935,22 +935,15 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
NOKPROBE_SYMBOL(do_debug_exception);
/*
- * Used during anonymous page fault handling.
+ * If this is called during anonymous page fault handling, and the page is
+ * mapped with PROT_MTE, initialise the tags at the point of tag zeroing as this
+ * is usually faster than separate DC ZVA and STGM.
*/
-struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
- unsigned long vaddr)
+gfp_t arch_calc_vma_gfp(struct vm_area_struct *vma, gfp_t gfp)
{
- gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_ZERO;
-
- /*
- * If the page is mapped with PROT_MTE, initialise the tags at the
- * point of allocation and page zeroing as this is usually faster than
- * separate DC ZVA and STGM.
- */
if (vma->vm_flags & VM_MTE)
- flags |= __GFP_TAGGED;
-
- return vma_alloc_folio(flags, 0, vma, vaddr, false);
+ return __GFP_TAGGED;
+ return 0;
}
void tag_clear_highpage(struct page *page)
@@ -873,6 +873,13 @@ static inline void arch_do_swap_page(struct mm_struct *mm,
}
#endif
+#ifndef __HAVE_ARCH_CALC_VMA_GFP
+static inline gfp_t arch_calc_vma_gfp(struct vm_area_struct *vma, gfp_t gfp)
+{
+ return 0;
+}
+#endif
+
#ifndef __HAVE_ARCH_PREP_NEW_PAGE
static inline int arch_prep_new_page(struct page *page, int order, gfp_t gfp)
{
@@ -2170,6 +2170,7 @@ struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
pgoff_t ilx;
struct page *page;
+ gfp |= arch_calc_vma_gfp(vma, gfp);
pol = get_vma_policy(vma, addr, order, &ilx);
page = alloc_pages_mpol(gfp | __GFP_COMP, order,
pol, ilx, numa_node_id());
@@ -1570,7 +1570,7 @@ static struct folio *shmem_swapin_cluster(swp_entry_t swap, gfp_t gfp,
*/
static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
{
- gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
+ gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM | __GFP_TAGGED;
gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY;
gfp_t zoneflags = limit_gfp & GFP_ZONEMASK;
gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK);
@@ -2023,6 +2023,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
gfp_t huge_gfp;
huge_gfp = vma_thp_gfp_mask(vma);
+ huge_gfp |= arch_calc_vma_gfp(vma, huge_gfp);
huge_gfp = limit_gfp_mask(huge_gfp, gfp);
folio = shmem_alloc_and_add_folio(huge_gfp,
inode, index, fault_mm, true);
@@ -2199,6 +2200,8 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
vm_fault_t ret = 0;
int err;
+ gfp |= arch_calc_vma_gfp(vmf->vma, gfp);
+
/*
* Trinity finds that probing a hole which tmpfs is punching can
* prevent the hole-punch from ever completing: noted in i_private.
An architecture might want to fixup the gfp flags based on the type of VMA where the page will be mapped. On arm64, this is currently used if the VMA is MTE enabled. When __GFP_TAGGED is set, for performance reasons, tag zeroing is performed at the same time as the data is zeroed, instead of being performed separately, in set_pte_at() -> mte_sync_tags(). Its usage will be expanded when the storage for the tags will have to be explicitely managed by the kernel. Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com> --- arch/arm64/include/asm/page.h | 5 ++--- arch/arm64/include/asm/pgtable.h | 3 +++ arch/arm64/mm/fault.c | 19 ++++++------------- include/linux/pgtable.h | 7 +++++++ mm/mempolicy.c | 1 + mm/shmem.c | 5 ++++- 6 files changed, 23 insertions(+), 17 deletions(-)