@@ -113,6 +113,7 @@ config ARM64
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_FRAME_POINTERS
+ select ARCH_WANT_GENERAL_HUGETLB_CONTPTE
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_EXECMEM_LATE
@@ -13,6 +13,7 @@
#include <asm/cacheflush.h>
#include <asm/mte.h>
#include <asm/page.h>
+#include <linux/hugetlb_contpte.h>
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
#define arch_hugetlb_migration_supported arch_hugetlb_migration_supported
@@ -53,8 +54,6 @@ extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
#define __HAVE_ARCH_HUGE_PTE_CLEAR
extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz);
-#define __HAVE_ARCH_HUGE_PTEP_GET
-extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
void __init arm64_hugetlb_cma_reserve(void);
@@ -420,9 +420,10 @@ static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
}
-static inline void __set_ptes(struct mm_struct *mm,
- unsigned long __always_unused addr,
- pte_t *ptep, pte_t pte, unsigned int nr)
+static inline void ___set_ptes(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pte_t *ptep, pte_t pte, unsigned int nr,
+ size_t pgsize)
{
page_table_check_ptes_set(mm, ptep, pte, nr);
__sync_cache_and_tags(pte, nr);
@@ -433,10 +434,15 @@ static inline void __set_ptes(struct mm_struct *mm,
if (--nr == 0)
break;
ptep++;
- pte = pte_advance_pfn(pte, 1);
+ pte = pte_advance_pfn(pte, pgsize >> PAGE_SHIFT);
}
}
+#define __set_ptes(mm, addr, ptep, pte, nr) \
+ ___set_ptes(mm, addr, ptep, pte, nr, PAGE_SIZE)
+
+#define set_contptes ___set_ptes
+
/*
* Hugetlb definitions.
*/
@@ -1825,6 +1831,49 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
#endif /* CONFIG_ARM64_CONTPTE */
+static inline bool __hugetlb_valid_size(unsigned long size)
+{
+ switch (size) {
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ return pud_sect_supported();
+#endif
+ case CONT_PMD_SIZE:
+ case PMD_SIZE:
+ case CONT_PTE_SIZE:
+ return true;
+ }
+
+ return false;
+}
+
+static inline int arch_contpte_get_num_contig(pte_t *ptep,
+ unsigned long size,
+ size_t *pgsize)
+{
+ int contig_ptes = 1;
+
+ if (pgsize)
+ *pgsize = size;
+
+ switch (size) {
+ case CONT_PMD_SIZE:
+ if (pgsize)
+ *pgsize = PMD_SIZE;
+ contig_ptes = CONT_PMDS;
+ break;
+ case CONT_PTE_SIZE:
+ if (pgsize)
+ *pgsize = PAGE_SIZE;
+ contig_ptes = CONT_PTES;
+ break;
+ default:
+ WARN_ON(!__hugetlb_valid_size(size));
+ }
+
+ return contig_ptes;
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
@@ -49,22 +49,6 @@ void __init arm64_hugetlb_cma_reserve(void)
}
#endif /* CONFIG_CMA */
-static bool __hugetlb_valid_size(unsigned long size)
-{
- switch (size) {
-#ifndef __PAGETABLE_PMD_FOLDED
- case PUD_SIZE:
- return pud_sect_supported();
-#endif
- case CONT_PMD_SIZE:
- case PMD_SIZE:
- case CONT_PTE_SIZE:
- return true;
- }
-
- return false;
-}
-
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
bool arch_hugetlb_migration_supported(struct hstate *h)
{
@@ -98,50 +82,6 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
return CONT_PTES;
}
-static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
-{
- int contig_ptes = 1;
-
- *pgsize = size;
-
- switch (size) {
- case CONT_PMD_SIZE:
- *pgsize = PMD_SIZE;
- contig_ptes = CONT_PMDS;
- break;
- case CONT_PTE_SIZE:
- *pgsize = PAGE_SIZE;
- contig_ptes = CONT_PTES;
- break;
- default:
- WARN_ON(!__hugetlb_valid_size(size));
- }
-
- return contig_ptes;
-}
-
-pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- int ncontig, i;
- size_t pgsize;
- pte_t orig_pte = __ptep_get(ptep);
-
- if (!pte_present(orig_pte) || !pte_cont(orig_pte))
- return orig_pte;
-
- ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize);
- for (i = 0; i < ncontig; i++, ptep++) {
- pte_t pte = __ptep_get(ptep);
-
- if (pte_dirty(pte))
- orig_pte = pte_mkdirty(orig_pte);
-
- if (pte_young(pte))
- orig_pte = pte_mkyoung(orig_pte);
- }
- return orig_pte;
-}
-
/*
* Changing some bits of contiguous entries requires us to follow a
* Break-Before-Make approach, breaking the whole contiguous set
@@ -221,7 +161,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
unsigned long pfn, dpfn;
pgprot_t hugeprot;
- ncontig = num_contig_ptes(sz, &pgsize);
+ ncontig = arch_contpte_get_num_contig(ptep, sz, &pgsize);
if (!pte_present(pte)) {
for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
@@ -382,7 +322,7 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
int i, ncontig;
size_t pgsize;
- ncontig = num_contig_ptes(sz, &pgsize);
+ ncontig = arch_contpte_get_num_contig(ptep, sz, &pgsize);
for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
__pte_clear(mm, addr, ptep);
@@ -394,7 +334,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
int ncontig;
size_t pgsize;
- ncontig = num_contig_ptes(sz, &pgsize);
+ ncontig = arch_contpte_get_num_contig(ptep, sz, &pgsize);
return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
}
@@ -79,6 +79,7 @@ config RISCV
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
+ select ARCH_WANT_GENERAL_HUGETLB_CONTPTE if RISCV_ISA_SVNAPOT
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select ARCH_WANT_LD_ORPHAN_WARN if !XIP_KERNEL
select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
@@ -4,6 +4,9 @@
#include <asm/cacheflush.h>
#include <asm/page.h>
+#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB_CONTPTE
+#include <linux/hugetlb_contpte.h>
+#endif
static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
@@ -44,9 +47,6 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty);
-#define __HAVE_ARCH_HUGE_PTEP_GET
-pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
#define arch_make_huge_pte arch_make_huge_pte
@@ -296,6 +296,8 @@ static inline unsigned long pte_napot(pte_t pte)
return pte_val(pte) & _PAGE_NAPOT;
}
+#define pte_cont pte_napot
+
#define pte_valid_napot(pte) (pte_present(pte) && pte_napot(pte))
/*
@@ -606,6 +608,49 @@ static inline void ___set_ptes(struct mm_struct *mm, unsigned long addr,
}
}
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+/*
+ * Some hugetlb functions can be called on !present ptes, so we must use the
+ * size parameter when it is passed.
+ */
+static inline int arch_contpte_get_num_contig(pte_t *ptep, unsigned long size,
+ size_t *pgsize)
+{
+ unsigned long hugepage_shift;
+ pte_t __pte;
+
+ if (size) {
+ if (size >= PGDIR_SIZE)
+ hugepage_shift = PGDIR_SHIFT;
+ else if (size >= P4D_SIZE)
+ hugepage_shift = P4D_SHIFT;
+ else if (size >= PUD_SIZE)
+ hugepage_shift = PUD_SHIFT;
+ else if (size >= PMD_SIZE)
+ hugepage_shift = PMD_SHIFT;
+ else
+ hugepage_shift = PAGE_SHIFT;
+ } else {
+ /*
+ * We must read the raw value of the pte to get the size of
+ * the mapping
+ */
+ __pte = ___ptep_get(ptep);
+
+ /* Make sure __pte is not a swap entry */
+ BUG_ON(!pte_valid_napot(__pte));
+
+ hugepage_shift = PAGE_SHIFT;
+ size = napot_cont_size(napot_cont_order(__pte));
+ }
+
+ if (pgsize)
+ *pgsize = BIT(hugepage_shift);
+
+ return size >> hugepage_shift;
+}
+#endif
+
static inline void pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
@@ -3,30 +3,6 @@
#include <linux/err.h>
#ifdef CONFIG_RISCV_ISA_SVNAPOT
-pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- unsigned long pte_num;
- int i;
- pte_t orig_pte = ptep_get(ptep);
-
- if (!pte_present(orig_pte) || !pte_napot(orig_pte))
- return orig_pte;
-
- pte_num = napot_pte_num(napot_cont_order(orig_pte));
-
- for (i = 0; i < pte_num; i++, ptep++) {
- pte_t pte = ptep_get(ptep);
-
- if (pte_dirty(pte))
- orig_pte = pte_mkdirty(orig_pte);
-
- if (pte_young(pte))
- orig_pte = pte_mkyoung(orig_pte);
- }
-
- return orig_pte;
-}
-
pte_t *huge_pte_alloc(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long addr,
@@ -215,26 +191,6 @@ static void clear_flush(struct mm_struct *mm,
flush_tlb_range(&vma, saddr, addr);
}
-static int num_contig_ptes_from_size(unsigned long sz, size_t *pgsize)
-{
- unsigned long hugepage_shift;
-
- if (sz >= PGDIR_SIZE)
- hugepage_shift = PGDIR_SHIFT;
- else if (sz >= P4D_SIZE)
- hugepage_shift = P4D_SHIFT;
- else if (sz >= PUD_SIZE)
- hugepage_shift = PUD_SHIFT;
- else if (sz >= PMD_SIZE)
- hugepage_shift = PMD_SHIFT;
- else
- hugepage_shift = PAGE_SHIFT;
-
- *pgsize = 1 << hugepage_shift;
-
- return sz >> hugepage_shift;
-}
-
/*
* When dealing with NAPOT mappings, the privileged specification indicates that
* "if an update needs to be made, the OS generally should first mark all of the
@@ -252,7 +208,7 @@ void set_huge_pte_at(struct mm_struct *mm,
size_t pgsize;
int i, pte_num;
- pte_num = num_contig_ptes_from_size(sz, &pgsize);
+ pte_num = arch_contpte_get_num_contig(ptep, sz, &pgsize);
if (!pte_present(pte)) {
for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
@@ -277,15 +233,13 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
int dirty)
{
struct mm_struct *mm = vma->vm_mm;
- unsigned long order;
pte_t orig_pte;
int pte_num;
if (!pte_napot(pte))
return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
- order = napot_cont_order(pte);
- pte_num = napot_pte_num(order);
+ pte_num = arch_contpte_get_num_contig(ptep, 0, NULL);
orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);
if (pte_dirty(orig_pte))
@@ -303,14 +257,13 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep, unsigned long sz)
{
- size_t pgsize;
pte_t orig_pte = ptep_get(ptep);
int pte_num;
if (!pte_napot(orig_pte))
return ptep_get_and_clear(mm, addr, ptep);
- pte_num = num_contig_ptes_from_size(sz, &pgsize);
+ pte_num = arch_contpte_get_num_contig(ptep, sz, NULL);
return get_clear_contig(mm, addr, ptep, pte_num);
}
@@ -320,7 +273,6 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
pte_t *ptep)
{
pte_t pte = ptep_get(ptep);
- unsigned long order;
pte_t orig_pte;
int pte_num;
@@ -329,8 +281,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
return;
}
- order = napot_cont_order(pte);
- pte_num = napot_pte_num(order);
+ pte_num = arch_contpte_get_num_contig(ptep, 0, NULL);
orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);
orig_pte = pte_wrprotect(orig_pte);
@@ -348,7 +299,7 @@ pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
if (!pte_napot(pte))
return ptep_clear_flush(vma, addr, ptep);
- pte_num = napot_pte_num(napot_cont_order(pte));
+ pte_num = arch_contpte_get_num_contig(ptep, 0, NULL);
return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num);
}
@@ -367,8 +318,7 @@ void huge_pte_clear(struct mm_struct *mm,
return;
}
- pte_num = num_contig_ptes_from_size(sz, &pgsize);
-
+ pte_num = arch_contpte_get_num_contig(ptep, sz, &pgsize);
for (i = 0; i < pte_num; i++, addr += pgsize, ptep++)
pte_clear(mm, addr, ptep);
}
new file mode 100644
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2025 Rivos Inc.
+ */
+
+#ifndef _LINUX_HUGETLB_CONTPTE_H
+#define _LINUX_HUGETLB_CONTPTE_H
+
+#define __HAVE_ARCH_HUGE_PTEP_GET
+extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+
+#endif /* _LINUX_HUGETLB_CONTPTE_H */
@@ -853,6 +853,9 @@ config NOMMU_INITIAL_TRIM_EXCESS
config ARCH_WANT_GENERAL_HUGETLB
bool
+config ARCH_WANT_GENERAL_HUGETLB_CONTPTE
+ bool
+
config ARCH_WANTS_THP_SWAP
def_bool n
@@ -96,6 +96,7 @@ obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_NUMA) += memory-tiers.o
obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
+obj-$(CONFIG_ARCH_WANT_GENERAL_HUGETLB_CONTPTE) += hugetlb_contpte.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG_V1) += memcontrol-v1.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
new file mode 100644
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2025 Rivos Inc.
+ */
+
+#include <linux/pgtable.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ int ncontig, i;
+ pte_t orig_pte = __ptep_get(ptep);
+
+ if (!pte_present(orig_pte) || !pte_cont(orig_pte))
+ return orig_pte;
+
+ ncontig = arch_contpte_get_num_contig(ptep,
+ page_size(pte_page(orig_pte)),
+ NULL);
+
+ for (i = 0; i < ncontig; i++, ptep++) {
+ pte_t pte = __ptep_get(ptep);
+
+ if (pte_dirty(pte))
+ orig_pte = pte_mkdirty(orig_pte);
+
+ if (pte_young(pte))
+ orig_pte = pte_mkyoung(orig_pte);
+ }
+ return orig_pte;
+}
After some adjustments, both architectures have the same implementation so move it to the generic code. Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/hugetlb.h | 3 +- arch/arm64/include/asm/pgtable.h | 57 +++++++++++++++++++++++++-- arch/arm64/mm/hugetlbpage.c | 66 ++------------------------------ arch/riscv/Kconfig | 1 + arch/riscv/include/asm/hugetlb.h | 6 +-- arch/riscv/include/asm/pgtable.h | 45 ++++++++++++++++++++++ arch/riscv/mm/hugetlbpage.c | 62 +++--------------------------- include/linux/hugetlb_contpte.h | 12 ++++++ mm/Kconfig | 3 ++ mm/Makefile | 1 + mm/hugetlb_contpte.c | 32 ++++++++++++++++ 12 files changed, 161 insertions(+), 128 deletions(-) create mode 100644 include/linux/hugetlb_contpte.h create mode 100644 mm/hugetlb_contpte.c