Message ID | 20221204141137.691790-3-panqinglin2020@iscas.ac.cn (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Palmer Dabbelt |
Series | riscv, mm: detect svnapot cpu support at runtime |
Context | Check | Description |
---|---|---|
conchuod/patch_count | success | Link |
conchuod/cover_letter | success | Series has a cover letter |
conchuod/tree_selection | success | Guessed tree name to be for-next |
conchuod/fixes_present | success | Fixes tag not required for -next series |
conchuod/verify_signedoff | success | Signed-off-by tag matches author and committer |
conchuod/kdoc | success | Errors and warnings before: 0 this patch: 0 |
conchuod/module_param | success | Was 0 now: 0 |
conchuod/alphanumeric_selects | success | Out of order selects before the patch: 59 and now 59 |
conchuod/build_rv32_defconfig | success | Build OK |
conchuod/build_warn_rv64 | success | Errors and warnings before: 0 this patch: 0 |
conchuod/dtb_warn_rv64 | success | Errors and warnings before: 0 this patch: 0 |
conchuod/header_inline | success | No static functions without inline keyword in header files |
conchuod/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 361 lines checked |
conchuod/source_inline | success | Was 0 now: 0 |
conchuod/build_rv64_nommu_k210_defconfig | success | Build OK |
conchuod/verify_fixes | success | No Fixes tag |
conchuod/build_rv64_nommu_virt_defconfig | success | Build OK |
On Sun, Dec 04, 2022 at 10:11:36PM +0800, panqinglin2020@iscas.ac.cn wrote: > From: Qinglin Pan <panqinglin2020@iscas.ac.cn> > > Svnapot can be used to support 64KB hugetlb page, so it can become a new > option when using hugetlbfs. Add a basic implementation of hugetlb page, > and support 64KB as a size in it by using Svnapot. > > For test, boot kernel with command line contains "default_hugepagesz=64K > hugepagesz=64K hugepages=20" and run a simple test like this: > > tools/testing/selftests/vm/map_hugetlb 1 16 > > And it should be passed. > > Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index 1d8477c0af7c..be5c1edea70f 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -43,7 +43,7 @@ config RISCV > select ARCH_USE_QUEUED_RWLOCKS > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU > select ARCH_WANT_FRAME_POINTERS > - select ARCH_WANT_GENERAL_HUGETLB > + select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT I am expecting this to be a dumb question too, but I'm curious again about what happens in a system that enables CONFIG_RISCV_ISA_SVNAPOT but the platform it is running on does not support it... > select ARCH_WANT_HUGE_PMD_SHARE if 64BIT > select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE > select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU > diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h > index ec19d6afc896..fe6f23006641 100644 > --- a/arch/riscv/include/asm/hugetlb.h > +++ b/arch/riscv/include/asm/hugetlb.h > @@ -2,7 +2,6 @@ > #ifndef _ASM_RISCV_HUGETLB_H > #define _ASM_RISCV_HUGETLB_H > > -#include <asm-generic/hugetlb.h> > #include <asm/page.h> > > static inline void arch_clear_hugepage_flags(struct page *page) > @@ -11,4 +10,37 @@ static inline void arch_clear_hugepage_flags(struct page *page) > } > #define arch_clear_hugepage_flags arch_clear_hugepage_flags > > +#ifdef CONFIG_RISCV_ISA_SVNAPOT > +#define __HAVE_ARCH_HUGE_PTE_CLEAR > +void huge_pte_clear(struct mm_struct *mm, unsigned long addr, > + pte_t *ptep, unsigned long sz); > + > +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT > +void set_huge_pte_at(struct mm_struct *mm, > + unsigned long addr, pte_t *ptep, pte_t pte); > + > +#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR > +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, > + unsigned long addr, pte_t *ptep); > + > +#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH > +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, > + unsigned long addr, pte_t *ptep); > + > +#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT > +void huge_ptep_set_wrprotect(struct mm_struct *mm, > + unsigned long addr, pte_t *ptep); > + > +#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS > +int huge_ptep_set_access_flags(struct vm_area_struct *vma, > + unsigned long addr, pte_t *ptep, > + pte_t pte, int dirty); > + > +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); > +#define arch_make_huge_pte arch_make_huge_pte > + > +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ > + > +#include <asm-generic/hugetlb.h> ...is this sufficient to fall back to generic huge pages? Hopefully that's just my ignorance on show! Thanks, Conor. 
> + > #endif /* _ASM_RISCV_HUGETLB_H */ > diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c > index 932dadfdca54..49f92f8cd431 100644 > --- a/arch/riscv/mm/hugetlbpage.c > +++ b/arch/riscv/mm/hugetlbpage.c > @@ -2,6 +2,301 @@ > #include <linux/hugetlb.h> > #include <linux/err.h> > > +#ifdef CONFIG_RISCV_ISA_SVNAPOT > +pte_t *huge_pte_alloc(struct mm_struct *mm, > + struct vm_area_struct *vma, > + unsigned long addr, > + unsigned long sz) > +{ > + pgd_t *pgd; > + p4d_t *p4d; > + pud_t *pud; > + pmd_t *pmd; > + pte_t *pte = NULL; > + unsigned long order; > + > + pgd = pgd_offset(mm, addr); > + p4d = p4d_alloc(mm, pgd, addr); > + if (!p4d) > + return NULL; > + > + pud = pud_alloc(mm, p4d, addr); > + if (!pud) > + return NULL; > + > + if (sz == PUD_SIZE) { > + pte = (pte_t *)pud; > + goto out; > + } > + > + if (sz == PMD_SIZE) { > + if (want_pmd_share(vma, addr) && pud_none(*pud)) > + pte = huge_pmd_share(mm, vma, addr, pud); > + else > + pte = (pte_t *)pmd_alloc(mm, pud, addr); > + goto out; > + } > + > + pmd = pmd_alloc(mm, pud, addr); > + if (!pmd) > + return NULL; > + > + for_each_napot_order(order) { > + if (napot_cont_size(order) == sz) { > + pte = pte_alloc_map(mm, pmd, (addr & napot_cont_mask(order))); > + break; > + } > + } > + > +out: > + WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte)); > + return pte; > +} > + > +pte_t *huge_pte_offset(struct mm_struct *mm, > + unsigned long addr, > + unsigned long sz) > +{ > + pgd_t *pgd; > + p4d_t *p4d; > + pud_t *pud; > + pmd_t *pmd; > + pte_t *pte = NULL; > + unsigned long order; > + > + pgd = pgd_offset(mm, addr); > + if (!pgd_present(*pgd)) > + return NULL; > + p4d = p4d_offset(pgd, addr); > + if (!p4d_present(*p4d)) > + return NULL; > + > + pud = pud_offset(p4d, addr); > + if (sz == PUD_SIZE) > + /* must be pud huge, non-present or none */ > + return (pte_t *)pud; > + if (!pud_present(*pud)) > + return NULL; > + > + pmd = pmd_offset(pud, addr); > + if (sz == PMD_SIZE) > + /* must be pmd huge, non-present or none */ > + return (pte_t *)pmd; > + if (!pmd_present(*pmd)) > + return NULL; > + > + for_each_napot_order(order) { > + if (napot_cont_size(order) == sz) { > + pte = pte_offset_kernel(pmd, (addr & napot_cont_mask(order))); > + break; > + } > + } > + return pte; > +} > + > +static pte_t get_clear_contig(struct mm_struct *mm, > + unsigned long addr, > + pte_t *ptep, > + unsigned long pte_num) > +{ > + pte_t orig_pte = ptep_get(ptep); > + unsigned long i; > + > + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) { > + pte_t pte = ptep_get_and_clear(mm, addr, ptep); > + > + if (pte_dirty(pte)) > + orig_pte = pte_mkdirty(orig_pte); > + > + if (pte_young(pte)) > + orig_pte = pte_mkyoung(orig_pte); > + } > + return orig_pte; > +} > + > +static pte_t get_clear_contig_flush(struct mm_struct *mm, > + unsigned long addr, > + pte_t *ptep, > + unsigned long pte_num) > +{ > + pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num); > + bool valid = !pte_none(orig_pte); > + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); > + > + if (valid) > + flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num)); > + > + return orig_pte; > +} > + > +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) > +{ > + unsigned long order; > + > + for_each_napot_order(order) { > + if (shift == napot_cont_shift(order)) { > + entry = pte_mknapot(entry, order); > + break; > + } > + } > + if (order == NAPOT_ORDER_MAX) > + entry = pte_mkhuge(entry); > + > + return entry; > +} > + > +void set_huge_pte_at(struct 
mm_struct *mm, > + unsigned long addr, > + pte_t *ptep, > + pte_t pte) > +{ > + int i; > + int pte_num; > + > + if (!pte_napot(pte)) { > + set_pte_at(mm, addr, ptep, pte); > + return; > + } > + > + pte_num = napot_pte_num(napot_cont_order(pte)); > + for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE) > + set_pte_at(mm, addr, ptep, pte); > +} > + > +int huge_ptep_set_access_flags(struct vm_area_struct *vma, > + unsigned long addr, > + pte_t *ptep, > + pte_t pte, > + int dirty) > +{ > + pte_t orig_pte; > + int i; > + int pte_num; > + struct mm_struct *mm = vma->vm_mm; > + > + if (!pte_napot(pte)) > + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); > + > + pte_num = napot_pte_num(napot_cont_order(pte)); > + ptep = huge_pte_offset(mm, addr, > + napot_cont_size(napot_cont_order(pte))); > + orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); > + > + if (pte_dirty(orig_pte)) > + pte = pte_mkdirty(pte); > + > + if (pte_young(orig_pte)) > + pte = pte_mkyoung(pte); > + > + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) > + set_pte_at(mm, addr, ptep, pte); > + > + return true; > +} > + > +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, > + unsigned long addr, > + pte_t *ptep) > +{ > + int pte_num; > + pte_t orig_pte = ptep_get(ptep); > + > + if (!pte_napot(orig_pte)) > + return ptep_get_and_clear(mm, addr, ptep); > + > + pte_num = napot_pte_num(napot_cont_order(orig_pte)); > + > + return get_clear_contig(mm, addr, ptep, pte_num); > +} > + > +void huge_ptep_set_wrprotect(struct mm_struct *mm, > + unsigned long addr, > + pte_t *ptep) > +{ > + int i; > + int pte_num; > + pte_t pte = ptep_get(ptep); > + > + if (!pte_napot(pte)) { > + ptep_set_wrprotect(mm, addr, ptep); > + return; > + } > + > + pte_num = napot_pte_num(napot_cont_order(pte)); > + ptep = huge_pte_offset(mm, addr, napot_cont_size(napot_cont_order(pte))); > + > + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) > + ptep_set_wrprotect(mm, addr, ptep); > +} > + > +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, > + unsigned long addr, > + pte_t *ptep) > +{ > + int pte_num; > + pte_t pte = ptep_get(ptep); > + > + if (!pte_napot(pte)) > + return ptep_clear_flush(vma, addr, ptep); > + > + pte_num = napot_pte_num(napot_cont_order(pte)); > + > + return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num); > +} > + > +void huge_pte_clear(struct mm_struct *mm, > + unsigned long addr, > + pte_t *ptep, > + unsigned long sz) > +{ > + int i, pte_num; > + pte_t pte = READ_ONCE(*ptep); > + > + if (!pte_napot(pte)) { > + pte_clear(mm, addr, ptep); > + return; > + } > + > + pte_num = napot_pte_num(napot_cont_order(pte)); > + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) > + pte_clear(mm, addr, ptep); > +} > + > +bool __init is_napot_size(unsigned long size) > +{ > + unsigned long order; > + > + if (!has_svnapot()) > + return false; > + > + for_each_napot_order(order) { > + if (size == napot_cont_size(order)) > + return true; > + } > + return false; > +} > + > +static __init int napot_hugetlbpages_init(void) > +{ > + if (has_svnapot()) { > + unsigned long order; > + > + for_each_napot_order(order) > + hugetlb_add_hstate(order); > + } > + return 0; > +} > +arch_initcall(napot_hugetlbpages_init); > + > +#else > + > +bool __init is_napot_size(unsigned long size) > +{ > + return false; > +} > + > +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ > + > int pud_huge(pud_t pud) > { > return pud_leaf(pud); > @@ -18,6 +313,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size) > return true; > else if 
(IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE) > return true; > + else if (is_napot_size(size)) > + return true; > else > return false; > } > -- > 2.37.4 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv >
Hey! On 2022/12/8 02:55, Conor Dooley wrote: > On Sun, Dec 04, 2022 at 10:11:36PM +0800, panqinglin2020@iscas.ac.cn wrote: >> From: Qinglin Pan <panqinglin2020@iscas.ac.cn> >> >> Svnapot can be used to support 64KB hugetlb page, so it can become a new >> option when using hugetlbfs. Add a basic implementation of hugetlb page, >> and support 64KB as a size in it by using Svnapot. >> >> For test, boot kernel with command line contains "default_hugepagesz=64K >> hugepagesz=64K hugepages=20" and run a simple test like this: >> >> tools/testing/selftests/vm/map_hugetlb 1 16 >> >> And it should be passed. >> >> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn> >> >> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig >> index 1d8477c0af7c..be5c1edea70f 100644 >> --- a/arch/riscv/Kconfig >> +++ b/arch/riscv/Kconfig >> @@ -43,7 +43,7 @@ config RISCV >> select ARCH_USE_QUEUED_RWLOCKS >> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU >> select ARCH_WANT_FRAME_POINTERS >> - select ARCH_WANT_GENERAL_HUGETLB >> + select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT > > I am expecting this to be a dumb question too, but I'm curious again > about what happens in a system that enables CONFIG_RISCV_ISA_SVNAPOT but > the platform it is running on does not support it... > >> select ARCH_WANT_HUGE_PMD_SHARE if 64BIT >> select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE >> select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU >> diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h >> index ec19d6afc896..fe6f23006641 100644 >> --- a/arch/riscv/include/asm/hugetlb.h >> +++ b/arch/riscv/include/asm/hugetlb.h >> @@ -2,7 +2,6 @@ >> #ifndef _ASM_RISCV_HUGETLB_H >> #define _ASM_RISCV_HUGETLB_H >> >> -#include <asm-generic/hugetlb.h> >> #include <asm/page.h> >> >> static inline void arch_clear_hugepage_flags(struct page *page) >> @@ -11,4 +10,37 @@ static inline void arch_clear_hugepage_flags(struct page *page) >> } >> #define arch_clear_hugepage_flags arch_clear_hugepage_flags >> >> +#ifdef CONFIG_RISCV_ISA_SVNAPOT >> +#define __HAVE_ARCH_HUGE_PTE_CLEAR >> +void huge_pte_clear(struct mm_struct *mm, unsigned long addr, >> + pte_t *ptep, unsigned long sz); >> + >> +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT >> +void set_huge_pte_at(struct mm_struct *mm, >> + unsigned long addr, pte_t *ptep, pte_t pte); >> + >> +#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR >> +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, >> + unsigned long addr, pte_t *ptep); >> + >> +#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH >> +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, >> + unsigned long addr, pte_t *ptep); >> + >> +#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT >> +void huge_ptep_set_wrprotect(struct mm_struct *mm, >> + unsigned long addr, pte_t *ptep); >> + >> +#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS >> +int huge_ptep_set_access_flags(struct vm_area_struct *vma, >> + unsigned long addr, pte_t *ptep, >> + pte_t pte, int dirty); >> + >> +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); >> +#define arch_make_huge_pte arch_make_huge_pte >> + >> +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ >> + >> +#include <asm-generic/hugetlb.h> > > ...is this sufficient to fall back to generic huge pages? Yes. If CONFIG_RISCV_ISA_SVNAPOT is disabled, it will fall back to generic huge pages. And if CONFIG_RISCV_ISA_SVNAPOT is enabled but the platform has not svnapot support, PMD_SIZE/PUD_SIZE huge pages are still available. Thanks, Qinglin. 
> > Hopefully that's just my ignorance on show > > Thanks, > Conor. > >> + >> #endif /* _ASM_RISCV_HUGETLB_H */ >> diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c >> index 932dadfdca54..49f92f8cd431 100644 >> --- a/arch/riscv/mm/hugetlbpage.c >> +++ b/arch/riscv/mm/hugetlbpage.c >> @@ -2,6 +2,301 @@ >> #include <linux/hugetlb.h> >> #include <linux/err.h> >> >> +#ifdef CONFIG_RISCV_ISA_SVNAPOT >> +pte_t *huge_pte_alloc(struct mm_struct *mm, >> + struct vm_area_struct *vma, >> + unsigned long addr, >> + unsigned long sz) >> +{ >> + pgd_t *pgd; >> + p4d_t *p4d; >> + pud_t *pud; >> + pmd_t *pmd; >> + pte_t *pte = NULL; >> + unsigned long order; >> + >> + pgd = pgd_offset(mm, addr); >> + p4d = p4d_alloc(mm, pgd, addr); >> + if (!p4d) >> + return NULL; >> + >> + pud = pud_alloc(mm, p4d, addr); >> + if (!pud) >> + return NULL; >> + >> + if (sz == PUD_SIZE) { >> + pte = (pte_t *)pud; >> + goto out; >> + } >> + >> + if (sz == PMD_SIZE) { >> + if (want_pmd_share(vma, addr) && pud_none(*pud)) >> + pte = huge_pmd_share(mm, vma, addr, pud); >> + else >> + pte = (pte_t *)pmd_alloc(mm, pud, addr); >> + goto out; >> + } >> + >> + pmd = pmd_alloc(mm, pud, addr); >> + if (!pmd) >> + return NULL; >> + >> + for_each_napot_order(order) { >> + if (napot_cont_size(order) == sz) { >> + pte = pte_alloc_map(mm, pmd, (addr & napot_cont_mask(order))); >> + break; >> + } >> + } >> + >> +out: >> + WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte)); >> + return pte; >> +} >> + >> +pte_t *huge_pte_offset(struct mm_struct *mm, >> + unsigned long addr, >> + unsigned long sz) >> +{ >> + pgd_t *pgd; >> + p4d_t *p4d; >> + pud_t *pud; >> + pmd_t *pmd; >> + pte_t *pte = NULL; >> + unsigned long order; >> + >> + pgd = pgd_offset(mm, addr); >> + if (!pgd_present(*pgd)) >> + return NULL; >> + p4d = p4d_offset(pgd, addr); >> + if (!p4d_present(*p4d)) >> + return NULL; >> + >> + pud = pud_offset(p4d, addr); >> + if (sz == PUD_SIZE) >> + /* must be pud huge, non-present or none */ >> + return (pte_t *)pud; >> + if (!pud_present(*pud)) >> + return NULL; >> + >> + pmd = pmd_offset(pud, addr); >> + if (sz == PMD_SIZE) >> + /* must be pmd huge, non-present or none */ >> + return (pte_t *)pmd; >> + if (!pmd_present(*pmd)) >> + return NULL; >> + >> + for_each_napot_order(order) { >> + if (napot_cont_size(order) == sz) { >> + pte = pte_offset_kernel(pmd, (addr & napot_cont_mask(order))); >> + break; >> + } >> + } >> + return pte; >> +} >> + >> +static pte_t get_clear_contig(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep, >> + unsigned long pte_num) >> +{ >> + pte_t orig_pte = ptep_get(ptep); >> + unsigned long i; >> + >> + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) { >> + pte_t pte = ptep_get_and_clear(mm, addr, ptep); >> + >> + if (pte_dirty(pte)) >> + orig_pte = pte_mkdirty(orig_pte); >> + >> + if (pte_young(pte)) >> + orig_pte = pte_mkyoung(orig_pte); >> + } >> + return orig_pte; >> +} >> + >> +static pte_t get_clear_contig_flush(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep, >> + unsigned long pte_num) >> +{ >> + pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num); >> + bool valid = !pte_none(orig_pte); >> + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); >> + >> + if (valid) >> + flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num)); >> + >> + return orig_pte; >> +} >> + >> +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) >> +{ >> + unsigned long order; >> + >> + for_each_napot_order(order) { >> + if (shift == 
napot_cont_shift(order)) { >> + entry = pte_mknapot(entry, order); >> + break; >> + } >> + } >> + if (order == NAPOT_ORDER_MAX) >> + entry = pte_mkhuge(entry); >> + >> + return entry; >> +} >> + >> +void set_huge_pte_at(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep, >> + pte_t pte) >> +{ >> + int i; >> + int pte_num; >> + >> + if (!pte_napot(pte)) { >> + set_pte_at(mm, addr, ptep, pte); >> + return; >> + } >> + >> + pte_num = napot_pte_num(napot_cont_order(pte)); >> + for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE) >> + set_pte_at(mm, addr, ptep, pte); >> +} >> + >> +int huge_ptep_set_access_flags(struct vm_area_struct *vma, >> + unsigned long addr, >> + pte_t *ptep, >> + pte_t pte, >> + int dirty) >> +{ >> + pte_t orig_pte; >> + int i; >> + int pte_num; >> + struct mm_struct *mm = vma->vm_mm; >> + >> + if (!pte_napot(pte)) >> + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); >> + >> + pte_num = napot_pte_num(napot_cont_order(pte)); >> + ptep = huge_pte_offset(mm, addr, >> + napot_cont_size(napot_cont_order(pte))); >> + orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); >> + >> + if (pte_dirty(orig_pte)) >> + pte = pte_mkdirty(pte); >> + >> + if (pte_young(orig_pte)) >> + pte = pte_mkyoung(pte); >> + >> + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) >> + set_pte_at(mm, addr, ptep, pte); >> + >> + return true; >> +} >> + >> +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep) >> +{ >> + int pte_num; >> + pte_t orig_pte = ptep_get(ptep); >> + >> + if (!pte_napot(orig_pte)) >> + return ptep_get_and_clear(mm, addr, ptep); >> + >> + pte_num = napot_pte_num(napot_cont_order(orig_pte)); >> + >> + return get_clear_contig(mm, addr, ptep, pte_num); >> +} >> + >> +void huge_ptep_set_wrprotect(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep) >> +{ >> + int i; >> + int pte_num; >> + pte_t pte = ptep_get(ptep); >> + >> + if (!pte_napot(pte)) { >> + ptep_set_wrprotect(mm, addr, ptep); >> + return; >> + } >> + >> + pte_num = napot_pte_num(napot_cont_order(pte)); >> + ptep = huge_pte_offset(mm, addr, napot_cont_size(napot_cont_order(pte))); >> + >> + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) >> + ptep_set_wrprotect(mm, addr, ptep); >> +} >> + >> +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, >> + unsigned long addr, >> + pte_t *ptep) >> +{ >> + int pte_num; >> + pte_t pte = ptep_get(ptep); >> + >> + if (!pte_napot(pte)) >> + return ptep_clear_flush(vma, addr, ptep); >> + >> + pte_num = napot_pte_num(napot_cont_order(pte)); >> + >> + return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num); >> +} >> + >> +void huge_pte_clear(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep, >> + unsigned long sz) >> +{ >> + int i, pte_num; >> + pte_t pte = READ_ONCE(*ptep); >> + >> + if (!pte_napot(pte)) { >> + pte_clear(mm, addr, ptep); >> + return; >> + } >> + >> + pte_num = napot_pte_num(napot_cont_order(pte)); >> + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) >> + pte_clear(mm, addr, ptep); >> +} >> + >> +bool __init is_napot_size(unsigned long size) >> +{ >> + unsigned long order; >> + >> + if (!has_svnapot()) >> + return false; >> + >> + for_each_napot_order(order) { >> + if (size == napot_cont_size(order)) >> + return true; >> + } >> + return false; >> +} >> + >> +static __init int napot_hugetlbpages_init(void) >> +{ >> + if (has_svnapot()) { >> + unsigned long order; >> + >> + for_each_napot_order(order) >> + hugetlb_add_hstate(order); >> + } 
>> + return 0; >> +} >> +arch_initcall(napot_hugetlbpages_init); >> + >> +#else >> + >> +bool __init is_napot_size(unsigned long size) >> +{ >> + return false; >> +} >> + >> +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ >> + >> int pud_huge(pud_t pud) >> { >> return pud_leaf(pud); >> @@ -18,6 +313,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size) >> return true; >> else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE) >> return true; >> + else if (is_napot_size(size)) >> + return true; >> else >> return false; >> } >> -- >> 2.37.4 >> >> >> _______________________________________________ >> linux-riscv mailing list >> linux-riscv@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/linux-riscv >>
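To make the runtime behaviour discussed above concrete: everything 64K/NAPOT-specific in the patch is gated on has_svnapot(), so on hardware without the extension the NAPOT size is never registered as an hstate and never accepted as a valid hugepage size, while the existing PMD/PUD huge pages keep working. A condensed restatement of the relevant hunks (has_svnapot(), for_each_napot_order() and napot_cont_size() come from earlier patches in this series; the leading HPAGE_SIZE check is the pre-existing PMD-sized case):

```c
bool __init is_napot_size(unsigned long size)
{
	unsigned long order;

	if (!has_svnapot())	/* runtime check: no Svnapot, no 64K size */
		return false;

	for_each_napot_order(order)
		if (size == napot_cont_size(order))
			return true;

	return false;
}

static __init int napot_hugetlbpages_init(void)
{
	unsigned long order;

	/* no NAPOT hstates are registered on non-Svnapot hardware */
	if (has_svnapot())
		for_each_napot_order(order)
			hugetlb_add_hstate(order);

	return 0;
}
arch_initcall(napot_hugetlbpages_init);

bool __init arch_hugetlb_valid_size(unsigned long size)
{
	if (size == HPAGE_SIZE)				/* PMD-sized, pre-existing */
		return true;
	else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE)
		return true;
	else if (is_napot_size(size))			/* false at runtime without Svnapot */
		return true;
	else
		return false;
}
```

So with CONFIG_RISCV_ISA_SVNAPOT=y on a platform lacking the extension, "hugepagesz=64K" is simply rejected at boot and only the generic PMD/PUD sizes remain available.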
On Sun, Dec 04, 2022 at 10:11:36PM +0800, panqinglin2020@iscas.ac.cn wrote: > From: Qinglin Pan <panqinglin2020@iscas.ac.cn> > > Svnapot can be used to support 64KB hugetlb page, so it can become a new > option when using hugetlbfs. Add a basic implementation of hugetlb page, > and support 64KB as a size in it by using Svnapot. > > For test, boot kernel with command line contains "default_hugepagesz=64K > hugepagesz=64K hugepages=20" and run a simple test like this: > > tools/testing/selftests/vm/map_hugetlb 1 16 > > And it should be passed. > > Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index 1d8477c0af7c..be5c1edea70f 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -43,7 +43,7 @@ config RISCV > select ARCH_USE_QUEUED_RWLOCKS > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU > select ARCH_WANT_FRAME_POINTERS > - select ARCH_WANT_GENERAL_HUGETLB > + select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT > select ARCH_WANT_HUGE_PMD_SHARE if 64BIT > select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE > select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU > diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h > index ec19d6afc896..fe6f23006641 100644 > --- a/arch/riscv/include/asm/hugetlb.h > +++ b/arch/riscv/include/asm/hugetlb.h > @@ -2,7 +2,6 @@ > #ifndef _ASM_RISCV_HUGETLB_H > #define _ASM_RISCV_HUGETLB_H > > -#include <asm-generic/hugetlb.h> > #include <asm/page.h> > > static inline void arch_clear_hugepage_flags(struct page *page) > @@ -11,4 +10,37 @@ static inline void arch_clear_hugepage_flags(struct page *page) > } > #define arch_clear_hugepage_flags arch_clear_hugepage_flags > > +#ifdef CONFIG_RISCV_ISA_SVNAPOT > +#define __HAVE_ARCH_HUGE_PTE_CLEAR > +void huge_pte_clear(struct mm_struct *mm, unsigned long addr, > + pte_t *ptep, unsigned long sz); > + > +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT > +void set_huge_pte_at(struct mm_struct *mm, > + unsigned long addr, pte_t *ptep, pte_t pte); > + > +#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR > +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, > + unsigned long addr, pte_t *ptep); > + > +#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH > +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, > + unsigned long addr, pte_t *ptep); > + > +#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT > +void huge_ptep_set_wrprotect(struct mm_struct *mm, > + unsigned long addr, pte_t *ptep); > + > +#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS > +int huge_ptep_set_access_flags(struct vm_area_struct *vma, > + unsigned long addr, pte_t *ptep, > + pte_t pte, int dirty); > + > +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); > +#define arch_make_huge_pte arch_make_huge_pte > + > +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ > + > +#include <asm-generic/hugetlb.h> > + > #endif /* _ASM_RISCV_HUGETLB_H */ > diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c > index 932dadfdca54..49f92f8cd431 100644 > --- a/arch/riscv/mm/hugetlbpage.c > +++ b/arch/riscv/mm/hugetlbpage.c > @@ -2,6 +2,301 @@ > #include <linux/hugetlb.h> > #include <linux/err.h> > > +#ifdef CONFIG_RISCV_ISA_SVNAPOT > +pte_t *huge_pte_alloc(struct mm_struct *mm, > + struct vm_area_struct *vma, > + unsigned long addr, > + unsigned long sz) > +{ > + pgd_t *pgd; > + p4d_t *p4d; > + pud_t *pud; > + pmd_t *pmd; > + pte_t *pte = NULL; > + unsigned long order; Since it's Christmas time I'll make the nit that reverse fir tree is preferred[1]. 
[1] https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#variable-declarations > + > + pgd = pgd_offset(mm, addr); > + p4d = p4d_alloc(mm, pgd, addr); > + if (!p4d) > + return NULL; > + > + pud = pud_alloc(mm, p4d, addr); > + if (!pud) > + return NULL; > + > + if (sz == PUD_SIZE) { > + pte = (pte_t *)pud; > + goto out; > + } > + > + if (sz == PMD_SIZE) { > + if (want_pmd_share(vma, addr) && pud_none(*pud)) > + pte = huge_pmd_share(mm, vma, addr, pud); > + else > + pte = (pte_t *)pmd_alloc(mm, pud, addr); > + goto out; > + } > + > + pmd = pmd_alloc(mm, pud, addr); > + if (!pmd) > + return NULL; > + > + for_each_napot_order(order) { > + if (napot_cont_size(order) == sz) { > + pte = pte_alloc_map(mm, pmd, (addr & napot_cont_mask(order))); nit: No need for the () in the 3rd parameter > + break; > + } > + } > + > +out: > + WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte)); > + return pte; > +} > + > +pte_t *huge_pte_offset(struct mm_struct *mm, > + unsigned long addr, > + unsigned long sz) > +{ > + pgd_t *pgd; > + p4d_t *p4d; > + pud_t *pud; > + pmd_t *pmd; > + pte_t *pte = NULL; > + unsigned long order; > + > + pgd = pgd_offset(mm, addr); > + if (!pgd_present(*pgd)) > + return NULL; nit: Add blank line here > + p4d = p4d_offset(pgd, addr); > + if (!p4d_present(*p4d)) > + return NULL; > + > + pud = pud_offset(p4d, addr); > + if (sz == PUD_SIZE) > + /* must be pud huge, non-present or none */ > + return (pte_t *)pud; nit: Add blank line here > + if (!pud_present(*pud)) > + return NULL; > + > + pmd = pmd_offset(pud, addr); > + if (sz == PMD_SIZE) > + /* must be pmd huge, non-present or none */ > + return (pte_t *)pmd; nit: Add blank line here > + if (!pmd_present(*pmd)) > + return NULL; > + > + for_each_napot_order(order) { > + if (napot_cont_size(order) == sz) { > + pte = pte_offset_kernel(pmd, (addr & napot_cont_mask(order))); nit: extra () > + break; > + } > + } nit: Add blank line here > + return pte; > +} > + > +static pte_t get_clear_contig(struct mm_struct *mm, > + unsigned long addr, > + pte_t *ptep, > + unsigned long pte_num) > +{ > + pte_t orig_pte = ptep_get(ptep); > + unsigned long i; > + > + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) { > + pte_t pte = ptep_get_and_clear(mm, addr, ptep); > + > + if (pte_dirty(pte)) > + orig_pte = pte_mkdirty(orig_pte); > + > + if (pte_young(pte)) > + orig_pte = pte_mkyoung(orig_pte); > + } nit: Add blank line here > + return orig_pte; > +} > + > +static pte_t get_clear_contig_flush(struct mm_struct *mm, > + unsigned long addr, > + pte_t *ptep, > + unsigned long pte_num) > +{ > + pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num); > + bool valid = !pte_none(orig_pte); > + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); > + > + if (valid) > + flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num)); > + > + return orig_pte; > +} > + > +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) > +{ > + unsigned long order; > + > + for_each_napot_order(order) { > + if (shift == napot_cont_shift(order)) { > + entry = pte_mknapot(entry, order); > + break; > + } > + } > + if (order == NAPOT_ORDER_MAX) > + entry = pte_mkhuge(entry); > + > + return entry; > +} > + > +void set_huge_pte_at(struct mm_struct *mm, > + unsigned long addr, > + pte_t *ptep, > + pte_t pte) > +{ > + int i; > + int pte_num; > + > + if (!pte_napot(pte)) { > + set_pte_at(mm, addr, ptep, pte); > + return; > + } > + > + pte_num = napot_pte_num(napot_cont_order(pte)); > + for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE) 
> + set_pte_at(mm, addr, ptep, pte); > +} > + > +int huge_ptep_set_access_flags(struct vm_area_struct *vma, > + unsigned long addr, > + pte_t *ptep, > + pte_t pte, > + int dirty) > +{ > + pte_t orig_pte; > + int i; > + int pte_num; > + struct mm_struct *mm = vma->vm_mm; > + > + if (!pte_napot(pte)) > + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); > + > + pte_num = napot_pte_num(napot_cont_order(pte)); > + ptep = huge_pte_offset(mm, addr, > + napot_cont_size(napot_cont_order(pte))); nit: order = napot_cont_order(pte); pte_num = napot_pte_num(order); ptep = huge_pte_offset(mm, addr, napot_cont_size(order)); > + orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); > + > + if (pte_dirty(orig_pte)) > + pte = pte_mkdirty(pte); > + > + if (pte_young(orig_pte)) > + pte = pte_mkyoung(pte); > + > + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) > + set_pte_at(mm, addr, ptep, pte); > + > + return true; > +} > + > +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, > + unsigned long addr, > + pte_t *ptep) > +{ > + int pte_num; > + pte_t orig_pte = ptep_get(ptep); > + > + if (!pte_napot(orig_pte)) > + return ptep_get_and_clear(mm, addr, ptep); > + > + pte_num = napot_pte_num(napot_cont_order(orig_pte)); > + > + return get_clear_contig(mm, addr, ptep, pte_num); > +} > + > +void huge_ptep_set_wrprotect(struct mm_struct *mm, > + unsigned long addr, > + pte_t *ptep) > +{ > + int i; > + int pte_num; > + pte_t pte = ptep_get(ptep); > + > + if (!pte_napot(pte)) { > + ptep_set_wrprotect(mm, addr, ptep); > + return; > + } > + > + pte_num = napot_pte_num(napot_cont_order(pte)); > + ptep = huge_pte_offset(mm, addr, napot_cont_size(napot_cont_order(pte))); Same use an 'order' variable nit as above > + > + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) > + ptep_set_wrprotect(mm, addr, ptep); > +} > + > +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, > + unsigned long addr, > + pte_t *ptep) > +{ > + int pte_num; > + pte_t pte = ptep_get(ptep); > + > + if (!pte_napot(pte)) > + return ptep_clear_flush(vma, addr, ptep); > + > + pte_num = napot_pte_num(napot_cont_order(pte)); > + > + return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num); > +} > + > +void huge_pte_clear(struct mm_struct *mm, > + unsigned long addr, > + pte_t *ptep, > + unsigned long sz) > +{ > + int i, pte_num; > + pte_t pte = READ_ONCE(*ptep); > + > + if (!pte_napot(pte)) { > + pte_clear(mm, addr, ptep); > + return; > + } > + > + pte_num = napot_pte_num(napot_cont_order(pte)); > + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) > + pte_clear(mm, addr, ptep); > +} > + > +bool __init is_napot_size(unsigned long size) > +{ > + unsigned long order; > + > + if (!has_svnapot()) > + return false; > + > + for_each_napot_order(order) { > + if (size == napot_cont_size(order)) > + return true; > + } > + return false; > +} > + > +static __init int napot_hugetlbpages_init(void) > +{ > + if (has_svnapot()) { > + unsigned long order; > + > + for_each_napot_order(order) > + hugetlb_add_hstate(order); > + } > + return 0; > +} > +arch_initcall(napot_hugetlbpages_init); > + > +#else > + > +bool __init is_napot_size(unsigned long size) > +{ > + return false; > +} > + > +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ > + > int pud_huge(pud_t pud) > { > return pud_leaf(pud); > @@ -18,6 +313,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size) > return true; > else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE) > return true; > + else if (is_napot_size(size)) > + return true; > else > return false; > } > -- 
> 2.37.4
>

Besides the nits,

Reviewed-by: Andrew Jones <ajones@ventanamicro.com>

Thanks,
drew
Hi Andrew, On 2022/12/8 23:17, Andrew Jones wrote: > On Sun, Dec 04, 2022 at 10:11:36PM +0800, panqinglin2020@iscas.ac.cn wrote: >> From: Qinglin Pan <panqinglin2020@iscas.ac.cn> >> >> Svnapot can be used to support 64KB hugetlb page, so it can become a new >> option when using hugetlbfs. Add a basic implementation of hugetlb page, >> and support 64KB as a size in it by using Svnapot. >> >> For test, boot kernel with command line contains "default_hugepagesz=64K >> hugepagesz=64K hugepages=20" and run a simple test like this: >> >> tools/testing/selftests/vm/map_hugetlb 1 16 >> >> And it should be passed. >> >> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn> >> >> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig >> index 1d8477c0af7c..be5c1edea70f 100644 >> --- a/arch/riscv/Kconfig >> +++ b/arch/riscv/Kconfig >> @@ -43,7 +43,7 @@ config RISCV >> select ARCH_USE_QUEUED_RWLOCKS >> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU >> select ARCH_WANT_FRAME_POINTERS >> - select ARCH_WANT_GENERAL_HUGETLB >> + select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT >> select ARCH_WANT_HUGE_PMD_SHARE if 64BIT >> select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE >> select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU >> diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h >> index ec19d6afc896..fe6f23006641 100644 >> --- a/arch/riscv/include/asm/hugetlb.h >> +++ b/arch/riscv/include/asm/hugetlb.h >> @@ -2,7 +2,6 @@ >> #ifndef _ASM_RISCV_HUGETLB_H >> #define _ASM_RISCV_HUGETLB_H >> >> -#include <asm-generic/hugetlb.h> >> #include <asm/page.h> >> >> static inline void arch_clear_hugepage_flags(struct page *page) >> @@ -11,4 +10,37 @@ static inline void arch_clear_hugepage_flags(struct page *page) >> } >> #define arch_clear_hugepage_flags arch_clear_hugepage_flags >> >> +#ifdef CONFIG_RISCV_ISA_SVNAPOT >> +#define __HAVE_ARCH_HUGE_PTE_CLEAR >> +void huge_pte_clear(struct mm_struct *mm, unsigned long addr, >> + pte_t *ptep, unsigned long sz); >> + >> +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT >> +void set_huge_pte_at(struct mm_struct *mm, >> + unsigned long addr, pte_t *ptep, pte_t pte); >> + >> +#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR >> +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, >> + unsigned long addr, pte_t *ptep); >> + >> +#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH >> +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, >> + unsigned long addr, pte_t *ptep); >> + >> +#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT >> +void huge_ptep_set_wrprotect(struct mm_struct *mm, >> + unsigned long addr, pte_t *ptep); >> + >> +#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS >> +int huge_ptep_set_access_flags(struct vm_area_struct *vma, >> + unsigned long addr, pte_t *ptep, >> + pte_t pte, int dirty); >> + >> +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); >> +#define arch_make_huge_pte arch_make_huge_pte >> + >> +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ >> + >> +#include <asm-generic/hugetlb.h> >> + >> #endif /* _ASM_RISCV_HUGETLB_H */ >> diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c >> index 932dadfdca54..49f92f8cd431 100644 >> --- a/arch/riscv/mm/hugetlbpage.c >> +++ b/arch/riscv/mm/hugetlbpage.c >> @@ -2,6 +2,301 @@ >> #include <linux/hugetlb.h> >> #include <linux/err.h> >> >> +#ifdef CONFIG_RISCV_ISA_SVNAPOT >> +pte_t *huge_pte_alloc(struct mm_struct *mm, >> + struct vm_area_struct *vma, >> + unsigned long addr, >> + unsigned long sz) >> +{ >> + pgd_t *pgd; >> + p4d_t *p4d; >> + pud_t *pud; >> + 
pmd_t *pmd; >> + pte_t *pte = NULL; >> + unsigned long order; > > Since it's Christmas time I'll make the nit that reverse fir tree is > preferred[1]. > > [1] https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#variable-declarations Got it :) > >> + >> + pgd = pgd_offset(mm, addr); >> + p4d = p4d_alloc(mm, pgd, addr); >> + if (!p4d) >> + return NULL; >> + >> + pud = pud_alloc(mm, p4d, addr); >> + if (!pud) >> + return NULL; >> + >> + if (sz == PUD_SIZE) { >> + pte = (pte_t *)pud; >> + goto out; >> + } >> + >> + if (sz == PMD_SIZE) { >> + if (want_pmd_share(vma, addr) && pud_none(*pud)) >> + pte = huge_pmd_share(mm, vma, addr, pud); >> + else >> + pte = (pte_t *)pmd_alloc(mm, pud, addr); >> + goto out; >> + } >> + >> + pmd = pmd_alloc(mm, pud, addr); >> + if (!pmd) >> + return NULL; >> + >> + for_each_napot_order(order) { >> + if (napot_cont_size(order) == sz) { >> + pte = pte_alloc_map(mm, pmd, (addr & napot_cont_mask(order))); > > nit: No need for the () in the 3rd parameter > >> + break; >> + } >> + } >> + >> +out: >> + WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte)); >> + return pte; >> +} >> + >> +pte_t *huge_pte_offset(struct mm_struct *mm, >> + unsigned long addr, >> + unsigned long sz) >> +{ >> + pgd_t *pgd; >> + p4d_t *p4d; >> + pud_t *pud; >> + pmd_t *pmd; >> + pte_t *pte = NULL; >> + unsigned long order; >> + >> + pgd = pgd_offset(mm, addr); >> + if (!pgd_present(*pgd)) >> + return NULL; > > nit: Add blank line here > >> + p4d = p4d_offset(pgd, addr); >> + if (!p4d_present(*p4d)) >> + return NULL; >> + >> + pud = pud_offset(p4d, addr); >> + if (sz == PUD_SIZE) >> + /* must be pud huge, non-present or none */ >> + return (pte_t *)pud; > > nit: Add blank line here > >> + if (!pud_present(*pud)) >> + return NULL; >> + >> + pmd = pmd_offset(pud, addr); >> + if (sz == PMD_SIZE) >> + /* must be pmd huge, non-present or none */ >> + return (pte_t *)pmd; > > nit: Add blank line here > >> + if (!pmd_present(*pmd)) >> + return NULL; >> + >> + for_each_napot_order(order) { >> + if (napot_cont_size(order) == sz) { >> + pte = pte_offset_kernel(pmd, (addr & napot_cont_mask(order))); > > nit: extra () > >> + break; >> + } >> + } > > nit: Add blank line here > >> + return pte; >> +} >> + >> +static pte_t get_clear_contig(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep, >> + unsigned long pte_num) >> +{ >> + pte_t orig_pte = ptep_get(ptep); >> + unsigned long i; >> + >> + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) { >> + pte_t pte = ptep_get_and_clear(mm, addr, ptep); >> + >> + if (pte_dirty(pte)) >> + orig_pte = pte_mkdirty(orig_pte); >> + >> + if (pte_young(pte)) >> + orig_pte = pte_mkyoung(orig_pte); >> + } > > nit: Add blank line here > >> + return orig_pte; >> +} >> + >> +static pte_t get_clear_contig_flush(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep, >> + unsigned long pte_num) >> +{ >> + pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num); >> + bool valid = !pte_none(orig_pte); >> + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); >> + >> + if (valid) >> + flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num)); >> + >> + return orig_pte; >> +} >> + >> +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) >> +{ >> + unsigned long order; >> + >> + for_each_napot_order(order) { >> + if (shift == napot_cont_shift(order)) { >> + entry = pte_mknapot(entry, order); >> + break; >> + } >> + } >> + if (order == NAPOT_ORDER_MAX) >> + entry = pte_mkhuge(entry); >> + >> + return entry; >> +} >> 
+ >> +void set_huge_pte_at(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep, >> + pte_t pte) >> +{ >> + int i; >> + int pte_num; >> + >> + if (!pte_napot(pte)) { >> + set_pte_at(mm, addr, ptep, pte); >> + return; >> + } >> + >> + pte_num = napot_pte_num(napot_cont_order(pte)); >> + for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE) >> + set_pte_at(mm, addr, ptep, pte); >> +} >> + >> +int huge_ptep_set_access_flags(struct vm_area_struct *vma, >> + unsigned long addr, >> + pte_t *ptep, >> + pte_t pte, >> + int dirty) >> +{ >> + pte_t orig_pte; >> + int i; >> + int pte_num; >> + struct mm_struct *mm = vma->vm_mm; >> + >> + if (!pte_napot(pte)) >> + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); >> + >> + pte_num = napot_pte_num(napot_cont_order(pte)); >> + ptep = huge_pte_offset(mm, addr, >> + napot_cont_size(napot_cont_order(pte))); > > nit: > order = napot_cont_order(pte); > pte_num = napot_pte_num(order); > ptep = huge_pte_offset(mm, addr, napot_cont_size(order)); > >> + orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); >> + >> + if (pte_dirty(orig_pte)) >> + pte = pte_mkdirty(pte); >> + >> + if (pte_young(orig_pte)) >> + pte = pte_mkyoung(pte); >> + >> + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) >> + set_pte_at(mm, addr, ptep, pte); >> + >> + return true; >> +} >> + >> +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep) >> +{ >> + int pte_num; >> + pte_t orig_pte = ptep_get(ptep); >> + >> + if (!pte_napot(orig_pte)) >> + return ptep_get_and_clear(mm, addr, ptep); >> + >> + pte_num = napot_pte_num(napot_cont_order(orig_pte)); >> + >> + return get_clear_contig(mm, addr, ptep, pte_num); >> +} >> + >> +void huge_ptep_set_wrprotect(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep) >> +{ >> + int i; >> + int pte_num; >> + pte_t pte = ptep_get(ptep); >> + >> + if (!pte_napot(pte)) { >> + ptep_set_wrprotect(mm, addr, ptep); >> + return; >> + } >> + >> + pte_num = napot_pte_num(napot_cont_order(pte)); >> + ptep = huge_pte_offset(mm, addr, napot_cont_size(napot_cont_order(pte))); > > Same use an 'order' variable nit as above > >> + >> + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) >> + ptep_set_wrprotect(mm, addr, ptep); >> +} >> + >> +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, >> + unsigned long addr, >> + pte_t *ptep) >> +{ >> + int pte_num; >> + pte_t pte = ptep_get(ptep); >> + >> + if (!pte_napot(pte)) >> + return ptep_clear_flush(vma, addr, ptep); >> + >> + pte_num = napot_pte_num(napot_cont_order(pte)); >> + >> + return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num); >> +} >> + >> +void huge_pte_clear(struct mm_struct *mm, >> + unsigned long addr, >> + pte_t *ptep, >> + unsigned long sz) >> +{ >> + int i, pte_num; >> + pte_t pte = READ_ONCE(*ptep); >> + >> + if (!pte_napot(pte)) { >> + pte_clear(mm, addr, ptep); >> + return; >> + } >> + >> + pte_num = napot_pte_num(napot_cont_order(pte)); >> + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) >> + pte_clear(mm, addr, ptep); >> +} >> + >> +bool __init is_napot_size(unsigned long size) >> +{ >> + unsigned long order; >> + >> + if (!has_svnapot()) >> + return false; >> + >> + for_each_napot_order(order) { >> + if (size == napot_cont_size(order)) >> + return true; >> + } >> + return false; >> +} >> + >> +static __init int napot_hugetlbpages_init(void) >> +{ >> + if (has_svnapot()) { >> + unsigned long order; >> + >> + for_each_napot_order(order) >> + hugetlb_add_hstate(order); >> + } >> + 
return 0; >> +} >> +arch_initcall(napot_hugetlbpages_init); >> + >> +#else >> + >> +bool __init is_napot_size(unsigned long size) >> +{ >> + return false; >> +} >> + >> +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ >> + >> int pud_huge(pud_t pud) >> { >> return pud_leaf(pud); >> @@ -18,6 +313,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size) >> return true; >> else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE) >> return true; >> + else if (is_napot_size(size)) >> + return true; >> else >> return false; >> } >> -- >> 2.37.4 >> > > Besides the nits, Thanks a lot for all the nits! Will fix them in next version. > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com> > > Thanks, > drew Thanks, Qinglin.
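Qinglin says above that the nits will be folded into the next version. For illustration only, huge_ptep_set_access_flags() with Andrew's 'order' suggestion and reverse fir tree declaration ordering applied might look roughly like this (a sketch of a possible v2, not the actual follow-up patch):

```c
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr,
			       pte_t *ptep,
			       pte_t pte,
			       int dirty)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long order;
	pte_t orig_pte;
	int i, pte_num;

	if (!pte_napot(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	/* compute the NAPOT order once and reuse it, as suggested in review */
	order = napot_cont_order(pte);
	pte_num = napot_pte_num(order);
	ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
	orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);

	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
		set_pte_at(mm, addr, ptep, pte);

	return true;
}
```

The same 'order' pattern would apply to huge_ptep_set_wrprotect(), which has the identical napot_cont_size(napot_cont_order(pte)) construct.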
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1d8477c0af7c..be5c1edea70f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -43,7 +43,7 @@ config RISCV select ARCH_USE_QUEUED_RWLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS - select ARCH_WANT_GENERAL_HUGETLB + select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index ec19d6afc896..fe6f23006641 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -2,7 +2,6 @@ #ifndef _ASM_RISCV_HUGETLB_H #define _ASM_RISCV_HUGETLB_H -#include <asm-generic/hugetlb.h> #include <asm/page.h> static inline void arch_clear_hugepage_flags(struct page *page) @@ -11,4 +10,37 @@ static inline void arch_clear_hugepage_flags(struct page *page) } #define arch_clear_hugepage_flags arch_clear_hugepage_flags +#ifdef CONFIG_RISCV_ISA_SVNAPOT +#define __HAVE_ARCH_HUGE_PTE_CLEAR +void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz); + +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT +void set_huge_pte_at(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pte); + +#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); + +#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); + +#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty); + +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); +#define arch_make_huge_pte arch_make_huge_pte + +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ + +#include <asm-generic/hugetlb.h> + #endif /* _ASM_RISCV_HUGETLB_H */ diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 932dadfdca54..49f92f8cd431 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -2,6 +2,301 @@ #include <linux/hugetlb.h> #include <linux/err.h> +#ifdef CONFIG_RISCV_ISA_SVNAPOT +pte_t *huge_pte_alloc(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long addr, + unsigned long sz) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + unsigned long order; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return NULL; + + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + goto out; + } + + if (sz == PMD_SIZE) { + if (want_pmd_share(vma, addr) && pud_none(*pud)) + pte = huge_pmd_share(mm, vma, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + goto out; + } + + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return NULL; + + for_each_napot_order(order) { + if (napot_cont_size(order) == sz) { + pte = pte_alloc_map(mm, pmd, (addr & napot_cont_mask(order))); + break; + } + } + +out: + WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte)); + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, + unsigned long addr, + unsigned long sz) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t 
*pte = NULL; + unsigned long order; + + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) + return NULL; + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return NULL; + + pud = pud_offset(p4d, addr); + if (sz == PUD_SIZE) + /* must be pud huge, non-present or none */ + return (pte_t *)pud; + if (!pud_present(*pud)) + return NULL; + + pmd = pmd_offset(pud, addr); + if (sz == PMD_SIZE) + /* must be pmd huge, non-present or none */ + return (pte_t *)pmd; + if (!pmd_present(*pmd)) + return NULL; + + for_each_napot_order(order) { + if (napot_cont_size(order) == sz) { + pte = pte_offset_kernel(pmd, (addr & napot_cont_mask(order))); + break; + } + } + return pte; +} + +static pte_t get_clear_contig(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pte_num) +{ + pte_t orig_pte = ptep_get(ptep); + unsigned long i; + + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) { + pte_t pte = ptep_get_and_clear(mm, addr, ptep); + + if (pte_dirty(pte)) + orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); + } + return orig_pte; +} + +static pte_t get_clear_contig_flush(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pte_num) +{ + pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num); + bool valid = !pte_none(orig_pte); + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); + + if (valid) + flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num)); + + return orig_pte; +} + +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) +{ + unsigned long order; + + for_each_napot_order(order) { + if (shift == napot_cont_shift(order)) { + entry = pte_mknapot(entry, order); + break; + } + } + if (order == NAPOT_ORDER_MAX) + entry = pte_mkhuge(entry); + + return entry; +} + +void set_huge_pte_at(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + pte_t pte) +{ + int i; + int pte_num; + + if (!pte_napot(pte)) { + set_pte_at(mm, addr, ptep, pte); + return; + } + + pte_num = napot_pte_num(napot_cont_order(pte)); + for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE) + set_pte_at(mm, addr, ptep, pte); +} + +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, + pte_t *ptep, + pte_t pte, + int dirty) +{ + pte_t orig_pte; + int i; + int pte_num; + struct mm_struct *mm = vma->vm_mm; + + if (!pte_napot(pte)) + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + + pte_num = napot_pte_num(napot_cont_order(pte)); + ptep = huge_pte_offset(mm, addr, + napot_cont_size(napot_cont_order(pte))); + orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); + + if (pte_dirty(orig_pte)) + pte = pte_mkdirty(pte); + + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) + set_pte_at(mm, addr, ptep, pte); + + return true; +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) +{ + int pte_num; + pte_t orig_pte = ptep_get(ptep); + + if (!pte_napot(orig_pte)) + return ptep_get_and_clear(mm, addr, ptep); + + pte_num = napot_pte_num(napot_cont_order(orig_pte)); + + return get_clear_contig(mm, addr, ptep, pte_num); +} + +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) +{ + int i; + int pte_num; + pte_t pte = ptep_get(ptep); + + if (!pte_napot(pte)) { + ptep_set_wrprotect(mm, addr, ptep); + return; + } + + pte_num = napot_pte_num(napot_cont_order(pte)); + ptep = huge_pte_offset(mm, addr, 
napot_cont_size(napot_cont_order(pte))); + + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) + ptep_set_wrprotect(mm, addr, ptep); +} + +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, + pte_t *ptep) +{ + int pte_num; + pte_t pte = ptep_get(ptep); + + if (!pte_napot(pte)) + return ptep_clear_flush(vma, addr, ptep); + + pte_num = napot_pte_num(napot_cont_order(pte)); + + return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num); +} + +void huge_pte_clear(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long sz) +{ + int i, pte_num; + pte_t pte = READ_ONCE(*ptep); + + if (!pte_napot(pte)) { + pte_clear(mm, addr, ptep); + return; + } + + pte_num = napot_pte_num(napot_cont_order(pte)); + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) + pte_clear(mm, addr, ptep); +} + +bool __init is_napot_size(unsigned long size) +{ + unsigned long order; + + if (!has_svnapot()) + return false; + + for_each_napot_order(order) { + if (size == napot_cont_size(order)) + return true; + } + return false; +} + +static __init int napot_hugetlbpages_init(void) +{ + if (has_svnapot()) { + unsigned long order; + + for_each_napot_order(order) + hugetlb_add_hstate(order); + } + return 0; +} +arch_initcall(napot_hugetlbpages_init); + +#else + +bool __init is_napot_size(unsigned long size) +{ + return false; +} + +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ + int pud_huge(pud_t pud) { return pud_leaf(pud); @@ -18,6 +313,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size) return true; else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE) return true; + else if (is_napot_size(size)) + return true; else return false; }
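The commit message tests the series with tools/testing/selftests/vm/map_hugetlb. For readers without the selftests handy, a minimal standalone equivalent is sketched below; it assumes the kernel was booted with "default_hugepagesz=64K hugepagesz=64K hugepages=20" as described above, so a plain MAP_HUGETLB mapping picks up the 64K default size:

```c
/* Minimal stand-in for tools/testing/selftests/vm/map_hugetlb:
 * map one default-sized hugetlb page (64K with the boot options
 * from the commit message), write it, and read it back.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define LENGTH (64 * 1024UL)

int main(void)
{
	unsigned char *p;
	size_t i;

	p = mmap(NULL, LENGTH, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");
		return 1;
	}

	memset(p, 0xa5, LENGTH);		/* touch the whole huge page */
	for (i = 0; i < LENGTH; i++) {		/* verify it reads back */
		if (p[i] != 0xa5) {
			fprintf(stderr, "mismatch at %zu\n", i);
			return 1;
		}
	}

	munmap(p, LENGTH);
	printf("64K hugetlb mapping OK\n");
	return 0;
}
```

On a non-Svnapot platform (or with the boot options omitted) the mmap() call is expected to fail with ENOMEM rather than crash, matching the fallback behaviour discussed in the thread.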