Message ID | 20220704112526.2492342-4-chenhuacai@loongson.cn (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | mm/sparse-vmemmap: Generalise helpers and enable for LoongArch | expand |
On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote: > From: Feiyang Chen <chenfeiyang@loongson.cn> > > Generalise vmemmap_populate_hugepages() so ARM64 & X86 & LoongArch can > share its implementation. > > Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn> > Signed-off-by: Huacai Chen <chenhuacai@loongson.cn> > --- > arch/arm64/mm/mmu.c | 53 ++++++----------------- > arch/loongarch/mm/init.c | 63 ++++++++------------------- > arch/x86/mm/init_64.c | 92 ++++++++++++++-------------------------- > include/linux/mm.h | 6 +++ > mm/sparse-vmemmap.c | 54 +++++++++++++++++++++++ > 5 files changed, 124 insertions(+), 144 deletions(-) > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index 626ec32873c6..b080a65c719d 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -1158,49 +1158,24 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, > return vmemmap_populate_basepages(start, end, node, altmap); > } > #else /* !ARM64_KERNEL_USES_PMD_MAPS */ > +void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node, > + unsigned long addr, unsigned long next) > +{ > + pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL)); > +} > + > +int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr, > + unsigned long next) > +{ > + vmemmap_verify((pte_t *)pmdp, node, addr, next); > + return 1; > +} > + > int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, > struct vmem_altmap *altmap) > { > - unsigned long addr = start; > - unsigned long next; > - pgd_t *pgdp; > - p4d_t *p4dp; > - pud_t *pudp; > - pmd_t *pmdp; > - > WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); > - do { > - next = pmd_addr_end(addr, end); > - > - pgdp = vmemmap_pgd_populate(addr, node); > - if (!pgdp) > - return -ENOMEM; > - > - p4dp = vmemmap_p4d_populate(pgdp, addr, node); > - if (!p4dp) > - return -ENOMEM; > - > - pudp = vmemmap_pud_populate(p4dp, addr, node); > - if (!pudp) > - return -ENOMEM; > - > - pmdp = pmd_offset(pudp, addr); > - if (pmd_none(READ_ONCE(*pmdp))) { > - void *p = NULL; > - > - p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > - if (!p) { > - if (vmemmap_populate_basepages(addr, next, node, altmap)) > - return -ENOMEM; > - continue; > - } > - > - pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL)); > - } else > - vmemmap_verify((pte_t *)pmdp, node, addr, next); > - } while (addr = next, addr != end); > - > - return 0; > + return vmemmap_populate_hugepages(start, end, node, altmap); > } > #endif /* !ARM64_KERNEL_USES_PMD_MAPS */ I think the arm64 change is mostly ok (thanks!), but I have a question about the core code you're introducing: > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c > index 33e2a1ceee72..6f2e40bb695d 100644 > --- a/mm/sparse-vmemmap.c > +++ b/mm/sparse-vmemmap.c > @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end, > return vmemmap_populate_range(start, end, node, altmap, NULL); > } > > +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, > + unsigned long addr, unsigned long next) > +{ > +} > + > +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, > + unsigned long next) > +{ > + return 0; > +} > + > +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, > + int node, struct vmem_altmap *altmap) > +{ > + unsigned long addr; > + unsigned long next; > + pgd_t *pgd; > + p4d_t *p4d; > + pud_t *pud; > + pmd_t *pmd; > + > + for (addr = start; addr < end; addr = next) { > + next = pmd_addr_end(addr, end); > + > + pgd = vmemmap_pgd_populate(addr, node); > + if (!pgd) > + return -ENOMEM; > + > + p4d = vmemmap_p4d_populate(pgd, addr, node); > + if (!p4d) > + return -ENOMEM; > + > + pud = vmemmap_pud_populate(p4d, addr, node); > + if (!pud) > + return -ENOMEM; > + > + pmd = pmd_offset(pud, addr); > + if (pmd_none(READ_ONCE(*pmd))) { > + void *p; > + > + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > + if (p) { > + vmemmap_set_pmd(pmd, p, node, addr, next); > + continue; > + } else if (altmap) > + return -ENOMEM; /* no fallback */ Why do you return -ENOMEM if 'altmap' here? That seems to be different to what we currently have on arm64 and it's not clear to me why we're happy with an altmap for the pmd case, but not for the pte case. Will
Hi, Will, On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <will@kernel.org> wrote: > > On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote: > > From: Feiyang Chen <chenfeiyang@loongson.cn> > > > > Generalise vmemmap_populate_hugepages() so ARM64 & X86 & LoongArch can > > share its implementation. > > > > Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn> > > Signed-off-by: Huacai Chen <chenhuacai@loongson.cn> > > --- > > arch/arm64/mm/mmu.c | 53 ++++++----------------- > > arch/loongarch/mm/init.c | 63 ++++++++------------------- > > arch/x86/mm/init_64.c | 92 ++++++++++++++-------------------------- > > include/linux/mm.h | 6 +++ > > mm/sparse-vmemmap.c | 54 +++++++++++++++++++++++ > > 5 files changed, 124 insertions(+), 144 deletions(-) > > > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > > index 626ec32873c6..b080a65c719d 100644 > > --- a/arch/arm64/mm/mmu.c > > +++ b/arch/arm64/mm/mmu.c > > @@ -1158,49 +1158,24 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, > > return vmemmap_populate_basepages(start, end, node, altmap); > > } > > #else /* !ARM64_KERNEL_USES_PMD_MAPS */ > > +void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node, > > + unsigned long addr, unsigned long next) > > +{ > > + pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL)); > > +} > > + > > +int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr, > > + unsigned long next) > > +{ > > + vmemmap_verify((pte_t *)pmdp, node, addr, next); > > + return 1; > > +} > > + > > int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, > > struct vmem_altmap *altmap) > > { > > - unsigned long addr = start; > > - unsigned long next; > > - pgd_t *pgdp; > > - p4d_t *p4dp; > > - pud_t *pudp; > > - pmd_t *pmdp; > > - > > WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); > > - do { > > - next = pmd_addr_end(addr, end); > > - > > - pgdp = vmemmap_pgd_populate(addr, node); > > - if (!pgdp) > > - return -ENOMEM; > > - > > - p4dp = vmemmap_p4d_populate(pgdp, addr, node); > > - if (!p4dp) > > - return -ENOMEM; > > - > > - pudp = vmemmap_pud_populate(p4dp, addr, node); > > - if (!pudp) > > - return -ENOMEM; > > - > > - pmdp = pmd_offset(pudp, addr); > > - if (pmd_none(READ_ONCE(*pmdp))) { > > - void *p = NULL; > > - > > - p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > > - if (!p) { > > - if (vmemmap_populate_basepages(addr, next, node, altmap)) > > - return -ENOMEM; > > - continue; > > - } > > - > > - pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL)); > > - } else > > - vmemmap_verify((pte_t *)pmdp, node, addr, next); > > - } while (addr = next, addr != end); > > - > > - return 0; > > + return vmemmap_populate_hugepages(start, end, node, altmap); > > } > > #endif /* !ARM64_KERNEL_USES_PMD_MAPS */ > > > I think the arm64 change is mostly ok (thanks!), but I have a question about > the core code you're introducing: > > > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c > > index 33e2a1ceee72..6f2e40bb695d 100644 > > --- a/mm/sparse-vmemmap.c > > +++ b/mm/sparse-vmemmap.c > > @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end, > > return vmemmap_populate_range(start, end, node, altmap, NULL); > > } > > > > +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, > > + unsigned long addr, unsigned long next) > > +{ > > +} > > + > > +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, > > + unsigned long next) > > +{ > > + return 0; > > +} > > + > > +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, > > + int node, struct vmem_altmap *altmap) > > +{ > > + unsigned long addr; > > + unsigned long next; > > + pgd_t *pgd; > > + p4d_t *p4d; > > + pud_t *pud; > > + pmd_t *pmd; > > + > > + for (addr = start; addr < end; addr = next) { > > + next = pmd_addr_end(addr, end); > > + > > + pgd = vmemmap_pgd_populate(addr, node); > > + if (!pgd) > > + return -ENOMEM; > > + > > + p4d = vmemmap_p4d_populate(pgd, addr, node); > > + if (!p4d) > > + return -ENOMEM; > > + > > + pud = vmemmap_pud_populate(p4d, addr, node); > > + if (!pud) > > + return -ENOMEM; > > + > > + pmd = pmd_offset(pud, addr); > > + if (pmd_none(READ_ONCE(*pmd))) { > > + void *p; > > + > > + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > > + if (p) { > > + vmemmap_set_pmd(pmd, p, node, addr, next); > > + continue; > > + } else if (altmap) > > + return -ENOMEM; /* no fallback */ > > Why do you return -ENOMEM if 'altmap' here? That seems to be different to > what we currently have on arm64 and it's not clear to me why we're happy > with an altmap for the pmd case, but not for the pte case. The generic version is the same as X86. It seems that ARM64 always fallback whether there is an altmap, but X86 only fallback in the no altmap case. I don't know the reason of X86, can Dan Williams give some explaination? Huacai > > Will
On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote: > On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <will@kernel.org> wrote: > > On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote: > > > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c > > > index 33e2a1ceee72..6f2e40bb695d 100644 > > > --- a/mm/sparse-vmemmap.c > > > +++ b/mm/sparse-vmemmap.c > > > @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end, > > > return vmemmap_populate_range(start, end, node, altmap, NULL); > > > } > > > > > > +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, > > > + unsigned long addr, unsigned long next) > > > +{ > > > +} > > > + > > > +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, > > > + unsigned long next) > > > +{ > > > + return 0; > > > +} > > > + > > > +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, > > > + int node, struct vmem_altmap *altmap) > > > +{ > > > + unsigned long addr; > > > + unsigned long next; > > > + pgd_t *pgd; > > > + p4d_t *p4d; > > > + pud_t *pud; > > > + pmd_t *pmd; > > > + > > > + for (addr = start; addr < end; addr = next) { > > > + next = pmd_addr_end(addr, end); > > > + > > > + pgd = vmemmap_pgd_populate(addr, node); > > > + if (!pgd) > > > + return -ENOMEM; > > > + > > > + p4d = vmemmap_p4d_populate(pgd, addr, node); > > > + if (!p4d) > > > + return -ENOMEM; > > > + > > > + pud = vmemmap_pud_populate(p4d, addr, node); > > > + if (!pud) > > > + return -ENOMEM; > > > + > > > + pmd = pmd_offset(pud, addr); > > > + if (pmd_none(READ_ONCE(*pmd))) { > > > + void *p; > > > + > > > + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > > > + if (p) { > > > + vmemmap_set_pmd(pmd, p, node, addr, next); > > > + continue; > > > + } else if (altmap) > > > + return -ENOMEM; /* no fallback */ > > > > Why do you return -ENOMEM if 'altmap' here? That seems to be different to > > what we currently have on arm64 and it's not clear to me why we're happy > > with an altmap for the pmd case, but not for the pte case. > The generic version is the same as X86. It seems that ARM64 always > fallback whether there is an altmap, but X86 only fallback in the no > altmap case. I don't know the reason of X86, can Dan Williams give > some explaination? Right, I think we need to understand the new behaviour here before we adopt it on arm64. Will
+Dan Williams +Sudarshan Rajagopalan On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <will@kernel.org> wrote: > > On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote: > > On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <will@kernel.org> wrote: > > > On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote: > > > > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c > > > > index 33e2a1ceee72..6f2e40bb695d 100644 > > > > --- a/mm/sparse-vmemmap.c > > > > +++ b/mm/sparse-vmemmap.c > > > > @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end, > > > > return vmemmap_populate_range(start, end, node, altmap, NULL); > > > > } > > > > > > > > +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, > > > > + unsigned long addr, unsigned long next) > > > > +{ > > > > +} > > > > + > > > > +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, > > > > + unsigned long next) > > > > +{ > > > > + return 0; > > > > +} > > > > + > > > > +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, > > > > + int node, struct vmem_altmap *altmap) > > > > +{ > > > > + unsigned long addr; > > > > + unsigned long next; > > > > + pgd_t *pgd; > > > > + p4d_t *p4d; > > > > + pud_t *pud; > > > > + pmd_t *pmd; > > > > + > > > > + for (addr = start; addr < end; addr = next) { > > > > + next = pmd_addr_end(addr, end); > > > > + > > > > + pgd = vmemmap_pgd_populate(addr, node); > > > > + if (!pgd) > > > > + return -ENOMEM; > > > > + > > > > + p4d = vmemmap_p4d_populate(pgd, addr, node); > > > > + if (!p4d) > > > > + return -ENOMEM; > > > > + > > > > + pud = vmemmap_pud_populate(p4d, addr, node); > > > > + if (!pud) > > > > + return -ENOMEM; > > > > + > > > > + pmd = pmd_offset(pud, addr); > > > > + if (pmd_none(READ_ONCE(*pmd))) { > > > > + void *p; > > > > + > > > > + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > > > > + if (p) { > > > > + vmemmap_set_pmd(pmd, p, node, addr, next); > > > > + continue; > > > > + } else if (altmap) > > > > + return -ENOMEM; /* no fallback */ > > > > > > Why do you return -ENOMEM if 'altmap' here? That seems to be different to > > > what we currently have on arm64 and it's not clear to me why we're happy > > > with an altmap for the pmd case, but not for the pte case. > > The generic version is the same as X86. It seems that ARM64 always > > fallback whether there is an altmap, but X86 only fallback in the no > > altmap case. I don't know the reason of X86, can Dan Williams give > > some explaination? > > Right, I think we need to understand the new behaviour here before we adopt > it on arm64. Hi, Dan, Could you please tell us the reason? Thanks. And Sudarshan, You are the author of adding a fallback mechanism to ARM64, do you know why ARM64 is different from X86 (only fallback in no altmap case)? Huacai > > Will
Oh, Sudarshan Rajagopalan's Email has changed, Let's update. Huacai On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <chenhuacai@kernel.org> wrote: > > +Dan Williams > +Sudarshan Rajagopalan > > On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <will@kernel.org> wrote: > > > > On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote: > > > On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <will@kernel.org> wrote: > > > > On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote: > > > > > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c > > > > > index 33e2a1ceee72..6f2e40bb695d 100644 > > > > > --- a/mm/sparse-vmemmap.c > > > > > +++ b/mm/sparse-vmemmap.c > > > > > @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end, > > > > > return vmemmap_populate_range(start, end, node, altmap, NULL); > > > > > } > > > > > > > > > > +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, > > > > > + unsigned long addr, unsigned long next) > > > > > +{ > > > > > +} > > > > > + > > > > > +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, > > > > > + unsigned long next) > > > > > +{ > > > > > + return 0; > > > > > +} > > > > > + > > > > > +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, > > > > > + int node, struct vmem_altmap *altmap) > > > > > +{ > > > > > + unsigned long addr; > > > > > + unsigned long next; > > > > > + pgd_t *pgd; > > > > > + p4d_t *p4d; > > > > > + pud_t *pud; > > > > > + pmd_t *pmd; > > > > > + > > > > > + for (addr = start; addr < end; addr = next) { > > > > > + next = pmd_addr_end(addr, end); > > > > > + > > > > > + pgd = vmemmap_pgd_populate(addr, node); > > > > > + if (!pgd) > > > > > + return -ENOMEM; > > > > > + > > > > > + p4d = vmemmap_p4d_populate(pgd, addr, node); > > > > > + if (!p4d) > > > > > + return -ENOMEM; > > > > > + > > > > > + pud = vmemmap_pud_populate(p4d, addr, node); > > > > > + if (!pud) > > > > > + return -ENOMEM; > > > > > + > > > > > + pmd = pmd_offset(pud, addr); > > > > > + if (pmd_none(READ_ONCE(*pmd))) { > > > > > + void *p; > > > > > + > > > > > + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > > > > > + if (p) { > > > > > + vmemmap_set_pmd(pmd, p, node, addr, next); > > > > > + continue; > > > > > + } else if (altmap) > > > > > + return -ENOMEM; /* no fallback */ > > > > > > > > Why do you return -ENOMEM if 'altmap' here? That seems to be different to > > > > what we currently have on arm64 and it's not clear to me why we're happy > > > > with an altmap for the pmd case, but not for the pte case. > > > The generic version is the same as X86. It seems that ARM64 always > > > fallback whether there is an altmap, but X86 only fallback in the no > > > altmap case. I don't know the reason of X86, can Dan Williams give > > > some explaination? > > > > Right, I think we need to understand the new behaviour here before we adopt > > it on arm64. > Hi, Dan, > Could you please tell us the reason? Thanks. > > And Sudarshan, > You are the author of adding a fallback mechanism to ARM64, do you > know why ARM64 is different from X86 (only fallback in no altmap > case)? > > Huacai > > > > > Will
On 14.07.22 14:34, Huacai Chen wrote: > Oh, Sudarshan Rajagopalan's Email has changed, Let's update. > > Huacai > > On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <chenhuacai@kernel.org> wrote: >> >> +Dan Williams >> +Sudarshan Rajagopalan >> >> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <will@kernel.org> wrote: >>> >>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote: >>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <will@kernel.org> wrote: >>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote: >>>>>> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c >>>>>> index 33e2a1ceee72..6f2e40bb695d 100644 >>>>>> --- a/mm/sparse-vmemmap.c >>>>>> +++ b/mm/sparse-vmemmap.c >>>>>> @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end, >>>>>> return vmemmap_populate_range(start, end, node, altmap, NULL); >>>>>> } >>>>>> >>>>>> +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, >>>>>> + unsigned long addr, unsigned long next) >>>>>> +{ >>>>>> +} >>>>>> + >>>>>> +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, >>>>>> + unsigned long next) >>>>>> +{ >>>>>> + return 0; >>>>>> +} >>>>>> + >>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, >>>>>> + int node, struct vmem_altmap *altmap) >>>>>> +{ >>>>>> + unsigned long addr; >>>>>> + unsigned long next; >>>>>> + pgd_t *pgd; >>>>>> + p4d_t *p4d; >>>>>> + pud_t *pud; >>>>>> + pmd_t *pmd; >>>>>> + >>>>>> + for (addr = start; addr < end; addr = next) { >>>>>> + next = pmd_addr_end(addr, end); >>>>>> + >>>>>> + pgd = vmemmap_pgd_populate(addr, node); >>>>>> + if (!pgd) >>>>>> + return -ENOMEM; >>>>>> + >>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node); >>>>>> + if (!p4d) >>>>>> + return -ENOMEM; >>>>>> + >>>>>> + pud = vmemmap_pud_populate(p4d, addr, node); >>>>>> + if (!pud) >>>>>> + return -ENOMEM; >>>>>> + >>>>>> + pmd = pmd_offset(pud, addr); >>>>>> + if (pmd_none(READ_ONCE(*pmd))) { >>>>>> + void *p; >>>>>> + >>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); >>>>>> + if (p) { >>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next); >>>>>> + continue; >>>>>> + } else if (altmap) >>>>>> + return -ENOMEM; /* no fallback */ >>>>> >>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to >>>>> what we currently have on arm64 and it's not clear to me why we're happy >>>>> with an altmap for the pmd case, but not for the pte case. >>>> The generic version is the same as X86. It seems that ARM64 always >>>> fallback whether there is an altmap, but X86 only fallback in the no >>>> altmap case. I don't know the reason of X86, can Dan Williams give >>>> some explaination? >>> >>> Right, I think we need to understand the new behaviour here before we adopt >>> it on arm64. >> Hi, Dan, >> Could you please tell us the reason? Thanks. >> >> And Sudarshan, >> You are the author of adding a fallback mechanism to ARM64, do you >> know why ARM64 is different from X86 (only fallback in no altmap >> case)? I think that's a purely theoretical issue: I assume that in any case we care about, the altmap should be reasonably sized and aligned such that this will always succeed. To me it even sounds like the best idea to *consistently* fail if there is no more space in the altmap, even if we'd have to fallback to PTE (again, highly unlikely that this is relevant in practice). Could indicate an altmap-size configuration issue.
Hi, Will, On Wed, Jul 20, 2022 at 5:34 PM David Hildenbrand <david@redhat.com> wrote: > > On 14.07.22 14:34, Huacai Chen wrote: > > Oh, Sudarshan Rajagopalan's Email has changed, Let's update. > > > > Huacai > > > > On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <chenhuacai@kernel.org> wrote: > >> > >> +Dan Williams > >> +Sudarshan Rajagopalan > >> > >> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <will@kernel.org> wrote: > >>> > >>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote: > >>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <will@kernel.org> wrote: > >>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote: > >>>>>> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c > >>>>>> index 33e2a1ceee72..6f2e40bb695d 100644 > >>>>>> --- a/mm/sparse-vmemmap.c > >>>>>> +++ b/mm/sparse-vmemmap.c > >>>>>> @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end, > >>>>>> return vmemmap_populate_range(start, end, node, altmap, NULL); > >>>>>> } > >>>>>> > >>>>>> +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, > >>>>>> + unsigned long addr, unsigned long next) > >>>>>> +{ > >>>>>> +} > >>>>>> + > >>>>>> +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, > >>>>>> + unsigned long next) > >>>>>> +{ > >>>>>> + return 0; > >>>>>> +} > >>>>>> + > >>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, > >>>>>> + int node, struct vmem_altmap *altmap) > >>>>>> +{ > >>>>>> + unsigned long addr; > >>>>>> + unsigned long next; > >>>>>> + pgd_t *pgd; > >>>>>> + p4d_t *p4d; > >>>>>> + pud_t *pud; > >>>>>> + pmd_t *pmd; > >>>>>> + > >>>>>> + for (addr = start; addr < end; addr = next) { > >>>>>> + next = pmd_addr_end(addr, end); > >>>>>> + > >>>>>> + pgd = vmemmap_pgd_populate(addr, node); > >>>>>> + if (!pgd) > >>>>>> + return -ENOMEM; > >>>>>> + > >>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node); > >>>>>> + if (!p4d) > >>>>>> + return -ENOMEM; > >>>>>> + > >>>>>> + pud = vmemmap_pud_populate(p4d, addr, node); > >>>>>> + if (!pud) > >>>>>> + return -ENOMEM; > >>>>>> + > >>>>>> + pmd = pmd_offset(pud, addr); > >>>>>> + if (pmd_none(READ_ONCE(*pmd))) { > >>>>>> + void *p; > >>>>>> + > >>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > >>>>>> + if (p) { > >>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next); > >>>>>> + continue; > >>>>>> + } else if (altmap) > >>>>>> + return -ENOMEM; /* no fallback */ > >>>>> > >>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to > >>>>> what we currently have on arm64 and it's not clear to me why we're happy > >>>>> with an altmap for the pmd case, but not for the pte case. > >>>> The generic version is the same as X86. It seems that ARM64 always > >>>> fallback whether there is an altmap, but X86 only fallback in the no > >>>> altmap case. I don't know the reason of X86, can Dan Williams give > >>>> some explaination? > >>> > >>> Right, I think we need to understand the new behaviour here before we adopt > >>> it on arm64. > >> Hi, Dan, > >> Could you please tell us the reason? Thanks. > >> > >> And Sudarshan, > >> You are the author of adding a fallback mechanism to ARM64, do you > >> know why ARM64 is different from X86 (only fallback in no altmap > >> case)? > > I think that's a purely theoretical issue: I assume that in any case we > care about, the altmap should be reasonably sized and aligned such that > this will always succeed. > > To me it even sounds like the best idea to *consistently* fail if there > is no more space in the altmap, even if we'd have to fallback to PTE > (again, highly unlikely that this is relevant in practice). Could > indicate an altmap-size configuration issue. Does David's explanation make things clear? Moreover, I think Dan's dedicated comments "no fallback" implies that his design is carefully considered. So I think the generic version using the X86 logic is just OK. Huacai > > -- > Thanks, > > David / dhildenb > >
On Thu, Jul 21, 2022 at 10:08:10AM +0800, Huacai Chen wrote: > On Wed, Jul 20, 2022 at 5:34 PM David Hildenbrand <david@redhat.com> wrote: > > On 14.07.22 14:34, Huacai Chen wrote: > > > On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <chenhuacai@kernel.org> wrote: > > >> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <will@kernel.org> wrote: > > >>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote: > > >>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <will@kernel.org> wrote: > > >>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote: > > >>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, > > >>>>>> + int node, struct vmem_altmap *altmap) > > >>>>>> +{ > > >>>>>> + unsigned long addr; > > >>>>>> + unsigned long next; > > >>>>>> + pgd_t *pgd; > > >>>>>> + p4d_t *p4d; > > >>>>>> + pud_t *pud; > > >>>>>> + pmd_t *pmd; > > >>>>>> + > > >>>>>> + for (addr = start; addr < end; addr = next) { > > >>>>>> + next = pmd_addr_end(addr, end); > > >>>>>> + > > >>>>>> + pgd = vmemmap_pgd_populate(addr, node); > > >>>>>> + if (!pgd) > > >>>>>> + return -ENOMEM; > > >>>>>> + > > >>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node); > > >>>>>> + if (!p4d) > > >>>>>> + return -ENOMEM; > > >>>>>> + > > >>>>>> + pud = vmemmap_pud_populate(p4d, addr, node); > > >>>>>> + if (!pud) > > >>>>>> + return -ENOMEM; > > >>>>>> + > > >>>>>> + pmd = pmd_offset(pud, addr); > > >>>>>> + if (pmd_none(READ_ONCE(*pmd))) { > > >>>>>> + void *p; > > >>>>>> + > > >>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > > >>>>>> + if (p) { > > >>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next); > > >>>>>> + continue; > > >>>>>> + } else if (altmap) > > >>>>>> + return -ENOMEM; /* no fallback */ > > >>>>> > > >>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to > > >>>>> what we currently have on arm64 and it's not clear to me why we're happy > > >>>>> with an altmap for the pmd case, but not for the pte case. > > >>>> The generic version is the same as X86. It seems that ARM64 always > > >>>> fallback whether there is an altmap, but X86 only fallback in the no > > >>>> altmap case. I don't know the reason of X86, can Dan Williams give > > >>>> some explaination? > > >>> > > >>> Right, I think we need to understand the new behaviour here before we adopt > > >>> it on arm64. > > >> Hi, Dan, > > >> Could you please tell us the reason? Thanks. > > >> > > >> And Sudarshan, > > >> You are the author of adding a fallback mechanism to ARM64, do you > > >> know why ARM64 is different from X86 (only fallback in no altmap > > >> case)? > > > > I think that's a purely theoretical issue: I assume that in any case we > > care about, the altmap should be reasonably sized and aligned such that > > this will always succeed. > > > > To me it even sounds like the best idea to *consistently* fail if there > > is no more space in the altmap, even if we'd have to fallback to PTE > > (again, highly unlikely that this is relevant in practice). Could > > indicate an altmap-size configuration issue. > > Does David's explanation make things clear? Moreover, I think Dan's > dedicated comments "no fallback" implies that his design is carefully > considered. So I think the generic version using the X86 logic is just > OK. I think the comment isn't worth the metaphorical paper that it's written on! If you can bulk it up a bit based on David's reasoning, then that would help. But yes, I'm happy with the code now, thanks both. Will
Hi, Will, On Thu, Jul 21, 2022 at 5:55 PM Will Deacon <will@kernel.org> wrote: > > On Thu, Jul 21, 2022 at 10:08:10AM +0800, Huacai Chen wrote: > > On Wed, Jul 20, 2022 at 5:34 PM David Hildenbrand <david@redhat.com> wrote: > > > On 14.07.22 14:34, Huacai Chen wrote: > > > > On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <chenhuacai@kernel.org> wrote: > > > >> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <will@kernel.org> wrote: > > > >>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote: > > > >>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <will@kernel.org> wrote: > > > >>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote: > > > >>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, > > > >>>>>> + int node, struct vmem_altmap *altmap) > > > >>>>>> +{ > > > >>>>>> + unsigned long addr; > > > >>>>>> + unsigned long next; > > > >>>>>> + pgd_t *pgd; > > > >>>>>> + p4d_t *p4d; > > > >>>>>> + pud_t *pud; > > > >>>>>> + pmd_t *pmd; > > > >>>>>> + > > > >>>>>> + for (addr = start; addr < end; addr = next) { > > > >>>>>> + next = pmd_addr_end(addr, end); > > > >>>>>> + > > > >>>>>> + pgd = vmemmap_pgd_populate(addr, node); > > > >>>>>> + if (!pgd) > > > >>>>>> + return -ENOMEM; > > > >>>>>> + > > > >>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node); > > > >>>>>> + if (!p4d) > > > >>>>>> + return -ENOMEM; > > > >>>>>> + > > > >>>>>> + pud = vmemmap_pud_populate(p4d, addr, node); > > > >>>>>> + if (!pud) > > > >>>>>> + return -ENOMEM; > > > >>>>>> + > > > >>>>>> + pmd = pmd_offset(pud, addr); > > > >>>>>> + if (pmd_none(READ_ONCE(*pmd))) { > > > >>>>>> + void *p; > > > >>>>>> + > > > >>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > > > >>>>>> + if (p) { > > > >>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next); > > > >>>>>> + continue; > > > >>>>>> + } else if (altmap) > > > >>>>>> + return -ENOMEM; /* no fallback */ > > > >>>>> > > > >>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to > > > >>>>> what we currently have on arm64 and it's not clear to me why we're happy > > > >>>>> with an altmap for the pmd case, but not for the pte case. > > > >>>> The generic version is the same as X86. It seems that ARM64 always > > > >>>> fallback whether there is an altmap, but X86 only fallback in the no > > > >>>> altmap case. I don't know the reason of X86, can Dan Williams give > > > >>>> some explaination? > > > >>> > > > >>> Right, I think we need to understand the new behaviour here before we adopt > > > >>> it on arm64. > > > >> Hi, Dan, > > > >> Could you please tell us the reason? Thanks. > > > >> > > > >> And Sudarshan, > > > >> You are the author of adding a fallback mechanism to ARM64, do you > > > >> know why ARM64 is different from X86 (only fallback in no altmap > > > >> case)? > > > > > > I think that's a purely theoretical issue: I assume that in any case we > > > care about, the altmap should be reasonably sized and aligned such that > > > this will always succeed. > > > > > > To me it even sounds like the best idea to *consistently* fail if there > > > is no more space in the altmap, even if we'd have to fallback to PTE > > > (again, highly unlikely that this is relevant in practice). Could > > > indicate an altmap-size configuration issue. > > > > Does David's explanation make things clear? Moreover, I think Dan's > > dedicated comments "no fallback" implies that his design is carefully > > considered. So I think the generic version using the X86 logic is just > > OK. > > I think the comment isn't worth the metaphorical paper that it's written > on! If you can bulk it up a bit based on David's reasoning, then that would > help. But yes, I'm happy with the code now, thanks both. OK, I will add a detailed comment here. Huacai > > Will
Huacai Chen wrote: > Hi, Will, > > On Thu, Jul 21, 2022 at 5:55 PM Will Deacon <will@kernel.org> wrote: > > > > On Thu, Jul 21, 2022 at 10:08:10AM +0800, Huacai Chen wrote: > > > On Wed, Jul 20, 2022 at 5:34 PM David Hildenbrand <david@redhat.com> wrote: > > > > On 14.07.22 14:34, Huacai Chen wrote: > > > > > On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <chenhuacai@kernel.org> wrote: > > > > >> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <will@kernel.org> wrote: > > > > >>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote: > > > > >>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <will@kernel.org> wrote: > > > > >>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote: > > > > >>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, > > > > >>>>>> + int node, struct vmem_altmap *altmap) > > > > >>>>>> +{ > > > > >>>>>> + unsigned long addr; > > > > >>>>>> + unsigned long next; > > > > >>>>>> + pgd_t *pgd; > > > > >>>>>> + p4d_t *p4d; > > > > >>>>>> + pud_t *pud; > > > > >>>>>> + pmd_t *pmd; > > > > >>>>>> + > > > > >>>>>> + for (addr = start; addr < end; addr = next) { > > > > >>>>>> + next = pmd_addr_end(addr, end); > > > > >>>>>> + > > > > >>>>>> + pgd = vmemmap_pgd_populate(addr, node); > > > > >>>>>> + if (!pgd) > > > > >>>>>> + return -ENOMEM; > > > > >>>>>> + > > > > >>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node); > > > > >>>>>> + if (!p4d) > > > > >>>>>> + return -ENOMEM; > > > > >>>>>> + > > > > >>>>>> + pud = vmemmap_pud_populate(p4d, addr, node); > > > > >>>>>> + if (!pud) > > > > >>>>>> + return -ENOMEM; > > > > >>>>>> + > > > > >>>>>> + pmd = pmd_offset(pud, addr); > > > > >>>>>> + if (pmd_none(READ_ONCE(*pmd))) { > > > > >>>>>> + void *p; > > > > >>>>>> + > > > > >>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > > > > >>>>>> + if (p) { > > > > >>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next); > > > > >>>>>> + continue; > > > > >>>>>> + } else if (altmap) > > > > >>>>>> + return -ENOMEM; /* no fallback */ > > > > >>>>> > > > > >>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to > > > > >>>>> what we currently have on arm64 and it's not clear to me why we're happy > > > > >>>>> with an altmap for the pmd case, but not for the pte case. > > > > >>>> The generic version is the same as X86. It seems that ARM64 always > > > > >>>> fallback whether there is an altmap, but X86 only fallback in the no > > > > >>>> altmap case. I don't know the reason of X86, can Dan Williams give > > > > >>>> some explaination? > > > > >>> > > > > >>> Right, I think we need to understand the new behaviour here before we adopt > > > > >>> it on arm64. > > > > >> Hi, Dan, > > > > >> Could you please tell us the reason? Thanks. > > > > >> > > > > >> And Sudarshan, > > > > >> You are the author of adding a fallback mechanism to ARM64, do you > > > > >> know why ARM64 is different from X86 (only fallback in no altmap > > > > >> case)? > > > > > > > > I think that's a purely theoretical issue: I assume that in any case we > > > > care about, the altmap should be reasonably sized and aligned such that > > > > this will always succeed. > > > > > > > > To me it even sounds like the best idea to *consistently* fail if there > > > > is no more space in the altmap, even if we'd have to fallback to PTE > > > > (again, highly unlikely that this is relevant in practice). Could > > > > indicate an altmap-size configuration issue. > > > > > > Does David's explanation make things clear? Moreover, I think Dan's > > > dedicated comments "no fallback" implies that his design is carefully > > > considered. So I think the generic version using the X86 logic is just > > > OK. > > > > I think the comment isn't worth the metaphorical paper that it's written > > on! If you can bulk it up a bit based on David's reasoning, then that would > > help. But yes, I'm happy with the code now, thanks both. > OK, I will add a detailed comment here. Apologies for coming late to the party here, original ping came while I was on vacation and I only just now noticed the direct questions. All resolved now or is a question still pending?
Hi, Dan, On Fri, Jul 22, 2022 at 5:54 AM Dan Williams <dan.j.williams@intel.com> wrote: > > Huacai Chen wrote: > > Hi, Will, > > > > On Thu, Jul 21, 2022 at 5:55 PM Will Deacon <will@kernel.org> wrote: > > > > > > On Thu, Jul 21, 2022 at 10:08:10AM +0800, Huacai Chen wrote: > > > > On Wed, Jul 20, 2022 at 5:34 PM David Hildenbrand <david@redhat.com> wrote: > > > > > On 14.07.22 14:34, Huacai Chen wrote: > > > > > > On Fri, Jul 8, 2022 at 5:47 PM Huacai Chen <chenhuacai@kernel.org> wrote: > > > > > >> On Thu, Jul 7, 2022 at 12:17 AM Will Deacon <will@kernel.org> wrote: > > > > > >>> On Tue, Jul 05, 2022 at 09:07:59PM +0800, Huacai Chen wrote: > > > > > >>>> On Tue, Jul 5, 2022 at 5:29 PM Will Deacon <will@kernel.org> wrote: > > > > > >>>>> On Mon, Jul 04, 2022 at 07:25:25PM +0800, Huacai Chen wrote: > > > > > >>>>>> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, > > > > > >>>>>> + int node, struct vmem_altmap *altmap) > > > > > >>>>>> +{ > > > > > >>>>>> + unsigned long addr; > > > > > >>>>>> + unsigned long next; > > > > > >>>>>> + pgd_t *pgd; > > > > > >>>>>> + p4d_t *p4d; > > > > > >>>>>> + pud_t *pud; > > > > > >>>>>> + pmd_t *pmd; > > > > > >>>>>> + > > > > > >>>>>> + for (addr = start; addr < end; addr = next) { > > > > > >>>>>> + next = pmd_addr_end(addr, end); > > > > > >>>>>> + > > > > > >>>>>> + pgd = vmemmap_pgd_populate(addr, node); > > > > > >>>>>> + if (!pgd) > > > > > >>>>>> + return -ENOMEM; > > > > > >>>>>> + > > > > > >>>>>> + p4d = vmemmap_p4d_populate(pgd, addr, node); > > > > > >>>>>> + if (!p4d) > > > > > >>>>>> + return -ENOMEM; > > > > > >>>>>> + > > > > > >>>>>> + pud = vmemmap_pud_populate(p4d, addr, node); > > > > > >>>>>> + if (!pud) > > > > > >>>>>> + return -ENOMEM; > > > > > >>>>>> + > > > > > >>>>>> + pmd = pmd_offset(pud, addr); > > > > > >>>>>> + if (pmd_none(READ_ONCE(*pmd))) { > > > > > >>>>>> + void *p; > > > > > >>>>>> + > > > > > >>>>>> + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > > > > > >>>>>> + if (p) { > > > > > >>>>>> + vmemmap_set_pmd(pmd, p, node, addr, next); > > > > > >>>>>> + continue; > > > > > >>>>>> + } else if (altmap) > > > > > >>>>>> + return -ENOMEM; /* no fallback */ > > > > > >>>>> > > > > > >>>>> Why do you return -ENOMEM if 'altmap' here? That seems to be different to > > > > > >>>>> what we currently have on arm64 and it's not clear to me why we're happy > > > > > >>>>> with an altmap for the pmd case, but not for the pte case. > > > > > >>>> The generic version is the same as X86. It seems that ARM64 always > > > > > >>>> fallback whether there is an altmap, but X86 only fallback in the no > > > > > >>>> altmap case. I don't know the reason of X86, can Dan Williams give > > > > > >>>> some explaination? > > > > > >>> > > > > > >>> Right, I think we need to understand the new behaviour here before we adopt > > > > > >>> it on arm64. > > > > > >> Hi, Dan, > > > > > >> Could you please tell us the reason? Thanks. > > > > > >> > > > > > >> And Sudarshan, > > > > > >> You are the author of adding a fallback mechanism to ARM64, do you > > > > > >> know why ARM64 is different from X86 (only fallback in no altmap > > > > > >> case)? > > > > > > > > > > I think that's a purely theoretical issue: I assume that in any case we > > > > > care about, the altmap should be reasonably sized and aligned such that > > > > > this will always succeed. > > > > > > > > > > To me it even sounds like the best idea to *consistently* fail if there > > > > > is no more space in the altmap, even if we'd have to fallback to PTE > > > > > (again, highly unlikely that this is relevant in practice). Could > > > > > indicate an altmap-size configuration issue. > > > > > > > > Does David's explanation make things clear? Moreover, I think Dan's > > > > dedicated comments "no fallback" implies that his design is carefully > > > > considered. So I think the generic version using the X86 logic is just > > > > OK. > > > > > > I think the comment isn't worth the metaphorical paper that it's written > > > on! If you can bulk it up a bit based on David's reasoning, then that would > > > help. But yes, I'm happy with the code now, thanks both. > > OK, I will add a detailed comment here. > > Apologies for coming late to the party here, original ping came while I > was on vacation and I only just now noticed the direct questions. All > resolved now or is a question still pending? I updated the patch and added a detailed comment based on David's explanation [1]. If that description is correct, I think there are no more questions. Thank you. [1] https://lore.kernel.org/loongarch/20220721130419.1904711-4-chenhuacai@loongson.cn/T/#u Huacai >
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 626ec32873c6..b080a65c719d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1158,49 +1158,24 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return vmemmap_populate_basepages(start, end, node, altmap); } #else /* !ARM64_KERNEL_USES_PMD_MAPS */ +void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node, + unsigned long addr, unsigned long next) +{ + pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL)); +} + +int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr, + unsigned long next) +{ + vmemmap_verify((pte_t *)pmdp, node, addr, next); + return 1; +} + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { - unsigned long addr = start; - unsigned long next; - pgd_t *pgdp; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); - do { - next = pmd_addr_end(addr, end); - - pgdp = vmemmap_pgd_populate(addr, node); - if (!pgdp) - return -ENOMEM; - - p4dp = vmemmap_p4d_populate(pgdp, addr, node); - if (!p4dp) - return -ENOMEM; - - pudp = vmemmap_pud_populate(p4dp, addr, node); - if (!pudp) - return -ENOMEM; - - pmdp = pmd_offset(pudp, addr); - if (pmd_none(READ_ONCE(*pmdp))) { - void *p = NULL; - - p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); - if (!p) { - if (vmemmap_populate_basepages(addr, next, node, altmap)) - return -ENOMEM; - continue; - } - - pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL)); - } else - vmemmap_verify((pte_t *)pmdp, node, addr, next); - } while (addr = next, addr != end); - - return 0; + return vmemmap_populate_hugepages(start, end, node, altmap); } #endif /* !ARM64_KERNEL_USES_PMD_MAPS */ diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 35128229fe46..3190b3cd52d1 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -158,52 +158,25 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, - int node, struct vmem_altmap *altmap) +void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next) { - unsigned long addr = start; - unsigned long next; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - for (addr = start; addr < end; addr = next) { - next = pmd_addr_end(addr, end); - - pgd = vmemmap_pgd_populate(addr, node); - if (!pgd) - return -ENOMEM; - p4d = vmemmap_p4d_populate(pgd, addr, node); - if (!p4d) - return -ENOMEM; - pud = vmemmap_pud_populate(p4d, addr, node); - if (!pud) - return -ENOMEM; - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - void *p = NULL; - - p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL); - if (p) { - pmd_t entry; - - entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL); - pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL; - set_pmd_at(&init_mm, addr, pmd, entry); - - continue; - } - } else if (pmd_val(*pmd) & _PAGE_HUGE) { - vmemmap_verify((pte_t *)pmd, node, addr, next); - continue; - } - if (vmemmap_populate_basepages(addr, next, node, NULL)) - return -ENOMEM; - } - - return 0; + pmd_t entry; + + entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL); + pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL; + set_pmd_at(&init_mm, addr, pmd, entry); +} + +int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, + unsigned long next) +{ + int huge = pmd_val(*pmd) & _PAGE_HUGE; + + if (huge) + vmemmap_verify((pte_t *)pmd, node, addr, next); + + return huge; } #if CONFIG_PGTABLE_LEVELS == 2 diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 39c5246964a9..4911093ee2f3 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1532,72 +1532,44 @@ static long __meminitdata addr_start, addr_end; static void __meminitdata *p_start, *p_end; static int __meminitdata node_start; -static int __meminit vmemmap_populate_hugepages(unsigned long start, - unsigned long end, int node, struct vmem_altmap *altmap) +void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next) { - unsigned long addr; - unsigned long next; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - for (addr = start; addr < end; addr = next) { - next = pmd_addr_end(addr, end); - - pgd = vmemmap_pgd_populate(addr, node); - if (!pgd) - return -ENOMEM; - - p4d = vmemmap_p4d_populate(pgd, addr, node); - if (!p4d) - return -ENOMEM; - - pud = vmemmap_pud_populate(p4d, addr, node); - if (!pud) - return -ENOMEM; - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - void *p; - - p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); - if (p) { - pte_t entry; - - entry = pfn_pte(__pa(p) >> PAGE_SHIFT, - PAGE_KERNEL_LARGE); - set_pmd(pmd, __pmd(pte_val(entry))); + pte_t entry; + + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, + PAGE_KERNEL_LARGE); + set_pmd(pmd, __pmd(pte_val(entry))); + + /* check to see if we have contiguous blocks */ + if (p_end != p || node_start != node) { + if (p_start) + pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n", + addr_start, addr_end-1, p_start, p_end-1, node_start); + addr_start = addr; + node_start = node; + p_start = p; + } - /* check to see if we have contiguous blocks */ - if (p_end != p || node_start != node) { - if (p_start) - pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n", - addr_start, addr_end-1, p_start, p_end-1, node_start); - addr_start = addr; - node_start = node; - p_start = p; - } + addr_end = addr + PMD_SIZE; + p_end = p + PMD_SIZE; - addr_end = addr + PMD_SIZE; - p_end = p + PMD_SIZE; + if (!IS_ALIGNED(addr, PMD_SIZE) || + !IS_ALIGNED(next, PMD_SIZE)) + vmemmap_use_new_sub_pmd(addr, next); +} - if (!IS_ALIGNED(addr, PMD_SIZE) || - !IS_ALIGNED(next, PMD_SIZE)) - vmemmap_use_new_sub_pmd(addr, next); +int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, + unsigned long next) +{ + int large = pmd_large(*pmd); - continue; - } else if (altmap) - return -ENOMEM; /* no fallback */ - } else if (pmd_large(*pmd)) { - vmemmap_verify((pte_t *)pmd, node, addr, next); - vmemmap_use_sub_pmd(addr, next); - continue; - } - if (vmemmap_populate_basepages(addr, next, node, NULL)) - return -ENOMEM; + if (pmd_large(*pmd)) { + vmemmap_verify((pte_t *)pmd, node, addr, next); + vmemmap_use_sub_pmd(addr, next); } - return 0; + + return large; } int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, diff --git a/include/linux/mm.h b/include/linux/mm.h index f6ed6bc0a65f..894d59e1ffd6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3216,8 +3216,14 @@ struct vmem_altmap; void *vmemmap_alloc_block_buf(unsigned long size, int node, struct vmem_altmap *altmap); void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); +void vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next); +int vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, + unsigned long next); int vmemmap_populate_basepages(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); +int vmemmap_populate_hugepages(unsigned long start, unsigned long end, + int node, struct vmem_altmap *altmap); int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); void vmemmap_populate_print_last(void); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 33e2a1ceee72..6f2e40bb695d 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end, return vmemmap_populate_range(start, end, node, altmap, NULL); } +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next) +{ +} + +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, + unsigned long next) +{ + return 0; +} + +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, + int node, struct vmem_altmap *altmap) +{ + unsigned long addr; + unsigned long next; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + for (addr = start; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + + pgd = vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + + p4d = vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return -ENOMEM; + + pud = vmemmap_pud_populate(p4d, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(READ_ONCE(*pmd))) { + void *p; + + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); + if (p) { + vmemmap_set_pmd(pmd, p, node, addr, next); + continue; + } else if (altmap) + return -ENOMEM; /* no fallback */ + } else if (vmemmap_check_pmd(pmd, node, addr, next)) + continue; + if (vmemmap_populate_basepages(addr, next, node, altmap)) + return -ENOMEM; + } + return 0; +} + /* * For compound pages bigger than section size (e.g. x86 1G compound * pages with 2M subsection size) fill the rest of sections as tail