Message ID | 20200812010655.96339-1-liwei213@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] arm64: mm: free unused memmap for sparse memory model that define VMEMMAP | expand |
On Wed, Aug 12, 2020 at 09:06:55AM +0800, Wei Li wrote: > For the memory hole, sparse memory model that define SPARSEMEM_VMEMMAP > do not free the reserved memory for the page map, this patch do it. I've been thinking about it a bit more and it seems that instead of freeing unused memory map it would be better to allocate the exact memory map from the beginning. In sparse_init_nid() we can replace PAGES_PER_SECTION parameter to __populate_section_memmap() with the calculated value for architectures that define HAVE_ARCH_PFN_VALID. Than, I beleive it would be possible to remove free_unused_memmap() in arm64 and probably in arm32 as well. > Signed-off-by: Wei Li <liwei213@huawei.com> > Signed-off-by: Chen Feng <puck.chen@hisilicon.com> > Signed-off-by: Xia Qing <saberlily.xia@hisilicon.com> > > v2: fix the patch v1 compile errors that are not based on the latest mainline. > --- > arch/arm64/mm/init.c | 81 +++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 71 insertions(+), 10 deletions(-) > > diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c > index 1e93cfc7c47a..600889945cd0 100644 > --- a/arch/arm64/mm/init.c > +++ b/arch/arm64/mm/init.c > @@ -441,7 +441,48 @@ void __init bootmem_init(void) > memblock_dump_all(); > } > > -#ifndef CONFIG_SPARSEMEM_VMEMMAP > +#ifdef CONFIG_SPARSEMEM_VMEMMAP > +#define VMEMMAP_PAGE_INUSE 0xFD > +static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) > +{ > + unsigned long addr, end; > + unsigned long next; > + pmd_t *pmd; > + void *page_addr; > + phys_addr_t phys_addr; > + > + addr = (unsigned long)pfn_to_page(start_pfn); > + end = (unsigned long)pfn_to_page(end_pfn); > + > + pmd = pmd_off_k(addr); > + for (; addr < end; addr = next, pmd++) { > + next = pmd_addr_end(addr, end); > + > + if (!pmd_present(*pmd)) > + continue; > + > + if (IS_ALIGNED(addr, PMD_SIZE) && > + IS_ALIGNED(next, PMD_SIZE)) { > + phys_addr = __pfn_to_phys(pmd_pfn(*pmd)); > + memblock_free(phys_addr, PMD_SIZE); > + pmd_clear(pmd); > + } else { > + /* If here, we are freeing vmemmap pages. */ > + memset((void *)addr, VMEMMAP_PAGE_INUSE, next - addr); > + page_addr = page_address(pmd_page(*pmd)); > + > + if (!memchr_inv(page_addr, VMEMMAP_PAGE_INUSE, > + PMD_SIZE)) { > + phys_addr = __pfn_to_phys(pmd_pfn(*pmd)); > + memblock_free(phys_addr, PMD_SIZE); > + pmd_clear(pmd); > + } > + } > + } > + > + flush_tlb_all(); > +} > +#else > static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) > { > struct page *start_pg, *end_pg; > @@ -468,31 +509,53 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) > memblock_free(pg, pgend - pg); > } > > +#endif > + > /* > * The mem_map array can get very big. Free the unused area of the memory map. > */ > static void __init free_unused_memmap(void) > { > - unsigned long start, prev_end = 0; > + unsigned long start, cur_start, prev_end = 0; > struct memblock_region *reg; > > for_each_memblock(memory, reg) { > - start = __phys_to_pfn(reg->base); > + cur_start = __phys_to_pfn(reg->base); > > #ifdef CONFIG_SPARSEMEM > /* > * Take care not to free memmap entries that don't exist due > * to SPARSEMEM sections which aren't present. > */ > - start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); > -#endif > + start = min(cur_start, ALIGN(prev_end, PAGES_PER_SECTION)); > + > /* > - * If we had a previous bank, and there is a space between the > - * current bank and the previous, free it. > + * Free memory in the case of: > + * 1. if cur_start - prev_end <= PAGES_PER_SECTION, > + * free pre_end ~ cur_start. > + * 2. if cur_start - prev_end > PAGES_PER_SECTION, > + * free pre_end ~ ALIGN(prev_end, PAGES_PER_SECTION). > */ > if (prev_end && prev_end < start) > free_memmap(prev_end, start); > > + /* > + * Free memory in the case of: > + * if cur_start - prev_end > PAGES_PER_SECTION, > + * free ALIGN_DOWN(cur_start, PAGES_PER_SECTION) ~ cur_start. > + */ > + if (cur_start > start && > + !IS_ALIGNED(cur_start, PAGES_PER_SECTION)) > + free_memmap(ALIGN_DOWN(cur_start, PAGES_PER_SECTION), > + cur_start); > +#else > + /* > + * If we had a previous bank, and there is a space between the > + * current bank and the previous, free it. > + */ > + if (prev_end && prev_end < cur_start) > + free_memmap(prev_end, cur_start); > +#endif > /* > * Align up here since the VM subsystem insists that the > * memmap entries are valid from the bank end aligned to > @@ -507,7 +570,6 @@ static void __init free_unused_memmap(void) > free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); > #endif > } > -#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ > > /* > * mem_init() marks the free areas in the mem_map and tells us how much memory > @@ -524,9 +586,8 @@ void __init mem_init(void) > > set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); > > -#ifndef CONFIG_SPARSEMEM_VMEMMAP > free_unused_memmap(); > -#endif > + > /* this will put all unused low memory onto the freelists */ > memblock_free_all(); > > -- > 2.15.0 >
On Mon, Aug 17, 2020 at 11:04:05AM +0300, Mike Rapoport wrote: > On Wed, Aug 12, 2020 at 09:06:55AM +0800, Wei Li wrote: > > For the memory hole, sparse memory model that define SPARSEMEM_VMEMMAP > > do not free the reserved memory for the page map, this patch do it. > > I've been thinking about it a bit more and it seems that instead of > freeing unused memory map it would be better to allocate the exact > memory map from the beginning. > > In sparse_init_nid() we can replace PAGES_PER_SECTION parameter to > __populate_section_memmap() with the calculated value for architectures > that define HAVE_ARCH_PFN_VALID. Or just use a smaller PAGES_PER_SECTION and reduce the waste ;). Just to be clear, are you suggesting that we should use pfn_valid() on the pages within a section to calculate the actual range? The pfn_valid() implementation on arm64 checks for the validity of a sparse section, so this would be called from within the sparse_init() code path. I hope there's no dependency but I haven't checked. If it works, it's fine by me, it solves the FLATMEM mem_map freeing as well. With 4KB pages on arm64, vmemmap_populate() stops at the pmd level, so it always allocates PMD_SIZE. Wei's patch also only frees in PMD_SIZE amounts. So, with a sizeof(struct page) of 64 (2^6), a PMD_SIZE mem_map section would cover 2^(21-6) pages, so that's equivalent to a SECTION_SIZE_BITS of 21-6+12 = 27. If we reduce SECTION_SIZE_BITS to 27 or less, this patch is a no-op.
On Thu, Sep 03, 2020 at 01:05:58PM +0100, Catalin Marinas wrote: > On Mon, Aug 17, 2020 at 11:04:05AM +0300, Mike Rapoport wrote: > > On Wed, Aug 12, 2020 at 09:06:55AM +0800, Wei Li wrote: > > > For the memory hole, sparse memory model that define SPARSEMEM_VMEMMAP > > > do not free the reserved memory for the page map, this patch do it. > > > > I've been thinking about it a bit more and it seems that instead of > > freeing unused memory map it would be better to allocate the exact > > memory map from the beginning. > > > > In sparse_init_nid() we can replace PAGES_PER_SECTION parameter to > > __populate_section_memmap() with the calculated value for architectures > > that define HAVE_ARCH_PFN_VALID. > > Or just use a smaller PAGES_PER_SECTION and reduce the waste ;). > > Just to be clear, are you suggesting that we should use pfn_valid() on > the pages within a section to calculate the actual range? The > pfn_valid() implementation on arm64 checks for the validity of a sparse > section, so this would be called from within the sparse_init() code > path. I hope there's no dependency but I haven't checked. If it works, > it's fine by me, it solves the FLATMEM mem_map freeing as well. What I meant was that sparse_init()->__populate_section_memmap() would not blindly presume that the entire section is valid, but would take into account The actual DRAM banks listed in memblock.memory. For that to work we'll need a version of pfn_valid() that does not rely on the validity of sparse section, but uses some other means, e.g. memblock. Apparently, I've looked at arm32 version of pfn_valid() and missed the section validity check :) I was thinking about doing something like this for 32-bit systems (non-ARM) that cannot affort small sections because of the limited space in the page->flags. > With 4KB pages on arm64, vmemmap_populate() stops at the pmd level, so > it always allocates PMD_SIZE. Wei's patch also only frees in PMD_SIZE > amounts. So, with a sizeof(struct page) of 64 (2^6), a PMD_SIZE mem_map > section would cover 2^(21-6) pages, so that's equivalent to a > SECTION_SIZE_BITS of 21-6+12 = 27. > > If we reduce SECTION_SIZE_BITS to 27 or less, this patch is a no-op. > > -- > Catalin
> -----Original Message----- > From: Catalin Marinas [mailto:catalin.marinas@arm.com] > Sent: Friday, September 4, 2020 12:06 AM > To: Mike Rapoport <rppt@linux.ibm.com> > Cc: liwei (CM) <liwei213@huawei.com>; will@kernel.org; Xiaqing (A) > <saberlily.xia@hisilicon.com>; Chenfeng (puck) <puck.chen@hisilicon.com>; > butao <butao@hisilicon.com>; fengbaopeng <fengbaopeng2@hisilicon.com>; > nsaenzjulienne@suse.de; steve.capper@arm.com; Song Bao Hua (Barry Song) > <song.bao.hua@hisilicon.com>; linux-arm-kernel@lists.infradead.org; > linux-kernel@vger.kernel.org; sujunfei <sujunfei2@hisilicon.com> > Subject: Re: [PATCH v2] arm64: mm: free unused memmap for sparse memory > model that define VMEMMAP > > On Mon, Aug 17, 2020 at 11:04:05AM +0300, Mike Rapoport wrote: > > On Wed, Aug 12, 2020 at 09:06:55AM +0800, Wei Li wrote: > > > For the memory hole, sparse memory model that define > SPARSEMEM_VMEMMAP > > > do not free the reserved memory for the page map, this patch do it. > > > > I've been thinking about it a bit more and it seems that instead of > > freeing unused memory map it would be better to allocate the exact > > memory map from the beginning. > > > > In sparse_init_nid() we can replace PAGES_PER_SECTION parameter to > > __populate_section_memmap() with the calculated value for architectures > > that define HAVE_ARCH_PFN_VALID. > > Or just use a smaller PAGES_PER_SECTION and reduce the waste ;). > > Just to be clear, are you suggesting that we should use pfn_valid() on > the pages within a section to calculate the actual range? The > pfn_valid() implementation on arm64 checks for the validity of a sparse > section, so this would be called from within the sparse_init() code > path. I hope there's no dependency but I haven't checked. If it works, > it's fine by me, it solves the FLATMEM mem_map freeing as well. > > With 4KB pages on arm64, vmemmap_populate() stops at the pmd level, so > it always allocates PMD_SIZE. Wei's patch also only frees in PMD_SIZE > amounts. So, with a sizeof(struct page) of 64 (2^6), a PMD_SIZE mem_map > section would cover 2^(21-6) pages, so that's equivalent to a > SECTION_SIZE_BITS of 21-6+12 = 27. > > If we reduce SECTION_SIZE_BITS to 27 or less, this patch is a no-op. It would be the simplest way to fix this issue. It seems X86_64 is also using 27. @wei, has you ever tried to send a patch to change SECTION_SIZE_BITS to 27 for ARM64? > > -- > Catalin Thanks Barry
Hi, Barry I have changed SECTION_SIZE_BITS to 27 in our products, but I don't have to submit it. -----邮件原件----- 发件人: Song Bao Hua (Barry Song) 发送时间: 2020年11月16日 16:34 收件人: Catalin Marinas <catalin.marinas@arm.com>; Mike Rapoport <rppt@linux.ibm.com>; liwei (CM) <liwei213@huawei.com> 抄送: will@kernel.org; Xiaqing (A) <saberlily.xia@hisilicon.com>; Chenfeng (puck) <puck.chen@hisilicon.com>; butao <butao@hisilicon.com>; fengbaopeng <fengbaopeng2@hisilicon.com>; nsaenzjulienne@suse.de; steve.capper@arm.com; linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; sujunfei <sujunfei2@hisilicon.com>; Linuxarm <linuxarm@huawei.com> 主题: RE: [PATCH v2] arm64: mm: free unused memmap for sparse memory model that define VMEMMAP > -----Original Message----- > From: Catalin Marinas [mailto:catalin.marinas@arm.com] > Sent: Friday, September 4, 2020 12:06 AM > To: Mike Rapoport <rppt@linux.ibm.com> > Cc: liwei (CM) <liwei213@huawei.com>; will@kernel.org; Xiaqing (A) > <saberlily.xia@hisilicon.com>; Chenfeng (puck) > <puck.chen@hisilicon.com>; butao <butao@hisilicon.com>; fengbaopeng > <fengbaopeng2@hisilicon.com>; nsaenzjulienne@suse.de; > steve.capper@arm.com; Song Bao Hua (Barry Song) > <song.bao.hua@hisilicon.com>; linux-arm-kernel@lists.infradead.org; > linux-kernel@vger.kernel.org; sujunfei <sujunfei2@hisilicon.com> > Subject: Re: [PATCH v2] arm64: mm: free unused memmap for sparse > memory model that define VMEMMAP > > On Mon, Aug 17, 2020 at 11:04:05AM +0300, Mike Rapoport wrote: > > On Wed, Aug 12, 2020 at 09:06:55AM +0800, Wei Li wrote: > > > For the memory hole, sparse memory model that define > SPARSEMEM_VMEMMAP > > > do not free the reserved memory for the page map, this patch do it. > > > > I've been thinking about it a bit more and it seems that instead of > > freeing unused memory map it would be better to allocate the exact > > memory map from the beginning. > > > > In sparse_init_nid() we can replace PAGES_PER_SECTION parameter to > > __populate_section_memmap() with the calculated value for > > architectures that define HAVE_ARCH_PFN_VALID. > > Or just use a smaller PAGES_PER_SECTION and reduce the waste ;). > > Just to be clear, are you suggesting that we should use pfn_valid() on > the pages within a section to calculate the actual range? The > pfn_valid() implementation on arm64 checks for the validity of a > sparse section, so this would be called from within the sparse_init() > code path. I hope there's no dependency but I haven't checked. If it > works, it's fine by me, it solves the FLATMEM mem_map freeing as well. > > With 4KB pages on arm64, vmemmap_populate() stops at the pmd level, so > it always allocates PMD_SIZE. Wei's patch also only frees in PMD_SIZE > amounts. So, with a sizeof(struct page) of 64 (2^6), a PMD_SIZE > mem_map section would cover 2^(21-6) pages, so that's equivalent to a > SECTION_SIZE_BITS of 21-6+12 = 27. > > If we reduce SECTION_SIZE_BITS to 27 or less, this patch is a no-op. It would be the simplest way to fix this issue. It seems X86_64 is also using 27. @wei, has you ever tried to send a patch to change SECTION_SIZE_BITS to 27 for ARM64? > > -- > Catalin Thanks Barry
On Mon, Nov 16, 2020 at 08:42:17AM +0000, liwei (CM) wrote: > I have changed SECTION_SIZE_BITS to 27 in our products, but I don't > have to submit it. Well, if you send a patch, I'm happy to merge it.
Hi, Catalin Sorry late for you, I will submit the patch as soon as possible. Thanks! -----邮件原件----- 发件人: Catalin Marinas [mailto:catalin.marinas@arm.com] 发送时间: 2020年11月16日 18:47 收件人: liwei (CM) <liwei213@huawei.com> 抄送: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>; Mike Rapoport <rppt@linux.ibm.com>; will@kernel.org; Xiaqing (A) <saberlily.xia@hisilicon.com>; Chenfeng (puck) <puck.chen@hisilicon.com>; butao <butao@hisilicon.com>; fengbaopeng <fengbaopeng2@hisilicon.com>; nsaenzjulienne@suse.de; steve.capper@arm.com; linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; sujunfei <sujunfei2@hisilicon.com>; Linuxarm <linuxarm@huawei.com> 主题: Re: 答复: [PATCH v2] arm64: mm: free unused memmap for sparse memory model that define VMEMMAP On Mon, Nov 16, 2020 at 08:42:17AM +0000, liwei (CM) wrote: > I have changed SECTION_SIZE_BITS to 27 in our products, but I don't > have to submit it. Well, if you send a patch, I'm happy to merge it. -- Catalin
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1e93cfc7c47a..600889945cd0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -441,7 +441,48 @@ void __init bootmem_init(void) memblock_dump_all(); } -#ifndef CONFIG_SPARSEMEM_VMEMMAP +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#define VMEMMAP_PAGE_INUSE 0xFD +static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) +{ + unsigned long addr, end; + unsigned long next; + pmd_t *pmd; + void *page_addr; + phys_addr_t phys_addr; + + addr = (unsigned long)pfn_to_page(start_pfn); + end = (unsigned long)pfn_to_page(end_pfn); + + pmd = pmd_off_k(addr); + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + + if (!pmd_present(*pmd)) + continue; + + if (IS_ALIGNED(addr, PMD_SIZE) && + IS_ALIGNED(next, PMD_SIZE)) { + phys_addr = __pfn_to_phys(pmd_pfn(*pmd)); + memblock_free(phys_addr, PMD_SIZE); + pmd_clear(pmd); + } else { + /* If here, we are freeing vmemmap pages. */ + memset((void *)addr, VMEMMAP_PAGE_INUSE, next - addr); + page_addr = page_address(pmd_page(*pmd)); + + if (!memchr_inv(page_addr, VMEMMAP_PAGE_INUSE, + PMD_SIZE)) { + phys_addr = __pfn_to_phys(pmd_pfn(*pmd)); + memblock_free(phys_addr, PMD_SIZE); + pmd_clear(pmd); + } + } + } + + flush_tlb_all(); +} +#else static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) { struct page *start_pg, *end_pg; @@ -468,31 +509,53 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) memblock_free(pg, pgend - pg); } +#endif + /* * The mem_map array can get very big. Free the unused area of the memory map. */ static void __init free_unused_memmap(void) { - unsigned long start, prev_end = 0; + unsigned long start, cur_start, prev_end = 0; struct memblock_region *reg; for_each_memblock(memory, reg) { - start = __phys_to_pfn(reg->base); + cur_start = __phys_to_pfn(reg->base); #ifdef CONFIG_SPARSEMEM /* * Take care not to free memmap entries that don't exist due * to SPARSEMEM sections which aren't present. */ - start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); -#endif + start = min(cur_start, ALIGN(prev_end, PAGES_PER_SECTION)); + /* - * If we had a previous bank, and there is a space between the - * current bank and the previous, free it. + * Free memory in the case of: + * 1. if cur_start - prev_end <= PAGES_PER_SECTION, + * free pre_end ~ cur_start. + * 2. if cur_start - prev_end > PAGES_PER_SECTION, + * free pre_end ~ ALIGN(prev_end, PAGES_PER_SECTION). */ if (prev_end && prev_end < start) free_memmap(prev_end, start); + /* + * Free memory in the case of: + * if cur_start - prev_end > PAGES_PER_SECTION, + * free ALIGN_DOWN(cur_start, PAGES_PER_SECTION) ~ cur_start. + */ + if (cur_start > start && + !IS_ALIGNED(cur_start, PAGES_PER_SECTION)) + free_memmap(ALIGN_DOWN(cur_start, PAGES_PER_SECTION), + cur_start); +#else + /* + * If we had a previous bank, and there is a space between the + * current bank and the previous, free it. + */ + if (prev_end && prev_end < cur_start) + free_memmap(prev_end, cur_start); +#endif /* * Align up here since the VM subsystem insists that the * memmap entries are valid from the bank end aligned to @@ -507,7 +570,6 @@ static void __init free_unused_memmap(void) free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); #endif } -#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ /* * mem_init() marks the free areas in the mem_map and tells us how much memory @@ -524,9 +586,8 @@ void __init mem_init(void) set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); -#ifndef CONFIG_SPARSEMEM_VMEMMAP free_unused_memmap(); -#endif + /* this will put all unused low memory onto the freelists */ memblock_free_all();