Message ID | 20250206132754.2596694-3-rppt@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | kexec: introduce Kexec HandOver (KHO) | expand |
On Thu, Feb 06, 2025 at 03:27:42PM +0200, Mike Rapoport wrote: >From: "Mike Rapoport (Microsoft)" <rppt@kernel.org> > >to denote areas that were reserved for kernel use either directly with >memblock_reserve_kern() or via memblock allocations. > >Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> >--- > include/linux/memblock.h | 16 +++++++++++++++- > mm/memblock.c | 32 ++++++++++++++++++++++++-------- > 2 files changed, 39 insertions(+), 9 deletions(-) > >diff --git a/include/linux/memblock.h b/include/linux/memblock.h >index e79eb6ac516f..65e274550f5d 100644 >--- a/include/linux/memblock.h >+++ b/include/linux/memblock.h >@@ -50,6 +50,7 @@ enum memblock_flags { > MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ > MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ > MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */ >+ MEMBLOCK_RSRV_KERN = 0x20, /* memory reserved for kernel use */ Above memblock_flags, there are comments on explaining those flags. Seems we miss it for MEMBLOCK_RSRV_KERN. > }; > > /** >@@ -116,7 +117,19 @@ int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid, > int memblock_add(phys_addr_t base, phys_addr_t size); > int memblock_remove(phys_addr_t base, phys_addr_t size); > int memblock_phys_free(phys_addr_t base, phys_addr_t size); >-int memblock_reserve(phys_addr_t base, phys_addr_t size); >+int __memblock_reserve(phys_addr_t base, phys_addr_t size, int nid, >+ enum memblock_flags flags); >+ >+static __always_inline int memblock_reserve(phys_addr_t base, phys_addr_t size) >+{ >+ return __memblock_reserve(base, size, NUMA_NO_NODE, 0); ^ MEMBLOCK_NONE ? 
>+} >+ >+static __always_inline int memblock_reserve_kern(phys_addr_t base, phys_addr_t size) >+{ >+ return __memblock_reserve(base, size, NUMA_NO_NODE, MEMBLOCK_RSRV_KERN); >+} >+ > #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP > int memblock_physmem_add(phys_addr_t base, phys_addr_t size); > #endif >@@ -477,6 +490,7 @@ static inline __init_memblock bool memblock_bottom_up(void) > > phys_addr_t memblock_phys_mem_size(void); > phys_addr_t memblock_reserved_size(void); >+phys_addr_t memblock_reserved_kern_size(int nid); > unsigned long memblock_estimated_nr_free_pages(void); > phys_addr_t memblock_start_of_DRAM(void); > phys_addr_t memblock_end_of_DRAM(void); >diff --git a/mm/memblock.c b/mm/memblock.c >index 95af35fd1389..4c33baf4d97c 100644 >--- a/mm/memblock.c >+++ b/mm/memblock.c >@@ -491,7 +491,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, > * needn't do it > */ > if (!use_slab) >- BUG_ON(memblock_reserve(addr, new_alloc_size)); >+ BUG_ON(memblock_reserve_kern(addr, new_alloc_size)); > > /* Update slab flag */ > *in_slab = use_slab; >@@ -641,7 +641,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, > #ifdef CONFIG_NUMA > WARN_ON(nid != memblock_get_region_node(rgn)); > #endif >- WARN_ON(flags != rgn->flags); >+ WARN_ON(flags != MEMBLOCK_NONE && flags != rgn->flags); > nr_new++; > if (insert) { > if (start_rgn == -1) >@@ -901,14 +901,15 @@ int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) > return memblock_remove_range(&memblock.reserved, base, size); > } > >-int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) >+int __init_memblock __memblock_reserve(phys_addr_t base, phys_addr_t size, >+ int nid, enum memblock_flags flags) > { > phys_addr_t end = base + size - 1; > >- memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, >- &base, &end, (void *)_RET_IP_); >+ memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__, >+ &base, &end, nid, flags, (void *)_RET_IP_); > >- 
return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0); >+ return memblock_add_range(&memblock.reserved, base, size, nid, flags); > } > > #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP >@@ -1459,14 +1460,14 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, > again: > found = memblock_find_in_range_node(size, align, start, end, nid, > flags); >- if (found && !memblock_reserve(found, size)) >+ if (found && !__memblock_reserve(found, size, nid, MEMBLOCK_RSRV_KERN)) Maybe we could use memblock_reserve_kern() directly. If my understanding is correct, the reserved region's nid is not used. BTW, one question here. How we handle concurrent memblock allocation? If two threads find the same available range and do the reservation, it seems to be a problem to me. Or I missed something? > goto done; > > if (numa_valid_node(nid) && !exact_nid) { > found = memblock_find_in_range_node(size, align, start, > end, NUMA_NO_NODE, > flags); >- if (found && !memblock_reserve(found, size)) >+ if (found && !memblock_reserve_kern(found, size)) > goto done; > } > >@@ -1751,6 +1752,20 @@ phys_addr_t __init_memblock memblock_reserved_size(void) > return memblock.reserved.total_size; > } > >+phys_addr_t __init_memblock memblock_reserved_kern_size(int nid) >+{ >+ struct memblock_region *r; >+ phys_addr_t total = 0; >+ >+ for_each_reserved_mem_region(r) { >+ if (nid == memblock_get_region_node(r) || !numa_valid_node(nid)) >+ if (r->flags & MEMBLOCK_RSRV_KERN) >+ total += r->size; >+ } >+ >+ return total; >+} >+ > /** > * memblock_estimated_nr_free_pages - return estimated number of free pages > * from memblock point of view >@@ -2397,6 +2412,7 @@ static const char * const flagname[] = { > [ilog2(MEMBLOCK_NOMAP)] = "NOMAP", > [ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG", > [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT", >+ [ilog2(MEMBLOCK_RSRV_KERN)] = "RSV_KERN", > }; > > static int memblock_debug_show(struct seq_file *m, void *private) >-- >2.47.2 >
Hi, On Tue, Feb 18, 2025 at 03:50:04PM +0000, Wei Yang wrote: > On Thu, Feb 06, 2025 at 03:27:42PM +0200, Mike Rapoport wrote: > >From: "Mike Rapoport (Microsoft)" <rppt@kernel.org> > > > >to denote areas that were reserved for kernel use either directly with > >memblock_reserve_kern() or via memblock allocations. > > > >Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> > >--- > > include/linux/memblock.h | 16 +++++++++++++++- > > mm/memblock.c | 32 ++++++++++++++++++++++++-------- > > 2 files changed, 39 insertions(+), 9 deletions(-) > > > >diff --git a/include/linux/memblock.h b/include/linux/memblock.h > >index e79eb6ac516f..65e274550f5d 100644 > >--- a/include/linux/memblock.h > >+++ b/include/linux/memblock.h > >@@ -50,6 +50,7 @@ enum memblock_flags { > > MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ > > MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ > > MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */ > >+ MEMBLOCK_RSRV_KERN = 0x20, /* memory reserved for kernel use */ > > Above memblock_flags, there are comments on explaining those flags. > > Seems we miss it for MEMBLOCK_RSRV_KERN. Right, thanks! > > > > #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP > >@@ -1459,14 +1460,14 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, > > again: > > found = memblock_find_in_range_node(size, align, start, end, nid, > > flags); > >- if (found && !memblock_reserve(found, size)) > >+ if (found && !__memblock_reserve(found, size, nid, MEMBLOCK_RSRV_KERN)) > > Maybe we could use memblock_reserve_kern() directly. If my understanding is > correct, the reserved region's nid is not used. We use nid of reserved regions in reserve_bootmem_region() (commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()")) but KHO needs to know the distribution of reserved memory among the nodes before memmap_init_reserved_pages(). > BTW, one question here. How we handle concurrent memblock allocation? 
If two > threads find the same available range and do the reservation, it seems to be a > problem to me. Or I missed something? memblock allocations end before smp_init(), so there is no possible concurrency. > -- > Wei Yang > Help you, Help me
diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e79eb6ac516f..65e274550f5d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,6 +50,7 @@ enum memblock_flags { MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */ + MEMBLOCK_RSRV_KERN = 0x20, /* memory reserved for kernel use */ }; /** @@ -116,7 +117,19 @@ int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid, int memblock_add(phys_addr_t base, phys_addr_t size); int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_phys_free(phys_addr_t base, phys_addr_t size); -int memblock_reserve(phys_addr_t base, phys_addr_t size); +int __memblock_reserve(phys_addr_t base, phys_addr_t size, int nid, + enum memblock_flags flags); + +static __always_inline int memblock_reserve(phys_addr_t base, phys_addr_t size) +{ + return __memblock_reserve(base, size, NUMA_NO_NODE, 0); +} + +static __always_inline int memblock_reserve_kern(phys_addr_t base, phys_addr_t size) +{ + return __memblock_reserve(base, size, NUMA_NO_NODE, MEMBLOCK_RSRV_KERN); +} + #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif @@ -477,6 +490,7 @@ static inline __init_memblock bool memblock_bottom_up(void) phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); +phys_addr_t memblock_reserved_kern_size(int nid); unsigned long memblock_estimated_nr_free_pages(void); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); diff --git a/mm/memblock.c b/mm/memblock.c index 95af35fd1389..4c33baf4d97c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -491,7 +491,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, * needn't do it */ if (!use_slab) - BUG_ON(memblock_reserve(addr, new_alloc_size)); + 
BUG_ON(memblock_reserve_kern(addr, new_alloc_size)); /* Update slab flag */ *in_slab = use_slab; @@ -641,7 +641,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, #ifdef CONFIG_NUMA WARN_ON(nid != memblock_get_region_node(rgn)); #endif - WARN_ON(flags != rgn->flags); + WARN_ON(flags != MEMBLOCK_NONE && flags != rgn->flags); nr_new++; if (insert) { if (start_rgn == -1) @@ -901,14 +901,15 @@ int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) return memblock_remove_range(&memblock.reserved, base, size); } -int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) +int __init_memblock __memblock_reserve(phys_addr_t base, phys_addr_t size, + int nid, enum memblock_flags flags) { phys_addr_t end = base + size - 1; - memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, - &base, &end, (void *)_RET_IP_); + memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__, + &base, &end, nid, flags, (void *)_RET_IP_); - return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0); + return memblock_add_range(&memblock.reserved, base, size, nid, flags); } #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP @@ -1459,14 +1460,14 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, again: found = memblock_find_in_range_node(size, align, start, end, nid, flags); - if (found && !memblock_reserve(found, size)) + if (found && !__memblock_reserve(found, size, nid, MEMBLOCK_RSRV_KERN)) goto done; if (numa_valid_node(nid) && !exact_nid) { found = memblock_find_in_range_node(size, align, start, end, NUMA_NO_NODE, flags); - if (found && !memblock_reserve(found, size)) + if (found && !memblock_reserve_kern(found, size)) goto done; } @@ -1751,6 +1752,20 @@ phys_addr_t __init_memblock memblock_reserved_size(void) return memblock.reserved.total_size; } +phys_addr_t __init_memblock memblock_reserved_kern_size(int nid) +{ + struct memblock_region *r; + phys_addr_t total = 0; + + for_each_reserved_mem_region(r) { + if (nid == 
memblock_get_region_node(r) || !numa_valid_node(nid)) + if (r->flags & MEMBLOCK_RSRV_KERN) + total += r->size; + } + + return total; +} + /** * memblock_estimated_nr_free_pages - return estimated number of free pages * from memblock point of view @@ -2397,6 +2412,7 @@ static const char * const flagname[] = { [ilog2(MEMBLOCK_NOMAP)] = "NOMAP", [ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG", [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT", + [ilog2(MEMBLOCK_RSRV_KERN)] = "RSV_KERN", }; static int memblock_debug_show(struct seq_file *m, void *private)