diff mbox series

[v2,07/22] numa: Introduce a generic memory_add_physaddr_to_nid()

Message ID 159457120334.754248.12908401960465408733.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive)
State Superseded
Headers show
Series device-dax: Support sub-dividing soft-reserved ranges | expand

Commit Message

Dan Williams July 12, 2020, 4:26 p.m. UTC
For architectures that opt into storing their numa data in memblock
(only ARM64 currently), add a memblock generic way to interrogate that
data for memory_add_physaddr_to_nid(). This requires ARCH_KEEP_MEMBLOCK
to keep memblock text and data around after boot.

Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Jia He <justin.he@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/mm.h |   11 +++++++++++
 mm/Kconfig         |    6 ++++++
 mm/page_alloc.c    |   23 ++++++++++++++++++++++-
 3 files changed, 39 insertions(+), 1 deletion(-)

Comments

Mike Rapoport July 13, 2020, 6:58 a.m. UTC | #1
Hi Dan,

On Sun, Jul 12, 2020 at 09:26:43AM -0700, Dan Williams wrote:
> For architectures that opt into storing their numa data in memblock
> (only ARM64 currently), add a memblock generic way to interrogate that
> data for memory_add_physaddr_to_nid(). This requires ARCH_KEEP_MEMBLOCK
> to keep memblock text and data around after boot.

I'm afraid we are too far from using memblock as a generic placeholder for
numa data. Although all architectures now have the numa info in
memblock, only arm64 uses memblock as the primary source of that data.

I'd prefer Jia's solution [1] to have a weak default for
memory_add_physaddr_to_nid() and let architectures override it.

[1] https://lore.kernel.org/lkml/20200710031619.18762-2-justin.he@arm.com

> Cc: Mike Rapoport <rppt@linux.ibm.com>
> Cc: Jia He <justin.he@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  include/linux/mm.h |   11 +++++++++++
>  mm/Kconfig         |    6 ++++++
>  mm/page_alloc.c    |   23 ++++++++++++++++++++++-
>  3 files changed, 39 insertions(+), 1 deletion(-)
>
Dan Williams July 13, 2020, 3:42 p.m. UTC | #2
On Sun, Jul 12, 2020 at 11:58 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
>
> Hi Dan,
>
> On Sun, Jul 12, 2020 at 09:26:43AM -0700, Dan Williams wrote:
> > For architectures that opt into storing their numa data in memblock
> > (only ARM64 currently), add a memblock generic way to interrogate that
> > data for memory_add_physaddr_to_nid(). This requires ARCH_KEEP_MEMBLOCK
> > to keep memblock text and data around after boot.
>
> I'm afraid we are too far from using memblock as a generic placeholder for
> numa data. Although all architectures now have the numa info in
> memblock, only arm64 uses memblock as the primary source of that data.
>
> I'd prefer Jia's solution [1] to have a weak default for
> memory_add_physaddr_to_nid() and let architectures override it.

I'm ok with that as long as we do the same for phys_to_target_node().

Will raised a concern about adding a generic numa-info facility the
last time I tried this. I just don't see a practical way to get there in
the near term.
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..1e76ee5da20b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2425,6 +2425,17 @@  unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
 						unsigned long end_pfn);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
+
+/*
+ * Allow archs to opt-in to keeping get_pfn_range_for_nid() available
+ * after boot.
+ */
+#ifdef CONFIG_ARCH_KEEP_MEMBLOCK
+#define __init_or_memblock
+#else
+#define __init_or_memblock __init
+#endif
+
 extern void get_pfn_range_for_nid(unsigned int nid,
 			unsigned long *start_pfn, unsigned long *end_pfn);
 extern unsigned long find_min_pfn_with_active_regions(void);
diff --git a/mm/Kconfig b/mm/Kconfig
index 0051f735ad98..178ed76cae1b 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -139,6 +139,12 @@  config HAVE_FAST_GUP
 config ARCH_KEEP_MEMBLOCK
 	bool
 
+# If the arch populates numa data into memblock, it can use memblock
+# apis to interrogate that data at runtime.
+config MEMBLOCK_NUMA_INFO
+	bool
+	depends on ARCH_KEEP_MEMBLOCK
+
 config MEMORY_ISOLATION
 	bool
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 48eb0f1410d4..df8bd169dbb4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6351,7 +6351,7 @@  void __init sparse_memory_present_with_active_regions(int nid)
  * with no available memory, a warning is printed and the start and end
  * PFNs will be 0.
  */
-void __init get_pfn_range_for_nid(unsigned int nid,
+void __init_or_memblock get_pfn_range_for_nid(unsigned int nid,
 			unsigned long *start_pfn, unsigned long *end_pfn)
 {
 	unsigned long this_start_pfn, this_end_pfn;
@@ -6369,6 +6369,27 @@  void __init get_pfn_range_for_nid(unsigned int nid,
 		*start_pfn = 0;
 }
 
+/*
+ * Generic memory_add_physaddr_to_nid() for archs that keep numa info in
+ * memblock. A node spans [start_pfn, end_pfn); both are 0 without memory.
+ */
+#ifdef CONFIG_MEMBLOCK_NUMA_INFO
+int __init_or_memblock memory_add_physaddr_to_nid(u64 addr)
+{
+	unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(addr);
+	int nid;
+
+	for_each_online_node(nid) {
+		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+		if (pfn >= start_pfn && pfn < end_pfn)
+			return nid;
+	}
+	/* Default to node0 as not all callers are prepared for this to fail */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif /* CONFIG_MEMBLOCK_NUMA_INFO */
+
 /*
  * This finds a zone that can be used for ZONE_MOVABLE pages. The
  * assumption is made that zones within a node are ordered in monotonic