diff mbox series

[1/1] NUMA: Introduce NODE_DATA->node_present_pages(RAM pages)

Message ID 20241022101029.967911-1-bernhardkaindl7@gmail.com (mailing list archive)
State New
Headers show
Series [1/1] NUMA: Introduce NODE_DATA->node_present_pages(RAM pages) | expand

Commit Message

Bernhard Kaindl Oct. 22, 2024, 10:10 a.m. UTC
From: Bernhard Kaindl <bernhard.kaindl@cloud.com>

Some admin tools like 'xl info -n' like to display the total memory
for each NUMA node. The Xen backend[1] of hwloc comes to mind too.

The total amount of RAM on a NUMA node is not needed by Xen internally:
Xen only uses NODE_DATA->node_spanned_pages, but that can be confusing
for users as it includes memory holes (can be as large as 2GB on x86).

Calculate the RAM per NUMA node by iterating over arch_get_ram_range()
which returns the e820 RAM entries on x86 and update it on memory_add().

Use NODE_DATA->node_present_pages (like in the Linux kernel) to hold
this info and in a later commit, find a way for tools to read it.

[1] hwloc with Xen backend: https://github.com/xenserver-next/hwloc/

Signed-off-by: Bernhard Kaindl <bernhard.kaindl@cloud.com>
---
Changes in v2:
- Remove update of numainfo call, only calculate RAM for each node.
- Calculate RAM based on page boundaries, coding style fixes
Changes in v3:
- Use PFN_UP/DOWN, refactored further to simplify the code, while leaving
  compiler-level optimisations to the compiler's optimisation passes.
---
 xen/arch/x86/x86_64/mm.c |  3 +++
 xen/common/numa.c        | 31 ++++++++++++++++++++++++++++---
 xen/include/xen/numa.h   |  3 +++
 3 files changed, 34 insertions(+), 3 deletions(-)

Comments

Alejandro Vallejo Oct. 22, 2024, 11:20 a.m. UTC | #1
Hi,

The subject was probably meant to have a v3?

On Tue Oct 22, 2024 at 11:10 AM BST, Bernhard Kaindl wrote:
> From: Bernhard Kaindl <bernhard.kaindl@cloud.com>
>
> Some admin tools like 'xl info -n' like to display the total memory
> for each NUMA node. The Xen backend[1] of hwloc comes to mind too.
>
> The total amount of RAM on a NUMA node is not needed by Xen internally:
> Xen only uses NODE_DATA->node_spanned_pages, but that can be confusing
> for users as it includes memory holes (can be as large as 2GB on x86).
>
> Calculate the RAM per NUMA node by iterating over arch_get_ram_range()
> which returns the e820 RAM entries on x86 and update it on memory_add().
>
> Use NODE_DATA->node_present_pages (like in the Linux kernel) to hold
> this info and in a later commit, find a way for tools to read it.

Part of this information would be more helpful in a comment in the definition
of node_data, I think.

>
> [1] hwloc with Xen backend: https://github.com/xenserver-next/hwloc/
>
> Signed-off-by: Bernhard Kaindl <bernhard.kaindl@cloud.com>
> ---
> Changes in v2:
> - Remove update of numainfo call, only calculate RAM for each node.
> - Calculate RAM based on page boundaries, coding style fixes
> Changes in v3:
> - Use PFN_UP/DOWN, refactored further to simplify the code, while leaving
>   compiler-level optimisations to the compiler's optimisation passes.
> ---
>  xen/arch/x86/x86_64/mm.c |  3 +++
>  xen/common/numa.c        | 31 ++++++++++++++++++++++++++++---
>  xen/include/xen/numa.h   |  3 +++
>  3 files changed, 34 insertions(+), 3 deletions(-)
>
> diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
> index b2a280fba3..66b9bed057 100644
> --- a/xen/arch/x86/x86_64/mm.c
> +++ b/xen/arch/x86/x86_64/mm.c
> @@ -1334,6 +1334,9 @@ int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm)
>      share_hotadd_m2p_table(&info);
>      transfer_pages_to_heap(&info);
>  
> +    /* Update the node's present pages (like the total_pages of the system) */
> +    NODE_DATA(node)->node_present_pages += epfn - spfn;
> +
>      return 0;
>  
>  destroy_m2p:
> diff --git a/xen/common/numa.c b/xen/common/numa.c
> index 28a09766fa..374132df08 100644
> --- a/xen/common/numa.c
> +++ b/xen/common/numa.c
> @@ -4,6 +4,7 @@
>   * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
>   */
>  
> +#include "xen/pfn.h"
>  #include <xen/init.h>
>  #include <xen/keyhandler.h>
>  #include <xen/mm.h>
> @@ -499,15 +500,39 @@ int __init compute_hash_shift(const struct node *nodes,
>      return shift;
>  }
>  
> -/* Initialize NODE_DATA given nodeid and start/end */
> +/**
> + * @brief Initialize a NUMA node's NODE_DATA given nodeid and start/end addrs.
> + *
> + * This function sets up the boot memory for a given NUMA node by calculating
> + * the node's start and end page frame numbers (PFNs) and determining
> + * the number of present RAM pages within the node's memory range.
> + *
> + * @param nodeid The identifier of the node to initialize.
> + * @param start The starting physical address of the node's memory range.
> + * @param end The ending physical address of the node's memory range.

I'd add that end is "exclusive". To make it unambiguous.

> + */
>  void __init setup_node_bootmem(nodeid_t nodeid, paddr_t start, paddr_t end)
>  {
>      unsigned long start_pfn = paddr_to_pfn(start);
>      unsigned long end_pfn = paddr_to_pfn(end);
> +    struct node_data *numa_node = NODE_DATA(nodeid);
> +    paddr_t start_ram, end_ram;
> +    unsigned long pages = 0;
> +    unsigned int idx = 0;
> +    int err;
>  
> -    NODE_DATA(nodeid)->node_start_pfn = start_pfn;
> -    NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
> +    numa_node->node_start_pfn = start_pfn;
> +    numa_node->node_spanned_pages = end_pfn - start_pfn;
>  
> +    /* Calculate the number of present RAM pages within the node: */
> +    while ( (err = arch_get_ram_range(idx++, &start_ram, &end_ram)) != -ENOENT )

nit: This line seems quite overloaded. Might be easier for the eye as a
do-while, with "int err" being defined inside the loop itself.

> +    {
> +        if ( err || start_ram >= end || end_ram <= start )
> +            continue;  /* Not RAM (err != 0) or range is outside the node */
> +
> +        pages += PFN_DOWN(min(end_ram, end)) - PFN_UP(max(start_ram, start));
> +    }
> +    numa_node->node_present_pages = pages;
>      node_set_online(nodeid);
>  }
>  
> diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
> index fd1511a6fb..c860f3ad1c 100644
> --- a/xen/include/xen/numa.h
> +++ b/xen/include/xen/numa.h
> @@ -71,6 +71,7 @@ extern nodeid_t *memnodemap;
>  struct node_data {
>      unsigned long node_start_pfn;
>      unsigned long node_spanned_pages;
> +    unsigned long node_present_pages;
>  };
>  
>  extern struct node_data node_data[];
> @@ -91,6 +92,7 @@ static inline nodeid_t mfn_to_nid(mfn_t mfn)
>  
>  #define node_start_pfn(nid)     (NODE_DATA(nid)->node_start_pfn)
>  #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
> +#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
>  #define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
>                                   NODE_DATA(nid)->node_spanned_pages)
>  
> @@ -123,6 +125,7 @@ extern void numa_set_processor_nodes_parsed(nodeid_t node);
>  extern mfn_t first_valid_mfn;
>  
>  #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn))
> +#define node_present_pages(nid) total_pages
>  #define node_start_pfn(nid) mfn_x(first_valid_mfn)
>  #define __node_distance(a, b) 20
>  

Cheers,
Alejandro
diff mbox series

Patch

diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index b2a280fba3..66b9bed057 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1334,6 +1334,9 @@  int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm)
     share_hotadd_m2p_table(&info);
     transfer_pages_to_heap(&info);
 
+    /* Update the node's present pages (like the total_pages of the system) */
+    NODE_DATA(node)->node_present_pages += epfn - spfn;
+
     return 0;
 
 destroy_m2p:
diff --git a/xen/common/numa.c b/xen/common/numa.c
index 28a09766fa..374132df08 100644
--- a/xen/common/numa.c
+++ b/xen/common/numa.c
@@ -4,6 +4,7 @@ 
  * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
  */
 
+#include "xen/pfn.h"
 #include <xen/init.h>
 #include <xen/keyhandler.h>
 #include <xen/mm.h>
@@ -499,15 +500,39 @@  int __init compute_hash_shift(const struct node *nodes,
     return shift;
 }
 
-/* Initialize NODE_DATA given nodeid and start/end */
+/**
+ * @brief Initialize a NUMA node's NODE_DATA given nodeid and start/end addrs.
+ *
+ * This function sets up the boot memory for a given NUMA node by calculating
+ * the node's start and end page frame numbers (PFNs) and determining
+ * the number of present RAM pages within the node's memory range.
+ *
+ * @param nodeid The identifier of the node to initialize.
+ * @param start The starting physical address of the node's memory range.
+ * @param end The ending physical address of the node's memory range.
+ */
 void __init setup_node_bootmem(nodeid_t nodeid, paddr_t start, paddr_t end)
 {
     unsigned long start_pfn = paddr_to_pfn(start);
     unsigned long end_pfn = paddr_to_pfn(end);
+    struct node_data *numa_node = NODE_DATA(nodeid);
+    paddr_t start_ram, end_ram;
+    unsigned long pages = 0;
+    unsigned int idx = 0;
+    int err;
 
-    NODE_DATA(nodeid)->node_start_pfn = start_pfn;
-    NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
+    numa_node->node_start_pfn = start_pfn;
+    numa_node->node_spanned_pages = end_pfn - start_pfn;
 
+    /* Calculate the number of present RAM pages within the node: */
+    while ( (err = arch_get_ram_range(idx++, &start_ram, &end_ram)) != -ENOENT )
+    {
+        if ( err || start_ram >= end || end_ram <= start )
+            continue;  /* Not RAM (err != 0) or range is outside the node */
+
+        pages += PFN_DOWN(min(end_ram, end)) - PFN_UP(max(start_ram, start));
+    }
+    numa_node->node_present_pages = pages;
     node_set_online(nodeid);
 }
 
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
index fd1511a6fb..c860f3ad1c 100644
--- a/xen/include/xen/numa.h
+++ b/xen/include/xen/numa.h
@@ -71,6 +71,7 @@  extern nodeid_t *memnodemap;
 struct node_data {
     unsigned long node_start_pfn;
     unsigned long node_spanned_pages;
+    unsigned long node_present_pages;
 };
 
 extern struct node_data node_data[];
@@ -91,6 +92,7 @@  static inline nodeid_t mfn_to_nid(mfn_t mfn)
 
 #define node_start_pfn(nid)     (NODE_DATA(nid)->node_start_pfn)
 #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
+#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
 #define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
                                  NODE_DATA(nid)->node_spanned_pages)
 
@@ -123,6 +125,7 @@  extern void numa_set_processor_nodes_parsed(nodeid_t node);
 extern mfn_t first_valid_mfn;
 
 #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn))
+#define node_present_pages(nid) total_pages
 #define node_start_pfn(nid) mfn_x(first_valid_mfn)
 #define __node_distance(a, b) 20