diff mbox series

[11/37] xen/x86: abstract neutral code from acpi_numa_memory_affinity_init

Message ID 20210923120236.3692135-12-wei.chen@arm.com (mailing list archive)
State New, archived
Headers show
Series Add device tree based NUMA support to Arm | expand

Commit Message

Wei Chen Sept. 23, 2021, 12:02 p.m. UTC
There is some code in acpi_numa_memory_affinity_init to update node
memory range and update node_memblk_range array. This code is not
ACPI specific, it can be shared by other NUMA implementation, like
device tree based NUMA implementation.

So in this patch, we abstract this memory range and blocks relative
code to a new function. This will avoid exporting static variables
like node_memblk_range. And the PXM in neutral code print messages
have been replaced by NODE, as PXM is ACPI specific.

Signed-off-by: Wei Chen <wei.chen@arm.com>
---
 xen/arch/x86/srat.c        | 131 +++++++++++++++++++++----------------
 xen/include/asm-x86/numa.h |   3 +
 2 files changed, 77 insertions(+), 57 deletions(-)

Comments

Stefano Stabellini Sept. 24, 2021, 12:38 a.m. UTC | #1
+x86 maintainers


On Thu, 23 Sep 2021, Wei Chen wrote:
> There is some code in acpi_numa_memory_affinity_init to update node
> memory range and update node_memblk_range array. This code is not
> ACPI specific, it can be shared by other NUMA implementation, like
> device tree based NUMA implementation.
> 
> So in this patch, we abstract this memory range and blocks relative
> code to a new function. This will avoid exporting static variables
> like node_memblk_range. And the PXM in neutral code print messages
> have been replaced by NODE, as PXM is ACPI specific.
> 
> Signed-off-by: Wei Chen <wei.chen@arm.com>
> ---
>  xen/arch/x86/srat.c        | 131 +++++++++++++++++++++----------------
>  xen/include/asm-x86/numa.h |   3 +
>  2 files changed, 77 insertions(+), 57 deletions(-)
> 
> diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c
> index 3334ede7a5..18bc6b19bb 100644
> --- a/xen/arch/x86/srat.c
> +++ b/xen/arch/x86/srat.c
> @@ -104,6 +104,14 @@ nodeid_t setup_node(unsigned pxm)
>  	return node;
>  }
>  
> +bool __init numa_memblks_available(void)
> +{
> +	if (num_node_memblks < NR_NODE_MEMBLKS)
> +		return true;
> +
> +	return false;
> +}
> +
>  int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node)
>  {
>  	int i;
> @@ -301,69 +309,35 @@ static bool __init is_node_memory_continuous(nodeid_t nid,
>  	return true;
>  }
>  
> -/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
> -void __init
> -acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
> +/* Neutral NUMA memory affinity init function for ACPI and DT */
> +int __init numa_update_node_memblks(nodeid_t node,
> +		paddr_t start, paddr_t size, bool hotplug)
>  {
> -	paddr_t start, end;
> -	unsigned pxm;
> -	nodeid_t node;
> +	paddr_t end = start + size;
>  	int i;
>  
> -	if (srat_disabled())
> -		return;
> -	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
> -		bad_srat();
> -		return;
> -	}
> -	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
> -		return;
> -
> -	start = ma->base_address;
> -	end = start + ma->length;
> -	/* Supplement the heuristics in l1tf_calculations(). */
> -	l1tf_safe_maddr = max(l1tf_safe_maddr, ROUNDUP(end, PAGE_SIZE));
> -
> -	if (num_node_memblks >= NR_NODE_MEMBLKS)
> -	{
> -		dprintk(XENLOG_WARNING,
> -                "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
> -		bad_srat();
> -		return;
> -	}
> -
> -	pxm = ma->proximity_domain;
> -	if (srat_rev < 2)
> -		pxm &= 0xff;
> -	node = setup_node(pxm);
> -	if (node == NUMA_NO_NODE) {
> -		bad_srat();
> -		return;
> -	}
> -	/* It is fine to add this area to the nodes data it will be used later*/
> +	/* It is fine to add this area to the nodes data it will be used later */
>  	i = conflicting_memblks(start, end);
>  	if (i < 0)
>  		/* everything fine */;
>  	else if (memblk_nodeid[i] == node) {
> -		bool mismatch = !(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) !=
> -		                !test_bit(i, memblk_hotplug);
> +		bool mismatch = !hotplug != !test_bit(i, memblk_hotplug);
>  
> -		printk("%sSRAT: PXM %u (%"PRIpaddr"-%"PRIpaddr") overlaps with itself (%"PRIpaddr"-%"PRIpaddr")\n",
> -		       mismatch ? KERN_ERR : KERN_WARNING, pxm, start, end,
> +		printk("%sSRAT: NODE %u (%"PRIpaddr"-%"PRIpaddr") overlaps with itself (%"PRIpaddr"-%"PRIpaddr")\n",
> +		       mismatch ? KERN_ERR : KERN_WARNING, node, start, end,
>  		       node_memblk_range[i].start, node_memblk_range[i].end);
>  		if (mismatch) {
> -			bad_srat();
> -			return;
> +			return -1;
>  		}
>  	} else {
>  		printk(KERN_ERR
> -		       "SRAT: PXM %u (%"PRIpaddr"-%"PRIpaddr") overlaps with PXM %u (%"PRIpaddr"-%"PRIpaddr")\n",
> -		       pxm, start, end, node_to_pxm(memblk_nodeid[i]),
> +		       "SRAT: NODE %u (%"PRIpaddr"-%"PRIpaddr") overlaps with NODE %u (%"PRIpaddr"-%"PRIpaddr")\n",
> +		       node, start, end, memblk_nodeid[i],
>  		       node_memblk_range[i].start, node_memblk_range[i].end);
> -		bad_srat();
> -		return;
> +		return -1;
>  	}
> -	if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
> +
> +	if (!hotplug) {
>  		struct node *nd = &nodes[node];
>  
>  		if (!node_test_and_set(node, memory_nodes_parsed)) {
> @@ -375,26 +349,69 @@ acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
>  			if (nd->end < end)
>  				nd->end = end;
>  
> -			/* Check whether this range contains memory for other nodes */
> -			if (!is_node_memory_continuous(node, nd->start, nd->end)) {
> -				bad_srat();
> -				return;
> -			}
> +			if (!is_node_memory_continuous(node, nd->start, nd->end))
> +				return -1;
>  		}
>  	}
> -	printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIpaddr"-%"PRIpaddr"%s\n",
> -	       node, pxm, start, end,
> -	       ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? " (hotplug)" : "");
> +
> +	printk(KERN_INFO "SRAT: Node %u %"PRIpaddr"-%"PRIpaddr"%s\n",
> +	       node, start, end, hotplug ? " (hotplug)" : "");
>  
>  	node_memblk_range[num_node_memblks].start = start;
>  	node_memblk_range[num_node_memblks].end = end;
>  	memblk_nodeid[num_node_memblks] = node;
> -	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
> +	if (hotplug) {
>  		__set_bit(num_node_memblks, memblk_hotplug);
>  		if (end > mem_hotplug_boundary())
>  			mem_hotplug_update_boundary(end);
>  	}
>  	num_node_memblks++;
> +
> +	return 0;
> +}
> +
> +/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
> +void __init
> +acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
> +{
> +	unsigned pxm;
> +	nodeid_t node;
> +	int ret;
> +
> +	if (srat_disabled())
> +		return;
> +	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
> +		bad_srat();
> +		return;
> +	}
> +	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
> +		return;
> +
> +	/* Supplement the heuristics in l1tf_calculations(). */
> +	l1tf_safe_maddr = max(l1tf_safe_maddr,
> +			ROUNDUP((ma->base_address + ma->length), PAGE_SIZE));
> +
> +	if (!numa_memblks_available())
> +	{
> +		dprintk(XENLOG_WARNING,
> +                "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
> +		bad_srat();
> +		return;
> +	}
> +
> +	pxm = ma->proximity_domain;
> +	if (srat_rev < 2)
> +		pxm &= 0xff;
> +	node = setup_node(pxm);
> +	if (node == NUMA_NO_NODE) {
> +		bad_srat();
> +		return;
> +	}
> +
> +	ret = numa_update_node_memblks(node, ma->base_address, ma->length,
> +					ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE);
> +	if (ret != 0)
> +		bad_srat();
>  }
>  
>  /* Sanity check to catch more bad SRATs (they are amazingly common).
> diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
> index 50cfd8e7ef..5772a70665 100644
> --- a/xen/include/asm-x86/numa.h
> +++ b/xen/include/asm-x86/numa.h
> @@ -74,6 +74,9 @@ static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
>  				 NODE_DATA(nid)->node_spanned_pages)
>  
>  extern int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node);
> +extern bool numa_memblks_available(void);
> +extern int numa_update_node_memblks(nodeid_t node,
> +		paddr_t start, paddr_t size, bool hotplug);
>  
>  void srat_parse_regions(paddr_t addr);
>  extern u8 __node_distance(nodeid_t a, nodeid_t b);
> -- 
> 2.25.1
>
Jan Beulich Jan. 24, 2022, 4:50 p.m. UTC | #2
On 23.09.2021 14:02, Wei Chen wrote:
> There is some code in acpi_numa_memory_affinity_init to update node
> memory range and update node_memblk_range array. This code is not
> ACPI specific, it can be shared by other NUMA implementation, like
> device tree based NUMA implementation.
> 
> So in this patch, we abstract this memory range and blocks relative
> code to a new function. This will avoid exporting static variables
> like node_memblk_range. And the PXM in neutral code print messages
> have been replaced by NODE, as PXM is ACPI specific.
> 
> Signed-off-by: Wei Chen <wei.chen@arm.com>

SRAT is an ACPI concept, which I assume has no meaning with DT. Hence
any generically usable logic here wants, I think, separating out into
a file which is not SRAT-specific (peeking ahead, specifically not a
file named "numa_srat.c"). This may in turn require some more though
regarding the proper split between the stuff remaining in srat.c and
the stuff becoming kind of library code. In particular this may mean
moving some of the static variables as well, and with them perhaps
some further functions (while I did peek ahead, I didn't look closely
at the later patch doing the actual movement). And it is then hard to
see why the separation needs to happen in two steps - you could move
the generically usable code to a new file right away.

> --- a/xen/arch/x86/srat.c
> +++ b/xen/arch/x86/srat.c
> @@ -104,6 +104,14 @@ nodeid_t setup_node(unsigned pxm)
>  	return node;
>  }
>  
> +bool __init numa_memblks_available(void)
> +{
> +	if (num_node_memblks < NR_NODE_MEMBLKS)
> +		return true;
> +
> +	return false;
> +}

Please can you avoid expressing things in more complex than necessary
ways? Here I don't see why it can't just be

bool __init numa_memblks_available(void)
{
	return num_node_memblks < NR_NODE_MEMBLKS;
}

> @@ -301,69 +309,35 @@ static bool __init is_node_memory_continuous(nodeid_t nid,
>  	return true;
>  }
>  
> -/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
> -void __init
> -acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
> +/* Neutral NUMA memory affinity init function for ACPI and DT */
> +int __init numa_update_node_memblks(nodeid_t node,
> +		paddr_t start, paddr_t size, bool hotplug)

Indentation.

>  {
> -	paddr_t start, end;
> -	unsigned pxm;
> -	nodeid_t node;
> +	paddr_t end = start + size;
>  	int i;
>  
> -	if (srat_disabled())
> -		return;
> -	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
> -		bad_srat();
> -		return;
> -	}
> -	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
> -		return;
> -
> -	start = ma->base_address;
> -	end = start + ma->length;
> -	/* Supplement the heuristics in l1tf_calculations(). */
> -	l1tf_safe_maddr = max(l1tf_safe_maddr, ROUNDUP(end, PAGE_SIZE));
> -
> -	if (num_node_memblks >= NR_NODE_MEMBLKS)
> -	{
> -		dprintk(XENLOG_WARNING,
> -                "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
> -		bad_srat();
> -		return;
> -	}
> -
> -	pxm = ma->proximity_domain;
> -	if (srat_rev < 2)
> -		pxm &= 0xff;
> -	node = setup_node(pxm);
> -	if (node == NUMA_NO_NODE) {
> -		bad_srat();
> -		return;
> -	}
> -	/* It is fine to add this area to the nodes data it will be used later*/
> +	/* It is fine to add this area to the nodes data it will be used later */
>  	i = conflicting_memblks(start, end);
>  	if (i < 0)
>  		/* everything fine */;
>  	else if (memblk_nodeid[i] == node) {
> -		bool mismatch = !(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) !=
> -		                !test_bit(i, memblk_hotplug);
> +		bool mismatch = !hotplug != !test_bit(i, memblk_hotplug);
>  
> -		printk("%sSRAT: PXM %u (%"PRIpaddr"-%"PRIpaddr") overlaps with itself (%"PRIpaddr"-%"PRIpaddr")\n",
> -		       mismatch ? KERN_ERR : KERN_WARNING, pxm, start, end,
> +		printk("%sSRAT: NODE %u (%"PRIpaddr"-%"PRIpaddr") overlaps with itself (%"PRIpaddr"-%"PRIpaddr")\n",

Nit: Unlike PXM, which is an acronym, "node" doesn't want to be all upper
case.

Also did you check that the node <-> PXM association is known to a reader
of a log at this point in time?

> +		       mismatch ? KERN_ERR : KERN_WARNING, node, start, end,
>  		       node_memblk_range[i].start, node_memblk_range[i].end);
>  		if (mismatch) {
> -			bad_srat();
> -			return;
> +			return -1;
>  		}
>  	} else {
>  		printk(KERN_ERR
> -		       "SRAT: PXM %u (%"PRIpaddr"-%"PRIpaddr") overlaps with PXM %u (%"PRIpaddr"-%"PRIpaddr")\n",
> -		       pxm, start, end, node_to_pxm(memblk_nodeid[i]),
> +		       "SRAT: NODE %u (%"PRIpaddr"-%"PRIpaddr") overlaps with NODE %u (%"PRIpaddr"-%"PRIpaddr")\n",
> +		       node, start, end, memblk_nodeid[i],
>  		       node_memblk_range[i].start, node_memblk_range[i].end);
> -		bad_srat();
> -		return;
> +		return -1;

Please no -1 return values. Either a function means to return boolean,
in which case it should use bool / true / false, or it means to return
a proper errno-style error code.

> @@ -375,26 +349,69 @@ acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
>  			if (nd->end < end)
>  				nd->end = end;
>  
> -			/* Check whether this range contains memory for other nodes */
> -			if (!is_node_memory_continuous(node, nd->start, nd->end)) {
> -				bad_srat();
> -				return;
> -			}
> +			if (!is_node_memory_continuous(node, nd->start, nd->end))
> +				return -1;
>  		}
>  	}
> -	printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIpaddr"-%"PRIpaddr"%s\n",
> -	       node, pxm, start, end,
> -	       ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? " (hotplug)" : "");
> +
> +	printk(KERN_INFO "SRAT: Node %u %"PRIpaddr"-%"PRIpaddr"%s\n",
> +	       node, start, end, hotplug ? " (hotplug)" : "");

Continuing from a comment further up: Here you remove an instance of
logging the node <-> PXM association.

>  	node_memblk_range[num_node_memblks].start = start;
>  	node_memblk_range[num_node_memblks].end = end;
>  	memblk_nodeid[num_node_memblks] = node;
> -	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
> +	if (hotplug) {
>  		__set_bit(num_node_memblks, memblk_hotplug);
>  		if (end > mem_hotplug_boundary())
>  			mem_hotplug_update_boundary(end);
>  	}
>  	num_node_memblks++;
> +
> +	return 0;
> +}
> +
> +/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
> +void __init
> +acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
> +{
> +	unsigned pxm;
> +	nodeid_t node;
> +	int ret;
> +
> +	if (srat_disabled())
> +		return;
> +	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
> +		bad_srat();
> +		return;
> +	}
> +	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
> +		return;
> +
> +	/* Supplement the heuristics in l1tf_calculations(). */
> +	l1tf_safe_maddr = max(l1tf_safe_maddr,
> +			ROUNDUP((ma->base_address + ma->length), PAGE_SIZE));

Indentation and unnecessary pair of parentheses.

> +	if (!numa_memblks_available())
> +	{

For code you touch, please try to bring it into consistent style. Here
the brace wants to move to the previous line, seeing that the file is
using Linux style.

> +		dprintk(XENLOG_WARNING,
> +                "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");

Here you want to fix indentation and ideally also format and grammar of
the actual log message.

> +		bad_srat();
> +		return;
> +	}
> +
> +	pxm = ma->proximity_domain;
> +	if (srat_rev < 2)
> +		pxm &= 0xff;
> +	node = setup_node(pxm);
> +	if (node == NUMA_NO_NODE) {
> +		bad_srat();
> +		return;
> +	}
> +
> +	ret = numa_update_node_memblks(node, ma->base_address, ma->length,
> +					ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE);

Indentation again.

Jan
Wei Chen Jan. 26, 2022, 10:39 a.m. UTC | #3
Hi Jan,

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: 2022年1月25日 0:51
> To: Wei Chen <Wei.Chen@arm.com>
> Cc: Bertrand Marquis <Bertrand.Marquis@arm.com>; xen-
> devel@lists.xenproject.org; sstabellini@kernel.org; julien@xen.org
> Subject: Re: [PATCH 11/37] xen/x86: abstract neutral code from
> acpi_numa_memory_affinity_init
> 
> On 23.09.2021 14:02, Wei Chen wrote:
> > There is some code in acpi_numa_memory_affinity_init to update node
> > memory range and update node_memblk_range array. This code is not
> > ACPI specific, it can be shared by other NUMA implementation, like
> > device tree based NUMA implementation.
> >
> > So in this patch, we abstract this memory range and blocks relative
> > code to a new function. This will avoid exporting static variables
> > like node_memblk_range. And the PXM in neutral code print messages
> > have been replaced by NODE, as PXM is ACPI specific.
> >
> > Signed-off-by: Wei Chen <wei.chen@arm.com>
> 
> SRAT is an ACPI concept, which I assume has no meaning with DT. Hence
> any generically usable logic here wants, I think, separating out into
> a file which is not SRAT-specific (peeking ahead, specifically not a
> file named "numa_srat.c"). This may in turn require some more though

When I created the file, I wanted to place non-ACPI/DT specific code in
a new file. But I was confused about how to name it. I chose numa_srat.c
as the file name because I thought the device tree is also a static
resource table. But it seems this name is still misleading, because
ACPI SRAT is well known. 

> regarding the proper split between the stuff remaining in srat.c and
> the stuff becoming kind of library code. In particular this may mean
> moving some of the static variables as well, and with them perhaps
> some further functions (while I did peek ahead, I didn't look closely
> at the later patch doing the actual movement). And it is then hard to
> see why the separation needs to happen in two steps - you could move
> the generically usable code to a new file right away.
> 

OK, I will reduce the steps. And I think the "new file" can be common/numa.c.
Because the generically usable code are some logical functions to check numa
memory blocks/ranges and update nodes, we don't need a "numa_srat.c".

> > --- a/xen/arch/x86/srat.c
> > +++ b/xen/arch/x86/srat.c
> > @@ -104,6 +104,14 @@ nodeid_t setup_node(unsigned pxm)
> >  	return node;
> >  }
> >
> > +bool __init numa_memblks_available(void)
> > +{
> > +	if (num_node_memblks < NR_NODE_MEMBLKS)
> > +		return true;
> > +
> > +	return false;
> > +}
> 
> Please can you avoid expressing things in more complex than necessary
> ways? Here I don't see why it can't just be

OK, I will simplify it.

> 
> bool __init numa_memblks_available(void)
> {
> 	return num_node_memblks < NR_NODE_MEMBLKS;
> }
> 
> > @@ -301,69 +309,35 @@ static bool __init
> is_node_memory_continuous(nodeid_t nid,
> >  	return true;
> >  }
> >
> > -/* Callback for parsing of the Proximity Domain <-> Memory Area
> mappings */
> > -void __init
> > -acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
> > +/* Neutral NUMA memory affinity init function for ACPI and DT */
> > +int __init numa_update_node_memblks(nodeid_t node,
> > +		paddr_t start, paddr_t size, bool hotplug)
> 
> Indentation.

OK.

> 
> >  {
> > -	paddr_t start, end;
> > -	unsigned pxm;
> > -	nodeid_t node;
> > +	paddr_t end = start + size;
> >  	int i;
> >
> > -	if (srat_disabled())
> > -		return;
> > -	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
> > -		bad_srat();
> > -		return;
> > -	}
> > -	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
> > -		return;
> > -
> > -	start = ma->base_address;
> > -	end = start + ma->length;
> > -	/* Supplement the heuristics in l1tf_calculations(). */
> > -	l1tf_safe_maddr = max(l1tf_safe_maddr, ROUNDUP(end, PAGE_SIZE));
> > -
> > -	if (num_node_memblks >= NR_NODE_MEMBLKS)
> > -	{
> > -		dprintk(XENLOG_WARNING,
> > -                "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
> > -		bad_srat();
> > -		return;
> > -	}
> > -
> > -	pxm = ma->proximity_domain;
> > -	if (srat_rev < 2)
> > -		pxm &= 0xff;
> > -	node = setup_node(pxm);
> > -	if (node == NUMA_NO_NODE) {
> > -		bad_srat();
> > -		return;
> > -	}
> > -	/* It is fine to add this area to the nodes data it will be used
> later*/
> > +	/* It is fine to add this area to the nodes data it will be used
> later */
> >  	i = conflicting_memblks(start, end);
> >  	if (i < 0)
> >  		/* everything fine */;
> >  	else if (memblk_nodeid[i] == node) {
> > -		bool mismatch = !(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) !=
> > -		                !test_bit(i, memblk_hotplug);
> > +		bool mismatch = !hotplug != !test_bit(i, memblk_hotplug);
> >
> > -		printk("%sSRAT: PXM %u (%"PRIpaddr"-%"PRIpaddr") overlaps with
> itself (%"PRIpaddr"-%"PRIpaddr")\n",
> > -		       mismatch ? KERN_ERR : KERN_WARNING, pxm, start, end,
> > +		printk("%sSRAT: NODE %u (%"PRIpaddr"-%"PRIpaddr") overlaps
> with itself (%"PRIpaddr"-%"PRIpaddr")\n",
> 
> Nit: Unlike PXM, which is an acronym, "node" doesn't want to be all upper
> case.
> 

OK.

> Also did you check that the node <-> PXM association is known to a reader
> of a log at this point in time?
> 

Yes, I read your comment below. The original log contains node <-> PXM mapping.
Because PXM is ACPI specific, I think in neutral code, we just need node in
log. But this change removed the log of node <-> PXM association, it was my
mistake. I will add them back in next version. But I don't think they will stay in
neutal code, it would be in ACPI specific code.

I also had wanted to keep PXM <-> node mapping in the log, so I set up PXM to
node 1-1 mapping for the device tree. But then I thought it was an unnecessary
burden on the device tree, so I selected to remove PXM in log.  

> > +		       mismatch ? KERN_ERR : KERN_WARNING, node, start, end,
> >  		       node_memblk_range[i].start, node_memblk_range[i].end);
> >  		if (mismatch) {
> > -			bad_srat();
> > -			return;
> > +			return -1;
> >  		}
> >  	} else {
> >  		printk(KERN_ERR
> > -		       "SRAT: PXM %u (%"PRIpaddr"-%"PRIpaddr") overlaps with
> PXM %u (%"PRIpaddr"-%"PRIpaddr")\n",
> > -		       pxm, start, end, node_to_pxm(memblk_nodeid[i]),
> > +		       "SRAT: NODE %u (%"PRIpaddr"-%"PRIpaddr") overlaps with
> NODE %u (%"PRIpaddr"-%"PRIpaddr")\n",
> > +		       node, start, end, memblk_nodeid[i],
> >  		       node_memblk_range[i].start, node_memblk_range[i].end);
> > -		bad_srat();
> > -		return;
> > +		return -1;
> 
> Please no -1 return values. Either a function means to return boolean,
> in which case it should use bool / true / false, or it means to return
> a proper errno-style error code.
> 

Ok, I will do it.

> > @@ -375,26 +349,69 @@ acpi_numa_memory_affinity_init(const struct
> acpi_srat_mem_affinity *ma)
> >  			if (nd->end < end)
> >  				nd->end = end;
> >
> > -			/* Check whether this range contains memory for other
> nodes */
> > -			if (!is_node_memory_continuous(node, nd->start, nd->end))
> {
> > -				bad_srat();
> > -				return;
> > -			}
> > +			if (!is_node_memory_continuous(node, nd->start, nd->end))
> > +				return -1;
> >  		}
> >  	}
> > -	printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIpaddr"-%"PRIpaddr"%s\n",
> > -	       node, pxm, start, end,
> > -	       ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? " (hotplug)" : "");
> > +
> > +	printk(KERN_INFO "SRAT: Node %u %"PRIpaddr"-%"PRIpaddr"%s\n",
> > +	       node, start, end, hotplug ? " (hotplug)" : "");
> 
> Continuing from a comment further up: Here you remove an instance of
> logging the node <-> PXM association.
> 

see my above comments.

> >  	node_memblk_range[num_node_memblks].start = start;
> >  	node_memblk_range[num_node_memblks].end = end;
> >  	memblk_nodeid[num_node_memblks] = node;
> > -	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
> > +	if (hotplug) {
> >  		__set_bit(num_node_memblks, memblk_hotplug);
> >  		if (end > mem_hotplug_boundary())
> >  			mem_hotplug_update_boundary(end);
> >  	}
> >  	num_node_memblks++;
> > +
> > +	return 0;
> > +}
> > +
> > +/* Callback for parsing of the Proximity Domain <-> Memory Area
> mappings */
> > +void __init
> > +acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
> > +{
> > +	unsigned pxm;
> > +	nodeid_t node;
> > +	int ret;
> > +
> > +	if (srat_disabled())
> > +		return;
> > +	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
> > +		bad_srat();
> > +		return;
> > +	}
> > +	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
> > +		return;
> > +
> > +	/* Supplement the heuristics in l1tf_calculations(). */
> > +	l1tf_safe_maddr = max(l1tf_safe_maddr,
> > +			ROUNDUP((ma->base_address + ma->length), PAGE_SIZE));
> 
> Indentation and unnecessary pair of parentheses.
> 

OK.

> > +	if (!numa_memblks_available())
> > +	{
> 
> For code you touch, please try to bring it into consistent style. Here
> the brace wants to move to the previous line, seeing that the file is
> using Linux style.
> 

OK.

> > +		dprintk(XENLOG_WARNING,
> > +                "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
> 
> Here you want to fix indentation and ideally also format and grammar of
> the actual log message.
> 

I will fix it, thanks.

> > +		bad_srat();
> > +		return;
> > +	}
> > +
> > +	pxm = ma->proximity_domain;
> > +	if (srat_rev < 2)
> > +		pxm &= 0xff;
> > +	node = setup_node(pxm);
> > +	if (node == NUMA_NO_NODE) {
> > +		bad_srat();
> > +		return;
> > +	}
> > +
> > +	ret = numa_update_node_memblks(node, ma->base_address, ma->length,
> > +					ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE);
> 
> Indentation again.

Ok.

> 
> Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c
index 3334ede7a5..18bc6b19bb 100644
--- a/xen/arch/x86/srat.c
+++ b/xen/arch/x86/srat.c
@@ -104,6 +104,14 @@  nodeid_t setup_node(unsigned pxm)
 	return node;
 }
 
+bool __init numa_memblks_available(void)
+{
+	if (num_node_memblks < NR_NODE_MEMBLKS)
+		return true;
+
+	return false;
+}
+
 int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node)
 {
 	int i;
@@ -301,69 +309,35 @@  static bool __init is_node_memory_continuous(nodeid_t nid,
 	return true;
 }
 
-/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
-void __init
-acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
+/* Neutral NUMA memory affinity init function for ACPI and DT */
+int __init numa_update_node_memblks(nodeid_t node,
+		paddr_t start, paddr_t size, bool hotplug)
 {
-	paddr_t start, end;
-	unsigned pxm;
-	nodeid_t node;
+	paddr_t end = start + size;
 	int i;
 
-	if (srat_disabled())
-		return;
-	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
-		bad_srat();
-		return;
-	}
-	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
-		return;
-
-	start = ma->base_address;
-	end = start + ma->length;
-	/* Supplement the heuristics in l1tf_calculations(). */
-	l1tf_safe_maddr = max(l1tf_safe_maddr, ROUNDUP(end, PAGE_SIZE));
-
-	if (num_node_memblks >= NR_NODE_MEMBLKS)
-	{
-		dprintk(XENLOG_WARNING,
-                "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
-		bad_srat();
-		return;
-	}
-
-	pxm = ma->proximity_domain;
-	if (srat_rev < 2)
-		pxm &= 0xff;
-	node = setup_node(pxm);
-	if (node == NUMA_NO_NODE) {
-		bad_srat();
-		return;
-	}
-	/* It is fine to add this area to the nodes data it will be used later*/
+	/* It is fine to add this area to the nodes data it will be used later */
 	i = conflicting_memblks(start, end);
 	if (i < 0)
 		/* everything fine */;
 	else if (memblk_nodeid[i] == node) {
-		bool mismatch = !(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) !=
-		                !test_bit(i, memblk_hotplug);
+		bool mismatch = !hotplug != !test_bit(i, memblk_hotplug);
 
-		printk("%sSRAT: PXM %u (%"PRIpaddr"-%"PRIpaddr") overlaps with itself (%"PRIpaddr"-%"PRIpaddr")\n",
-		       mismatch ? KERN_ERR : KERN_WARNING, pxm, start, end,
+		printk("%sSRAT: NODE %u (%"PRIpaddr"-%"PRIpaddr") overlaps with itself (%"PRIpaddr"-%"PRIpaddr")\n",
+		       mismatch ? KERN_ERR : KERN_WARNING, node, start, end,
 		       node_memblk_range[i].start, node_memblk_range[i].end);
 		if (mismatch) {
-			bad_srat();
-			return;
+			return -1;
 		}
 	} else {
 		printk(KERN_ERR
-		       "SRAT: PXM %u (%"PRIpaddr"-%"PRIpaddr") overlaps with PXM %u (%"PRIpaddr"-%"PRIpaddr")\n",
-		       pxm, start, end, node_to_pxm(memblk_nodeid[i]),
+		       "SRAT: NODE %u (%"PRIpaddr"-%"PRIpaddr") overlaps with NODE %u (%"PRIpaddr"-%"PRIpaddr")\n",
+		       node, start, end, memblk_nodeid[i],
 		       node_memblk_range[i].start, node_memblk_range[i].end);
-		bad_srat();
-		return;
+		return -1;
 	}
-	if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
+
+	if (!hotplug) {
 		struct node *nd = &nodes[node];
 
 		if (!node_test_and_set(node, memory_nodes_parsed)) {
@@ -375,26 +349,69 @@  acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
 			if (nd->end < end)
 				nd->end = end;
 
-			/* Check whether this range contains memory for other nodes */
-			if (!is_node_memory_continuous(node, nd->start, nd->end)) {
-				bad_srat();
-				return;
-			}
+			if (!is_node_memory_continuous(node, nd->start, nd->end))
+				return -1;
 		}
 	}
-	printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIpaddr"-%"PRIpaddr"%s\n",
-	       node, pxm, start, end,
-	       ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? " (hotplug)" : "");
+
+	printk(KERN_INFO "SRAT: Node %u %"PRIpaddr"-%"PRIpaddr"%s\n",
+	       node, start, end, hotplug ? " (hotplug)" : "");
 
 	node_memblk_range[num_node_memblks].start = start;
 	node_memblk_range[num_node_memblks].end = end;
 	memblk_nodeid[num_node_memblks] = node;
-	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+	if (hotplug) {
 		__set_bit(num_node_memblks, memblk_hotplug);
 		if (end > mem_hotplug_boundary())
 			mem_hotplug_update_boundary(end);
 	}
 	num_node_memblks++;
+
+	return 0;
+}
+
+/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
+void __init
+acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
+{
+	unsigned pxm;
+	nodeid_t node;
+	int ret;
+
+	if (srat_disabled())
+		return;
+	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
+		bad_srat();
+		return;
+	}
+	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
+		return;
+
+	/* Supplement the heuristics in l1tf_calculations(). */
+	l1tf_safe_maddr = max(l1tf_safe_maddr,
+			ROUNDUP((ma->base_address + ma->length), PAGE_SIZE));
+
+	if (!numa_memblks_available())
+	{
+		dprintk(XENLOG_WARNING,
+                "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
+		bad_srat();
+		return;
+	}
+
+	pxm = ma->proximity_domain;
+	if (srat_rev < 2)
+		pxm &= 0xff;
+	node = setup_node(pxm);
+	if (node == NUMA_NO_NODE) {
+		bad_srat();
+		return;
+	}
+
+	ret = numa_update_node_memblks(node, ma->base_address, ma->length,
+					ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE);
+	if (ret != 0)
+		bad_srat();
 }
 
 /* Sanity check to catch more bad SRATs (they are amazingly common).
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
index 50cfd8e7ef..5772a70665 100644
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -74,6 +74,9 @@  static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
 				 NODE_DATA(nid)->node_spanned_pages)
 
 extern int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node);
+extern bool numa_memblks_available(void);
+extern int numa_update_node_memblks(nodeid_t node,
+		paddr_t start, paddr_t size, bool hotplug);
 
 void srat_parse_regions(paddr_t addr);
 extern u8 __node_distance(nodeid_t a, nodeid_t b);