[V5,1/4] ACPI: Support Generic Initiator only domains
diff mbox series

Message ID 20191004114330.104746-2-Jonathan.Cameron@huawei.com
State Deferred, archived
Headers show
Series
  • ACPI: Support Generic Initiator proximity domains
Related show

Commit Message

Jonathan Cameron Oct. 4, 2019, 11:43 a.m. UTC
Generic Initiators are a new ACPI concept that allows for the
description of proximity domains that contain a device which
performs memory access (such as a network card) but neither
host CPU nor Memory.

This patch has the parsing code and provides the infrastructure
for an architecture to associate these new domains with their
nearest memory processing node.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/acpi/numa/srat.c       | 62 +++++++++++++++++++++++++++++++++-
 drivers/base/node.c            |  3 ++
 include/asm-generic/topology.h |  3 ++
 include/linux/nodemask.h       |  1 +
 include/linux/topology.h       |  7 ++++
 5 files changed, 75 insertions(+), 1 deletion(-)

Comments

Rafael J. Wysocki Oct. 18, 2019, 10:18 a.m. UTC | #1
On Friday, October 4, 2019 1:43:27 PM CEST Jonathan Cameron wrote:
> Generic Initiators are a new ACPI concept that allows for the
> description of proximity domains that contain a device which
> performs memory access (such as a network card) but neither
> host CPU nor Memory.
> 
> This patch has the parsing code and provides the infrastructure
> for an architecture to associate these new domains with their
> nearest memory processing node.
> 
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

This depends on the series from Dan at:

https://lore.kernel.org/linux-acpi/CAPcyv4gBSX58CWH4HZ28w0_cZRzJrhgdEFHa2g8KDqyv8aFqZQ@mail.gmail.com/T/#m1acce3ae8f29f680c0d95fd1e840e703949fbc48

AFAICS, so please respin when that one hits Linus' tree.

> ---
>  drivers/acpi/numa/srat.c       | 62 +++++++++++++++++++++++++++++++++-
>  drivers/base/node.c            |  3 ++
>  include/asm-generic/topology.h |  3 ++
>  include/linux/nodemask.h       |  1 +
>  include/linux/topology.h       |  7 ++++
>  5 files changed, 75 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
> index eadbf90e65d1..fe34315a9234 100644
> --- a/drivers/acpi/numa/srat.c
> +++ b/drivers/acpi/numa/srat.c
> @@ -170,6 +170,38 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header)
>  		}
>  		break;
>  
> +	case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> +	{
> +		struct acpi_srat_generic_affinity *p =
> +			(struct acpi_srat_generic_affinity *)header;
> +		char name[9] = {};
> +
> +		if (p->device_handle_type == 0) {
> +			/*
> +			 * For pci devices this may be the only place they
> +			 * are assigned a proximity domain
> +			 */
> +			pr_debug("SRAT Generic Initiator(Seg:%u BDF:%u) in proximity domain %d %s\n",
> +				 *(u16 *)(&p->device_handle[0]),
> +				 *(u16 *)(&p->device_handle[2]),
> +				 p->proximity_domain,
> +				 (p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ?
> +				"enabled" : "disabled");
> +		} else {
> +			/*
> +			 * In this case we can rely on the device having a
> +			 * proximity domain reference
> +			 */
> +			memcpy(name, p->device_handle, 8);
> +			pr_info("SRAT Generic Initiator(HID=%.8s UID=%.4s) in proximity domain %d %s\n",
> +				(char *)(&p->device_handle[0]),
> +				(char *)(&p->device_handle[8]),
> +				p->proximity_domain,
> +				(p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ?
> +				"enabled" : "disabled");
> +		}
> +	}
> +	break;
>  	default:
>  		pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
>  			header->type);
> @@ -378,6 +410,32 @@ acpi_parse_gicc_affinity(union acpi_subtable_headers *header,
>  	return 0;
>  }
>  
> +static int __init
> +acpi_parse_gi_affinity(union acpi_subtable_headers *header,
> +		       const unsigned long end)
> +{
> +	struct acpi_srat_generic_affinity *gi_affinity;
> +	int node;
> +
> +	gi_affinity = (struct acpi_srat_generic_affinity *)header;
> +	if (!gi_affinity)
> +		return -EINVAL;
> +	acpi_table_print_srat_entry(&header->common);
> +
> +	if (!(gi_affinity->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED))
> +		return -EINVAL;
> +
> +	node = acpi_map_pxm_to_node(gi_affinity->proximity_domain);
> +	if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
> +		pr_err("SRAT: Too many proximity domains.\n");
> +		return -EINVAL;
> +	}
> +	node_set(node, numa_nodes_parsed);
> +	node_set_state(node, N_GENERIC_INITIATOR);
> +
> +	return 0;
> +}
> +
>  static int __initdata parsed_numa_memblks;
>  
>  static int __init
> @@ -433,7 +491,7 @@ int __init acpi_numa_init(void)
>  
>  	/* SRAT: System Resource Affinity Table */
>  	if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
> -		struct acpi_subtable_proc srat_proc[3];
> +		struct acpi_subtable_proc srat_proc[4];
>  
>  		memset(srat_proc, 0, sizeof(srat_proc));
>  		srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
> @@ -442,6 +500,8 @@ int __init acpi_numa_init(void)
>  		srat_proc[1].handler = acpi_parse_x2apic_affinity;
>  		srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY;
>  		srat_proc[2].handler = acpi_parse_gicc_affinity;
> +		srat_proc[3].id = ACPI_SRAT_TYPE_GENERIC_AFFINITY;
> +		srat_proc[3].handler = acpi_parse_gi_affinity;
>  
>  		acpi_table_parse_entries_array(ACPI_SIG_SRAT,
>  					sizeof(struct acpi_table_srat),
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index 296546ffed6c..e5863baa8cb6 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -977,6 +977,8 @@ static struct node_attr node_state_attr[] = {
>  #endif
>  	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
>  	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
> +	[N_GENERIC_INITIATOR] = _NODE_ATTR(has_generic_initiator,
> +					   N_GENERIC_INITIATOR),
>  };
>  
>  static struct attribute *node_state_attrs[] = {
> @@ -988,6 +990,7 @@ static struct attribute *node_state_attrs[] = {
>  #endif
>  	&node_state_attr[N_MEMORY].attr.attr,
>  	&node_state_attr[N_CPU].attr.attr,
> +	&node_state_attr[N_GENERIC_INITIATOR].attr.attr,
>  	NULL
>  };
>  
> diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
> index 238873739550..54d0b4176a45 100644
> --- a/include/asm-generic/topology.h
> +++ b/include/asm-generic/topology.h
> @@ -71,6 +71,9 @@
>  #ifndef set_cpu_numa_mem
>  #define set_cpu_numa_mem(cpu, node)
>  #endif
> +#ifndef set_gi_numa_mem
> +#define set_gi_numa_mem(gi, node)
> +#endif
>  
>  #endif	/* !CONFIG_NUMA || !CONFIG_HAVE_MEMORYLESS_NODES */
>  
> diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
> index 27e7fa36f707..1aebf766fb52 100644
> --- a/include/linux/nodemask.h
> +++ b/include/linux/nodemask.h
> @@ -399,6 +399,7 @@ enum node_states {
>  #endif
>  	N_MEMORY,		/* The node has memory(regular, high, movable) */
>  	N_CPU,		/* The node has one or more cpus */
> +	N_GENERIC_INITIATOR,	/* The node is a GI only node */
>  	NR_NODE_STATES
>  };
>  
> diff --git a/include/linux/topology.h b/include/linux/topology.h
> index eb2fe6edd73c..05ccf011e489 100644
> --- a/include/linux/topology.h
> +++ b/include/linux/topology.h
> @@ -140,6 +140,13 @@ static inline void set_numa_mem(int node)
>  }
>  #endif
>  
> +#ifndef set_gi_numa_mem
> +static inline void set_gi_numa_mem(int gi, int node)
> +{
> +	_node_numa_mem_[gi] = node;
> +}
> +#endif
> +
>  #ifndef node_to_mem_node
>  static inline int node_to_mem_node(int node)
>  {
>
Jonathan Cameron Oct. 18, 2019, 12:46 p.m. UTC | #2
On Fri, 18 Oct 2019 12:18:33 +0200
"Rafael J. Wysocki" <rjw@rjwysocki.net> wrote:

> On Friday, October 4, 2019 1:43:27 PM CEST Jonathan Cameron wrote:
> > Generic Initiators are a new ACPI concept that allows for the
> > description of proximity domains that contain a device which
> > performs memory access (such as a network card) but neither
> > host CPU nor Memory.
> > 
> > This patch has the parsing code and provides the infrastructure
> > for an architecture to associate these new domains with their
> > nearest memory processing node.
> > 
> > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>  
> 
> This depends on the series from Dan at:
> 
> https://lore.kernel.org/linux-acpi/CAPcyv4gBSX58CWH4HZ28w0_cZRzJrhgdEFHa2g8KDqyv8aFqZQ@mail.gmail.com/T/#m1acce3ae8f29f680c0d95fd1e840e703949fbc48
> 
Hi Rafael,

Yes. Cover letter mentions it was rebased on v4 of that series.

> AFAICS, so please respin when that one hits the Linus' tree.

Sure, though that pushes it out another cycle and it's beginning to
get a bit silly (just rebases since April).

I guess it can't be helped given the series hits several trees.

Note that this version applies completely clean on top of V7 of
Dan's SPM/hmem set applied to the tip tree (which I assume is the
route that will take).  Hence, unless something else changes, the
respin will be identical to this version.

Thanks,

Jonathan

> 
> > ---
> >  drivers/acpi/numa/srat.c       | 62 +++++++++++++++++++++++++++++++++-
> >  drivers/base/node.c            |  3 ++
> >  include/asm-generic/topology.h |  3 ++
> >  include/linux/nodemask.h       |  1 +
> >  include/linux/topology.h       |  7 ++++
> >  5 files changed, 75 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
> > index eadbf90e65d1..fe34315a9234 100644
> > --- a/drivers/acpi/numa/srat.c
> > +++ b/drivers/acpi/numa/srat.c
> > @@ -170,6 +170,38 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header)
> >  		}
> >  		break;
> >  
> > +	case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> > +	{
> > +		struct acpi_srat_generic_affinity *p =
> > +			(struct acpi_srat_generic_affinity *)header;
> > +		char name[9] = {};
> > +
> > +		if (p->device_handle_type == 0) {
> > +			/*
> > +			 * For pci devices this may be the only place they
> > +			 * are assigned a proximity domain
> > +			 */
> > +			pr_debug("SRAT Generic Initiator(Seg:%u BDF:%u) in proximity domain %d %s\n",
> > +				 *(u16 *)(&p->device_handle[0]),
> > +				 *(u16 *)(&p->device_handle[2]),
> > +				 p->proximity_domain,
> > +				 (p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ?
> > +				"enabled" : "disabled");
> > +		} else {
> > +			/*
> > +			 * In this case we can rely on the device having a
> > +			 * proximity domain reference
> > +			 */
> > +			memcpy(name, p->device_handle, 8);
> > +			pr_info("SRAT Generic Initiator(HID=%.8s UID=%.4s) in proximity domain %d %s\n",
> > +				(char *)(&p->device_handle[0]),
> > +				(char *)(&p->device_handle[8]),
> > +				p->proximity_domain,
> > +				(p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ?
> > +				"enabled" : "disabled");
> > +		}
> > +	}
> > +	break;
> >  	default:
> >  		pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
> >  			header->type);
> > @@ -378,6 +410,32 @@ acpi_parse_gicc_affinity(union acpi_subtable_headers *header,
> >  	return 0;
> >  }
> >  
> > +static int __init
> > +acpi_parse_gi_affinity(union acpi_subtable_headers *header,
> > +		       const unsigned long end)
> > +{
> > +	struct acpi_srat_generic_affinity *gi_affinity;
> > +	int node;
> > +
> > +	gi_affinity = (struct acpi_srat_generic_affinity *)header;
> > +	if (!gi_affinity)
> > +		return -EINVAL;
> > +	acpi_table_print_srat_entry(&header->common);
> > +
> > +	if (!(gi_affinity->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED))
> > +		return -EINVAL;
> > +
> > +	node = acpi_map_pxm_to_node(gi_affinity->proximity_domain);
> > +	if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
> > +		pr_err("SRAT: Too many proximity domains.\n");
> > +		return -EINVAL;
> > +	}
> > +	node_set(node, numa_nodes_parsed);
> > +	node_set_state(node, N_GENERIC_INITIATOR);
> > +
> > +	return 0;
> > +}
> > +
> >  static int __initdata parsed_numa_memblks;
> >  
> >  static int __init
> > @@ -433,7 +491,7 @@ int __init acpi_numa_init(void)
> >  
> >  	/* SRAT: System Resource Affinity Table */
> >  	if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
> > -		struct acpi_subtable_proc srat_proc[3];
> > +		struct acpi_subtable_proc srat_proc[4];
> >  
> >  		memset(srat_proc, 0, sizeof(srat_proc));
> >  		srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
> > @@ -442,6 +500,8 @@ int __init acpi_numa_init(void)
> >  		srat_proc[1].handler = acpi_parse_x2apic_affinity;
> >  		srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY;
> >  		srat_proc[2].handler = acpi_parse_gicc_affinity;
> > +		srat_proc[3].id = ACPI_SRAT_TYPE_GENERIC_AFFINITY;
> > +		srat_proc[3].handler = acpi_parse_gi_affinity;
> >  
> >  		acpi_table_parse_entries_array(ACPI_SIG_SRAT,
> >  					sizeof(struct acpi_table_srat),
> > diff --git a/drivers/base/node.c b/drivers/base/node.c
> > index 296546ffed6c..e5863baa8cb6 100644
> > --- a/drivers/base/node.c
> > +++ b/drivers/base/node.c
> > @@ -977,6 +977,8 @@ static struct node_attr node_state_attr[] = {
> >  #endif
> >  	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
> >  	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
> > +	[N_GENERIC_INITIATOR] = _NODE_ATTR(has_generic_initiator,
> > +					   N_GENERIC_INITIATOR),
> >  };
> >  
> >  static struct attribute *node_state_attrs[] = {
> > @@ -988,6 +990,7 @@ static struct attribute *node_state_attrs[] = {
> >  #endif
> >  	&node_state_attr[N_MEMORY].attr.attr,
> >  	&node_state_attr[N_CPU].attr.attr,
> > +	&node_state_attr[N_GENERIC_INITIATOR].attr.attr,
> >  	NULL
> >  };
> >  
> > diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
> > index 238873739550..54d0b4176a45 100644
> > --- a/include/asm-generic/topology.h
> > +++ b/include/asm-generic/topology.h
> > @@ -71,6 +71,9 @@
> >  #ifndef set_cpu_numa_mem
> >  #define set_cpu_numa_mem(cpu, node)
> >  #endif
> > +#ifndef set_gi_numa_mem
> > +#define set_gi_numa_mem(gi, node)
> > +#endif
> >  
> >  #endif	/* !CONFIG_NUMA || !CONFIG_HAVE_MEMORYLESS_NODES */
> >  
> > diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
> > index 27e7fa36f707..1aebf766fb52 100644
> > --- a/include/linux/nodemask.h
> > +++ b/include/linux/nodemask.h
> > @@ -399,6 +399,7 @@ enum node_states {
> >  #endif
> >  	N_MEMORY,		/* The node has memory(regular, high, movable) */
> >  	N_CPU,		/* The node has one or more cpus */
> > +	N_GENERIC_INITIATOR,	/* The node is a GI only node */
> >  	NR_NODE_STATES
> >  };
> >  
> > diff --git a/include/linux/topology.h b/include/linux/topology.h
> > index eb2fe6edd73c..05ccf011e489 100644
> > --- a/include/linux/topology.h
> > +++ b/include/linux/topology.h
> > @@ -140,6 +140,13 @@ static inline void set_numa_mem(int node)
> >  }
> >  #endif
> >  
> > +#ifndef set_gi_numa_mem
> > +static inline void set_gi_numa_mem(int gi, int node)
> > +{
> > +	_node_numa_mem_[gi] = node;
> > +}
> > +#endif
> > +
> >  #ifndef node_to_mem_node
> >  static inline int node_to_mem_node(int node)
> >  {
> >   
> 
> 
> 
>
Rafael J. Wysocki Nov. 7, 2019, 2:54 p.m. UTC | #3
On Friday, October 18, 2019 2:46:56 PM CET Jonathan Cameron wrote:
> On Fri, 18 Oct 2019 12:18:33 +0200
> "Rafael J. Wysocki" <rjw@rjwysocki.net> wrote:
> 
> > On Friday, October 4, 2019 1:43:27 PM CEST Jonathan Cameron wrote:
> > > Generic Initiators are a new ACPI concept that allows for the
> > > description of proximity domains that contain a device which
> > > performs memory access (such as a network card) but neither
> > > host CPU nor Memory.
> > > 
> > > This patch has the parsing code and provides the infrastructure
> > > for an architecture to associate these new domains with their
> > > nearest memory processing node.
> > > 
> > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>  
> > 
> > This depends on the series from Dan at:
> > 
> > https://lore.kernel.org/linux-acpi/CAPcyv4gBSX58CWH4HZ28w0_cZRzJrhgdEFHa2g8KDqyv8aFqZQ@mail.gmail.com/T/#m1acce3ae8f29f680c0d95fd1e840e703949fbc48
> > 
> Hi Rafael,
> 
> Yes. Cover letter mentions it was rebased on v4 of that series.
> 
> > AFAICS, so please respin when that one hits the Linus' tree.
> 
> Sure, though that pushes it out another cycle and it's beginning to
> get a bit silly (just rebases since April).
> 
> I guess it can't be helped given the series hits several trees.

I've just applied Dan's series and I can take patch [1/4] from this one,
but for the [2-3/4] I'd like to get some ACKs from the arm64 and x86 people
respectively.

Thanks!
Jonathan Cameron Nov. 12, 2019, 5:07 p.m. UTC | #4
On Thu, 7 Nov 2019 15:54:28 +0100
"Rafael J. Wysocki" <rjw@rjwysocki.net> wrote:

> On Friday, October 18, 2019 2:46:56 PM CET Jonathan Cameron wrote:
> > On Fri, 18 Oct 2019 12:18:33 +0200
> > "Rafael J. Wysocki" <rjw@rjwysocki.net> wrote:
> >   
> > > On Friday, October 4, 2019 1:43:27 PM CEST Jonathan Cameron wrote:  
> > > > Generic Initiators are a new ACPI concept that allows for the
> > > > description of proximity domains that contain a device which
> > > > performs memory access (such as a network card) but neither
> > > > host CPU nor Memory.
> > > > 
> > > > This patch has the parsing code and provides the infrastructure
> > > > for an architecture to associate these new domains with their
> > > > nearest memory processing node.
> > > > 
> > > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>    
> > > 
> > > This depends on the series from Dan at:
> > > 
> > > https://lore.kernel.org/linux-acpi/CAPcyv4gBSX58CWH4HZ28w0_cZRzJrhgdEFHa2g8KDqyv8aFqZQ@mail.gmail.com/T/#m1acce3ae8f29f680c0d95fd1e840e703949fbc48
> > >   
> > Hi Rafael,
> > 
> > Yes. Cover letter mentions it was rebased on v4 of that series.
> >   
> > > AFAICS, so please respin when that one hits the Linus' tree.  
> > 
> > Sure, though that pushes it out another cycle and it's beginning to
> > get a bit silly (just rebases since April).
> > 
> > I guess it can't be helped given the series hits several trees.  
> 
> I've just applied the Dan's series and I can take patch [1/4] from this one,
> but for the [2-3/4] I'd like to get some ACKs from the arm64 and x86 people
> respectively.

Thanks Rafael!

Absolutely understood on the need for Acks.

For ARM let us try a few more CCs

+CC Will, Lorenzo, Hanjun.

Also Ingo, on the basis of showing a passing interest in the x86 patch
previously.  Otherwise I think we have the x86 people most likely to
comment already cc'd.

https://patchwork.kernel.org/cover/11174247/ has the full series.

I'd appreciate anyone who has time taking a look at these.  The
actual actions in the architectures are very simple, but I may well
be missing some subtlety.

> 
> Thanks!
> 
Thanks,

Jonathan
Dan Williams Nov. 12, 2019, 5:55 p.m. UTC | #5
[ add Tao Xu ]

On Fri, Oct 4, 2019 at 4:45 AM Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> Generic Initiators are a new ACPI concept that allows for the
> description of proximity domains that contain a device which
> performs memory access (such as a network card) but neither
> host CPU nor Memory.
>
> This patch has the parsing code and provides the infrastructure
> for an architecture to associate these new domains with their
> nearest memory processing node.

Thanks for this Jonathan. May I ask how this was tested? Tao has been
working on qemu support for HMAT [1]. I have not checked if it already
supports generic initiator entries, but it would be helpful to include
an example of how the kernel sees these configurations in practice.

[1]: http://patchwork.ozlabs.org/cover/1096737/
Jonathan Cameron Nov. 13, 2019, 9:47 a.m. UTC | #6
On Tue, 12 Nov 2019 09:55:17 -0800
Dan Williams <dan.j.williams@intel.com> wrote:

> [ add Tao Xu ]
> 
> On Fri, Oct 4, 2019 at 4:45 AM Jonathan Cameron
> <Jonathan.Cameron@huawei.com> wrote:
> >
> > Generic Initiators are a new ACPI concept that allows for the
> > description of proximity domains that contain a device which
> > performs memory access (such as a network card) but neither
> > host CPU nor Memory.
> >
> > This patch has the parsing code and provides the infrastructure
> > for an architecture to associate these new domains with their
> > nearest memory processing node.  
> 
> Thanks for this Jonathan. May I ask how this was tested? Tao has been
> working on qemu support for HMAT [1]. I have not checked if it already
> supports generic initiator entries, but it would be helpful to include
> an example of how the kernel sees these configurations in practice.
> 
> [1]: http://patchwork.ozlabs.org/cover/1096737/

Tested against qemu with SRAT and SLIT table overrides from an
initrd to actually create the node and give it distances
(those all turn up correctly in the normal places).  DSDT override
used to move an emulated network card into the GI numa node.  That
currently requires the PCI patch referred to in the cover letter.
On arm64 tested both on qemu and real hardware (overrides on tables
even for real hardware as I can't persuade our BIOS team to implement
Generic Initiators until an OS is actually using them.)

The main real requirement is that memory allocations then occur from one
of the nodes at the minimal distance when you do a devm_ allocation
from a device assigned to the GI node. Also need to be able to query the
distances to allow load balancing etc.  All that works as expected.

It only has a fairly tangential connection to HMAT in that HMAT
can provide information on GI nodes.  Given HMAT code is quite happy
with memoryless nodes anyway it should work.  QEMU doesn't currently
have support to create GI SRAT entries let alone HMAT using them.

Whilst I could look at adding such support to QEMU, it's not
exactly high priority to emulate something we can test easily
by overriding the tables before the kernel reads them.

I'll look at how hard it is to build HMAT tables for my test
configs based on the ones I used to test your HMAT patches a while
back.  Should be easy if tedious.

Jonathan
Tao Xu Nov. 13, 2019, 1:57 p.m. UTC | #7
On 11/13/2019 5:47 PM, Jonathan Cameron wrote:
> On Tue, 12 Nov 2019 09:55:17 -0800
> Dan Williams <dan.j.williams@intel.com> wrote:
> 
>> [ add Tao Xu ]
>>
>> On Fri, Oct 4, 2019 at 4:45 AM Jonathan Cameron
>> <Jonathan.Cameron@huawei.com> wrote:
>>>
>>> Generic Initiators are a new ACPI concept that allows for the
>>> description of proximity domains that contain a device which
>>> performs memory access (such as a network card) but neither
>>> host CPU nor Memory.
>>>
>>> This patch has the parsing code and provides the infrastructure
>>> for an architecture to associate these new domains with their
>>> nearest memory processing node.
>>
>> Thanks for this Jonathan. May I ask how this was tested? Tao has been
>> working on qemu support for HMAT [1]. I have not checked if it already
>> supports generic initiator entries, but it would be helpful to include
>> an example of how the kernel sees these configurations in practice.
>>
>> [1]: http://patchwork.ozlabs.org/cover/1096737/
> 
> Tested against qemu with SRAT and SLIT table overrides from an
> initrd to actually create the node and give it distances
> (those all turn up correctly in the normal places).  DSDT override
> used to move an emulated network card into the GI numa node.  That
> currently requires the PCI patch referred to in the cover letter.
> On arm64 tested both on qemu and real hardware (overrides on tables
> even for real hardware as I can't persuade our BIOS team to implement
> Generic Initiators until an OS is actually using them.)
> 
> Main real requirement is memory allocations then occur from one of
> the nodes at the minimal distance when you are do a devm_ allocation
> from a device assigned. Also need to be able to query the distances
> to allow load balancing etc.  All that works as expected.
> 
> It only has a fairly tangential connection to HMAT in that HMAT
> can provide information on GI nodes.  Given HMAT code is quite happy
> with memoryless nodes anyway it should work.  QEMU doesn't currently
> have support to create GI SRAT entries let alone HMAT using them.
> 
> Whilst I could look at adding such support to QEMU, it's not
> exactly high priority to emulate something we can test easily
> by overriding the tables before the kernel reads them.
> 
> I'll look at how hard it is to build an HMAT tables for my test
> configs based on the ones I used to test your HMAT patches a while
> back.  Should be easy if tedious.
> 
> Jonathan
> 
Indeed, HMAT can support Generic Initiators, but as far as I know, QEMU
can only emulate a node with CPU and memory, or memory only. Even if we
assign a node with CPU only, QEMU will raise an error. Considering
compatibility, there is a lot of work to do in QEMU if we change the NUMA
or SRAT tables.
Dan Williams Nov. 13, 2019, 4:52 p.m. UTC | #8
On Wed, Nov 13, 2019 at 5:57 AM Tao Xu <tao3.xu@intel.com> wrote:
>
> On 11/13/2019 5:47 PM, Jonathan Cameron wrote:
> > On Tue, 12 Nov 2019 09:55:17 -0800
> > Dan Williams <dan.j.williams@intel.com> wrote:
> >
> >> [ add Tao Xu ]
> >>
> >> On Fri, Oct 4, 2019 at 4:45 AM Jonathan Cameron
> >> <Jonathan.Cameron@huawei.com> wrote:
> >>>
> >>> Generic Initiators are a new ACPI concept that allows for the
> >>> description of proximity domains that contain a device which
> >>> performs memory access (such as a network card) but neither
> >>> host CPU nor Memory.
> >>>
> >>> This patch has the parsing code and provides the infrastructure
> >>> for an architecture to associate these new domains with their
> >>> nearest memory processing node.
> >>
> >> Thanks for this Jonathan. May I ask how this was tested? Tao has been
> >> working on qemu support for HMAT [1]. I have not checked if it already
> >> supports generic initiator entries, but it would be helpful to include
> >> an example of how the kernel sees these configurations in practice.
> >>
> >> [1]: http://patchwork.ozlabs.org/cover/1096737/
> >
> > Tested against qemu with SRAT and SLIT table overrides from an
> > initrd to actually create the node and give it distances
> > (those all turn up correctly in the normal places).  DSDT override
> > used to move an emulated network card into the GI numa node.  That
> > currently requires the PCI patch referred to in the cover letter.
> > On arm64 tested both on qemu and real hardware (overrides on tables
> > even for real hardware as I can't persuade our BIOS team to implement
> > Generic Initiators until an OS is actually using them.)
> >
> > Main real requirement is memory allocations then occur from one of
> > the nodes at the minimal distance when you are do a devm_ allocation
> > from a device assigned. Also need to be able to query the distances
> > to allow load balancing etc.  All that works as expected.
> >
> > It only has a fairly tangential connection to HMAT in that HMAT
> > can provide information on GI nodes.  Given HMAT code is quite happy
> > with memoryless nodes anyway it should work.  QEMU doesn't currently
> > have support to create GI SRAT entries let alone HMAT using them.
> >
> > Whilst I could look at adding such support to QEMU, it's not
> > exactly high priority to emulate something we can test easily
> > by overriding the tables before the kernel reads them.
> >
> > I'll look at how hard it is to build an HMAT tables for my test
> > configs based on the ones I used to test your HMAT patches a while
> > back.  Should be easy if tedious.
> >
> > Jonathan
> >
> Indeed, HMAT can support Generic Initiator, but as far as I know, QEMU
> only can emulate a node with cpu and memory, or memory-only. Even if we
> assign a node with cpu only, qemu will raise error. Considering
> compatibility, there are lots of work to do for QEMU if we change NUMA
> or SRAT table.

Thanks for the background. It would still be a useful feature to be
able to define a memory + generic-initiator node in qemu. That will
mirror real world accelerators with local memory configurations.
Jonathan Cameron Nov. 13, 2019, 5:48 p.m. UTC | #9
On Wed, 13 Nov 2019 21:57:24 +0800
Tao Xu <tao3.xu@intel.com> wrote:

> On 11/13/2019 5:47 PM, Jonathan Cameron wrote:
> > On Tue, 12 Nov 2019 09:55:17 -0800
> > Dan Williams <dan.j.williams@intel.com> wrote:
> >   
> >> [ add Tao Xu ]
> >>
> >> On Fri, Oct 4, 2019 at 4:45 AM Jonathan Cameron
> >> <Jonathan.Cameron@huawei.com> wrote:  
> >>>
> >>> Generic Initiators are a new ACPI concept that allows for the
> >>> description of proximity domains that contain a device which
> >>> performs memory access (such as a network card) but neither
> >>> host CPU nor Memory.
> >>>
> >>> This patch has the parsing code and provides the infrastructure
> >>> for an architecture to associate these new domains with their
> >>> nearest memory processing node.  
> >>
> >> Thanks for this Jonathan. May I ask how this was tested? Tao has been
> >> working on qemu support for HMAT [1]. I have not checked if it already
> >> supports generic initiator entries, but it would be helpful to include
> >> an example of how the kernel sees these configurations in practice.
> >>
> >> [1]: http://patchwork.ozlabs.org/cover/1096737/  
> > 
> > Tested against qemu with SRAT and SLIT table overrides from an
> > initrd to actually create the node and give it distances
> > (those all turn up correctly in the normal places).  DSDT override
> > used to move an emulated network card into the GI numa node.  That
> > currently requires the PCI patch referred to in the cover letter.
> > On arm64 tested both on qemu and real hardware (overrides on tables
> > even for real hardware as I can't persuade our BIOS team to implement
> > Generic Initiators until an OS is actually using them.)
> > 
> > Main real requirement is memory allocations then occur from one of
> > the nodes at the minimal distance when you are do a devm_ allocation
> > from a device assigned. Also need to be able to query the distances
> > to allow load balancing etc.  All that works as expected.
> > 
> > It only has a fairly tangential connection to HMAT in that HMAT
> > can provide information on GI nodes.  Given HMAT code is quite happy
> > with memoryless nodes anyway it should work.  QEMU doesn't currently
> > have support to create GI SRAT entries let alone HMAT using them.
> > 
> > Whilst I could look at adding such support to QEMU, it's not
> > exactly high priority to emulate something we can test easily
> > by overriding the tables before the kernel reads them.
> > 
> > I'll look at how hard it is to build an HMAT tables for my test
> > configs based on the ones I used to test your HMAT patches a while
> > back.  Should be easy if tedious.
> > 
> > Jonathan
> >   
> Indeed, HMAT can support Generic Initiator, but as far as I know, QEMU 
> only can emulate a node with cpu and memory, or memory-only. Even if we 
> assign a node with cpu only, qemu will raise error. Considering 
> compatibility, there are lots of work to do for QEMU if we change NUMA 
> or SRAT table.
> 

I faked up a quick HMAT table.

Used a configuration with 3x CPU and memory nodes, 1x memory only node
and 1x GI node.  Two test cases, one where the GI initiator is further than
the CPU containing nodes from the memory only node (realistic case for
existing hardware). That behaves as expected and there are no
/sys/bus/node/devices/nodeX/access0 entries for the GI node
+ appropriate ones for the memory only node as normal.

The other case is more interesting: we have the memory only node nearer
to the GI node than to any of the CPUs.  In that case, for x86 at least,
the HMAT code is happy to put an access0 directory in the GI node
with empty access0/initiators and the memory node under access0/targets.

The memory only node is node4 and the GI node node3.

So relevant dirs under /sys/bus/node/devices

node3/access0/initiators/ Empty
node3/access0/targets/node4

node4/access0/initiators/[node3 read_bandwidth write_bandwidth etc]
node4/access0/targets/ Empty

So the current result (I think - the HMAT interface still confuses
me :) is that a GI node is treated like a CPU node.  This might mean
there is no useful information available if you want to figure out
which CPU containing node is nearest to Memory when the GI node is
nearer still.

Is this a problem?  I'm not sure...  

If we don't want to include GI nodes then we can possibly
use the node_state(N_CPU, x) method to check before considering
them, or I guess parse SRAT to extract that info directly. 

I tried this and it seems to work, so I can add a patch doing this
in the next version if we think this is the 'right' thing to do.

So what do you think 'should' happen? 

Jonathan
Jonathan Cameron Nov. 13, 2019, 5:56 p.m. UTC | #10
On Wed, 13 Nov 2019 08:52:46 -0800
Dan Williams <dan.j.williams@intel.com> wrote:

> On Wed, Nov 13, 2019 at 5:57 AM Tao Xu <tao3.xu@intel.com> wrote:
> >
> > On 11/13/2019 5:47 PM, Jonathan Cameron wrote:  
> > > On Tue, 12 Nov 2019 09:55:17 -0800
> > > Dan Williams <dan.j.williams@intel.com> wrote:
> > >  
> > >> [ add Tao Xu ]
> > >>
> > >> On Fri, Oct 4, 2019 at 4:45 AM Jonathan Cameron
> > >> <Jonathan.Cameron@huawei.com> wrote:  
> > >>>
> > >>> Generic Initiators are a new ACPI concept that allows for the
> > >>> description of proximity domains that contain a device which
> > >>> performs memory access (such as a network card) but neither
> > >>> host CPU nor Memory.
> > >>>
> > >>> This patch has the parsing code and provides the infrastructure
> > >>> for an architecture to associate these new domains with their
> > >>> nearest memory processing node.  
> > >>
> > >> Thanks for this Jonathan. May I ask how this was tested? Tao has been
> > >> working on qemu support for HMAT [1]. I have not checked if it already
> > >> supports generic initiator entries, but it would be helpful to include
> > >> an example of how the kernel sees these configurations in practice.
> > >>
> > >> [1]: http://patchwork.ozlabs.org/cover/1096737/  
> > >
> > > Tested against qemu with SRAT and SLIT table overrides from an
> > > initrd to actually create the node and give it distances
> > > (those all turn up correctly in the normal places).  DSDT override
> > > used to move an emulated network card into the GI numa node.  That
> > > currently requires the PCI patch referred to in the cover letter.
> > > On arm64 tested both on qemu and real hardware (overrides on tables
> > > even for real hardware as I can't persuade our BIOS team to implement
> > > Generic Initiators until an OS is actually using them.)
> > >
> > > Main real requirement is memory allocations then occur from one of
> > > the nodes at the minimal distance when you are do a devm_ allocation
> > > from a device assigned. Also need to be able to query the distances
> > > to allow load balancing etc.  All that works as expected.
> > >
> > > It only has a fairly tangential connection to HMAT in that HMAT
> > > can provide information on GI nodes.  Given HMAT code is quite happy
> > > with memoryless nodes anyway it should work.  QEMU doesn't currently
> > > have support to create GI SRAT entries let alone HMAT using them.
> > >
> > > Whilst I could look at adding such support to QEMU, it's not
> > > exactly high priority to emulate something we can test easily
> > > by overriding the tables before the kernel reads them.
> > >
> > > I'll look at how hard it is to build an HMAT tables for my test
> > > configs based on the ones I used to test your HMAT patches a while
> > > back.  Should be easy if tedious.
> > >
> > > Jonathan
> > >  
> > Indeed, HMAT can support Generic Initiator, but as far as I know, QEMU
> > only can emulate a node with cpu and memory, or memory-only. Even if we
> > assign a node with cpu only, qemu will raise error. Considering
> > compatibility, there are lots of work to do for QEMU if we change NUMA
> > or SRAT table.  
> 
> Thanks for the background. It would still be a useful feature to be
> able to define a memory + generic-initiator node in qemu. That will
> mirror real world accelerators with local memory configurations.

Ah crossed with my essay.  This simple case you have here is easier to
discuss.  Let's call it a GPU on a coherent interconnect with local memory.

What do you think should happen for access0 in sysfs?  Do we want the
GPU reflected in there or not?

This particular case doesn't actually need a GI, though perhaps you
might want one purely to give HMAT based info.  On a pre GI system
you would just use a memory only node and use DSDT _PXM to put the
GPU device in it.

Whilst I agree a means of testing this in qemu might be more
friendly than doing it by overriding tables, the overriding route
lets you do the crazy corner cases + generate 'invalid' tables
which are also useful for testing.

Thanks,

Jonathan

Patch
diff mbox series

diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
index eadbf90e65d1..fe34315a9234 100644
--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -170,6 +170,38 @@  acpi_table_print_srat_entry(struct acpi_subtable_header *header)
 		}
 		break;
 
+	case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+	{
+		struct acpi_srat_generic_affinity *p =
+			(struct acpi_srat_generic_affinity *)header;
+		char name[9] = {};
+
+		if (p->device_handle_type == 0) {
+			/*
+			 * For pci devices this may be the only place they
+			 * are assigned a proximity domain
+			 */
+			pr_debug("SRAT Generic Initiator(Seg:%u BDF:%u) in proximity domain %d %s\n",
+				 *(u16 *)(&p->device_handle[0]),
+				 *(u16 *)(&p->device_handle[2]),
+				 p->proximity_domain,
+				 (p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ?
+				"enabled" : "disabled");
+		} else {
+			/*
+			 * In this case we can rely on the device having a
+			 * proximity domain reference
+			 */
+			memcpy(name, p->device_handle, 8);
+			pr_info("SRAT Generic Initiator(HID=%.8s UID=%.4s) in proximity domain %d %s\n",
+				(char *)(&p->device_handle[0]),
+				(char *)(&p->device_handle[8]),
+				p->proximity_domain,
+				(p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ?
+				"enabled" : "disabled");
+		}
+	}
+	break;
 	default:
 		pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
 			header->type);
@@ -378,6 +410,32 @@  acpi_parse_gicc_affinity(union acpi_subtable_headers *header,
 	return 0;
 }
 
+static int __init
+acpi_parse_gi_affinity(union acpi_subtable_headers *header,
+		       const unsigned long end)
+{
+	struct acpi_srat_generic_affinity *gi_affinity;
+	int node;
+
+	gi_affinity = (struct acpi_srat_generic_affinity *)header;
+	if (!gi_affinity)
+		return -EINVAL;
+	acpi_table_print_srat_entry(&header->common);
+
+	if (!(gi_affinity->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED))
+		return -EINVAL;
+
+	node = acpi_map_pxm_to_node(gi_affinity->proximity_domain);
+	if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
+		pr_err("SRAT: Too many proximity domains.\n");
+		return -EINVAL;
+	}
+	node_set(node, numa_nodes_parsed);
+	node_set_state(node, N_GENERIC_INITIATOR);
+
+	return 0;
+}
+
 static int __initdata parsed_numa_memblks;
 
 static int __init
@@ -433,7 +491,7 @@  int __init acpi_numa_init(void)
 
 	/* SRAT: System Resource Affinity Table */
 	if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
-		struct acpi_subtable_proc srat_proc[3];
+		struct acpi_subtable_proc srat_proc[4];
 
 		memset(srat_proc, 0, sizeof(srat_proc));
 		srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
@@ -442,6 +500,8 @@  int __init acpi_numa_init(void)
 		srat_proc[1].handler = acpi_parse_x2apic_affinity;
 		srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY;
 		srat_proc[2].handler = acpi_parse_gicc_affinity;
+		srat_proc[3].id = ACPI_SRAT_TYPE_GENERIC_AFFINITY;
+		srat_proc[3].handler = acpi_parse_gi_affinity;
 
 		acpi_table_parse_entries_array(ACPI_SIG_SRAT,
 					sizeof(struct acpi_table_srat),
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 296546ffed6c..e5863baa8cb6 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -977,6 +977,8 @@  static struct node_attr node_state_attr[] = {
 #endif
 	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
 	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
+	[N_GENERIC_INITIATOR] = _NODE_ATTR(has_generic_initiator,
+					   N_GENERIC_INITIATOR),
 };
 
 static struct attribute *node_state_attrs[] = {
@@ -988,6 +990,7 @@  static struct attribute *node_state_attrs[] = {
 #endif
 	&node_state_attr[N_MEMORY].attr.attr,
 	&node_state_attr[N_CPU].attr.attr,
+	&node_state_attr[N_GENERIC_INITIATOR].attr.attr,
 	NULL
 };
 
diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
index 238873739550..54d0b4176a45 100644
--- a/include/asm-generic/topology.h
+++ b/include/asm-generic/topology.h
@@ -71,6 +71,9 @@ 
 #ifndef set_cpu_numa_mem
 #define set_cpu_numa_mem(cpu, node)
 #endif
+#ifndef set_gi_numa_mem
+#define set_gi_numa_mem(gi, node)
+#endif
 
 #endif	/* !CONFIG_NUMA || !CONFIG_HAVE_MEMORYLESS_NODES */
 
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 27e7fa36f707..1aebf766fb52 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -399,6 +399,7 @@  enum node_states {
 #endif
 	N_MEMORY,		/* The node has memory(regular, high, movable) */
 	N_CPU,		/* The node has one or more cpus */
+	N_GENERIC_INITIATOR,	/* The node is a GI only node */
 	NR_NODE_STATES
 };
 
diff --git a/include/linux/topology.h b/include/linux/topology.h
index eb2fe6edd73c..05ccf011e489 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -140,6 +140,13 @@  static inline void set_numa_mem(int node)
 }
 #endif
 
+#ifndef set_gi_numa_mem
+static inline void set_gi_numa_mem(int gi, int node)
+{
+	_node_numa_mem_[gi] = node;
+}
+#endif
+
 #ifndef node_to_mem_node
 static inline int node_to_mem_node(int node)
 {