diff mbox

[v5,12/14] arm64, acpi, numa: NUMA support based on SRAT and SLIT

Message ID 1461116439-22991-13-git-send-email-ddaney.cavm@gmail.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

David Daney April 20, 2016, 1:40 a.m. UTC
From: Hanjun Guo <hanjun.guo@linaro.org>

Introduce a new file to hold ACPI based NUMA information parsing from
SRAT and SLIT.

SRAT includes the CPU ACPI ID to Proximity Domain mappings and the
memory range to Proximity Domain mappings.  SLIT holds the inter-node
distances (relative numbers for access latency).

Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
[rrichter@cavium.com Reworked for numa v10 series ]
Signed-off-by: Robert Richter <rrichter@cavium.com>
[david.daney@cavium.com reordered and combined with other patches in Hanjun Guo's original set]
Signed-off-by: David Daney <david.daney@cavium.com>
---
 arch/arm64/include/asm/acpi.h |   8 +++
 arch/arm64/include/asm/numa.h |   2 +
 arch/arm64/kernel/Makefile    |   1 +
 arch/arm64/kernel/acpi_numa.c | 149 ++++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/smp.c       |   2 +
 arch/arm64/mm/numa.c          |   5 +-
 6 files changed, 166 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kernel/acpi_numa.c

Comments

Dennis Chen April 20, 2016, 7:41 a.m. UTC | #1
On 20 April 2016 at 09:40, David Daney <ddaney.cavm@gmail.com> wrote:
> From: Hanjun Guo <hanjun.guo@linaro.org>
>
> Introduce a new file to hold ACPI based NUMA information parsing from
> SRAT and SLIT.
>
> SRAT includes the CPU ACPI ID to Proximity Domain mappings and memory
> ranges to Proximity Domain mapping.  SLIT has the information of inter
> node distances(relative number for access latency).
>
> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
> Signed-off-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
> [rrichter@cavium.com Reworked for numa v10 series ]
> Signed-off-by: Robert Richter <rrichter@cavium.com>
> [david.daney@cavium.com reorderd and combinded with other patches in Hanjun Guo's original set]
> Signed-off-by: David Daney <david.daney@cavium.com>
> ---
>  arch/arm64/include/asm/acpi.h |   8 +++
>  arch/arm64/include/asm/numa.h |   2 +
>  arch/arm64/kernel/Makefile    |   1 +
>  arch/arm64/kernel/acpi_numa.c | 149 ++++++++++++++++++++++++++++++++++++++++++
>  arch/arm64/kernel/smp.c       |   2 +
>  arch/arm64/mm/numa.c          |   5 +-
>  6 files changed, 166 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kernel/acpi_numa.c
>
> diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
> index aee323b..4b13ecd 100644
> --- a/arch/arm64/include/asm/acpi.h
> +++ b/arch/arm64/include/asm/acpi.h
> @@ -113,4 +113,12 @@ static inline const char *acpi_get_enable_method(int cpu)
>  pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr);
>  #endif
>
> +#ifdef CONFIG_ACPI_NUMA
> +int arm64_acpi_numa_init(void);
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid);
> +#else
> +static inline int arm64_acpi_numa_init(void) { return -ENOSYS; }
> +static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return NUMA_NO_NODE; }
> +#endif /* CONFIG_ACPI_NUMA */
> +
>  #endif /*_ASM_ACPI_H*/
> diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
> index e9b4f29..600887e 100644
> --- a/arch/arm64/include/asm/numa.h
> +++ b/arch/arm64/include/asm/numa.h
> @@ -5,6 +5,8 @@
>
>  #ifdef CONFIG_NUMA
>
> +#define NR_NODE_MEMBLKS                (MAX_NUMNODES * 2)
> +
>  /* currently, arm64 implements flat NUMA topology */
>  #define parent_node(node)      (node)
>
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 3793003..69569c6 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -42,6 +42,7 @@ arm64-obj-$(CONFIG_EFI)                       += efi.o efi-entry.stub.o
>  arm64-obj-$(CONFIG_PCI)                        += pci.o
>  arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
>  arm64-obj-$(CONFIG_ACPI)               += acpi.o
> +arm64-obj-$(CONFIG_ACPI_NUMA)          += acpi_numa.o
>  arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)        += acpi_parking_protocol.o
>  arm64-obj-$(CONFIG_PARAVIRT)           += paravirt.o
>  arm64-obj-$(CONFIG_RANDOMIZE_BASE)     += kaslr.o
> diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
> new file mode 100644
> index 0000000..fd72070
> --- /dev/null
> +++ b/arch/arm64/kernel/acpi_numa.c
> @@ -0,0 +1,149 @@
> +/*
> + * ACPI 5.1 based NUMA setup for ARM64
> + * Lots of code was borrowed from arch/x86/mm/srat.c
> + *
> + * Copyright 2004 Andi Kleen, SuSE Labs.
> + * Copyright (C) 2013-2016, Linaro Ltd.
> + *             Author: Hanjun Guo <hanjun.guo@linaro.org>
> + *
> + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
> + *
> + * Called from acpi_numa_init while reading the SRAT and SLIT tables.
> + * Assumes all memory regions belonging to a single proximity domain
> + * are in one chunk. Holes between them will be included in the node.
> + */
> +
> +#define pr_fmt(fmt) "ACPI: NUMA: " fmt
> +
> +#include <linux/acpi.h>
> +#include <linux/bitmap.h>
> +#include <linux/bootmem.h>
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <linux/memblock.h>
> +#include <linux/mmzone.h>
> +#include <linux/module.h>
> +#include <linux/topology.h>
> +
> +#include <acpi/processor.h>
> +#include <asm/numa.h>
> +
> +static int cpus_in_srat;
> +
> +struct __node_cpu_hwid {
> +       u32 node_id;    /* logical node containing this CPU */
> +       u64 cpu_hwid;   /* MPIDR for this CPU */
> +};
> +
> +static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = {
> +[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} };
> +
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid)
> +{
> +       int i;
> +
> +       for (i = 0; i < cpus_in_srat; i++) {
> +               if (hwid == early_node_cpu_hwid[i].cpu_hwid)
> +                       return early_node_cpu_hwid[i].node_id;
> +       }
> +
> +       return NUMA_NO_NODE;
> +}
> +
> +static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr)
> +{
> +       unsigned long madt_end, entry;
> +       struct acpi_table_madt *madt;
> +       acpi_size tbl_size;
> +
> +       if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0,
> +                       (struct acpi_table_header **)&madt, &tbl_size)))
> +               return -ENODEV;
> +
> +       entry = (unsigned long)madt;
> +       madt_end = entry + madt->header.length;
> +
> +       /* Parse all entries looking for a match. */
> +       entry += sizeof(struct acpi_table_madt);
> +       while (entry + sizeof(struct acpi_subtable_header) < madt_end) {
> +               struct acpi_subtable_header *header =
> +                       (struct acpi_subtable_header *)entry;
> +
> +               if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
> +                       struct acpi_madt_generic_interrupt *gicc =
> +                               container_of(header,
> +                               struct acpi_madt_generic_interrupt, header);
> +
> +                       if ((gicc->flags & ACPI_MADT_ENABLED) &&
> +                           (gicc->uid == acpi_id)) {
> +                               *mpidr = gicc->arm_mpidr;
> +                               early_acpi_os_unmap_memory(madt, tbl_size);
> +                               return 0;
> +                       }
> +               }
> +               entry += header->length;
> +       }
> +
> +       early_acpi_os_unmap_memory(madt, tbl_size);
> +       return -ENODEV;
> +}
> +
> +/* Callback for Proximity Domain -> ACPI processor UID mapping */
> +void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
> +{
> +       int pxm, node;
> +       u64 mpidr;
> +
> +       if (srat_disabled())
> +               return;
> +
> +       if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) {
> +               pr_err("SRAT: Invalid SRAT header length: %d\n",
> +                       pa->header.length);
> +               bad_srat();
> +               return;
> +       }
> +
> +       if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
> +               return;
> +
> +       if (cpus_in_srat >= NR_CPUS) {
> +               pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n",
> +                            NR_CPUS);
> +               return;
> +       }
> +
> +       pxm = pa->proximity_domain;
> +       node = acpi_map_pxm_to_node(pxm);
> +
> +       if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
> +               pr_err("SRAT: Too many proximity domains %d\n", pxm);
> +               bad_srat();
> +               return;
> +       }
> +
> +       if (get_mpidr_in_madt(pa->acpi_processor_uid, &mpidr)) {
> +               pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n",
> +                       pxm, pa->acpi_processor_uid);
> +               bad_srat();
> +               return;
> +       }
> +
> +       early_node_cpu_hwid[cpus_in_srat].node_id = node;
> +       early_node_cpu_hwid[cpus_in_srat].cpu_hwid =  mpidr;
> +       node_set(node, numa_nodes_parsed);
> +       cpus_in_srat++;
> +       pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d cpu %d\n",
> +               pxm, mpidr, node, cpus_in_srat);
> +}
> +
> +int __init arm64_acpi_numa_init(void)
> +{
> +       int ret;
> +
> +       ret = acpi_numa_init();
> +       if (ret)
> +               return ret;
> +
> +       return srat_disabled() ? -EINVAL : 0;
> +}
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index bebc4c6..6c7ef8f 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -524,6 +524,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
>          */
>         acpi_set_mailbox_entry(cpu_count, processor);
>
> +       early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count, hwid));
> +
>         cpu_count++;
>  }
>
> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
> index 6cb03f9..fc15186 100644
> --- a/arch/arm64/mm/numa.c
> +++ b/arch/arm64/mm/numa.c
> @@ -17,6 +17,7 @@
>   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
>
> +#include <linux/acpi.h>
>  #include <linux/bootmem.h>
>  #include <linux/memblock.h>
>  #include <linux/module.h>
> @@ -388,7 +389,9 @@ static int __init dummy_numa_init(void)
>  void __init arm64_numa_init(void)
>  {
>         if (!numa_off) {
> -               if (!numa_init(of_numa_init))
> +               if (!acpi_disabled && !numa_init(arm64_acpi_numa_init))
> +                       return;
> +               if (acpi_disabled && !numa_init(of_numa_init))
>                         return;
>         }
>
On top of the latest version of the DT-based NUMA patch, if 'numa=off'
is specified on the command line, this function falls back to invoking
dummy_numa_init(). That raises the question: do we need to touch any
ACPI-related code in that case? If not, then the message
"No NUMA configuration found" that follows seems unnecessary, since it
is a little confusing when NUMA has already been turned off explicitly.

Thanks,
Dennis

> --
> 1.7.11.7
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ganapatrao Kulkarni April 20, 2016, 8:31 a.m. UTC | #2
On Wed, Apr 20, 2016 at 1:11 PM, Dennis Chen <dennis.chen@linaro.org> wrote:
> On 20 April 2016 at 09:40, David Daney <ddaney.cavm@gmail.com> wrote:
>> From: Hanjun Guo <hanjun.guo@linaro.org>
>>
>> Introduce a new file to hold ACPI based NUMA information parsing from
>> SRAT and SLIT.
>>
>> SRAT includes the CPU ACPI ID to Proximity Domain mappings and memory
>> ranges to Proximity Domain mapping.  SLIT has the information of inter
>> node distances(relative number for access latency).
>>
>> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
>> Signed-off-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
>> [rrichter@cavium.com Reworked for numa v10 series ]
>> Signed-off-by: Robert Richter <rrichter@cavium.com>
>> [david.daney@cavium.com reorderd and combinded with other patches in Hanjun Guo's original set]
>> Signed-off-by: David Daney <david.daney@cavium.com>
>> ---
>>  arch/arm64/include/asm/acpi.h |   8 +++
>>  arch/arm64/include/asm/numa.h |   2 +
>>  arch/arm64/kernel/Makefile    |   1 +
>>  arch/arm64/kernel/acpi_numa.c | 149 ++++++++++++++++++++++++++++++++++++++++++
>>  arch/arm64/kernel/smp.c       |   2 +
>>  arch/arm64/mm/numa.c          |   5 +-
>>  6 files changed, 166 insertions(+), 1 deletion(-)
>>  create mode 100644 arch/arm64/kernel/acpi_numa.c
>>
>> diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
>> index aee323b..4b13ecd 100644
>> --- a/arch/arm64/include/asm/acpi.h
>> +++ b/arch/arm64/include/asm/acpi.h
>> @@ -113,4 +113,12 @@ static inline const char *acpi_get_enable_method(int cpu)
>>  pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr);
>>  #endif
>>
>> +#ifdef CONFIG_ACPI_NUMA
>> +int arm64_acpi_numa_init(void);
>> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid);
>> +#else
>> +static inline int arm64_acpi_numa_init(void) { return -ENOSYS; }
>> +static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return NUMA_NO_NODE; }
>> +#endif /* CONFIG_ACPI_NUMA */
>> +
>>  #endif /*_ASM_ACPI_H*/
>> diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
>> index e9b4f29..600887e 100644
>> --- a/arch/arm64/include/asm/numa.h
>> +++ b/arch/arm64/include/asm/numa.h
>> @@ -5,6 +5,8 @@
>>
>>  #ifdef CONFIG_NUMA
>>
>> +#define NR_NODE_MEMBLKS                (MAX_NUMNODES * 2)
>> +
>>  /* currently, arm64 implements flat NUMA topology */
>>  #define parent_node(node)      (node)
>>
>> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
>> index 3793003..69569c6 100644
>> --- a/arch/arm64/kernel/Makefile
>> +++ b/arch/arm64/kernel/Makefile
>> @@ -42,6 +42,7 @@ arm64-obj-$(CONFIG_EFI)                       += efi.o efi-entry.stub.o
>>  arm64-obj-$(CONFIG_PCI)                        += pci.o
>>  arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
>>  arm64-obj-$(CONFIG_ACPI)               += acpi.o
>> +arm64-obj-$(CONFIG_ACPI_NUMA)          += acpi_numa.o
>>  arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)        += acpi_parking_protocol.o
>>  arm64-obj-$(CONFIG_PARAVIRT)           += paravirt.o
>>  arm64-obj-$(CONFIG_RANDOMIZE_BASE)     += kaslr.o
>> diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
>> new file mode 100644
>> index 0000000..fd72070
>> --- /dev/null
>> +++ b/arch/arm64/kernel/acpi_numa.c
>> @@ -0,0 +1,149 @@
>> +/*
>> + * ACPI 5.1 based NUMA setup for ARM64
>> + * Lots of code was borrowed from arch/x86/mm/srat.c
>> + *
>> + * Copyright 2004 Andi Kleen, SuSE Labs.
>> + * Copyright (C) 2013-2016, Linaro Ltd.
>> + *             Author: Hanjun Guo <hanjun.guo@linaro.org>
>> + *
>> + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
>> + *
>> + * Called from acpi_numa_init while reading the SRAT and SLIT tables.
>> + * Assumes all memory regions belonging to a single proximity domain
>> + * are in one chunk. Holes between them will be included in the node.
>> + */
>> +
>> +#define pr_fmt(fmt) "ACPI: NUMA: " fmt
>> +
>> +#include <linux/acpi.h>
>> +#include <linux/bitmap.h>
>> +#include <linux/bootmem.h>
>> +#include <linux/kernel.h>
>> +#include <linux/mm.h>
>> +#include <linux/memblock.h>
>> +#include <linux/mmzone.h>
>> +#include <linux/module.h>
>> +#include <linux/topology.h>
>> +
>> +#include <acpi/processor.h>
>> +#include <asm/numa.h>
>> +
>> +static int cpus_in_srat;
>> +
>> +struct __node_cpu_hwid {
>> +       u32 node_id;    /* logical node containing this CPU */
>> +       u64 cpu_hwid;   /* MPIDR for this CPU */
>> +};
>> +
>> +static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = {
>> +[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} };
>> +
>> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid)
>> +{
>> +       int i;
>> +
>> +       for (i = 0; i < cpus_in_srat; i++) {
>> +               if (hwid == early_node_cpu_hwid[i].cpu_hwid)
>> +                       return early_node_cpu_hwid[i].node_id;
>> +       }
>> +
>> +       return NUMA_NO_NODE;
>> +}
>> +
>> +static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr)
>> +{
>> +       unsigned long madt_end, entry;
>> +       struct acpi_table_madt *madt;
>> +       acpi_size tbl_size;
>> +
>> +       if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0,
>> +                       (struct acpi_table_header **)&madt, &tbl_size)))
>> +               return -ENODEV;
>> +
>> +       entry = (unsigned long)madt;
>> +       madt_end = entry + madt->header.length;
>> +
>> +       /* Parse all entries looking for a match. */
>> +       entry += sizeof(struct acpi_table_madt);
>> +       while (entry + sizeof(struct acpi_subtable_header) < madt_end) {
>> +               struct acpi_subtable_header *header =
>> +                       (struct acpi_subtable_header *)entry;
>> +
>> +               if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
>> +                       struct acpi_madt_generic_interrupt *gicc =
>> +                               container_of(header,
>> +                               struct acpi_madt_generic_interrupt, header);
>> +
>> +                       if ((gicc->flags & ACPI_MADT_ENABLED) &&
>> +                           (gicc->uid == acpi_id)) {
>> +                               *mpidr = gicc->arm_mpidr;
>> +                               early_acpi_os_unmap_memory(madt, tbl_size);
>> +                               return 0;
>> +                       }
>> +               }
>> +               entry += header->length;
>> +       }
>> +
>> +       early_acpi_os_unmap_memory(madt, tbl_size);
>> +       return -ENODEV;
>> +}
>> +
>> +/* Callback for Proximity Domain -> ACPI processor UID mapping */
>> +void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
>> +{
>> +       int pxm, node;
>> +       u64 mpidr;
>> +
>> +       if (srat_disabled())
>> +               return;
>> +
>> +       if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) {
>> +               pr_err("SRAT: Invalid SRAT header length: %d\n",
>> +                       pa->header.length);
>> +               bad_srat();
>> +               return;
>> +       }
>> +
>> +       if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
>> +               return;
>> +
>> +       if (cpus_in_srat >= NR_CPUS) {
>> +               pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n",
>> +                            NR_CPUS);
>> +               return;
>> +       }
>> +
>> +       pxm = pa->proximity_domain;
>> +       node = acpi_map_pxm_to_node(pxm);
>> +
>> +       if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
>> +               pr_err("SRAT: Too many proximity domains %d\n", pxm);
>> +               bad_srat();
>> +               return;
>> +       }
>> +
>> +       if (get_mpidr_in_madt(pa->acpi_processor_uid, &mpidr)) {
>> +               pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n",
>> +                       pxm, pa->acpi_processor_uid);
>> +               bad_srat();
>> +               return;
>> +       }
>> +
>> +       early_node_cpu_hwid[cpus_in_srat].node_id = node;
>> +       early_node_cpu_hwid[cpus_in_srat].cpu_hwid =  mpidr;
>> +       node_set(node, numa_nodes_parsed);
>> +       cpus_in_srat++;
>> +       pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d cpu %d\n",
>> +               pxm, mpidr, node, cpus_in_srat);
>> +}
>> +
>> +int __init arm64_acpi_numa_init(void)
>> +{
>> +       int ret;
>> +
>> +       ret = acpi_numa_init();
>> +       if (ret)
>> +               return ret;
>> +
>> +       return srat_disabled() ? -EINVAL : 0;
>> +}
>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>> index bebc4c6..6c7ef8f 100644
>> --- a/arch/arm64/kernel/smp.c
>> +++ b/arch/arm64/kernel/smp.c
>> @@ -524,6 +524,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
>>          */
>>         acpi_set_mailbox_entry(cpu_count, processor);
>>
>> +       early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count, hwid));
>> +
>>         cpu_count++;
>>  }
>>
>> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
>> index 6cb03f9..fc15186 100644
>> --- a/arch/arm64/mm/numa.c
>> +++ b/arch/arm64/mm/numa.c
>> @@ -17,6 +17,7 @@
>>   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>>   */
>>
>> +#include <linux/acpi.h>
>>  #include <linux/bootmem.h>
>>  #include <linux/memblock.h>
>>  #include <linux/module.h>
>> @@ -388,7 +389,9 @@ static int __init dummy_numa_init(void)
>>  void __init arm64_numa_init(void)
>>  {
>>         if (!numa_off) {
>> -               if (!numa_init(of_numa_init))
>> +               if (!acpi_disabled && !numa_init(arm64_acpi_numa_init))
>> +                       return;
>> +               if (acpi_disabled && !numa_init(of_numa_init))
>>                         return;
>>         }
>>
> On top of the latest version of the dt-based numa patch, if 'numa=off'
> specified in the command line,
> this function will fallback to invoke dummy_numa_init(), which give
> rise the question here is, do we need to
> touch any ACPI related stuff in the case? If not, then the output

No, ACPI is not a fallback configuration for DT, and vice versa.

> message "No NUMA configuration found" followed
> seems is not necessary since it's a little bit confusing in case of
> numa has already been turned off explicitly.

Thanks, this print can be moved out of the function dummy_numa_init()
and added to the function arm64_numa_init() as the last line of the
if (!numa_off) block, to indicate that the ACPI/DT-based NUMA
configuration failed.

more appropriate would be,
pr_info("%s\n", "NUMA configuration failed or not found");

thanks
Ganapat

>
> Thanks,
> Dennis
>
>> --
>> 1.7.11.7
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Daney April 20, 2016, 4:29 p.m. UTC | #3
On 04/20/2016 01:31 AM, Ganapatrao Kulkarni wrote:
> On Wed, Apr 20, 2016 at 1:11 PM, Dennis Chen <dennis.chen@linaro.org> wrote:
>> On 20 April 2016 at 09:40, David Daney <ddaney.cavm@gmail.com> wrote:
[...]
>>> @@ -388,7 +389,9 @@ static int __init dummy_numa_init(void)
>>>   void __init arm64_numa_init(void)
>>>   {
>>>          if (!numa_off) {
>>> -               if (!numa_init(of_numa_init))
>>> +               if (!acpi_disabled && !numa_init(arm64_acpi_numa_init))
>>> +                       return;
>>> +               if (acpi_disabled && !numa_init(of_numa_init))
>>>                          return;
>>>          }
>>>
>> On top of the latest version of the dt-based numa patch, if 'numa=off'
>> specified in the command line,
>> this function will fallback to invoke dummy_numa_init(), which give
>> rise the question here is, do we need to
>> touch any ACPI related stuff in the case? If not, then the output
>
> no, ACPI is not fallback configuration for DT and vice versa.
>
>> message "No NUMA configuration found" followed
>> seems is not necessary since it's a little bit confusing in case of
>> numa has already been turned off explicitly.
>
> thanks, this print can be moved out.
> from function dummy_numa_init and it can be added in function arm64_numa_init
> as a last line of if (!numa_off) to indicate, ACPI/DT based NUMA
> configuration failed.
>
> more appropriate would be,
> pr_info("%s\n", "NUMA configuration failed or not found");
>

Although purely cosmetic, I agree that we need to improve the messages
so as not to confuse people.

I will rework the messages with your suggestions in mind to see if we 
can get something that is both concise and unambiguously reflects what 
the user asked for.

David.



--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dennis Chen April 21, 2016, 10:06 a.m. UTC | #4
On 20 April 2016 at 09:40, David Daney <ddaney.cavm@gmail.com> wrote:
> From: Hanjun Guo <hanjun.guo@linaro.org>
>
> Introduce a new file to hold ACPI based NUMA information parsing from
> SRAT and SLIT.
>
> SRAT includes the CPU ACPI ID to Proximity Domain mappings and memory
> ranges to Proximity Domain mapping.  SLIT has the information of inter
> node distances(relative number for access latency).
>
> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
> Signed-off-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
> [rrichter@cavium.com Reworked for numa v10 series ]
> Signed-off-by: Robert Richter <rrichter@cavium.com>
> [david.daney@cavium.com reorderd and combinded with other patches in Hanjun Guo's original set]
> Signed-off-by: David Daney <david.daney@cavium.com>
> ---
>  arch/arm64/include/asm/acpi.h |   8 +++
>  arch/arm64/include/asm/numa.h |   2 +
>  arch/arm64/kernel/Makefile    |   1 +
>  arch/arm64/kernel/acpi_numa.c | 149 ++++++++++++++++++++++++++++++++++++++++++
>  arch/arm64/kernel/smp.c       |   2 +
>  arch/arm64/mm/numa.c          |   5 +-
>  6 files changed, 166 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kernel/acpi_numa.c
>
> diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
> index aee323b..4b13ecd 100644
> --- a/arch/arm64/include/asm/acpi.h
> +++ b/arch/arm64/include/asm/acpi.h
> @@ -113,4 +113,12 @@ static inline const char *acpi_get_enable_method(int cpu)
>  pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr);
>  #endif
>
> +#ifdef CONFIG_ACPI_NUMA
> +int arm64_acpi_numa_init(void);
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid);
> +#else
> +static inline int arm64_acpi_numa_init(void) { return -ENOSYS; }
> +static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return NUMA_NO_NODE; }
> +#endif /* CONFIG_ACPI_NUMA */
> +
>  #endif /*_ASM_ACPI_H*/
> diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
> index e9b4f29..600887e 100644
> --- a/arch/arm64/include/asm/numa.h
> +++ b/arch/arm64/include/asm/numa.h
> @@ -5,6 +5,8 @@
>
>  #ifdef CONFIG_NUMA
>
> +#define NR_NODE_MEMBLKS                (MAX_NUMNODES * 2)
> +
>  /* currently, arm64 implements flat NUMA topology */
>  #define parent_node(node)      (node)
>
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 3793003..69569c6 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -42,6 +42,7 @@ arm64-obj-$(CONFIG_EFI)                       += efi.o efi-entry.stub.o
>  arm64-obj-$(CONFIG_PCI)                        += pci.o
>  arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
>  arm64-obj-$(CONFIG_ACPI)               += acpi.o
> +arm64-obj-$(CONFIG_ACPI_NUMA)          += acpi_numa.o
>  arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)        += acpi_parking_protocol.o
>  arm64-obj-$(CONFIG_PARAVIRT)           += paravirt.o
>  arm64-obj-$(CONFIG_RANDOMIZE_BASE)     += kaslr.o
> diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
> new file mode 100644
> index 0000000..fd72070
> --- /dev/null
> +++ b/arch/arm64/kernel/acpi_numa.c
> @@ -0,0 +1,149 @@
> +/*
> + * ACPI 5.1 based NUMA setup for ARM64
> + * Lots of code was borrowed from arch/x86/mm/srat.c
> + *
> + * Copyright 2004 Andi Kleen, SuSE Labs.
> + * Copyright (C) 2013-2016, Linaro Ltd.
> + *             Author: Hanjun Guo <hanjun.guo@linaro.org>
> + *
> + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
> + *
> + * Called from acpi_numa_init while reading the SRAT and SLIT tables.
> + * Assumes all memory regions belonging to a single proximity domain
> + * are in one chunk. Holes between them will be included in the node.
> + */
> +
> +#define pr_fmt(fmt) "ACPI: NUMA: " fmt
> +
> +#include <linux/acpi.h>
> +#include <linux/bitmap.h>
> +#include <linux/bootmem.h>
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <linux/memblock.h>
> +#include <linux/mmzone.h>
> +#include <linux/module.h>
> +#include <linux/topology.h>
> +
> +#include <acpi/processor.h>
> +#include <asm/numa.h>
> +
> +static int cpus_in_srat;
> +
> +struct __node_cpu_hwid {
> +       u32 node_id;    /* logical node containing this CPU */
> +       u64 cpu_hwid;   /* MPIDR for this CPU */
> +};
> +
> +static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = {
> +[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} };
> +
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid)
> +{
> +       int i;
> +
> +       for (i = 0; i < cpus_in_srat; i++) {
> +               if (hwid == early_node_cpu_hwid[i].cpu_hwid)
> +                       return early_node_cpu_hwid[i].node_id;
> +       }
> +
> +       return NUMA_NO_NODE;
> +}
> +
> +static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr)
> +{
> +       unsigned long madt_end, entry;
> +       struct acpi_table_madt *madt;
> +       acpi_size tbl_size;
> +
> +       if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0,
> +                       (struct acpi_table_header **)&madt, &tbl_size)))
> +               return -ENODEV;
> +
> +       entry = (unsigned long)madt;
> +       madt_end = entry + madt->header.length;
> +
> +       /* Parse all entries looking for a match. */
> +       entry += sizeof(struct acpi_table_madt);
> +       while (entry + sizeof(struct acpi_subtable_header) < madt_end) {
> +               struct acpi_subtable_header *header =
> +                       (struct acpi_subtable_header *)entry;
> +
> +               if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
> +                       struct acpi_madt_generic_interrupt *gicc =
> +                               container_of(header,
> +                               struct acpi_madt_generic_interrupt, header);
> +
> +                       if ((gicc->flags & ACPI_MADT_ENABLED) &&
> +                           (gicc->uid == acpi_id)) {
> +                               *mpidr = gicc->arm_mpidr;
> +                               early_acpi_os_unmap_memory(madt, tbl_size);
> +                               return 0;
> +                       }
> +               }
> +               entry += header->length;
> +       }
> +
> +       early_acpi_os_unmap_memory(madt, tbl_size);
> +       return -ENODEV;
> +}
> +
> +/* Callback for Proximity Domain -> ACPI processor UID mapping */
> +void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
> +{
> +       int pxm, node;
> +       u64 mpidr;
> +
> +       if (srat_disabled())
> +               return;
> +
> +       if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) {
> +               pr_err("SRAT: Invalid SRAT header length: %d\n",
> +                       pa->header.length);
> +               bad_srat();
> +               return;
> +       }
> +
> +       if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
> +               return;
> +
> +       if (cpus_in_srat >= NR_CPUS) {
> +               pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n",
> +                            NR_CPUS);
> +               return;
> +       }
> +
> +       pxm = pa->proximity_domain;
> +       node = acpi_map_pxm_to_node(pxm);
> +
> +       if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
> +               pr_err("SRAT: Too many proximity domains %d\n", pxm);
> +               bad_srat();
> +               return;
> +       }
> +
> +       if (get_mpidr_in_madt(pa->acpi_processor_uid, &mpidr)) {
> +               pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n",
> +                       pxm, pa->acpi_processor_uid);
> +               bad_srat();
> +               return;
> +       }
> +
> +       early_node_cpu_hwid[cpus_in_srat].node_id = node;
> +       early_node_cpu_hwid[cpus_in_srat].cpu_hwid =  mpidr;
> +       node_set(node, numa_nodes_parsed);
> +       cpus_in_srat++;
> +       pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d cpu %d\n",
> +               pxm, mpidr, node, cpus_in_srat);
> +}

What does *cpu* mean in the above pr_info() call? If it is meant to be
the logical processor ID or the ACPI processor UID, then I suggest using
pa->acpi_processor_uid instead of cpus_in_srat. As I understand it,
cpus_in_srat is just a count of the GICC Affinity Structure entries
in the SRAT; correct me if I am wrong. So, at least as it seems to me,
the above pr_info() will output messages that look like:
SRAT: PXM 0 -> MPIDR 0x100 -> Node 0 cpu 1
SRAT: PXM 0 -> MPIDR 0x101 -> Node 0 cpu 2
SRAT: PXM 0 -> MPIDR 0x102 -> Node 0 cpu 3

While the /sys/devices/system/cpu will use the ACPI processor UID to
generate the index of the cpu, like:
cpu0  cpu1  cpu2 ...

As the GICC Affinity Structure indicates, pa->proximity_domain is the
domain to which the logical processor belongs...

Thanks,
Dennis

> +
> +int __init arm64_acpi_numa_init(void)
> +{
> +       int ret;
> +
> +       ret = acpi_numa_init();
> +       if (ret)
> +               return ret;
> +
> +       return srat_disabled() ? -EINVAL : 0;
> +}
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index bebc4c6..6c7ef8f 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -524,6 +524,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
>          */
>         acpi_set_mailbox_entry(cpu_count, processor);
>
> +       early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count, hwid));
> +
>         cpu_count++;
>  }
>
> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
> index 6cb03f9..fc15186 100644
> --- a/arch/arm64/mm/numa.c
> +++ b/arch/arm64/mm/numa.c
> @@ -17,6 +17,7 @@
>   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
>
> +#include <linux/acpi.h>
>  #include <linux/bootmem.h>
>  #include <linux/memblock.h>
>  #include <linux/module.h>
> @@ -388,7 +389,9 @@ static int __init dummy_numa_init(void)
>  void __init arm64_numa_init(void)
>  {
>         if (!numa_off) {
> -               if (!numa_init(of_numa_init))
> +               if (!acpi_disabled && !numa_init(arm64_acpi_numa_init))
> +                       return;
> +               if (acpi_disabled && !numa_init(of_numa_init))
>                         return;
>         }
>
> --
> 1.7.11.7
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Daney April 27, 2016, 1:14 a.m. UTC | #5
On 04/21/2016 03:06 AM, Dennis Chen wrote:
> On 20 April 2016 at 09:40, David Daney <ddaney.cavm@gmail.com> wrote:
[...]
>> +/* Callback for Proximity Domain -> ACPI processor UID mapping */
>> +void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
>> +{
>> +       int pxm, node;
>> +       u64 mpidr;
>> +
>> +       if (srat_disabled())
>> +               return;
>> +
>> +       if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) {
>> +               pr_err("SRAT: Invalid SRAT header length: %d\n",
>> +                       pa->header.length);
>> +               bad_srat();
>> +               return;
>> +       }
>> +
>> +       if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
>> +               return;
>> +
>> +       if (cpus_in_srat >= NR_CPUS) {
>> +               pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n",
>> +                            NR_CPUS);
>> +               return;
>> +       }
>> +
>> +       pxm = pa->proximity_domain;
>> +       node = acpi_map_pxm_to_node(pxm);
>> +
>> +       if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
>> +               pr_err("SRAT: Too many proximity domains %d\n", pxm);
>> +               bad_srat();
>> +               return;
>> +       }
>> +
>> +       if (get_mpidr_in_madt(pa->acpi_processor_uid, &mpidr)) {
>> +               pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n",
>> +                       pxm, pa->acpi_processor_uid);
>> +               bad_srat();
>> +               return;
>> +       }
>> +
>> +       early_node_cpu_hwid[cpus_in_srat].node_id = node;
>> +       early_node_cpu_hwid[cpus_in_srat].cpu_hwid =  mpidr;
>> +       node_set(node, numa_nodes_parsed);
>> +       cpus_in_srat++;
>> +       pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d cpu %d\n",
>> +               pxm, mpidr, node, cpus_in_srat);
>> +}
>
> What does the *cpu* means in above pr_info function? If it's the
> logical processor ID or ACPI processor UID, then I suggest to use
> pa->acpi_processor_uid instead of cpus_in_srat, I understand the
> cpus_in_srat is just a count number of the entries of GICC Affinity
> Struct instance in SRAT, correct me if I am wrong. So at least it sees
> to me, the above pr_info will output message looks like:
> SRAT: PXM 0 -> MPIDR 0x100 -> Node 0 cpu 1
> SRAT: PXM 0 -> MPIDR 0x101 -> Node 0 cpu 2
> SRAT: PXM 0 -> MPIDR 0x102 -> Node 0 cpu 3
>

Yes, that is correct, and for my system that seems to be what we want, as
the names in /sys/devices/system/cpu/ and /proc/cpuinfo agree with the
sequential numbering (0..95), with 48 CPUs on each node.

If I make the change you suggest, I get :
.
.
.
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x0 -> Node 0 cpu 0
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x1 -> Node 0 cpu 1
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x2 -> Node 0 cpu 2
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x3 -> Node 0 cpu 3
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x4 -> Node 0 cpu 4
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x5 -> Node 0 cpu 5
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x6 -> Node 0 cpu 6
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x7 -> Node 0 cpu 7
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x8 -> Node 0 cpu 8
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x9 -> Node 0 cpu 9
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xa -> Node 0 cpu 10
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xb -> Node 0 cpu 11
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xc -> Node 0 cpu 12
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xd -> Node 0 cpu 13
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xe -> Node 0 cpu 14
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xf -> Node 0 cpu 15
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x100 -> Node 0 cpu 256
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x101 -> Node 0 cpu 257
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x102 -> Node 0 cpu 258
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x103 -> Node 0 cpu 259
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x104 -> Node 0 cpu 260
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x105 -> Node 0 cpu 261
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x106 -> Node 0 cpu 262
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x107 -> Node 0 cpu 263
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x108 -> Node 0 cpu 264
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x109 -> Node 0 cpu 265
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10a -> Node 0 cpu 266
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10b -> Node 0 cpu 267
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10c -> Node 0 cpu 268
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10d -> Node 0 cpu 269
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10e -> Node 0 cpu 270
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10f -> Node 0 cpu 271
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x200 -> Node 0 cpu 512
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x201 -> Node 0 cpu 513
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x202 -> Node 0 cpu 514
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x203 -> Node 0 cpu 515
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x204 -> Node 0 cpu 516
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x205 -> Node 0 cpu 517
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x206 -> Node 0 cpu 518
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x207 -> Node 0 cpu 519
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x208 -> Node 0 cpu 520
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x209 -> Node 0 cpu 521
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20a -> Node 0 cpu 522
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20b -> Node 0 cpu 523
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20c -> Node 0 cpu 524
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20d -> Node 0 cpu 525
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20e -> Node 0 cpu 526
[    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20f -> Node 0 cpu 527
[    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10000 -> Node 1 cpu 65536
[    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10001 -> Node 1 cpu 65537
[    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10002 -> Node 1 cpu 65538
[    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10003 -> Node 1 cpu 65539
.
.
.

Not really what I would want.


> While the /sys/devices/system/cpu will use the ACPI processor UID to
> generate the index of the cpu, like:
> cpu0  cpu1  cpu2 ...
>
> As the GICC Affinity Struct indicated, the ps->proximity_domain is the
> domain to which the logical processor belongs...
>
> Thanks,
> Dennis
>

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hanjun Guo April 27, 2016, 4:04 a.m. UTC | #6
Hi Dennis, David,

Sorry for the late reply, please see my comments below.

On 2016/4/27 9:14, David Daney wrote:
> On 04/21/2016 03:06 AM, Dennis Chen wrote:
>> On 20 April 2016 at 09:40, David Daney <ddaney.cavm@gmail.com> wrote:
> [...]
>>> +/* Callback for Proximity Domain -> ACPI processor UID mapping */
>>> +void __init acpi_numa_gicc_affinity_init(struct
>>> acpi_srat_gicc_affinity *pa)
>>> +{
>>> +       int pxm, node;
>>> +       u64 mpidr;
>>> +
>>> +       if (srat_disabled())
>>> +               return;
>>> +
>>> +       if (pa->header.length < sizeof(struct
>>> acpi_srat_gicc_affinity)) {
>>> +               pr_err("SRAT: Invalid SRAT header length: %d\n",
>>> +                       pa->header.length);
>>> +               bad_srat();
>>> +               return;
>>> +       }
>>> +
>>> +       if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
>>> +               return;
>>> +
>>> +       if (cpus_in_srat >= NR_CPUS) {
>>> +               pr_warn_once("SRAT: cpu_to_node_map[%d] is too small,
>>> may not be able to use all cpus\n",
>>> +                            NR_CPUS);
>>> +               return;
>>> +       }
>>> +
>>> +       pxm = pa->proximity_domain;
>>> +       node = acpi_map_pxm_to_node(pxm);
>>> +
>>> +       if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
>>> +               pr_err("SRAT: Too many proximity domains %d\n", pxm);
>>> +               bad_srat();
>>> +               return;
>>> +       }
>>> +
>>> +       if (get_mpidr_in_madt(pa->acpi_processor_uid, &mpidr)) {
>>> +               pr_err("SRAT: PXM %d with ACPI ID %d has no valid
>>> MPIDR in MADT\n",
>>> +                       pxm, pa->acpi_processor_uid);
>>> +               bad_srat();
>>> +               return;
>>> +       }
>>> +
>>> +       early_node_cpu_hwid[cpus_in_srat].node_id = node;
>>> +       early_node_cpu_hwid[cpus_in_srat].cpu_hwid =  mpidr;
>>> +       node_set(node, numa_nodes_parsed);
>>> +       cpus_in_srat++;
>>> +       pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d cpu %d\n",
>>> +               pxm, mpidr, node, cpus_in_srat);
>>> +}
>>
>> What does the *cpu* means in above pr_info function? If it's the
>> logical processor ID or ACPI processor UID, then I suggest to use
>> pa->acpi_processor_uid instead of cpus_in_srat, I understand the

I think printing cpus_in_srat is pointless here, as the logical CPU number
is allocated by the OS when initializing SMP by scanning the MADT. As
Dennis said, it's just a count, not a number that maps to an MPIDR.

ACPI processor UID is the key value to connect MADT, SRAT, DSDT.

For MADT, it will have MPIDR and ACPI processor UID, and OS will
create mappings to MPIDR and cpu logical number,
ACPI processor UID <------> MPIDR <------> CPU logical number

In SRAT, there is ACPI processor UID represented, mappings will be
ACPI processor UID <------> PXM <------> NUMA node logical number

So we can use ACPI processor UID to get the MPIDR by scanning the
MADT, then we can map NUMA node logical number to cpu logical
number later.

>> cpus_in_srat is just a count number of the entries of GICC Affinity
>> Struct instance in SRAT, correct me if I am wrong. So at least it sees
>> to me, the above pr_info will output message looks like:
>> SRAT: PXM 0 -> MPIDR 0x100 -> Node 0 cpu 1
>> SRAT: PXM 0 -> MPIDR 0x101 -> Node 0 cpu 2
>> SRAT: PXM 0 -> MPIDR 0x102 -> Node 0 cpu 3
>>
>
> Yes, that is correct, and for my system seems to be what we want as the
> names in /sys/devices/system/cpu/ and /proc/cpu_info agree with the
> sequential numbering (0..95) with 48 CPUs on each node.

That's because you placed the CPUs in the same order in both the MADT and
the SRAT :) If you had not, the numbers would not match.

>
> If I make the change you suggest, I get :
> .
> .
> .
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x0 -> Node 0 cpu 0
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x1 -> Node 0 cpu 1
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x2 -> Node 0 cpu 2
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x3 -> Node 0 cpu 3
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x4 -> Node 0 cpu 4
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x5 -> Node 0 cpu 5
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x6 -> Node 0 cpu 6
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x7 -> Node 0 cpu 7
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x8 -> Node 0 cpu 8
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x9 -> Node 0 cpu 9
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xa -> Node 0 cpu 10
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xb -> Node 0 cpu 11
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xc -> Node 0 cpu 12
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xd -> Node 0 cpu 13
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xe -> Node 0 cpu 14
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xf -> Node 0 cpu 15
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x100 -> Node 0 cpu 256
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x101 -> Node 0 cpu 257
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x102 -> Node 0 cpu 258
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x103 -> Node 0 cpu 259
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x104 -> Node 0 cpu 260
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x105 -> Node 0 cpu 261
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x106 -> Node 0 cpu 262
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x107 -> Node 0 cpu 263
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x108 -> Node 0 cpu 264
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x109 -> Node 0 cpu 265
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10a -> Node 0 cpu 266
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10b -> Node 0 cpu 267
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10c -> Node 0 cpu 268
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10d -> Node 0 cpu 269
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10e -> Node 0 cpu 270
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10f -> Node 0 cpu 271
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x200 -> Node 0 cpu 512
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x201 -> Node 0 cpu 513
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x202 -> Node 0 cpu 514
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x203 -> Node 0 cpu 515
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x204 -> Node 0 cpu 516
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x205 -> Node 0 cpu 517
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x206 -> Node 0 cpu 518
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x207 -> Node 0 cpu 519
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x208 -> Node 0 cpu 520
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x209 -> Node 0 cpu 521
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20a -> Node 0 cpu 522
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20b -> Node 0 cpu 523
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20c -> Node 0 cpu 524
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20d -> Node 0 cpu 525
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20e -> Node 0 cpu 526
> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20f -> Node 0 cpu 527
> [    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10000 -> Node 1 cpu 65536
> [    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10001 -> Node 1 cpu 65537
> [    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10002 -> Node 1 cpu 65538
> [    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10003 -> Node 1 cpu 65539
> .
> .
> .
>
> Not really what I would want.

How about removing the "cpu" print? It's not the value we want,
and we can get that mapping information from sysfs.

>
>
>> While the /sys/devices/system/cpu will use the ACPI processor UID to
>> generate the index of the cpu, like:
>> cpu0  cpu1  cpu2 ...
>>
>> As the GICC Affinity Struct indicated, the ps->proximity_domain is the
>> domain to which the logical processor belongs...

Yes, we can get that information from /sys/devices/system/node, so I think
we only need to print:

ACPI: NUMA: SRAT: PXM x -> MPIDR y -> Node z

Thanks
Hanjun

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dennis Chen April 27, 2016, 11:37 a.m. UTC | #7
Hi Hanjun,

Thanks for the clarification and some little comments ;-)

On 27 April 2016 at 12:04, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> Hi Dennis, David,
>
> Sorry for the late reply, please see my comments below.
>
>
> On 2016/4/27 9:14, David Daney wrote:
>>
>> On 04/21/2016 03:06 AM, Dennis Chen wrote:
>>>
>>> On 20 April 2016 at 09:40, David Daney <ddaney.cavm@gmail.com> wrote:
>>
>> [...]
>>>>
>>>> +/* Callback for Proximity Domain -> ACPI processor UID mapping */
>>>> +void __init acpi_numa_gicc_affinity_init(struct
>>>> acpi_srat_gicc_affinity *pa)
>>>> +{
>>>> +       int pxm, node;
>>>> +       u64 mpidr;
>>>> +
>>>> +       if (srat_disabled())
>>>> +               return;
>>>> +
>>>> +       if (pa->header.length < sizeof(struct
>>>> acpi_srat_gicc_affinity)) {
>>>> +               pr_err("SRAT: Invalid SRAT header length: %d\n",
>>>> +                       pa->header.length);
>>>> +               bad_srat();
>>>> +               return;
>>>> +       }
>>>> +
>>>> +       if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
>>>> +               return;
>>>> +
>>>> +       if (cpus_in_srat >= NR_CPUS) {
>>>> +               pr_warn_once("SRAT: cpu_to_node_map[%d] is too small,
>>>> may not be able to use all cpus\n",
>>>> +                            NR_CPUS);
>>>> +               return;
>>>> +       }
>>>> +
>>>> +       pxm = pa->proximity_domain;
>>>> +       node = acpi_map_pxm_to_node(pxm);
>>>> +
>>>> +       if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
>>>> +               pr_err("SRAT: Too many proximity domains %d\n", pxm);
>>>> +               bad_srat();
>>>> +               return;
>>>> +       }
>>>> +
>>>> +       if (get_mpidr_in_madt(pa->acpi_processor_uid, &mpidr)) {
>>>> +               pr_err("SRAT: PXM %d with ACPI ID %d has no valid
>>>> MPIDR in MADT\n",
>>>> +                       pxm, pa->acpi_processor_uid);
>>>> +               bad_srat();
>>>> +               return;
>>>> +       }
>>>> +
>>>> +       early_node_cpu_hwid[cpus_in_srat].node_id = node;
>>>> +       early_node_cpu_hwid[cpus_in_srat].cpu_hwid =  mpidr;
>>>> +       node_set(node, numa_nodes_parsed);
>>>> +       cpus_in_srat++;
>>>> +       pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d cpu %d\n",
>>>> +               pxm, mpidr, node, cpus_in_srat);
>>>> +}
>>>
>>>
>>> What does the *cpu* means in above pr_info function? If it's the
>>> logical processor ID or ACPI processor UID, then I suggest to use
>>> pa->acpi_processor_uid instead of cpus_in_srat, I understand the
>
>
> I think print cpus_in_srat is pointless here, as the logic cpu number
> is allocated by OS when initializing SMP by scanning MADT table. As
> Dennis said, it's just a count number, not a number mapping to MPIDR.
>
> ACPI processor UID is the key value to connect MADT, SRAT, DSDT.
>
> For MADT, it will have MPIDR and ACPI processor UID, and OS will
> create mappings to MPIDR and cpu logical number,
> ACPI processor UID <------> MPIDR <------> CPU logical number
>
> In SRAT, there is ACPI processor UID represented, mappings will be
> ACPI processor UID <------> PXM <------> NUMA node logical number
>
> So we can use ACPI processor UID to get the MPIDR by scanning the
> MADT, then we can map NUMA node logical number to cpu logical
> number later.
>
Right, the kernel records the logical CPU index (starting from 0, the
boot CPU) in the cpu_possible bitmap while parsing the MADT GICC
subtables. So I am wondering whether we can reduce the parsing to a
single walk: acpi_numa_gicc_affinity_init() calls get_mpidr_in_madt(),
which traverses the entire MADT, and the kernel also traverses the MADT
in smp_init_cpus(). Could the two walks be merged into one?
>
>>> cpus_in_srat is just a count number of the entries of GICC Affinity
>>> Struct instance in SRAT, correct me if I am wrong. So at least it sees
>>> to me, the above pr_info will output message looks like:
>>> SRAT: PXM 0 -> MPIDR 0x100 -> Node 0 cpu 1
>>> SRAT: PXM 0 -> MPIDR 0x101 -> Node 0 cpu 2
>>> SRAT: PXM 0 -> MPIDR 0x102 -> Node 0 cpu 3
>>>
>>
>> Yes, that is correct, and for my system seems to be what we want as the
>> names in /sys/devices/system/cpu/ and /proc/cpu_info agree with the
>> sequential numbering (0..95) with 48 CPUs on each node.
>
>
> That's because you place CPUs in the same order both in MADT and SRAT :)
> if not, that will be not match.
>
Hmm, I think I made a mistaken assumption here that the ACPI processor
UID is the logical processor number. Having taken a look at the ACPI
processor driver code, the logical processor number should come
from the bitmap. And from the output pasted by David below,
we can see that his firmware uses the MPIDR as the ACPI processor UID,
which is not an incorrect implementation according to the ACPI spec...
>
>>
>> If I make the change you suggest, I get :
>> .
>> .
>> .
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x0 -> Node 0 cpu 0
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x1 -> Node 0 cpu 1
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x2 -> Node 0 cpu 2
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x3 -> Node 0 cpu 3
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x4 -> Node 0 cpu 4
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x5 -> Node 0 cpu 5
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x6 -> Node 0 cpu 6
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x7 -> Node 0 cpu 7
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x8 -> Node 0 cpu 8
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x9 -> Node 0 cpu 9
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xa -> Node 0 cpu 10
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xb -> Node 0 cpu 11
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xc -> Node 0 cpu 12
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xd -> Node 0 cpu 13
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xe -> Node 0 cpu 14
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0xf -> Node 0 cpu 15
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x100 -> Node 0 cpu 256
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x101 -> Node 0 cpu 257
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x102 -> Node 0 cpu 258
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x103 -> Node 0 cpu 259
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x104 -> Node 0 cpu 260
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x105 -> Node 0 cpu 261
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x106 -> Node 0 cpu 262
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x107 -> Node 0 cpu 263
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x108 -> Node 0 cpu 264
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x109 -> Node 0 cpu 265
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10a -> Node 0 cpu 266
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10b -> Node 0 cpu 267
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10c -> Node 0 cpu 268
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10d -> Node 0 cpu 269
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10e -> Node 0 cpu 270
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x10f -> Node 0 cpu 271
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x200 -> Node 0 cpu 512
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x201 -> Node 0 cpu 513
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x202 -> Node 0 cpu 514
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x203 -> Node 0 cpu 515
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x204 -> Node 0 cpu 516
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x205 -> Node 0 cpu 517
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x206 -> Node 0 cpu 518
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x207 -> Node 0 cpu 519
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x208 -> Node 0 cpu 520
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x209 -> Node 0 cpu 521
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20a -> Node 0 cpu 522
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20b -> Node 0 cpu 523
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20c -> Node 0 cpu 524
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20d -> Node 0 cpu 525
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20e -> Node 0 cpu 526
>> [    0.000000] ACPI: NUMA: SRAT: PXM 0 -> MPIDR 0x20f -> Node 0 cpu 527
>> [    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10000 -> Node 1 cpu
>> 65536
>> [    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10001 -> Node 1 cpu
>> 65537
>> [    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10002 -> Node 1 cpu
>> 65538
>> [    0.000000] ACPI: NUMA: SRAT: PXM 1 -> MPIDR 0x10003 -> Node 1 cpu
>> 65539
>> .
>> .
>> .
>>
>> Not really what I would want.
>
>
> How about remove the print for "cpu"? it's not the right value we want,
> and we can get such mapping information under in sysfs.
>
I think you can remove the *cpu* from the print; indeed it's pointless.
IMO, you can use the cpu_possible bitmask if you want to get the logical
processor number, just as the ACPI processor driver does. As for
early_node_cpu_hwid, the only purpose I can see for it now is to map a
logical CPU to its NUMA node number. Can we refer to the x86
implementation for that? Anyway, I am not sure, since I am not very
familiar with that code's dependencies...
>
>>
>>
>>> While the /sys/devices/system/cpu will use the ACPI processor UID to
>>> generate the index of the cpu, like:
>>> cpu0  cpu1  cpu2 ...
>>>
>>> As the GICC Affinity Struct indicated, the ps->proximity_domain is the
>>> domain to which the logical processor belongs...
>
>
> Yes, we can get such information in /sys/devices/system/node, I think
> we can only print:
>
> ACPI: NUMA: SRAT: PXM x -> MPIDR y -> Node z
Agree.
>
>
> Thanks
> Hanjun
>
David Daney April 27, 2016, 3:40 p.m. UTC | #8
On 04/27/2016 04:37 AM, Dennis Chen wrote:
> Hi Hanjun,
>
> Thanks for the clarification and some little comments ;-)
>
> On 27 April 2016 at 12:04, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>> Hi Dennis, David,
>>
[...]
>>
>>
>> Yes, we can get such information in /sys/devices/system/node, I think
>> we can only print:
>>
>> ACPI: NUMA: SRAT: PXM x -> MPIDR y -> Node z
> Agree.

I am also in agreement.  I will send a revised patch that implements this.

Thanks,
David.

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index aee323b..4b13ecd 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -113,4 +113,12 @@  static inline const char *acpi_get_enable_method(int cpu)
 pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr);
 #endif
 
+#ifdef CONFIG_ACPI_NUMA
+int arm64_acpi_numa_init(void);
+int acpi_numa_get_nid(unsigned int cpu, u64 hwid);
+#else
+static inline int arm64_acpi_numa_init(void) { return -ENOSYS; }
+static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return NUMA_NO_NODE; }
+#endif /* CONFIG_ACPI_NUMA */
+
 #endif /*_ASM_ACPI_H*/
diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
index e9b4f29..600887e 100644
--- a/arch/arm64/include/asm/numa.h
+++ b/arch/arm64/include/asm/numa.h
@@ -5,6 +5,8 @@ 
 
 #ifdef CONFIG_NUMA
 
+#define NR_NODE_MEMBLKS		(MAX_NUMNODES * 2)
+
 /* currently, arm64 implements flat NUMA topology */
 #define parent_node(node)	(node)
 
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 3793003..69569c6 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -42,6 +42,7 @@  arm64-obj-$(CONFIG_EFI)			+= efi.o efi-entry.stub.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
+arm64-obj-$(CONFIG_ACPI_NUMA)		+= acpi_numa.o
 arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 arm64-obj-$(CONFIG_PARAVIRT)		+= paravirt.o
 arm64-obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
new file mode 100644
index 0000000..fd72070
--- /dev/null
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -0,0 +1,149 @@ 
+/*
+ * ACPI 5.1 based NUMA setup for ARM64
+ * Lots of code was borrowed from arch/x86/mm/srat.c
+ *
+ * Copyright 2004 Andi Kleen, SuSE Labs.
+ * Copyright (C) 2013-2016, Linaro Ltd.
+ *		Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *
+ * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
+ *
+ * Called from acpi_numa_init while reading the SRAT and SLIT tables.
+ * Assumes all memory regions belonging to a single proximity domain
+ * are in one chunk. Holes between them will be included in the node.
+ */
+
+#define pr_fmt(fmt) "ACPI: NUMA: " fmt
+
+#include <linux/acpi.h>
+#include <linux/bitmap.h>
+#include <linux/bootmem.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/topology.h>
+
+#include <acpi/processor.h>
+#include <asm/numa.h>
+
+struct __node_cpu_hwid {
+	u32 node_id;	/* logical node this CPU was mapped to */
+	u64 cpu_hwid;	/* the CPU's MPIDR value */
+};
+
+static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = {
+	[0 ... NR_CPUS - 1] = { .node_id = NUMA_NO_NODE,
+				.cpu_hwid = PHYS_CPUID_INVALID } };
+static int cpus_in_srat;	/* filled slots in early_node_cpu_hwid[] */
+
+/* Node recorded for MPIDR @hwid, or NUMA_NO_NODE; @cpu is not consulted. */
+int acpi_numa_get_nid(unsigned int cpu, u64 hwid)
+{
+	const struct __node_cpu_hwid *rec = early_node_cpu_hwid;
+	const struct __node_cpu_hwid *end = rec + cpus_in_srat;
+
+	for (; rec < end; rec++)
+		if (rec->cpu_hwid == hwid)
+			return rec->node_id;
+	return NUMA_NO_NODE;
+}
+
+/*
+ * Find the enabled MADT GICC entry whose ACPI UID is @acpi_id and store its
+ * MPIDR in *@mpidr.  Returns 0 on a match, -ENODEV if none (or no MADT).
+ */
+static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr)
+{
+	unsigned long madt_end, entry;
+	struct acpi_table_madt *madt;
+	acpi_size tbl_size;
+	int ret = -ENODEV;
+
+	if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0,
+			(struct acpi_table_header **)&madt, &tbl_size)))
+		return ret;
+
+	madt_end = (unsigned long)madt + madt->header.length;
+	entry = (unsigned long)madt + sizeof(struct acpi_table_madt);
+	while (entry + sizeof(struct acpi_subtable_header) < madt_end) {
+		struct acpi_subtable_header *header = (void *)entry;
+		struct acpi_madt_generic_interrupt *gicc = container_of(header,
+				struct acpi_madt_generic_interrupt, header);
+
+		/* Malformed length: stop, or the walk never ends/overruns. */
+		if (header->length < sizeof(*header) ||
+		    entry + header->length > madt_end)
+			break;
+		if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT &&
+		    (gicc->flags & ACPI_MADT_ENABLED) && gicc->uid == acpi_id) {
+			*mpidr = gicc->arm_mpidr;
+			ret = 0;
+			break;
+		}
+		entry += header->length;
+	}
+	early_acpi_os_unmap_memory(madt, tbl_size);
+	return ret;
+}
+
+/*
+ * SRAT GICC affinity callback: record which node one enabled CPU belongs to.
+ * A malformed entry, or one that cannot be resolved, goes through bad_srat().
+ */
+void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
+{
+	int domain, nid;
+	u64 hwid;
+
+	if (srat_disabled())
+		return;
+	if (pa->header.length < sizeof(*pa)) {
+		pr_err("SRAT: Invalid SRAT header length: %d\n",
+			pa->header.length);
+		goto bad;
+	}
+
+	if ((pa->flags & ACPI_SRAT_GICC_ENABLED) == 0)
+		return;
+	if (cpus_in_srat >= NR_CPUS) {
+		pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n",
+			     NR_CPUS);
+		return;
+	}
+
+	domain = pa->proximity_domain;
+	nid = acpi_map_pxm_to_node(domain);
+	if (nid == NUMA_NO_NODE || nid >= MAX_NUMNODES) {
+		pr_err("SRAT: Too many proximity domains %d\n", domain);
+		goto bad;
+	}
+
+	if (get_mpidr_in_madt(pa->acpi_processor_uid, &hwid) != 0) {
+		pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n",
+			domain, pa->acpi_processor_uid);
+		goto bad;
+	}
+
+	early_node_cpu_hwid[cpus_in_srat].node_id = nid;
+	early_node_cpu_hwid[cpus_in_srat].cpu_hwid = hwid;
+	node_set(nid, numa_nodes_parsed);
+	cpus_in_srat++;
+	pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d cpu %d\n",
+		domain, hwid, nid, cpus_in_srat);
+	return;
+bad:
+	bad_srat();
+}
+
+/* Run acpi_numa_init(), then fail with -EINVAL if the SRAT is disabled. */
+int __init arm64_acpi_numa_init(void)
+{
+	int err = acpi_numa_init();
+
+	if (!err && srat_disabled())
+		err = -EINVAL;
+
+	return err;
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index bebc4c6..6c7ef8f 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -524,6 +524,8 @@  acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 	 */
 	acpi_set_mailbox_entry(cpu_count, processor);
 
+	early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count, hwid));
+
 	cpu_count++;
 }
 
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 6cb03f9..fc15186 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -17,6 +17,7 @@ 
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/acpi.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/module.h>
@@ -388,7 +389,9 @@  static int __init dummy_numa_init(void)
 void __init arm64_numa_init(void)
 {
 	if (!numa_off) {
-		if (!numa_init(of_numa_init))
+		if (!acpi_disabled && !numa_init(arm64_acpi_numa_init))
+			return;
+		if (acpi_disabled && !numa_init(of_numa_init))
 			return;
 	}