| Submitter | David Rientjes |
|---|---|
| Date | 2009-10-27 20:25:51 |
| Message ID | <alpine.DEB.2.00.0910271323410.32543@chino.kir.corp.google.com> |
| Download | mbox | patch |
| Permalink | /patch/56172/ |
| State | New |
| Headers | show |
Comments
I applied your previous patch with the change to use static and
here's the console output from a live system:
[ 0.000000] SRAT: PXM 0 -> APIC {0-7,16-23} -> Node 0
[ 0.000000] SRAT: PXM 1 -> APIC {32-39,48-55} -> Node 1
[ 0.000000] SRAT: PXM 2 -> APIC {64-71,80-87} -> Node 2
[ 0.000000] SRAT: PXM 3 -> APIC {96-103,112-119} -> Node 3
[ 0.000000] SRAT: PXM 4 -> APIC {128-135,144-151} -> Node 4
[ 0.000000] SRAT: PXM 5 -> APIC {160-167,176-183} -> Node 5
[ 0.000000] SRAT: PXM 6 -> APIC {192-199,208-215} -> Node 6
[ 0.000000] SRAT: PXM 7 -> APIC {224-231,240-247} -> Node 7
[ 0.000000] SRAT: PXM 8 -> APIC {256-263,272-279} -> Node 8
[ 0.000000] SRAT: PXM 9 -> APIC {288-295,304-311} -> Node 9
[ 0.000000] SRAT: PXM 10 -> APIC {320-327,336-343} -> Node 10
[ 0.000000] SRAT: PXM 11 -> APIC {352-359,368-375} -> Node 11
[ 0.000000] SRAT: PXM 12 -> APIC {384-391,400-407} -> Node 12
[ 0.000000] SRAT: PXM 13 -> APIC {416-423,432-439} -> Node 13
[ 0.000000] SRAT: PXM 14 -> APIC {448-455,464-471} -> Node 14
[ 0.000000] SRAT: PXM 15 -> APIC {480-487,496-503} -> Node 15
[ 0.000000] SRAT: PXM 16 -> APIC {512-519,528-535} -> Node 16
[ 0.000000] SRAT: PXM 17 -> APIC {544-551,560-567} -> Node 17
[ 0.000000] SRAT: PXM 18 -> APIC {576-583,592-599} -> Node 18
[ 0.000000] SRAT: PXM 19 -> APIC {608-615,624-631} -> Node 19
[ 0.000000] SRAT: PXM 20 -> APIC {640-647,656-663} -> Node 20
[ 0.000000] SRAT: PXM 21 -> APIC {672-679,688-695} -> Node 21
[ 0.000000] SRAT: PXM 22 -> APIC {704-711,720-727} -> Node 22
[ 0.000000] SRAT: PXM 23 -> APIC {736-743,752-759} -> Node 23
David Rientjes wrote:
> On Tue, 27 Oct 2009, Mike Travis wrote:
>
>> Hi David,
>>
>> Very Cool, I'll try it out and let you know how it goes.
>>
>> Note that it would be better to declare the BITMAP in the
>> static initdata section so it doesn't grow the stack by 4k
>> bytes. (And it's thrown away after the kernel starts.)
>>
>
> Right, here's an updated version. I was thinking of MAX_PXM_DOMAINS being
> 256 instead of MAX_LOCAL_APIC :)
>
> Here's an updated version. apicid_map and apicid_list don't need to be
> synchronized because there're no concurrency issues here on init.
>
>
>
> x86: reduce srat verbosity in the kernel log
>
> It's possible to reduce the number of SRAT messages emitted to the kernel
> log by printing each valid pxm once and then creating bitmaps to represent
> the apicids that map to the same node.
>
> This reduces lines such as
>
> SRAT: PXM 0 -> APIC 0 -> Node 0
> SRAT: PXM 0 -> APIC 1 -> Node 0
> SRAT: PXM 1 -> APIC 2 -> Node 1
> SRAT: PXM 1 -> APIC 3 -> Node 1
>
> to
>
> SRAT: PXM 0 -> APIC {0-1} -> Node 0
> SRAT: PXM 1 -> APIC {2-3} -> Node 1
>
> Signed-off-by: David Rientjes <rientjes@google.com>
> ---
> arch/x86/mm/srat_64.c | 32 ++++++++++++++++++++++++++++----
> drivers/acpi/numa.c | 5 +++++
> include/linux/acpi.h | 3 ++-
> 3 files changed, 35 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
> --- a/arch/x86/mm/srat_64.c
> +++ b/arch/x86/mm/srat_64.c
> @@ -36,6 +36,9 @@ static int num_node_memblks __initdata;
> static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
> static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
>
> +static DECLARE_BITMAP(apicid_map, MAX_LOCAL_APIC) __initdata;
> +static char apicid_list[MAX_LOCAL_APIC] __initdata;
> +
> static __init int setup_node(int pxm)
> {
> return acpi_map_pxm_to_node(pxm);
> @@ -136,8 +139,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
> apicid_to_node[apic_id] = node;
> node_set(node, cpu_nodes_parsed);
> acpi_numa = 1;
> - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
> - pxm, apic_id, node);
> }
>
> /* Callback for Proximity Domain -> LAPIC mapping */
> @@ -170,8 +171,31 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
> apicid_to_node[apic_id] = node;
> node_set(node, cpu_nodes_parsed);
> acpi_numa = 1;
> - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
> - pxm, apic_id, node);
> +}
> +
> +void __init acpi_numa_print_srat_mapping(void)
> +{
> + int i, j;
> +
> + for (i = 0; i < MAX_PXM_DOMAINS; i++) {
> + int nid;
> +
> + nid = pxm_to_node(i);
> + if (nid == NUMA_NO_NODE)
> + continue;
> +
> + bitmap_zero(apicid_map, MAX_LOCAL_APIC);
> + for (j = 0; j < MAX_LOCAL_APIC; j++)
> + if (apicid_to_node[j] == nid)
> + set_bit(j, apicid_map);
> +
> + if (bitmap_empty(apicid_map, MAX_LOCAL_APIC))
> + continue;
> + bitmap_scnlistprintf(apicid_list, MAX_LOCAL_APIC,
> + apicid_map, MAX_LOCAL_APIC);
> + pr_info("SRAT: PXM %u -> APIC {%s} -> Node %u\n",
> + i, apicid_list, nid);
> + }
> }
>
> #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
> diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
> --- a/drivers/acpi/numa.c
> +++ b/drivers/acpi/numa.c
> @@ -281,6 +281,10 @@ acpi_table_parse_srat(enum acpi_srat_type id,
> handler, max_entries);
> }
>
> +void __init __attribute__((weak)) acpi_numa_print_srat_mapping(void)
> +{
> +}
> +
> int __init acpi_numa_init(void)
> {
> /* SRAT: Static Resource Affinity Table */
> @@ -292,6 +296,7 @@ int __init acpi_numa_init(void)
> acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
> acpi_parse_memory_affinity,
> NR_NODE_MEMBLKS);
> + acpi_numa_print_srat_mapping();
> }
>
> /* SLIT: System Locality Information Table */
> diff --git a/include/linux/acpi.h b/include/linux/acpi.h
> --- a/include/linux/acpi.h
> +++ b/include/linux/acpi.h
> @@ -92,12 +92,13 @@ int acpi_table_parse_madt (enum acpi_madt_type id, acpi_table_entry_handler hand
> int acpi_parse_mcfg (struct acpi_table_header *header);
> void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
>
> -/* the following four functions are architecture-dependent */
> +/* the following six functions are architecture-dependent */
> void acpi_numa_slit_init (struct acpi_table_slit *slit);
> void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa);
> void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa);
> void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
> void acpi_numa_arch_fixup(void);
> +void acpi_numa_print_srat_mapping(void);
>
> #ifdef CONFIG_ACPI_HOTPLUG_CPU
> /* Arch dependent functions for cpu hotplug support */
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 27 Oct 2009, Mike Travis wrote: > I applied your previous patch with the change to use static and > here's the console output from a live system: > > > [ 0.000000] SRAT: PXM 0 -> APIC {0-7,16-23} -> Node 0 > [ 0.000000] SRAT: PXM 1 -> APIC {32-39,48-55} -> Node 1 > [ 0.000000] SRAT: PXM 2 -> APIC {64-71,80-87} -> Node 2 > [ 0.000000] SRAT: PXM 3 -> APIC {96-103,112-119} -> Node 3 > [ 0.000000] SRAT: PXM 4 -> APIC {128-135,144-151} -> Node 4 > [ 0.000000] SRAT: PXM 5 -> APIC {160-167,176-183} -> Node 5 > [ 0.000000] SRAT: PXM 6 -> APIC {192-199,208-215} -> Node 6 > [ 0.000000] SRAT: PXM 7 -> APIC {224-231,240-247} -> Node 7 > [ 0.000000] SRAT: PXM 8 -> APIC {256-263,272-279} -> Node 8 > [ 0.000000] SRAT: PXM 9 -> APIC {288-295,304-311} -> Node 9 > [ 0.000000] SRAT: PXM 10 -> APIC {320-327,336-343} -> Node 10 > [ 0.000000] SRAT: PXM 11 -> APIC {352-359,368-375} -> Node 11 > [ 0.000000] SRAT: PXM 12 -> APIC {384-391,400-407} -> Node 12 > [ 0.000000] SRAT: PXM 13 -> APIC {416-423,432-439} -> Node 13 > [ 0.000000] SRAT: PXM 14 -> APIC {448-455,464-471} -> Node 14 > [ 0.000000] SRAT: PXM 15 -> APIC {480-487,496-503} -> Node 15 > [ 0.000000] SRAT: PXM 16 -> APIC {512-519,528-535} -> Node 16 > [ 0.000000] SRAT: PXM 17 -> APIC {544-551,560-567} -> Node 17 > [ 0.000000] SRAT: PXM 18 -> APIC {576-583,592-599} -> Node 18 > [ 0.000000] SRAT: PXM 19 -> APIC {608-615,624-631} -> Node 19 > [ 0.000000] SRAT: PXM 20 -> APIC {640-647,656-663} -> Node 20 > [ 0.000000] SRAT: PXM 21 -> APIC {672-679,688-695} -> Node 21 > [ 0.000000] SRAT: PXM 22 -> APIC {704-711,720-727} -> Node 22 > [ 0.000000] SRAT: PXM 23 -> APIC {736-743,752-759} -> Node 23 > Quite the system you have there :) What was once 760 lines has been reduced to 24 without removing any information. This seems to be the most we can reduce this particular output since we don't support mapping multiple pxms to a single node. 
-- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[David Rientjes - Tue, Oct 27, 2009 at 01:25:51PM -0700]
| On Tue, 27 Oct 2009, Mike Travis wrote:
|
...
| +
| +void __init acpi_numa_print_srat_mapping(void)
| +{
| + int i, j;
| +
| + for (i = 0; i < MAX_PXM_DOMAINS; i++) {
| + int nid;
| +
| + nid = pxm_to_node(i);
| + if (nid == NUMA_NO_NODE)
Btw, David, while you're at it, I'm just curious -- shouldn't we test it
against NID_INVAL (as pxm_to_node_map is initially defined to)? Not a big
deal at all (since they are both = -1) but for the record.
Or perhaps I'm missing something?
| + continue;
...
-- Cyrill
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 27 Oct 2009, Cyrill Gorcunov wrote: > | +void __init acpi_numa_print_srat_mapping(void) > | +{ > | + int i, j; > | + > | + for (i = 0; i < MAX_PXM_DOMAINS; i++) { > | + int nid; > | + > | + nid = pxm_to_node(i); > | + if (nid == NUMA_NO_NODE) > > Btw, David, while you at it, I just curious -- shouldn't we test it > with NID_INVAL (as pxm_to_node_map initially defined to)? Not a big > deal at all (since they are both = -1) but for the record. > Or perhaps I miss something? > I don't think we need to address that since NID_INVAL is going away and will be replaced by NUMA_NO_NODE since Lee has exposed it globally in his mempolicy patchset, and as you mention they are the same anyway. -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[David Rientjes - Tue, Oct 27, 2009 at 02:06:21PM -0700]
| On Tue, 27 Oct 2009, Cyrill Gorcunov wrote:
|
| > | +void __init acpi_numa_print_srat_mapping(void)
| > | +{
| > | + int i, j;
| > | +
| > | + for (i = 0; i < MAX_PXM_DOMAINS; i++) {
| > | + int nid;
| > | +
| > | + nid = pxm_to_node(i);
| > | + if (nid == NUMA_NO_NODE)
| >
| > Btw, David, while you at it, I just curious -- shouldn't we test it
| > with NID_INVAL (as pxm_to_node_map initially defined to)? Not a big
| > deal at all (since they are both = -1) but for the record.
| > Or perhaps I miss something?
| >
|
| I don't think we need to address that since NID_INVAL is going away and
| will be replaced by NUMA_NO_NODE since Lee has exposed it globally in his
| mempolicy patchset, and as you mention they are the same anyway.
|
I see. Thanks!
-- Cyrill
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
David Rientjes wrote: > On Tue, 27 Oct 2009, Mike Travis wrote: > >> I applied your previous patch with the change to use static and >> here's the console output from a live system: >> >> >> [ 0.000000] SRAT: PXM 0 -> APIC {0-7,16-23} -> Node 0 >> [ 0.000000] SRAT: PXM 1 -> APIC {32-39,48-55} -> Node 1 >> [ 0.000000] SRAT: PXM 2 -> APIC {64-71,80-87} -> Node 2 >> [ 0.000000] SRAT: PXM 3 -> APIC {96-103,112-119} -> Node 3 >> [ 0.000000] SRAT: PXM 4 -> APIC {128-135,144-151} -> Node 4 >> [ 0.000000] SRAT: PXM 5 -> APIC {160-167,176-183} -> Node 5 >> [ 0.000000] SRAT: PXM 6 -> APIC {192-199,208-215} -> Node 6 >> [ 0.000000] SRAT: PXM 7 -> APIC {224-231,240-247} -> Node 7 >> [ 0.000000] SRAT: PXM 8 -> APIC {256-263,272-279} -> Node 8 >> [ 0.000000] SRAT: PXM 9 -> APIC {288-295,304-311} -> Node 9 >> [ 0.000000] SRAT: PXM 10 -> APIC {320-327,336-343} -> Node 10 >> [ 0.000000] SRAT: PXM 11 -> APIC {352-359,368-375} -> Node 11 >> [ 0.000000] SRAT: PXM 12 -> APIC {384-391,400-407} -> Node 12 >> [ 0.000000] SRAT: PXM 13 -> APIC {416-423,432-439} -> Node 13 >> [ 0.000000] SRAT: PXM 14 -> APIC {448-455,464-471} -> Node 14 >> [ 0.000000] SRAT: PXM 15 -> APIC {480-487,496-503} -> Node 15 >> [ 0.000000] SRAT: PXM 16 -> APIC {512-519,528-535} -> Node 16 >> [ 0.000000] SRAT: PXM 17 -> APIC {544-551,560-567} -> Node 17 >> [ 0.000000] SRAT: PXM 18 -> APIC {576-583,592-599} -> Node 18 >> [ 0.000000] SRAT: PXM 19 -> APIC {608-615,624-631} -> Node 19 >> [ 0.000000] SRAT: PXM 20 -> APIC {640-647,656-663} -> Node 20 >> [ 0.000000] SRAT: PXM 21 -> APIC {672-679,688-695} -> Node 21 >> [ 0.000000] SRAT: PXM 22 -> APIC {704-711,720-727} -> Node 22 >> [ 0.000000] SRAT: PXM 23 -> APIC {736-743,752-759} -> Node 23 >> > > Quite the system you have there :) What was once 760 lines has been > reduced to 24 without removing any information. > > This seems to be the most we can reduce this particular output since we > don't support mapping multiple pxms to a single node. 
Yes, thanks very much for the optimization. (And you can add my Acked-by or whatever you need.) Tomorrow I will have more time on the system and will try out all the new changes together, mostly with summarizing the Processor stats. -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
>> Quite the system you have there :) What was once 760 lines has been >> reduced to 24 without removing any information. >> >> This seems to be the most we can reduce this particular output since we >> don't support mapping multiple pxms to a single node. > > Yes, thanks very much for the optimization. > > (And you can add my Acked-by or whatever you need.) Looks also good to me, thanks. Also Acked-by. -Andi
> +static DECLARE_BITMAP(apicid_map, MAX_LOCAL_APIC) __initdata; > +static char apicid_list[MAX_LOCAL_APIC] __initdata; Is MAX_LOCAL_APIC really big enough to print them all in ASCII? It would be better to not use that large a buffer, but print in smaller pieces (I realize this would enlarge your patch, but then it would also save a lot of BSS) -Andi -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
David Rientjes wrote: > On Tue, 27 Oct 2009, Mike Travis wrote: > >> I applied your previous patch with the change to use static and >> here's the console output from a live system: >> >> >> [ 0.000000] SRAT: PXM 0 -> APIC {0-7,16-23} -> Node 0 >> [ 0.000000] SRAT: PXM 1 -> APIC {32-39,48-55} -> Node 1 >> [ 0.000000] SRAT: PXM 2 -> APIC {64-71,80-87} -> Node 2 >> [ 0.000000] SRAT: PXM 3 -> APIC {96-103,112-119} -> Node 3 >> [ 0.000000] SRAT: PXM 4 -> APIC {128-135,144-151} -> Node 4 >> [ 0.000000] SRAT: PXM 5 -> APIC {160-167,176-183} -> Node 5 >> [ 0.000000] SRAT: PXM 6 -> APIC {192-199,208-215} -> Node 6 >> [ 0.000000] SRAT: PXM 7 -> APIC {224-231,240-247} -> Node 7 >> [ 0.000000] SRAT: PXM 8 -> APIC {256-263,272-279} -> Node 8 >> [ 0.000000] SRAT: PXM 9 -> APIC {288-295,304-311} -> Node 9 >> [ 0.000000] SRAT: PXM 10 -> APIC {320-327,336-343} -> Node 10 >> [ 0.000000] SRAT: PXM 11 -> APIC {352-359,368-375} -> Node 11 >> [ 0.000000] SRAT: PXM 12 -> APIC {384-391,400-407} -> Node 12 >> [ 0.000000] SRAT: PXM 13 -> APIC {416-423,432-439} -> Node 13 >> [ 0.000000] SRAT: PXM 14 -> APIC {448-455,464-471} -> Node 14 >> [ 0.000000] SRAT: PXM 15 -> APIC {480-487,496-503} -> Node 15 >> [ 0.000000] SRAT: PXM 16 -> APIC {512-519,528-535} -> Node 16 >> [ 0.000000] SRAT: PXM 17 -> APIC {544-551,560-567} -> Node 17 >> [ 0.000000] SRAT: PXM 18 -> APIC {576-583,592-599} -> Node 18 >> [ 0.000000] SRAT: PXM 19 -> APIC {608-615,624-631} -> Node 19 >> [ 0.000000] SRAT: PXM 20 -> APIC {640-647,656-663} -> Node 20 >> [ 0.000000] SRAT: PXM 21 -> APIC {672-679,688-695} -> Node 21 >> [ 0.000000] SRAT: PXM 22 -> APIC {704-711,720-727} -> Node 22 >> [ 0.000000] SRAT: PXM 23 -> APIC {736-743,752-759} -> Node 23 >> > > Quite the system you have there :) What was once 760 lines has been > reduced to 24 without removing any information. > can you change the apic to hex print? 
YH -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 28 Oct 2009, Andi Kleen wrote: > > +static DECLARE_BITMAP(apicid_map, MAX_LOCAL_APIC) __initdata; > > +static char apicid_list[MAX_LOCAL_APIC] __initdata; > > Is MAX_LOCAL_APIC really big enough to print them all in ASCII? > > It would be better to not use that large a buffer, but print > in smaller pieces (I realize this would enlarge your patch, > but then it would also save a lot of BSS) > MAX_LOCAL_APIC was definitely an arbitrary choice here and has very little relevance. scnlistprintf will protect against overflow, but we still need to decide upon a constant that will emit the most information possible while not overly polluting the printk and saving on bss, as you mentioned. I suspect we could agree on a value as little as 128 and it would work for the overwhelming majority (all?) of users. -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 28 Oct 2009, Andi Kleen wrote: > >> Quite the system you have there :) What was once 760 lines has been > >> reduced to 24 without removing any information. > >> > >> This seems to be the most we can reduce this particular output since we > >> don't support mapping multiple pxms to a single node. > > > > Yes, thanks very much for the optimization. > > > > (And you can add my Acked-by or whatever you need.) > > Looks also good to me, thanks. Also Acked-by. > Thanks Andi. I'm hoping Ingo will pick this up and not have a problem with the use of NUMA_NO_NODE vs. NID_INVAL since there's a patch pending in -mm that removes the latter and this saves Linus a build error when he pushes for 2.6.33 (and they are both defined the same anyway). -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 27 Oct 2009, Yinghai Lu wrote: > can you change the apic to hex print? > That would be an extension made on top of my patch (which may be difficult without adding an additional library function to be used in this case since it relies on bitmap_scnlistprintf()). It's been printed as an unsigned int for well over four years so I don't see any specific urgency, anyway. -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> > > > MAX_LOCAL_APIC was definitely an arbitrary choice here and has very little > relevance. scnlistprintf will protect against overflow, but we still need > to decide upon a constant that will emit the most information possible > while not overly polluting the printk and saving on bss, as you mentioned. > I suspect we could agree on a value as little as 128 and it would work for > the overwhelming majority (all?) of users. For now at least it seems reasonable to limit to 128 or so, yes (and go back to the stack). If we ever have sparse apic ids for nodes then that might change; but in that case we could still just do an acpidump or teach the printer to be more clever and support strides. It would just be good to have some indication in the output if there was an overflow. -Andi
Andi Kleen wrote: >> MAX_LOCAL_APIC was definitely an arbitrary choice here and has very little >> relevance. scnlistprintf will protect against overflow, but we still need >> to decide upon a constant that will emit the most information possible >> while not overly polluting the printk and saving on bss, as you mentioned. >> I suspect we could agree on a value as little as 128 and it would work for >> the overwhelming majority (all?) of users. > > For now at least seems reasonable to limit to 128 or so yes (and go > back to the stack). if we ever have sparse apic ids for nodes > then that might change; but in this case could still just do > a acpidump or teach the printer to be more clever and support > strides. > > It would be just good to have some indication in the output > if there was a overflow. > > -Andi > I don't understand the importance of this when the memory is given back after the system starts up anyway...? Thanks, Mike -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 28 Oct 2009, Mike Travis wrote: > I don't understand the importance of this when the memory is given back > after the system starts up anyway...? > Printing a list of apic ids longer than 128 characters would pollute the kernel log and this upper bound will probably never be reached based on the way apic ids are created for physical and logical processors: they are normally reduced to ranges instead of comma-separated entities. -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
David Rientjes wrote: > On Wed, 28 Oct 2009, Mike Travis wrote: > >> I don't understand the importance of this when the memory is given back >> after the system starts up anyway...? >> > > Printing a list of apic ids longer than 128 characters would pollute the > kernel log and this upper bound will probably never be reached based on > the way apic ids are created for physical and logical processors: they are > normally reduced to ranges instead of comma seperated entities. Your latest patch tested: [ 0.000000] SRAT: PXM 0 -> APIC {0-7,16-23} -> Node 0 [ 0.000000] SRAT: PXM 1 -> APIC {32-39,48-55} -> Node 1 [ 0.000000] SRAT: PXM 2 -> APIC {64-71,80-87} -> Node 2 [ 0.000000] SRAT: PXM 3 -> APIC {96-103,112-119} -> Node 3 [ 0.000000] SRAT: PXM 4 -> APIC {128-135,144-151} -> Node 4 [ 0.000000] SRAT: PXM 5 -> APIC {160-167,176-183} -> Node 5 [ 0.000000] SRAT: PXM 6 -> APIC {192-199,208-215} -> Node 6 [ 0.000000] SRAT: PXM 7 -> APIC {224-231,240-247} -> Node 7 [ 0.000000] SRAT: PXM 8 -> APIC {256-263,272-279} -> Node 8 [ 0.000000] SRAT: PXM 9 -> APIC {288-295,304-311} -> Node 9 [ 0.000000] SRAT: PXM 10 -> APIC {320-327,336-343} -> Node 10 [ 0.000000] SRAT: PXM 11 -> APIC {352-359,368-375} -> Node 11 [ 0.000000] SRAT: PXM 12 -> APIC {384-391,400-407} -> Node 12 [ 0.000000] SRAT: PXM 13 -> APIC {416-423,432-439} -> Node 13 [ 0.000000] SRAT: PXM 14 -> APIC {448-455,464-471} -> Node 14 [ 0.000000] SRAT: PXM 15 -> APIC {480-487,496-503} -> Node 15 [ 0.000000] SRAT: PXM 16 -> APIC {512-519,528-535} -> Node 16 [ 0.000000] SRAT: PXM 17 -> APIC {544-551,560-567} -> Node 17 [ 0.000000] SRAT: PXM 18 -> APIC {576-583,592-599} -> Node 18 [ 0.000000] SRAT: PXM 19 -> APIC {608-615,624-631} -> Node 19 [ 0.000000] SRAT: PXM 20 -> APIC {640-647,656-663} -> Node 20 [ 0.000000] SRAT: PXM 21 -> APIC {672-679,688-695} -> Node 21 [ 0.000000] SRAT: PXM 22 -> APIC {704-711,720-727} -> Node 22 [ 0.000000] SRAT: PXM 23 -> APIC {736-743,752-759} -> Node 23 [ 0.000000] SRAT: PXM 24 -> APIC 
{768-775,784-791} -> Node 24 [ 0.000000] SRAT: PXM 25 -> APIC {800-807,816-823} -> Node 25 [ 0.000000] SRAT: PXM 26 -> APIC {832-839,848-855} -> Node 26 [ 0.000000] SRAT: PXM 27 -> APIC {864-871,880-887} -> Node 27 [ 0.000000] SRAT: PXM 28 -> APIC {896-903,912-919} -> Node 28 [ 0.000000] SRAT: PXM 29 -> APIC {928-935,944-951} -> Node 29 [ 0.000000] SRAT: PXM 30 -> APIC {960-967,976-983} -> Node 30 [ 0.000000] SRAT: PXM 31 -> APIC {992-999,1008-1015} -> Node 31 [ 0.000000] SRAT: PXM 32 -> APIC {1024-1031,1040-1047} -> Node 32 [ 0.000000] SRAT: PXM 33 -> APIC {1056-1063,1072-1079} -> Node 33 [ 0.000000] SRAT: PXM 34 -> APIC {1088-1095,1104-1111} -> Node 34 [ 0.000000] SRAT: PXM 35 -> APIC {1120-1127,1136-1143} -> Node 35 [ 0.000000] SRAT: PXM 36 -> APIC {1152-1159,1168-1175} -> Node 36 [ 0.000000] SRAT: PXM 37 -> APIC {1184-1191,1200-1207} -> Node 37 [ 0.000000] SRAT: PXM 38 -> APIC {1216-1223,1232-1239} -> Node 38 [ 0.000000] SRAT: PXM 39 -> APIC {1248-1255,1264-1271} -> Node 39 -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 28 Oct 2009, Mike Travis wrote: > Your latest patch tested: > > [ 0.000000] SRAT: PXM 0 -> APIC {0-7,16-23} -> Node 0 > [ 0.000000] SRAT: PXM 1 -> APIC {32-39,48-55} -> Node 1 > [ 0.000000] SRAT: PXM 2 -> APIC {64-71,80-87} -> Node 2 > [ 0.000000] SRAT: PXM 3 -> APIC {96-103,112-119} -> Node 3 > [ 0.000000] SRAT: PXM 4 -> APIC {128-135,144-151} -> Node 4 > [ 0.000000] SRAT: PXM 5 -> APIC {160-167,176-183} -> Node 5 > [ 0.000000] SRAT: PXM 6 -> APIC {192-199,208-215} -> Node 6 > [ 0.000000] SRAT: PXM 7 -> APIC {224-231,240-247} -> Node 7 > [ 0.000000] SRAT: PXM 8 -> APIC {256-263,272-279} -> Node 8 > [ 0.000000] SRAT: PXM 9 -> APIC {288-295,304-311} -> Node 9 > [ 0.000000] SRAT: PXM 10 -> APIC {320-327,336-343} -> Node 10 > [ 0.000000] SRAT: PXM 11 -> APIC {352-359,368-375} -> Node 11 > [ 0.000000] SRAT: PXM 12 -> APIC {384-391,400-407} -> Node 12 > [ 0.000000] SRAT: PXM 13 -> APIC {416-423,432-439} -> Node 13 > [ 0.000000] SRAT: PXM 14 -> APIC {448-455,464-471} -> Node 14 > [ 0.000000] SRAT: PXM 15 -> APIC {480-487,496-503} -> Node 15 > [ 0.000000] SRAT: PXM 16 -> APIC {512-519,528-535} -> Node 16 > [ 0.000000] SRAT: PXM 17 -> APIC {544-551,560-567} -> Node 17 > [ 0.000000] SRAT: PXM 18 -> APIC {576-583,592-599} -> Node 18 > [ 0.000000] SRAT: PXM 19 -> APIC {608-615,624-631} -> Node 19 > [ 0.000000] SRAT: PXM 20 -> APIC {640-647,656-663} -> Node 20 > [ 0.000000] SRAT: PXM 21 -> APIC {672-679,688-695} -> Node 21 > [ 0.000000] SRAT: PXM 22 -> APIC {704-711,720-727} -> Node 22 > [ 0.000000] SRAT: PXM 23 -> APIC {736-743,752-759} -> Node 23 > [ 0.000000] SRAT: PXM 24 -> APIC {768-775,784-791} -> Node 24 > [ 0.000000] SRAT: PXM 25 -> APIC {800-807,816-823} -> Node 25 > [ 0.000000] SRAT: PXM 26 -> APIC {832-839,848-855} -> Node 26 > [ 0.000000] SRAT: PXM 27 -> APIC {864-871,880-887} -> Node 27 > [ 0.000000] SRAT: PXM 28 -> APIC {896-903,912-919} -> Node 28 > [ 0.000000] SRAT: PXM 29 -> APIC {928-935,944-951} -> Node 29 > [ 0.000000] SRAT: PXM 30 -> APIC 
{960-967,976-983} -> Node 30 > [ 0.000000] SRAT: PXM 31 -> APIC {992-999,1008-1015} -> Node 31 > [ 0.000000] SRAT: PXM 32 -> APIC {1024-1031,1040-1047} -> Node 32 > [ 0.000000] SRAT: PXM 33 -> APIC {1056-1063,1072-1079} -> Node 33 > [ 0.000000] SRAT: PXM 34 -> APIC {1088-1095,1104-1111} -> Node 34 > [ 0.000000] SRAT: PXM 35 -> APIC {1120-1127,1136-1143} -> Node 35 > [ 0.000000] SRAT: PXM 36 -> APIC {1152-1159,1168-1175} -> Node 36 > [ 0.000000] SRAT: PXM 37 -> APIC {1184-1191,1200-1207} -> Node 37 > [ 0.000000] SRAT: PXM 38 -> APIC {1216-1223,1232-1239} -> Node 38 > [ 0.000000] SRAT: PXM 39 -> APIC {1248-1255,1264-1271} -> Node 39 Looks good, 1272 lines reduced to 40. -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
David Rientjes wrote: > On Wed, 28 Oct 2009, Mike Travis wrote: > >> I don't understand the importance of this when the memory is given back >> after the system starts up anyway...? >> > > Printing a list of apic ids longer than 128 characters would pollute the > kernel log and this upper bound will probably never be reached based on > the way apic ids are created for physical and logical processors: they are > normally reduced to ranges instead of comma seperated entities. Ahh, ok, thanks. Does that mean this 10,649 character line full of periods is illegal? [ 102.551570] Completing Region/Field/Buffer/Package initialization: ............... [long time later] ......... <4>Clocksource tsc unstable (delta = 4396383657849 ns) I'm having trouble finding it. Does it look familiar to anyone? Thanks, Mike -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 28 Oct 2009, Mike Travis wrote: > > Printing a list of apic ids longer than 128 characters would pollute the > > kernel log and this upper bound will probably never be reached based on the > > way apic ids are created for physical and logical processors: they are > > normally reduced to ranges instead of comma seperated entities. > > Ahh, ok, thanks. > > Does that mean this 10,649 character line full of periods is illegal? > I'm not saying it would be illegal, merely that it would harm readability. Based on how apic id's are formed from processor ids, though, I think we're really talking about an upper limit (128) that will never be reached. > [ 102.551570] Completing Region/Field/Buffer/Package initialization: > ............... [long time later] ......... > <4>Clocksource tsc unstable (delta = 4396383657849 ns) > > I'm having trouble finding it. Does it look familiar to anyone? > It's debugging output from acpi_ns_initialize_objects() and each period is from acpi_ns_init_one_device(). You can suppress it by disabling CONFIG_ACPI_DEBUG. -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
David Rientjes wrote: > On Wed, 28 Oct 2009, Mike Travis wrote: > >>> Printing a list of apic ids longer than 128 characters would pollute the >>> kernel log and this upper bound will probably never be reached based on the >>> way apic ids are created for physical and logical processors: they are >>> normally reduced to ranges instead of comma seperated entities. >> Ahh, ok, thanks. >> >> Does that mean this 10,649 character line full of periods is illegal? >> > > I'm not saying it would be illegal, merely that it would be harm > readability. Based on how apic id's are formed from processor ids, > though, I think we're really talking about an upper limit (128) that will > never be reached. We actually have many, many more than that by adding on some extra bits to the CPU's apicid. These select which blade in the system to target. > >> [ 102.551570] Completing Region/Field/Buffer/Package initialization: >> ............... [long time later] ......... >> <4>Clocksource tsc unstable (delta = 4396383657849 ns) >> >> I'm having trouble finding it. Does it look familiar to anyone? >> > > It's debugging output from acpi_ns_initialize_objects() and each period is > from acpi_ns_init_one_device(). You can suppress it by disabing > CONFIG_ACPI_DEBUG. Ahh, didn't know that was set in the (our) default config. Is it normally set by distros? -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 28 Oct 2009, Mike Travis wrote: > > I'm not saying it would be illegal, merely that it would be harm > > readability. Based on how apic id's are formed from processor ids, though, > > I think we're really talking about an upper limit (128) that will never be > > reached. > > We actually have many, many more than that by adding on some extra bits > to the CPU's apicid. These select which blade in the system to target. > Maybe I've been vague in my rationale for why this limit will probably never be reached. The way apic ids are constructed, with physical and logical processor ids, it tends to lend itself to ranges where bitmap_scnlistprintf() can specify a large number of apic ids with relatively few ASCII characters because logical processors typically do not have differing pxms. For us to reach the 128 character upper bound, scnlistprintf() would need to have many, many distinct ranges; your example showed two ranges per pxm (many more machines would have only a single range). In other words, we're not predicting to have "1-2,4-6,8-9,11-13,15-17," etc, that we often have with nodemasks. -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
David Rientjes wrote: > On Wed, 28 Oct 2009, Mike Travis wrote: > >>> I'm not saying it would be illegal, merely that it would harm >>> readability. Based on how apic id's are formed from processor ids, though, >>> I think we're really talking about an upper limit (128) that will never be >>> reached. >> We actually have many, many more than that by adding on some extra bits >> to the CPU's apicid. These select which blade in the system to target. >> > > Maybe I've been vague in my rationale for why this limit will probably > never be reached. The way apic ids are constructed, with physical and > logical processor ids, it tends to lend itself to ranges where > bitmap_scnlistprintf() can specify a large number of apic ids with > relatively few ASCII characters because logical processors typically do > not have differing pxms. For us to reach the 128 character upper bound, > scnlistprintf() would need to have many, many distinct ranges; your > example showed two ranges per pxm (many more machines would have only a > single range). In other words, we're not predicting to have > "1-2,4-6,8-9,11-13,15-17," etc, that we often have with nodemasks. Yes, you are correct. (I was confused... ;-) I believe the disjointed ranges came from the hyperthread cpus..? Which if true means there'll probably be as many distinct ranges as there are threads per core? -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 29 Oct 2009, Mike Travis wrote: > I believe the disjointed ranges came from the hyperthread cpus..? Which if > true means there'll probably be as many distinct ranges as there are threads > per core? > Not necessarily, look at the first few lines of your new output: [ 0.000000] SRAT: PXM 0 -> APIC {0-7,16-23} -> Node 0 [ 0.000000] SRAT: PXM 1 -> APIC {32-39,48-55} -> Node 1 [ 0.000000] SRAT: PXM 2 -> APIC {64-71,80-87} -> Node 2 ... If those values are in hex, you have these apic id ranges: 0x00-0x07, 0x10-0x17 0x20-0x27, 0x30-0x37 0x40-0x47, 0x50-0x57 ... So it's most likely that each of the physical processors has eight logical processors (represented by the least significant three bits) and there are two physical processors (the more significant four bits) per node. -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Patch
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -36,6 +36,9 @@ static int num_node_memblks __initdata; static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; +static DECLARE_BITMAP(apicid_map, MAX_LOCAL_APIC) __initdata; +static char apicid_list[MAX_LOCAL_APIC] __initdata; + static __init int setup_node(int pxm) { return acpi_map_pxm_to_node(pxm); @@ -136,8 +139,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", - pxm, apic_id, node); } /* Callback for Proximity Domain -> LAPIC mapping */ @@ -170,8 +171,31 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", - pxm, apic_id, node); +} + +void __init acpi_numa_print_srat_mapping(void) +{ + int i, j; + + for (i = 0; i < MAX_PXM_DOMAINS; i++) { + int nid; + + nid = pxm_to_node(i); + if (nid == NUMA_NO_NODE) + continue; + + bitmap_zero(apicid_map, MAX_LOCAL_APIC); + for (j = 0; j < MAX_LOCAL_APIC; j++) + if (apicid_to_node[j] == nid) + set_bit(j, apicid_map); + + if (bitmap_empty(apicid_map, MAX_LOCAL_APIC)) + continue; + bitmap_scnlistprintf(apicid_list, MAX_LOCAL_APIC, + apicid_map, MAX_LOCAL_APIC); + pr_info("SRAT: PXM %u -> APIC {%s} -> Node %u\n", + i, apicid_list, nid); + } } #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -281,6 +281,10 @@ acpi_table_parse_srat(enum acpi_srat_type id, handler, max_entries); } +void __init __attribute__((weak)) acpi_numa_print_srat_mapping(void) +{ +} + int __init acpi_numa_init(void) { /* SRAT: Static Resource Affinity Table */ 
@@ -292,6 +296,7 @@ int __init acpi_numa_init(void) acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, acpi_parse_memory_affinity, NR_NODE_MEMBLKS); + acpi_numa_print_srat_mapping(); } /* SLIT: System Locality Information Table */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -92,12 +92,13 @@ int acpi_table_parse_madt (enum acpi_madt_type id, acpi_table_entry_handler hand int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); -/* the following four functions are architecture-dependent */ +/* the following six functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); void acpi_numa_arch_fixup(void); +void acpi_numa_print_srat_mapping(void); #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */