
[PATCHv2 02/12] acpi/hmat: Parse and report heterogeneous memory

Message ID 20181211010310.8551-3-keith.busch@intel.com (mailing list archive)
State New, archived
Series: Heterogeneous memory node attributes

Commit Message

Keith Busch Dec. 11, 2018, 1:03 a.m. UTC
Systems may provide different memory types and export this information
in the ACPI Heterogeneous Memory Attribute Table (HMAT). Parse these
tables provided by the platform and report the memory access and caching
attributes.
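
For example, on a platform with one initiator domain and two target
domains, the parser logs output like the following (values are
illustrative; the formats are the pr_info() strings in hmat.c below):

  HMAT: Memory (0x100000000 length 0x80000000) Flags:0001 Processor Domain:0 Memory Domain:1
  HMAT: Locality: Flags:00 Type:Read Latency Initiator Domains:1 Target Domains:2 Base:100
    Initiator-Target[0-0]:100 nsec
    Initiator-Target[0-1]:300 nsec
  HMAT: Cache: Domain:1 Size:4294967296 Attrs:00401111 SMBIOS Handles:0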

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/acpi/Kconfig  |   8 +++
 drivers/acpi/Makefile |   1 +
 drivers/acpi/hmat.c   | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/acpi/tables.c |   9 +++
 include/linux/acpi.h  |   1 +
 5 files changed, 211 insertions(+)
 create mode 100644 drivers/acpi/hmat.c

Comments

Dan Williams Dec. 11, 2018, 6:03 a.m. UTC | #1
On Mon, Dec 10, 2018 at 5:05 PM Keith Busch <keith.busch@intel.com> wrote:
>
> Systems may provide different memory types and export this information
> in the ACPI Heterogeneous Memory Attribute Table (HMAT). Parse these
> tables provided by the platform and report the memory access and caching
> attributes.
>
> Signed-off-by: Keith Busch <keith.busch@intel.com>
> ---
>  drivers/acpi/Kconfig  |   8 +++
>  drivers/acpi/Makefile |   1 +
>  drivers/acpi/hmat.c   | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/acpi/tables.c |   9 +++
>  include/linux/acpi.h  |   1 +
>  5 files changed, 211 insertions(+)
>  create mode 100644 drivers/acpi/hmat.c
>
> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
> index 7cea769c37df..9a05af3a18cf 100644
> --- a/drivers/acpi/Kconfig
> +++ b/drivers/acpi/Kconfig
> @@ -327,6 +327,14 @@ config ACPI_NUMA
>         depends on (X86 || IA64 || ARM64)
>         default y if IA64_GENERIC || IA64_SGI_SN2 || ARM64
>
> +config ACPI_HMAT
> +       bool "ACPI Heterogeneous Memory Attribute Table Support"
> +       depends on ACPI_NUMA
> +       help
> +        Parses the ACPI Heterogeneous Memory Attribute Table (HMAT) and
> +        sets the memory node relationships and access attributes.
> +
>  config ACPI_CUSTOM_DSDT_FILE
>         string "Custom DSDT Table file to include"
>         default ""
> diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
> index edc039313cd6..b5e13499f88b 100644
> --- a/drivers/acpi/Makefile
> +++ b/drivers/acpi/Makefile
> @@ -55,6 +55,7 @@ acpi-$(CONFIG_X86)            += x86/apple.o
>  acpi-$(CONFIG_X86)             += x86/utils.o
>  acpi-$(CONFIG_DEBUG_FS)                += debugfs.o
>  acpi-$(CONFIG_ACPI_NUMA)       += numa.o
> +acpi-$(CONFIG_ACPI_HMAT)       += hmat.o
>  acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o
>  acpi-y                         += acpi_lpat.o
>  acpi-$(CONFIG_ACPI_LPIT)       += acpi_lpit.o
> diff --git a/drivers/acpi/hmat.c b/drivers/acpi/hmat.c
> new file mode 100644
> index 000000000000..ef3881f0f370
> --- /dev/null
> +++ b/drivers/acpi/hmat.c
[..]
> +static __init int hmat_init(void)
> +{
> +       struct acpi_subtable_proc subtable_proc;
> +       struct acpi_table_header *tbl;
> +       enum acpi_hmat_type i;
> +       acpi_status status;
> +
> +       if (srat_disabled())
> +               return 0;
> +
> +       status = acpi_get_table(ACPI_SIG_HMAT, 0, &tbl);
> +       if (ACPI_FAILURE(status))
> +               return 0;
> +
> +       if (acpi_table_parse(ACPI_SIG_HMAT, parse_noop))
> +               goto out_put;
> +
> +       memset(&subtable_proc, 0, sizeof(subtable_proc));
> +       subtable_proc.handler = hmat_parse_subtable;
> +       for (i = ACPI_HMAT_TYPE_ADDRESS_RANGE; i < ACPI_HMAT_TYPE_RESERVED; i++) {
> +               subtable_proc.id = i;
> +               if (acpi_table_parse_entries_array(ACPI_SIG_HMAT,
> +                                       sizeof(struct acpi_table_hmat),
> +                                       &subtable_proc, 1, 0) < 0)
> +                       goto out_put;
> +       }
> + out_put:
> +       acpi_put_table(tbl);
> +       return 0;
> +}
> +subsys_initcall(hmat_init);

I have a use case to detect the presence of a memory-side cache early
at init time [1]. To me this means that hmat_init() needs to happen as
part of acpi_numa_init(). Consequently, I think that also means that
the sysfs portion needs to be broken out to its own init path that can
probably run at module_init() priority.
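
A rough sketch of that split (hmat_parse() and hmat_register_sysfs()
are placeholder names, not from this patch):

	/* called from acpi_numa_init(), early in boot */
	int __init hmat_early_init(void)
	{
		return acpi_table_parse(ACPI_SIG_HMAT, hmat_parse);
	}

	/* sysfs registration deferred until the device model is up */
	static int __init hmat_sysfs_init(void)
	{
		return hmat_register_sysfs();
	}
	module_init(hmat_sysfs_init);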

Perhaps we should split this patch set into two? The table parsing
with an in-kernel user is a bit easier to reason about and can go in
first. Towards that end can I steal / reflow patches 1 & 2 into the
memory randomization series? Other ideas how to handle this?

[1]: https://lkml.org/lkml/2018/10/12/309
Keith Busch Dec. 11, 2018, 4:55 p.m. UTC | #2
On Mon, Dec 10, 2018 at 10:03:40PM -0800, Dan Williams wrote:
> I have a use case to detect the presence of a memory-side cache early
> at init time [1]. To me this means that hmat_init() needs to happen as
> part of acpi_numa_init(). Consequently, I think that also means that
> the sysfs portion needs to be broken out to its own init path that can
> probably run at module_init() priority.
> 
> Perhaps we should split this patch set into two? The table parsing
> with an in-kernel user is a bit easier to reason about and can go in
> first. Towards that end can I steal / reflow patches 1 & 2 into the
> memory randomization series? Other ideas how to handle this?
> 
> [1]: https://lkml.org/lkml/2018/10/12/309

To that end, will something like the following work for you? This just
needs to happen after patch 1.

---
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index f5e09c39ff22..03ef3c8ba4ea 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -40,6 +40,8 @@ static int pxm_to_node_map[MAX_PXM_DOMAINS]
 static int node_to_pxm_map[MAX_NUMNODES]
 			= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
 
+static unsigned long node_side_cached[BITS_TO_LONGS(MAX_PXM_DOMAINS)];
+
 unsigned char acpi_srat_revision __initdata;
 int acpi_numa __initdata;
 
@@ -262,6 +264,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	u64 start, end;
 	u32 hotpluggable;
 	int node, pxm;
+	bool side_cached;
 
 	if (srat_disabled())
 		goto out_err;
@@ -308,6 +311,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
 			(unsigned long long)start, (unsigned long long)end - 1);
 
+	side_cached = test_bit(pxm, node_side_cached);
+	if (side_cached && memblock_mark_sidecached(start, ma->length))
+		pr_warn("SRAT: Failed to mark side cached range [mem %#010Lx-%#010Lx] in memblock\n",
+			(unsigned long long)start, (unsigned long long)end - 1);
+
 	max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
 
 	return 0;
@@ -411,6 +419,19 @@ acpi_parse_memory_affinity(union acpi_subtable_headers * header,
 	return 0;
 }
 
+static int __init
+acpi_parse_cache(union acpi_subtable_headers *header, const unsigned long end)
+{
+	struct acpi_hmat_cache *cache = (void *)header;
+	u32 attrs;
+
+	attrs = cache->cache_attributes;
+	if (((attrs & ACPI_HMAT_CACHE_ASSOCIATIVITY) >> 8) ==
+						ACPI_HMAT_CA_DIRECT_MAPPED)
+		set_bit(cache->memory_PD, node_side_cached);
+	return 0;
+}
+
 static int __init acpi_parse_srat(struct acpi_table_header *table)
 {
 	struct acpi_table_srat *srat = (struct acpi_table_srat *)table;
@@ -422,6 +443,11 @@ static int __init acpi_parse_srat(struct acpi_table_header *table)
 	return 0;
 }
 
+static __init int acpi_parse_hmat(struct acpi_table_header *table)
+{
+	return 0;
+}
+
 static int __init
 acpi_table_parse_srat(enum acpi_srat_type id,
 		      acpi_tbl_entry_handler handler, unsigned int max_entries)
@@ -460,6 +486,16 @@ int __init acpi_numa_init(void)
 					sizeof(struct acpi_table_srat),
 					srat_proc, ARRAY_SIZE(srat_proc), 0);
 
+		if (!acpi_table_parse(ACPI_SIG_HMAT, acpi_parse_hmat)) {
+			struct acpi_subtable_proc hmat_proc;
+
+			memset(&hmat_proc, 0, sizeof(hmat_proc));
+			hmat_proc.handler = acpi_parse_cache;
+			hmat_proc.id = ACPI_HMAT_TYPE_CACHE;
+			acpi_table_parse_entries_array(ACPI_SIG_HMAT,
+						sizeof(struct acpi_table_hmat),
+						&hmat_proc, 1, 0);
+		}
 		cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
 					    acpi_parse_memory_affinity, 0);
 	}
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index aee299a6aa76..a24c918a4496 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -44,6 +44,7 @@ enum memblock_flags {
 	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
 	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
 	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
+	MEMBLOCK_SIDECACHED	= 0x8,  /* System side caches memory access */
 };
 
 /**
@@ -130,6 +131,7 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
+int memblock_mark_sidecached(phys_addr_t base, phys_addr_t size);
 enum memblock_flags choose_memblock_flags(void);
 
 unsigned long memblock_free_all(void);
@@ -227,6 +229,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m)
 	return m->flags & MEMBLOCK_NOMAP;
 }
 
+static inline bool memblock_is_sidecached(struct memblock_region *m)
+{
+	return m->flags & MEMBLOCK_SIDECACHED;
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
 			    unsigned long  *end_pfn);
diff --git a/mm/memblock.c b/mm/memblock.c
index 9a2d5ae81ae1..827b709afdcd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -865,6 +865,11 @@ int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
 	return memblock_setclr_flag(base, size, 1, MEMBLOCK_HOTPLUG);
 }
 
+int __init_memblock memblock_mark_sidecached(phys_addr_t base, phys_addr_t size)
+{
+	return memblock_setclr_flag(base, size, 1, MEMBLOCK_SIDECACHED);
+}
+
 /**
  * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
  * @base: the base phys addr of the region
--
Dan Williams Dec. 11, 2018, 8:29 p.m. UTC | #3
On Tue, Dec 11, 2018 at 8:58 AM Keith Busch <keith.busch@intel.com> wrote:
>
> On Mon, Dec 10, 2018 at 10:03:40PM -0800, Dan Williams wrote:
> > I have a use case to detect the presence of a memory-side cache early
> > at init time [1]. To me this means that hmat_init() needs to happen as
> > part of acpi_numa_init(). Consequently, I think that also means that
> > the sysfs portion needs to be broken out to its own init path that can
> > probably run at module_init() priority.
> >
> > Perhaps we should split this patch set into two? The table parsing
> > with an in-kernel user is a bit easier to reason about and can go in
> > first. Towards that end can I steal / reflow patches 1 & 2 into the
> > memory randomization series? Other ideas how to handle this?
> >
> > [1]: https://lkml.org/lkml/2018/10/12/309
>
> To that end, will something like the following work for you? This just
> needs to happen after patch 1.
>
> ---
> diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
> index f5e09c39ff22..03ef3c8ba4ea 100644
> --- a/drivers/acpi/numa.c
> +++ b/drivers/acpi/numa.c
> @@ -40,6 +40,8 @@ static int pxm_to_node_map[MAX_PXM_DOMAINS]
>  static int node_to_pxm_map[MAX_NUMNODES]
>                         = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
>
> +static unsigned long node_side_cached[BITS_TO_LONGS(MAX_PXM_DOMAINS)];
> +
>  unsigned char acpi_srat_revision __initdata;
>  int acpi_numa __initdata;
>
> @@ -262,6 +264,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
>         u64 start, end;
>         u32 hotpluggable;
>         int node, pxm;
> +       bool side_cached;
>
>         if (srat_disabled())
>                 goto out_err;
> @@ -308,6 +311,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
>                 pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
>                         (unsigned long long)start, (unsigned long long)end - 1);
>
> +       side_cached = test_bit(pxm, node_side_cached);
> +       if (side_cached && memblock_mark_sidecached(start, ma->length))
> +               pr_warn("SRAT: Failed to mark side cached range [mem %#010Lx-%#010Lx] in memblock\n",
> +                       (unsigned long long)start, (unsigned long long)end - 1);
> +
>         max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
>
>         return 0;
> @@ -411,6 +419,19 @@ acpi_parse_memory_affinity(union acpi_subtable_headers * header,
>         return 0;
>  }
>
> +static int __init
> +acpi_parse_cache(union acpi_subtable_headers *header, const unsigned long end)
> +{
> +       struct acpi_hmat_cache *cache = (void *)header;
> +       u32 attrs;
> +
> +       attrs = cache->cache_attributes;
> +       if (((attrs & ACPI_HMAT_CACHE_ASSOCIATIVITY) >> 8) ==
> +                                               ACPI_HMAT_CA_DIRECT_MAPPED)
> +               set_bit(cache->memory_PD, node_side_cached);

I'm not sure I see a use case for 'node_side_cached'. Instead I need
to know if a cache intercepts a "System RAM" resource, because a cache
in front of a reserved address range would not be impacted by page
allocator randomization. Or, are you saying to have memblock generically
describe this capability and move the responsibility of acting on
that data to a higher level?
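
Something along these lines at cache-parsing time is what I mean
(sketch only; resolving the SPA range behind cache->memory_PD into
spa_base/spa_len is hand-waved here):

	/* only record caches that front memory memblock knows about,
	 * i.e. System RAM; a cache over a reserved range doesn't
	 * matter for page allocator randomization */
	if (memblock_overlaps_region(&memblock.memory, spa_base, spa_len))
		set_bit(cache->memory_PD, node_side_cached);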

The other detail to consider is the cache ratio size, but that would
be a follow on feature. The use case is to automatically determine the
ratio to pass to numa_emulation:

    cc9aec03e58f x86/numa_emulation: Introduce uniform split capability
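
For example (numbers illustrative): a node with 64GB of far memory
behind a 4GB direct-mapped memory-side cache gives a ratio of
64 / 4 = 16, i.e. the equivalent of booting with numa=fake=16U to
split each node into sixteen uniform, cache-sized emulated nodes.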

> +       return 0;
> +}
> +
>  static int __init acpi_parse_srat(struct acpi_table_header *table)
>  {
>         struct acpi_table_srat *srat = (struct acpi_table_srat *)table;
> @@ -422,6 +443,11 @@ static int __init acpi_parse_srat(struct acpi_table_header *table)
>         return 0;
>  }
>
> +static __init int acpi_parse_hmat(struct acpi_table_header *table)
> +{
> +       return 0;
> +}

What's this acpi_parse_hmat() stub for?

> +
>  static int __init
>  acpi_table_parse_srat(enum acpi_srat_type id,
>                       acpi_tbl_entry_handler handler, unsigned int max_entries)
> @@ -460,6 +486,16 @@ int __init acpi_numa_init(void)
>                                         sizeof(struct acpi_table_srat),
>                                         srat_proc, ARRAY_SIZE(srat_proc), 0);
>
> +               if (!acpi_table_parse(ACPI_SIG_HMAT, acpi_parse_hmat)) {
> +                       struct acpi_subtable_proc hmat_proc;
> +
> +                       memset(&hmat_proc, 0, sizeof(hmat_proc));
> +                       hmat_proc.handler = acpi_parse_cache;
> +                       hmat_proc.id = ACPI_HMAT_TYPE_CACHE;
> +                       acpi_table_parse_entries_array(ACPI_SIG_HMAT,
> +                                               sizeof(struct acpi_table_hmat),
> +                                               &hmat_proc, 1, 0);
> +               }
>                 cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
>                                             acpi_parse_memory_affinity, 0);
>         }
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index aee299a6aa76..a24c918a4496 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -44,6 +44,7 @@ enum memblock_flags {
>         MEMBLOCK_HOTPLUG        = 0x1,  /* hotpluggable region */
>         MEMBLOCK_MIRROR         = 0x2,  /* mirrored region */
>         MEMBLOCK_NOMAP          = 0x4,  /* don't add to kernel direct mapping */
> +       MEMBLOCK_SIDECACHED     = 0x8,  /* System side caches memory access */

I'm concerned that we may be stretching memblock past its intended use
case especially for just this randomization case. For example, I think
memblock_find_in_range() gets confused in the presence of
MEMBLOCK_SIDECACHED memblocks.
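
To illustrate the concern, the range iterator behind
memblock_find_in_range() filters regions on flags roughly like this
(paraphrasing __next_mem_range() in mm/memblock.c):

	/* if we want mirror memory skip non-mirror memory regions */
	if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
		continue;

	/* skip nomap memory unless we were asked for it explicitly */
	if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
		continue;

Nothing there knows about MEMBLOCK_SIDECACHED, so every flags user
needs an audit to decide whether side-cached ranges should be skipped,
preferred, or ignored.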
Keith Busch Dec. 11, 2018, 8:44 p.m. UTC | #4
On Tue, Dec 11, 2018 at 12:29:45PM -0800, Dan Williams wrote:
> On Tue, Dec 11, 2018 at 8:58 AM Keith Busch <keith.busch@intel.com> wrote:
> > +static int __init
> > +acpi_parse_cache(union acpi_subtable_headers *header, const unsigned long end)
> > +{
> > +       struct acpi_hmat_cache *cache = (void *)header;
> > +       u32 attrs;
> > +
> > +       attrs = cache->cache_attributes;
> > +       if (((attrs & ACPI_HMAT_CACHE_ASSOCIATIVITY) >> 8) ==
> > +                                               ACPI_HMAT_CA_DIRECT_MAPPED)
> > +               set_bit(cache->memory_PD, node_side_cached);
> 
> I'm not sure I see a use case for 'node_side_cached'. Instead I need
> to know if a cache intercepts a "System RAM" resource, because a cache
> in front of a reserved address range would not be impacted by page
> allocator randomization. Or, are you saying to have memblock generically
> describe this capability and move the responsibility of acting on
> that data to a higher level?

The "node_side_cached" array isn't intended to be used directly. It's
just holding the PXM's that HMAT says have a side cache so we know which
PXM's have that attribute before parsing SRAT's memory affinity.

The intention was that this is just another attribute of a memory range,
similar to hotpluggable. Whoever needs to use it may query it from
the memblock, if that makes sense.
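
That is, a consumer could do something like (sketch, using the
accessor from the diff above):

	struct memblock_region *r;

	for_each_memblock(memory, r)
		if (memblock_is_sidecached(r))
			pr_info("side cached range: base %pa size %pa\n",
				&r->base, &r->size);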

> The other detail to consider is the cache ratio size, but that would
> be a follow on feature. The use case is to automatically determine the
> ratio to pass to numa_emulation:
> 
>     cc9aec03e58f x86/numa_emulation: Introduce uniform split capability

Will look into that.
 
> > diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> > index aee299a6aa76..a24c918a4496 100644
> > --- a/include/linux/memblock.h
> > +++ b/include/linux/memblock.h
> > @@ -44,6 +44,7 @@ enum memblock_flags {
> >         MEMBLOCK_HOTPLUG        = 0x1,  /* hotpluggable region */
> >         MEMBLOCK_MIRROR         = 0x2,  /* mirrored region */
> >         MEMBLOCK_NOMAP          = 0x4,  /* don't add to kernel direct mapping */
> > +       MEMBLOCK_SIDECACHED     = 0x8,  /* System side caches memory access */
> 
> I'm concerned that we may be stretching memblock past its intended use
> case especially for just this randomization case. For example, I think
> memblock_find_in_range() gets confused in the presence of
> MEMBLOCK_SIDECACHED memblocks.

Ok, I see. Is there a better structure or interface you would recommend
for your use case to identify which memory ranges carry this attribute?
Dan Williams Dec. 11, 2018, 10:50 p.m. UTC | #5
On Tue, Dec 11, 2018 at 12:47 PM Keith Busch <keith.busch@intel.com> wrote:
>
> On Tue, Dec 11, 2018 at 12:29:45PM -0800, Dan Williams wrote:
> > On Tue, Dec 11, 2018 at 8:58 AM Keith Busch <keith.busch@intel.com> wrote:
> > > +static int __init
> > > +acpi_parse_cache(union acpi_subtable_headers *header, const unsigned long end)
> > > +{
> > > +       struct acpi_hmat_cache *cache = (void *)header;
> > > +       u32 attrs;
> > > +
> > > +       attrs = cache->cache_attributes;
> > > +       if (((attrs & ACPI_HMAT_CACHE_ASSOCIATIVITY) >> 8) ==
> > > +                                               ACPI_HMAT_CA_DIRECT_MAPPED)
> > > +               set_bit(cache->memory_PD, node_side_cached);
> >
> > I'm not sure I see a use case for 'node_side_cached'. Instead I need
> > to know if a cache intercepts a "System RAM" resource, because a cache
> > in front of a reserved address range would not be impacted by page
> > allocator randomization. Or, are you saying to have memblock generically
> > describe this capability and move the responsibility of acting on
> > that data to a higher level?
>
> The "node_side_cached" array isn't intended to be used directly. It's
> just holding the PXM's that HMAT says have a side cache so we know which
> PXM's have that attribute before parsing SRAT's memory affinity.
>
> The intention was that this is just another attribute of a memory range,
> similar to hotpluggable. Whoever needs to use it may query it from
> the memblock, if that makes sense.
>
> > The other detail to consider is the cache ratio size, but that would
> > be a follow on feature. The use case is to automatically determine the
> > ratio to pass to numa_emulation:
> >
> >     cc9aec03e58f x86/numa_emulation: Introduce uniform split capability
>
> Will look into that.
>
> > > diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> > > index aee299a6aa76..a24c918a4496 100644
> > > --- a/include/linux/memblock.h
> > > +++ b/include/linux/memblock.h
> > > @@ -44,6 +44,7 @@ enum memblock_flags {
> > >         MEMBLOCK_HOTPLUG        = 0x1,  /* hotpluggable region */
> > >         MEMBLOCK_MIRROR         = 0x2,  /* mirrored region */
> > >         MEMBLOCK_NOMAP          = 0x4,  /* don't add to kernel direct mapping */
> > > +       MEMBLOCK_SIDECACHED     = 0x8,  /* System side caches memory access */
> >
> > I'm concerned that we may be stretching memblock past its intended use
> > case especially for just this randomization case. For example, I think
> > memblock_find_in_range() gets confused in the presence of
> > MEMBLOCK_SIDECACHED memblocks.
>
> Ok, I see. Is there a better structure or interface you would recommend
> for your use case to identify which memory ranges carry this attribute?

Well, no, I don't think there is one. We just need to either audit
existing memblock users to make sure they are prepared for this new
flag, or move the cache information somewhere else. I'd be inclined to
just move it to new fields, or a sub-struct, in struct memblock_region.
Then we can put the cache set/way info and near-memory size
information in there as well. We'd likely need a new
CONFIG_HAVE_MEMBLOCK_CACHE_INFO gated by the presence of HMAT support.
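
Roughly like this (field names are just a strawman):

	#ifdef CONFIG_HAVE_MEMBLOCK_CACHE_INFO
	struct memblock_cache_info {
		phys_addr_t near_size;	/* memory-side cache capacity */
		u32 associativity;	/* e.g. direct mapped */
	};
	#endif

	struct memblock_region {
		phys_addr_t base;
		phys_addr_t size;
		enum memblock_flags flags;
	#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
		int nid;
	#endif
	#ifdef CONFIG_HAVE_MEMBLOCK_CACHE_INFO
		struct memblock_cache_info cache_info;
	#endif
	};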

Patch

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 7cea769c37df..9a05af3a18cf 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -327,6 +327,14 @@  config ACPI_NUMA
 	depends on (X86 || IA64 || ARM64)
 	default y if IA64_GENERIC || IA64_SGI_SN2 || ARM64
 
+config ACPI_HMAT
+	bool "ACPI Heterogeneous Memory Attribute Table Support"
+	depends on ACPI_NUMA
+	help
+	 Parses the ACPI Heterogeneous Memory Attribute Table (HMAT) and
+	 sets the memory node relationships and access attributes.
+
 config ACPI_CUSTOM_DSDT_FILE
 	string "Custom DSDT Table file to include"
 	default ""
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index edc039313cd6..b5e13499f88b 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -55,6 +55,7 @@  acpi-$(CONFIG_X86)		+= x86/apple.o
 acpi-$(CONFIG_X86)		+= x86/utils.o
 acpi-$(CONFIG_DEBUG_FS)		+= debugfs.o
 acpi-$(CONFIG_ACPI_NUMA)	+= numa.o
+acpi-$(CONFIG_ACPI_HMAT)	+= hmat.o
 acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o
 acpi-y				+= acpi_lpat.o
 acpi-$(CONFIG_ACPI_LPIT)	+= acpi_lpit.o
diff --git a/drivers/acpi/hmat.c b/drivers/acpi/hmat.c
new file mode 100644
index 000000000000..ef3881f0f370
--- /dev/null
+++ b/drivers/acpi/hmat.c
@@ -0,0 +1,192 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Heterogeneous Memory Attributes Table (HMAT) representation
+ *
+ * Copyright (c) 2018, Intel Corporation.
+ */
+
+#include <acpi/acpi_numa.h>
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/node.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+
+static __init const char *hmat_data_type(u8 type)
+{
+	switch (type) {
+	case ACPI_HMAT_ACCESS_LATENCY:
+		return "Access Latency";
+	case ACPI_HMAT_READ_LATENCY:
+		return "Read Latency";
+	case ACPI_HMAT_WRITE_LATENCY:
+		return "Write Latency";
+	case ACPI_HMAT_ACCESS_BANDWIDTH:
+		return "Access Bandwidth";
+	case ACPI_HMAT_READ_BANDWIDTH:
+		return "Read Bandwidth";
+	case ACPI_HMAT_WRITE_BANDWIDTH:
+		return "Write Bandwidth";
+	default:
+		return "Reserved";
+	};
+}
+
+static __init const char *hmat_data_type_suffix(u8 type)
+{
+	switch (type) {
+	case ACPI_HMAT_ACCESS_LATENCY:
+	case ACPI_HMAT_READ_LATENCY:
+	case ACPI_HMAT_WRITE_LATENCY:
+		return " nsec";
+	case ACPI_HMAT_ACCESS_BANDWIDTH:
+	case ACPI_HMAT_READ_BANDWIDTH:
+	case ACPI_HMAT_WRITE_BANDWIDTH:
+		return " MB/s";
+	default:
+		return "";
+	};
+}
+
+static __init int hmat_parse_locality(union acpi_subtable_headers *header,
+				      const unsigned long end)
+{
+	struct acpi_hmat_locality *loc = (void *)header;
+	unsigned int init, targ, total_size, ipds, tpds;
+	u32 *inits, *targs, value;
+	u16 *entries;
+	u8 type;
+
+	if (loc->header.length < sizeof(*loc)) {
+		pr_err("HMAT: Unexpected locality header length: %d\n",
+			loc->header.length);
+		return -EINVAL;
+	}
+
+	type = loc->data_type;
+	ipds = loc->number_of_initiator_Pds;
+	tpds = loc->number_of_target_Pds;
+	total_size = sizeof(*loc) + sizeof(*entries) * ipds * tpds +
+		     sizeof(*inits) * ipds + sizeof(*targs) * tpds;
+	if (loc->header.length < total_size) {
+		pr_err("HMAT: Unexpected locality header length:%d, minimum required:%d\n",
+			loc->header.length, total_size);
+		return -EINVAL;
+	}
+
+	pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%d Target Domains:%d Base:%lld\n",
+		loc->flags, hmat_data_type(type), ipds, tpds,
+		loc->entry_base_unit);
+
+	inits = (u32 *)(loc + 1);
+	targs = &inits[ipds];
+	entries = (u16 *)(&targs[tpds]);
+	for (targ = 0; targ < tpds; targ++) {
+		for (init = 0; init < ipds; init++) {
+			value = entries[init * tpds + targ];
+			value = (value * loc->entry_base_unit) / 10;
+			pr_info("  Initiator-Target[%d-%d]:%d%s\n",
+				inits[init], targs[targ], value,
+				hmat_data_type_suffix(type));
+		}
+	}
+	return 0;
+}
+
+static __init int hmat_parse_cache(union acpi_subtable_headers *header,
+				   const unsigned long end)
+{
+	struct acpi_hmat_cache *cache = (void *)header;
+	u32 attrs;
+
+	if (cache->header.length < sizeof(*cache)) {
+		pr_err("HMAT: Unexpected cache header length: %d\n",
+			cache->header.length);
+		return -EINVAL;
+	}
+
+	attrs = cache->cache_attributes;
+	pr_info("HMAT: Cache: Domain:%d Size:%llu Attrs:%08x SMBIOS Handles:%d\n",
+		cache->memory_PD, cache->cache_size, attrs,
+		cache->number_of_SMBIOShandles);
+
+	return 0;
+}
+
+static int __init hmat_parse_address_range(union acpi_subtable_headers *header,
+					   const unsigned long end)
+{
+	struct acpi_hmat_address_range *spa = (void *)header;
+
+	if (spa->header.length != sizeof(*spa)) {
+		pr_err("HMAT: Unexpected address range header length: %d\n",
+			spa->header.length);
+		return -EINVAL;
+	}
+	pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%d Memory Domain:%d\n",
+		spa->physical_address_base, spa->physical_address_length,
+		spa->flags, spa->processor_PD, spa->memory_PD);
+	return 0;
+}
+
+static int __init hmat_parse_subtable(union acpi_subtable_headers *header,
+				      const unsigned long end)
+{
+	struct acpi_hmat_structure *hdr = (void *)header;
+
+	if (!hdr)
+		return -EINVAL;
+
+	switch (hdr->type) {
+	case ACPI_HMAT_TYPE_ADDRESS_RANGE:
+		return hmat_parse_address_range(header, end);
+	case ACPI_HMAT_TYPE_LOCALITY:
+		return hmat_parse_locality(header, end);
+	case ACPI_HMAT_TYPE_CACHE:
+		return hmat_parse_cache(header, end);
+	default:
+		return -EINVAL;
+	}
+}
+
+static __init int parse_noop(struct acpi_table_header *table)
+{
+	return 0;
+}
+
+static __init int hmat_init(void)
+{
+	struct acpi_subtable_proc subtable_proc;
+	struct acpi_table_header *tbl;
+	enum acpi_hmat_type i;
+	acpi_status status;
+
+	if (srat_disabled())
+		return 0;
+
+	status = acpi_get_table(ACPI_SIG_HMAT, 0, &tbl);
+	if (ACPI_FAILURE(status))
+		return 0;
+
+	if (acpi_table_parse(ACPI_SIG_HMAT, parse_noop))
+		goto out_put;
+
+	memset(&subtable_proc, 0, sizeof(subtable_proc));
+	subtable_proc.handler = hmat_parse_subtable;
+	for (i = ACPI_HMAT_TYPE_ADDRESS_RANGE; i < ACPI_HMAT_TYPE_RESERVED; i++) {
+		subtable_proc.id = i;
+		if (acpi_table_parse_entries_array(ACPI_SIG_HMAT,
+					sizeof(struct acpi_table_hmat),
+					&subtable_proc, 1, 0) < 0)
+			goto out_put;
+	}
+ out_put:
+	acpi_put_table(tbl);
+	return 0;
+}
+subsys_initcall(hmat_init);
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index e9643b4267c7..bc1addf715dc 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -51,6 +51,7 @@  static int acpi_apic_instance __initdata;
 
 enum acpi_subtable_type {
 	ACPI_SUBTABLE_COMMON,
+	ACPI_SUBTABLE_HMAT,
 };
 
 struct acpi_subtable_entry {
@@ -232,6 +233,8 @@  acpi_get_entry_type(struct acpi_subtable_entry *entry)
 	switch (entry->type) {
 	case ACPI_SUBTABLE_COMMON:
 		return entry->hdr->common.type;
+	case ACPI_SUBTABLE_HMAT:
+		return entry->hdr->hmat.type;
 	}
 	return 0;
 }
@@ -242,6 +245,8 @@  acpi_get_entry_length(struct acpi_subtable_entry *entry)
 	switch (entry->type) {
 	case ACPI_SUBTABLE_COMMON:
 		return entry->hdr->common.length;
+	case ACPI_SUBTABLE_HMAT:
+		return entry->hdr->hmat.length;
 	}
 	return 0;
 }
@@ -252,6 +257,8 @@  acpi_get_subtable_header_length(struct acpi_subtable_entry *entry)
 	switch (entry->type) {
 	case ACPI_SUBTABLE_COMMON:
 		return sizeof(entry->hdr->common);
+	case ACPI_SUBTABLE_HMAT:
+		return sizeof(entry->hdr->hmat);
 	}
 	return 0;
 }
@@ -259,6 +266,8 @@  acpi_get_subtable_header_length(struct acpi_subtable_entry *entry)
 static enum acpi_subtable_type __init
 acpi_get_subtable_type(char *id)
 {
+	if (strncmp(id, ACPI_SIG_HMAT, 4) == 0)
+		return ACPI_SUBTABLE_HMAT;
 	return ACPI_SUBTABLE_COMMON;
 }
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 18805a967c70..4373f5ba0f95 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -143,6 +143,7 @@  enum acpi_address_range_id {
 /* Table Handlers */
 union acpi_subtable_headers {
 	struct acpi_subtable_header common;
+	struct acpi_hmat_structure hmat;
 };
 
 typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table);