diff mbox

[v6,05/12] ACPI/PPTT: Add Processor Properties Topology Table parsing

Message ID 20180113005920.28658-6-jeremy.linton@arm.com (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Jeremy Linton Jan. 13, 2018, 12:59 a.m. UTC
ACPI 6.2 adds a new table, which describes how processing units
are related to each other in tree like fashion. Caches are
also sprinkled throughout the tree and describe the properties
of the caches in relation to other caches and processing units.

Add the code to parse the cache hierarchy and report the total
number of levels of cache for a given core using
acpi_find_last_cache_level() as well as fill out the individual
cores cache information with cache_setup_acpi() once the
cpu_cacheinfo structure has been populated by the arch specific
code.

An additional patch later in the set adds the ability to report
peers in the topology using find_acpi_cpu_topology()
to report a unique ID for each processing unit at a given level
in the tree. These unique id's can then be used to match related
processing units which exist as threads, COD (clusters
on die), within a given package, etc.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
---
 drivers/acpi/pptt.c | 476 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 476 insertions(+)
 create mode 100644 drivers/acpi/pptt.c

Comments

Sudeep Holla Jan. 15, 2018, 2:58 p.m. UTC | #1
On Fri, Jan 12, 2018 at 06:59:13PM -0600, Jeremy Linton wrote:
> ACPI 6.2 adds a new table, which describes how processing units
> are related to each other in tree like fashion. Caches are
> also sprinkled throughout the tree and describe the properties
> of the caches in relation to other caches and processing units.
> 
> Add the code to parse the cache hierarchy and report the total
> number of levels of cache for a given core using
> acpi_find_last_cache_level() as well as fill out the individual
> cores cache information with cache_setup_acpi() once the
> cpu_cacheinfo structure has been populated by the arch specific
> code.
> 
> An additional patch later in the set adds the ability to report
> peers in the topology using find_acpi_cpu_topology()
> to report a unique ID for each processing unit at a given level
> in the tree. These unique id's can then be used to match related
> processing units which exist as threads, COD (clusters
> on die), within a given package, etc.
> 
> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
> ---
>  drivers/acpi/pptt.c | 476 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 476 insertions(+)
>  create mode 100644 drivers/acpi/pptt.c
> 
> diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
> new file mode 100644
> index 000000000000..2c4b3ed862a8
> --- /dev/null
> +++ b/drivers/acpi/pptt.c
> @@ -0,0 +1,476 @@
> +/*
> + * Copyright (C) 2018, ARM
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * This file implements parsing of Processor Properties Topology Table (PPTT)
> + * which is optionally used to describe the processor and cache topology.
> + * Due to the relative pointers used throughout the table, this doesn't
> + * leverage the existing subtable parsing in the kernel.
> + *
> + * The PPTT structure is an inverted tree, with each node potentially
> + * holding one or two inverted tree data structures describing
> + * the caches available at that level. Each cache structure optionally
> + * contains properties describing the cache at a given level which can be
> + * used to override hardware probed values.
> + */
> +#define pr_fmt(fmt) "ACPI PPTT: " fmt
> +
> +#include <linux/acpi.h>
> +#include <linux/cacheinfo.h>
> +#include <acpi/processor.h>
> +
> +/* total number of attributes checked by the properties code */
> +#define PPTT_CHECKED_ATTRIBUTES 6

See comment on this below. If we retain this, move it closer to the usage so
that it's easier to understand what it actually stands for.

> +
> +/*
> + * Given the PPTT table, find and verify that the subtable entry
> + * is located within the table
> + */
> +static struct acpi_subtable_header *fetch_pptt_subtable(
> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
> +{
> +	struct acpi_subtable_header *entry;
> +
> +	/* there isn't a subtable at reference 0 */
> +	if (pptt_ref < sizeof(struct acpi_subtable_header))
> +		return NULL;
> +
> +	if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length)
> +		return NULL;
> +
> +	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref);
> +
> +	if (pptt_ref + entry->length > table_hdr->length)
> +		return NULL;
> +
> +	return entry;
> +}
> +
> +static struct acpi_pptt_processor *fetch_pptt_node(
> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
> +{
> +	return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr,
> +								 pptt_ref);
> +}
> +
> +static struct acpi_pptt_cache *fetch_pptt_cache(
> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
> +{
> +	return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr,
> +							     pptt_ref);
> +}
> +
> +static struct acpi_subtable_header *acpi_get_pptt_resource(
> +	struct acpi_table_header *table_hdr,
> +	struct acpi_pptt_processor *node, int resource)
> +{
> +	u32 *ref;
> +
> +	if (resource >= node->number_of_priv_resources)
> +		return NULL;
> +
> +	ref = ACPI_ADD_PTR(u32, node, sizeof(struct acpi_pptt_processor));
> +	ref += resource;
> +
> +	return fetch_pptt_subtable(table_hdr, *ref);
> +}
> +
> +/*
> + * Attempt to find a given cache level, while counting the max number
> + * of cache levels for the cache node.
> + *
> + * Given a pptt resource, verify that it is a cache node, then walk
> + * down each level of caches, counting how many levels are found
> + * as well as checking the cache type (icache, dcache, unified). If a
> + * level & type match, then we set found, and continue the search.
> + * Once the entire cache branch has been walked return its max
> + * depth.
> + */
> +static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
> +				int local_level,
> +				struct acpi_subtable_header *res,
> +				struct acpi_pptt_cache **found,
> +				int level, int type)
> +{
> +	struct acpi_pptt_cache *cache;
> +
> +	if (res->type != ACPI_PPTT_TYPE_CACHE)
> +		return 0;
> +
> +	cache = (struct acpi_pptt_cache *) res;
> +	while (cache) {
> +		local_level++;
> +
> +		if ((local_level == level) &&
> +		    (cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) &&
> +		    ((cache->attributes & ACPI_PPTT_MASK_CACHE_TYPE) == type)) {
> +			if ((*found != NULL) && (cache != *found))
> +				pr_err("Found duplicate cache level/type unable to determine uniqueness\n");
> +
> +			pr_debug("Found cache @ level %d\n", level);
> +			*found = cache;
> +			/*
> +			 * continue looking at this node's resource list
> +			 * to verify that we don't find a duplicate
> +			 * cache node.
> +			 */
> +		}
> +		cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache);
> +	}
> +	return local_level;
> +}
> +
> +/*
> + * Given a CPU node look for cache levels that exist at this level, and then
> + * for each cache node, count how many levels exist below (logically above) it.
> + * If a level and type are specified, and we find that level/type, abort
> + * processing and return the acpi_pptt_cache structure.
> + */
> +static struct acpi_pptt_cache *acpi_find_cache_level(
> +	struct acpi_table_header *table_hdr,
> +	struct acpi_pptt_processor *cpu_node,
> +	int *starting_level, int level, int type)
> +{
> +	struct acpi_subtable_header *res;
> +	int number_of_levels = *starting_level;
> +	int resource = 0;
> +	struct acpi_pptt_cache *ret = NULL;
> +	int local_level;
> +
> +	/* walk down from processor node */
> +	while ((res = acpi_get_pptt_resource(table_hdr, cpu_node, resource))) {
> +		resource++;
> +
> +		local_level = acpi_pptt_walk_cache(table_hdr, *starting_level,
> +						   res, &ret, level, type);
> +		/*
> +		 * we are looking for the max depth. Since its potentially
> +		 * possible for a given node to have resources with differing
> +		 * depths verify that the depth we have found is the largest.
> +		 */
> +		if (number_of_levels < local_level)
> +			number_of_levels = local_level;
> +	}
> +	if (number_of_levels > *starting_level)
> +		*starting_level = number_of_levels;
> +
> +	return ret;
> +}
> +
> +/*
> + * Given a processor node containing a processing unit, walk into it and count
> + * how many levels exist solely for it, and then walk up each level until we hit
> + * the root node (ignore the package level because it may be possible to have
> + * caches that exist across packages). Count the number of cache levels that
> + * exist at each level on the way up.
> + */
> +static int acpi_process_node(struct acpi_table_header *table_hdr,
> +			     struct acpi_pptt_processor *cpu_node)
> +{
> +	int total_levels = 0;
> +
> +	do {
> +		acpi_find_cache_level(table_hdr, cpu_node, &total_levels, 0, 0);
> +		cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
> +	} while (cpu_node);
> +
> +	return total_levels;
> +}
> +
> +/*
> + * Determine if the *node parameter is a leaf node by iterating the
> + * PPTT table, looking for nodes which reference it.
> + * Return 0 if we find a node referencing the passed node,
> + * or 1 if we don't.
> + */
> +static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr,
> +			       struct acpi_pptt_processor *node)
> +{
> +	struct acpi_subtable_header *entry;
> +	unsigned long table_end;
> +	u32 node_entry;
> +	struct acpi_pptt_processor *cpu_node;
> +
> +	table_end = (unsigned long)table_hdr + table_hdr->length;
> +	node_entry = ACPI_PTR_DIFF(node, table_hdr);
> +	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
> +			     sizeof(struct acpi_table_pptt));
> +
> +	while ((unsigned long)(entry + 1) < table_end) {

Is entry + 1 check sufficient to access entry of length ?
Shouldn't that be entry + sizeof(struct acpi_pptt_processor *) so that
we are sure it's valid entry ?

> +		cpu_node = (struct acpi_pptt_processor *)entry;
> +		if ((entry->type == ACPI_PPTT_TYPE_PROCESSOR) &&
> +		    (cpu_node->parent == node_entry))
> +			return 0;
> +		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
> +				     entry->length);
> +	}
> +	return 1;
> +}
> +
> +/*
> + * Find the subtable entry describing the provided processor.
> + * This is done by iterating the PPTT table looking for processor nodes
> + * which have an acpi_processor_id that matches the acpi_cpu_id parameter
> + * passed into the function. If we find a node that matches this criteria
> + * we verify that its a leaf node in the topology rather than depending
> + * on the valid flag, which doesn't need to be set for leaf nodes.
> + */
> +static struct acpi_pptt_processor *acpi_find_processor_node(
> +	struct acpi_table_header *table_hdr,
> +	u32 acpi_cpu_id)
> +{
> +	struct acpi_subtable_header *entry;
> +	unsigned long table_end;
> +	struct acpi_pptt_processor *cpu_node;
> +
> +	table_end = (unsigned long)table_hdr + table_hdr->length;
> +	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
> +			     sizeof(struct acpi_table_pptt));
> +
> +	/* find the processor structure associated with this cpuid */
> +	while ((unsigned long)(entry + 1) < table_end) {

Same comment as above on entry + 1.

> +		cpu_node = (struct acpi_pptt_processor *)entry;
> +
> +		if (entry->length == 0) {
> +			pr_err("Invalid zero length subtable\n");
> +			break;
> +		}
> +		if ((entry->type == ACPI_PPTT_TYPE_PROCESSOR) &&
> +		    (acpi_cpu_id == cpu_node->acpi_processor_id) &&
> +		     acpi_pptt_leaf_node(table_hdr, cpu_node)) {
> +			return (struct acpi_pptt_processor *)entry;
> +		}
> +
> +		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
> +				     entry->length);
> +	}
> +
> +	return NULL;
> +}
> +
> +static int acpi_find_cache_levels(struct acpi_table_header *table_hdr,
> +				  u32 acpi_cpu_id)
> +{
> +	int number_of_levels = 0;
> +	struct acpi_pptt_processor *cpu;
> +
> +	cpu = acpi_find_processor_node(table_hdr, acpi_cpu_id);
> +	if (cpu)
> +		number_of_levels = acpi_process_node(table_hdr, cpu);
> +
> +	return number_of_levels;
> +}
> +
> +/* Convert the linux cache_type to a ACPI PPTT cache type value */
> +static u8 acpi_cache_type(enum cache_type type)
> +{

[nit] Just wondering if we can avoid this with some static mapping:

static u8 acpi_cache_type[] = {
        [CACHE_TYPE_NONE] = 0,
        [CACHE_TYPE_DATA] = ACPI_PPTT_CACHE_TYPE_DATA,
        [CACHE_TYPE_INST] = ACPI_PPTT_CACHE_TYPE_INSTR,
        [CACHE_TYPE_UNIFIED] = ACPI_PPTT_CACHE_TYPE_UNIFIED,
};

> +	switch (type) {
> +	case CACHE_TYPE_DATA:
> +		pr_debug("Looking for data cache\n");
> +		return ACPI_PPTT_CACHE_TYPE_DATA;
> +	case CACHE_TYPE_INST:
> +		pr_debug("Looking for instruction cache\n");
> +		return ACPI_PPTT_CACHE_TYPE_INSTR;
> +	default:
> +	case CACHE_TYPE_UNIFIED:
> +		pr_debug("Looking for unified cache\n");
> +		/*
> +		 * It is important that ACPI_PPTT_CACHE_TYPE_UNIFIED
> +		 * contains the bit pattern that will match both
> +		 * ACPI unified bit patterns because we use it later
> +		 * to match both cases.
> +		 */
> +		return ACPI_PPTT_CACHE_TYPE_UNIFIED;
> +	}
> +}
> +
> +/* find the ACPI node describing the cache type/level for the given CPU */
> +static struct acpi_pptt_cache *acpi_find_cache_node(
> +	struct acpi_table_header *table_hdr, u32 acpi_cpu_id,
> +	enum cache_type type, unsigned int level,
> +	struct acpi_pptt_processor **node)
> +{
> +	int total_levels = 0;
> +	struct acpi_pptt_cache *found = NULL;
> +	struct acpi_pptt_processor *cpu_node;
> +	u8 acpi_type = acpi_cache_type(type);
> +
> +	pr_debug("Looking for CPU %d's level %d cache type %d\n",
> +		 acpi_cpu_id, level, acpi_type);
> +
> +	cpu_node = acpi_find_processor_node(table_hdr, acpi_cpu_id);
> +
> +	while ((cpu_node) && (!found)) {
> +		found = acpi_find_cache_level(table_hdr, cpu_node,
> +					      &total_levels, level, acpi_type);
> +		*node = cpu_node;
> +		cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
> +	}
> +
> +	return found;
> +}
> +
> +/*
> + * The ACPI spec implies that the fields in the cache structures are used to
> + * extend and correct the information probed from the hardware. In the case
> + * of arm64 the CCSIDR probing has been removed because it might be incorrect.

Though ARM64 is only user now, it may get obsolete, so better to drop that
comment.

> + */
> +static void update_cache_properties(struct cacheinfo *this_leaf,
> +				    struct acpi_pptt_cache *found_cache,
> +				    struct acpi_pptt_processor *cpu_node)
> +{
> +	int valid_flags = 0;
> +
> +	if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID) {
> +		this_leaf->size = found_cache->size;
> +		valid_flags++;
> +	}
> +	if (found_cache->flags & ACPI_PPTT_LINE_SIZE_VALID) {
> +		this_leaf->coherency_line_size = found_cache->line_size;
> +		valid_flags++;
> +	}
> +	if (found_cache->flags & ACPI_PPTT_NUMBER_OF_SETS_VALID) {
> +		this_leaf->number_of_sets = found_cache->number_of_sets;
> +		valid_flags++;
> +	}
> +	if (found_cache->flags & ACPI_PPTT_ASSOCIATIVITY_VALID) {
> +		this_leaf->ways_of_associativity = found_cache->associativity;
> +		valid_flags++;
> +	}
> +	if (found_cache->flags & ACPI_PPTT_WRITE_POLICY_VALID) {
> +		switch (found_cache->attributes & ACPI_PPTT_MASK_WRITE_POLICY) {
> +		case ACPI_PPTT_CACHE_POLICY_WT:
> +			this_leaf->attributes = CACHE_WRITE_THROUGH;
> +			break;
> +		case ACPI_PPTT_CACHE_POLICY_WB:
> +			this_leaf->attributes = CACHE_WRITE_BACK;
> +			break;
> +		}
> +		valid_flags++;
> +	}
> +	if (found_cache->flags & ACPI_PPTT_ALLOCATION_TYPE_VALID) {
> +		switch (found_cache->attributes & ACPI_PPTT_MASK_ALLOCATION_TYPE) {
> +		case ACPI_PPTT_CACHE_READ_ALLOCATE:
> +			this_leaf->attributes |= CACHE_READ_ALLOCATE;
> +			break;
> +		case ACPI_PPTT_CACHE_WRITE_ALLOCATE:
> +			this_leaf->attributes |= CACHE_WRITE_ALLOCATE;
> +			break;
> +		case ACPI_PPTT_CACHE_RW_ALLOCATE:
> +		case ACPI_PPTT_CACHE_RW_ALLOCATE_ALT:
> +			this_leaf->attributes |=
> +				CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE;
> +			break;
> +		}
> +		valid_flags++;
> +	}
> +	/*
> +	 * If all the above flags are valid, and the cache type is NOCACHE
> +	 * update the cache type as well.
> +	 */

I am not sure if it makes sense to mandate at least last 2 (read allocate
and write policy). They can be optional.

> +	if ((this_leaf->type == CACHE_TYPE_NOCACHE) &&
> +	    (valid_flags == PPTT_CHECKED_ATTRIBUTES))
> +		this_leaf->type = CACHE_TYPE_UNIFIED;
> +}
> +
> +/*
> + * Update the kernel cache information for each level of cache
> + * associated with the given acpi cpu.
> + */
> +static void cache_setup_acpi_cpu(struct acpi_table_header *table,
> +				 unsigned int cpu)
> +{
> +	struct acpi_pptt_cache *found_cache;
> +	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> +	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
> +	struct cacheinfo *this_leaf;
> +	unsigned int index = 0;
> +	struct acpi_pptt_processor *cpu_node = NULL;
> +
> +	while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
> +		this_leaf = this_cpu_ci->info_list + index;
> +		found_cache = acpi_find_cache_node(table, acpi_cpu_id,
> +						   this_leaf->type,
> +						   this_leaf->level,
> +						   &cpu_node);
> +		pr_debug("found = %p %p\n", found_cache, cpu_node);
> +		if (found_cache)
> +			update_cache_properties(this_leaf,
> +						found_cache,
> +						cpu_node);

[nit] unnecessary line break ?

> +
> +		index++;
> +	}
> +}
> +
> +/**
> + * acpi_find_last_cache_level() - Determines the number of cache levels for a PE

[nit] PE ? I think you mean processing element, but that's too ARM ARM thingy
:), can you s/PE/CPU ?

> + * @cpu: Kernel logical cpu number
> + *
> + * Given a logical cpu number, returns the number of levels of cache represented
> + * in the PPTT. Errors caused by lack of a PPTT table, or otherwise, return 0
> + * indicating we didn't find any cache levels.
> + *
> + * Return: Cache levels visible to this core.
> + */
> +int acpi_find_last_cache_level(unsigned int cpu)
> +{
> +	u32 acpi_cpu_id;
> +	struct acpi_table_header *table;
> +	int number_of_levels = 0;
> +	acpi_status status;
> +
> +	pr_debug("Cache Setup find last level cpu=%d\n", cpu);
> +
> +	acpi_cpu_id = get_acpi_id_for_cpu(cpu);
> +	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
> +	if (ACPI_FAILURE(status)) {
> +		pr_err_once("No PPTT table found, cache topology may be inaccurate\n");
> +	} else {
> +		number_of_levels = acpi_find_cache_levels(table, acpi_cpu_id);
> +		acpi_put_table(table);
> +	}
> +	pr_debug("Cache Setup find last level level=%d\n", number_of_levels);
> +
> +	return number_of_levels;
> +}
> +
> +/**
> + * cache_setup_acpi() - Override CPU cache topology with data from the PPTT

[nit]			  ^^^^ may be override/setup or just setup ?

> + * @cpu: Kernel logical cpu number

[nit] kernel is implicit, no ?

--
Regards,
Sudeep
Sudeep Holla Jan. 15, 2018, 3:48 p.m. UTC | #2
On Fri, Jan 12, 2018 at 06:59:13PM -0600, Jeremy Linton wrote:
> ACPI 6.2 adds a new table, which describes how processing units
> are related to each other in tree like fashion. Caches are
> also sprinkled throughout the tree and describe the properties
> of the caches in relation to other caches and processing units.
> 
> Add the code to parse the cache hierarchy and report the total
> number of levels of cache for a given core using
> acpi_find_last_cache_level() as well as fill out the individual
> cores cache information with cache_setup_acpi() once the
> cpu_cacheinfo structure has been populated by the arch specific
> code.
> 
> An additional patch later in the set adds the ability to report
> peers in the topology using find_acpi_cpu_topology()
> to report a unique ID for each processing unit at a given level
> in the tree. These unique id's can then be used to match related
> processing units which exist as threads, COD (clusters
> on die), within a given package, etc.
> 
> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
> ---
>  drivers/acpi/pptt.c | 476 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 476 insertions(+)
>  create mode 100644 drivers/acpi/pptt.c
> 
> diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
> new file mode 100644
> index 000000000000..2c4b3ed862a8
> --- /dev/null
> +++ b/drivers/acpi/pptt.c
> @@ -0,0 +1,476 @@
> +/*
> + * Copyright (C) 2018, ARM
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * This file implements parsing of Processor Properties Topology Table (PPTT)
> + * which is optionally used to describe the processor and cache topology.
> + * Due to the relative pointers used throughout the table, this doesn't
> + * leverage the existing subtable parsing in the kernel.
> + *
> + * The PPTT structure is an inverted tree, with each node potentially
> + * holding one or two inverted tree data structures describing
> + * the caches available at that level. Each cache structure optionally
> + * contains properties describing the cache at a given level which can be
> + * used to override hardware probed values.
> + */
> +#define pr_fmt(fmt) "ACPI PPTT: " fmt
> +
> +#include <linux/acpi.h>
> +#include <linux/cacheinfo.h>
> +#include <acpi/processor.h>
> +
> +/* total number of attributes checked by the properties code */
> +#define PPTT_CHECKED_ATTRIBUTES 6
> +
> +/*
> + * Given the PPTT table, find and verify that the subtable entry
> + * is located within the table
> + */
> +static struct acpi_subtable_header *fetch_pptt_subtable(
> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
> +{
> +	struct acpi_subtable_header *entry;
> +
> +	/* there isn't a subtable at reference 0 */
> +	if (pptt_ref < sizeof(struct acpi_subtable_header))
> +		return NULL;
> +
> +	if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length)
> +		return NULL;
> +
> +	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref);
> +
> +	if (pptt_ref + entry->length > table_hdr->length)
> +		return NULL;
> +
> +	return entry;
> +}
> +
> +static struct acpi_pptt_processor *fetch_pptt_node(
> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
> +{
> +	return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr,
> +								 pptt_ref);
> +}
> +
> +static struct acpi_pptt_cache *fetch_pptt_cache(
> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
> +{
> +	return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr,
> +							     pptt_ref);
> +}
> +
> +static struct acpi_subtable_header *acpi_get_pptt_resource(
> +	struct acpi_table_header *table_hdr,
> +	struct acpi_pptt_processor *node, int resource)
> +{
> +	u32 *ref;
> +
> +	if (resource >= node->number_of_priv_resources)
> +		return NULL;
> +
> +	ref = ACPI_ADD_PTR(u32, node, sizeof(struct acpi_pptt_processor));
> +	ref += resource;
> +
> +	return fetch_pptt_subtable(table_hdr, *ref);
> +}
> +
> +/*
> + * Attempt to find a given cache level, while counting the max number
> + * of cache levels for the cache node.
> + *
> + * Given a pptt resource, verify that it is a cache node, then walk
> + * down each level of caches, counting how many levels are found
> + * as well as checking the cache type (icache, dcache, unified). If a
> + * level & type match, then we set found, and continue the search.
> + * Once the entire cache branch has been walked return its max
> + * depth.
> + */
> +static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
> +				int local_level,
> +				struct acpi_subtable_header *res,
> +				struct acpi_pptt_cache **found,
> +				int level, int type)
> +{
> +	struct acpi_pptt_cache *cache;
> +
> +	if (res->type != ACPI_PPTT_TYPE_CACHE)
> +		return 0;
> +
> +	cache = (struct acpi_pptt_cache *) res;
> +	while (cache) {
> +		local_level++;
> +
> +		if ((local_level == level) &&
> +		    (cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) &&
> +		    ((cache->attributes & ACPI_PPTT_MASK_CACHE_TYPE) == type)) {
> +			if ((*found != NULL) && (cache != *found))

I forgot to mention this earlier, I see you have used parentheses too
liberally at quite some place in this file like the above 4 line.
Please drop those unnecessary parentheses.

--
Regards,
Sudeep
Jeremy Linton Jan. 16, 2018, 8:22 p.m. UTC | #3
Hi,

On 01/15/2018 09:48 AM, Sudeep Holla wrote:
> On Fri, Jan 12, 2018 at 06:59:13PM -0600, Jeremy Linton wrote:
>> ACPI 6.2 adds a new table, which describes how processing units
>> are related to each other in tree like fashion. Caches are
>> also sprinkled throughout the tree and describe the properties
>> of the caches in relation to other caches and processing units.
>>
>> Add the code to parse the cache hierarchy and report the total
>> number of levels of cache for a given core using
>> acpi_find_last_cache_level() as well as fill out the individual
>> cores cache information with cache_setup_acpi() once the
>> cpu_cacheinfo structure has been populated by the arch specific
>> code.
>>
>> An additional patch later in the set adds the ability to report
>> peers in the topology using find_acpi_cpu_topology()
>> to report a unique ID for each processing unit at a given level
>> in the tree. These unique id's can then be used to match related
>> processing units which exist as threads, COD (clusters
>> on die), within a given package, etc.
>>
>> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
>> ---
>>   drivers/acpi/pptt.c | 476 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 476 insertions(+)
>>   create mode 100644 drivers/acpi/pptt.c
>>
>> diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
>> new file mode 100644
>> index 000000000000..2c4b3ed862a8
>> --- /dev/null
>> +++ b/drivers/acpi/pptt.c
>> @@ -0,0 +1,476 @@
>> +/*
>> + * Copyright (C) 2018, ARM
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + *
>> + * This file implements parsing of Processor Properties Topology Table (PPTT)
>> + * which is optionally used to describe the processor and cache topology.
>> + * Due to the relative pointers used throughout the table, this doesn't
>> + * leverage the existing subtable parsing in the kernel.
>> + *
>> + * The PPTT structure is an inverted tree, with each node potentially
>> + * holding one or two inverted tree data structures describing
>> + * the caches available at that level. Each cache structure optionally
>> + * contains properties describing the cache at a given level which can be
>> + * used to override hardware probed values.
>> + */
>> +#define pr_fmt(fmt) "ACPI PPTT: " fmt
>> +
>> +#include <linux/acpi.h>
>> +#include <linux/cacheinfo.h>
>> +#include <acpi/processor.h>
>> +
>> +/* total number of attributes checked by the properties code */
>> +#define PPTT_CHECKED_ATTRIBUTES 6
>> +
>> +/*
>> + * Given the PPTT table, find and verify that the subtable entry
>> + * is located within the table
>> + */
>> +static struct acpi_subtable_header *fetch_pptt_subtable(
>> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
>> +{
>> +	struct acpi_subtable_header *entry;
>> +
>> +	/* there isn't a subtable at reference 0 */
>> +	if (pptt_ref < sizeof(struct acpi_subtable_header))
>> +		return NULL;
>> +
>> +	if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length)
>> +		return NULL;
>> +
>> +	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref);
>> +
>> +	if (pptt_ref + entry->length > table_hdr->length)
>> +		return NULL;
>> +
>> +	return entry;
>> +}
>> +
>> +static struct acpi_pptt_processor *fetch_pptt_node(
>> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
>> +{
>> +	return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr,
>> +								 pptt_ref);
>> +}
>> +
>> +static struct acpi_pptt_cache *fetch_pptt_cache(
>> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
>> +{
>> +	return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr,
>> +							     pptt_ref);
>> +}
>> +
>> +static struct acpi_subtable_header *acpi_get_pptt_resource(
>> +	struct acpi_table_header *table_hdr,
>> +	struct acpi_pptt_processor *node, int resource)
>> +{
>> +	u32 *ref;
>> +
>> +	if (resource >= node->number_of_priv_resources)
>> +		return NULL;
>> +
>> +	ref = ACPI_ADD_PTR(u32, node, sizeof(struct acpi_pptt_processor));
>> +	ref += resource;
>> +
>> +	return fetch_pptt_subtable(table_hdr, *ref);
>> +}
>> +
>> +/*
>> + * Attempt to find a given cache level, while counting the max number
>> + * of cache levels for the cache node.
>> + *
>> + * Given a pptt resource, verify that it is a cache node, then walk
>> + * down each level of caches, counting how many levels are found
>> + * as well as checking the cache type (icache, dcache, unified). If a
>> + * level & type match, then we set found, and continue the search.
>> + * Once the entire cache branch has been walked return its max
>> + * depth.
>> + */
>> +static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
>> +				int local_level,
>> +				struct acpi_subtable_header *res,
>> +				struct acpi_pptt_cache **found,
>> +				int level, int type)
>> +{
>> +	struct acpi_pptt_cache *cache;
>> +
>> +	if (res->type != ACPI_PPTT_TYPE_CACHE)
>> +		return 0;
>> +
>> +	cache = (struct acpi_pptt_cache *) res;
>> +	while (cache) {
>> +		local_level++;
>> +
>> +		if ((local_level == level) &&
>> +		    (cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) &&
>> +		    ((cache->attributes & ACPI_PPTT_MASK_CACHE_TYPE) == type)) {
>> +			if ((*found != NULL) && (cache != *found))
> 
> I forgot to mention this earlier, I see you have used parentheses too
> liberally at quite some place in this file like the above 4 line.
> Please drop those unnecessary parentheses.

Yes, those tend to get leftover when I remove code, I realized not long 
after posting this that in one of the previous patches I actually added 
a pair where there wasn't one because I first shuffled something into 
the if condition, then a few revisions later dropped it, and when I 
squashed it before posting the result was some pointless churn.

I will rescan the whole set for this.
Jeremy Linton Jan. 16, 2018, 8:55 p.m. UTC | #4
Hi,

On 01/15/2018 08:58 AM, Sudeep Holla wrote:
> On Fri, Jan 12, 2018 at 06:59:13PM -0600, Jeremy Linton wrote:
>> ACPI 6.2 adds a new table, which describes how processing units
>> are related to each other in tree like fashion. Caches are
>> also sprinkled throughout the tree and describe the properties
>> of the caches in relation to other caches and processing units.
>>
>> Add the code to parse the cache hierarchy and report the total
>> number of levels of cache for a given core using
>> acpi_find_last_cache_level() as well as fill out the individual
>> cores cache information with cache_setup_acpi() once the
>> cpu_cacheinfo structure has been populated by the arch specific
>> code.
>>
>> An additional patch later in the set adds the ability to report
>> peers in the topology using find_acpi_cpu_topology()
>> to report a unique ID for each processing unit at a given level
>> in the tree. These unique id's can then be used to match related
>> processing units which exist as threads, COD (clusters
>> on die), within a given package, etc.
>>
>> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
>> ---
>>   drivers/acpi/pptt.c | 476 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 476 insertions(+)
>>   create mode 100644 drivers/acpi/pptt.c
>>
>> diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
>> new file mode 100644
>> index 000000000000..2c4b3ed862a8
>> --- /dev/null
>> +++ b/drivers/acpi/pptt.c
>> @@ -0,0 +1,476 @@
>> +/*
>> + * Copyright (C) 2018, ARM
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + *
>> + * This file implements parsing of Processor Properties Topology Table (PPTT)
>> + * which is optionally used to describe the processor and cache topology.
>> + * Due to the relative pointers used throughout the table, this doesn't
>> + * leverage the existing subtable parsing in the kernel.
>> + *
>> + * The PPTT structure is an inverted tree, with each node potentially
>> + * holding one or two inverted tree data structures describing
>> + * the caches available at that level. Each cache structure optionally
>> + * contains properties describing the cache at a given level which can be
>> + * used to override hardware probed values.
>> + */
>> +#define pr_fmt(fmt) "ACPI PPTT: " fmt
>> +
>> +#include <linux/acpi.h>
>> +#include <linux/cacheinfo.h>
>> +#include <acpi/processor.h>
>> +
>> +/* total number of attributes checked by the properties code */
>> +#define PPTT_CHECKED_ATTRIBUTES 6
> 
> See comment on this below. If we retain this, move it closer to the usage so
> that it's easier to understand what it actually stands for.

Sure.

> 
>> +
>> +/*
>> + * Given the PPTT table, find and verify that the subtable entry
>> + * is located within the table
>> + */
>> +static struct acpi_subtable_header *fetch_pptt_subtable(
>> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
>> +{
>> +	struct acpi_subtable_header *entry;
>> +
>> +	/* there isn't a subtable at reference 0 */
>> +	if (pptt_ref < sizeof(struct acpi_subtable_header))
>> +		return NULL;
>> +
>> +	if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length)
>> +		return NULL;
>> +
>> +	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref);
>> +
>> +	if (pptt_ref + entry->length > table_hdr->length)
>> +		return NULL;
>> +
>> +	return entry;
>> +}
>> +
>> +static struct acpi_pptt_processor *fetch_pptt_node(
>> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
>> +{
>> +	return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr,
>> +								 pptt_ref);
>> +}
>> +
>> +static struct acpi_pptt_cache *fetch_pptt_cache(
>> +	struct acpi_table_header *table_hdr, u32 pptt_ref)
>> +{
>> +	return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr,
>> +							     pptt_ref);
>> +}
>> +
>> +static struct acpi_subtable_header *acpi_get_pptt_resource(
>> +	struct acpi_table_header *table_hdr,
>> +	struct acpi_pptt_processor *node, int resource)
>> +{
>> +	u32 *ref;
>> +
>> +	if (resource >= node->number_of_priv_resources)
>> +		return NULL;
>> +
>> +	ref = ACPI_ADD_PTR(u32, node, sizeof(struct acpi_pptt_processor));
>> +	ref += resource;
>> +
>> +	return fetch_pptt_subtable(table_hdr, *ref);
>> +}
>> +
>> +/*
>> + * Attempt to find a given cache level, while counting the max number
>> + * of cache levels for the cache node.
>> + *
>> + * Given a pptt resource, verify that it is a cache node, then walk
>> + * down each level of caches, counting how many levels are found
>> + * as well as checking the cache type (icache, dcache, unified). If a
>> + * level & type match, then we set found, and continue the search.
>> + * Once the entire cache branch has been walked return its max
>> + * depth.
>> + */
>> +static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
>> +				int local_level,
>> +				struct acpi_subtable_header *res,
>> +				struct acpi_pptt_cache **found,
>> +				int level, int type)
>> +{
>> +	struct acpi_pptt_cache *cache;
>> +
>> +	if (res->type != ACPI_PPTT_TYPE_CACHE)
>> +		return 0;
>> +
>> +	cache = (struct acpi_pptt_cache *) res;
>> +	while (cache) {
>> +		local_level++;
>> +
>> +		if ((local_level == level) &&
>> +		    (cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) &&
>> +		    ((cache->attributes & ACPI_PPTT_MASK_CACHE_TYPE) == type)) {
>> +			if ((*found != NULL) && (cache != *found))
>> +				pr_err("Found duplicate cache level/type unable to determine uniqueness\n");
>> +
>> +			pr_debug("Found cache @ level %d\n", level);
>> +			*found = cache;
>> +			/*
>> +			 * continue looking at this node's resource list
>> +			 * to verify that we don't find a duplicate
>> +			 * cache node.
>> +			 */
>> +		}
>> +		cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache);
>> +	}
>> +	return local_level;
>> +}
>> +
>> +/*
>> + * Given a CPU node look for cache levels that exist at this level, and then
>> + * for each cache node, count how many levels exist below (logically above) it.
>> + * If a level and type are specified, and we find that level/type, abort
>> + * processing and return the acpi_pptt_cache structure.
>> + */
>> +static struct acpi_pptt_cache *acpi_find_cache_level(
>> +	struct acpi_table_header *table_hdr,
>> +	struct acpi_pptt_processor *cpu_node,
>> +	int *starting_level, int level, int type)
>> +{
>> +	struct acpi_subtable_header *res;
>> +	int number_of_levels = *starting_level;
>> +	int resource = 0;
>> +	struct acpi_pptt_cache *ret = NULL;
>> +	int local_level;
>> +
>> +	/* walk down from processor node */
>> +	while ((res = acpi_get_pptt_resource(table_hdr, cpu_node, resource))) {
>> +		resource++;
>> +
>> +		local_level = acpi_pptt_walk_cache(table_hdr, *starting_level,
>> +						   res, &ret, level, type);
>> +		/*
>> +		 * we are looking for the max depth. Since its potentially
>> +		 * possible for a given node to have resources with differing
>> +		 * depths verify that the depth we have found is the largest.
>> +		 */
>> +		if (number_of_levels < local_level)
>> +			number_of_levels = local_level;
>> +	}
>> +	if (number_of_levels > *starting_level)
>> +		*starting_level = number_of_levels;
>> +
>> +	return ret;
>> +}
>> +
>> +/*
>> + * Given a processor node containing a processing unit, walk into it and count
>> + * how many levels exist solely for it, and then walk up each level until we hit
>> + * the root node (ignore the package level because it may be possible to have
>> + * caches that exist across packages). Count the number of cache levels that
>> + * exist at each level on the way up.
>> + */
>> +static int acpi_process_node(struct acpi_table_header *table_hdr,
>> +			     struct acpi_pptt_processor *cpu_node)
>> +{
>> +	int total_levels = 0;
>> +
>> +	do {
>> +		acpi_find_cache_level(table_hdr, cpu_node, &total_levels, 0, 0);
>> +		cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
>> +	} while (cpu_node);
>> +
>> +	return total_levels;
>> +}
>> +
>> +/*
>> + * Determine if the *node parameter is a leaf node by iterating the
>> + * PPTT table, looking for nodes which reference it.
>> + * Return 0 if we find a node referencing the passed node,
>> + * or 1 if we don't.
>> + */
>> +static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr,
>> +			       struct acpi_pptt_processor *node)
>> +{
>> +	struct acpi_subtable_header *entry;
>> +	unsigned long table_end;
>> +	u32 node_entry;
>> +	struct acpi_pptt_processor *cpu_node;
>> +
>> +	table_end = (unsigned long)table_hdr + table_hdr->length;
>> +	node_entry = ACPI_PTR_DIFF(node, table_hdr);
>> +	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
>> +			     sizeof(struct acpi_table_pptt));
>> +
>> +	while ((unsigned long)(entry + 1) < table_end) {
> 
> Is entry + 1 check sufficient to access entry of length ?
> Shouldn't that be entry + sizeof(struct acpi_pptt_processor *) so that
> we are sure it's valid entry ?

All we need is the subtable_header size which gives us the type/len. As 
we are just scanning the whole table touching the entry->length and the 
while() terminates if that is > table_end it should be ok. In general 
sizeof(acpi_pptt_processor) isn't right either since the structure only 
covers a larger "header" portion due to attached entries extending 
beyond it.
> 
>> +		cpu_node = (struct acpi_pptt_processor *)entry;
>> +		if ((entry->type == ACPI_PPTT_TYPE_PROCESSOR) &&
>> +		    (cpu_node->parent == node_entry))
>> +			return 0;
>> +		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
>> +				     entry->length);
>> +	}
>> +	return 1;
>> +}
>> +
>> +/*
>> + * Find the subtable entry describing the provided processor.
>> + * This is done by iterating the PPTT table looking for processor nodes
>> + * which have an acpi_processor_id that matches the acpi_cpu_id parameter
>> + * passed into the function. If we find a node that matches this criteria
>> + * we verify that its a leaf node in the topology rather than depending
>> + * on the valid flag, which doesn't need to be set for leaf nodes.
>> + */
>> +static struct acpi_pptt_processor *acpi_find_processor_node(
>> +	struct acpi_table_header *table_hdr,
>> +	u32 acpi_cpu_id)
>> +{
>> +	struct acpi_subtable_header *entry;
>> +	unsigned long table_end;
>> +	struct acpi_pptt_processor *cpu_node;
>> +
>> +	table_end = (unsigned long)table_hdr + table_hdr->length;
>> +	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
>> +			     sizeof(struct acpi_table_pptt));
>> +
>> +	/* find the processor structure associated with this cpuid */
>> +	while ((unsigned long)(entry + 1) < table_end) {
> 
> Same comment as above on entry + 
This one is probably less clear than the one above, because we do access 
a full acpi_pptt_processor sized structure, but only after making sure 
that is actually a processor node. If anything the check should probably 
dereference the len as a second check aka

while ((entry+1 < table_end) && (entry+1->length < table_end))

I think this may have been changed after previous review comments asked 
for the cpu_node assignment earlier and of course moving the leaf_node 
check into the if condition to avoid a bit of extra processing.

> 
>> +		cpu_node = (struct acpi_pptt_processor *)entry;
>> +
>> +		if (entry->length == 0) {
>> +			pr_err("Invalid zero length subtable\n");
>> +			break;
>> +		}
>> +		if ((entry->type == ACPI_PPTT_TYPE_PROCESSOR) &&
>> +		    (acpi_cpu_id == cpu_node->acpi_processor_id) &&
>> +		     acpi_pptt_leaf_node(table_hdr, cpu_node)) {
>> +			return (struct acpi_pptt_processor *)entry;
>> +		}
>> +
>> +		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
>> +				     entry->length);
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>> +static int acpi_find_cache_levels(struct acpi_table_header *table_hdr,
>> +				  u32 acpi_cpu_id)
>> +{
>> +	int number_of_levels = 0;
>> +	struct acpi_pptt_processor *cpu;
>> +
>> +	cpu = acpi_find_processor_node(table_hdr, acpi_cpu_id);
>> +	if (cpu)
>> +		number_of_levels = acpi_process_node(table_hdr, cpu);
>> +
>> +	return number_of_levels;
>> +}
>> +
>> +/* Convert the linux cache_type to a ACPI PPTT cache type value */
>> +static u8 acpi_cache_type(enum cache_type type)
>> +{
> 
> [nit] Just wondering if we can avoid this with some static mapping:
> 
> static u8 acpi_cache_type[] = {
>          [CACHE_TYPE_NONE] = 0,
>          [CACHE_TYPE_DATA] = ACPI_PPTT_CACHE_TYPE_DATA,
>          [CACHE_TYPE_INST] = ACPI_PPTT_CACHE_TYPE_INSTR,
>          [CACHE_TYPE_UNIFIED] = ACPI_PPTT_CACHE_TYPE_UNIFIED,
> };

Potentially, but the default case below is important and makes it a 
little less brittle because, as the recent DT commit, in your table 
TYPE_NONE actually needs to map to ACPI TYPE_UNIFIED to find the nodes.

Doesn't matter much to me, and I would convert it if the switch() got a 
lot bigger, but right now I tend to think what the code actually would 
look like is a two entry conversion (data/instruction) with a default 
initially set. So a loop for two entries is borderline IMHO.

> 
>> +	switch (type) {
>> +	case CACHE_TYPE_DATA:
>> +		pr_debug("Looking for data cache\n");
>> +		return ACPI_PPTT_CACHE_TYPE_DATA;
>> +	case CACHE_TYPE_INST:
>> +		pr_debug("Looking for instruction cache\n");
>> +		return ACPI_PPTT_CACHE_TYPE_INSTR;
>> +	default:
>> +	case CACHE_TYPE_UNIFIED:
>> +		pr_debug("Looking for unified cache\n");
>> +		/*
>> +		 * It is important that ACPI_PPTT_CACHE_TYPE_UNIFIED
>> +		 * contains the bit pattern that will match both
>> +		 * ACPI unified bit patterns because we use it later
>> +		 * to match both cases.
>> +		 */
>> +		return ACPI_PPTT_CACHE_TYPE_UNIFIED;
>> +	}
>> +}
>> +
>> +/* find the ACPI node describing the cache type/level for the given CPU */
>> +static struct acpi_pptt_cache *acpi_find_cache_node(
>> +	struct acpi_table_header *table_hdr, u32 acpi_cpu_id,
>> +	enum cache_type type, unsigned int level,
>> +	struct acpi_pptt_processor **node)
>> +{
>> +	int total_levels = 0;
>> +	struct acpi_pptt_cache *found = NULL;
>> +	struct acpi_pptt_processor *cpu_node;
>> +	u8 acpi_type = acpi_cache_type(type);
>> +
>> +	pr_debug("Looking for CPU %d's level %d cache type %d\n",
>> +		 acpi_cpu_id, level, acpi_type);
>> +
>> +	cpu_node = acpi_find_processor_node(table_hdr, acpi_cpu_id);
>> +
>> +	while ((cpu_node) && (!found)) {
>> +		found = acpi_find_cache_level(table_hdr, cpu_node,
>> +					      &total_levels, level, acpi_type);
>> +		*node = cpu_node;
>> +		cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
>> +	}
>> +
>> +	return found;
>> +}
>> +
>> +/*
>> + * The ACPI spec implies that the fields in the cache structures are used to
>> + * extend and correct the information probed from the hardware. In the case
>> + * of arm64 the CCSIDR probing has been removed because it might be incorrect.
> 
> Though ARM64 is only user now, it may get obsolete, so better to drop that
> comment.

Ok.

> 
>> + */
>> +static void update_cache_properties(struct cacheinfo *this_leaf,
>> +				    struct acpi_pptt_cache *found_cache,
>> +				    struct acpi_pptt_processor *cpu_node)
>> +{
>> +	int valid_flags = 0;
>> +
>> +	if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID) {
>> +		this_leaf->size = found_cache->size;
>> +		valid_flags++;
>> +	}
>> +	if (found_cache->flags & ACPI_PPTT_LINE_SIZE_VALID) {
>> +		this_leaf->coherency_line_size = found_cache->line_size;
>> +		valid_flags++;
>> +	}
>> +	if (found_cache->flags & ACPI_PPTT_NUMBER_OF_SETS_VALID) {
>> +		this_leaf->number_of_sets = found_cache->number_of_sets;
>> +		valid_flags++;
>> +	}
>> +	if (found_cache->flags & ACPI_PPTT_ASSOCIATIVITY_VALID) {
>> +		this_leaf->ways_of_associativity = found_cache->associativity;
>> +		valid_flags++;
>> +	}
>> +	if (found_cache->flags & ACPI_PPTT_WRITE_POLICY_VALID) {
>> +		switch (found_cache->attributes & ACPI_PPTT_MASK_WRITE_POLICY) {
>> +		case ACPI_PPTT_CACHE_POLICY_WT:
>> +			this_leaf->attributes = CACHE_WRITE_THROUGH;
>> +			break;
>> +		case ACPI_PPTT_CACHE_POLICY_WB:
>> +			this_leaf->attributes = CACHE_WRITE_BACK;
>> +			break;
>> +		}
>> +		valid_flags++;
>> +	}
>> +	if (found_cache->flags & ACPI_PPTT_ALLOCATION_TYPE_VALID) {
>> +		switch (found_cache->attributes & ACPI_PPTT_MASK_ALLOCATION_TYPE) {
>> +		case ACPI_PPTT_CACHE_READ_ALLOCATE:
>> +			this_leaf->attributes |= CACHE_READ_ALLOCATE;
>> +			break;
>> +		case ACPI_PPTT_CACHE_WRITE_ALLOCATE:
>> +			this_leaf->attributes |= CACHE_WRITE_ALLOCATE;
>> +			break;
>> +		case ACPI_PPTT_CACHE_RW_ALLOCATE:
>> +		case ACPI_PPTT_CACHE_RW_ALLOCATE_ALT:
>> +			this_leaf->attributes |=
>> +				CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE;
>> +			break;
>> +		}
>> +		valid_flags++;
>> +	}
>> +	/*
>> +	 * If all the above flags are valid, and the cache type is NOCACHE
>> +	 * update the cache type as well.
>> +	 */
> 
> I am not sure if it makes sense to mandate at least last 2 (read allocate
> and write policy). They can be optional.

As I mentioned in the previous set, I'm of the opinion that some are 
more useful than others, but to avoid having a discussion about which 
ones, just decided to do them all. After all, its not going to hurt (AFAIK).


If your more _sure_ and no one else has an opinion then i will remove 
those two.


> 
>> +	if ((this_leaf->type == CACHE_TYPE_NOCACHE) &&
>> +	    (valid_flags == PPTT_CHECKED_ATTRIBUTES))
>> +		this_leaf->type = CACHE_TYPE_UNIFIED;
>> +}
>> +
>> +/*
>> + * Update the kernel cache information for each level of cache
>> + * associated with the given acpi cpu.
>> + */
>> +static void cache_setup_acpi_cpu(struct acpi_table_header *table,
>> +				 unsigned int cpu)
>> +{
>> +	struct acpi_pptt_cache *found_cache;
>> +	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
>> +	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
>> +	struct cacheinfo *this_leaf;
>> +	unsigned int index = 0;
>> +	struct acpi_pptt_processor *cpu_node = NULL;
>> +
>> +	while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
>> +		this_leaf = this_cpu_ci->info_list + index;
>> +		found_cache = acpi_find_cache_node(table, acpi_cpu_id,
>> +						   this_leaf->type,
>> +						   this_leaf->level,
>> +						   &cpu_node);
>> +		pr_debug("found = %p %p\n", found_cache, cpu_node);
>> +		if (found_cache)
>> +			update_cache_properties(this_leaf,
>> +						found_cache,
>> +						cpu_node);
> 
> [nit] unnecessary line break ?
> 
>> +
>> +		index++;
>> +	}
>> +}
>> +
>> +/**
>> + * acpi_find_last_cache_level() - Determines the number of cache levels for a PE
> 
> [nit] PE ? I think you mean processing element, but that's too ARM ARM thingy
> :), can you s/PE/CPU ?

Yes, I probably slipped up there.

> 
>> + * @cpu: Kernel logical cpu number
>> + *
>> + * Given a logical cpu number, returns the number of levels of cache represented
>> + * in the PPTT. Errors caused by lack of a PPTT table, or otherwise, return 0
>> + * indicating we didn't find any cache levels.
>> + *
>> + * Return: Cache levels visible to this core.
>> + */
>> +int acpi_find_last_cache_level(unsigned int cpu)
>> +{
>> +	u32 acpi_cpu_id;
>> +	struct acpi_table_header *table;
>> +	int number_of_levels = 0;
>> +	acpi_status status;
>> +
>> +	pr_debug("Cache Setup find last level cpu=%d\n", cpu);
>> +
>> +	acpi_cpu_id = get_acpi_id_for_cpu(cpu);
>> +	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
>> +	if (ACPI_FAILURE(status)) {
>> +		pr_err_once("No PPTT table found, cache topology may be inaccurate\n");
>> +	} else {
>> +		number_of_levels = acpi_find_cache_levels(table, acpi_cpu_id);
>> +		acpi_put_table(table);
>> +	}
>> +	pr_debug("Cache Setup find last level level=%d\n", number_of_levels);
>> +
>> +	return number_of_levels;
>> +}
>> +
>> +/**
>> + * cache_setup_acpi() - Override CPU cache topology with data from the PPTT
> 
> [nit]			  ^^^^ may be override/setup or just setup ?

I think of it more as "expand upon", or override, but its obviosuly 
creating (setting new) things too.


> 
>> + * @cpu: Kernel logical cpu number
> 
> [nit] kernel is implicit, no ?

Probably...
Sudeep Holla Jan. 17, 2018, 5:58 p.m. UTC | #5
On 16/01/18 20:55, Jeremy Linton wrote:
> 

[...]

>>> +/*
>>> + * Determine if the *node parameter is a leaf node by iterating the
>>> + * PPTT table, looking for nodes which reference it.
>>> + * Return 0 if we find a node referencing the passed node,
>>> + * or 1 if we don't.
>>> + */
>>> +static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr,
>>> +                   struct acpi_pptt_processor *node)
>>> +{
>>> +    struct acpi_subtable_header *entry;
>>> +    unsigned long table_end;
>>> +    u32 node_entry;
>>> +    struct acpi_pptt_processor *cpu_node;
>>> +
>>> +    table_end = (unsigned long)table_hdr + table_hdr->length;
>>> +    node_entry = ACPI_PTR_DIFF(node, table_hdr);
>>> +    entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
>>> +                 sizeof(struct acpi_table_pptt));
>>> +
>>> +    while ((unsigned long)(entry + 1) < table_end) {
>>
>> Is entry + 1 check sufficient to access entry of length ?
>> Shouldn't that be entry + sizeof(struct acpi_pptt_processor *) so that
>> we are sure it's valid entry ?
> 
> All we need is the subtable_header size which gives us the type/len. As
> we are just scanning the whole table touching the entry->length and the
> while() terminates if that is > table_end it should be ok. In general
> sizeof(acpi_pptt_processor) isn't right either since the structure only
> covers a larger "header" portion due to attached entries extending
> beyond it.

OK, understood. In that case it should be at least
entry + sizeof(struct acpi_subtable_header), no ?

I did a quick check and acpi_parse_entries_array does exactly same.
Does it make sense to keep it consistent with that ?

Also looking at acpi_parse_entries_array, I recall now that it has some
kind of handler to deal with such table parsing. I think we should be
able to reuse, I need to stare more at the code to see how :(.
Let me know if you already looked at that and found reasons not to use it.

>>> +        cpu_node = (struct acpi_pptt_processor *)entry;
>>> +        if ((entry->type == ACPI_PPTT_TYPE_PROCESSOR) &&
>>> +            (cpu_node->parent == node_entry))
>>> +            return 0;
>>> +        entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
>>> +                     entry->length);
>>> +    }
>>> +    return 1;
>>> +}
>>> +
>>> +/*
>>> + * Find the subtable entry describing the provided processor.
>>> + * This is done by iterating the PPTT table looking for processor nodes
>>> + * which have an acpi_processor_id that matches the acpi_cpu_id
>>> parameter
>>> + * passed into the function. If we find a node that matches this
>>> criteria
>>> + * we verify that its a leaf node in the topology rather than depending
>>> + * on the valid flag, which doesn't need to be set for leaf nodes.
>>> + */
>>> +static struct acpi_pptt_processor *acpi_find_processor_node(
>>> +    struct acpi_table_header *table_hdr,
>>> +    u32 acpi_cpu_id)
>>> +{
>>> +    struct acpi_subtable_header *entry;
>>> +    unsigned long table_end;
>>> +    struct acpi_pptt_processor *cpu_node;
>>> +
>>> +    table_end = (unsigned long)table_hdr + table_hdr->length;
>>> +    entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
>>> +                 sizeof(struct acpi_table_pptt));
>>> +
>>> +    /* find the processor structure associated with this cpuid */
>>> +    while ((unsigned long)(entry + 1) < table_end) {
>>
>> Same comment as above on entry + 
> This one is probably less clear than the one above, because we do access
> a full acpi_pptt_processor sized structure, but only after making sure
> that is actually a processor node. If anything the check should probably
> dereference the len as a second check aka
> 
> while ((entry+1 < table_end) && (entry+1->length < table_end))
> 
> I think this may have been changed after previous review comments asked
> for the cpu_node assignment earlier and of course moving the leaf_node
> check into the if condition to avoid a bit of extra processing.
> 

Makes sense.


>>> +/* Convert the linux cache_type to a ACPI PPTT cache type value */
>>> +static u8 acpi_cache_type(enum cache_type type)
>>> +{
>>
>> [nit] Just wondering if we can avoid this with some static mapping:
>>
>> static u8 acpi_cache_type[] = {
>>          [CACHE_TYPE_NONE] = 0,
>>          [CACHE_TYPE_DATA] = ACPI_PPTT_CACHE_TYPE_DATA,
>>          [CACHE_TYPE_INST] = ACPI_PPTT_CACHE_TYPE_INSTR,
>>          [CACHE_TYPE_UNIFIED] = ACPI_PPTT_CACHE_TYPE_UNIFIED,
>> };
> 
> Potentially, but the default case below is important and makes it a
> little less brittle because, as the recent DT commit, in your table
> TYPE_NONE actually needs to map to ACPI TYPE_UNIFIED to find the nodes.
> 

OK

> Doesn't matter much to me, and I would convert it if the switch() got a
> lot bigger, but right now I tend to think what the code actually would
> look like is a two entry conversion (data/instruction) with a default
> initially set. So a loop for two entries is borderline IMHO.
> 

Sure, that sounds good.


>>> +    /*
>>> +     * If all the above flags are valid, and the cache type is NOCACHE
>>> +     * update the cache type as well.
>>> +     */
>>
>> I am not sure if it makes sense to mandate at least last 2 (read allocate
>> and write policy). They can be optional.
> 
> As I mentioned in the previous set, I'm of the opinion that some are
> more useful than others, but to avoid having a discussion about which
> ones, just decided to do them all. After all, its not going to hurt
> (AFAIK).
> 

Sorry I missed to notice that.

> 
> If your more _sure_ and no one else has an opinion then i will remove
> those two.
>

That was just my opinion based on the possibility that some vendors
don't what to provide those information. We can wait until we come
across tables that have these missing.
Sudeep Holla Jan. 17, 2018, 6 p.m. UTC | #6
On 16/01/18 20:22, Jeremy Linton wrote:
> Hi,
> 

[..]

>>
>> I forgot to mention this earlier, I see you have used parentheses too
>> liberally at quite some place in this file like the above 4 line.
>> Please drop those unnecessary parentheses.
> 
> Yes, those tend to get leftover when I remove code, I realized not long
> after posting this that in one of the previous patches I actually added
> a pair where there wasn't one because I first shuffled something into
> the if condition, then a few revisions later dropped it, and when I
> squashed it before posting the result was some pointless churn.
> 
> I will rescan the whole set for this.
> 

Better, just to avoid keeping such things until the end, resulting in
churning up series just for that.
diff mbox

Patch

diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
new file mode 100644
index 000000000000..2c4b3ed862a8
--- /dev/null
+++ b/drivers/acpi/pptt.c
@@ -0,0 +1,476 @@ 
+/*
+ * Copyright (C) 2018, ARM
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * This file implements parsing of Processor Properties Topology Table (PPTT)
+ * which is optionally used to describe the processor and cache topology.
+ * Due to the relative pointers used throughout the table, this doesn't
+ * leverage the existing subtable parsing in the kernel.
+ *
+ * The PPTT structure is an inverted tree, with each node potentially
+ * holding one or two inverted tree data structures describing
+ * the caches available at that level. Each cache structure optionally
+ * contains properties describing the cache at a given level which can be
+ * used to override hardware probed values.
+ */
+#define pr_fmt(fmt) "ACPI PPTT: " fmt
+
+#include <linux/acpi.h>
+#include <linux/cacheinfo.h>
+#include <acpi/processor.h>
+
+/* total number of attributes checked by the properties code */
+#define PPTT_CHECKED_ATTRIBUTES 6
+
+/*
+ * Given the PPTT table, find and verify that the subtable entry
+ * is located within the table
+ */
+static struct acpi_subtable_header *fetch_pptt_subtable(
+	struct acpi_table_header *table_hdr, u32 pptt_ref)
+{
+	struct acpi_subtable_header *entry;
+
+	/* there isn't a subtable at reference 0 */
+	if (pptt_ref < sizeof(struct acpi_subtable_header))
+		return NULL;
+
+	if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length)
+		return NULL;
+
+	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref);
+
+	if (pptt_ref + entry->length > table_hdr->length)
+		return NULL;
+
+	return entry;
+}
+
+static struct acpi_pptt_processor *fetch_pptt_node(
+	struct acpi_table_header *table_hdr, u32 pptt_ref)
+{
+	return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr,
+								 pptt_ref);
+}
+
+static struct acpi_pptt_cache *fetch_pptt_cache(
+	struct acpi_table_header *table_hdr, u32 pptt_ref)
+{
+	return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr,
+							     pptt_ref);
+}
+
+static struct acpi_subtable_header *acpi_get_pptt_resource(
+	struct acpi_table_header *table_hdr,
+	struct acpi_pptt_processor *node, int resource)
+{
+	u32 *ref;
+
+	if (resource >= node->number_of_priv_resources)
+		return NULL;
+
+	ref = ACPI_ADD_PTR(u32, node, sizeof(struct acpi_pptt_processor));
+	ref += resource;
+
+	return fetch_pptt_subtable(table_hdr, *ref);
+}
+
+/*
+ * Attempt to find a given cache level, while counting the max number
+ * of cache levels for the cache node.
+ *
+ * Given a pptt resource, verify that it is a cache node, then walk
+ * down each level of caches, counting how many levels are found
+ * as well as checking the cache type (icache, dcache, unified). If a
+ * level & type match, then we set found, and continue the search.
+ * Once the entire cache branch has been walked return its max
+ * depth.
+ */
+static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
+				int local_level,
+				struct acpi_subtable_header *res,
+				struct acpi_pptt_cache **found,
+				int level, int type)
+{
+	struct acpi_pptt_cache *cache;
+
+	if (res->type != ACPI_PPTT_TYPE_CACHE)
+		return 0;
+
+	cache = (struct acpi_pptt_cache *) res;
+	while (cache) {
+		local_level++;
+
+		if ((local_level == level) &&
+		    (cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) &&
+		    ((cache->attributes & ACPI_PPTT_MASK_CACHE_TYPE) == type)) {
+			if ((*found != NULL) && (cache != *found))
+				pr_err("Found duplicate cache level/type unable to determine uniqueness\n");
+
+			pr_debug("Found cache @ level %d\n", level);
+			*found = cache;
+			/*
+			 * continue looking at this node's resource list
+			 * to verify that we don't find a duplicate
+			 * cache node.
+			 */
+		}
+		cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache);
+	}
+	return local_level;
+}
+
+/*
+ * Given a CPU node look for cache levels that exist at this level, and then
+ * for each cache node, count how many levels exist below (logically above) it.
+ * If a level and type are specified, and we find that level/type, abort
+ * processing and return the acpi_pptt_cache structure.
+ */
+static struct acpi_pptt_cache *acpi_find_cache_level(
+	struct acpi_table_header *table_hdr,
+	struct acpi_pptt_processor *cpu_node,
+	int *starting_level, int level, int type)
+{
+	struct acpi_subtable_header *res;
+	int number_of_levels = *starting_level;
+	int resource = 0;
+	struct acpi_pptt_cache *ret = NULL;
+	int local_level;
+
+	/* walk down from processor node */
+	while ((res = acpi_get_pptt_resource(table_hdr, cpu_node, resource))) {
+		resource++;
+
+		local_level = acpi_pptt_walk_cache(table_hdr, *starting_level,
+						   res, &ret, level, type);
+		/*
+		 * we are looking for the max depth. Since its potentially
+		 * possible for a given node to have resources with differing
+		 * depths verify that the depth we have found is the largest.
+		 */
+		if (number_of_levels < local_level)
+			number_of_levels = local_level;
+	}
+	if (number_of_levels > *starting_level)
+		*starting_level = number_of_levels;
+
+	return ret;
+}
+
+/*
+ * Given a processor node containing a processing unit, walk into it and count
+ * how many levels exist solely for it, and then walk up each level until we hit
+ * the root node (ignore the package level because it may be possible to have
+ * caches that exist across packages). Count the number of cache levels that
+ * exist at each level on the way up.
+ */
+static int acpi_process_node(struct acpi_table_header *table_hdr,
+			     struct acpi_pptt_processor *cpu_node)
+{
+	int total_levels = 0;
+
+	do {
+		acpi_find_cache_level(table_hdr, cpu_node, &total_levels, 0, 0);
+		cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
+	} while (cpu_node);
+
+	return total_levels;
+}
+
+/*
+ * Determine if the *node parameter is a leaf node by iterating the
+ * PPTT table, looking for nodes which reference it.
+ * Return 0 if we find a node referencing the passed node,
+ * or 1 if we don't.
+ */
+static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr,
+			       struct acpi_pptt_processor *node)
+{
+	struct acpi_subtable_header *entry;
+	unsigned long table_end;
+	u32 node_entry;
+	struct acpi_pptt_processor *cpu_node;
+
+	table_end = (unsigned long)table_hdr + table_hdr->length;
+	node_entry = ACPI_PTR_DIFF(node, table_hdr);
+	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
+			     sizeof(struct acpi_table_pptt));
+
+	while ((unsigned long)(entry + 1) < table_end) {
+		cpu_node = (struct acpi_pptt_processor *)entry;
+		if ((entry->type == ACPI_PPTT_TYPE_PROCESSOR) &&
+		    (cpu_node->parent == node_entry))
+			return 0;
+		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
+				     entry->length);
+	}
+	return 1;
+}
+
+/*
+ * Find the subtable entry describing the provided processor.
+ * This is done by iterating the PPTT table looking for processor nodes
+ * which have an acpi_processor_id that matches the acpi_cpu_id parameter
+ * passed into the function. If we find a node that matches this criteria
+ * we verify that its a leaf node in the topology rather than depending
+ * on the valid flag, which doesn't need to be set for leaf nodes.
+ */
+static struct acpi_pptt_processor *acpi_find_processor_node(
+	struct acpi_table_header *table_hdr,
+	u32 acpi_cpu_id)
+{
+	struct acpi_subtable_header *entry;
+	unsigned long table_end;
+	struct acpi_pptt_processor *cpu_node;
+
+	table_end = (unsigned long)table_hdr + table_hdr->length;
+	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
+			     sizeof(struct acpi_table_pptt));
+
+	/* find the processor structure associated with this cpuid */
+	while ((unsigned long)(entry + 1) < table_end) {
+		cpu_node = (struct acpi_pptt_processor *)entry;
+
+		if (entry->length == 0) {
+			pr_err("Invalid zero length subtable\n");
+			break;
+		}
+		if ((entry->type == ACPI_PPTT_TYPE_PROCESSOR) &&
+		    (acpi_cpu_id == cpu_node->acpi_processor_id) &&
+		     acpi_pptt_leaf_node(table_hdr, cpu_node)) {
+			return (struct acpi_pptt_processor *)entry;
+		}
+
+		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
+				     entry->length);
+	}
+
+	return NULL;
+}
+
+static int acpi_find_cache_levels(struct acpi_table_header *table_hdr,
+				  u32 acpi_cpu_id)
+{
+	int number_of_levels = 0;
+	struct acpi_pptt_processor *cpu;
+
+	cpu = acpi_find_processor_node(table_hdr, acpi_cpu_id);
+	if (cpu)
+		number_of_levels = acpi_process_node(table_hdr, cpu);
+
+	return number_of_levels;
+}
+
+/* Convert the linux cache_type to a ACPI PPTT cache type value */
+static u8 acpi_cache_type(enum cache_type type)
+{
+	switch (type) {
+	case CACHE_TYPE_DATA:
+		pr_debug("Looking for data cache\n");
+		return ACPI_PPTT_CACHE_TYPE_DATA;
+	case CACHE_TYPE_INST:
+		pr_debug("Looking for instruction cache\n");
+		return ACPI_PPTT_CACHE_TYPE_INSTR;
+	default:
+	case CACHE_TYPE_UNIFIED:
+		pr_debug("Looking for unified cache\n");
+		/*
+		 * It is important that ACPI_PPTT_CACHE_TYPE_UNIFIED
+		 * contains the bit pattern that will match both
+		 * ACPI unified bit patterns because we use it later
+		 * to match both cases.
+		 */
+		return ACPI_PPTT_CACHE_TYPE_UNIFIED;
+	}
+}
+
+/* find the ACPI node describing the cache type/level for the given CPU */
+static struct acpi_pptt_cache *acpi_find_cache_node(
+	struct acpi_table_header *table_hdr, u32 acpi_cpu_id,
+	enum cache_type type, unsigned int level,
+	struct acpi_pptt_processor **node)
+{
+	int total_levels = 0;
+	struct acpi_pptt_cache *found = NULL;
+	struct acpi_pptt_processor *cpu_node;
+	u8 acpi_type = acpi_cache_type(type);
+
+	pr_debug("Looking for CPU %d's level %d cache type %d\n",
+		 acpi_cpu_id, level, acpi_type);
+
+	cpu_node = acpi_find_processor_node(table_hdr, acpi_cpu_id);
+
+	while ((cpu_node) && (!found)) {
+		found = acpi_find_cache_level(table_hdr, cpu_node,
+					      &total_levels, level, acpi_type);
+		*node = cpu_node;
+		cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
+	}
+
+	return found;
+}
+
+/*
+ * The ACPI spec implies that the fields in the cache structures are used to
+ * extend and correct the information probed from the hardware. In the case
+ * of arm64 the CCSIDR probing has been removed because it might be incorrect.
+ */
+static void update_cache_properties(struct cacheinfo *this_leaf,
+				    struct acpi_pptt_cache *found_cache,
+				    struct acpi_pptt_processor *cpu_node)
+{
+	int valid_flags = 0;
+
+	if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID) {
+		this_leaf->size = found_cache->size;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_LINE_SIZE_VALID) {
+		this_leaf->coherency_line_size = found_cache->line_size;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_NUMBER_OF_SETS_VALID) {
+		this_leaf->number_of_sets = found_cache->number_of_sets;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_ASSOCIATIVITY_VALID) {
+		this_leaf->ways_of_associativity = found_cache->associativity;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_WRITE_POLICY_VALID) {
+		switch (found_cache->attributes & ACPI_PPTT_MASK_WRITE_POLICY) {
+		case ACPI_PPTT_CACHE_POLICY_WT:
+			this_leaf->attributes = CACHE_WRITE_THROUGH;
+			break;
+		case ACPI_PPTT_CACHE_POLICY_WB:
+			this_leaf->attributes = CACHE_WRITE_BACK;
+			break;
+		}
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_ALLOCATION_TYPE_VALID) {
+		switch (found_cache->attributes & ACPI_PPTT_MASK_ALLOCATION_TYPE) {
+		case ACPI_PPTT_CACHE_READ_ALLOCATE:
+			this_leaf->attributes |= CACHE_READ_ALLOCATE;
+			break;
+		case ACPI_PPTT_CACHE_WRITE_ALLOCATE:
+			this_leaf->attributes |= CACHE_WRITE_ALLOCATE;
+			break;
+		case ACPI_PPTT_CACHE_RW_ALLOCATE:
+		case ACPI_PPTT_CACHE_RW_ALLOCATE_ALT:
+			this_leaf->attributes |=
+				CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE;
+			break;
+		}
+		valid_flags++;
+	}
+	/*
+	 * If all the above flags are valid, and the cache type is NOCACHE
+	 * update the cache type as well.
+	 */
+	if ((this_leaf->type == CACHE_TYPE_NOCACHE) &&
+	    (valid_flags == PPTT_CHECKED_ATTRIBUTES))
+		this_leaf->type = CACHE_TYPE_UNIFIED;
+}
+
+/*
+ * Update the kernel cache information for each level of cache
+ * associated with the given acpi cpu.
+ */
+static void cache_setup_acpi_cpu(struct acpi_table_header *table,
+				 unsigned int cpu)
+{
+	struct acpi_pptt_cache *found_cache;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+	struct cacheinfo *this_leaf;
+	unsigned int index = 0;
+	struct acpi_pptt_processor *cpu_node = NULL;
+
+	while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
+		this_leaf = this_cpu_ci->info_list + index;
+		found_cache = acpi_find_cache_node(table, acpi_cpu_id,
+						   this_leaf->type,
+						   this_leaf->level,
+						   &cpu_node);
+		pr_debug("found = %p %p\n", found_cache, cpu_node);
+		if (found_cache)
+			update_cache_properties(this_leaf,
+						found_cache,
+						cpu_node);
+
+		index++;
+	}
+}
+
+/**
+ * acpi_find_last_cache_level() - Determines the number of cache levels for a PE
+ * @cpu: Kernel logical cpu number
+ *
+ * Given a logical cpu number, returns the number of levels of cache represented
+ * in the PPTT. Errors caused by lack of a PPTT table, or otherwise, return 0
+ * indicating we didn't find any cache levels.
+ *
+ * Return: Cache levels visible to this core.
+ */
+int acpi_find_last_cache_level(unsigned int cpu)
+{
+	u32 acpi_cpu_id;
+	struct acpi_table_header *table;
+	int number_of_levels = 0;
+	acpi_status status;
+
+	pr_debug("Cache Setup find last level cpu=%d\n", cpu);
+
+	acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+	if (ACPI_FAILURE(status)) {
+		pr_err_once("No PPTT table found, cache topology may be inaccurate\n");
+	} else {
+		number_of_levels = acpi_find_cache_levels(table, acpi_cpu_id);
+		acpi_put_table(table);
+	}
+	pr_debug("Cache Setup find last level level=%d\n", number_of_levels);
+
+	return number_of_levels;
+}
+
+/**
+ * cache_setup_acpi() - Override CPU cache topology with data from the PPTT
+ * @cpu: Kernel logical cpu number
+ *
+ * Updates the global cache info provided by cpu_get_cacheinfo()
+ * when there are valid properties in the acpi_pptt_cache nodes. A
+ * successful parse may not result in any updates if none of the
+ * cache levels have any valid flags set.  Futher, a unique value is
+ * associated with each known CPU cache entry. This unique value
+ * can be used to determine whether caches are shared between cpus.
+ *
+ * Return: -ENOENT on failure to find table, or 0 on success
+ */
+int cache_setup_acpi(unsigned int cpu)
+{
+	struct acpi_table_header *table;
+	acpi_status status;
+
+	pr_debug("Cache Setup ACPI cpu %d\n", cpu);
+
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+	if (ACPI_FAILURE(status)) {
+		pr_err_once("No PPTT table found, cache topology may be inaccurate\n");
+		return -ENOENT;
+	}
+
+	cache_setup_acpi_cpu(table, cpu);
+	acpi_put_table(table);
+
+	return status;
+}