diff mbox

[1/3] arm64: topology: Add support for topology DT bindings

Message ID 1395252139-16239-1-git-send-email-broonie@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Mark Brown March 19, 2014, 6:02 p.m. UTC
From: Mark Brown <broonie@linaro.org>

Add support for parsing the explicit topology bindings to discover the
topology of the system.

Since it is not currently clear how to map multi-level clusters for the
scheduler all leaf clusters are presented to the scheduler at the same
level. This should be enough to provide good support for current systems.

Signed-off-by: Mark Brown <broonie@linaro.org>
---

Stylistic updates requested by Lorenzo and a fix for a missing error
check in DT parsing.

 arch/arm64/kernel/topology.c | 172 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 166 insertions(+), 6 deletions(-)

Comments

Lorenzo Pieralisi March 20, 2014, 11:26 a.m. UTC | #1
On Wed, Mar 19, 2014 at 06:02:17PM +0000, Mark Brown wrote:
> +#ifdef CONFIG_OF

This ifdef can be removed, CONFIG_OF is always selected for arm64 and
the !CONFIG_OF path

> +#else
> +static inline int parse_dt_topology(void) { return 0; }
> +#endif

is wrong, it should return failure. You should remove the CONFIG_OF
ifdeffery.

> +static int __init get_cpu_for_node(struct device_node *node)
> +{
> +	struct device_node *cpu_node;
> +	int cpu;
> +
> +	cpu_node = of_parse_phandle(node, "cpu", 0);
> +	if (!cpu_node)
> +		return -1;
> +
> +	for_each_possible_cpu(cpu) {
> +		if (of_get_cpu_node(cpu, NULL) == cpu_node)
> +			return cpu;
> +	}
> +
> +	pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
> +	return -1;
> +}
> +
> +static int __init parse_core(struct device_node *core, int cluster_id,
> +			     int core_id)
> +{
> +	char name[10];
> +	bool leaf = true;
> +	int i = 0;
> +	int cpu;
> +	struct device_node *t;
> +
> +	do {
> +		snprintf(name, sizeof(name), "thread%d", i);
> +		t = of_get_child_by_name(core, name);
> +		if (t) {
> +			leaf = false;
> +			cpu = get_cpu_for_node(t);
> +			if (cpu >= 0) {
> +				cpu_topology[cpu].cluster_id = cluster_id;
> +				cpu_topology[cpu].core_id = core_id;
> +				cpu_topology[cpu].thread_id = i;
> +			} else {
> +				pr_err("%s: Can't get CPU for thread\n",
> +				       t->full_name);
> +				return -EINVAL;
> +			}
> +		}
> +		i++;
> +	} while (t);
> +
> +	cpu = get_cpu_for_node(core);
> +	if (cpu >= 0) {
> +		if (!leaf) {
> +			pr_err("%s: Core has both threads and CPU\n",
> +			       core->full_name);
> +			return -EINVAL;
> +		}
> +
> +		cpu_topology[cpu].cluster_id = cluster_id;
> +		cpu_topology[cpu].core_id = core_id;
> +	} else if (leaf) {
> +		pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static int __init parse_cluster(struct device_node *cluster, int depth)
> +{
> +	char name[10];
> +	bool leaf = true;
> +	bool has_cores = false;
> +	struct device_node *c;
> +	static int __initdata cluster_id;

WARNING: __initdata should be placed after cluster_id
#103: FILE: arch/arm64/kernel/topology.c:96:
+	static int __initdata cluster_id;

> +	int core_id = 0;
> +	int i, ret;
> +
> +	/*
> +	 * First check for child clusters; we currently ignore any
> +	 * information about the nesting of clusters and present the
> +	 * scheduler with a flat list of them.
> +	 */
> +	i = 0;
> +	do {
> +		snprintf(name, sizeof(name), "cluster%d", i);
> +		c = of_get_child_by_name(cluster, name);
> +		if (c) {
> +			ret = parse_cluster(c, depth + 1);
> +			if (ret != 0)
> +				return ret;
> +			leaf = false;
> +		}
> +		i++;
> +	} while (c);
> +
> +	/* Now check for cores */
> +	i = 0;
> +	do {
> +		snprintf(name, sizeof(name), "core%d", i);
> +		c = of_get_child_by_name(cluster, name);
> +		if (c) {
> +			has_cores = true;
> +
> +			if (depth == 0)
> +				pr_err("%s: cpu-map children should be clusters\n",
> +				       c->full_name);
> +
> +			if (leaf) {
> +				ret = parse_core(c, cluster_id, core_id++);
> +				if (ret != 0) {
> +					return ret;
> +				}

WARNING: braces {} are not necessary for single statement blocks
#139: FILE: arch/arm64/kernel/topology.c:132:
+				if (ret != 0) {
+					return ret;
+				}

> +			} else {
> +				pr_err("%s: Non-leaf cluster with core %s\n",
> +				       cluster->full_name, name);
> +				return -EINVAL;
> +			}
> +		}
> +		i++;
> +	} while (c);
> +
> +	if (leaf && !has_cores)
> +		pr_warn("%s: empty cluster\n", cluster->full_name);
> +
> +	if (leaf)
> +		cluster_id++;
> +
> +	return 0;
> +}
> +
> +static int __init parse_dt_topology(void)
> +{
> +	struct device_node *cn;
> +
> +	cn = of_find_node_by_path("/cpus");
> +	if (!cn) {
> +		pr_err("No CPU information found in DT\n");
> +		return 0;
> +	}
> +
> +	/*
> +	 * When topology is provided cpu-map is essentially a root
> +	 * cluster with restricted subnodes.
> +	 */
> +	cn = of_get_child_by_name(cn, "cpu-map");
> +	if (!cn)
> +		return 0;
> +	return parse_cluster(cn, 0);

We still have a problem here. If the topology does not contain bindings
for some cpu nodes, parse_cluster() does not fail and we end up with an
incomplete topology. We have two choices: either we check the topology
info for all possible cpus here and reset if there is missing information
or we do the lazy version and reset the topology (for all possible cpus) in
update_siblings_masks().
I'd rather do it here, in preparation for MPIDR_EL1 fallback solution
(where there will always be topology information configured and the register
will always be there in all its glory).

This also means that update_sibling_masks() should just pr_debug on
missing information since by the time a cpu get there either the
topology has been parsed correctly or it has been reset for all CPUs, no
need to reset it again.

Lorenzo
Mark Brown March 20, 2014, 1:43 p.m. UTC | #2
On Thu, Mar 20, 2014 at 11:26:50AM +0000, Lorenzo Pieralisi wrote:
> On Wed, Mar 19, 2014 at 06:02:17PM +0000, Mark Brown wrote:
> > +#ifdef CONFIG_OF

> This ifdef can be removed, CONFIG_OF is always selected for arm64 and
> the !CONFIG_OF path

This has been present since the very first time these patches were
posted but hasn't been mentioned as being a problem previously.

> > +#else
> > +static inline int parse_dt_topology(void) { return 0; }
> > +#endif

> is wrong, it should return failure. You should remove the CONFIG_OF
> ifdeffery.

Yup.  It actually won't affect the behaviour at present though - since
it won't do anything the result will be just the same as if we return an
error and reset.

Given ACPI (which really looks like it's going to happen at some point
and presumably make OF optional) I'm not sure removing the handling of
OF is actually constructive but whatever, it's done now...

> > +			if (leaf) {
> > +				ret = parse_core(c, cluster_id, core_id++);
> > +				if (ret != 0) {
> > +					return ret;
> > +				}

> WARNING: braces {} are not necessary for single statement blocks
> #139: FILE: arch/arm64/kernel/topology.c:132:

Like I say I don't think checkpatch is being helpful on this one, the
code looks worse.  Again, whatever.

> We still have a problem here. If the topology does not contain bindings
> for some cpu nodes, parse_cluster() does not fail and we end up with an
> incomplete topology. We have two choices: either we check the topology

Hrm, looking at the topology binding it doesn't specificially require
that the topology be complete.  I can see why you would want that.

> I'd rather do it here, in preparation for MPIDR_EL1 fallback solution
> (where there will always be topology information configured and the register
> will always be there in all its glory).

To be honest at this point I think what I want to do is go back to the
original approach of layering DT on top of MPIDR.  MPIDR is smaller and
simpler code so seems more likely to make progress.  I really do expect
that for a very large proportion of systems it'll be sufficient.
Catalin Marinas March 20, 2014, 5:19 p.m. UTC | #3
On Thu, Mar 20, 2014 at 01:43:57PM +0000, Mark Brown wrote:
> On Thu, Mar 20, 2014 at 11:26:50AM +0000, Lorenzo Pieralisi wrote:
> > On Wed, Mar 19, 2014 at 06:02:17PM +0000, Mark Brown wrote:
> > > +#ifdef CONFIG_OF
> 
> > This ifdef can be removed, CONFIG_OF is always selected for arm64 and
> > the !CONFIG_OF path
> 
> This has been present since the very first time these patches were
> posted but hasn't been mentioned as being a problem previously.

I recall I mentioned the unneeded CONFIG_OF in the past but I can't
really tell whether it was for this set of patches or not.

> > > +#else
> > > +static inline int parse_dt_topology(void) { return 0; }
> > > +#endif
> 
> > is wrong, it should return failure. You should remove the CONFIG_OF
> > ifdeffery.
> 
> Yup.  It actually won't affect the behaviour at present though - since
> it won't do anything the result will be just the same as if we return an
> error and reset.
> 
> Given ACPI (which really looks like it's going to happen at some point
> and presumably make OF optional) I'm not sure removing the handling of
> OF is actually constructive but whatever, it's done now...

CONFIG_OF will always be enabled in the kernel even when we get ACPI. We
still use the chosen DT node to tell the kernel about ACPI.

> > We still have a problem here. If the topology does not contain bindings
> > for some cpu nodes, parse_cluster() does not fail and we end up with an
> > incomplete topology. We have two choices: either we check the topology
> 
> Hrm, looking at the topology binding it doesn't specificially require
> that the topology be complete.  I can see why you would want that.
> 
> > I'd rather do it here, in preparation for MPIDR_EL1 fallback solution
> > (where there will always be topology information configured and the register
> > will always be there in all its glory).
> 
> To be honest at this point I think what I want to do is go back to the
> original approach of layering DT on top of MPIDR.  MPIDR is smaller and
> simpler code so seems more likely to make progress.  I really do expect
> that for a very large proportion of systems it'll be sufficient.

Do you mean the physical MPIDR_EL1 or the DT representation of
MPIDR_EL1?
Mark Brown March 20, 2014, 5:52 p.m. UTC | #4
On Thu, Mar 20, 2014 at 05:19:32PM +0000, Catalin Marinas wrote:
> On Thu, Mar 20, 2014 at 01:43:57PM +0000, Mark Brown wrote:

> > To be honest at this point I think what I want to do is go back to the
> > original approach of layering DT on top of MPIDR.  MPIDR is smaller and
> > simpler code so seems more likely to make progress.  I really do expect
> > that for a very large proportion of systems it'll be sufficient.

> Do you mean the physical MPIDR_EL1 or the DT representation of
> MPIDR_EL1?

Well, the affinities need to be the same anyway (so we can tie the
hardware to the description in DT) though we need to use the physical
register to get the MT bit since the binding requires that this be
omitted from the value stored in DT.  Lorenzo was keen on paying
attention to the MT bit which does seem like a reasonable thing to do.
Lorenzo Pieralisi March 20, 2014, 6:08 p.m. UTC | #5
On Thu, Mar 20, 2014 at 01:43:57PM +0000, Mark Brown wrote:
> On Thu, Mar 20, 2014 at 11:26:50AM +0000, Lorenzo Pieralisi wrote:
> > On Wed, Mar 19, 2014 at 06:02:17PM +0000, Mark Brown wrote:
> > > +#ifdef CONFIG_OF
> 
> > This ifdef can be removed, CONFIG_OF is always selected for arm64 and
> > the !CONFIG_OF path
> 
> This has been present since the very first time these patches were
> posted but hasn't been mentioned as being a problem previously.

I am sorry, I missed it, doing my best to help you get it through.

> > > +#else
> > > +static inline int parse_dt_topology(void) { return 0; }
> > > +#endif
> 
> > is wrong, it should return failure. You should remove the CONFIG_OF
> > ifdeffery.
> 
> Yup.  It actually won't affect the behaviour at present though - since
> it won't do anything the result will be just the same as if we return an
> error and reset.
> 
> Given ACPI (which really looks like it's going to happen at some point
> and presumably make OF optional) I'm not sure removing the handling of
> OF is actually constructive but whatever, it's done now...

DT is there to stay, regardless of ACPI. However, given the function call
logic, returning 0 on !CONFIG_OF was correct since it meant "no cpu-map".
Anyway, CONFIG_OF ifdef should be removed.

> > > +			if (leaf) {
> > > +				ret = parse_core(c, cluster_id, core_id++);
> > > +				if (ret != 0) {
> > > +					return ret;
> > > +				}
> 
> > WARNING: braces {} are not necessary for single statement blocks
> > #139: FILE: arch/arm64/kernel/topology.c:132:
> 
> Like I say I don't think checkpatch is being helpful on this one, the
> code looks worse.  Again, whatever.

Worse or better, it has to be consistent. Either you leave them
everywhere (but there is a coding style, it is for a reason) or you
remove them everywhere (there are other nested paths where it is removed
in the patch). Do not take it as a nitpick please, I just want the code to
be consistent.

> > We still have a problem here. If the topology does not contain bindings
> > for some cpu nodes, parse_cluster() does not fail and we end up with an
> > incomplete topology. We have two choices: either we check the topology
> 
> Hrm, looking at the topology binding it doesn't specificially require
> that the topology be complete.  I can see why you would want that.
> 
> > I'd rather do it here, in preparation for MPIDR_EL1 fallback solution
> > (where there will always be topology information configured and the register
> > will always be there in all its glory).
> 
> To be honest at this point I think what I want to do is go back to the
> original approach of layering DT on top of MPIDR.  MPIDR is smaller and
> simpler code so seems more likely to make progress.  I really do expect
> that for a very large proportion of systems it'll be sufficient.

DT (cpu-map) takes precedence though. Yes, instead of resetting the
topology, falling back to MPIDR_EL1 is acceptable if either there are
broken bindings or cpus with missing topology information.

Honestly, it is not up to the kernel to validate DT, since this adds
complexity, but I think that a big fat WARN_ON on missing or broken
topology information would help fix firmware at early stages.

You should fall back to HW MPIDR_EL1.

I know, it is complex, there is little we can do about that and it is
code run just at cold boot and freed later so I deem that acceptable.

Thanks,
Lorenzo
Mark Brown March 21, 2014, 11:13 a.m. UTC | #6
On Thu, Mar 20, 2014 at 05:19:32PM +0000, Catalin Marinas wrote:
> On Thu, Mar 20, 2014 at 01:43:57PM +0000, Mark Brown wrote:

> > Given ACPI (which really looks like it's going to happen at some point
> > and presumably make OF optional) I'm not sure removing the handling of
> > OF is actually constructive but whatever, it's done now...

> CONFIG_OF will always be enabled in the kernel even when we get ACPI. We
> still use the chosen DT node to tell the kernel about ACPI.

One thing that occurs to me with this - if we've always got a DT even if
we are booting with ACPI that might confuse code that implements
handling for firmware idioms.  The regulator code does this sort of
thing, though in that case we make exactly the same assumptions for ACPI
and DT so there won't be any confusion.  There's a few other examples
though and we might acquire more by the time ACPI gets merged.  Probably
not a big deal but something that needs attention paying.

I was aware that there was a stub DT on ACPI systems but had expected
that if we were booting with real ACPI support that'd get masked from
the running system.  I can see why at least initially we'd want to just
pull in the infrastructure to use when identifying that this is an ACPI
system.
Mark Brown March 21, 2014, 11:32 a.m. UTC | #7
On Thu, Mar 20, 2014 at 06:08:36PM +0000, Lorenzo Pieralisi wrote:

> > > This ifdef can be removed, CONFIG_OF is always selected for arm64 and
> > > the !CONFIG_OF path

> > This has been present since the very first time these patches were
> > posted but hasn't been mentioned as being a problem previously.

> I am sorry, I missed it, doing my best to help you get it through.

Please try to consider this from a submitter point of view; it is quite
frustrating every time something that has been around for a while and
fairly obvious gets identified as a new issue, it makes it hard to have
confidence that addressing review comments is moving closer to getting
things accepted.

> Worse or better, it has to be consistent. Either you leave them
> everywhere (but there is a coding style, it is for a reason) or you
> remove them everywhere (there are other nested paths where it is removed
> in the patch). Do not take it as a nitpick please, I just want the code to
> be consistent.

Hrm, I couldn't actually find any other examples where the if is an edge
statement in a block.

> DT (cpu-map) takes precedence though. Yes, instead of resetting the
> topology, falling back to MPIDR_EL1 is acceptable if either there are
> broken bindings or cpus with missing topology information.

Quite; and hopefully in most cases the hardware designers will set MPIDR
sensibly and so the DT could just omit the topology information entirely
since it's redundant.

> Honestly, it is not up to the kernel to validate DT, since this adds
> complexity, but I think that a big fat WARN_ON on missing or broken
> topology information would help fix firmware at early stages.

I dunno, we're now doing active things like insisting on at least one
level of cluster node which definitely seem to push us into actively
looking for problems where there's no real ambiguity about what's meant.
Though we're not currently doing any WARN_ON()s...

> I know, it is complex, there is little we can do about that and it is
> code run just at cold boot and freed later so I deem that acceptable.

It's not that anything is really complex (the requirements keep on
evolving but nothing is particularly hard), it's that we could most
likely have had something useful for people already.
Catalin Marinas March 21, 2014, 2:52 p.m. UTC | #8
On Thu, Mar 20, 2014 at 05:52:53PM +0000, Mark Brown wrote:
> On Thu, Mar 20, 2014 at 05:19:32PM +0000, Catalin Marinas wrote:
> > On Thu, Mar 20, 2014 at 01:43:57PM +0000, Mark Brown wrote:
> 
> > > To be honest at this point I think what I want to do is go back to the
> > > original approach of layering DT on top of MPIDR.  MPIDR is smaller and
> > > simpler code so seems more likely to make progress.  I really do expect
> > > that for a very large proportion of systems it'll be sufficient.
> 
> > Do you mean the physical MPIDR_EL1 or the DT representation of
> > MPIDR_EL1?
> 
> Well, the affinities need to be the same anyway (so we can tie the
> hardware to the description in DT) though we need to use the physical
> register to get the MT bit since the binding requires that this be
> omitted from the value stored in DT.  Lorenzo was keen on paying
> attention to the MT bit which does seem like a reasonable thing to do.

OK, as long as topology in DT takes priority (in case the hardware got
it wrong).
Catalin Marinas March 21, 2014, 3:01 p.m. UTC | #9
On Fri, Mar 21, 2014 at 11:13:53AM +0000, Mark Brown wrote:
> On Thu, Mar 20, 2014 at 05:19:32PM +0000, Catalin Marinas wrote:
> > On Thu, Mar 20, 2014 at 01:43:57PM +0000, Mark Brown wrote:
> 
> > > Given ACPI (which really looks like it's going to happen at some point
> > > and presumably make OF optional) I'm not sure removing the handling of
> > > OF is actually constructive but whatever, it's done now...
> 
> > CONFIG_OF will always be enabled in the kernel even when we get ACPI. We
> > still use the chosen DT node to tell the kernel about ACPI.
> 
> One thing that occurs to me with this - if we've always got a DT even if
> we are booting with ACPI that might confuse code that implements
> handling for firmware idioms.

The DT presented on an ACPI-capable system only contains the chosen
node (I guess the DT will not even be unflattened) .So the topology
code would check for DT, if not it would check for ACPI (or the other
way around) and only after that fall back to hardware MPIDR. I'm not
sure whether current ACPI gives us rich enough information about
topology like DT, in which case it could simply use MPIDR.

> I was aware that there was a stub DT on ACPI systems but had expected
> that if we were booting with real ACPI support that'd get masked from
> the running system.

That's the plan. The original point was that CONFIG_OF is going to stay
even if CONFIG_ACPI is enabled. But we shouldn't mix the two, so most of
the DT information will not be available to the kernel (not even
CPU topology) if ACPI tables are provided.
Lorenzo Pieralisi March 21, 2014, 3:16 p.m. UTC | #10
On Fri, Mar 21, 2014 at 11:32:02AM +0000, Mark Brown wrote:
> On Thu, Mar 20, 2014 at 06:08:36PM +0000, Lorenzo Pieralisi wrote:
> 
> > > > This ifdef can be removed, CONFIG_OF is always selected for arm64 and
> > > > the !CONFIG_OF path
> 
> > > This has been present since the very first time these patches were
> > > posted but hasn't been mentioned as being a problem previously.
> 
> > I am sorry, I missed it, doing my best to help you get it through.
> 
> Please try to consider this from a submitter point of view; it is quite
> frustrating every time something that has been around for a while and
> fairly obvious gets identified as a new issue, it makes it hard to have
> confidence that addressing review comments is moving closer to getting
> things accepted.

I understand that and I apologize, but I will still flag issues up
as long as I find some. For instance, I noticed patch is missing
of_node_put() calls almost everywhere DT nodes are parsed and we have
to fix that before merging it. I can do that to save another respin.

[...]

> > I know, it is complex, there is little we can do about that and it is
> > code run just at cold boot and freed later so I deem that acceptable.
> 
> It's not that anything is really complex (the requirements keep on
> evolving but nothing is particularly hard), it's that we could most
> likely have had something useful for people already.

I agree and you are right.

In order to add all these checks this code is getting ways too complex for
my taste, it is not your problem and it is not mine either, I just can't help
notice though.

Patch is complete and I think it is almost ready to get merged.
Instead of bothering you with another respin with minor changes I suggest
I will pick the last version up, diff what's needed and repost it here for
final look so that we are done with it.

Please let me know if that's plausible, thank you.

Lorenzo
Mark Brown March 21, 2014, 3:36 p.m. UTC | #11
On Fri, Mar 21, 2014 at 03:01:18PM +0000, Catalin Marinas wrote:
> On Fri, Mar 21, 2014 at 11:13:53AM +0000, Mark Brown wrote:

> > One thing that occurs to me with this - if we've always got a DT even if
> > we are booting with ACPI that might confuse code that implements
> > handling for firmware idioms.

> The DT presented on an ACPI-capable system only contains the chosen
> node (I guess the DT will not even be unflattened) .So the topology
> code would check for DT, if not it would check for ACPI (or the other
> way around) and only after that fall back to hardware MPIDR. I'm not
> sure whether current ACPI gives us rich enough information about
> topology like DT, in which case it could simply use MPIDR.

Sorry, this isn't related to topology - it's to do with other code that
checks if a DT was present and makes decisions based on that.  So long
as of_have_populated_dt() doesn't report true things should be fine but
it's something to watch out for.
Mark Brown March 21, 2014, 4:06 p.m. UTC | #12
On Fri, Mar 21, 2014 at 03:16:17PM +0000, Lorenzo Pieralisi wrote:

> I understand that and I apologize, but I will still flag issues up
> as long as I find some. For instance, I noticed patch is missing

Thanks.  I do agree that it's important to find and fix issues, it was
more about the mechanics of how that gets done.

> Instead of bothering you with another respin with minor changes I suggest
> I will pick the last version up, diff what's needed and repost it here for
> final look so that we are done with it.

> Please let me know if that's plausible, thank you.

I've actually already got a mostly complete respin so I may as well send
that, just doing a bit of testing though I'll wrap in the of_node_put()s
as well now you mention that.  Should be out this afternoon.
diff mbox

Patch

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 3e06b0be4ec8..548f04572e26 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -17,10 +17,163 @@ 
 #include <linux/percpu.h>
 #include <linux/node.h>
 #include <linux/nodemask.h>
+#include <linux/of.h>
 #include <linux/sched.h>
 
 #include <asm/topology.h>
 
+#ifdef CONFIG_OF
+static int __init get_cpu_for_node(struct device_node *node)
+{
+	struct device_node *cpu_node;
+	int cpu;
+
+	cpu_node = of_parse_phandle(node, "cpu", 0);
+	if (!cpu_node)
+		return -1;
+
+	for_each_possible_cpu(cpu) {
+		if (of_get_cpu_node(cpu, NULL) == cpu_node)
+			return cpu;
+	}
+
+	pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+	return -1;
+}
+
+static int __init parse_core(struct device_node *core, int cluster_id,
+			     int core_id)
+{
+	char name[10];
+	bool leaf = true;
+	int i = 0;
+	int cpu;
+	struct device_node *t;
+
+	do {
+		snprintf(name, sizeof(name), "thread%d", i);
+		t = of_get_child_by_name(core, name);
+		if (t) {
+			leaf = false;
+			cpu = get_cpu_for_node(t);
+			if (cpu >= 0) {
+				cpu_topology[cpu].cluster_id = cluster_id;
+				cpu_topology[cpu].core_id = core_id;
+				cpu_topology[cpu].thread_id = i;
+			} else {
+				pr_err("%s: Can't get CPU for thread\n",
+				       t->full_name);
+				return -EINVAL;
+			}
+		}
+		i++;
+	} while (t);
+
+	cpu = get_cpu_for_node(core);
+	if (cpu >= 0) {
+		if (!leaf) {
+			pr_err("%s: Core has both threads and CPU\n",
+			       core->full_name);
+			return -EINVAL;
+		}
+
+		cpu_topology[cpu].cluster_id = cluster_id;
+		cpu_topology[cpu].core_id = core_id;
+	} else if (leaf) {
+		pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init parse_cluster(struct device_node *cluster, int depth)
+{
+	char name[10];
+	bool leaf = true;
+	bool has_cores = false;
+	struct device_node *c;
+	static int __initdata cluster_id;
+	int core_id = 0;
+	int i, ret;
+
+	/*
+	 * First check for child clusters; we currently ignore any
+	 * information about the nesting of clusters and present the
+	 * scheduler with a flat list of them.
+	 */
+	i = 0;
+	do {
+		snprintf(name, sizeof(name), "cluster%d", i);
+		c = of_get_child_by_name(cluster, name);
+		if (c) {
+			ret = parse_cluster(c, depth + 1);
+			if (ret != 0)
+				return ret;
+			leaf = false;
+		}
+		i++;
+	} while (c);
+
+	/* Now check for cores */
+	i = 0;
+	do {
+		snprintf(name, sizeof(name), "core%d", i);
+		c = of_get_child_by_name(cluster, name);
+		if (c) {
+			has_cores = true;
+
+			if (depth == 0)
+				pr_err("%s: cpu-map children should be clusters\n",
+				       c->full_name);
+
+			if (leaf) {
+				ret = parse_core(c, cluster_id, core_id++);
+				if (ret != 0) {
+					return ret;
+				}
+			} else {
+				pr_err("%s: Non-leaf cluster with core %s\n",
+				       cluster->full_name, name);
+				return -EINVAL;
+			}
+		}
+		i++;
+	} while (c);
+
+	if (leaf && !has_cores)
+		pr_warn("%s: empty cluster\n", cluster->full_name);
+
+	if (leaf)
+		cluster_id++;
+
+	return 0;
+}
+
+static int __init parse_dt_topology(void)
+{
+	struct device_node *cn;
+
+	cn = of_find_node_by_path("/cpus");
+	if (!cn) {
+		pr_err("No CPU information found in DT\n");
+		return 0;
+	}
+
+	/*
+	 * When topology is provided cpu-map is essentially a root
+	 * cluster with restricted subnodes.
+	 */
+	cn = of_get_child_by_name(cn, "cpu-map");
+	if (!cn)
+		return 0;
+	return parse_cluster(cn, 0);
+}
+
+#else
+static inline int parse_dt_topology(void) { return 0; }
+#endif
+
 /*
  * cpu topology table
  */
@@ -74,15 +227,10 @@  void store_cpu_topology(unsigned int cpuid)
 	update_siblings_masks(cpuid);
 }
 
-/*
- * init_cpu_topology is called at boot when only one cpu is running
- * which prevent simultaneous write access to cpu_topology array
- */
-void __init init_cpu_topology(void)
+static void __init reset_cpu_topology(void)
 {
 	unsigned int cpu;
 
-	/* init core mask and power*/
 	for_each_possible_cpu(cpu) {
 		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
 
@@ -93,3 +241,15 @@  void __init init_cpu_topology(void)
 		cpumask_clear(&cpu_topo->thread_sibling);
 	}
 }
+
+void __init init_cpu_topology(void)
+{
+	reset_cpu_topology();
+
+	/*
+	 * Discard anything that was parsed if we hit an error so we
+	 * don't use partial information.
+	 */
+	if (parse_dt_topology())
+		reset_cpu_topology();
+}