diff mbox

[v9,05/14] ARM: hisi: enable MCPM implementation

Message ID 1400591427-21922-6-git-send-email-haojian.zhuang@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

Haojian Zhuang May 20, 2014, 1:10 p.m. UTC
Multiple CPU clusters are used in Hisilicon HiP04 SoC. Now use MCPM
framework to manage power on HiP04 SoC.

Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
---
 arch/arm/mach-hisi/Makefile   |   1 +
 arch/arm/mach-hisi/platmcpm.c | 310 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 311 insertions(+)
 create mode 100644 arch/arm/mach-hisi/platmcpm.c

Comments

Nicolas Pitre May 21, 2014, 1:29 a.m. UTC | #1
On Tue, 20 May 2014, Haojian Zhuang wrote:

> Multiple CPU clusters are used in Hisilicon HiP04 SoC. Now use MCPM
> framework to manage power on HiP04 SoC.

There are still unresolved issues with this patch.

[...]
> +static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
> +{
> +	unsigned long data, mask;
> +
> +	if (!relocation || !sysctrl)
> +		return -ENODEV;
> +	if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
> +		return -EINVAL;
> +
> +	spin_lock_irq(&boot_lock);
> +
> +	if (hip04_cpu_table[cluster][cpu]) {
> +		hip04_cpu_table[cluster][cpu]++;
> +		spin_unlock_irq(&boot_lock);
> +		return 0;
> +	}
> +
> +	writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
> +	writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
> +	writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
> +	writel_relaxed(0, relocation + 12);
> +
> +	if (hip04_cluster_down(cluster)) {
> +		data = CLUSTER_DEBUG_RESET_BIT;
> +		writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
> +		do {
> +			mask = CLUSTER_DEBUG_RESET_STATUS;
> +			data = readl_relaxed(sysctrl + \
> +					     SC_CPU_RESET_STATUS(cluster));
> +		} while (data & mask);
> +		hip04_set_snoop_filter(cluster, 1);
> +	}

Sorry to insist, but I want to repeat the question I asked during the 
previous review as I consider this is important, especially if you want 
to support deep C-States with cpuidle later.  This also has implications 
if you ever want to turn off snoops in hip04_mcpm_power_down() when all 
CPUs in a cluster are down.

You said:

| But it fails on my platform if I execute snooping setup on the new
| CPU.

I then asked:

| It fails how?  I want to make sure if the problem is with the hardware 
| design or the code.
| 
| The assembly code could be wrong.  Are you sure this is not theactual 
| reason?
| 
| Is there some documentation for this stuff?

I also see that the snoop filter is enabled from hip04_cpu_table_init() 
for the CPU that is actually executing that code.  So that must work 
somehow...

> +
> +	hip04_cpu_table[cluster][cpu]++;
> +
> +	data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
> +	       CORE_DEBUG_RESET_BIT(cpu);
> +	writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
> +	spin_unlock_irq(&boot_lock);
> +	msleep(POLL_MSEC);

Your cover letter for this series mentionned this:

| v9:
|   * Remove delay workaround in mcpm implementation.

Why is it still there?

> +	return 0;
> +}
> +
> +static void hip04_mcpm_power_down(void)
> +{
> +	unsigned int mpidr, cpu, cluster, data = 0;
> +	bool skip_reset = false;
> +
> +	mpidr = read_cpuid_mpidr();
> +	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +
> +	__mcpm_cpu_going_down(cpu, cluster);
> +
> +	spin_lock(&boot_lock);
> +	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
> +	hip04_cpu_table[cluster][cpu]--;
> +	if (hip04_cpu_table[cluster][cpu] == 1) {
> +		/* A power_up request went ahead of us. */
> +		skip_reset = true;
> +	} else if (hip04_cpu_table[cluster][cpu] > 1) {
> +		pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
> +		BUG();
> +	}
> +	spin_unlock(&boot_lock);
> +
> +	v7_exit_coherency_flush(louis);
> +
> +	__mcpm_cpu_down(cpu, cluster);
> +
> +	if (!skip_reset) {
> +		data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
> +		       CORE_DEBUG_RESET_BIT(cpu);
> +		writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
> +	}
> +}

As I mentioned already this is going to race with the power_up() method.  
Let me illustrate the problem:

	* CPU 0		* CPU 1
	-------		-------
	* mcpm_cpu_power_down()
	* hip04_mcpm_power_down()
	* spin_lock(&boot_lock); [lock acquired]
			* mcpm_cpu_power_up(cpu = 0)
			* spin_lock(&boot_lock); [blocked]
	* hip04_cpu_table[cluster][cpu]--; [value down to 0]
	* skip_reset = false
	* spin_unlock(&boot_lock);
			* spin_lock(&boot_lock); [now succeeds]
	* v7_exit_coherency_flush(louis); [takes a while to complete]
			* hip04_cpu_table[cluster][cpu]++; [value back to 1]
			* bring CPU0 out of reset
	* put CPU0 into reset
			* spin_unlock(&boot_lock);

Here you end up with CPU0 in reset while hip04_cpu_table[cluster][cpu] 
for CPU0 is equal to 1.  The CPU will therefore never start again as 
further calls to power_up() won't see hip04_cpu_table equal to 0 
anymore.

So... I'm asking again: are you absolutely certain that the CPU reset is 
applied at the very moment the corresponding bit is set?  Isn't it 
applied only when the CPU does execute a WFI like most other platforms? 


Nicolas
Haojian Zhuang May 21, 2014, 1:48 a.m. UTC | #2
On 21 May 2014 09:29, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Tue, 20 May 2014, Haojian Zhuang wrote:
>
>> Multiple CPU clusters are used in Hisilicon HiP04 SoC. Now use MCPM
>> framework to manage power on HiP04 SoC.
>
> There are still unresolved issues with this patch.
>
> [...]
>> +static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
>> +{
>> +     unsigned long data, mask;
>> +
>> +     if (!relocation || !sysctrl)
>> +             return -ENODEV;
>> +     if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
>> +             return -EINVAL;
>> +
>> +     spin_lock_irq(&boot_lock);
>> +
>> +     if (hip04_cpu_table[cluster][cpu]) {
>> +             hip04_cpu_table[cluster][cpu]++;
>> +             spin_unlock_irq(&boot_lock);
>> +             return 0;
>> +     }
>> +
>> +     writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
>> +     writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
>> +     writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
>> +     writel_relaxed(0, relocation + 12);
>> +
>> +     if (hip04_cluster_down(cluster)) {
>> +             data = CLUSTER_DEBUG_RESET_BIT;
>> +             writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
>> +             do {
>> +                     mask = CLUSTER_DEBUG_RESET_STATUS;
>> +                     data = readl_relaxed(sysctrl + \
>> +                                          SC_CPU_RESET_STATUS(cluster));
>> +             } while (data & mask);
>> +             hip04_set_snoop_filter(cluster, 1);
>> +     }
>
> Sorry to insist, but I want to repeat the question I asked during the
> previous review as I consider this is important, especially if you want
> to support deep C-States with cpuidle later.  This also has implications
> if you ever want to turn off snoops in hip04_mcpm_power_down() when all
> CPUs in a cluster are down.
>
> You said:
>
> | But it fails on my platform if I execute snooping setup on the new
> | CPU.
>
> I then asked:
>
> | It fails how?  I want to make sure if the problem is with the hardware
> | design or the code.
> |
> | The assembly code could be wrong.  Are you sure this is not theactual
> | reason?
> |
> | Is there some documentation for this stuff?
>
> I also see that the snoop filter is enabled from hip04_cpu_table_init()
> for the CPU that is actually executing that code.  So that must work
> somehow...
>

Cluster0 is very special. If I didn't enable snoop filter of cluster0, it also
works. I'll check with Hisilicon guys why it's different. The configuration
of snoop filter is a black box to me.

>> +
>> +     hip04_cpu_table[cluster][cpu]++;
>> +
>> +     data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
>> +            CORE_DEBUG_RESET_BIT(cpu);
>> +     writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
>> +     spin_unlock_irq(&boot_lock);
>> +     msleep(POLL_MSEC);
>
> Your cover letter for this series mentionned this:
>
> | v9:
> |   * Remove delay workaround in mcpm implementation.
>
> Why is it still there?
>
Sorry. I cherry pick with the wrong id.

>> +     return 0;
>> +}
>> +
>> +static void hip04_mcpm_power_down(void)
>> +{
>> +     unsigned int mpidr, cpu, cluster, data = 0;
>> +     bool skip_reset = false;
>> +
>> +     mpidr = read_cpuid_mpidr();
>> +     cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
>> +     cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
>> +
>> +     __mcpm_cpu_going_down(cpu, cluster);
>> +
>> +     spin_lock(&boot_lock);
>> +     BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
>> +     hip04_cpu_table[cluster][cpu]--;
>> +     if (hip04_cpu_table[cluster][cpu] == 1) {
>> +             /* A power_up request went ahead of us. */
>> +             skip_reset = true;
>> +     } else if (hip04_cpu_table[cluster][cpu] > 1) {
>> +             pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
>> +             BUG();
>> +     }
>> +     spin_unlock(&boot_lock);
>> +
>> +     v7_exit_coherency_flush(louis);
>> +
>> +     __mcpm_cpu_down(cpu, cluster);
>> +
>> +     if (!skip_reset) {
>> +             data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
>> +                    CORE_DEBUG_RESET_BIT(cpu);
>> +             writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
>> +     }
>> +}
>
> As I mentioned already this is going to race with the power_up() method.
> Let me illustrate the problem:
>
>         * CPU 0         * CPU 1
>         -------         -------
>         * mcpm_cpu_power_down()
>         * hip04_mcpm_power_down()
>         * spin_lock(&boot_lock); [lock acquired]
>                         * mcpm_cpu_power_up(cpu = 0)
>                         * spin_lock(&boot_lock); [blocked]
>         * hip04_cpu_table[cluster][cpu]--; [value down to 0]
>         * skip_reset = false
>         * spin_unlock(&boot_lock);
>                         * spin_lock(&boot_lock); [now succeeds]
>         * v7_exit_coherency_flush(louis); [takes a while to complete]
>                         * hip04_cpu_table[cluster][cpu]++; [value back to 1]
>                         * bring CPU0 out of reset
>         * put CPU0 into reset
>                         * spin_unlock(&boot_lock);
>
> Here you end up with CPU0 in reset while hip04_cpu_table[cluster][cpu]
> for CPU0 is equal to 1.  The CPU will therefore never start again as
> further calls to power_up() won't see hip04_cpu_table equal to 0
> anymore.
>
> So... I'm asking again: are you absolutely certain that the CPU reset is
> applied at the very moment the corresponding bit is set?  Isn't it
> applied only when the CPU does execute a WFI like most other platforms?
>

If I put CPU0 into reset mode in wait_for_powerdown() that is executed
in CPU1 or other CPU, this issue doesn't exist. Is it right?

I remember that I could check hip04_cpu_table[cluster][cpu] & CPU0 WFI
status in wait_for_powerdown().

Regards
Haojian
Nicolas Pitre May 21, 2014, 2:06 a.m. UTC | #3
On Wed, 21 May 2014, Haojian Zhuang wrote:

> On 21 May 2014 09:29, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > On Tue, 20 May 2014, Haojian Zhuang wrote:
> >
> >> Multiple CPU clusters are used in Hisilicon HiP04 SoC. Now use MCPM
> >> framework to manage power on HiP04 SoC.
> >
> > There are still unresolved issues with this patch.
> >
> > [...]
> >> +static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
> >> +{
> >> +     unsigned long data, mask;
> >> +
> >> +     if (!relocation || !sysctrl)
> >> +             return -ENODEV;
> >> +     if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
> >> +             return -EINVAL;
> >> +
> >> +     spin_lock_irq(&boot_lock);
> >> +
> >> +     if (hip04_cpu_table[cluster][cpu]) {
> >> +             hip04_cpu_table[cluster][cpu]++;
> >> +             spin_unlock_irq(&boot_lock);
> >> +             return 0;
> >> +     }
> >> +
> >> +     writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
> >> +     writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
> >> +     writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
> >> +     writel_relaxed(0, relocation + 12);
> >> +
> >> +     if (hip04_cluster_down(cluster)) {
> >> +             data = CLUSTER_DEBUG_RESET_BIT;
> >> +             writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
> >> +             do {
> >> +                     mask = CLUSTER_DEBUG_RESET_STATUS;
> >> +                     data = readl_relaxed(sysctrl + \
> >> +                                          SC_CPU_RESET_STATUS(cluster));
> >> +             } while (data & mask);
> >> +             hip04_set_snoop_filter(cluster, 1);
> >> +     }
> >
> > Sorry to insist, but I want to repeat the question I asked during the
> > previous review as I consider this is important, especially if you want
> > to support deep C-States with cpuidle later.  This also has implications
> > if you ever want to turn off snoops in hip04_mcpm_power_down() when all
> > CPUs in a cluster are down.
> >
> > You said:
> >
> > | But it fails on my platform if I execute snooping setup on the new
> > | CPU.
> >
> > I then asked:
> >
> > | It fails how?  I want to make sure if the problem is with the hardware
> > | design or the code.
> > |
> > | The assembly code could be wrong.  Are you sure this is not theactual
> > | reason?
> > |
> > | Is there some documentation for this stuff?
> >
> > I also see that the snoop filter is enabled from hip04_cpu_table_init()
> > for the CPU that is actually executing that code.  So that must work
> > somehow...
> >
> 
> Cluster0 is very special. If I didn't enable snoop filter of cluster0, it also
> works. I'll check with Hisilicon guys why it's different. The configuration
> of snoop filter is a black box to me.

If you could get more info or some documentation about it that would be 
great.

> >> +static void hip04_mcpm_power_down(void)
> >> +{
> >> +     unsigned int mpidr, cpu, cluster, data = 0;
> >> +     bool skip_reset = false;
> >> +
> >> +     mpidr = read_cpuid_mpidr();
> >> +     cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> >> +     cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> >> +
> >> +     __mcpm_cpu_going_down(cpu, cluster);
> >> +
> >> +     spin_lock(&boot_lock);
> >> +     BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
> >> +     hip04_cpu_table[cluster][cpu]--;
> >> +     if (hip04_cpu_table[cluster][cpu] == 1) {
> >> +             /* A power_up request went ahead of us. */
> >> +             skip_reset = true;
> >> +     } else if (hip04_cpu_table[cluster][cpu] > 1) {
> >> +             pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
> >> +             BUG();
> >> +     }
> >> +     spin_unlock(&boot_lock);
> >> +
> >> +     v7_exit_coherency_flush(louis);
> >> +
> >> +     __mcpm_cpu_down(cpu, cluster);
> >> +
> >> +     if (!skip_reset) {
> >> +             data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
> >> +                    CORE_DEBUG_RESET_BIT(cpu);
> >> +             writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
> >> +     }
> >> +}
> >
> > As I mentioned already this is going to race with the power_up() method.
> > Let me illustrate the problem:
> >
> >         * CPU 0         * CPU 1
> >         -------         -------
> >         * mcpm_cpu_power_down()
> >         * hip04_mcpm_power_down()
> >         * spin_lock(&boot_lock); [lock acquired]
> >                         * mcpm_cpu_power_up(cpu = 0)
> >                         * spin_lock(&boot_lock); [blocked]
> >         * hip04_cpu_table[cluster][cpu]--; [value down to 0]
> >         * skip_reset = false
> >         * spin_unlock(&boot_lock);
> >                         * spin_lock(&boot_lock); [now succeeds]
> >         * v7_exit_coherency_flush(louis); [takes a while to complete]
> >                         * hip04_cpu_table[cluster][cpu]++; [value back to 1]
> >                         * bring CPU0 out of reset
> >         * put CPU0 into reset
> >                         * spin_unlock(&boot_lock);
> >
> > Here you end up with CPU0 in reset while hip04_cpu_table[cluster][cpu]
> > for CPU0 is equal to 1.  The CPU will therefore never start again as
> > further calls to power_up() won't see hip04_cpu_table equal to 0
> > anymore.
> >
> > So... I'm asking again: are you absolutely certain that the CPU reset is
> > applied at the very moment the corresponding bit is set?  Isn't it
> > applied only when the CPU does execute a WFI like most other platforms?
> >
> 
> If I put CPU0 into reset mode in wait_for_powerdown() that is executed
> in CPU1 or other CPU, this issue doesn't exist. Is it right?

Only if:

1) the lock is taken,

2) hip04_cpu_table[cluster][cpu] is verified to still be 0, and

3) wait_for_powerdown() is actually called.

Here (3) is optional.  It is there only to satisfy the requirements for 
kexec to work properly.  In the case of cpuidle or the switcher this 
method is not used.

I also would like to remind you that you still didn't answer my 
question.  :-)


Nicolas
diff mbox

Patch

diff --git a/arch/arm/mach-hisi/Makefile b/arch/arm/mach-hisi/Makefile
index 2ae1b59..e7a8640 100644
--- a/arch/arm/mach-hisi/Makefile
+++ b/arch/arm/mach-hisi/Makefile
@@ -3,4 +3,5 @@ 
 #
 
 obj-y	+= hisilicon.o
+obj-$(CONFIG_MCPM)		+= platmcpm.o
 obj-$(CONFIG_SMP)		+= platsmp.o hotplug.o
diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c
new file mode 100644
index 0000000..b991e82
--- /dev/null
+++ b/arch/arm/mach-hisi/platmcpm.c
@@ -0,0 +1,310 @@ 
+/*
+ * Copyright (c) 2013-2014 Linaro Ltd.
+ * Copyright (c) 2013-2014 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/mcpm.h>
+
+#include "core.h"
+
+/* bits definition in SC_CPU_RESET_REQ[x]/SC_CPU_RESET_DREQ[x]
+ * 1 -- unreset; 0 -- reset
+ */
+#define CORE_RESET_BIT(x)		(1 << x)
+#define NEON_RESET_BIT(x)		(1 << (x + 4))
+#define CORE_DEBUG_RESET_BIT(x)		(1 << (x + 9))
+#define CLUSTER_L2_RESET_BIT		(1 << 8)
+#define CLUSTER_DEBUG_RESET_BIT		(1 << 13)
+
+/*
+ * bits definition in SC_CPU_RESET_STATUS[x]
+ * 1 -- reset status; 0 -- unreset status
+ */
+#define CORE_RESET_STATUS(x)		(1 << x)
+#define NEON_RESET_STATUS(x)		(1 << (x + 4))
+#define CORE_DEBUG_RESET_STATUS(x)	(1 << (x + 9))
+#define CLUSTER_L2_RESET_STATUS		(1 << 8)
+#define CLUSTER_DEBUG_RESET_STATUS	(1 << 13)
+#define CORE_WFI_STATUS(x)		(1 << (x + 16))
+#define CORE_WFE_STATUS(x)		(1 << (x + 20))
+#define CORE_DEBUG_ACK(x)		(1 << (x + 24))
+
+#define SC_CPU_RESET_REQ(x)		(0x520 + (x << 3))	/* reset */
+#define SC_CPU_RESET_DREQ(x)		(0x524 + (x << 3))	/* unreset */
+#define SC_CPU_RESET_STATUS(x)		(0x1520 + (x << 3))
+
+#define FAB_SF_MODE			0x0c
+#define FAB_SF_INVLD			0x10
+
+/* bits definition in FB_SF_INVLD */
+#define FB_SF_INVLD_START		(1 << 8)
+
+#define HIP04_MAX_CLUSTERS		4
+#define HIP04_MAX_CPUS_PER_CLUSTER	4
+
+#define POLL_MSEC	10
+#define TIMEOUT_MSEC	1000
+
+struct hip04_secondary_cpu_data {
+	u32	bootwrapper_phys;
+	u32	bootwrapper_size;
+	u32	bootwrapper_magic;
+	u32	relocation_entry;
+	u32	relocation_size;
+};
+
+static void __iomem *relocation, *sysctrl, *fabric;
+static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
+static DEFINE_SPINLOCK(boot_lock);
+static struct hip04_secondary_cpu_data hip04_boot;
+
+static bool hip04_cluster_down(unsigned int cluster)
+{
+	int i;
+
+	for (i = 0; i < HIP04_MAX_CPUS_PER_CLUSTER; i++)
+		if (hip04_cpu_table[cluster][i])
+			return false;
+	return true;
+}
+
+static void hip04_set_snoop_filter(unsigned int cluster, unsigned int on)
+{
+	unsigned long data;
+
+	if (!fabric)
+		BUG();
+	data = readl_relaxed(fabric + FAB_SF_MODE);
+	if (on)
+		data |= 1 << cluster;
+	else
+		data &= ~(1 << cluster);
+	writel_relaxed(data, fabric + FAB_SF_MODE);
+	while (1) {
+		if (data == readl_relaxed(fabric + FAB_SF_MODE))
+			break;
+	}
+}
+
+static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
+{
+	unsigned long data, mask;
+
+	if (!relocation || !sysctrl)
+		return -ENODEV;
+	if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
+		return -EINVAL;
+
+	spin_lock_irq(&boot_lock);
+
+	if (hip04_cpu_table[cluster][cpu]) {
+		hip04_cpu_table[cluster][cpu]++;
+		spin_unlock_irq(&boot_lock);
+		return 0;
+	}
+
+	writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
+	writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
+	writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
+	writel_relaxed(0, relocation + 12);
+
+	if (hip04_cluster_down(cluster)) {
+		data = CLUSTER_DEBUG_RESET_BIT;
+		writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
+		do {
+			mask = CLUSTER_DEBUG_RESET_STATUS;
+			data = readl_relaxed(sysctrl + \
+					     SC_CPU_RESET_STATUS(cluster));
+		} while (data & mask);
+		hip04_set_snoop_filter(cluster, 1);
+	}
+
+	hip04_cpu_table[cluster][cpu]++;
+
+	data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
+	       CORE_DEBUG_RESET_BIT(cpu);
+	writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
+	spin_unlock_irq(&boot_lock);
+	msleep(POLL_MSEC);
+
+	return 0;
+}
+
+static void hip04_mcpm_power_down(void)
+{
+	unsigned int mpidr, cpu, cluster, data = 0;
+	bool skip_reset = false;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	__mcpm_cpu_going_down(cpu, cluster);
+
+	spin_lock(&boot_lock);
+	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+	hip04_cpu_table[cluster][cpu]--;
+	if (hip04_cpu_table[cluster][cpu] == 1) {
+		/* A power_up request went ahead of us. */
+		skip_reset = true;
+	} else if (hip04_cpu_table[cluster][cpu] > 1) {
+		pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
+		BUG();
+	}
+	spin_unlock(&boot_lock);
+
+	v7_exit_coherency_flush(louis);
+
+	__mcpm_cpu_down(cpu, cluster);
+
+	if (!skip_reset) {
+		data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
+		       CORE_DEBUG_RESET_BIT(cpu);
+		writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
+	}
+}
+
+static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int data, tries;
+
+	BUG_ON(cluster >= HIP04_MAX_CLUSTERS ||
+	       cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
+
+	for (tries = 0; tries < TIMEOUT_MSEC / POLL_MSEC; tries++) {
+		data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
+		if (!(data & CORE_RESET_STATUS(cpu))) {
+			msleep(POLL_MSEC);
+			continue;
+		}
+		return 0;
+	}
+	return -ETIMEDOUT;
+}
+
+static void hip04_mcpm_powered_up(void)
+{
+	if (!relocation)
+		return;
+	spin_lock(&boot_lock);
+	writel_relaxed(0, relocation);
+	writel_relaxed(0, relocation + 4);
+	writel_relaxed(0, relocation + 8);
+	writel_relaxed(0, relocation + 12);
+	spin_unlock(&boot_lock);
+}
+
+static const struct mcpm_platform_ops hip04_mcpm_ops = {
+	.power_up		= hip04_mcpm_power_up,
+	.power_down		= hip04_mcpm_power_down,
+	.wait_for_powerdown	= hip04_mcpm_wait_for_powerdown,
+	.powered_up		= hip04_mcpm_powered_up,
+};
+
+static bool __init hip04_cpu_table_init(void)
+{
+	unsigned int mpidr, cpu, cluster;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	if (cluster >= HIP04_MAX_CLUSTERS ||
+	    cpu >= HIP04_MAX_CPUS_PER_CLUSTER) {
+		pr_err("%s: boot CPU is out of bound!\n", __func__);
+		return false;
+	}
+	hip04_set_snoop_filter(cluster, 1);
+	hip04_cpu_table[cluster][cpu] = 1;
+	return true;
+}
+
+static int __init hip04_mcpm_init(void)
+{
+	struct device_node *np, *np_fab;
+	int ret = -ENODEV;
+
+	np = of_find_compatible_node(NULL, NULL, "hisilicon,sysctrl");
+	if (!np)
+		goto err;
+	np_fab = of_find_compatible_node(NULL, NULL, "hisilicon,hip04-fabric");
+	if (!np_fab)
+		goto err;
+
+	if (of_property_read_u32(np, "bootwrapper-phys",
+				 &hip04_boot.bootwrapper_phys)) {
+		pr_err("failed to get bootwrapper-phys\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	if (of_property_read_u32(np, "bootwrapper-size",
+				 &hip04_boot.bootwrapper_size)) {
+		pr_err("failed to get bootwrapper-size\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	if (of_property_read_u32(np, "bootwrapper-magic",
+				 &hip04_boot.bootwrapper_magic)) {
+		pr_err("failed to get bootwrapper-magic\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	if (of_property_read_u32(np, "relocation-entry",
+				 &hip04_boot.relocation_entry)) {
+		pr_err("failed to get relocation-entry\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	if (of_property_read_u32(np, "relocation-size",
+				 &hip04_boot.relocation_size)) {
+		pr_err("failed to get relocation-size\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	relocation = ioremap(hip04_boot.relocation_entry,
+			     hip04_boot.relocation_size);
+	if (!relocation) {
+		pr_err("failed to map relocation space\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+	sysctrl = of_iomap(np, 0);
+	if (!sysctrl) {
+		pr_err("failed to get sysctrl base\n");
+		ret = -ENOMEM;
+		goto err_sysctrl;
+	}
+	fabric = of_iomap(np_fab, 0);
+	if (!fabric) {
+		pr_err("failed to get fabric base\n");
+		ret = -ENOMEM;
+		goto err_fabric;
+	}
+
+	if (!hip04_cpu_table_init())
+		return -EINVAL;
+	ret = mcpm_platform_register(&hip04_mcpm_ops);
+	if (!ret) {
+		mcpm_sync_init(NULL);
+		pr_info("HiP04 MCPM initialized\n");
+	}
+	mcpm_smp_set_ops();
+	return ret;
+err_fabric:
+	iounmap(sysctrl);
+err_sysctrl:
+	iounmap(relocation);
+err:
+	return ret;
+}
+early_initcall(hip04_mcpm_init);