diff mbox

[v7,06/15] ARM: hisi: enable MCPM implementation

Message ID 1399981484-31628-1-git-send-email-haojian.zhuang@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

Haojian Zhuang May 13, 2014, 11:44 a.m. UTC
Multiple CPU clusters are used in Hisilicon HiP04 SoC. Now use MCPM
framework to manage power on HiP04 SoC.

Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
---
 arch/arm/mach-hisi/Makefile   |   1 +
 arch/arm/mach-hisi/platmcpm.c | 304 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 305 insertions(+)
 create mode 100644 arch/arm/mach-hisi/platmcpm.c

Comments

Nicolas Pitre May 13, 2014, 7:43 p.m. UTC | #1
On Tue, 13 May 2014, Haojian Zhuang wrote:

> Multiple CPU clusters are used in Hisilicon HiP04 SoC. Now use MCPM
> framework to manage power on HiP04 SoC.
> 
> Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>

Some more comments...

[...]
> +static void hip04_set_snoop_filter(unsigned int cluster, unsigned int on)
> +{
> +	unsigned long data;
> +
> +	if (!fabric)
> +		return;

How could this validly be NULL?

> +	data = readl_relaxed(fabric + FAB_SF_MODE);
> +	if (on)
> +		data |= 1 << cluster;
> +	else
> +		data &= ~(1 << cluster);
> +	writel_relaxed(data, fabric + FAB_SF_MODE);
> +	while (1) {
> +		if (data == readl_relaxed(fabric + FAB_SF_MODE))
> +			break;
> +	}
> +}

The above could be easily coded in assembly for the power_up_setup 
callback thusly:

hip04_power_up_setup:

	cmp	r0, #0			@ check affinity level
	bxeq	lr			@ nothing to do at CPU level

	mrc	p15, 0, r0, c0, c0, 5	@ get MPIDR
	ubfx	r0, r0, #8, #8		@ extract cluster number

	adr	r1, .LC0
	ldmia	r1, {r2, r3}
	sub	r2, r2, r1		@ virt_addr - phys_addr
	ldr	r1, [r2, r3]		@ get fabric_phys_addr
	mov	r2, #1
	ldr	r3, [r1, #FAB_SF_MODE]	@ read "data"
	orr	r3, r3, r2, lsl r0	@ set cluster bit
	str	r3, [r1, #FAB_SF_MODE]	@ write it back

1:	ldr	r2, [r1, #FAB_SF_MODE]	@ read register content
	cmp	r2, r3			@ make sure it matches
	bne	1b			@ otherwise retry

	bx	lr

.LC0:	.word	.
	.word	fabric_phys_addr - .LC0

That should be it.

> +static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
> +{
> +	unsigned long data, mask;
> +
> +	if (!relocation || !sysctrl)
> +		return -ENODEV;
> +	if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
> +		return -EINVAL;
> +
> +	spin_lock_irq(&boot_lock);
> +	writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
> +	writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
> +	writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
> +	writel_relaxed(0, relocation + 12);

Shouldn't you do the above writes only when 
hip04_cpu_table[cluster][cpu] is zero?  Please see the comment in 
mcpm_cpu_power_down() about unordered calls.

> +	if (hip04_cluster_down(cluster)) {
> +		data = CLUSTER_DEBUG_RESET_BIT;
> +		writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
> +		do {
> +			mask = CLUSTER_DEBUG_RESET_STATUS;
> +			data = readl_relaxed(sysctrl + \
> +					     SC_CPU_RESET_STATUS(cluster));
> +		} while (data & mask);
> +		hip04_set_snoop_filter(cluster, 1);
> +	}
> +
> +	hip04_cpu_table[cluster][cpu]++;
> +
> +	data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
> +	       CORE_DEBUG_RESET_BIT(cpu);
> +	writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
> +	spin_unlock_irq(&boot_lock);
> +	msleep(POLL_MSEC);
> +
> +	return 0;
> +}
> +
> +static void hip04_mcpm_power_down(void)
> +{
> +	unsigned int mpidr, cpu, cluster, data = 0;
> +	bool skip_reset = false;
> +
> +	mpidr = read_cpuid_mpidr();
> +	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +
> +	__mcpm_cpu_going_down(cpu, cluster);
> +
> +	spin_lock(&boot_lock);
> +	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
> +	hip04_cpu_table[cluster][cpu]--;
> +	if (hip04_cpu_table[cluster][cpu] == 1) {
> +		/* A power_up request went ahead of us. */
> +		skip_reset = true;
> +	} else if (hip04_cpu_table[cluster][cpu] > 1) {
> +		pr_err("Cluster %d CPU%d is still running\n", cluster, cpu);

This message is misleading.  If execution gets here, that means 
mcpm_cpu_power_up() was called more than twice in a row for the same CPU 
which should never happen.

> +		BUG();
> +	}
> +
> +	spin_unlock(&boot_lock);
> +
> +	v7_exit_coherency_flush(louis);
> +
> +	__mcpm_cpu_down(cpu, cluster);
> +
> +	if (!skip_reset) {
> +		data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
> +		       CORE_DEBUG_RESET_BIT(cpu);
> +		writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));

You should not perform this outside the lock protected region as this 
could race with hip04_mcpm_power_up().  Instead, this should be done 
above when hip04_cpu_table[cluster][cpu] == 0 after being decremented.

> +}
> +
> +static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
> +{
> +	unsigned int data, tries;
> +
> +	BUG_ON(cluster >= HIP04_MAX_CLUSTERS ||
> +	       cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
> +
> +	for (tries = 0; tries < TIMEOUT_MSEC / POLL_MSEC; tries++) {
> +		data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
> +		if (!(data & CORE_RESET_STATUS(cpu))) {
> +			msleep(POLL_MSEC);
> +			continue;
> +		}
> +		return 0;
> +	}
> +	return -ETIMEDOUT;
> +}
> +
> +static void hip04_mcpm_powered_up(void)
> +{
> +	if (!relocation)
> +		return;
> +	spin_lock(&boot_lock);
> +	writel_relaxed(0, relocation);
> +	writel_relaxed(0, relocation + 4);
> +	writel_relaxed(0, relocation + 8);
> +	writel_relaxed(0, relocation + 12);
> +	spin_unlock(&boot_lock);
> +}
> +
> +static const struct mcpm_platform_ops hip04_mcpm_ops = {
> +	.power_up		= hip04_mcpm_power_up,
> +	.power_down		= hip04_mcpm_power_down,
> +	.wait_for_powerdown	= hip04_mcpm_wait_for_powerdown,
> +	.powered_up		= hip04_mcpm_powered_up,
> +};
> +
> +static bool __init hip04_cpu_table_init(void)
> +{
> +	unsigned int mpidr, cpu, cluster;
> +
> +	mpidr = read_cpuid_mpidr();
> +	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +
> +	if (cluster >= HIP04_MAX_CLUSTERS ||
> +	    cpu >= HIP04_MAX_CPUS_PER_CLUSTER) {
> +		pr_err("%s: boot CPU is out of bound!\n", __func__);
> +		return false;
> +	}
> +	hip04_set_snoop_filter(cluster, 1);
> +	hip04_cpu_table[cluster][cpu] = 1;
> +	return true;
> +}
> +
> +static int __init hip04_mcpm_init(void)
> +{
> +	struct device_node *np, *np_fab;
> +	int ret = -ENODEV;
> +
> +	np = of_find_compatible_node(NULL, NULL, "hisilicon,sysctrl");
> +	if (!np)
> +		goto err;
> +	np_fab = of_find_compatible_node(NULL, NULL, "hisilicon,hip04-fabric");
> +	if (!np_fab)
> +		goto err;
> +
> +	if (of_property_read_u32(np, "bootwrapper-phys",
> +				 &hip04_boot.bootwrapper_phys)) {
> +		pr_err("failed to get bootwrapper-phys\n");
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +	if (of_property_read_u32(np, "bootwrapper-size",
> +				 &hip04_boot.bootwrapper_size)) {
> +		pr_err("failed to get bootwrapper-size\n");
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +	if (of_property_read_u32(np, "bootwrapper-magic",
> +				 &hip04_boot.bootwrapper_magic)) {
> +		pr_err("failed to get bootwrapper-magic\n");
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +	if (of_property_read_u32(np, "relocation-entry",
> +				 &hip04_boot.relocation_entry)) {
> +		pr_err("failed to get relocation-entry\n");
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +	if (of_property_read_u32(np, "relocation-size",
> +				 &hip04_boot.relocation_size)) {
> +		pr_err("failed to get relocation-size\n");
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	relocation = ioremap(hip04_boot.relocation_entry,
> +			     hip04_boot.relocation_size);
> +	if (!relocation) {
> +		pr_err("failed to map relocation space\n");
> +		ret = -ENOMEM;
> +		goto err;
> +	}
> +	sysctrl = of_iomap(np, 0);
> +	if (!sysctrl) {
> +		pr_err("failed to get sysctrl base\n");
> +		ret = -ENOMEM;
> +		goto err_sysctrl;
> +	}
> +	fabric = of_iomap(np_fab, 0);
> +	if (!fabric) {
> +		pr_err("failed to get fabric base\n");
> +		ret = -ENOMEM;
> +		goto err_fabric;
> +	}
> +
> +	if (!hip04_cpu_table_init())
> +		return -EINVAL;
> +	ret = mcpm_platform_register(&hip04_mcpm_ops);
> +	if (!ret) {
> +		mcpm_sync_init(NULL);
> +		pr_info("HiP04 MCPM initialized\n");
> +	}
> +	mcpm_smp_set_ops();
> +	return ret;
> +err_fabric:
> +	iounmap(sysctrl);
> +err_sysctrl:
> +	iounmap(relocation);
> +err:
> +	return ret;
> +}
> +early_initcall(hip04_mcpm_init);
> -- 
> 1.9.1
>
Haojian Zhuang May 15, 2014, 6:23 a.m. UTC | #2
On 14 May 2014 03:43, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Tue, 13 May 2014, Haojian Zhuang wrote:
>
>> Multiple CPU clusters are used in Hisilicon HiP04 SoC. Now use MCPM
>> framework to manage power on HiP04 SoC.
>>
>> Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
>
> Some more comments...
>
> [...]
>> +static void hip04_set_snoop_filter(unsigned int cluster, unsigned int on)
>> +{
>> +     unsigned long data;
>> +
>> +     if (!fabric)
>> +             return;
>
> How could this validly be NULL?
>
OK. I'll make it report BUG.

>> +     data = readl_relaxed(fabric + FAB_SF_MODE);
>> +     if (on)
>> +             data |= 1 << cluster;
>> +     else
>> +             data &= ~(1 << cluster);
>> +     writel_relaxed(data, fabric + FAB_SF_MODE);
>> +     while (1) {
>> +             if (data == readl_relaxed(fabric + FAB_SF_MODE))
>> +                     break;
>> +     }
>> +}
>
> The above could be easily coded in assembly for the power_up_setup
> callback thusly:
>
> hip04_power_up_setup:
>
>         cmp     r0, #0                  @ check affinity level
>         bxeq    lr                      @ nothing to do at CPU level
>
>         mrc     p15, 0, r0, c0, c0, 5   @ get MPIDR
>         ubfx    r0, r0, #8, #8          @ extract cluster number
>
>         adr     r1, .LC0
>         ldmia   r1, {r2, r3}
>         sub     r2, r2, r1              @ virt_addr - phys_addr
>         ldr     r1, [r2, r3]            @ get fabric_phys_addr
>         mov     r2, #1
>         ldr     r3, [r1, #FAB_SF_MODE]  @ read "data"
>         orr     r3, r3, r2, lsl r0      @ set cluster bit
>         str     r3, [r1, #FAB_SF_MODE]  @ write it back
>
> 1:      ldr     r2, [r1, #FAB_SF_MODE]  @ read register content
>         cmp     r2, r3                  @ make sure it matches
>         bne     1b                      @ otherwise retry
>
>         bx      lr
>
> :LC0:   .word   .
>         .word   fabric_phys_addr - .LC0
>
> That should be it.
>

No. This code should be executed before the new CPU comes online. If I
move it into the assembly callback, it will only be executed after the
new CPU is already powered on.

That results in the new CPU failing to come online.

>> +static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
>> +{
>> +     unsigned long data, mask;
>> +
>> +     if (!relocation || !sysctrl)
>> +             return -ENODEV;
>> +     if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
>> +             return -EINVAL;
>> +
>> +     spin_lock_irq(&boot_lock);
>> +     writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
>> +     writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
>> +     writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
>> +     writel_relaxed(0, relocation + 12);
>
> Shouldn't you do the above writes only when
> hip04_cpu_table[cluster][cpu] is zero?  Please see the comment in
> mcpm_cpu_power_down() about unordered calls.
>
OK. I can add the check.

>> +     if (hip04_cluster_down(cluster)) {
>> +             data = CLUSTER_DEBUG_RESET_BIT;
>> +             writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
>> +             do {
>> +                     mask = CLUSTER_DEBUG_RESET_STATUS;
>> +                     data = readl_relaxed(sysctrl + \
>> +                                          SC_CPU_RESET_STATUS(cluster));
>> +             } while (data & mask);
>> +             hip04_set_snoop_filter(cluster, 1);
>> +     }
>> +
>> +     hip04_cpu_table[cluster][cpu]++;
>> +
>> +     data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
>> +            CORE_DEBUG_RESET_BIT(cpu);
>> +     writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
>> +     spin_unlock_irq(&boot_lock);
>> +     msleep(POLL_MSEC);
>> +
>> +     return 0;
>> +}
>> +
>> +static void hip04_mcpm_power_down(void)
>> +{
>> +     unsigned int mpidr, cpu, cluster, data = 0;
>> +     bool skip_reset = false;
>> +
>> +     mpidr = read_cpuid_mpidr();
>> +     cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
>> +     cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
>> +
>> +     __mcpm_cpu_going_down(cpu, cluster);
>> +
>> +     spin_lock(&boot_lock);
>> +     BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
>> +     hip04_cpu_table[cluster][cpu]--;
>> +     if (hip04_cpu_table[cluster][cpu] == 1) {
>> +             /* A power_up request went ahead of us. */
>> +             skip_reset = true;
>> +     } else if (hip04_cpu_table[cluster][cpu] > 1) {
>> +             pr_err("Cluster %d CPU%d is still running\n", cluster, cpu);
>
> This message is misleading.  If execution gets here, that means
> mcpm_cpu_power_up() was called more than twice in a row for the same CPU
> which should never happen.
>
OK. I'll replace the comments.

>> +             BUG();
>> +     }
>> +
>> +     spin_unlock(&boot_lock);
>> +
>> +     v7_exit_coherency_flush(louis);
>> +
>> +     __mcpm_cpu_down(cpu, cluster);
>> +
>> +     if (!skip_reset) {
>> +             data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
>> +                    CORE_DEBUG_RESET_BIT(cpu);
>> +             writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
>
> You should not perform this outside the lock protected region as this
> could race with hip04_mcpm_power_up().  Instead, this should be done
> above when hip04_cpu_table[cluster][cpu] == 0 after being decremented.
>

No. power_down() is executed on the CPU that is being powered down. If
spin_unlock() were placed after the reset operation, there would be no
chance to execute the spin_unlock(), because the CPU is already held in
reset by that time.

Regards
Haojian
Nicolas Pitre May 15, 2014, 8:01 p.m. UTC | #3
On Thu, 15 May 2014, Haojian Zhuang wrote:

> On 14 May 2014 03:43, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > On Tue, 13 May 2014, Haojian Zhuang wrote:
> >
> >> +     data = readl_relaxed(fabric + FAB_SF_MODE);
> >> +     if (on)
> >> +             data |= 1 << cluster;
> >> +     else
> >> +             data &= ~(1 << cluster);
> >> +     writel_relaxed(data, fabric + FAB_SF_MODE);
> >> +     while (1) {
> >> +             if (data == readl_relaxed(fabric + FAB_SF_MODE))
> >> +                     break;
> >> +     }
> >> +}
> >
> > The above could be easily coded in assembly for the power_up_setup
> > callback thusly:
> >
> > hip04_power_up_setup:
> >
> >         cmp     r0, #0                  @ check affinity level
> >         bxeq    lr                      @ nothing to do at CPU level
> >
> >         mrc     p15, 0, r0, c0, c0, 5   @ get MPIDR
> >         ubfx    r0, r0, #8, #8          @ extract cluster number
> >
> >         adr     r1, .LC0
> >         ldmia   r1, {r2, r3}
> >         sub     r2, r2, r1              @ virt_addr - phys_addr
> >         ldr     r1, [r2, r3]            @ get fabric_phys_addr
> >         mov     r2, #1
> >         ldr     r3, [r1, #FAB_SF_MODE]  @ read "data"
> >         orr     r3, r3, r2, lsl r0      @ set cluster bit
> >         str     r3, [r1, #FAB_SF_MODE]  @ write it back
> >
> > 1:      ldr     r2, [r1, #FAB_SF_MODE]  @ read register content
> >         cmp     r2, r3                  @ make sure it matches
> >         bne     1b                      @ otherwise retry
> >
> >         bx      lr
> >
> > :LC0:   .word   .
> >         .word   fabric_phys_addr - .LC0
> >
> > That should be it.
> >
> 
> No. These code should be executed before new CPU on. If I transfer
> them to assembler code, it means that code will be executed after
> new CPU on.

Exact.

> Then it results me failing to make new CPU online.

The assembly code could be wrong as well.  Are you sure this is not the 
actual reason?

Is there some documentation for this stuff?

> >> +static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
> >> +{
> >> +     unsigned long data, mask;
> >> +
> >> +     if (!relocation || !sysctrl)
> >> +             return -ENODEV;
> >> +     if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
> >> +             return -EINVAL;
> >> +
> >> +     spin_lock_irq(&boot_lock);
> >> +     writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
> >> +     writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
> >> +     writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
> >> +     writel_relaxed(0, relocation + 12);
> >
> > Shouldn't you do the above writes only when
> > hip04_cpu_table[cluster][cpu] is zero?  Please see the comment in
> > mcpm_cpu_power_down() about unordered calls.
> >
> OK. I can add the check.
> 
> >> +     if (hip04_cluster_down(cluster)) {
> >> +             data = CLUSTER_DEBUG_RESET_BIT;
> >> +             writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
> >> +             do {
> >> +                     mask = CLUSTER_DEBUG_RESET_STATUS;
> >> +                     data = readl_relaxed(sysctrl + \
> >> +                                          SC_CPU_RESET_STATUS(cluster));
> >> +             } while (data & mask);
> >> +             hip04_set_snoop_filter(cluster, 1);
> >> +     }
> >> +
> >> +     hip04_cpu_table[cluster][cpu]++;
> >> +
> >> +     data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
> >> +            CORE_DEBUG_RESET_BIT(cpu);
> >> +     writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
> >> +     spin_unlock_irq(&boot_lock);
> >> +     msleep(POLL_MSEC);
> >> +
> >> +     return 0;
> >> +}
> >> +
> >> +static void hip04_mcpm_power_down(void)
> >> +{
> >> +     unsigned int mpidr, cpu, cluster, data = 0;
> >> +     bool skip_reset = false;
> >> +
> >> +     mpidr = read_cpuid_mpidr();
> >> +     cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> >> +     cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> >> +
> >> +     __mcpm_cpu_going_down(cpu, cluster);
> >> +
> >> +     spin_lock(&boot_lock);
> >> +     BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
> >> +     hip04_cpu_table[cluster][cpu]--;
> >> +     if (hip04_cpu_table[cluster][cpu] == 1) {
> >> +             /* A power_up request went ahead of us. */
> >> +             skip_reset = true;
> >> +     } else if (hip04_cpu_table[cluster][cpu] > 1) {
> >> +             pr_err("Cluster %d CPU%d is still running\n", cluster, cpu);
> >
> > This message is misleading.  If execution gets here, that means
> > mcpm_cpu_power_up() was called more than twice in a row for the same CPU
> > which should never happen.
> >
> OK. I'll replace the comments.
> 
> >> +             BUG();
> >> +     }
> >> +
> >> +     spin_unlock(&boot_lock);
> >> +
> >> +     v7_exit_coherency_flush(louis);
> >> +
> >> +     __mcpm_cpu_down(cpu, cluster);
> >> +
> >> +     if (!skip_reset) {
> >> +             data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
> >> +                    CORE_DEBUG_RESET_BIT(cpu);
> >> +             writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
> >
> > You should not perform this outside the lock protected region as this
> > could race with hip04_mcpm_power_up().  Instead, this should be done
> > above when hip04_cpu_table[cluster][cpu] == 0 after being decremented.
> >
> 
> No. power_down() is executed on the specified CPU. If spin_unlock() is 
> placed after reset operation, it means that there's no chance to 
> execute the spin_unlock(). Because CPU is already in reset mode at 
> this time.

Normally, reset is effective only when WFI is later executed.  Are you 
sure this is not the case on hip04 as well?


Nicolas
Haojian Zhuang May 20, 2014, 4:43 a.m. UTC | #4
On 16 May 2014 04:01, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Thu, 15 May 2014, Haojian Zhuang wrote:
>
>> On 14 May 2014 03:43, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>> > On Tue, 13 May 2014, Haojian Zhuang wrote:
>> >
>> >> +     data = readl_relaxed(fabric + FAB_SF_MODE);
>> >> +     if (on)
>> >> +             data |= 1 << cluster;
>> >> +     else
>> >> +             data &= ~(1 << cluster);
>> >> +     writel_relaxed(data, fabric + FAB_SF_MODE);
>> >> +     while (1) {
>> >> +             if (data == readl_relaxed(fabric + FAB_SF_MODE))
>> >> +                     break;
>> >> +     }
>> >> +}
>> >
>> > The above could be easily coded in assembly for the power_up_setup
>> > callback thusly:
>> >
>> > hip04_power_up_setup:
>> >
>> >         cmp     r0, #0                  @ check affinity level
>> >         bxeq    lr                      @ nothing to do at CPU level
>> >
>> >         mrc     p15, 0, r0, c0, c0, 5   @ get MPIDR
>> >         ubfx    r0, r0, #8, #8          @ extract cluster number
>> >
>> >         adr     r1, .LC0
>> >         ldmia   r1, {r2, r3}
>> >         sub     r2, r2, r1              @ virt_addr - phys_addr
>> >         ldr     r1, [r2, r3]            @ get fabric_phys_addr
>> >         mov     r2, #1
>> >         ldr     r3, [r1, #FAB_SF_MODE]  @ read "data"
>> >         orr     r3, r3, r2, lsl r0      @ set cluster bit
>> >         str     r3, [r1, #FAB_SF_MODE]  @ write it back
>> >
>> > 1:      ldr     r2, [r1, #FAB_SF_MODE]  @ read register content
>> >         cmp     r2, r3                  @ make sure it matches
>> >         bne     1b                      @ otherwise retry
>> >
>> >         bx      lr
>> >
>> > :LC0:   .word   .
>> >         .word   fabric_phys_addr - .LC0
>> >
>> > That should be it.
>> >
>>
>> No. These code should be executed before new CPU on. If I transfer
>> them to assembler code, it means that code will be executed after
>> new CPU on.
>
> Exact.
>
>> Then it results me failing to make new CPU online.
>
> The assembly code could be wrong as well.  Are you sure this is not the
> actual reason?
>
> Is there some documentation for this stuff?
>

There's no problem in the assembly code. I even rewrote your assembly code.

If I keep my C code alongside the assembly code, the new CPU comes
online correctly. If I use only the assembly code, I just get a kernel
panic. So the failure is not caused by the assembly code itself; it is
caused by executing that code after the new CPU is already on.

There's no documentation on this. The documents were not well prepared.
I think they'll improve them in the future.

>> >> +static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
>> >> +{
>> >> +     unsigned long data, mask;
>> >> +
>> >> +     if (!relocation || !sysctrl)
>> >> +             return -ENODEV;
>> >> +     if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
>> >> +             return -EINVAL;
>> >> +
>> >> +     spin_lock_irq(&boot_lock);
>> >> +     writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
>> >> +     writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
>> >> +     writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
>> >> +     writel_relaxed(0, relocation + 12);
>> >
>> > Shouldn't you do the above writes only when
>> > hip04_cpu_table[cluster][cpu] is zero?  Please see the comment in
>> > mcpm_cpu_power_down() about unordered calls.
>> >
>> OK. I can add the check.
>>
>> >> +     if (hip04_cluster_down(cluster)) {
>> >> +             data = CLUSTER_DEBUG_RESET_BIT;
>> >> +             writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
>> >> +             do {
>> >> +                     mask = CLUSTER_DEBUG_RESET_STATUS;
>> >> +                     data = readl_relaxed(sysctrl + \
>> >> +                                          SC_CPU_RESET_STATUS(cluster));
>> >> +             } while (data & mask);
>> >> +             hip04_set_snoop_filter(cluster, 1);
>> >> +     }
>> >> +
>> >> +     hip04_cpu_table[cluster][cpu]++;
>> >> +
>> >> +     data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
>> >> +            CORE_DEBUG_RESET_BIT(cpu);
>> >> +     writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
>> >> +     spin_unlock_irq(&boot_lock);
>> >> +     msleep(POLL_MSEC);
>> >> +
>> >> +     return 0;
>> >> +}
>> >> +
>> >> +static void hip04_mcpm_power_down(void)
>> >> +{
>> >> +     unsigned int mpidr, cpu, cluster, data = 0;
>> >> +     bool skip_reset = false;
>> >> +
>> >> +     mpidr = read_cpuid_mpidr();
>> >> +     cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
>> >> +     cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
>> >> +
>> >> +     __mcpm_cpu_going_down(cpu, cluster);
>> >> +
>> >> +     spin_lock(&boot_lock);
>> >> +     BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
>> >> +     hip04_cpu_table[cluster][cpu]--;
>> >> +     if (hip04_cpu_table[cluster][cpu] == 1) {
>> >> +             /* A power_up request went ahead of us. */
>> >> +             skip_reset = true;
>> >> +     } else if (hip04_cpu_table[cluster][cpu] > 1) {
>> >> +             pr_err("Cluster %d CPU%d is still running\n", cluster, cpu);
>> >
>> > This message is misleading.  If execution gets here, that means
>> > mcpm_cpu_power_up() was called more than twice in a row for the same CPU
>> > which should never happen.
>> >
>> OK. I'll replace the comments.
>>
>> >> +             BUG();
>> >> +     }
>> >> +
>> >> +     spin_unlock(&boot_lock);
>> >> +
>> >> +     v7_exit_coherency_flush(louis);
>> >> +
>> >> +     __mcpm_cpu_down(cpu, cluster);
>> >> +
>> >> +     if (!skip_reset) {
>> >> +             data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
>> >> +                    CORE_DEBUG_RESET_BIT(cpu);
>> >> +             writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
>> >
>> > You should not perform this outside the lock protected region as this
>> > could race with hip04_mcpm_power_up().  Instead, this should be done
>> > above when hip04_cpu_table[cluster][cpu] == 0 after being decremented.
>> >
>>
>> No. power_down() is executed on the specified CPU. If spin_unlock() is
>> placed after reset operation, it means that there's no chance to
>> execute the spin_unlock(). Because CPU is already in reset mode at
>> this time.
>
> Normally, reset is effective only when WFI is later executed.  Are you
> sure this is not the case on hip04 as well?
>
>
Oh, it's different. cpu_v7_reset() is like giving a reset pulse signal
to the CPU core logic.
The operation on the SC_CPU_RESET_REQ register is more like taking the
CPU out of reset mode. After the system is powered on, all CPUs except
CPU0 stay in reset mode.

Regards
Haojian
Dave Martin May 21, 2014, 10:02 a.m. UTC | #5
On Tue, May 20, 2014 at 12:43:59PM +0800, Haojian Zhuang wrote:
> On 16 May 2014 04:01, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > On Thu, 15 May 2014, Haojian Zhuang wrote:
> >
> >> On 14 May 2014 03:43, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> >> > On Tue, 13 May 2014, Haojian Zhuang wrote:
> >> >
> >> >> +     data = readl_relaxed(fabric + FAB_SF_MODE);
> >> >> +     if (on)
> >> >> +             data |= 1 << cluster;
> >> >> +     else
> >> >> +             data &= ~(1 << cluster);
> >> >> +     writel_relaxed(data, fabric + FAB_SF_MODE);
> >> >> +     while (1) {
> >> >> +             if (data == readl_relaxed(fabric + FAB_SF_MODE))
> >> >> +                     break;
> >> >> +     }
> >> >> +}
> >> >
> >> > The above could be easily coded in assembly for the power_up_setup
> >> > callback thusly:
> >> >
> >> > hip04_power_up_setup:
> >> >
> >> >         cmp     r0, #0                  @ check affinity level
> >> >         bxeq    lr                      @ nothing to do at CPU level
> >> >
> >> >         mrc     p15, 0, r0, c0, c0, 5   @ get MPIDR
> >> >         ubfx    r0, r0, #8, #8          @ extract cluster number
> >> >
> >> >         adr     r1, .LC0
> >> >         ldmia   r1, {r2, r3}
> >> >         sub     r2, r2, r1              @ virt_addr - phys_addr
> >> >         ldr     r1, [r2, r3]            @ get fabric_phys_addr
> >> >         mov     r2, #1
> >> >         ldr     r3, [r1, #FAB_SF_MODE]  @ read "data"
> >> >         orr     r3, r3, r2, lsl r0      @ set cluster bit
> >> >         str     r3, [r1, #FAB_SF_MODE]  @ write it back
> >> >
> >> > 1:      ldr     r2, [r1, #FAB_SF_MODE]  @ read register content
> >> >         cmp     r2, r3                  @ make sure it matches
> >> >         bne     1b                      @ otherwise retry
> >> >
> >> >         bx      lr
> >> >
> >> > :LC0:   .word   .
> >> >         .word   fabric_phys_addr - .LC0
> >> >
> >> > That should be it.
> >> >
> >>
> >> No. These code should be executed before new CPU on. If I transfer
> >> them to assembler code, it means that code will be executed after
> >> new CPU on.
> >
> > Exact.
> >
> >> Then it results me failing to make new CPU online.
> >
> > The assembly code could be wrong as well.  Are you sure this is not the
> > actual reason?
> >
> > Is there some documentation for this stuff?
> >
> 
> There's no problem in assembly code. I even rewrite your assembly code.
> 
> If I keep my c code with assembly code, new CPU could be online right.
> If I only use assembly code, I only get the kernel panic. So it's not
> caused by assembly code. It's caused by executing code after new CPU
> on.
> 
> There's no documentation on this. They didn't prepare well on documents.
> I think they'll improve it in the future.

It's essential to understand what the hardware is actually doing here.

If we don't understand exactly what toggling those bits in FAB_SF_MODE
actually does, then it's impossible to judge on how to do it safely.

Cheers
---Dave
Nicolas Pitre May 21, 2014, 1:52 p.m. UTC | #6
[ I somehow missed this email yesterday.  Sorry if I asked the same 
  questions for which you already had provided answers. ]

On Tue, 20 May 2014, Haojian Zhuang wrote:

> On 16 May 2014 04:01, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > On Thu, 15 May 2014, Haojian Zhuang wrote:
> >
> >> On 14 May 2014 03:43, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> >> > On Tue, 13 May 2014, Haojian Zhuang wrote:
> >> >
> >> >> +     data = readl_relaxed(fabric + FAB_SF_MODE);
> >> >> +     if (on)
> >> >> +             data |= 1 << cluster;
> >> >> +     else
> >> >> +             data &= ~(1 << cluster);
> >> >> +     writel_relaxed(data, fabric + FAB_SF_MODE);
> >> >> +     while (1) {
> >> >> +             if (data == readl_relaxed(fabric + FAB_SF_MODE))
> >> >> +                     break;
> >> >> +     }
> >> >> +}
> >> >
> >> > The above could be easily coded in assembly for the power_up_setup
> >> > callback thusly:
> >> >
> >> > hip04_power_up_setup:
> >> >
> >> >         cmp     r0, #0                  @ check affinity level
> >> >         bxeq    lr                      @ nothing to do at CPU level
> >> >
> >> >         mrc     p15, 0, r0, c0, c0, 5   @ get MPIDR
> >> >         ubfx    r0, r0, #8, #8          @ extract cluster number
> >> >
> >> >         adr     r1, .LC0
> >> >         ldmia   r1, {r2, r3}
> >> >         sub     r2, r2, r1              @ virt_addr - phys_addr
> >> >         ldr     r1, [r2, r3]            @ get fabric_phys_addr
> >> >         mov     r2, #1
> >> >         ldr     r3, [r1, #FAB_SF_MODE]  @ read "data"
> >> >         orr     r3, r3, r2, lsl r0      @ set cluster bit
> >> >         str     r3, [r1, #FAB_SF_MODE]  @ write it back
> >> >
> >> > 1:      ldr     r2, [r1, #FAB_SF_MODE]  @ read register content
> >> >         cmp     r2, r3                  @ make sure it matches
> >> >         bne     1b                      @ otherwise retry
> >> >
> >> >         bx      lr
> >> >
> >> > :LC0:   .word   .
> >> >         .word   fabric_phys_addr - .LC0
> >> >
> >> > That should be it.
> >> >
> >>
> >> No. These code should be executed before new CPU on. If I transfer
> >> them to assembler code, it means that code will be executed after
> >> new CPU on.
> >
> > Exact.
> >
> >> Then it results me failing to make new CPU online.
> >
> > The assembly code could be wrong as well.  Are you sure this is not the
> > actual reason?
> >
> > Is there some documentation for this stuff?
> >
> 
> There's no problem in assembly code. I even rewrite your assembly code.
> 
> If I keep my c code with assembly code, new CPU could be online right.
> If I only use assembly code, I only get the kernel panic. So it's not
> caused by assembly code. It's caused by executing code after new CPU
> on.

Beware.  The assembly code, when invoked via the MCPM layer during early 
boot of a CPU, is executing with the MMU still disabled.  That means all 
addresses must be physical addresses.  This is where things myght be 
tricky.  And then that code should not work if invoked from C code 
because it then has to deal with virtual addresses. So if you tested the 
assembly code by calling it from C code and it worked then the assembly 
code is wrong.

To be sure please post the code you tested (mine wasn't complete) so we 
could tell you if it is right.

> cpu_v7_reset() likes to give a reset pulse signal to CPU core logic. 
> The operation on SC_CPU_RESET_REQ register likes make CPU out of reset 
> mode. After system is power on, all CPUs except for CPU0 stay in reset 
> mode.

Sorry, I don't fully understand the above.

I also note in your code that you write the same bits to 
SC_CPU_RESET_REQ in both the power_up and power_down methods.  So if 
this is about sending a reset pulse only, how do you keep a CPU down for 
a long period?


Nicolas
diff mbox

Patch

diff --git a/arch/arm/mach-hisi/Makefile b/arch/arm/mach-hisi/Makefile
index 2ae1b59..e7a8640 100644
--- a/arch/arm/mach-hisi/Makefile
+++ b/arch/arm/mach-hisi/Makefile
@@ -3,4 +3,5 @@ 
 #
 
 obj-y	+= hisilicon.o
+obj-$(CONFIG_MCPM)		+= platmcpm.o
 obj-$(CONFIG_SMP)		+= platsmp.o hotplug.o
diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c
new file mode 100644
index 0000000..3b42977
--- /dev/null
+++ b/arch/arm/mach-hisi/platmcpm.c
@@ -0,0 +1,304 @@ 
+/*
+ * Copyright (c) 2013-2014 Linaro Ltd.
+ * Copyright (c) 2013-2014 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/mcpm.h>
+
+#include "core.h"
+
/*
 * Bit definitions in SC_CPU_RESET_REQ[x]/SC_CPU_RESET_DREQ[x].
 * 1 -- unreset; 0 -- reset
 *
 * Macro arguments are fully parenthesized so expansion stays correct
 * for any expression argument (CERT PRE01-C).
 */
#define CORE_RESET_BIT(x)		(1 << (x))
#define NEON_RESET_BIT(x)		(1 << ((x) + 4))
#define CORE_DEBUG_RESET_BIT(x)		(1 << ((x) + 9))
#define CLUSTER_L2_RESET_BIT		(1 << 8)
#define CLUSTER_DEBUG_RESET_BIT		(1 << 13)

/*
 * Bit definitions in SC_CPU_RESET_STATUS[x].
 * 1 -- reset status; 0 -- unreset status
 */
#define CORE_RESET_STATUS(x)		(1 << (x))
#define NEON_RESET_STATUS(x)		(1 << ((x) + 4))
#define CORE_DEBUG_RESET_STATUS(x)	(1 << ((x) + 9))
#define CLUSTER_L2_RESET_STATUS		(1 << 8)
#define CLUSTER_DEBUG_RESET_STATUS	(1 << 13)
#define CORE_WFI_STATUS(x)		(1 << ((x) + 16))
#define CORE_WFE_STATUS(x)		(1 << ((x) + 20))
#define CORE_DEBUG_ACK(x)		(1 << ((x) + 24))

/* System controller reset register offsets, one pair per cluster x. */
#define SC_CPU_RESET_REQ(x)		(0x520 + ((x) << 3))	/* reset */
#define SC_CPU_RESET_DREQ(x)		(0x524 + ((x) << 3))	/* unreset */
#define SC_CPU_RESET_STATUS(x)		(0x1520 + ((x) << 3))

/* Fabric register offsets. */
#define FAB_SF_MODE			0x0c
#define FAB_SF_INVLD			0x10

/* bits definition in FB_SF_INVLD */
#define FB_SF_INVLD_START		(1 << 8)

#define HIP04_MAX_CLUSTERS		4
#define HIP04_MAX_CPUS_PER_CLUSTER	4

/* Polling interval and overall timeout when waiting for a core reset. */
#define POLL_MSEC	10
#define TIMEOUT_MSEC	1000
+
+/*
+ * Boot-protocol parameters read from the "hisilicon,sysctrl" DT node.
+ * power_up stages bootwrapper_phys/bootwrapper_magic (plus the MCPM
+ * entry point) into the relocation mailbox for the secondary CPUs.
+ */
+struct hip04_secondary_cpu_data {
+	u32	bootwrapper_phys;	/* phys load address of the bootwrapper */
+	u32	bootwrapper_size;	/* bootwrapper image size (from DT) */
+	u32	bootwrapper_magic;	/* magic word written to the mailbox */
+	u32	relocation_entry;	/* phys address of the boot mailbox */
+	u32	relocation_size;	/* size of the boot mailbox region */
+};
+
+/* ioremapped bases: boot mailbox, system controller, cluster fabric. */
+static void __iomem *relocation, *sysctrl, *fabric;
+/* Per-CPU use counts; nonzero means the CPU is up or being brought up. */
+static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
+/* Serializes mailbox writes, reset register accesses and table updates. */
+static DEFINE_SPINLOCK(boot_lock);
+static struct hip04_secondary_cpu_data hip04_boot;
+
+static bool hip04_cluster_down(unsigned int cluster)
+{
+	int i;
+
+	for (i = 0; i < HIP04_MAX_CPUS_PER_CLUSTER; i++)
+		if (hip04_cpu_table[cluster][i])
+			return false;
+	return true;
+}
+
+static void hip04_set_snoop_filter(unsigned int cluster, unsigned int on)
+{
+	unsigned long data;
+
+	if (!fabric)
+		return;
+	data = readl_relaxed(fabric + FAB_SF_MODE);
+	if (on)
+		data |= 1 << cluster;
+	else
+		data &= ~(1 << cluster);
+	writel_relaxed(data, fabric + FAB_SF_MODE);
+	while (1) {
+		if (data == readl_relaxed(fabric + FAB_SF_MODE))
+			break;
+	}
+}
+
+static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
+{
+	unsigned long data, mask;
+
+	if (!relocation || !sysctrl)
+		return -ENODEV;
+	if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
+		return -EINVAL;
+
+	spin_lock_irq(&boot_lock);
+	writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
+	writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
+	writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
+	writel_relaxed(0, relocation + 12);
+
+	if (hip04_cluster_down(cluster)) {
+		data = CLUSTER_DEBUG_RESET_BIT;
+		writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
+		do {
+			mask = CLUSTER_DEBUG_RESET_STATUS;
+			data = readl_relaxed(sysctrl + \
+					     SC_CPU_RESET_STATUS(cluster));
+		} while (data & mask);
+		hip04_set_snoop_filter(cluster, 1);
+	}
+
+	hip04_cpu_table[cluster][cpu]++;
+
+	data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
+	       CORE_DEBUG_RESET_BIT(cpu);
+	writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
+	spin_unlock_irq(&boot_lock);
+	msleep(POLL_MSEC);
+
+	return 0;
+}
+
+/*
+ * MCPM power_down: runs on the CPU that is going down (identified via
+ * its own MPIDR).  Drops this CPU's use count, exits coherency, and --
+ * unless a concurrent power_up raised the count again -- asserts the
+ * core/NEON/debug resets for this CPU.
+ *
+ * The statement order (going_down -> locked bookkeeping -> coherency
+ * exit -> cpu_down -> reset request) follows the MCPM race-avoidance
+ * protocol and must not be rearranged.
+ */
+static void hip04_mcpm_power_down(void)
+{
+	unsigned int mpidr, cpu, cluster, data = 0;
+	bool skip_reset = false;
+
+	/* Identify the calling CPU from the affinity fields of MPIDR. */
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	__mcpm_cpu_going_down(cpu, cluster);
+
+	spin_lock(&boot_lock);
+	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+	hip04_cpu_table[cluster][cpu]--;
+	if (hip04_cpu_table[cluster][cpu] == 1) {
+		/* A power_up request went ahead of us. */
+		skip_reset = true;
+	} else if (hip04_cpu_table[cluster][cpu] > 1) {
+		/* More than one outstanding user: must not power off. */
+		pr_err("Cluster %d CPU%d is still running\n", cluster, cpu);
+		BUG();
+	}
+
+	spin_unlock(&boot_lock);
+
+	/* Flush/disable caches and leave coherency (to the LoUIS). */
+	v7_exit_coherency_flush(louis);
+
+	__mcpm_cpu_down(cpu, cluster);
+
+	if (!skip_reset) {
+		/*
+		 * NOTE(review): these are the same bits power_up writes to
+		 * SC_CPU_RESET_DREQ; here they go to SC_CPU_RESET_REQ to
+		 * put the core back into reset -- confirm against the SoC
+		 * manual that this holds the CPU down, not just pulses it.
+		 */
+		data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
+		       CORE_DEBUG_RESET_BIT(cpu);
+		writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
+	}
+}
+
+static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int data, tries;
+
+	BUG_ON(cluster >= HIP04_MAX_CLUSTERS ||
+	       cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
+
+	for (tries = 0; tries < TIMEOUT_MSEC / POLL_MSEC; tries++) {
+		data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
+		if (!(data & CORE_RESET_STATUS(cpu))) {
+			msleep(POLL_MSEC);
+			continue;
+		}
+		return 0;
+	}
+	return -ETIMEDOUT;
+}
+
+static void hip04_mcpm_powered_up(void)
+{
+	if (!relocation)
+		return;
+	spin_lock(&boot_lock);
+	writel_relaxed(0, relocation);
+	writel_relaxed(0, relocation + 4);
+	writel_relaxed(0, relocation + 8);
+	writel_relaxed(0, relocation + 12);
+	spin_unlock(&boot_lock);
+}
+
+/* HiP04 MCPM backend; registered via mcpm_platform_register() at init. */
+static const struct mcpm_platform_ops hip04_mcpm_ops = {
+	.power_up		= hip04_mcpm_power_up,
+	.power_down		= hip04_mcpm_power_down,
+	.wait_for_powerdown	= hip04_mcpm_wait_for_powerdown,
+	.powered_up		= hip04_mcpm_powered_up,
+};
+
+static bool __init hip04_cpu_table_init(void)
+{
+	unsigned int mpidr, cpu, cluster;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	if (cluster >= HIP04_MAX_CLUSTERS ||
+	    cpu >= HIP04_MAX_CPUS_PER_CLUSTER) {
+		pr_err("%s: boot CPU is out of bound!\n", __func__);
+		return false;
+	}
+	hip04_set_snoop_filter(cluster, 1);
+	hip04_cpu_table[cluster][cpu] = 1;
+	return true;
+}
+
+static int __init hip04_mcpm_init(void)
+{
+	struct device_node *np, *np_fab;
+	int ret = -ENODEV;
+
+	np = of_find_compatible_node(NULL, NULL, "hisilicon,sysctrl");
+	if (!np)
+		goto err;
+	np_fab = of_find_compatible_node(NULL, NULL, "hisilicon,hip04-fabric");
+	if (!np_fab)
+		goto err;
+
+	if (of_property_read_u32(np, "bootwrapper-phys",
+				 &hip04_boot.bootwrapper_phys)) {
+		pr_err("failed to get bootwrapper-phys\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	if (of_property_read_u32(np, "bootwrapper-size",
+				 &hip04_boot.bootwrapper_size)) {
+		pr_err("failed to get bootwrapper-size\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	if (of_property_read_u32(np, "bootwrapper-magic",
+				 &hip04_boot.bootwrapper_magic)) {
+		pr_err("failed to get bootwrapper-magic\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	if (of_property_read_u32(np, "relocation-entry",
+				 &hip04_boot.relocation_entry)) {
+		pr_err("failed to get relocation-entry\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	if (of_property_read_u32(np, "relocation-size",
+				 &hip04_boot.relocation_size)) {
+		pr_err("failed to get relocation-size\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	relocation = ioremap(hip04_boot.relocation_entry,
+			     hip04_boot.relocation_size);
+	if (!relocation) {
+		pr_err("failed to map relocation space\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+	sysctrl = of_iomap(np, 0);
+	if (!sysctrl) {
+		pr_err("failed to get sysctrl base\n");
+		ret = -ENOMEM;
+		goto err_sysctrl;
+	}
+	fabric = of_iomap(np_fab, 0);
+	if (!fabric) {
+		pr_err("failed to get fabric base\n");
+		ret = -ENOMEM;
+		goto err_fabric;
+	}
+
+	if (!hip04_cpu_table_init())
+		return -EINVAL;
+	ret = mcpm_platform_register(&hip04_mcpm_ops);
+	if (!ret) {
+		mcpm_sync_init(NULL);
+		pr_info("HiP04 MCPM initialized\n");
+	}
+	mcpm_smp_set_ops();
+	return ret;
+err_fabric:
+	iounmap(sysctrl);
+err_sysctrl:
+	iounmap(relocation);
+err:
+	return ret;
+}
+early_initcall(hip04_mcpm_init);