diff mbox

[3/3] ARM: zynq: add SMP support

Message ID 1364043450-18700-4-git-send-email-s.trumtrar@pengutronix.de (mailing list archive)
State New, archived
Headers show

Commit Message

Steffen Trumtrar March 23, 2013, 12:57 p.m. UTC
The Zynq7000 has a CortexA9 with at least 2 cores. Add support to boot them all.
To do this a trampoline is needed. At runtime the jump address and two
instructions are copied to RAM by CPU0 and then executed by CPU1.

Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Cc: Michal Simek <michal.simek@xilinx.com>
---
 arch/arm/boot/dts/zynq-7000.dtsi |  14 +++++
 arch/arm/mach-zynq/Makefile      |   2 +
 arch/arm/mach-zynq/common.c      |   1 +
 arch/arm/mach-zynq/common.h      |  14 +++++
 arch/arm/mach-zynq/headsmp.S     |  20 +++++++
 arch/arm/mach-zynq/platsmp.c     | 110 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 161 insertions(+)
 create mode 100644 arch/arm/mach-zynq/headsmp.S
 create mode 100644 arch/arm/mach-zynq/platsmp.c

Comments

Michal Simek March 25, 2013, 2:27 p.m. UTC | #1
2013/3/23 Steffen Trumtrar <s.trumtrar@pengutronix.de>:
> The Zynq7000 has a CortexA9 with at least 2 cores. Add support to boot them all.
> To do this a trampoline is needed. At runtime the jump address and two
> instructions are copied to RAM by CPU0 and then executed by CPU1.
>
> Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
> Cc: Michal Simek <michal.simek@xilinx.com>
> ---
>  arch/arm/boot/dts/zynq-7000.dtsi |  14 +++++
>  arch/arm/mach-zynq/Makefile      |   2 +
>  arch/arm/mach-zynq/common.c      |   1 +
>  arch/arm/mach-zynq/common.h      |  14 +++++
>  arch/arm/mach-zynq/headsmp.S     |  20 +++++++
>  arch/arm/mach-zynq/platsmp.c     | 110 +++++++++++++++++++++++++++++++++++++++
>  6 files changed, 161 insertions(+)
>  create mode 100644 arch/arm/mach-zynq/headsmp.S
>  create mode 100644 arch/arm/mach-zynq/platsmp.c
>
> diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi
> index c103082..258b6c9 100644
> --- a/arch/arm/boot/dts/zynq-7000.dtsi
> +++ b/arch/arm/boot/dts/zynq-7000.dtsi
> @@ -15,6 +15,20 @@
>  / {
>         compatible = "xlnx,zynq-7000";
>
> +       cpus {
> +               cpu@0 {
> +                       compatible = "arm,cortex-a9";
> +                       next-level-cache = <&L2>;
> +                       clocks = <&armpll 0>;
> +               };
> +
> +               cpu@1 {
> +                       compatible = "arm,cortex-a9";
> +                       next-level-cache = <&L2>;
> +                       clocks = <&armpll 0>;
> +               };
> +       };

Is this really needed?

> +
>         amba {
>                 compatible = "simple-bus";
>                 #address-cells = <1>;
> diff --git a/arch/arm/mach-zynq/Makefile b/arch/arm/mach-zynq/Makefile
> index 397268c..906d199 100644
> --- a/arch/arm/mach-zynq/Makefile
> +++ b/arch/arm/mach-zynq/Makefile
> @@ -4,3 +4,5 @@
>
>  # Common support
>  obj-y                          := common.o timer.o
> +AFLAGS_headsmp.o :=-Wa,-march=armv7-a
> +obj-$(CONFIG_SMP) += headsmp.o platsmp.o
> diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
> index 1b9bb3d..4053962 100644
> --- a/arch/arm/mach-zynq/common.c
> +++ b/arch/arm/mach-zynq/common.c
> @@ -117,6 +117,7 @@ static const char *xilinx_dt_match[] = {
>  };
>
>  MACHINE_START(XILINX_EP107, "Xilinx Zynq Platform")
> +       .smp            = smp_ops(zynq_smp_ops),
>         .map_io         = xilinx_map_io,
>         .init_irq       = xilinx_irqchip_init,
>         .init_machine   = xilinx_init_machine,
> diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h
> index 8b4dbba..451d386 100644
> --- a/arch/arm/mach-zynq/common.h
> +++ b/arch/arm/mach-zynq/common.h
> @@ -19,4 +19,18 @@
>
>  void __init xttcps_timer_init(void);
>
> +#ifdef CONFIG_SMP
> +extern void secondary_startup(void);
> +extern char secondary_trampoline, secondary_trampoline_end;
> +#endif
> +
> +extern struct smp_operations __initdata zynq_smp_ops;
> +extern void __iomem *slcr_base_addr;
> +extern void __iomem *scu_base;
> +
> +#define SLCR_UNLOCK_MAGIC      0xdf0d
> +
> +#define SLCR_UNLOCK            0x8
> +#define SLCR_A9_CPU_RST_CTRL   0x244

As previous code.

> +
>  #endif
> diff --git a/arch/arm/mach-zynq/headsmp.S b/arch/arm/mach-zynq/headsmp.S
> new file mode 100644
> index 0000000..145bbae
> --- /dev/null
> +++ b/arch/arm/mach-zynq/headsmp.S
> @@ -0,0 +1,20 @@
> +/*
> + *  Copyright (c) 2013 Steffen Trumtrar <s.trumtrar@pengutronix.de>
> + *
> + * based on mach-socfpga/headsmp.S
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +#include <linux/linkage.h>
> +#include <linux/init.h>
> +
> +       __CPUINIT
> +       .arch   armv7-a
> +
> +ENTRY(secondary_trampoline)
> +       ldr     r0, [pc]
> +       mov     pc, r0

This code is familiar to me. It looks like my jump trampoline which I
wrote in C. :-)

> +
> +ENTRY(secondary_trampoline_end)
> diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c
> new file mode 100644
> index 0000000..1ea3d4f
> --- /dev/null
> +++ b/arch/arm/mach-zynq/platsmp.c
> @@ -0,0 +1,110 @@
> +/*
> + * Copyright (c) 2013 Steffen Trumtrar <s.trumtrar@pengutronix.de>
> + *
> + * based on
> + *  linux/arch/arm/mach-zynq/platsmp.c Copyright (C) 2011 Xilinx
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +#include <linux/delay.h>
> +#include <linux/errno.h>
> +#include <linux/init.h>
> +#include <linux/irqchip/arm-gic.h>
> +#include <linux/io.h>
> +#include <linux/jiffies.h>
> +#include <linux/smp.h>
> +
> +#include <asm/cacheflush.h>
> +#include <asm/mach/arch.h>
> +#include <asm/mach/map.h>
> +#include <asm/smp_scu.h>
> +
> +#include "common.h"
> +
> +#define A9_RST_MASK    (1 << 0)
> +#define A9_CLKSTOP_MASK        (1 << 4)

as I wrote above moving to separate driver make sense to me.

> +
> +static DEFINE_SPINLOCK(boot_lock);
> +
> +static void __cpuinit zynq_smp_secondary_init(unsigned int cpu)
> +{
> +       /*
> +        * if any interrupts are already enabled for the primary
> +        * core (e.g. timer irq), then they will not have been enabled
> +        * for us: do so
> +        */
> +       gic_secondary_init(0);
> +
> +       /*
> +        * Synchronise with the boot thread.
> +        */
> +       spin_lock(&boot_lock);
> +       spin_unlock(&boot_lock);
> +}
> +
> +static inline void zynq_set_cpu_jump(int cpu, void *jump_addr)
> +{
> +       if (jump_addr) {

This is wrong because if jump_addr is 0x1 - 0xF then you are rewriting.
It is unlikely but it should be captured.
The main reason is AMP solution where Linux can't be added from 0x0
and user amp code can setup any entry point. Better to check it just for sure.

> +               int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
> +
> +               writel(virt_to_phys(jump_addr), phys_to_virt(8));

This will probably generate sparse warning.

> +               memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);

This is nice solution. I am hardcoding instruction and this looks like
good solution.
Let me do some experiment with it because maybe this could be
problematic when Linux
kernel starts from different address than 0x0.

> +
> +               flush_cache_all();
> +               outer_flush_all();
> +               smp_wmb();
> +       }
> +}
> +
> +static void zynq_enable_cpu(int cpu, bool enable)
> +{
> +       writel(A9_CLKSTOP_MASK << cpu, slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
> +       writel(0, slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
> +}
> +
> +int __cpuinit zynq_smp_boot_secondary(unsigned int cpu, struct task_struct *idle)
> +{
> +       writel(SLCR_UNLOCK_MAGIC, slcr_base_addr + SLCR_UNLOCK);
> +       writel((A9_CLKSTOP_MASK | A9_RST_MASK) << cpu,
> +               slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
> +
> +       zynq_set_cpu_jump(cpu, secondary_startup);
> +       zynq_enable_cpu(cpu, true);
> +
> +       return 0;
> +}
> +
> +/*
> + * Initialise the CPU possible map early - this describes the CPUs
> + * which may be present or become present in the system.
> + */
> +static void __init zynq_smp_init_cpus(void)
> +{
> +       unsigned int ncores, i;
> +
> +       ncores = scu_get_core_count(scu_base);
> +
> +       for (i = 0; i < ncores; ++i)
> +               set_cpu_possible(i, true);
> +
> +       /* sanity check */
> +       if (ncores > num_possible_cpus()) {
> +               pr_warn("zynq: no. of cores (%d) greater than configured"
> +                       "maximum of %d - clipping\n", ncores, num_possible_cpus());
> +               ncores = num_possible_cpus();
> +       }

Is this necessary?
IRC: There is checking in smp_prepare_cpus in arch/arm/kernel/smp.c for it.

Thanks,
Michal
Steffen Trumtrar March 25, 2013, 4:34 p.m. UTC | #2
On Mon, Mar 25, 2013 at 03:27:57PM +0100, Michal Simek wrote:
> 2013/3/23 Steffen Trumtrar <s.trumtrar@pengutronix.de>:
> > The Zynq7000 has a CortexA9 with at least 2 cores. Add support to boot them all.
> > To do this a trampoline is needed. At runtime the jump address and two
> > instructions are copied to RAM by CPU0 and then executed by CPU1.
> >
> > Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
> > Cc: Michal Simek <michal.simek@xilinx.com>
> > ---
> >  arch/arm/boot/dts/zynq-7000.dtsi |  14 +++++
> >  arch/arm/mach-zynq/Makefile      |   2 +
> >  arch/arm/mach-zynq/common.c      |   1 +
> >  arch/arm/mach-zynq/common.h      |  14 +++++
> >  arch/arm/mach-zynq/headsmp.S     |  20 +++++++
> >  arch/arm/mach-zynq/platsmp.c     | 110 +++++++++++++++++++++++++++++++++++++++
> >  6 files changed, 161 insertions(+)
> >  create mode 100644 arch/arm/mach-zynq/headsmp.S
> >  create mode 100644 arch/arm/mach-zynq/platsmp.c
> >
> > diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi
> > index c103082..258b6c9 100644
> > --- a/arch/arm/boot/dts/zynq-7000.dtsi
> > +++ b/arch/arm/boot/dts/zynq-7000.dtsi
> > @@ -15,6 +15,20 @@
> >  / {
> >         compatible = "xlnx,zynq-7000";
> >
> > +       cpus {
> > +               cpu@0 {
> > +                       compatible = "arm,cortex-a9";
> > +                       next-level-cache = <&L2>;
> > +                       clocks = <&armpll 0>;
> > +               };
> > +
> > +               cpu@1 {
> > +                       compatible = "arm,cortex-a9";
> > +                       next-level-cache = <&L2>;
> > +                       clocks = <&armpll 0>;
> > +               };
> > +       };
> 
> Is this really needed?
> 
I don't know if it is really necessary for the SMP stuff, but my reasoning is:
the DT describes the HW and not only for Linux that is. We have two cores, so
we should specify that. Also, you can add other properties here like
imx6q.dtsi does for the operating-points for example.
So, I think it is valid to have those entries here.

> > +
> >         amba {
> >                 compatible = "simple-bus";
> >                 #address-cells = <1>;
> > diff --git a/arch/arm/mach-zynq/Makefile b/arch/arm/mach-zynq/Makefile
> > index 397268c..906d199 100644
> > --- a/arch/arm/mach-zynq/Makefile
> > +++ b/arch/arm/mach-zynq/Makefile
> > @@ -4,3 +4,5 @@
> >
> >  # Common support
> >  obj-y                          := common.o timer.o
> > +AFLAGS_headsmp.o :=-Wa,-march=armv7-a
> > +obj-$(CONFIG_SMP) += headsmp.o platsmp.o
> > diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
> > index 1b9bb3d..4053962 100644
> > --- a/arch/arm/mach-zynq/common.c
> > +++ b/arch/arm/mach-zynq/common.c
> > @@ -117,6 +117,7 @@ static const char *xilinx_dt_match[] = {
> >  };
> >
> >  MACHINE_START(XILINX_EP107, "Xilinx Zynq Platform")
> > +       .smp            = smp_ops(zynq_smp_ops),
> >         .map_io         = xilinx_map_io,
> >         .init_irq       = xilinx_irqchip_init,
> >         .init_machine   = xilinx_init_machine,
> > diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h
> > index 8b4dbba..451d386 100644
> > --- a/arch/arm/mach-zynq/common.h
> > +++ b/arch/arm/mach-zynq/common.h
> > @@ -19,4 +19,18 @@
> >
> >  void __init xttcps_timer_init(void);
> >
> > +#ifdef CONFIG_SMP
> > +extern void secondary_startup(void);
> > +extern char secondary_trampoline, secondary_trampoline_end;
> > +#endif
> > +
> > +extern struct smp_operations __initdata zynq_smp_ops;
> > +extern void __iomem *slcr_base_addr;
> > +extern void __iomem *scu_base;
> > +
> > +#define SLCR_UNLOCK_MAGIC      0xdf0d
> > +
> > +#define SLCR_UNLOCK            0x8
> > +#define SLCR_A9_CPU_RST_CTRL   0x244
> 
> As previous code.
> 
> > +
> >  #endif
> > diff --git a/arch/arm/mach-zynq/headsmp.S b/arch/arm/mach-zynq/headsmp.S
> > new file mode 100644
> > index 0000000..145bbae
> > --- /dev/null
> > +++ b/arch/arm/mach-zynq/headsmp.S
> > @@ -0,0 +1,20 @@
> > +/*
> > + *  Copyright (c) 2013 Steffen Trumtrar <s.trumtrar@pengutronix.de>
> > + *
> > + * based on mach-socfpga/headsmp.S
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +#include <linux/linkage.h>
> > +#include <linux/init.h>
> > +
> > +       __CPUINIT
> > +       .arch   armv7-a
> > +
> > +ENTRY(secondary_trampoline)
> > +       ldr     r0, [pc]
> > +       mov     pc, r0
> 
> This code is familiar to me. It looks like my jump trampoline which I
> wrote in C. :-)
> 
Of course. I shouldn't have only mentioned that in the platsmp.c
But I think asm is a little easier on the eyes than machine code ;-)

> > +
> > +ENTRY(secondary_trampoline_end)
> > diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c
> > new file mode 100644
> > index 0000000..1ea3d4f
> > --- /dev/null
> > +++ b/arch/arm/mach-zynq/platsmp.c
> > @@ -0,0 +1,110 @@
> > +/*
> > + * Copyright (c) 2013 Steffen Trumtrar <s.trumtrar@pengutronix.de>
> > + *
> > + * based on
> > + *  linux/arch/arm/mach-zynq/platsmp.c Copyright (C) 2011 Xilinx
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +#include <linux/delay.h>
> > +#include <linux/errno.h>
> > +#include <linux/init.h>
> > +#include <linux/irqchip/arm-gic.h>
> > +#include <linux/io.h>
> > +#include <linux/jiffies.h>
> > +#include <linux/smp.h>
> > +
> > +#include <asm/cacheflush.h>
> > +#include <asm/mach/arch.h>
> > +#include <asm/mach/map.h>
> > +#include <asm/smp_scu.h>
> > +
> > +#include "common.h"
> > +
> > +#define A9_RST_MASK    (1 << 0)
> > +#define A9_CLKSTOP_MASK        (1 << 4)
> 
> as I wrote above moving to separate driver make sense to me.
> 
> > +
> > +static DEFINE_SPINLOCK(boot_lock);
> > +
> > +static void __cpuinit zynq_smp_secondary_init(unsigned int cpu)
> > +{
> > +       /*
> > +        * if any interrupts are already enabled for the primary
> > +        * core (e.g. timer irq), then they will not have been enabled
> > +        * for us: do so
> > +        */
> > +       gic_secondary_init(0);
> > +
> > +       /*
> > +        * Synchronise with the boot thread.
> > +        */
> > +       spin_lock(&boot_lock);
> > +       spin_unlock(&boot_lock);
> > +}
> > +
> > +static inline void zynq_set_cpu_jump(int cpu, void *jump_addr)
> > +{
> > +       if (jump_addr) {
> 
> This is wrong because if jump_addr is 0x1 - 0xF then you are rewriting.
> It is unlikely but it should be captured.
> The main reason is AMP solution where Linux can't be added from 0x0
> and user amp code can setup any entry point. Better to check it just for sure.
> 
> > +               int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
> > +
> > +               writel(virt_to_phys(jump_addr), phys_to_virt(8));
> 
> This will probably generate sparse warning.
> 
> > +               memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
> 
> This is nice solution. I am hardcoding instruction and this looks like
> good solution.
> Let me do some experiment with it because maybe this could be
> problematic when Linux
> kernel starts from different address than 0x0.
> 
> > +
> > +               flush_cache_all();
> > +               outer_flush_all();
> > +               smp_wmb();
> > +       }
> > +}
> > +
> > +static void zynq_enable_cpu(int cpu, bool enable)
> > +{
> > +       writel(A9_CLKSTOP_MASK << cpu, slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
> > +       writel(0, slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
> > +}
> > +
> > +int __cpuinit zynq_smp_boot_secondary(unsigned int cpu, struct task_struct *idle)
> > +{
> > +       writel(SLCR_UNLOCK_MAGIC, slcr_base_addr + SLCR_UNLOCK);
> > +       writel((A9_CLKSTOP_MASK | A9_RST_MASK) << cpu,
> > +               slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
> > +
> > +       zynq_set_cpu_jump(cpu, secondary_startup);
> > +       zynq_enable_cpu(cpu, true);
> > +
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Initialise the CPU possible map early - this describes the CPUs
> > + * which may be present or become present in the system.
> > + */
> > +static void __init zynq_smp_init_cpus(void)
> > +{
> > +       unsigned int ncores, i;
> > +
> > +       ncores = scu_get_core_count(scu_base);
> > +
> > +       for (i = 0; i < ncores; ++i)
> > +               set_cpu_possible(i, true);
> > +
> > +       /* sanity check */
> > +       if (ncores > num_possible_cpus()) {
> > +               pr_warn("zynq: no. of cores (%d) greater than configured"
> > +                       "maximum of %d - clipping\n", ncores, num_possible_cpus());
> > +               ncores = num_possible_cpus();
> > +       }
> 
> Is this necessary?
> IRC: There is checking in smp_prepare_cpus in arch/arm/kernel/smp.c for it.
> 

As our solutions are pretty much alike, I think I will not work on this patch
in the future. Makes no sense to do everything twice :-)

Regards,
Steffen
Michal Simek March 25, 2013, 4:47 p.m. UTC | #3
2013/3/25 Steffen Trumtrar <s.trumtrar@pengutronix.de>:
> On Mon, Mar 25, 2013 at 03:27:57PM +0100, Michal Simek wrote:
>> 2013/3/23 Steffen Trumtrar <s.trumtrar@pengutronix.de>:
>> > The Zynq7000 has a CortexA9 with at least 2 cores. Add support to boot them all.
>> > To do this a trampoline is needed. At runtime the jump address and two
>> > instructions are copied to RAM by CPU0 and then executed by CPU1.
>> >
>> > Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
>> > Cc: Michal Simek <michal.simek@xilinx.com>
>> > ---
>> >  arch/arm/boot/dts/zynq-7000.dtsi |  14 +++++
>> >  arch/arm/mach-zynq/Makefile      |   2 +
>> >  arch/arm/mach-zynq/common.c      |   1 +
>> >  arch/arm/mach-zynq/common.h      |  14 +++++
>> >  arch/arm/mach-zynq/headsmp.S     |  20 +++++++
>> >  arch/arm/mach-zynq/platsmp.c     | 110 +++++++++++++++++++++++++++++++++++++++
>> >  6 files changed, 161 insertions(+)
>> >  create mode 100644 arch/arm/mach-zynq/headsmp.S
>> >  create mode 100644 arch/arm/mach-zynq/platsmp.c
>> >
>> > diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi
>> > index c103082..258b6c9 100644
>> > --- a/arch/arm/boot/dts/zynq-7000.dtsi
>> > +++ b/arch/arm/boot/dts/zynq-7000.dtsi
>> > @@ -15,6 +15,20 @@
>> >  / {
>> >         compatible = "xlnx,zynq-7000";
>> >
>> > +       cpus {
>> > +               cpu@0 {
>> > +                       compatible = "arm,cortex-a9";
>> > +                       next-level-cache = <&L2>;
>> > +                       clocks = <&armpll 0>;
>> > +               };
>> > +
>> > +               cpu@1 {
>> > +                       compatible = "arm,cortex-a9";
>> > +                       next-level-cache = <&L2>;
>> > +                       clocks = <&armpll 0>;
>> > +               };
>> > +       };
>>
>> Is this really needed?
>>
> I don't know if it is really necessary for the SMP stuff, but my reasoning is:
> the DT describes the HW and not only for Linux that is. We have two cores, so
> we should specify that. Also, you can add other properties here like
> imx6q.dtsi does for the operating-points for example.
> So, I think it is valid to have those entries here.

From device-tree description point of view we should add that cpus.
Rob also mention in scu patch that we can count number of cpus from dts
that there is support which we can use.

But anyway there are are still some things in description which I really miss
like adding pmu node directly to cpu node because core0 uses different irq
than core1. In our case there is also register access.
The same case is with scu timer/watchdog which we have on the bus
and it is handled via PPI interrupt cpu mask properties.

I am not saying not to have them at all.
The point is why to have this in this patch in smp support if it works
without it.


>> > +
>> >         amba {
>> >                 compatible = "simple-bus";
>> >                 #address-cells = <1>;
>> > diff --git a/arch/arm/mach-zynq/Makefile b/arch/arm/mach-zynq/Makefile
>> > index 397268c..906d199 100644
>> > --- a/arch/arm/mach-zynq/Makefile
>> > +++ b/arch/arm/mach-zynq/Makefile
>> > @@ -4,3 +4,5 @@
>> >
>> >  # Common support
>> >  obj-y                          := common.o timer.o
>> > +AFLAGS_headsmp.o :=-Wa,-march=armv7-a
>> > +obj-$(CONFIG_SMP) += headsmp.o platsmp.o
>> > diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
>> > index 1b9bb3d..4053962 100644
>> > --- a/arch/arm/mach-zynq/common.c
>> > +++ b/arch/arm/mach-zynq/common.c
>> > @@ -117,6 +117,7 @@ static const char *xilinx_dt_match[] = {
>> >  };
>> >
>> >  MACHINE_START(XILINX_EP107, "Xilinx Zynq Platform")
>> > +       .smp            = smp_ops(zynq_smp_ops),
>> >         .map_io         = xilinx_map_io,
>> >         .init_irq       = xilinx_irqchip_init,
>> >         .init_machine   = xilinx_init_machine,
>> > diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h
>> > index 8b4dbba..451d386 100644
>> > --- a/arch/arm/mach-zynq/common.h
>> > +++ b/arch/arm/mach-zynq/common.h
>> > @@ -19,4 +19,18 @@
>> >
>> >  void __init xttcps_timer_init(void);
>> >
>> > +#ifdef CONFIG_SMP
>> > +extern void secondary_startup(void);
>> > +extern char secondary_trampoline, secondary_trampoline_end;
>> > +#endif
>> > +
>> > +extern struct smp_operations __initdata zynq_smp_ops;
>> > +extern void __iomem *slcr_base_addr;
>> > +extern void __iomem *scu_base;
>> > +
>> > +#define SLCR_UNLOCK_MAGIC      0xdf0d
>> > +
>> > +#define SLCR_UNLOCK            0x8
>> > +#define SLCR_A9_CPU_RST_CTRL   0x244
>>
>> As previous code.
>>
>> > +
>> >  #endif
>> > diff --git a/arch/arm/mach-zynq/headsmp.S b/arch/arm/mach-zynq/headsmp.S
>> > new file mode 100644
>> > index 0000000..145bbae
>> > --- /dev/null
>> > +++ b/arch/arm/mach-zynq/headsmp.S
>> > @@ -0,0 +1,20 @@
>> > +/*
>> > + *  Copyright (c) 2013 Steffen Trumtrar <s.trumtrar@pengutronix.de>
>> > + *
>> > + * based on mach-socfpga/headsmp.S
>> > + *
>> > + * This program is free software; you can redistribute it and/or modify
>> > + * it under the terms of the GNU General Public License version 2 as
>> > + * published by the Free Software Foundation.
>> > + */
>> > +#include <linux/linkage.h>
>> > +#include <linux/init.h>
>> > +
>> > +       __CPUINIT
>> > +       .arch   armv7-a
>> > +
>> > +ENTRY(secondary_trampoline)
>> > +       ldr     r0, [pc]
>> > +       mov     pc, r0
>>
>> This code is familiar to me. It looks like my jump trampoline which I
>> wrote in C. :-)
>>
> Of course. I shouldn't have only mentioned that in the platsmp.c
> But I think asm is a little easier on the eyes than machine code ;-)

Definitely. I will adopt this solution.
Thanks,


>> > +
>> > +ENTRY(secondary_trampoline_end)
>> > diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c
>> > new file mode 100644
>> > index 0000000..1ea3d4f
>> > --- /dev/null
>> > +++ b/arch/arm/mach-zynq/platsmp.c
>> > @@ -0,0 +1,110 @@
>> > +/*
>> > + * Copyright (c) 2013 Steffen Trumtrar <s.trumtrar@pengutronix.de>
>> > + *
>> > + * based on
>> > + *  linux/arch/arm/mach-zynq/platsmp.c Copyright (C) 2011 Xilinx
>> > + *
>> > + * This program is free software; you can redistribute it and/or modify
>> > + * it under the terms of the GNU General Public License version 2 as
>> > + * published by the Free Software Foundation.
>> > + */
>> > +#include <linux/delay.h>
>> > +#include <linux/errno.h>
>> > +#include <linux/init.h>
>> > +#include <linux/irqchip/arm-gic.h>
>> > +#include <linux/io.h>
>> > +#include <linux/jiffies.h>
>> > +#include <linux/smp.h>
>> > +
>> > +#include <asm/cacheflush.h>
>> > +#include <asm/mach/arch.h>
>> > +#include <asm/mach/map.h>
>> > +#include <asm/smp_scu.h>
>> > +
>> > +#include "common.h"
>> > +
>> > +#define A9_RST_MASK    (1 << 0)
>> > +#define A9_CLKSTOP_MASK        (1 << 4)
>>
>> as I wrote above moving to separate driver make sense to me.
>>
>> > +
>> > +static DEFINE_SPINLOCK(boot_lock);
>> > +
>> > +static void __cpuinit zynq_smp_secondary_init(unsigned int cpu)
>> > +{
>> > +       /*
>> > +        * if any interrupts are already enabled for the primary
>> > +        * core (e.g. timer irq), then they will not have been enabled
>> > +        * for us: do so
>> > +        */
>> > +       gic_secondary_init(0);
>> > +
>> > +       /*
>> > +        * Synchronise with the boot thread.
>> > +        */
>> > +       spin_lock(&boot_lock);
>> > +       spin_unlock(&boot_lock);
>> > +}
>> > +
>> > +static inline void zynq_set_cpu_jump(int cpu, void *jump_addr)
>> > +{
>> > +       if (jump_addr) {
>>
>> This is wrong because if jump_addr is 0x1 - 0xF then you are rewriting.
>> It is unlikely but it should be captured.
>> The main reason is AMP solution where Linux can't be added from 0x0
>> and user amp code can setup any entry point. Better to check it just for sure.
>>
>> > +               int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
>> > +
>> > +               writel(virt_to_phys(jump_addr), phys_to_virt(8));
>>
>> This will probably generate sparse warning.
>>
>> > +               memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
>>
>> This is nice solution. I am hardcoding instruction and this looks like
>> good solution.
>> Let me do some experiment with it because maybe this could be
>> problematic when Linux
>> kernel starts from different address than 0x0.
>>
>> > +
>> > +               flush_cache_all();
>> > +               outer_flush_all();
>> > +               smp_wmb();
>> > +       }
>> > +}
>> > +
>> > +static void zynq_enable_cpu(int cpu, bool enable)
>> > +{
>> > +       writel(A9_CLKSTOP_MASK << cpu, slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
>> > +       writel(0, slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
>> > +}
>> > +
>> > +int __cpuinit zynq_smp_boot_secondary(unsigned int cpu, struct task_struct *idle)
>> > +{
>> > +       writel(SLCR_UNLOCK_MAGIC, slcr_base_addr + SLCR_UNLOCK);
>> > +       writel((A9_CLKSTOP_MASK | A9_RST_MASK) << cpu,
>> > +               slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
>> > +
>> > +       zynq_set_cpu_jump(cpu, secondary_startup);
>> > +       zynq_enable_cpu(cpu, true);
>> > +
>> > +       return 0;
>> > +}
>> > +
>> > +/*
>> > + * Initialise the CPU possible map early - this describes the CPUs
>> > + * which may be present or become present in the system.
>> > + */
>> > +static void __init zynq_smp_init_cpus(void)
>> > +{
>> > +       unsigned int ncores, i;
>> > +
>> > +       ncores = scu_get_core_count(scu_base);
>> > +
>> > +       for (i = 0; i < ncores; ++i)
>> > +               set_cpu_possible(i, true);
>> > +
>> > +       /* sanity check */
>> > +       if (ncores > num_possible_cpus()) {
>> > +               pr_warn("zynq: no. of cores (%d) greater than configured"
>> > +                       "maximum of %d - clipping\n", ncores, num_possible_cpus());
>> > +               ncores = num_possible_cpus();
>> > +       }
>>
>> Is this necessary?
>> IRC: There is checking in smp_prepare_cpus in arch/arm/kernel/smp.c for it.
>>
>
> As our solutions are pretty much alike, I think I will not work on this patch
> in the future. Makes no sense to do everything twice :-)

:-)

M
diff mbox

Patch

diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi
index c103082..258b6c9 100644
--- a/arch/arm/boot/dts/zynq-7000.dtsi
+++ b/arch/arm/boot/dts/zynq-7000.dtsi
@@ -15,6 +15,20 @@ 
 / {
 	compatible = "xlnx,zynq-7000";
 
+	cpus {
+		cpu@0 {
+			compatible = "arm,cortex-a9";
+			next-level-cache = <&L2>;
+			clocks = <&armpll 0>;
+		};
+
+		cpu@1 {
+			compatible = "arm,cortex-a9";
+			next-level-cache = <&L2>;
+			clocks = <&armpll 0>;
+		};
+	};
+
 	amba {
 		compatible = "simple-bus";
 		#address-cells = <1>;
diff --git a/arch/arm/mach-zynq/Makefile b/arch/arm/mach-zynq/Makefile
index 397268c..906d199 100644
--- a/arch/arm/mach-zynq/Makefile
+++ b/arch/arm/mach-zynq/Makefile
@@ -4,3 +4,5 @@ 
 
 # Common support
 obj-y				:= common.o timer.o
+AFLAGS_headsmp.o :=-Wa,-march=armv7-a
+obj-$(CONFIG_SMP) += headsmp.o platsmp.o
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index 1b9bb3d..4053962 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -117,6 +117,7 @@  static const char *xilinx_dt_match[] = {
 };
 
 MACHINE_START(XILINX_EP107, "Xilinx Zynq Platform")
+	.smp		= smp_ops(zynq_smp_ops),
 	.map_io		= xilinx_map_io,
 	.init_irq	= xilinx_irqchip_init,
 	.init_machine	= xilinx_init_machine,
diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h
index 8b4dbba..451d386 100644
--- a/arch/arm/mach-zynq/common.h
+++ b/arch/arm/mach-zynq/common.h
@@ -19,4 +19,18 @@ 
 
 void __init xttcps_timer_init(void);
 
+#ifdef CONFIG_SMP
+extern void secondary_startup(void);
+extern char secondary_trampoline, secondary_trampoline_end;
+#endif
+
+extern struct smp_operations __initdata zynq_smp_ops;
+extern void __iomem *slcr_base_addr;
+extern void __iomem *scu_base;
+
+#define SLCR_UNLOCK_MAGIC	0xdf0d
+
+#define SLCR_UNLOCK		0x8
+#define SLCR_A9_CPU_RST_CTRL	0x244
+
 #endif
diff --git a/arch/arm/mach-zynq/headsmp.S b/arch/arm/mach-zynq/headsmp.S
new file mode 100644
index 0000000..145bbae
--- /dev/null
+++ b/arch/arm/mach-zynq/headsmp.S
@@ -0,0 +1,20 @@ 
+/*
+ *  Copyright (c) 2013 Steffen Trumtrar <s.trumtrar@pengutronix.de>
+ *
+ * based on mach-socfpga/headsmp.S
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+	__CPUINIT
+	.arch	armv7-a
+
+ENTRY(secondary_trampoline)
+	ldr	r0, [pc]
+	mov	pc, r0
+
+ENTRY(secondary_trampoline_end)
diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c
new file mode 100644
index 0000000..1ea3d4f
--- /dev/null
+++ b/arch/arm/mach-zynq/platsmp.c
@@ -0,0 +1,110 @@ 
+/*
+ * Copyright (c) 2013 Steffen Trumtrar <s.trumtrar@pengutronix.de>
+ *
+ * based on
+ *  linux/arch/arm/mach-zynq/platsmp.c Copyright (C) 2011 Xilinx
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/smp_scu.h>
+
+#include "common.h"
+
+#define A9_RST_MASK	(1 << 0)
+#define A9_CLKSTOP_MASK	(1 << 4)
+
+static DEFINE_SPINLOCK(boot_lock);
+
+static void __cpuinit zynq_smp_secondary_init(unsigned int cpu)
+{
+	/*
+	 * if any interrupts are already enabled for the primary
+	 * core (e.g. timer irq), then they will not have been enabled
+	 * for us: do so
+	 */
+	gic_secondary_init(0);
+
+	/*
+	 * Synchronise with the boot thread.
+	 */
+	spin_lock(&boot_lock);
+	spin_unlock(&boot_lock);
+}
+
+static inline void zynq_set_cpu_jump(int cpu, void *jump_addr)
+{
+	if (jump_addr) {
+		int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
+
+		writel(virt_to_phys(jump_addr), phys_to_virt(8));
+		memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
+
+		flush_cache_all();
+		outer_flush_all();
+		smp_wmb();
+	}
+}
+
+static void zynq_enable_cpu(int cpu, bool enable)
+{
+	writel(A9_CLKSTOP_MASK << cpu, slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
+	writel(0, slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
+}
+
+int __cpuinit zynq_smp_boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+	writel(SLCR_UNLOCK_MAGIC, slcr_base_addr + SLCR_UNLOCK);
+	writel((A9_CLKSTOP_MASK | A9_RST_MASK) << cpu,
+		slcr_base_addr + SLCR_A9_CPU_RST_CTRL);
+
+	zynq_set_cpu_jump(cpu, secondary_startup);
+	zynq_enable_cpu(cpu, true);
+
+	return 0;
+}
+
+/*
+ * Initialise the CPU possible map early - this describes the CPUs
+ * which may be present or become present in the system.
+ */
+static void __init zynq_smp_init_cpus(void)
+{
+	unsigned int ncores, i;
+
+	ncores = scu_get_core_count(scu_base);
+
+	for (i = 0; i < ncores; ++i)
+		set_cpu_possible(i, true);
+
+	/* sanity check */
+	if (ncores > num_possible_cpus()) {
+		pr_warn("zynq: no. of cores (%d) greater than configured"
+			"maximum of %d - clipping\n", ncores, num_possible_cpus());
+		ncores = num_possible_cpus();
+	}
+}
+
+static void __init zynq_smp_prepare_cpus(unsigned int max_cpus)
+{
+	scu_enable(scu_base);
+}
+
+struct smp_operations __initdata zynq_smp_ops = {
+	.smp_init_cpus		= zynq_smp_init_cpus,
+	.smp_prepare_cpus	= zynq_smp_prepare_cpus,
+	.smp_secondary_init	= zynq_smp_secondary_init,
+	.smp_boot_secondary	= zynq_smp_boot_secondary,
+};