Message ID | 20130626235837.11576.86389.sendpatchset@w520 (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Jun 27, 2013 at 08:58:37AM +0900, Magnus Damm wrote: > +#include <linux/smp.h> > +#include <linux/irqchip/arm-gic.h> > +#include <asm/cacheflush.h> > +#include <asm/io.h> Please always use linux/io.h
On Thursday 27 June 2013, Magnus Damm wrote: > +#include <asm/cacheflush.h> > +#include <asm/cp15.h> > +#include <asm/smp_plat.h> > + > +#define SYSC 0xe6180000 > +#define SYSCSR 0x0000 > + > +#define RST 0xe6160000 > +#define CA15BAR 0x6020 > +#define CA15RESCNT 0x0040 > +#define CA7BAR 0x4030 > +#define CA7RESCNT 0x0044 > +#define RESCNT 0x0050 > + > +#define APMU 0xe6150000 > +#define CA15WUPCR 0x2010 > +#define CA7WUPCR 0x1010 > + > +#define MERAM 0xe8080000 Please get the base addresses from device tree if you can, using of_iomap(). > +#define r8a7790_clst_id(cpu) (cpu_logical_map((cpu)) >> 8) > +#define r8a7790_cpu_id(cpu) (cpu_logical_map((cpu)) & 0xff) > + > +static void r8a7790_deassert_reset(unsigned int cpu) > +{ > + void __iomem *p, *carescnt; > + u32 bar, mask, magic; > + unsigned int clst_id = r8a7790_clst_id(cpu); > + > + /* setup reset vectors */ > + p = ioremap_nocache(RST, 0x7000); > + bar = (MERAM >> 8) & 0xfffffc00; > + __raw_writel(bar, p + r8a7790_clst[clst_id].cabar); > + __raw_writel(bar | 0x10, p + r8a7790_clst[clst_id].cabar); > + > + /* enable clocks for cluster */ > + if (r8a7790_clst[clst_id].use_count++ == 0) { > + mask = 1 << r8a7790_clst[clst_id].rescnt_bit; > + __raw_writel(__raw_readl(p + RESCNT) & ~mask, p + RESCNT); > + } If you cannot use writel_relaxed() here, add a comment. Otherwise change it to writel or writel_relaxed. > +static void __init r8a7790_smp_prepare_cpus(unsigned int max_cpus) > +{ > + void __iomem *p; > + unsigned int k; > + > + shmobile_boot_fn = virt_to_phys(shmobile_invalidate_start); > + > + /* MERAM for jump stub, because BAR requires 256KB aligned address */ > + p = ioremap_nocache(MERAM, 16); > + memcpy(p, shmobile_boot_vector, 16); > + iounmap(p); On the other hand here you have to use __raw_writel() or memcpy_toio() because memcpy cannot operate on __iomem tokens. Also, if this is actually memory, you probably want to use ioremap_cached or ioremap_writecombine. Arnd
On Thu, Jun 27, 2013 at 5:43 PM, Russell King - ARM Linux <linux@arm.linux.org.uk> wrote: > On Thu, Jun 27, 2013 at 08:58:37AM +0900, Magnus Damm wrote: >> +#include <linux/smp.h> >> +#include <linux/irqchip/arm-gic.h> >> +#include <asm/cacheflush.h> >> +#include <asm/io.h> > > Please always use linux/io.h Will do, thanks! / magnus
Hi Arnd, On Thu, Jun 27, 2013 at 9:01 PM, Arnd Bergmann <arnd@arndb.de> wrote: > On Thursday 27 June 2013, Magnus Damm wrote: > >> +#include <asm/cacheflush.h> >> +#include <asm/cp15.h> >> +#include <asm/smp_plat.h> >> + >> +#define SYSC 0xe6180000 >> +#define SYSCSR 0x0000 >> + >> +#define RST 0xe6160000 >> +#define CA15BAR 0x6020 >> +#define CA15RESCNT 0x0040 >> +#define CA7BAR 0x4030 >> +#define CA7RESCNT 0x0044 >> +#define RESCNT 0x0050 >> + >> +#define APMU 0xe6150000 >> +#define CA15WUPCR 0x2010 >> +#define CA7WUPCR 0x1010 >> + >> +#define MERAM 0xe8080000 > > Please get the base addresses from device tree if you can, > using of_iomap(). Yes, that is my plan. Actually, I was hoping to move the APMU and SYSC bits out of this file in the future. >> +#define r8a7790_clst_id(cpu) (cpu_logical_map((cpu)) >> 8) >> +#define r8a7790_cpu_id(cpu) (cpu_logical_map((cpu)) & 0xff) >> + >> +static void r8a7790_deassert_reset(unsigned int cpu) >> +{ >> + void __iomem *p, *carescnt; >> + u32 bar, mask, magic; >> + unsigned int clst_id = r8a7790_clst_id(cpu); >> + >> + /* setup reset vectors */ >> + p = ioremap_nocache(RST, 0x7000); >> + bar = (MERAM >> 8) & 0xfffffc00; >> + __raw_writel(bar, p + r8a7790_clst[clst_id].cabar); >> + __raw_writel(bar | 0x10, p + r8a7790_clst[clst_id].cabar); >> + >> + /* enable clocks for cluster */ >> + if (r8a7790_clst[clst_id].use_count++ == 0) { >> + mask = 1 << r8a7790_clst[clst_id].rescnt_bit; >> + __raw_writel(__raw_readl(p + RESCNT) & ~mask, p + RESCNT); >> + } > > If you cannot use writel_relaxed() here, add a comment. Otherwise > change it to writel or writel_relaxed. Ok, I will give that a go.
>> +static void __init r8a7790_smp_prepare_cpus(unsigned int max_cpus) >> +{ >> + void __iomem *p; >> + unsigned int k; >> + >> + shmobile_boot_fn = virt_to_phys(shmobile_invalidate_start); >> + >> + /* MERAM for jump stub, because BAR requires 256KB aligned address */ >> + p = ioremap_nocache(MERAM, 16); >> + memcpy(p, shmobile_boot_vector, 16); >> + iounmap(p); > > On the other hand here you have to use __raw_writel() or > memcpy_toio() because memcpy cannot operate on __iomem tokens. > > Also, if this is actually memory, you probably want to use ioremap_cached > or ioremap_writecombine. Yeah, I realize this may be a bit of a mess. So say that I used ioremap_cached() here, I think I still need to flush the cache to make sure the secondary processors have valid code available when they boot. I suppose you're fine with that? Thanks, / magnus
On Monday 08 July 2013, Magnus Damm wrote: > >> +static void __init r8a7790_smp_prepare_cpus(unsigned int max_cpus) > >> +{ > >> + void __iomem *p; > >> + unsigned int k; > >> + > >> + shmobile_boot_fn = virt_to_phys(shmobile_invalidate_start); > >> + > >> + /* MERAM for jump stub, because BAR requires 256KB aligned address */ > >> + p = ioremap_nocache(MERAM, 16); > >> + memcpy(p, shmobile_boot_vector, 16); > >> + iounmap(p); > > > > On the other hand here you have to use __raw_writel() or > > memcpy_toio() because memcpy cannot operate on __iomem tokens. > > > > Also, if this is actually memory, you probably want to use ioremap_cached > > or ioremap_writecombine. > > Yeah, I realize this may be a bit of a mess. So say that I used > ioremap_cached() here, I think I still need to flush the cache to make > sure the secondary processors have valid code available when they > boot. I suppose you're fine with that? Is that required to get the data from dcache to icache? If so, that sounds right. Arnd
On Tue, Jul 9, 2013 at 8:21 AM, Arnd Bergmann <arnd@arndb.de> wrote: > On Monday 08 July 2013, Magnus Damm wrote: >> >> +static void __init r8a7790_smp_prepare_cpus(unsigned int max_cpus) >> >> +{ >> >> + void __iomem *p; >> >> + unsigned int k; >> >> + >> >> + shmobile_boot_fn = virt_to_phys(shmobile_invalidate_start); >> >> + >> >> + /* MERAM for jump stub, because BAR requires 256KB aligned address */ >> >> + p = ioremap_nocache(MERAM, 16); >> >> + memcpy(p, shmobile_boot_vector, 16); >> >> + iounmap(p); >> > >> > On the other hand here you have to use __raw_writel() or >> > memcpy_toio() because memcpy cannot operate on __iomem tokens. >> > >> > Also, if this is actually memory, you probably want to use ioremap_cached >> > or ioremap_writecombine. >> >> Yeah, I realize this may be a bit of a mess. So say that I used >> ioremap_cached() here, I think I still need to flush the cache to make >> sure the secondary processors have valid code available when they >> boot. I suppose you're fine with that? > > Is that required to get the data from dcache to icache? If so, that > sounds right. Good point. I sort of assumed that the secondary CPU cores booted with cache disabled, but I may be wrong. Thanks, / magnus
--- 0001/arch/arm/boot/dts/r8a7790.dtsi +++ work/arch/arm/boot/dts/r8a7790.dtsi 2013-06-27 08:34:41.000000000 +0900 @@ -24,6 +24,55 @@ reg = <0>; clock-frequency = <1300000000>; }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + clock-frequency = <1300000000>; + }; + + cpu2: cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <2>; + clock-frequency = <1300000000>; + }; + + cpu3: cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <3>; + clock-frequency = <1300000000>; + }; + + cpu4: cpu@4 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + clock-frequency = <780000000>; + }; + + cpu5: cpu@5 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x101>; + clock-frequency = <780000000>; + }; + + cpu6: cpu@6 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x102>; + clock-frequency = <780000000>; + }; + + cpu7: cpu@7 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x103>; + clock-frequency = <780000000>; + }; }; gic: interrupt-controller@f1001000 { --- 0001/arch/arm/mach-shmobile/Makefile +++ work/arch/arm/mach-shmobile/Makefile 2013-06-26 22:40:38.000000000 +0900 @@ -19,6 +19,7 @@ obj-$(CONFIG_ARCH_EMEV2) += setup-emev2. 
smp-y := platsmp.o headsmp.o smp-$(CONFIG_ARCH_SH73A0) += smp-sh73a0.o headsmp-scu.o smp-$(CONFIG_ARCH_R8A7779) += smp-r8a7779.o headsmp-scu.o +smp-$(CONFIG_ARCH_R8A7790) += smp-r8a7790.o smp-$(CONFIG_ARCH_EMEV2) += smp-emev2.o headsmp-scu.o # IRQ objects --- 0001/arch/arm/mach-shmobile/board-lager.c +++ work/arch/arm/mach-shmobile/board-lager.c 2013-06-26 22:40:38.000000000 +0900 @@ -103,6 +103,7 @@ static const char *lager_boards_compat_d }; DT_MACHINE_START(LAGER_DT, "lager") + .smp = smp_ops(r8a7790_smp_ops), .init_irq = irqchip_init, .init_time = r8a7790_timer_init, .init_machine = lager_add_standard_devices, --- 0001/arch/arm/mach-shmobile/include/mach/common.h +++ work/arch/arm/mach-shmobile/include/mach/common.h 2013-06-26 22:40:38.000000000 +0900 @@ -11,6 +11,7 @@ extern void shmobile_boot_vector(void); extern unsigned long shmobile_boot_fn; extern unsigned long shmobile_boot_arg; extern void shmobile_boot_scu(void); +extern void shmobile_invalidate_start(void); struct clk; extern int shmobile_clk_init(void); extern void shmobile_handle_irq_intc(struct pt_regs *); --- 0001/arch/arm/mach-shmobile/include/mach/r8a7790.h +++ work/arch/arm/mach-shmobile/include/mach/r8a7790.h 2013-06-26 22:40:38.000000000 +0900 @@ -5,5 +5,6 @@ void r8a7790_add_standard_devices(void); void r8a7790_clock_init(void); void r8a7790_pinmux_init(void); void r8a7790_timer_init(void); +extern struct smp_operations r8a7790_smp_ops; #endif /* __ASM_R8A7790_H__ */ --- 0001/arch/arm/mach-shmobile/setup-r8a7790.c +++ work/arch/arm/mach-shmobile/setup-r8a7790.c 2013-06-26 22:40:38.000000000 +0900 @@ -188,6 +188,7 @@ static const char *r8a7790_boards_compat }; DT_MACHINE_START(R8A7790_DT, "Generic R8A7790 (Flattened Device Tree)") + .smp = smp_ops(r8a7790_smp_ops), .init_irq = irqchip_init, .init_machine = r8a7790_add_standard_devices_dt, .init_time = r8a7790_timer_init, --- /dev/null +++ work/arch/arm/mach-shmobile/smp-r8a7790.c 2013-06-26 22:40:40.000000000 +0900 @@ -0,0 +1,187 @@ +/* + * 
SMP support for r8a7790 + * + * Copyright (C) 2012-2013 Renesas Solutions Corp. + * Copyright (C) 2012 Takashi Yoshii <takashi.yoshii.ze@renesas.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/jiffies.h> +#include <linux/smp.h> +#include <linux/irqchip/arm-gic.h> +#include <asm/cacheflush.h> +#include <asm/io.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> +#include <mach/common.h> +#include <mach/hardware.h> + +#include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/smp_plat.h> + +#define SYSC 0xe6180000 +#define SYSCSR 0x0000 + +#define RST 0xe6160000 +#define CA15BAR 0x6020 +#define CA15RESCNT 0x0040 +#define CA7BAR 0x4030 +#define CA7RESCNT 0x0044 +#define RESCNT 0x0050 + +#define APMU 0xe6150000 +#define CA15WUPCR 0x2010 +#define CA7WUPCR 0x1010 + +#define MERAM 0xe8080000 + +enum { R8A7790_CLST_CA15, R8A7790_CLST_CA7, R8A7790_CLST_NR }; + +static struct { + unsigned int cabar; + unsigned int carescnt; + unsigned int carescnt_magic; + unsigned int rescnt_bit; + unsigned int use_count; +} r8a7790_clst[R8A7790_CLST_NR] = { + [R8A7790_CLST_CA15] = { + .cabar = CA15BAR, + .carescnt = CA15RESCNT, + .carescnt_magic = 0xa5a50000, + .rescnt_bit = 1, + }, + [R8A7790_CLST_CA7] = { + .cabar = CA7BAR, + .carescnt = CA7RESCNT, + .carescnt_magic = 0x5a5a0000, + .rescnt_bit = 0, + }, +}; + +#define r8a7790_clst_id(cpu) (cpu_logical_map((cpu)) >> 8) +#define r8a7790_cpu_id(cpu) (cpu_logical_map((cpu)) & 0xff) + +static void 
r8a7790_deassert_reset(unsigned int cpu) +{ + void __iomem *p, *carescnt; + u32 bar, mask, magic; + unsigned int clst_id = r8a7790_clst_id(cpu); + + /* setup reset vectors */ + p = ioremap_nocache(RST, 0x7000); + bar = (MERAM >> 8) & 0xfffffc00; + __raw_writel(bar, p + r8a7790_clst[clst_id].cabar); + __raw_writel(bar | 0x10, p + r8a7790_clst[clst_id].cabar); + + /* enable clocks for cluster */ + if (r8a7790_clst[clst_id].use_count++ == 0) { + mask = 1 << r8a7790_clst[clst_id].rescnt_bit; + __raw_writel(__raw_readl(p + RESCNT) & ~mask, p + RESCNT); + } + + /* enable per-core clocks */ + mask = BIT(3 - r8a7790_cpu_id(cpu)); + magic = r8a7790_clst[clst_id].carescnt_magic; + carescnt = p + r8a7790_clst[clst_id].carescnt; + __raw_writel((__raw_readl(carescnt) & ~mask) | magic, carescnt); + + iounmap(p); +} + +static void r8a7790_assert_reset(unsigned int cpu) +{ + void __iomem *p, *carescnt; + u32 mask, magic; + unsigned int clst_id = r8a7790_clst_id(cpu); + + p = ioremap_nocache(RST, 0x7000); + + /* disable per-core clocks */ + mask = BIT(3 - r8a7790_cpu_id(cpu)); + magic = r8a7790_clst[clst_id].carescnt_magic; + carescnt = p + r8a7790_clst[clst_id].carescnt; + __raw_writel((__raw_readl(carescnt) | mask) | magic, carescnt); + + /* disable clocks for cluster */ + if (r8a7790_clst[clst_id].use_count == 1) { + mask = 1 << r8a7790_clst[clst_id].rescnt_bit; + __raw_writel(__raw_readl(p + RESCNT) | mask, p + RESCNT); + } + + if (r8a7790_clst[clst_id].use_count > 0) + r8a7790_clst[clst_id].use_count--; + + iounmap(p); +} + +static void r8a7790_power_on(unsigned int cpu) +{ + void __iomem *p, *p2, *cawupcr; + + /* wake up CPU core via APMU */ + p = ioremap_nocache(APMU, 0x3000); + cawupcr = p + (r8a7790_clst_id(cpu) ? 
CA7WUPCR : CA15WUPCR); + __raw_writel(BIT(r8a7790_cpu_id(cpu)), cawupcr); + + /* wait for SYSC to finish wake up sequence */ + p2 = ioremap_nocache(SYSC, 0x1000); + while ((__raw_readl(p2 + SYSCSR) & 0x3) != 0x3) + ; + + /* wait for APMU to finish */ + while (__raw_readl(cawupcr) != 0) + ; + + iounmap(p2); + iounmap(p); +} + +static void __init r8a7790_smp_prepare_cpus(unsigned int max_cpus) +{ + void __iomem *p; + unsigned int k; + + shmobile_boot_fn = virt_to_phys(shmobile_invalidate_start); + + /* MERAM for jump stub, because BAR requires 256KB aligned address */ + p = ioremap_nocache(MERAM, 16); + memcpy(p, shmobile_boot_vector, 16); + iounmap(p); + + flush_cache_louis(); + + /* keep secondary CPU cores in reset, but powered on */ + for (k = 1; k < 8; k++) { + r8a7790_assert_reset(k); + r8a7790_power_on(k); + } + + r8a7790_deassert_reset(0); +} + +static int __cpuinit r8a7790_boot_secondary(unsigned int cpu, + struct task_struct *idle) +{ + /* only allow a single cluster for now */ + if (r8a7790_clst_id(cpu) != r8a7790_clst_id(0)) + return -ENOTSUPP; + + r8a7790_deassert_reset(cpu); + return 0; +} + +struct smp_operations r8a7790_smp_ops __initdata = { + .smp_prepare_cpus = r8a7790_smp_prepare_cpus, + .smp_boot_secondary = r8a7790_boot_secondary, +};