
arm64: convert soft_restart() to assembly code

Message ID CAFdej01NQoVVB6QhZ=jSO5OACZ4293xKPJR9sDboZOOX5DMeig@mail.gmail.com (mailing list archive)
State New, archived

Commit Message

Arun Chandran Aug. 26, 2014, 1 p.m. UTC
On Thu, Aug 21, 2014 at 8:01 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> On Thu, Aug 21, 2014 at 02:34:46PM +0100, Arun Chandran wrote:
>> Hi Mark,
>>
>> On Wed, Aug 20, 2014 at 7:46 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>> > [...]
>> >
>> >> I am trying to kexec a UP-LE kernel from an SMP-LE kernel.
>> >>
>> >> > As I mentioned we do need to ensure that the CPUs are in the mode they
>> >> > started in, though I'm not sure I follow what you mean by "not waiting".
>> >> > This could be an orthogonal issue.
>> >>
>> >> If I verify the secondary CPUs from u-boot I can see that
>> >> they are all looping at
>> >>
>> >>     Core number       : 1
>> >>     Core state        : debug (AArch64 EL2)
>> >>     Debug entry cause : External Debug Request
>> >>     Current PC        : 0x0000000000000238
>> >>     Current CPSR      : 0x200003c9 (EL2h)
>> >>
>> >> But after kexec calls soft_restart(0) for all the secondary CPUs,
>> >> they are looping at
>> >>
>> >>     Core number       : 1
>> >>     Core state        : debug (AArch64 EL1)
>> >>     Debug entry cause : External Debug Request
>> >>     Current PC        : 0xffffffc000083200
>> >>     Current CPSR      : 0x600003c5 (EL1h)
>> >>
>> >> This is what I mean by "not waiting": they are no longer
>> >> waiting for a secondary start-up address to jump to.
>> >
>> > Ok.
>> >
>> >> >
>> >> > What exactly do you see, do the CPUs leave the spin-table, are they
>> >> > taking exceptions, are they getting stuck in the spin-table, etc?
>> >> >
>> >> They are all clearly resetting to address 0 (I put a breakpoint there
>> >> and verified), but somehow they end up at 0xffffffc000083200.
>> >> I still don't know why.
>> >>
>> >> ########
>> >> ffffffc00008319c:       d503201f        nop
>> >>         ...
>> >> ffffffc000083200:       14000260        b       ffffffc000083b80 <el1_sync>
>> >> ffffffc000083204:       d503201f        nop
>> >> ffffffc000083208:       d503201f        nop
>> >> ########
>> >
>> > That's the EL1 exception vector table.
>> >
>> > What looks to be happening is that something causes the CPUs to take an
>> > exception (at EL1). Because translation isn't enabled and the vector
>> > address doesn't map to anything, they'll take some sort of exception.
>> > Because the vectors aren't mapped that will go recursive.
>> >
>> > Your spin-table implementation might be poking something that's not
>> > accessible at EL1, in which case you require the jump to EL2 for
>> > correctness. I can't say for certain either way.
>> >
>>
>> Yes, you are right; I needed the jump to EL2 to put the
>> secondary CPUs into the correct spinning loop.
>
> Ok. So we need to do what I have suggested elsewhere w.r.t. jumping back
> up to EL2. As you point out below, we need to synchronise with the CPUs
> on their way out too; we can add a spin-table cpu_kill with a slightly
> dodgy delay to ensure that, given we have no other mechanism for
> synchronising.
>

I was able to remove the delay by pushing "set_cpu_online(cpu, false);"
further down.

############
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3fb64cb..200e49e 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -543,8 +543,6 @@ static void ipi_cpu_stop(unsigned int cpu)
                raw_spin_unlock(&stop_lock);
        }

-       set_cpu_online(cpu, false);
-
        local_irq_disable();

        /* If we have the cpu ops use them. */
@@ -554,6 +552,8 @@ static void ipi_cpu_stop(unsigned int cpu)
                && !cpu_ops[cpu]->cpu_disable(cpu))
                cpu_ops[cpu]->cpu_die(cpu);

+
+       set_cpu_online(cpu, false);
        /* Otherwise spin here. */

        while (1)
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index e7275b3..8dcca88 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -149,6 +149,7 @@ static void smp_spin_table_cpu_die(unsigned int cpu)
        *release_addr = 0;
        __flush_dcache_area(release_addr, 8);

+       set_cpu_online(cpu, false);
        mb();

        soft_restart(0);
##############

This will

a) Make the waiting loop inside smp_send_stop() more meaningful
   (see the sketch after this list)

b) Make sure that at least the cpu-release-addr is invalidated.
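
For reference, the loop in question looks roughly like this (quoting the
3.16-era arch/arm64/kernel/smp.c from memory, so details may differ):

########
void smp_send_stop(void)
{
	unsigned long timeout;

	if (num_online_cpus() > 1) {
		cpumask_t mask;

		cpumask_copy(&mask, cpu_online_mask);
		cpumask_clear_cpu(smp_processor_id(), &mask);

		/* IPI every other online CPU to stop */
		smp_cross_call(&mask, IPI_CPU_STOP);
	}

	/* Wait up to one second for the other CPUs to stop */
	timeout = USEC_PER_SEC;
	while (num_online_cpus() > 1 && timeout--)
		udelay(1);

	if (num_online_cpus() > 1)
		pr_warning("SMP: failed to stop secondary CPUs\n");
}
########

With the hunks above, a secondary only drops out of cpu_online_mask
after smp_spin_table_cpu_die() has zeroed and flushed the release
address, so the one-second wait in smp_send_stop() now guarantees at
least that much.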

--Arun


>> I made some progress with the below changes.
>>
>> ###########
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 607d4bb..8969922 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -343,6 +343,11 @@ CPU_LE(    movk    x0, #0x30d0, lsl #16    )    // Clear EE and E0E on LE systems
>>                       PSR_MODE_EL1h)
>>         msr     spsr_el2, x0
>>         msr     elr_el2, lr
>> +       mov     x0, #0x33ff
>> +       movk    x0, #0x801f, lsl #16
>> +       msr     cptr_el2, x0                    // Enable traps to el2 when CPACR or CPACR_EL1 is accessed
>> +       mov     x0, #3 << 20
>> +       msr     cpacr_el1, x0                   // Enable FP/ASIMD
>>         mov     w20, #BOOT_CPU_MODE_EL2         // This CPU booted in EL2
>>         eret
>>  ENDPROC(el2_setup)
>> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
>> index a272f33..d8e806c 100644
>> --- a/arch/arm64/kernel/hyp-stub.S
>> +++ b/arch/arm64/kernel/hyp-stub.S
>> @@ -66,6 +66,14 @@ ENDPROC(el1_sync)
>>
>>  .macro invalid_vector  label
>>  \label:
>> +       mov     x1, #0x33ff
>> +       msr     cptr_el2, x1                    // Disable copro. traps to EL2
>> +
>> +       ic      ialluis
>> +       isb
>> +       dsb     sy
>
> As far as I am aware an "isb; dsb" sequence never makes sense. It should
> be "dsb; isb". You want the I-side maintenance to complete (for which
> you have the DSB) _then_ you want to synchronise the execution context
> with the newly-visible instructions (for which you have the ISB).
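
For reference, that matches the ordering in the kernel's own I-cache
flush helper (roughly, from arch/arm64/include/asm/cacheflush.h of this
era):

########
static inline void __flush_icache_all(void)
{
	asm("ic	ialluis");	/* invalidate all I-caches to PoU, Inner Shareable */
	dsb(ish);		/* wait for the invalidation to complete... */
	isb();			/* ...then resynchronise the instruction stream */
}
########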
>
>> +
>> +       br  x0
>>         b \label
>>  ENDPROC(\label)
>>  .endm
>> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
>> index 64733d4..8ca929c 100644
>> --- a/arch/arm64/kernel/process.c
>> +++ b/arch/arm64/kernel/process.c
>> @@ -65,6 +65,12 @@ void soft_restart(unsigned long addr)
>>  //     smp_secondary_shutdown();
>>  #endif
>>
>> +       /* Delay primary cpu; allow enough time for
>> +        * secondaries to enter spin loop
>> +        */
>> +       if (smp_processor_id() == 0)
>> +               mdelay(1000);
>> +
>>         cpu_soft_restart(virt_to_phys(cpu_reset), addr);
>>
>>         /* Should never get here */
>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> index 3cb6dec..f6ab468 100644
>> --- a/arch/arm64/mm/proc.S
>> +++ b/arch/arm64/mm/proc.S
>> @@ -76,6 +76,10 @@ ENTRY(cpu_reset)
>>  #if defined(CONFIG_SMP)
>>  /*     bl      secondary_shutdown */
>>  #endif
>> +
>> +       /* Trap to EL2 */
>> +       mov     x1, #3 << 20
>> +       msr     cpacr_el1, x1                   // Enable FP/ASIMD
>>         ret     x0
>>  ENDPROC(cpu_reset)
>>
>> @@ -200,8 +204,6 @@ ENTRY(__cpu_setup)
>>         tlbi    vmalle1is                       // invalidate I + D TLBs
>>         dsb     ish
>>
>> -       mov     x0, #3 << 20
>> -       msr     cpacr_el1, x0                   // Enable FP/ASIMD
>>         msr     mdscr_el1, xzr                  // Reset mdscr_el1
>>         /*
>>          * Memory region attributes for LPAE:
>> #########
>>
>> With the above code I was able to manually kexec
>> an SMP kernel (with the help of a debugger). The idea is that
>> el2_setup now sets CPTR_EL2.TCPAC, so the later cpacr_el1 write in
>> cpu_reset traps to EL2, where the modified invalid_vector can do the
>> br x0 with the reset address still in x0.
>>
>> Basically, the branch instruction (br x0) is not working
>> in the EL2 vector.
>> ------------------
>> CPU#0>h
>>     Core number       : 0
>>     Core state        : debug (AArch64 EL2)
>>     Debug entry cause : External Debug Request
>>     Current PC        : 0x0000004000089800
>
> Does this address look similar to the VBAR_EL2, perhaps?
>
>>     Current CPSR      : 0x600003c9 (EL2h)
>> CPU#0>rd
>> GPR00: 00000043eb891000 0000000000000018 0000000000000006 0000000000000004
>> GPR04: 000000000000001f 000000000000001b 0000000000000000 ffffffffffffffff
>> GPR08: ffffffc3ffe80614 ffffffffffffffff 0000000000000000 0000000000000002
>> GPR12: ffffffc0000968f0 00000040008c0000 00000043eb949002 ffffffffffffffff
>> GPR16: ffffffc0000c1244 0000000000435260 0000007ff9c5d490 00000040000968c0
>> GPR20: 00000043eb891000 ffffffc3eb891000 ffffffc3eb973c00 0000000000000110
>> GPR24: ffffffc000094000 ffffffc000094cd8 000000000000008e ffffffc00082f000
>> GPR28: ffffffc3e9888000 ffffffc3e988bd00 ffffffc000095d88 00000043efb24430
>> CPU#0>go 0x00000043eb891000
>> --------------------
>>
>> I had to use the debugger to make cpu0 jump to the kexec boot code.
>> Similarly, I had to manually put the other CPUs into the spinning code
>> using the debugger. Could you please shed some light on this?
>
> I really can't say. I know nothing of your platform or spin-table
> implementation, and this is nothing like what I'd expect the jump to EL2
> code to look like.
>
> Thanks,
> Mark.

Comments

Mark Rutland Aug. 26, 2014, 2:08 p.m. UTC | #1
Hi Arun,

Please start a new thread if you have new things to say; this thread has
drifted far from its original purpose (the titular patch is now in the
arm64 devel branch [1]).

[...]

> > Ok. So we need to do what I have suggested elsewhere w.r.t. jumping back
> > up to EL2. As you point out below, we need to synchronise with the CPUs
> > on their way out too; we can add a spin-table cpu_kill with a slightly
> > dodgy delay to ensure that, given we have no other mechanism for
> > synchronising.
> >
> 
> I was able to remove the delay by pushing "set_cpu_online(cpu, false);"
> further down.
> 
> ############
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 3fb64cb..200e49e 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -543,8 +543,6 @@ static void ipi_cpu_stop(unsigned int cpu)
>                 raw_spin_unlock(&stop_lock);
>         }
> 
> -       set_cpu_online(cpu, false);
> -
>         local_irq_disable();
> 

If we don't set the cpu offline here, we'll call
clear_tasks_mm_cpumask(cpu) and migrate IRQs while the CPU is marked
online, wait a bit, then mark the CPU offline. 

I suspect this is broken, though I could be wrong.

>         /* If we have the cpu ops use them. */
> @@ -554,6 +552,8 @@ static void ipi_cpu_stop(unsigned int cpu)
>                 && !cpu_ops[cpu]->cpu_disable(cpu))
>                 cpu_ops[cpu]->cpu_die(cpu);
> 
> +
> +       set_cpu_online(cpu, false);
>         /* Otherwise spin here. */
> 
>         while (1)
> diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
> index e7275b3..8dcca88 100644
> --- a/arch/arm64/kernel/smp_spin_table.c
> +++ b/arch/arm64/kernel/smp_spin_table.c
> @@ -149,6 +149,7 @@ static void smp_spin_table_cpu_die(unsigned int cpu)
>         *release_addr = 0;
>         __flush_dcache_area(release_addr, 8);
> 
> +       set_cpu_online(cpu, false);
>         mb();
> 
>         soft_restart(0);
> ##############
> 
> This will
> 
> a) Make the waiting loop inside smp_send_stop() more meaningful

I don't follow how this is any more meaningful. We still have no
guarantee that the CPU is actually dead.

> b) Make sure that at least cpu-release-addr is invalidated.

There is still a period between the call to set_cpu_online(cpu, false)
and the CPU jumping to the return-address where it is still in the
kernel, so all this does is shorten the window for the race.

For PSCI 0.2+ we can poll to determine when the CPU is in the firmware.
For PSCI prior to 0.2 we can't, but the window is very short (as the
firmware performs the cache maintenance) and we seem to have gotten away
with it so far.
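
Roughly, that polling could look like this (a sketch only; it assumes
the psci_ops.affinity_info hook added with the PSCI 0.2 support, and
the retry policy is arbitrary):

########
static int cpu_psci_cpu_kill(unsigned int cpu)
{
	int i;

	if (!psci_ops.affinity_info)
		return 1;

	/*
	 * cpu_kill can race with the dying CPU's final exit, so poll
	 * AFFINITY_INFO a few times rather than sampling it once.
	 */
	for (i = 0; i < 10; i++) {
		if (psci_ops.affinity_info(cpu_logical_map(cpu), 0) ==
		    PSCI_0_2_AFFINITY_LEVEL_OFF)
			return 1;	/* CPU is now in the firmware */

		msleep(10);
	}

	return 0;	/* give up; the CPU may still be in the kernel */
}
########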

For spin-table we might have a large race window because the kernel must
flush the caches at EL2, incurring a relatively large delay. If we are
encountering a race there I'd rather this were fixed with a cpu_kill
callback for spin-table.
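
Such a callback has nothing to poll, so at best it would be a bounded
delay (a hypothetical sketch, with the timeout picked arbitrarily):

########
static int smp_spin_table_cpu_kill(unsigned int cpu)
{
	/*
	 * There is no handshake with the dying CPU; just give it time
	 * to finish the cache maintenance at EL2 and reach the spin
	 * loop before declaring it gone.
	 */
	mdelay(100);

	return 1;	/* assume the CPU made it out */
}
########

That is admittedly dodgy, but it keeps the policy in the spin-table
backend rather than in the generic code.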

Cheers,
Mark.

[1] https://git.kernel.org/cgit/linux/kernel/git/arm64/linux.git/commit/?h=devel&id=6c80fe35fe9edf9147e3db9c8ff1a7761c49c4cc

Patch

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3fb64cb..200e49e 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -543,8 +543,6 @@  static void ipi_cpu_stop(unsigned int cpu)
                raw_spin_unlock(&stop_lock);
        }

-       set_cpu_online(cpu, false);
-
        local_irq_disable();

        /* If we have the cpu ops use them. */
@@ -554,6 +552,8 @@  static void ipi_cpu_stop(unsigned int cpu)
                && !cpu_ops[cpu]->cpu_disable(cpu))
                cpu_ops[cpu]->cpu_die(cpu);

+
+       set_cpu_online(cpu, false);
        /* Otherwise spin here. */

        while (1)
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index e7275b3..8dcca88 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -149,6 +149,7 @@  static void smp_spin_table_cpu_die(unsigned int cpu)
        *release_addr = 0;
        __flush_dcache_area(release_addr, 8);

+       set_cpu_online(cpu, false);
        mb();

        soft_restart(0);