diff mbox

[v12,12/16] arm64: kdump: implement machine_crash_shutdown()

Message ID 9ae67f7515c66bfd747486c8dc1c1772913a8d46.1448403503.git.geoff@infradead.org (mailing list archive)
State New, archived
Headers show

Commit Message

Geoff Levand Nov. 24, 2015, 10:25 p.m. UTC
From: AKASHI Takahiro <takahiro.akashi@linaro.org>

kdump calls machine_crash_shutdown() to shut down non-boot cpus and
save registers' status in per-cpu ELF notes before starting the crash
dump kernel. See kernel_kexec().

ipi_cpu_stop() is slightly modified to support this behavior.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/include/asm/kexec.h    | 34 +++++++++++++++++++++++++++++++++-
 arch/arm64/kernel/machine_kexec.c | 31 +++++++++++++++++++++++++++++--
 arch/arm64/kernel/smp.c           | 16 ++++++++++++++--
 3 files changed, 76 insertions(+), 5 deletions(-)

Comments

Marc Zyngier Nov. 27, 2015, 2:39 p.m. UTC | #1
On 24/11/15 22:25, Geoff Levand wrote:
> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> 
> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
> save registers' status in per-cpu ELF notes before starting the crash
> dump kernel. See kernel_kexec().
> 
> ipi_cpu_stop() is a bit modified and used to support this behavior.
> 
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> ---
>  arch/arm64/include/asm/kexec.h    | 34 +++++++++++++++++++++++++++++++++-
>  arch/arm64/kernel/machine_kexec.c | 31 +++++++++++++++++++++++++++++--
>  arch/arm64/kernel/smp.c           | 16 ++++++++++++++--
>  3 files changed, 76 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
> index 46d63cd..555a955 100644
> --- a/arch/arm64/include/asm/kexec.h
> +++ b/arch/arm64/include/asm/kexec.h
> @@ -30,6 +30,8 @@
>  
>  #if !defined(__ASSEMBLY__)
>  
> +extern bool in_crash_kexec;
> +
>  /**
>   * crash_setup_regs() - save registers for the panic kernel
>   *
> @@ -40,7 +42,37 @@
>  static inline void crash_setup_regs(struct pt_regs *newregs,
>  				    struct pt_regs *oldregs)
>  {
> -	/* Empty routine needed to avoid build errors. */
> +	if (oldregs) {
> +		memcpy(newregs, oldregs, sizeof(*newregs));
> +	} else {
> +		__asm__ __volatile__ (
> +			"stp	 x0,   x1, [%3, #16 *  0]\n"
> +			"stp	 x2,   x3, [%3, #16 *  1]\n"
> +			"stp	 x4,   x5, [%3, #16 *  2]\n"
> +			"stp	 x6,   x7, [%3, #16 *  3]\n"
> +			"stp	 x8,   x9, [%3, #16 *  4]\n"
> +			"stp	x10,  x11, [%3, #16 *  5]\n"
> +			"stp	x12,  x13, [%3, #16 *  6]\n"
> +			"stp	x14,  x15, [%3, #16 *  7]\n"
> +			"stp	x16,  x17, [%3, #16 *  8]\n"
> +			"stp	x18,  x19, [%3, #16 *  9]\n"
> +			"stp	x20,  x21, [%3, #16 * 10]\n"
> +			"stp	x22,  x23, [%3, #16 * 11]\n"
> +			"stp	x24,  x25, [%3, #16 * 12]\n"
> +			"stp	x26,  x27, [%3, #16 * 13]\n"
> +			"stp	x28,  x29, [%3, #16 * 14]\n"
> +			"str	x30,	   [%3, #16 * 15]\n"
> +			"mov	%0, sp\n"
> +			"adr	%1, 1f\n"
> +			"mrs	%2, spsr_el1\n"
> +		"1:"
> +			: "=r" (newregs->sp),
> +			  "=r" (newregs->pc),
> +			  "=r" (newregs->pstate)
> +			: "r"  (&newregs->regs)
> +			: "memory"
> +		);

I wonder how useful this thing is, given that it starts by corrupting
whatever register is holding newregs->regs. Maybe this is not supposed
to be accurate anyway...


> +	}
>  }
>  
>  #endif /* !defined(__ASSEMBLY__) */
> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> index da28a26..d2d7e90 100644
> --- a/arch/arm64/kernel/machine_kexec.c
> +++ b/arch/arm64/kernel/machine_kexec.c
> @@ -9,6 +9,7 @@
>   * published by the Free Software Foundation.
>   */
>  
> +#include <linux/kernel.h>
>  #include <linux/kexec.h>
>  #include <linux/of_fdt.h>
>  #include <linux/slab.h>
> @@ -23,6 +24,7 @@
>  extern const unsigned char arm64_relocate_new_kernel[];
>  extern const unsigned long arm64_relocate_new_kernel_size;
>  
> +bool in_crash_kexec;
>  static unsigned long kimage_start;
>  
>  /**
> @@ -203,13 +205,38 @@ void machine_kexec(struct kimage *kimage)
>  	 */
>  
>  	cpu_soft_restart(virt_to_phys(cpu_reset),
> -		is_hyp_mode_available(),
> +		in_crash_kexec ? 0 : is_hyp_mode_available(),
>  		reboot_code_buffer_phys, kimage->head, kimage_start);
>  
>  	BUG(); /* Should never get here. */
>  }
>  
> +/**
> + * machine_crash_shutdown - shutdown non-boot cpus and save registers
> + */
>  void machine_crash_shutdown(struct pt_regs *regs)
>  {
> -	/* Empty routine needed to avoid build errors. */
> +	struct pt_regs dummy_regs;
> +	int cpu;
> +
> +	local_irq_disable();
> +
> +	in_crash_kexec = true;
> +
> +	/*
> +	 * clear and initialize the per-cpu info. This is necessary
> +	 * because, otherwise, slots for offline cpus would never be
> +	 * filled up. See smp_send_stop().
> +	 */
> +	memset(&dummy_regs, 0, sizeof(dummy_regs));
> +	for_each_possible_cpu(cpu)
> +		crash_save_cpu(&dummy_regs, cpu);
> +
> +	/* shutdown non-boot cpus */
> +	smp_send_stop();
> +
> +	/* for boot cpu */
> +	crash_save_cpu(regs, smp_processor_id());
> +
> +	pr_info("Starting crashdump kernel...\n");
>  }
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index b1adc51..15aabef 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -37,6 +37,7 @@
>  #include <linux/completion.h>
>  #include <linux/of.h>
>  #include <linux/irq_work.h>
> +#include <linux/kexec.h>
>  
>  #include <asm/alternative.h>
>  #include <asm/atomic.h>
> @@ -54,6 +55,8 @@
>  #include <asm/ptrace.h>
>  #include <asm/virt.h>
>  
> +#include "cpu-reset.h"
> +
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/ipi.h>
>  
> @@ -683,8 +686,12 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
>  /*
>   * ipi_cpu_stop - handle IPI from smp_send_stop()
>   */
> -static void ipi_cpu_stop(unsigned int cpu)
> +static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
>  {
> +#ifdef CONFIG_KEXEC
> +	/* printing messages may slow down the shutdown. */
> +	if (!in_crash_kexec)
> +#endif
>  	if (system_state == SYSTEM_BOOTING ||
>  	    system_state == SYSTEM_RUNNING) {
>  		raw_spin_lock(&stop_lock);

Irrespective of how useful this change is, how about having a predicate
instead? Something like:

static inline bool is_in_crash_kexec(void)
{
#ifdef CONFIG_KEXEC
	return in_crash_kexec;
#else
	return false;
#endif
}

located in machine_kexec.c (making in_crash_kexec static), and then

	if (!is_in_crash_kexec() && (system_state == ... || ...)) {

It would certainly look better.

> @@ -697,6 +704,11 @@ static void ipi_cpu_stop(unsigned int cpu)
>  
>  	local_irq_disable();
>  
> +#ifdef CONFIG_KEXEC
> +	if (in_crash_kexec)
> +		crash_save_cpu(regs, cpu);
> +#endif /* CONFIG_KEXEC */
> +

Same here.

>  	while (1)
>  		cpu_relax();
>  }
> @@ -727,7 +739,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
>  
>  	case IPI_CPU_STOP:
>  		irq_enter();
> -		ipi_cpu_stop(cpu);
> +		ipi_cpu_stop(cpu, regs);
>  		irq_exit();
>  		break;
>  
> 

Thanks,

	M.
Pratyush Anand Dec. 3, 2015, 4:15 a.m. UTC | #2
Hi Akashi,

Some of the points which came up while discussing with Mark Salter are worth
including in v13.

On 24/11/2015:10:25:34 PM, Geoff Levand wrote:
> From: AKASHI Takahiro <takahiro.akashi@linaro.org>

[...]

> +/**
> + * machine_crash_shutdown - shutdown non-boot cpus and save registers

"non-panic" would be correct instead of "non-boot".

> +	/* shutdown non-boot cpus */

Ditto

> +	smp_send_stop();
> +
> +	/* for boot cpu */

"for panic cpu"


[...]

> @@ -697,6 +704,11 @@ static void ipi_cpu_stop(unsigned int cpu)
>  
>  	local_irq_disable();

We have "set_cpu_online(cpu, false);" just before it.
Panic core is waiting for non-panic to go offline, i.e. for the above event.

>  
> +#ifdef CONFIG_KEXEC
> +	if (in_crash_kexec)
> +		crash_save_cpu(regs, cpu);
> +#endif /* CONFIG_KEXEC */

However, we are still saving crash info for the non-panic core. So, it would be
good to move crash_save_cpu() before set_cpu_online() to avoid any race condition.

~Pratyush
AKASHI Takahiro Dec. 10, 2015, 11:34 a.m. UTC | #3
Marc,

I am back from my vacation.

On 11/27/2015 11:39 PM, Marc Zyngier wrote:
> On 24/11/15 22:25, Geoff Levand wrote:
>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>
>> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
>> save registers' status in per-cpu ELF notes before starting the crash
>> dump kernel. See kernel_kexec().
>>
>> ipi_cpu_stop() is a bit modified and used to support this behavior.
>>
>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>> ---
>>   arch/arm64/include/asm/kexec.h    | 34 +++++++++++++++++++++++++++++++++-
>>   arch/arm64/kernel/machine_kexec.c | 31 +++++++++++++++++++++++++++++--
>>   arch/arm64/kernel/smp.c           | 16 ++++++++++++++--
>>   3 files changed, 76 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>> index 46d63cd..555a955 100644
>> --- a/arch/arm64/include/asm/kexec.h
>> +++ b/arch/arm64/include/asm/kexec.h
>> @@ -30,6 +30,8 @@
>>
>>   #if !defined(__ASSEMBLY__)
>>
>> +extern bool in_crash_kexec;
>> +
>>   /**
>>    * crash_setup_regs() - save registers for the panic kernel
>>    *
>> @@ -40,7 +42,37 @@
>>   static inline void crash_setup_regs(struct pt_regs *newregs,
>>   				    struct pt_regs *oldregs)
>>   {
>> -	/* Empty routine needed to avoid build errors. */
>> +	if (oldregs) {
>> +		memcpy(newregs, oldregs, sizeof(*newregs));
>> +	} else {
>> +		__asm__ __volatile__ (
>> +			"stp	 x0,   x1, [%3, #16 *  0]\n"
>> +			"stp	 x2,   x3, [%3, #16 *  1]\n"
>> +			"stp	 x4,   x5, [%3, #16 *  2]\n"
>> +			"stp	 x6,   x7, [%3, #16 *  3]\n"
>> +			"stp	 x8,   x9, [%3, #16 *  4]\n"
>> +			"stp	x10,  x11, [%3, #16 *  5]\n"
>> +			"stp	x12,  x13, [%3, #16 *  6]\n"
>> +			"stp	x14,  x15, [%3, #16 *  7]\n"
>> +			"stp	x16,  x17, [%3, #16 *  8]\n"
>> +			"stp	x18,  x19, [%3, #16 *  9]\n"
>> +			"stp	x20,  x21, [%3, #16 * 10]\n"
>> +			"stp	x22,  x23, [%3, #16 * 11]\n"
>> +			"stp	x24,  x25, [%3, #16 * 12]\n"
>> +			"stp	x26,  x27, [%3, #16 * 13]\n"
>> +			"stp	x28,  x29, [%3, #16 * 14]\n"
>> +			"str	x30,	   [%3, #16 * 15]\n"
>> +			"mov	%0, sp\n"
>> +			"adr	%1, 1f\n"
>> +			"mrs	%2, spsr_el1\n"
>> +		"1:"
>> +			: "=r" (newregs->sp),
>> +			  "=r" (newregs->pc),
>> +			  "=r" (newregs->pstate)
>> +			: "r"  (&newregs->regs)
>> +			: "memory"
>> +		);
>
> I wonder how useful this thing is, given that it starts by corrupting
> whatever register is holding newregs->regs. Maybe this is not supposed
> to be accurate anyway...

I'm not quite sure about what part of my code you're mentioning here, but
crash_setup_regs() is solely called by crash_kexec(), and panic() is
the only caller of crash_kexec() with NULL argument which, in turn, is
used as 'oldregs' in crash_setup_regs().

Given this fact, I think that the values saved in newregs as indicated above
will be the best estimate of current cpu contexts.

The other caller of crash_kexec() is die() in traps.c, but here we call
it with explicit cpu contexts at exception.

>
>> +	}
>>   }
>>
>>   #endif /* !defined(__ASSEMBLY__) */
>> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
>> index da28a26..d2d7e90 100644
>> --- a/arch/arm64/kernel/machine_kexec.c
>> +++ b/arch/arm64/kernel/machine_kexec.c
>> @@ -9,6 +9,7 @@
>>    * published by the Free Software Foundation.
>>    */
>>
>> +#include <linux/kernel.h>
>>   #include <linux/kexec.h>
>>   #include <linux/of_fdt.h>
>>   #include <linux/slab.h>
>> @@ -23,6 +24,7 @@
>>   extern const unsigned char arm64_relocate_new_kernel[];
>>   extern const unsigned long arm64_relocate_new_kernel_size;
>>
>> +bool in_crash_kexec;
>>   static unsigned long kimage_start;
>>
>>   /**
>> @@ -203,13 +205,38 @@ void machine_kexec(struct kimage *kimage)
>>   	 */
>>
>>   	cpu_soft_restart(virt_to_phys(cpu_reset),
>> -		is_hyp_mode_available(),
>> +		in_crash_kexec ? 0 : is_hyp_mode_available(),
>>   		reboot_code_buffer_phys, kimage->head, kimage_start);
>>
>>   	BUG(); /* Should never get here. */
>>   }
>>
>> +/**
>> + * machine_crash_shutdown - shutdown non-boot cpus and save registers
>> + */
>>   void machine_crash_shutdown(struct pt_regs *regs)
>>   {
>> -	/* Empty routine needed to avoid build errors. */
>> +	struct pt_regs dummy_regs;
>> +	int cpu;
>> +
>> +	local_irq_disable();
>> +
>> +	in_crash_kexec = true;
>> +
>> +	/*
>> +	 * clear and initialize the per-cpu info. This is necessary
>> +	 * because, otherwise, slots for offline cpus would never be
>> +	 * filled up. See smp_send_stop().
>> +	 */
>> +	memset(&dummy_regs, 0, sizeof(dummy_regs));
>> +	for_each_possible_cpu(cpu)
>> +		crash_save_cpu(&dummy_regs, cpu);
>> +
>> +	/* shutdown non-boot cpus */
>> +	smp_send_stop();
>> +
>> +	/* for boot cpu */
>> +	crash_save_cpu(regs, smp_processor_id());
>> +
>> +	pr_info("Starting crashdump kernel...\n");
>>   }
>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>> index b1adc51..15aabef 100644
>> --- a/arch/arm64/kernel/smp.c
>> +++ b/arch/arm64/kernel/smp.c
>> @@ -37,6 +37,7 @@
>>   #include <linux/completion.h>
>>   #include <linux/of.h>
>>   #include <linux/irq_work.h>
>> +#include <linux/kexec.h>
>>
>>   #include <asm/alternative.h>
>>   #include <asm/atomic.h>
>> @@ -54,6 +55,8 @@
>>   #include <asm/ptrace.h>
>>   #include <asm/virt.h>
>>
>> +#include "cpu-reset.h"
>> +
>>   #define CREATE_TRACE_POINTS
>>   #include <trace/events/ipi.h>
>>
>> @@ -683,8 +686,12 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
>>   /*
>>    * ipi_cpu_stop - handle IPI from smp_send_stop()
>>    */
>> -static void ipi_cpu_stop(unsigned int cpu)
>> +static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
>>   {
>> +#ifdef CONFIG_KEXEC
>> +	/* printing messages may slow down the shutdown. */
>> +	if (!in_crash_kexec)
>> +#endif
>>   	if (system_state == SYSTEM_BOOTING ||
>>   	    system_state == SYSTEM_RUNNING) {
>>   		raw_spin_lock(&stop_lock);
>
> Irrespective of how useful this change is, how about having a predicate
> instead? Something like:
>
> static inline bool is_in_crash_kexec(void)
> {
> #ifdef CONFIG_KEXEC
> 	return in_crash_kexec;
> #else
> 	return false;
> #endif
> }

OK, I will take your idea.

> located in machine_kexec.c (making the in_crash_kernel static), and then

but we cannot make in_crash_kexec static because it is used in both smp.c
and machine_kexec.c.

> 	if (!is_in_crash_kexec() && (systen_state == ... || ...) {
>
> It would certainly look better.

Thanks,
-Takahiro AKASHI

>> @@ -697,6 +704,11 @@ static void ipi_cpu_stop(unsigned int cpu)
>>
>>   	local_irq_disable();
>>
>> +#ifdef CONFIG_KEXEC
>> +	if (in_crash_kexec)
>> +		crash_save_cpu(regs, cpu);
>> +#endif /* CONFIG_KEXEC */
>> +
>
> Same here.
>
>>   	while (1)
>>   		cpu_relax();
>>   }
>> @@ -727,7 +739,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
>>
>>   	case IPI_CPU_STOP:
>>   		irq_enter();
>> -		ipi_cpu_stop(cpu);
>> +		ipi_cpu_stop(cpu, regs);
>>   		irq_exit();
>>   		break;
>>
>>
>
> Thanks,
>
> 	M.
>
AKASHI Takahiro Dec. 10, 2015, 11:42 a.m. UTC | #4
Pratyush,

On 12/03/2015 01:15 PM, Pratyush Anand wrote:
> Hi Akashi,
>
> Some of the points which came while discussing with Mark Salter are worth
> including in v13.
>
> On 24/11/2015:10:25:34 PM, Geoff Levand wrote:
>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>
> [...]
>
>> +/**
>> + * machine_crash_shutdown - shutdown non-boot cpus and save registers
>
> "non-panic" would be correct in stead of "non-boot".

OK, but for consistency with other places (say, arm/kernel/machine_kexec.c)
I prefer "non-crashing cpus."

>> +	/* shutdown non-boot cpus */
>
> Ditto
>
>> +	smp_send_stop();
>> +
>> +	/* for boot cpu */
>
> "for panic cpu"

Ditto.
"for crashing cpu"

>
>
> [...]
>
>> @@ -697,6 +704,11 @@ static void ipi_cpu_stop(unsigned int cpu)
>>
>>   	local_irq_disable();
>
> We have "set_cpu_online(cpu, false);" just before it.
> Panic core is waiting for non-panic to go offline, i.e. for the above event.
>
>>
>> +#ifdef CONFIG_KEXEC
>> +	if (in_crash_kexec)
>> +		crash_save_cpu(regs, cpu);
>> +#endif /* CONFIG_KEXEC */
>
> However, we are still saving crash info for non-panic core. So, it would be good
> to move crash_save_cpu() before set_cpu_online() to avoid any race condition.

Good point, though the race is very unlikely. I will fix it.

Thanks,
-Takahiro AKASHI

> ~Pratyush
>
Marc Zyngier Dec. 10, 2015, 11:44 a.m. UTC | #5
On 10/12/15 11:34, AKASHI Takahiro wrote:
> Marc,
> 
> I was back from my vacation.
> 
> On 11/27/2015 11:39 PM, Marc Zyngier wrote:
>> On 24/11/15 22:25, Geoff Levand wrote:
>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>
>>> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
>>> save registers' status in per-cpu ELF notes before starting the crash
>>> dump kernel. See kernel_kexec().
>>>
>>> ipi_cpu_stop() is a bit modified and used to support this behavior.
>>>
>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>> ---
>>>   arch/arm64/include/asm/kexec.h    | 34 +++++++++++++++++++++++++++++++++-
>>>   arch/arm64/kernel/machine_kexec.c | 31 +++++++++++++++++++++++++++++--
>>>   arch/arm64/kernel/smp.c           | 16 ++++++++++++++--
>>>   3 files changed, 76 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>>> index 46d63cd..555a955 100644
>>> --- a/arch/arm64/include/asm/kexec.h
>>> +++ b/arch/arm64/include/asm/kexec.h
>>> @@ -30,6 +30,8 @@
>>>
>>>   #if !defined(__ASSEMBLY__)
>>>
>>> +extern bool in_crash_kexec;
>>> +
>>>   /**
>>>    * crash_setup_regs() - save registers for the panic kernel
>>>    *
>>> @@ -40,7 +42,37 @@
>>>   static inline void crash_setup_regs(struct pt_regs *newregs,
>>>   				    struct pt_regs *oldregs)
>>>   {
>>> -	/* Empty routine needed to avoid build errors. */
>>> +	if (oldregs) {
>>> +		memcpy(newregs, oldregs, sizeof(*newregs));
>>> +	} else {
>>> +		__asm__ __volatile__ (
>>> +			"stp	 x0,   x1, [%3, #16 *  0]\n"
>>> +			"stp	 x2,   x3, [%3, #16 *  1]\n"
>>> +			"stp	 x4,   x5, [%3, #16 *  2]\n"
>>> +			"stp	 x6,   x7, [%3, #16 *  3]\n"
>>> +			"stp	 x8,   x9, [%3, #16 *  4]\n"
>>> +			"stp	x10,  x11, [%3, #16 *  5]\n"
>>> +			"stp	x12,  x13, [%3, #16 *  6]\n"
>>> +			"stp	x14,  x15, [%3, #16 *  7]\n"
>>> +			"stp	x16,  x17, [%3, #16 *  8]\n"
>>> +			"stp	x18,  x19, [%3, #16 *  9]\n"
>>> +			"stp	x20,  x21, [%3, #16 * 10]\n"
>>> +			"stp	x22,  x23, [%3, #16 * 11]\n"
>>> +			"stp	x24,  x25, [%3, #16 * 12]\n"
>>> +			"stp	x26,  x27, [%3, #16 * 13]\n"
>>> +			"stp	x28,  x29, [%3, #16 * 14]\n"
>>> +			"str	x30,	   [%3, #16 * 15]\n"
>>> +			"mov	%0, sp\n"
>>> +			"adr	%1, 1f\n"
>>> +			"mrs	%2, spsr_el1\n"
>>> +		"1:"
>>> +			: "=r" (newregs->sp),
>>> +			  "=r" (newregs->pc),
>>> +			  "=r" (newregs->pstate)
>>> +			: "r"  (&newregs->regs)
>>> +			: "memory"
>>> +		);
>>
>> I wonder how useful this thing is, given that it starts by corrupting
>> whatever register is holding newregs->regs. Maybe this is not supposed
>> to be accurate anyway...
> 
> I'm not quite sure about what part of my code you're mentioning here, but
> crash_setup_regs() is solely called by crash_kexec(), and panic() is
> the only caller of crash_kexec() with NULL argument which, in turn, is
> used as 'oldregs' in crash_setup_regs().

You have this assembly sequence:

stp	 x0,   x1, [%3, #16 *  0]
[...]

where %3 itself is one of the x[0..30] registers. So you are saving
things that have already been corrupted by the saving procedure. Not
sure how useful that is, but as I said, maybe it is not supposed to be
completely accurate.

> Given this fact, I think that the values saved in newregs as indicated above
> will be the best estimate of current cpu contexts.
> 
> The other caller of crash_kexec() is die() in traps.c, but here we call
> it with explicit cpu contexts at exception.
> 
>>
>>> +	}
>>>   }
>>>
>>>   #endif /* !defined(__ASSEMBLY__) */
>>> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
>>> index da28a26..d2d7e90 100644
>>> --- a/arch/arm64/kernel/machine_kexec.c
>>> +++ b/arch/arm64/kernel/machine_kexec.c
>>> @@ -9,6 +9,7 @@
>>>    * published by the Free Software Foundation.
>>>    */
>>>
>>> +#include <linux/kernel.h>
>>>   #include <linux/kexec.h>
>>>   #include <linux/of_fdt.h>
>>>   #include <linux/slab.h>
>>> @@ -23,6 +24,7 @@
>>>   extern const unsigned char arm64_relocate_new_kernel[];
>>>   extern const unsigned long arm64_relocate_new_kernel_size;
>>>
>>> +bool in_crash_kexec;
>>>   static unsigned long kimage_start;
>>>
>>>   /**
>>> @@ -203,13 +205,38 @@ void machine_kexec(struct kimage *kimage)
>>>   	 */
>>>
>>>   	cpu_soft_restart(virt_to_phys(cpu_reset),
>>> -		is_hyp_mode_available(),
>>> +		in_crash_kexec ? 0 : is_hyp_mode_available(),
>>>   		reboot_code_buffer_phys, kimage->head, kimage_start);
>>>
>>>   	BUG(); /* Should never get here. */
>>>   }
>>>
>>> +/**
>>> + * machine_crash_shutdown - shutdown non-boot cpus and save registers
>>> + */
>>>   void machine_crash_shutdown(struct pt_regs *regs)
>>>   {
>>> -	/* Empty routine needed to avoid build errors. */
>>> +	struct pt_regs dummy_regs;
>>> +	int cpu;
>>> +
>>> +	local_irq_disable();
>>> +
>>> +	in_crash_kexec = true;
>>> +
>>> +	/*
>>> +	 * clear and initialize the per-cpu info. This is necessary
>>> +	 * because, otherwise, slots for offline cpus would never be
>>> +	 * filled up. See smp_send_stop().
>>> +	 */
>>> +	memset(&dummy_regs, 0, sizeof(dummy_regs));
>>> +	for_each_possible_cpu(cpu)
>>> +		crash_save_cpu(&dummy_regs, cpu);
>>> +
>>> +	/* shutdown non-boot cpus */
>>> +	smp_send_stop();
>>> +
>>> +	/* for boot cpu */
>>> +	crash_save_cpu(regs, smp_processor_id());
>>> +
>>> +	pr_info("Starting crashdump kernel...\n");
>>>   }
>>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>>> index b1adc51..15aabef 100644
>>> --- a/arch/arm64/kernel/smp.c
>>> +++ b/arch/arm64/kernel/smp.c
>>> @@ -37,6 +37,7 @@
>>>   #include <linux/completion.h>
>>>   #include <linux/of.h>
>>>   #include <linux/irq_work.h>
>>> +#include <linux/kexec.h>
>>>
>>>   #include <asm/alternative.h>
>>>   #include <asm/atomic.h>
>>> @@ -54,6 +55,8 @@
>>>   #include <asm/ptrace.h>
>>>   #include <asm/virt.h>
>>>
>>> +#include "cpu-reset.h"
>>> +
>>>   #define CREATE_TRACE_POINTS
>>>   #include <trace/events/ipi.h>
>>>
>>> @@ -683,8 +686,12 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
>>>   /*
>>>    * ipi_cpu_stop - handle IPI from smp_send_stop()
>>>    */
>>> -static void ipi_cpu_stop(unsigned int cpu)
>>> +static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
>>>   {
>>> +#ifdef CONFIG_KEXEC
>>> +	/* printing messages may slow down the shutdown. */
>>> +	if (!in_crash_kexec)
>>> +#endif
>>>   	if (system_state == SYSTEM_BOOTING ||
>>>   	    system_state == SYSTEM_RUNNING) {
>>>   		raw_spin_lock(&stop_lock);
>>
>> Irrespective of how useful this change is, how about having a predicate
>> instead? Something like:
>>
>> static inline bool is_in_crash_kexec(void)
>> {
>> #ifdef CONFIG_KEXEC
>> 	return in_crash_kexec;
>> #else
>> 	return false;
>> #endif
>> }
> 
> OK, I will take your idea.
> 
>> located in machine_kexec.c (making the in_crash_kernel static), and then
> 
> but cannot make in_crash_kernel static because it is also used in both smp.c
> and machine_kexec.c.

smp.c only reads from in_crash_kexec (at least from what I can see in
this patch), so it should be able to use the accessor.

Thanks,

	M.
Pratyush Anand Dec. 10, 2015, 11:50 a.m. UTC | #6
On 10/12/2015:08:42:12 PM, AKASHI Takahiro wrote:
> >>+ * machine_crash_shutdown - shutdown non-boot cpus and save registers
> >
> >"non-panic" would be correct in stead of "non-boot".
> 
> OK, but for consistency with other places (say, arm/kernel/machine_kexec.c)
> I prefer "non-crashing cpus."

OK.

> 
> >>+	/* shutdown non-boot cpus */
> >
> >Ditto
> >
> >>+	smp_send_stop();
> >>+
> >>+	/* for boot cpu */
> >
> >"for panic cpu"
> 
> Ditto.
> "for crashing cpu"

OK.

~Pratyush
AKASHI Takahiro Dec. 10, 2015, 12:55 p.m. UTC | #7
On 12/10/2015 08:44 PM, Marc Zyngier wrote:
> On 10/12/15 11:34, AKASHI Takahiro wrote:
>> Marc,
>>
>> I was back from my vacation.
>>
>> On 11/27/2015 11:39 PM, Marc Zyngier wrote:
>>> On 24/11/15 22:25, Geoff Levand wrote:
>>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>
>>>> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
>>>> save registers' status in per-cpu ELF notes before starting the crash
>>>> dump kernel. See kernel_kexec().
>>>>
>>>> ipi_cpu_stop() is a bit modified and used to support this behavior.
>>>>
>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>> ---
>>>>    arch/arm64/include/asm/kexec.h    | 34 +++++++++++++++++++++++++++++++++-
>>>>    arch/arm64/kernel/machine_kexec.c | 31 +++++++++++++++++++++++++++++--
>>>>    arch/arm64/kernel/smp.c           | 16 ++++++++++++++--
>>>>    3 files changed, 76 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>>>> index 46d63cd..555a955 100644
>>>> --- a/arch/arm64/include/asm/kexec.h
>>>> +++ b/arch/arm64/include/asm/kexec.h
>>>> @@ -30,6 +30,8 @@
>>>>
>>>>    #if !defined(__ASSEMBLY__)
>>>>
>>>> +extern bool in_crash_kexec;
>>>> +
>>>>    /**
>>>>     * crash_setup_regs() - save registers for the panic kernel
>>>>     *
>>>> @@ -40,7 +42,37 @@
>>>>    static inline void crash_setup_regs(struct pt_regs *newregs,
>>>>    				    struct pt_regs *oldregs)
>>>>    {
>>>> -	/* Empty routine needed to avoid build errors. */
>>>> +	if (oldregs) {
>>>> +		memcpy(newregs, oldregs, sizeof(*newregs));
>>>> +	} else {
>>>> +		__asm__ __volatile__ (
>>>> +			"stp	 x0,   x1, [%3, #16 *  0]\n"
>>>> +			"stp	 x2,   x3, [%3, #16 *  1]\n"
>>>> +			"stp	 x4,   x5, [%3, #16 *  2]\n"
>>>> +			"stp	 x6,   x7, [%3, #16 *  3]\n"
>>>> +			"stp	 x8,   x9, [%3, #16 *  4]\n"
>>>> +			"stp	x10,  x11, [%3, #16 *  5]\n"
>>>> +			"stp	x12,  x13, [%3, #16 *  6]\n"
>>>> +			"stp	x14,  x15, [%3, #16 *  7]\n"
>>>> +			"stp	x16,  x17, [%3, #16 *  8]\n"
>>>> +			"stp	x18,  x19, [%3, #16 *  9]\n"
>>>> +			"stp	x20,  x21, [%3, #16 * 10]\n"
>>>> +			"stp	x22,  x23, [%3, #16 * 11]\n"
>>>> +			"stp	x24,  x25, [%3, #16 * 12]\n"
>>>> +			"stp	x26,  x27, [%3, #16 * 13]\n"
>>>> +			"stp	x28,  x29, [%3, #16 * 14]\n"
>>>> +			"str	x30,	   [%3, #16 * 15]\n"
>>>> +			"mov	%0, sp\n"
>>>> +			"adr	%1, 1f\n"
>>>> +			"mrs	%2, spsr_el1\n"
>>>> +		"1:"
>>>> +			: "=r" (newregs->sp),
>>>> +			  "=r" (newregs->pc),
>>>> +			  "=r" (newregs->pstate)
>>>> +			: "r"  (&newregs->regs)
>>>> +			: "memory"
>>>> +		);
>>>
>>> I wonder how useful this thing is, given that it starts by corrupting
>>> whatever register is holding newregs->regs. Maybe this is not supposed
>>> to be accurate anyway...
>>
>> I'm not quite sure about what part of my code you're mentioning here, but
>> crash_setup_regs() is solely called by crash_kexec(), and panic() is
>> the only caller of crash_kexec() with NULL argument which, in turn, is
>> used as 'oldregs' in crash_setup_regs().
>
> You have this assembly sequence:
>
> stp	 x0,   x1, [%3, #16 *  0]
> [...]
>
> where %3 itself is one of the x[0..30] registers.

Not only %3, but also

> So you are saving
> things that have already been corrupted by the saving procedure. Not
> sure how useful that is, but as I said, maybe it is not supposed to be
> completely accurate.

x0, x1 ... are the current values in panic(), and not the exact cpu contexts
at the place we are really interested in.
We have no way here in panic() to know them, but sp and pc would still be useful
for back-tracing in later investigation of dump file.

Please note that the same problem exists on arm (and x86) implementation.

>> Given this fact, I think that the values saved in newregs as indicated above
>> will be the best estimate of current cpu contexts.
>>
>> The other caller of crash_kexec() is die() in traps.c, but here we call
>> it with explicit cpu contexts at exception.
>>
>>>
>>>> +	}
>>>>    }
>>>>
>>>>    #endif /* !defined(__ASSEMBLY__) */
>>>> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
>>>> index da28a26..d2d7e90 100644
>>>> --- a/arch/arm64/kernel/machine_kexec.c
>>>> +++ b/arch/arm64/kernel/machine_kexec.c
>>>> @@ -9,6 +9,7 @@
>>>>     * published by the Free Software Foundation.
>>>>     */
>>>>
>>>> +#include <linux/kernel.h>
>>>>    #include <linux/kexec.h>
>>>>    #include <linux/of_fdt.h>
>>>>    #include <linux/slab.h>
>>>> @@ -23,6 +24,7 @@
>>>>    extern const unsigned char arm64_relocate_new_kernel[];
>>>>    extern const unsigned long arm64_relocate_new_kernel_size;
>>>>
>>>> +bool in_crash_kexec;
>>>>    static unsigned long kimage_start;
>>>>
>>>>    /**
>>>> @@ -203,13 +205,38 @@ void machine_kexec(struct kimage *kimage)
>>>>    	 */
>>>>
>>>>    	cpu_soft_restart(virt_to_phys(cpu_reset),
>>>> -		is_hyp_mode_available(),
>>>> +		in_crash_kexec ? 0 : is_hyp_mode_available(),
>>>>    		reboot_code_buffer_phys, kimage->head, kimage_start);
>>>>
>>>>    	BUG(); /* Should never get here. */
>>>>    }
>>>>
>>>> +/**
>>>> + * machine_crash_shutdown - shutdown non-boot cpus and save registers
>>>> + */
>>>>    void machine_crash_shutdown(struct pt_regs *regs)
>>>>    {
>>>> -	/* Empty routine needed to avoid build errors. */
>>>> +	struct pt_regs dummy_regs;
>>>> +	int cpu;
>>>> +
>>>> +	local_irq_disable();
>>>> +
>>>> +	in_crash_kexec = true;
>>>> +
>>>> +	/*
>>>> +	 * clear and initialize the per-cpu info. This is necessary
>>>> +	 * because, otherwise, slots for offline cpus would never be
>>>> +	 * filled up. See smp_send_stop().
>>>> +	 */
>>>> +	memset(&dummy_regs, 0, sizeof(dummy_regs));
>>>> +	for_each_possible_cpu(cpu)
>>>> +		crash_save_cpu(&dummy_regs, cpu);
>>>> +
>>>> +	/* shutdown non-boot cpus */
>>>> +	smp_send_stop();
>>>> +
>>>> +	/* for boot cpu */
>>>> +	crash_save_cpu(regs, smp_processor_id());
>>>> +
>>>> +	pr_info("Starting crashdump kernel...\n");
>>>>    }
>>>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>>>> index b1adc51..15aabef 100644
>>>> --- a/arch/arm64/kernel/smp.c
>>>> +++ b/arch/arm64/kernel/smp.c
>>>> @@ -37,6 +37,7 @@
>>>>    #include <linux/completion.h>
>>>>    #include <linux/of.h>
>>>>    #include <linux/irq_work.h>
>>>> +#include <linux/kexec.h>
>>>>
>>>>    #include <asm/alternative.h>
>>>>    #include <asm/atomic.h>
>>>> @@ -54,6 +55,8 @@
>>>>    #include <asm/ptrace.h>
>>>>    #include <asm/virt.h>
>>>>
>>>> +#include "cpu-reset.h"
>>>> +
>>>>    #define CREATE_TRACE_POINTS
>>>>    #include <trace/events/ipi.h>
>>>>
>>>> @@ -683,8 +686,12 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
>>>>    /*
>>>>     * ipi_cpu_stop - handle IPI from smp_send_stop()
>>>>     */
>>>> -static void ipi_cpu_stop(unsigned int cpu)
>>>> +static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
>>>>    {
>>>> +#ifdef CONFIG_KEXEC
>>>> +	/* printing messages may slow down the shutdown. */
>>>> +	if (!in_crash_kexec)
>>>> +#endif
>>>>    	if (system_state == SYSTEM_BOOTING ||
>>>>    	    system_state == SYSTEM_RUNNING) {
>>>>    		raw_spin_lock(&stop_lock);
>>>
>>> Irrespective of how useful this change is, how about having a predicate
>>> instead? Something like:
>>>
>>> static inline bool is_in_crash_kexec(void)
>>> {
>>> #ifdef CONFIG_KEXEC
>>> 	return in_crash_kexec;
>>> #else
>>> 	return false;
>>> #endif
>>> }
>>
>> OK, I will take your idea.
>>
>>> located in machine_kexec.c (making the in_crash_kernel static), and then
>>
>> but cannot make in_crash_kernel static because it is also used in both smp.c
>> and machine_kexec.c.
>
> smp.c only reads from in_crash_kernel (at least from what I can see in
> this patch), so it should be able to use the accessor.

Only if we define the accessor as a real function, not an inline function in a header :)

-Takahiro AKASHI

> Thanks,
>
> 	M.
>
Marc Zyngier Dec. 10, 2015, 1:43 p.m. UTC | #8
On 10/12/15 12:55, AKASHI Takahiro wrote:
> On 12/10/2015 08:44 PM, Marc Zyngier wrote:
>> On 10/12/15 11:34, AKASHI Takahiro wrote:
>>> Marc,
>>>
>>> I am back from my vacation.
>>>
>>> On 11/27/2015 11:39 PM, Marc Zyngier wrote:
>>>> On 24/11/15 22:25, Geoff Levand wrote:
>>>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>>
>>>>> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
>>>>> save registers' status in per-cpu ELF notes before starting the crash
>>>>> dump kernel. See kernel_kexec().
>>>>>
>>>>> ipi_cpu_stop() is a bit modified and used to support this behavior.
>>>>>
>>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>> ---
>>>>>    arch/arm64/include/asm/kexec.h    | 34 +++++++++++++++++++++++++++++++++-
>>>>>    arch/arm64/kernel/machine_kexec.c | 31 +++++++++++++++++++++++++++++--
>>>>>    arch/arm64/kernel/smp.c           | 16 ++++++++++++++--
>>>>>    3 files changed, 76 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>>>>> index 46d63cd..555a955 100644
>>>>> --- a/arch/arm64/include/asm/kexec.h
>>>>> +++ b/arch/arm64/include/asm/kexec.h
>>>>> @@ -30,6 +30,8 @@
>>>>>
>>>>>    #if !defined(__ASSEMBLY__)
>>>>>
>>>>> +extern bool in_crash_kexec;
>>>>> +
>>>>>    /**
>>>>>     * crash_setup_regs() - save registers for the panic kernel
>>>>>     *
>>>>> @@ -40,7 +42,37 @@
>>>>>    static inline void crash_setup_regs(struct pt_regs *newregs,
>>>>>    				    struct pt_regs *oldregs)
>>>>>    {
>>>>> -	/* Empty routine needed to avoid build errors. */
>>>>> +	if (oldregs) {
>>>>> +		memcpy(newregs, oldregs, sizeof(*newregs));
>>>>> +	} else {
>>>>> +		__asm__ __volatile__ (
>>>>> +			"stp	 x0,   x1, [%3, #16 *  0]\n"
>>>>> +			"stp	 x2,   x3, [%3, #16 *  1]\n"
>>>>> +			"stp	 x4,   x5, [%3, #16 *  2]\n"
>>>>> +			"stp	 x6,   x7, [%3, #16 *  3]\n"
>>>>> +			"stp	 x8,   x9, [%3, #16 *  4]\n"
>>>>> +			"stp	x10,  x11, [%3, #16 *  5]\n"
>>>>> +			"stp	x12,  x13, [%3, #16 *  6]\n"
>>>>> +			"stp	x14,  x15, [%3, #16 *  7]\n"
>>>>> +			"stp	x16,  x17, [%3, #16 *  8]\n"
>>>>> +			"stp	x18,  x19, [%3, #16 *  9]\n"
>>>>> +			"stp	x20,  x21, [%3, #16 * 10]\n"
>>>>> +			"stp	x22,  x23, [%3, #16 * 11]\n"
>>>>> +			"stp	x24,  x25, [%3, #16 * 12]\n"
>>>>> +			"stp	x26,  x27, [%3, #16 * 13]\n"
>>>>> +			"stp	x28,  x29, [%3, #16 * 14]\n"
>>>>> +			"str	x30,	   [%3, #16 * 15]\n"
>>>>> +			"mov	%0, sp\n"
>>>>> +			"adr	%1, 1f\n"
>>>>> +			"mrs	%2, spsr_el1\n"
>>>>> +		"1:"
>>>>> +			: "=r" (newregs->sp),
>>>>> +			  "=r" (newregs->pc),
>>>>> +			  "=r" (newregs->pstate)
>>>>> +			: "r"  (&newregs->regs)
>>>>> +			: "memory"
>>>>> +		);
>>>>
>>>> I wonder how useful this thing is, given that it starts by corrupting
>>>> whatever register is holding newregs->regs. Maybe this is not supposed
>>>> to be accurate anyway...
>>>
>>> I'm not quite sure which part of my code you're referring to here, but
>>> crash_setup_regs() is solely called by crash_kexec(), and panic() is
>>> the only caller of crash_kexec() with NULL argument which, in turn, is
>>> used as 'oldregs' in crash_setup_regs().
>>
>> You have this assembly sequence:
>>
>> stp	 x0,   x1, [%3, #16 *  0]
>> [...]
>>
>> where %3 itself is one of the x[0..30] registers.
> 
> Not only %3, but also
> 
>> So you are saving
>> things that have already been corrupted by the saving procedure. Not
>> sure how useful that is, but as I said, maybe it is not supposed to be
>> completely accurate.
> 
> x0, x1 ... are the current values in panic(), and not the exact cpu contexts
> at the place we are really interested in.
> We have no way here in panic() to know them, but sp and pc would still be useful
> for back-tracing in later investigation of dump file.
> 
> Please note that the same problem exists on arm (and x86) implementation.

As I said: if people don't expect to have a precise dump of the register
file, then fine.

>>> Given this fact, I think that the values saved in newregs as indicated above
>>> will be the best estimate of current cpu contexts.
>>>
>>> The other caller of crash_kexec() is die() in traps.c, but here we call
>>> it with explicit cpu contexts at exception.
>>>
>>>>
>>>>> +	}
>>>>>    }
>>>>>
>>>>>    #endif /* !defined(__ASSEMBLY__) */
>>>>> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
>>>>> index da28a26..d2d7e90 100644
>>>>> --- a/arch/arm64/kernel/machine_kexec.c
>>>>> +++ b/arch/arm64/kernel/machine_kexec.c
>>>>> @@ -9,6 +9,7 @@
>>>>>     * published by the Free Software Foundation.
>>>>>     */
>>>>>
>>>>> +#include <linux/kernel.h>
>>>>>    #include <linux/kexec.h>
>>>>>    #include <linux/of_fdt.h>
>>>>>    #include <linux/slab.h>
>>>>> @@ -23,6 +24,7 @@
>>>>>    extern const unsigned char arm64_relocate_new_kernel[];
>>>>>    extern const unsigned long arm64_relocate_new_kernel_size;
>>>>>
>>>>> +bool in_crash_kexec;
>>>>>    static unsigned long kimage_start;
>>>>>
>>>>>    /**
>>>>> @@ -203,13 +205,38 @@ void machine_kexec(struct kimage *kimage)
>>>>>    	 */
>>>>>
>>>>>    	cpu_soft_restart(virt_to_phys(cpu_reset),
>>>>> -		is_hyp_mode_available(),
>>>>> +		in_crash_kexec ? 0 : is_hyp_mode_available(),
>>>>>    		reboot_code_buffer_phys, kimage->head, kimage_start);
>>>>>
>>>>>    	BUG(); /* Should never get here. */
>>>>>    }
>>>>>
>>>>> +/**
>>>>> + * machine_crash_shutdown - shutdown non-boot cpus and save registers
>>>>> + */
>>>>>    void machine_crash_shutdown(struct pt_regs *regs)
>>>>>    {
>>>>> -	/* Empty routine needed to avoid build errors. */
>>>>> +	struct pt_regs dummy_regs;
>>>>> +	int cpu;
>>>>> +
>>>>> +	local_irq_disable();
>>>>> +
>>>>> +	in_crash_kexec = true;
>>>>> +
>>>>> +	/*
>>>>> +	 * clear and initialize the per-cpu info. This is necessary
>>>>> +	 * because, otherwise, slots for offline cpus would never be
>>>>> +	 * filled up. See smp_send_stop().
>>>>> +	 */
>>>>> +	memset(&dummy_regs, 0, sizeof(dummy_regs));
>>>>> +	for_each_possible_cpu(cpu)
>>>>> +		crash_save_cpu(&dummy_regs, cpu);
>>>>> +
>>>>> +	/* shutdown non-boot cpus */
>>>>> +	smp_send_stop();
>>>>> +
>>>>> +	/* for boot cpu */
>>>>> +	crash_save_cpu(regs, smp_processor_id());
>>>>> +
>>>>> +	pr_info("Starting crashdump kernel...\n");
>>>>>    }
>>>>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>>>>> index b1adc51..15aabef 100644
>>>>> --- a/arch/arm64/kernel/smp.c
>>>>> +++ b/arch/arm64/kernel/smp.c
>>>>> @@ -37,6 +37,7 @@
>>>>>    #include <linux/completion.h>
>>>>>    #include <linux/of.h>
>>>>>    #include <linux/irq_work.h>
>>>>> +#include <linux/kexec.h>
>>>>>
>>>>>    #include <asm/alternative.h>
>>>>>    #include <asm/atomic.h>
>>>>> @@ -54,6 +55,8 @@
>>>>>    #include <asm/ptrace.h>
>>>>>    #include <asm/virt.h>
>>>>>
>>>>> +#include "cpu-reset.h"
>>>>> +
>>>>>    #define CREATE_TRACE_POINTS
>>>>>    #include <trace/events/ipi.h>
>>>>>
>>>>> @@ -683,8 +686,12 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
>>>>>    /*
>>>>>     * ipi_cpu_stop - handle IPI from smp_send_stop()
>>>>>     */
>>>>> -static void ipi_cpu_stop(unsigned int cpu)
>>>>> +static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
>>>>>    {
>>>>> +#ifdef CONFIG_KEXEC
>>>>> +	/* printing messages may slow down the shutdown. */
>>>>> +	if (!in_crash_kexec)
>>>>> +#endif
>>>>>    	if (system_state == SYSTEM_BOOTING ||
>>>>>    	    system_state == SYSTEM_RUNNING) {
>>>>>    		raw_spin_lock(&stop_lock);
>>>>
>>>> Irrespective of how useful this change is, how about having a predicate
>>>> instead? Something like:
>>>>
>>>> static inline bool is_in_crash_kexec(void)
>>>> {
>>>> #ifdef CONFIG_KEXEC
>>>> 	return in_crash_kexec;
>>>> #else
>>>> 	return false;
>>>> #endif
>>>> }
>>>
>>> OK, I will take your idea.
>>>
>>>> located in machine_kexec.c (making in_crash_kexec static), and then
>>>
>>> but we cannot make in_crash_kexec static because it is used in both smp.c
>>> and machine_kexec.c.
>>
>> smp.c only reads from in_crash_kexec (at least from what I can see in
>> this patch), so it should be able to use the accessor.
> 
> Only if we define the accessor as a real function, not an inline function in a header :)

I'll leave the implementation details in your capable hands. :-)

Thanks,

	M.
diff mbox

Patch

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 46d63cd..555a955 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -30,6 +30,8 @@ 
 
 #if !defined(__ASSEMBLY__)
 
+extern bool in_crash_kexec;
+
 /**
  * crash_setup_regs() - save registers for the panic kernel
  *
@@ -40,7 +42,37 @@ 
 static inline void crash_setup_regs(struct pt_regs *newregs,
 				    struct pt_regs *oldregs)
 {
-	/* Empty routine needed to avoid build errors. */
+	if (oldregs) {
+		memcpy(newregs, oldregs, sizeof(*newregs));
+	} else {
+		__asm__ __volatile__ (
+			"stp	 x0,   x1, [%3, #16 *  0]\n"
+			"stp	 x2,   x3, [%3, #16 *  1]\n"
+			"stp	 x4,   x5, [%3, #16 *  2]\n"
+			"stp	 x6,   x7, [%3, #16 *  3]\n"
+			"stp	 x8,   x9, [%3, #16 *  4]\n"
+			"stp	x10,  x11, [%3, #16 *  5]\n"
+			"stp	x12,  x13, [%3, #16 *  6]\n"
+			"stp	x14,  x15, [%3, #16 *  7]\n"
+			"stp	x16,  x17, [%3, #16 *  8]\n"
+			"stp	x18,  x19, [%3, #16 *  9]\n"
+			"stp	x20,  x21, [%3, #16 * 10]\n"
+			"stp	x22,  x23, [%3, #16 * 11]\n"
+			"stp	x24,  x25, [%3, #16 * 12]\n"
+			"stp	x26,  x27, [%3, #16 * 13]\n"
+			"stp	x28,  x29, [%3, #16 * 14]\n"
+			"str	x30,	   [%3, #16 * 15]\n"
+			"mov	%0, sp\n"
+			"adr	%1, 1f\n"
+			"mrs	%2, spsr_el1\n"
+		"1:"
+			: "=r" (newregs->sp),
+			  "=r" (newregs->pc),
+			  "=r" (newregs->pstate)
+			: "r"  (&newregs->regs)
+			: "memory"
+		);
+	}
 }
 
 #endif /* !defined(__ASSEMBLY__) */
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index da28a26..d2d7e90 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -9,6 +9,7 @@ 
  * published by the Free Software Foundation.
  */
 
+#include <linux/kernel.h>
 #include <linux/kexec.h>
 #include <linux/of_fdt.h>
 #include <linux/slab.h>
@@ -23,6 +24,7 @@ 
 extern const unsigned char arm64_relocate_new_kernel[];
 extern const unsigned long arm64_relocate_new_kernel_size;
 
+bool in_crash_kexec;
 static unsigned long kimage_start;
 
 /**
@@ -203,13 +205,38 @@  void machine_kexec(struct kimage *kimage)
 	 */
 
 	cpu_soft_restart(virt_to_phys(cpu_reset),
-		is_hyp_mode_available(),
+		in_crash_kexec ? 0 : is_hyp_mode_available(),
 		reboot_code_buffer_phys, kimage->head, kimage_start);
 
 	BUG(); /* Should never get here. */
 }
 
+/**
+ * machine_crash_shutdown - shutdown non-boot cpus and save registers
+ */
 void machine_crash_shutdown(struct pt_regs *regs)
 {
-	/* Empty routine needed to avoid build errors. */
+	struct pt_regs dummy_regs;
+	int cpu;
+
+	local_irq_disable();
+
+	in_crash_kexec = true;
+
+	/*
+	 * clear and initialize the per-cpu info. This is necessary
+	 * because, otherwise, slots for offline cpus would never be
+	 * filled up. See smp_send_stop().
+	 */
+	memset(&dummy_regs, 0, sizeof(dummy_regs));
+	for_each_possible_cpu(cpu)
+		crash_save_cpu(&dummy_regs, cpu);
+
+	/* shutdown non-boot cpus */
+	smp_send_stop();
+
+	/* for boot cpu */
+	crash_save_cpu(regs, smp_processor_id());
+
+	pr_info("Starting crashdump kernel...\n");
 }
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51..15aabef 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -37,6 +37,7 @@ 
 #include <linux/completion.h>
 #include <linux/of.h>
 #include <linux/irq_work.h>
+#include <linux/kexec.h>
 
 #include <asm/alternative.h>
 #include <asm/atomic.h>
@@ -54,6 +55,8 @@ 
 #include <asm/ptrace.h>
 #include <asm/virt.h>
 
+#include "cpu-reset.h"
+
 #define CREATE_TRACE_POINTS
 #include <trace/events/ipi.h>
 
@@ -683,8 +686,12 @@  static DEFINE_RAW_SPINLOCK(stop_lock);
 /*
  * ipi_cpu_stop - handle IPI from smp_send_stop()
  */
-static void ipi_cpu_stop(unsigned int cpu)
+static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
 {
+#ifdef CONFIG_KEXEC
+	/* printing messages may slow down the shutdown. */
+	if (!in_crash_kexec)
+#endif
 	if (system_state == SYSTEM_BOOTING ||
 	    system_state == SYSTEM_RUNNING) {
 		raw_spin_lock(&stop_lock);
@@ -697,6 +704,11 @@  static void ipi_cpu_stop(unsigned int cpu)
 
 	local_irq_disable();
 
+#ifdef CONFIG_KEXEC
+	if (in_crash_kexec)
+		crash_save_cpu(regs, cpu);
+#endif /* CONFIG_KEXEC */
+
 	while (1)
 		cpu_relax();
 }
@@ -727,7 +739,7 @@  void handle_IPI(int ipinr, struct pt_regs *regs)
 
 	case IPI_CPU_STOP:
 		irq_enter();
-		ipi_cpu_stop(cpu);
+		ipi_cpu_stop(cpu, regs);
 		irq_exit();
 		break;