diff mbox

[v2,2/5] arm64: kdump: implement machine_crash_shutdown()

Message ID 1429861989-8417-3-git-send-email-takahiro.akashi@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

AKASHI Takahiro April 24, 2015, 7:53 a.m. UTC
kdump calls machine_crash_shutdown() to shut down non-boot cpus and
save per-cpu general-purpose registers before restarting the crash dump
kernel. See kernel_kexec().
ipi_cpu_stop() is used and a bit modified to support this behavior.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/include/asm/kexec.h    |   34 ++++++++++++++++++++++-
 arch/arm64/kernel/machine_kexec.c |   55 ++++++++++++++++++++++++++++++++++++-
 arch/arm64/kernel/smp.c           |   12 ++++++--
 3 files changed, 97 insertions(+), 4 deletions(-)

Comments

Mark Rutland April 24, 2015, 10:39 a.m. UTC | #1
On Fri, Apr 24, 2015 at 08:53:05AM +0100, AKASHI Takahiro wrote:
> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
> save per-cpu general-purpose registers before restarting the crash dump
> kernel. See kernel_kexec().
> ipi_cpu_stop() is used and a bit modified to support this behavior.
> 
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> ---
>  arch/arm64/include/asm/kexec.h    |   34 ++++++++++++++++++++++-
>  arch/arm64/kernel/machine_kexec.c |   55 ++++++++++++++++++++++++++++++++++++-
>  arch/arm64/kernel/smp.c           |   12 ++++++--
>  3 files changed, 97 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
> index 3530ff5..eaf3fcb 100644
> --- a/arch/arm64/include/asm/kexec.h
> +++ b/arch/arm64/include/asm/kexec.h
> @@ -30,6 +30,8 @@
>  
>  #if !defined(__ASSEMBLY__)
>  
> +extern bool in_crash_kexec;
> +
>  /**
>   * crash_setup_regs() - save registers for the panic kernel
>   *
> @@ -40,7 +42,37 @@
>  static inline void crash_setup_regs(struct pt_regs *newregs,
>  				    struct pt_regs *oldregs)
>  {
> -	/* Empty routine needed to avoid build errors. */
> +	if (oldregs) {
> +		memcpy(newregs, oldregs, sizeof(*newregs));
> +	} else {
> +		__asm__ __volatile__ (
> +			"stp	 x0,   x1, [%3]\n\t"

Why the tabs?

Please use #16 * N as the offset for consistency with entry.S, with 0
for the first N.

[...]

> +static void machine_kexec_mask_interrupts(void)
> +{
> +	unsigned int i;
> +	struct irq_desc *desc;
> +
> +	for_each_irq_desc(i, desc) {
> +		struct irq_chip *chip;
> +
> +		chip = irq_desc_get_chip(desc);
> +		if (!chip)
> +			continue;
> +
> +		if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
> +			chip->irq_eoi(&desc->irq_data);
> +
> +		if (chip->irq_mask)
> +			chip->irq_mask(&desc->irq_data);
> +
> +		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
> +			chip->irq_disable(&desc->irq_data);
> +	}
> +}

I'm surprised that this isn't left to the irqchip driver init code in
the crash kernel. For all we know this state could be corrupt anyway.

Is there any reason we can't get the GIC driver to nuke all of this at
probe time?

[...]

> @@ -542,7 +543,7 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
>  /*
>   * ipi_cpu_stop - handle IPI from smp_send_stop()
>   */
> -static void ipi_cpu_stop(unsigned int cpu)
> +static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
>  {
>  	if (system_state == SYSTEM_BOOTING ||
>  	    system_state == SYSTEM_RUNNING) {
> @@ -556,6 +557,13 @@ static void ipi_cpu_stop(unsigned int cpu)
>  
>  	local_irq_disable();
>  
> +#ifdef CONFIG_KEXEC
> +	if (in_crash_kexec) {
> +		crash_save_cpu(regs, cpu);
> +		flush_cache_all();

Any cache maintenance will need to be by VA; flush_cache_all doesn't do
what the name implies, though may appear to work by chance.

Is kdump implemented for ARM? I don't see an equivalent in the arch/arm
ipi_cpu_stop.

Mark.
Marc Zyngier April 24, 2015, 10:43 a.m. UTC | #2
On 24/04/15 11:39, Mark Rutland wrote:
> On Fri, Apr 24, 2015 at 08:53:05AM +0100, AKASHI Takahiro wrote:
>> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
>> save per-cpu general-purpose registers before restarting the crash dump
>> kernel. See kernel_kexec().
>> ipi_cpu_stop() is used and a bit modified to support this behavior.
>>
>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>> ---
>>  arch/arm64/include/asm/kexec.h    |   34 ++++++++++++++++++++++-
>>  arch/arm64/kernel/machine_kexec.c |   55 ++++++++++++++++++++++++++++++++++++-
>>  arch/arm64/kernel/smp.c           |   12 ++++++--
>>  3 files changed, 97 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>> index 3530ff5..eaf3fcb 100644
>> --- a/arch/arm64/include/asm/kexec.h
>> +++ b/arch/arm64/include/asm/kexec.h
>> @@ -30,6 +30,8 @@
>>  
>>  #if !defined(__ASSEMBLY__)
>>  
>> +extern bool in_crash_kexec;
>> +
>>  /**
>>   * crash_setup_regs() - save registers for the panic kernel
>>   *
>> @@ -40,7 +42,37 @@
>>  static inline void crash_setup_regs(struct pt_regs *newregs,
>>  				    struct pt_regs *oldregs)
>>  {
>> -	/* Empty routine needed to avoid build errors. */
>> +	if (oldregs) {
>> +		memcpy(newregs, oldregs, sizeof(*newregs));
>> +	} else {
>> +		__asm__ __volatile__ (
>> +			"stp	 x0,   x1, [%3]\n\t"
> 
> Why the tabs?
> 
> Please use #16 * N as the offset for consistency with entry.S, with 0
> for the first N.
> 
> [...]
> 
>> +static void machine_kexec_mask_interrupts(void)
>> +{
>> +	unsigned int i;
>> +	struct irq_desc *desc;
>> +
>> +	for_each_irq_desc(i, desc) {
>> +		struct irq_chip *chip;
>> +
>> +		chip = irq_desc_get_chip(desc);
>> +		if (!chip)
>> +			continue;
>> +
>> +		if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
>> +			chip->irq_eoi(&desc->irq_data);
>> +
>> +		if (chip->irq_mask)
>> +			chip->irq_mask(&desc->irq_data);
>> +
>> +		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
>> +			chip->irq_disable(&desc->irq_data);
>> +	}
>> +}
> 
> I'm surprised that this isn't left to the irqchip driver init code in
> the crash kernel. For all we know this state could be corrupt anyway.

Indeed, parsing the irqdesc list is a recipe for disaster. Who knows
which locks have been taken or simply corrupted, pointers nuked...

> Is there any reason we can't get the GIC driver to nuke all of this at
> probe time?

This feels like the better option. I can cook a patch or two for that.

Thanks,

	M.
AKASHI Takahiro May 11, 2015, 7:10 a.m. UTC | #3
On 04/24/2015 07:39 PM, Mark Rutland wrote:
> On Fri, Apr 24, 2015 at 08:53:05AM +0100, AKASHI Takahiro wrote:
>> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
>> save per-cpu general-purpose registers before restarting the crash dump
>> kernel. See kernel_kexec().
>> ipi_cpu_stop() is used and a bit modified to support this behavior.
>>
>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>> ---
>>   arch/arm64/include/asm/kexec.h    |   34 ++++++++++++++++++++++-
>>   arch/arm64/kernel/machine_kexec.c |   55 ++++++++++++++++++++++++++++++++++++-
>>   arch/arm64/kernel/smp.c           |   12 ++++++--
>>   3 files changed, 97 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>> index 3530ff5..eaf3fcb 100644
>> --- a/arch/arm64/include/asm/kexec.h
>> +++ b/arch/arm64/include/asm/kexec.h
>> @@ -30,6 +30,8 @@
>>
>>   #if !defined(__ASSEMBLY__)
>>
>> +extern bool in_crash_kexec;
>> +
>>   /**
>>    * crash_setup_regs() - save registers for the panic kernel
>>    *
>> @@ -40,7 +42,37 @@
>>   static inline void crash_setup_regs(struct pt_regs *newregs,
>>   				    struct pt_regs *oldregs)
>>   {
>> -	/* Empty routine needed to avoid build errors. */
>> +	if (oldregs) {
>> +		memcpy(newregs, oldregs, sizeof(*newregs));
>> +	} else {
>> +		__asm__ __volatile__ (
>> +			"stp	 x0,   x1, [%3]\n\t"
>
> Why the tabs?

Don't know. Will remove them.

> Please use #16 * N as the offset for consistency with entry.S, with 0
> for the first N.

OK.

> [...]
>
>> +static void machine_kexec_mask_interrupts(void)
>> +{
>> +	unsigned int i;
>> +	struct irq_desc *desc;
>> +
>> +	for_each_irq_desc(i, desc) {
>> +		struct irq_chip *chip;
>> +
>> +		chip = irq_desc_get_chip(desc);
>> +		if (!chip)
>> +			continue;
>> +
>> +		if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
>> +			chip->irq_eoi(&desc->irq_data);
>> +
>> +		if (chip->irq_mask)
>> +			chip->irq_mask(&desc->irq_data);
>> +
>> +		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
>> +			chip->irq_disable(&desc->irq_data);
>> +	}
>> +}
>
> I'm surprised that this isn't left to the irqchip driver init code in
> the crash kernel. For all we know this state could be corrupt anyway.
>
> Is there any reason we can't get the GIC driver to nuke all of this at
> probe time?

I don't get the point. Do you mean that the code should be put in probe() of the GIC driver?
To be honest, this function was copied from arm's implementation.

> [...]
>
>> @@ -542,7 +543,7 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
>>   /*
>>    * ipi_cpu_stop - handle IPI from smp_send_stop()
>>    */
>> -static void ipi_cpu_stop(unsigned int cpu)
>> +static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
>>   {
>>   	if (system_state == SYSTEM_BOOTING ||
>>   	    system_state == SYSTEM_RUNNING) {
>> @@ -556,6 +557,13 @@ static void ipi_cpu_stop(unsigned int cpu)
>>
>>   	local_irq_disable();
>>
>> +#ifdef CONFIG_KEXEC
>> +	if (in_crash_kexec) {
>> +		crash_save_cpu(regs, cpu);
>> +		flush_cache_all();
>
> Any cache maintenance will need to be by VA; flush_cache_all doesn't do
> what the name implies, though may appear to work by chance.
>
> Is kdump implemented for ARM? I don't see equivalent for in the arch/arm
> ipi_cpu_stop.

Arm has a dedicated function in arch/arm/kernel/machine_kexec.c:
    machine_crash_shutdown()
       => machine_crash_nonpanic_core()

Thanks,
-Takahiro AKASHI

> Mark.
>
AKASHI Takahiro May 22, 2015, 5:56 a.m. UTC | #4
Mark,

On 05/11/2015 04:10 PM, AKASHI Takahiro wrote:
> On 04/24/2015 07:39 PM, Mark Rutland wrote:
>> On Fri, Apr 24, 2015 at 08:53:05AM +0100, AKASHI Takahiro wrote:
>>> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
>>> save per-cpu general-purpose registers before restarting the crash dump
>>> kernel. See kernel_kexec().
>>> ipi_cpu_stop() is used and a bit modified to support this behavior.
>>>
>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>> ---
>>>   arch/arm64/include/asm/kexec.h    |   34 ++++++++++++++++++++++-
>>>   arch/arm64/kernel/machine_kexec.c |   55 ++++++++++++++++++++++++++++++++++++-
>>>   arch/arm64/kernel/smp.c           |   12 ++++++--
>>>   3 files changed, 97 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>>> index 3530ff5..eaf3fcb 100644
>>> --- a/arch/arm64/include/asm/kexec.h
>>> +++ b/arch/arm64/include/asm/kexec.h
>>> @@ -30,6 +30,8 @@
>>>
>>>   #if !defined(__ASSEMBLY__)
>>>
>>> +extern bool in_crash_kexec;
>>> +
>>>   /**
>>>    * crash_setup_regs() - save registers for the panic kernel
>>>    *
>>> @@ -40,7 +42,37 @@
>>>   static inline void crash_setup_regs(struct pt_regs *newregs,
>>>                       struct pt_regs *oldregs)
>>>   {
>>> -    /* Empty routine needed to avoid build errors. */
>>> +    if (oldregs) {
>>> +        memcpy(newregs, oldregs, sizeof(*newregs));
>>> +    } else {
>>> +        __asm__ __volatile__ (
>>> +            "stp     x0,   x1, [%3]\n\t"
>>
>> Why the tabs?
>
> Don't know. Will remove them.
>
>> Please use #16 * N as the offset for consistency with entry.S, with 0
>> for the first N.
>
> OK.
>
>> [...]
>>
>>> +static void machine_kexec_mask_interrupts(void)
>>> +{
>>> +    unsigned int i;
>>> +    struct irq_desc *desc;
>>> +
>>> +    for_each_irq_desc(i, desc) {
>>> +        struct irq_chip *chip;
>>> +
>>> +        chip = irq_desc_get_chip(desc);
>>> +        if (!chip)
>>> +            continue;
>>> +
>>> +        if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
>>> +            chip->irq_eoi(&desc->irq_data);
>>> +
>>> +        if (chip->irq_mask)
>>> +            chip->irq_mask(&desc->irq_data);
>>> +
>>> +        if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
>>> +            chip->irq_disable(&desc->irq_data);
>>> +    }
>>> +}
>>
>> I'm surprised that this isn't left to the irqchip driver init code in
>> the crash kernel. For all we know this state could be corrupt anyway.
>>
>> Is there any reason we can't get the GIC driver to nuke all of this at
>> probe time?
>
> I don't get the point. You mean that the code be put in probe() of GIC driver?
> To be honest, this function was copied from arm's implementation.

Could you please elaborate in a bit more detail so that I can address the issue?

Thanks,
-Takahiro AKASHI

>> [...]
>>
>>> @@ -542,7 +543,7 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
>>>   /*
>>>    * ipi_cpu_stop - handle IPI from smp_send_stop()
>>>    */
>>> -static void ipi_cpu_stop(unsigned int cpu)
>>> +static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
>>>   {
>>>       if (system_state == SYSTEM_BOOTING ||
>>>           system_state == SYSTEM_RUNNING) {
>>> @@ -556,6 +557,13 @@ static void ipi_cpu_stop(unsigned int cpu)
>>>
>>>       local_irq_disable();
>>>
>>> +#ifdef CONFIG_KEXEC
>>> +    if (in_crash_kexec) {
>>> +        crash_save_cpu(regs, cpu);
>>> +        flush_cache_all();
>>
>> Any cache maintenance will need to be by VA; flush_cache_all doesn't do
>> what the name implies, though may appear to work by chance.
>>
>> Is kdump implemented for ARM? I don't see equivalent for in the arch/arm
>> ipi_cpu_stop.
>
> Arm has a dedicated function in arch/arm/kernel/machine_kexec.c:
>     machine_crash_shutdown()
>        => machine_crash_nonpanic_core()
>
> Thanks,
> -Takahiro AKASHI
>
>> Mark.
>>
AKASHI Takahiro Aug. 6, 2015, 7:09 a.m. UTC | #5
Marc, Mark

Sorry for not revisiting your comment below for a while.

On 04/24/2015 07:43 PM, Marc Zyngier wrote:
> On 24/04/15 11:39, Mark Rutland wrote:
>> On Fri, Apr 24, 2015 at 08:53:05AM +0100, AKASHI Takahiro wrote:
>>> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
>>> save per-cpu general-purpose registers before restarting the crash dump
>>> kernel. See kernel_kexec().
>>> ipi_cpu_stop() is used and a bit modified to support this behavior.
>>>
>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>> ---
>>>   arch/arm64/include/asm/kexec.h    |   34 ++++++++++++++++++++++-
>>>   arch/arm64/kernel/machine_kexec.c |   55 ++++++++++++++++++++++++++++++++++++-
>>>   arch/arm64/kernel/smp.c           |   12 ++++++--
>>>   3 files changed, 97 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>>> index 3530ff5..eaf3fcb 100644
>>> --- a/arch/arm64/include/asm/kexec.h
>>> +++ b/arch/arm64/include/asm/kexec.h
>>> @@ -30,6 +30,8 @@
>>>
>>>   #if !defined(__ASSEMBLY__)
>>>
>>> +extern bool in_crash_kexec;
>>> +
>>>   /**
>>>    * crash_setup_regs() - save registers for the panic kernel
>>>    *
>>> @@ -40,7 +42,37 @@
>>>   static inline void crash_setup_regs(struct pt_regs *newregs,
>>>   				    struct pt_regs *oldregs)
>>>   {
>>> -	/* Empty routine needed to avoid build errors. */
>>> +	if (oldregs) {
>>> +		memcpy(newregs, oldregs, sizeof(*newregs));
>>> +	} else {
>>> +		__asm__ __volatile__ (
>>> +			"stp	 x0,   x1, [%3]\n\t"
>>
>> Why the tabs?
>>
>> Please use #16 * N as the offset for consistency with entry.S, with 0
>> for the first N.
>>
>> [...]
>>
>>> +static void machine_kexec_mask_interrupts(void)
>>> +{
>>> +	unsigned int i;
>>> +	struct irq_desc *desc;
>>> +
>>> +	for_each_irq_desc(i, desc) {
>>> +		struct irq_chip *chip;
>>> +
>>> +		chip = irq_desc_get_chip(desc);
>>> +		if (!chip)
>>> +			continue;
>>> +
>>> +		if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
>>> +			chip->irq_eoi(&desc->irq_data);
>>> +
>>> +		if (chip->irq_mask)
>>> +			chip->irq_mask(&desc->irq_data);
>>> +
>>> +		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
>>> +			chip->irq_disable(&desc->irq_data);
>>> +	}
>>> +}
>>
>> I'm surprised that this isn't left to the irqchip driver init code in
>> the crash kernel. For all we know this state could be corrupt anyway.
>
> Indeed, parsing the irqdesc list is a recipe for disaster. Who knows
> which locks have been taken or simply corrupted, pointers nuked...
>
>> Is there any reason we can't get the GIC driver to nuke all of this at
>> probe time?

Is it enough just to remove machine_kexec_mask_interrupts() and add gic_eoi_irq()
at the beginning of gic_cpu_init() in irq-gic.c and irq-gic-v3.c?

> This feels like the better option. I can cook a patch or two for that.

If you do, that will be much better :)

BTW, in arm-gic-v3.h, GICD_CTRL_ARE_NS is defined as
     (1U << 4)
but should it be 5?
(I'm referring to the page 8-415 in IHI0069A.)

Thanks,
-Takahiro AKASHI

> Thanks,
>
> 	M.
>
Marc Zyngier Aug. 6, 2015, 3:51 p.m. UTC | #6
Hi,

On 06/08/15 08:09, AKASHI Takahiro wrote:
> Marc, Mark
> 
> Sorry for not revisiting your comment below for a while.

Wow. It took me a few minutes to page the context back in.

> On 04/24/2015 07:43 PM, Marc Zyngier wrote:
>> On 24/04/15 11:39, Mark Rutland wrote:
>>> On Fri, Apr 24, 2015 at 08:53:05AM +0100, AKASHI Takahiro wrote:
>>>> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
>>>> save per-cpu general-purpose registers before restarting the crash dump
>>>> kernel. See kernel_kexec().
>>>> ipi_cpu_stop() is used and a bit modified to support this behavior.
>>>>
>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>> ---
>>>>   arch/arm64/include/asm/kexec.h    |   34 ++++++++++++++++++++++-
>>>>   arch/arm64/kernel/machine_kexec.c |   55 ++++++++++++++++++++++++++++++++++++-
>>>>   arch/arm64/kernel/smp.c           |   12 ++++++--
>>>>   3 files changed, 97 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>>>> index 3530ff5..eaf3fcb 100644
>>>> --- a/arch/arm64/include/asm/kexec.h
>>>> +++ b/arch/arm64/include/asm/kexec.h
>>>> @@ -30,6 +30,8 @@
>>>>
>>>>   #if !defined(__ASSEMBLY__)
>>>>
>>>> +extern bool in_crash_kexec;
>>>> +
>>>>   /**
>>>>    * crash_setup_regs() - save registers for the panic kernel
>>>>    *
>>>> @@ -40,7 +42,37 @@
>>>>   static inline void crash_setup_regs(struct pt_regs *newregs,
>>>>   				    struct pt_regs *oldregs)
>>>>   {
>>>> -	/* Empty routine needed to avoid build errors. */
>>>> +	if (oldregs) {
>>>> +		memcpy(newregs, oldregs, sizeof(*newregs));
>>>> +	} else {
>>>> +		__asm__ __volatile__ (
>>>> +			"stp	 x0,   x1, [%3]\n\t"
>>>
>>> Why the tabs?
>>>
>>> Please use #16 * N as the offset for consistency with entry.S, with 0
>>> for the first N.
>>>
>>> [...]
>>>
>>>> +static void machine_kexec_mask_interrupts(void)
>>>> +{
>>>> +	unsigned int i;
>>>> +	struct irq_desc *desc;
>>>> +
>>>> +	for_each_irq_desc(i, desc) {
>>>> +		struct irq_chip *chip;
>>>> +
>>>> +		chip = irq_desc_get_chip(desc);
>>>> +		if (!chip)
>>>> +			continue;
>>>> +
>>>> +		if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
>>>> +			chip->irq_eoi(&desc->irq_data);
>>>> +
>>>> +		if (chip->irq_mask)
>>>> +			chip->irq_mask(&desc->irq_data);
>>>> +
>>>> +		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
>>>> +			chip->irq_disable(&desc->irq_data);
>>>> +	}
>>>> +}
>>>
>>> I'm surprised that this isn't left to the irqchip driver init code in
>>> the crash kernel. For all we know this state could be corrupt anyway.
>>
>> Indeed, parsing the irqdesc list is a recipe for disaster. Who knows
>> which locks have been taken or simply corrupted, pointers nuked...
>>
>>> Is there any reason we can't get the GIC driver to nuke all of this at
>>> probe time?
> 
> Is it just enough to remove kexec_mask_interrupts() and add gic_eoi_irq()
> at the beginning of gic_cpu_init() in irq-gic.c and irq-gic-v3.c?

No, doing an EOI is definitely the wrong thing to do. If you do it in
the wrong order, you just screw up the GIC state machine. Plus, you have
no idea what to write there...

The only real solution is to zero the "active" registers.

>> This feels like the better option. I can cook a patch or two for that.
> 
> If you do, that will be much better :)

OK, I'll prepare something that we can merge at the same time kexec
comes back from the dead (if it ever does - I'm not holding my breath).

> 
> BTW, in arm-gic-v3.h, GICD_CTRL_ARE_NS is defined as
>      (1U << 4)
> but should it be 5?
> (I'm referring to the page 8-415 in IHI0069A.)

No, look at the definition ARE_NS has when the access is non-secure or
on a system supporting a single security state. The definition you're
referring to is for a secure access (firmware).

Thanks,

	M.
AKASHI Takahiro Aug. 7, 2015, 4:24 a.m. UTC | #7
Marc,

On 08/07/2015 12:51 AM, Marc Zyngier wrote:
> Hi,
>
> On 06/08/15 08:09, AKASHI Takahiro wrote:
>> Marc, Mark
>>
>> Sorry for not revisiting your comment below for a while.
>
> Wow. It took me a few minutes to page the context back in.

Please don't purge the page from your cache for a while :)

>> On 04/24/2015 07:43 PM, Marc Zyngier wrote:
>>> On 24/04/15 11:39, Mark Rutland wrote:
>>>> On Fri, Apr 24, 2015 at 08:53:05AM +0100, AKASHI Takahiro wrote:
>>>>> kdump calls machine_crash_shutdown() to shut down non-boot cpus and
>>>>> save per-cpu general-purpose registers before restarting the crash dump
>>>>> kernel. See kernel_kexec().
>>>>> ipi_cpu_stop() is used and a bit modified to support this behavior.
>>>>>
>>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>> ---
>>>>>    arch/arm64/include/asm/kexec.h    |   34 ++++++++++++++++++++++-
>>>>>    arch/arm64/kernel/machine_kexec.c |   55 ++++++++++++++++++++++++++++++++++++-
>>>>>    arch/arm64/kernel/smp.c           |   12 ++++++--
>>>>>    3 files changed, 97 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>>>>> index 3530ff5..eaf3fcb 100644
>>>>> --- a/arch/arm64/include/asm/kexec.h
>>>>> +++ b/arch/arm64/include/asm/kexec.h
>>>>> @@ -30,6 +30,8 @@
>>>>>
>>>>>    #if !defined(__ASSEMBLY__)
>>>>>
>>>>> +extern bool in_crash_kexec;
>>>>> +
>>>>>    /**
>>>>>     * crash_setup_regs() - save registers for the panic kernel
>>>>>     *
>>>>> @@ -40,7 +42,37 @@
>>>>>    static inline void crash_setup_regs(struct pt_regs *newregs,
>>>>>    				    struct pt_regs *oldregs)
>>>>>    {
>>>>> -	/* Empty routine needed to avoid build errors. */
>>>>> +	if (oldregs) {
>>>>> +		memcpy(newregs, oldregs, sizeof(*newregs));
>>>>> +	} else {
>>>>> +		__asm__ __volatile__ (
>>>>> +			"stp	 x0,   x1, [%3]\n\t"
>>>>
>>>> Why the tabs?
>>>>
>>>> Please use #16 * N as the offset for consistency with entry.S, with 0
>>>> for the first N.
>>>>
>>>> [...]
>>>>
>>>>> +static void machine_kexec_mask_interrupts(void)
>>>>> +{
>>>>> +	unsigned int i;
>>>>> +	struct irq_desc *desc;
>>>>> +
>>>>> +	for_each_irq_desc(i, desc) {
>>>>> +		struct irq_chip *chip;
>>>>> +
>>>>> +		chip = irq_desc_get_chip(desc);
>>>>> +		if (!chip)
>>>>> +			continue;
>>>>> +
>>>>> +		if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
>>>>> +			chip->irq_eoi(&desc->irq_data);
>>>>> +
>>>>> +		if (chip->irq_mask)
>>>>> +			chip->irq_mask(&desc->irq_data);
>>>>> +
>>>>> +		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
>>>>> +			chip->irq_disable(&desc->irq_data);
>>>>> +	}
>>>>> +}
>>>>
>>>> I'm surprised that this isn't left to the irqchip driver init code in
>>>> the crash kernel. For all we know this state could be corrupt anyway.
>>>
>>> Indeed, parsing the irqdesc list is a recipe for disaster. Who knows
>>> which locks have been taken or simply corrupted, pointers nuked...
>>>
>>>> Is there any reason we can't get the GIC driver to nuke all of this at
>>>> probe time?
>>
>> Is it just enough to remove kexec_mask_interrupts() and add gic_eoi_irq()
>> at the beginning of gic_cpu_init() in irq-gic.c and irq-gic-v3.c?
>
> No, doing an EOI is definitely the wrong thing to do. If you do it in
> the wrong order, you just screw up the GIC state machine. Plus, you have
> no idea what to write there...
>
> The only real solution is to zero the "active" registers.
>
>>> This feels like the better option. I can cook a patch or two for that.
>>
>> If you do, that will be much better :)
>
> OK, I'll prepare something that we can merge at the same time kexec
> comes back from the dead (if it ever does - I'm not holding my breath).

Thank you.
Please note that the same function, machine_kexec_mask_interrupts(),
already exists on arm (arch/arm/kernel/machine_kexec.c).

Well, kexec/kdump stuff is not dead.
Hopefully Geoff and I will submit a full patch series in a few weeks,
although the main logic will be the same.

>>
>> BTW, in arm-gic-v3.h, GICD_CTRL_ARE_NS is defined as
>>       (1U << 4)
>> but should it be 5?
>> (I'm referring to the page 8-415 in IHI0069A.)
>
> No, look at the definition ARE_NS has when the access is non-secure or
> on a system supporting a single security state. The definition you're
> referring to is for a secure access (firmware).

Aha, I should remember that arm has "multiple personalities."

-Takahiro AKASHI

> Thanks,
>
> 	M.
>
diff mbox

Patch

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 3530ff5..eaf3fcb 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -30,6 +30,8 @@ 
 
 #if !defined(__ASSEMBLY__)
 
+extern bool in_crash_kexec;
+
 /**
  * crash_setup_regs() - save registers for the panic kernel
  *
@@ -40,7 +42,37 @@ 
 static inline void crash_setup_regs(struct pt_regs *newregs,
 				    struct pt_regs *oldregs)
 {
-	/* Empty routine needed to avoid build errors. */
+	if (oldregs) {
+		memcpy(newregs, oldregs, sizeof(*newregs));
+	} else {
+		__asm__ __volatile__ (
+			"stp	 x0,   x1, [%3]\n\t"
+			"stp	 x2,   x3, [%3, 0x10]\n\t"
+			"stp	 x4,   x5, [%3, 0x20]\n\t"
+			"stp	 x6,   x7, [%3, 0x30]\n\t"
+			"stp	 x8,   x9, [%3, 0x40]\n\t"
+			"stp	x10,  x11, [%3, 0x50]\n\t"
+			"stp	x12,  x13, [%3, 0x60]\n\t"
+			"stp	x14,  x15, [%3, 0x70]\n\t"
+			"stp	x16,  x17, [%3, 0x80]\n\t"
+			"stp	x18,  x19, [%3, 0x90]\n\t"
+			"stp	x20,  x21, [%3, 0xa0]\n\t"
+			"stp	x22,  x23, [%3, 0xb0]\n\t"
+			"stp	x24,  x25, [%3, 0xc0]\n\t"
+			"stp	x26,  x27, [%3, 0xd0]\n\t"
+			"stp	x28,  x29, [%3, 0xe0]\n\t"
+			"str	x30,	   [%3, 0xf0]\n\t"
+			"mov	%0, sp\n\t"
+			"adr	%1, 1f\n\t"
+			"mrs	%2, spsr_el1\n\t"
+		"1:"
+			: "=r" (newregs->sp),
+			  "=r" (newregs->pc),
+			  "=r" (newregs->pstate)
+			: "r"  (&newregs->regs)
+			: "memory"
+		);
+	}
 }
 
 #endif /* !defined(__ASSEMBLY__) */
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index fd6b742..6f7887b 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -9,6 +9,8 @@ 
  * published by the Free Software Foundation.
  */
 
+#include <linux/irq.h>
+#include <linux/kernel.h>
 #include <linux/kexec.h>
 #include <linux/of_fdt.h>
 #include <linux/slab.h>
@@ -24,6 +26,8 @@  extern unsigned long arm64_kexec_dtb_addr;
 extern unsigned long arm64_kexec_kimage_head;
 extern unsigned long arm64_kexec_kimage_start;
 
+bool in_crash_kexec = false;
+
 /**
  * kexec_is_dtb - Helper routine to check the device tree header signature.
  */
@@ -178,7 +182,56 @@  void machine_kexec(struct kimage *image)
 	soft_restart(reboot_code_buffer_phys);
 }
 
+static void machine_kexec_mask_interrupts(void)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_chip *chip;
+
+		chip = irq_desc_get_chip(desc);
+		if (!chip)
+			continue;
+
+		if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
+			chip->irq_eoi(&desc->irq_data);
+
+		if (chip->irq_mask)
+			chip->irq_mask(&desc->irq_data);
+
+		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+			chip->irq_disable(&desc->irq_data);
+	}
+}
+
+/**
+ * machine_crash_shutdown - shutdown non-boot cpus and save registers
+ */
 void machine_crash_shutdown(struct pt_regs *regs)
 {
-	/* Empty routine needed to avoid build errors. */
+	struct pt_regs dummy_regs;
+	int cpu;
+
+	local_irq_disable();
+
+	in_crash_kexec = true;
+
+	/*
+	 * clear and initialize the per-cpu info. This is necessary
+	 * because, otherwise, slots for offline cpus would not be
+	 * filled up. See smp_send_stop().
+	 */
+	memset(&dummy_regs, 0, sizeof(dummy_regs));
+	for_each_possible_cpu(cpu)
+		crash_save_cpu(&dummy_regs, cpu);
+
+	/* shutdown non-boot cpus */
+	smp_send_stop();
+
+	/* for this cpu */
+	crash_save_cpu(regs, smp_processor_id());
+	machine_kexec_mask_interrupts();
+
+	pr_info("Loading crashdump kernel...\n");
 }
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index ffe8e1b..cf7f361 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -36,6 +36,7 @@ 
 #include <linux/completion.h>
 #include <linux/of.h>
 #include <linux/irq_work.h>
+#include <linux/kexec.h>
 
 #include <asm/alternative.h>
 #include <asm/atomic.h>
@@ -542,7 +543,7 @@  static DEFINE_RAW_SPINLOCK(stop_lock);
 /*
  * ipi_cpu_stop - handle IPI from smp_send_stop()
  */
-static void ipi_cpu_stop(unsigned int cpu)
+static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
 {
 	if (system_state == SYSTEM_BOOTING ||
 	    system_state == SYSTEM_RUNNING) {
@@ -556,6 +557,13 @@  static void ipi_cpu_stop(unsigned int cpu)
 
 	local_irq_disable();
 
+#ifdef CONFIG_KEXEC
+	if (in_crash_kexec) {
+		crash_save_cpu(regs, cpu);
+		flush_cache_all();
+	}
+#endif /* CONFIG_KEXEC */
+
 	while (1)
 		cpu_relax();
 }
@@ -586,7 +594,7 @@  void handle_IPI(int ipinr, struct pt_regs *regs)
 
 	case IPI_CPU_STOP:
 		irq_enter();
-		ipi_cpu_stop(cpu);
+		ipi_cpu_stop(cpu, regs);
 		irq_exit();
 		break;