diff mbox

[v2,1/5] arm64: kdump: reserve memory for crash dump kernel

Message ID 1429861989-8417-2-git-send-email-takahiro.akashi@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

AKASHI Takahiro April 24, 2015, 7:53 a.m. UTC
On system kernel, the memory region used by crash dump kernel must be
specified by "crashkernel=X@Y" boot parameter. reserve_crashkernel()
will allocate the region in "System RAM" and reserve it for later use.

On crash dump kernel, memory region information in system kernel is
described in a specific region specified by "elfcorehdr=X@Y" boot parameter.
reserve_elfcorehdr() will set aside the region to avoid data destruction
by the kernel.

Crash dump kernel will access memory regions in system kernel via
copy_oldmem_page(), which reads a page by ioremap'ing it assuming that
such pages are not part of main memory of crash dump kernel.
This is true in a non-UEFI environment because kexec-tools modifies
the device tree, adding "usablemem" attributes to the memory sections.
Under UEFI, however, this is not true because UEFI removes the memory sections
from the device tree and exports all the memory regions, even though they
belong to the system kernel.

So we should add "mem=X[MG]" boot parameter to limit the memory size and
avoid hitting the following assertion in ioremap():
	if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
		return NULL;

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/kernel/Makefile     |    1 +
 arch/arm64/kernel/crash_dump.c |   71 +++++++++++++++++++++++++++++++++
 arch/arm64/kernel/setup.c      |    8 +++-
 arch/arm64/mm/init.c           |   84 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kernel/crash_dump.c

Comments

Mark Rutland April 24, 2015, 10:11 a.m. UTC | #1
On Fri, Apr 24, 2015 at 08:53:04AM +0100, AKASHI Takahiro wrote:
> On system kernel, the memory region used by crash dump kernel must be
> specified by "crashkernel=X@Y" boot parameter. reserve_crashkernel()
> will allocate the region in "System RAM" and reserve it for later use.
> 
> On crash dump kernel, memory region information in system kernel is
> described in a specific region specified by "elfcorehdr=X@Y" boot parameter.
> reserve_elfcorehdr() will set aside the region to avoid data destruction
> by the kernel.
> 
> Crash dump kernel will access memory regions in system kernel via
> copy_oldmem_page(), which reads a page by ioremap'ing it assuming that
> such pages are not part of main memory of crash dump kernel.
> This is true under non-UEFI environment because kexec-tools modifies
> a device tree adding "usablemem" attributes to memory sections.

I'm not sure what you mean by "usablemem" here.

Do you just mean that the memory nodes are altered such that they only
cover memory usable by the crash kernel?

Why not _always_ require a command line argument for the crash kernel
that restricts its memory usage to a particular range? That way it
doesn't matter whether we're using UEFI or not.

> Under UEFI, however, this is not true because UEFI remove memory sections
> in a device tree and export all the memory regions, even though they belong
> to system kernel.
> 
> So we should add "mem=X[MG]" boot parameter to limit the memory size and
> avoid hitting the following assertion in ioremap():
>         if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
>                 return NULL;

That looks suspicious. What is being ioremapped at that point?

[...]

> @@ -393,6 +398,7 @@ void __init setup_arch(char **cmdline_p)
>         local_async_enable();
> 
>         efi_init();
> +
>         arm64_memblock_init();
> 
>         paging_init();

Nit: unrelated whitespace change.

> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index ae85da6..ea70d41 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -34,6 +34,8 @@
>  #include <linux/dma-contiguous.h>
>  #include <linux/efi.h>
>  #include <linux/swiotlb.h>
> +#include <linux/kexec.h>
> +#include <linux/crash_dump.h>

Nit: please keep these ordered.

[...]

> +               if (memblock_reserve(crash_base, crash_size)) {
> +                       pr_warn("crashkernel reservation failed - out of memory\n");
> +                       return;
> +               }

If we can remove this memory rather than reserving it, we can limit the
first kernel's ability to accidentally clobber the crash kernel, at the
expense of having to explicitly map/unmap around loading it.

Mark.
Baoquan He April 28, 2015, 9:19 a.m. UTC | #2
> +#ifdef CONFIG_CRASH_DUMP
> +/*
> + * reserve_elfcorehdr() - reserves memory for elf core header
> + *
> + * This function reserves memory area given in "elfcorehdr=" kernel command
> + * line parameter. The memory reserved is used by a dump capture kernel to
> + * identify the memory used by primary kernel.
> + */

Hi AKASHI,

May I know why elfcorehdr need be reserved separately but not locate a
memory region in crashkernel reserved region like all other ARCHs? Is
there any special reason?

Thanks
Baoquan

> +static void __init reserve_elfcorehdr(void)
> +{
> +	if (!elfcorehdr_size)
> +		return;
> +
> +	if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
> +		pr_warn("elfcorehdr reservation failed - memory is in use (0x%llx)\n",
> +			elfcorehdr_addr);
> +		return;
> +	}
> +
> +	if (memblock_reserve(elfcorehdr_addr, elfcorehdr_size)) {
> +		pr_warn("elfcorehdr reservation failed - out of memory\n");
> +		return;
> +	}
> +
> +	pr_info("Reserving %lldKB of memory at %lldMB for elfcorehdr\n",
> +		elfcorehdr_size >> 10, elfcorehdr_addr >> 20);
> +}
> +#endif /* CONFIG_CRASH_DUMP */
>  /*
>   * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
>   * currently assumes that for memory starting above 4G, 32-bit devices will
> @@ -170,6 +247,13 @@ void __init arm64_memblock_init(void)
>  		memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
>  #endif
>  
> +#ifdef CONFIG_KEXEC
> +	reserve_crashkernel(memory_limit);
> +#endif
> +#ifdef CONFIG_CRASH_DUMP
> +	reserve_elfcorehdr();
> +#endif
> +
>  	early_init_fdt_scan_reserved_mem();
>  
>  	/* 4GB maximum for 32-bit only capable devices */
> -- 
> 1.7.9.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
AKASHI Takahiro May 11, 2015, 6:44 a.m. UTC | #3
On 04/24/2015 07:11 PM, Mark Rutland wrote:
> On Fri, Apr 24, 2015 at 08:53:04AM +0100, AKASHI Takahiro wrote:
>> On system kernel, the memory region used by crash dump kernel must be
>> specified by "crashkernel=X@Y" boot parameter. reserve_crashkernel()
>> will allocate the region in "System RAM" and reserve it for later use.
>>
>> On crash dump kernel, memory region information in system kernel is
>> described in a specific region specified by "elfcorehdr=X@Y" boot parameter.
>> reserve_elfcorehdr() will set aside the region to avoid data destruction
>> by the kernel.
>>
>> Crash dump kernel will access memory regions in system kernel via
>> copy_oldmem_page(), which reads a page by ioremap'ing it assuming that
>> such pages are not part of main memory of crash dump kernel.
>> This is true under non-UEFI environment because kexec-tools modifies
>> a device tree adding "usablemem" attributes to memory sections.
>
> I'm not sure what you mean by "usablemem" here.

I think I explained it in my previous reply.

> Do you just mean that the memory nodes are altered such that they only
> cover memory usable by the crash kernel?
>
> Why not _always_ require a command line argument for the crash kernel
> that restricts its memory usage to a particular range? That way it
> doesn't matter whether we're using UEFI or not.

This is one option, but why does uefi ignore all the memory properties?

>> Under UEFI, however, this is not true because UEFI remove memory sections
>> in a device tree and export all the memory regions, even though they belong
>> to system kernel.
>>
>> So we should add "mem=X[MG]" boot parameter to limit the memory size and
>> avoid hitting the following assertion in ioremap():
>>          if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
>>                  return NULL;
>
> That looks suspicious. What is being ioremapped at that point?

As explained so far, all the memory regions are exposed to crash dump kernel,
and it recognizes any pages which should belong to the old kernel also as
part of crash kernel's memory. So pfn_valid() returns true.


> [...]
>
>> @@ -393,6 +398,7 @@ void __init setup_arch(char **cmdline_p)
>>          local_async_enable();
>>
>>          efi_init();
>> +
>>          arm64_memblock_init();
>>
>>          paging_init();
>
> Nit: unrelated whitespace change.

Ok. Will fix it.

>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index ae85da6..ea70d41 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -34,6 +34,8 @@
>>   #include <linux/dma-contiguous.h>
>>   #include <linux/efi.h>
>>   #include <linux/swiotlb.h>
>> +#include <linux/kexec.h>
>> +#include <linux/crash_dump.h>
>
> Nit: please keep these ordered.

Yeah, but others "linux/*.h" in this file are already in a random order.


> [...]
>
>> +               if (memblock_reserve(crash_base, crash_size)) {
>> +                       pr_warn("crashkernel reservation failed - out of memory\n");
>> +                       return;
>> +               }
>
> If we can remove this memory rather than reserving it, we can limit the
> first kernel's ability to accidentally clobber the crash kernel, at the
> expense of having to explicitly map/unmap around loading it.

Do you mean that we should remove mmu mapping of crash kernel memory?
Might be a good idea, but it requires modifying kernel/kexec.c.

-Takahiro AKASHI

> Mark.
>
AKASHI Takahiro May 11, 2015, 7:38 a.m. UTC | #4
Hi Baoquan,

On 04/28/2015 06:19 PM, Baoquan He wrote:
>> +#ifdef CONFIG_CRASH_DUMP
>> +/*
>> + * reserve_elfcorehdr() - reserves memory for elf core header
>> + *
>> + * This function reserves memory area given in "elfcorehdr=" kernel command
>> + * line parameter. The memory reserved is used by a dump capture kernel to
>> + * identify the memory used by primary kernel.
>> + */
>
> Hi AKASHI,
>
> May I know why elfcorehdr need be reserved separately but not locate a
> memory region in crashkernel reserved region like all other ARCHs? Is
> there any special reason?

I don't get your point, but arm as well as arm64 locates elfcorehdr
in a crash kernel's memory region.
See kexec/arch/arm{,64}/crashdump-arm{,64}.c in kexec-tools.

And this region is reserved at boot time *on crash kernel* because we don't want
to corrupt it accidentally.
(After Mark's comment, we might better remove the mmu mapping for this region, too.)


Make sense?

-Takahiro AKASHI

> Thanks
> Baoquan
>
>> +static void __init reserve_elfcorehdr(void)
>> +{
>> +	if (!elfcorehdr_size)
>> +		return;
>> +
>> +	if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
>> +		pr_warn("elfcorehdr reservation failed - memory is in use (0x%llx)\n",
>> +			elfcorehdr_addr);
>> +		return;
>> +	}
>> +
>> +	if (memblock_reserve(elfcorehdr_addr, elfcorehdr_size)) {
>> +		pr_warn("elfcorehdr reservation failed - out of memory\n");
>> +		return;
>> +	}
>> +
>> +	pr_info("Reserving %lldKB of memory at %lldMB for elfcorehdr\n",
>> +		elfcorehdr_size >> 10, elfcorehdr_addr >> 20);
>> +}
>> +#endif /* CONFIG_CRASH_DUMP */
>>   /*
>>    * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
>>    * currently assumes that for memory starting above 4G, 32-bit devices will
>> @@ -170,6 +247,13 @@ void __init arm64_memblock_init(void)
>>   		memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
>>   #endif
>>
>> +#ifdef CONFIG_KEXEC
>> +	reserve_crashkernel(memory_limit);
>> +#endif
>> +#ifdef CONFIG_CRASH_DUMP
>> +	reserve_elfcorehdr();
>> +#endif
>> +
>>   	early_init_fdt_scan_reserved_mem();
>>
>>   	/* 4GB maximum for 32-bit only capable devices */
>> --
>> 1.7.9.5
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
Baoquan He May 11, 2015, 7:54 a.m. UTC | #5
On 05/11/15 at 04:38pm, AKASHI Takahiro wrote:
> Hi Baoquan,
> 
> On 04/28/2015 06:19 PM, Baoquan He wrote:
> >>+#ifdef CONFIG_CRASH_DUMP
> >>+/*
> >>+ * reserve_elfcorehdr() - reserves memory for elf core header
> >>+ *
> >>+ * This function reserves memory area given in "elfcorehdr=" kernel command
> >>+ * line parameter. The memory reserved is used by a dump capture kernel to
> >>+ * identify the memory used by primary kernel.
> >>+ */
> >
> >Hi AKASHI,
> >
> >May I know why elfcorehdr need be reserved separately but not locate a
> >memory region in crashkernel reserved region like all other ARCHs? Is
> >there any special reason?
> 
> I don't get your point, but arm as well as arm64 locates elfcorehdr
> in a crash kernel's memory region.
> See kexec/arch/arm{,64}/crashdump-arm{,64}.c in kexec-tools.
> 
> And this region is reserved at boot time *on crash kernel* because we don't want
> to corrupt it accidentally.
> (After Mark's comment, we might better remove the mmu mapping for this region, too.)


Sorry, I don't make myself clear.

In this patch you reserve a separate memory region in 1st kernel to
store elfcorehdr. I am wondering why you don't call add_buffer in
kexec-tools directly. Like this you can get a region from reserved
crashkernel region. Then you don't need reserve_elfcorehdr() to reserve
memory for elfcorehdr specifically. Like other ARCHs do only one memory
region is reserved in 1st kernel, that's crashkernel region.

Thanks
Baoquan
> 
> 
> Make sense?
> 
> -Takahiro AKASHI
> 
> >Thanks
> >Baoquan
> >
> >>+static void __init reserve_elfcorehdr(void)
> >>+{
> >>+	if (!elfcorehdr_size)
> >>+		return;
> >>+
> >>+	if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
> >>+		pr_warn("elfcorehdr reservation failed - memory is in use (0x%llx)\n",
> >>+			elfcorehdr_addr);
> >>+		return;
> >>+	}
> >>+
> >>+	if (memblock_reserve(elfcorehdr_addr, elfcorehdr_size)) {
> >>+		pr_warn("elfcorehdr reservation failed - out of memory\n");
> >>+		return;
> >>+	}
> >>+
> >>+	pr_info("Reserving %lldKB of memory at %lldMB for elfcorehdr\n",
> >>+		elfcorehdr_size >> 10, elfcorehdr_addr >> 20);
> >>+}
> >>+#endif /* CONFIG_CRASH_DUMP */
> >>  /*
> >>   * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
> >>   * currently assumes that for memory starting above 4G, 32-bit devices will
> >>@@ -170,6 +247,13 @@ void __init arm64_memblock_init(void)
> >>  		memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
> >>  #endif
> >>
> >>+#ifdef CONFIG_KEXEC
> >>+	reserve_crashkernel(memory_limit);
> >>+#endif
> >>+#ifdef CONFIG_CRASH_DUMP
> >>+	reserve_elfcorehdr();
> >>+#endif
> >>+
> >>  	early_init_fdt_scan_reserved_mem();
> >>
> >>  	/* 4GB maximum for 32-bit only capable devices */
> >>--
> >>1.7.9.5
> >>
> >>--
> >>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> >>the body of a message to majordomo@vger.kernel.org
> >>More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >>Please read the FAQ at  http://www.tux.org/lkml/
AKASHI Takahiro May 11, 2015, 8:17 a.m. UTC | #6
On 05/11/2015 04:54 PM, Baoquan He wrote:
> On 05/11/15 at 04:38pm, AKASHI Takahiro wrote:
>> Hi Baoquan,
>>
>> On 04/28/2015 06:19 PM, Baoquan He wrote:
>>>> +#ifdef CONFIG_CRASH_DUMP
>>>> +/*
>>>> + * reserve_elfcorehdr() - reserves memory for elf core header
>>>> + *
>>>> + * This function reserves memory area given in "elfcorehdr=" kernel command
>>>> + * line parameter. The memory reserved is used by a dump capture kernel to
>>>> + * identify the memory used by primary kernel.
>>>> + */
>>>
>>> Hi AKASHI,
>>>
>>> May I know why elfcorehdr need be reserved separately but not locate a
>>> memory region in crashkernel reserved region like all other ARCHs? Is
>>> there any special reason?
>>
>> I don't get your point, but arm as well as arm64 locates elfcorehdr
>> in a crash kernel's memory region.
>> See kexec/arch/arm{,64}/crashdump-arm{,64}.c in kexec-tools.
>>
>> And this region is reserved at boot time *on crash kernel* because we don't want
>> to corrupt it accidentally.
>> (After Mark's comment, we might better remove the mmu mapping for this region, too.)
>
>
> Sorry, I don't make myself clear.
>
> In this patch you reserve a separate memory region in 1st kernel to
> store elfcorehdr. I am wondering why you don't call add_buffer in
> kexec-tools directly. Like this you can get a region from reserved
> crashkernel region. Then you don't need reserve_elfcorehdr() to reserve
> memory for elfcorehdr specifically. Like other ARCHs do only one memory
> region is reserved in 1st kernel, that's crashkernel region.

I think that you misunderstand somewhat.
* Kexec-tools only locates/identifies a small region for elfcore header within crash kernel's
memory region while 1st kernel is running.
* the data in elfcore header is filled up by kexec_load system call on 1st kernel.
* 1st kernel doesn't reserve any region for elfcore header because the kernel
commandline parameters don't contains "elfcorehdr=" parameter, then elfcorehdr_size=0.
* Crash dump kernel does reserve the region, as I said, because we don't want to
corrupt the info in elfcore header accidentally while crash kernel is running.

Clear?

-Takahiro AKASHI

> Thanks
> Baoquan
>>
>>
>> Make sense?
>>
>> -Takahiro AKASHI
>>
>>> Thanks
>>> Baoquan
>>>
>>>> +static void __init reserve_elfcorehdr(void)
>>>> +{
>>>> +	if (!elfcorehdr_size)
>>>> +		return;
>>>> +
>>>> +	if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
>>>> +		pr_warn("elfcorehdr reservation failed - memory is in use (0x%llx)\n",
>>>> +			elfcorehdr_addr);
>>>> +		return;
>>>> +	}
>>>> +
>>>> +	if (memblock_reserve(elfcorehdr_addr, elfcorehdr_size)) {
>>>> +		pr_warn("elfcorehdr reservation failed - out of memory\n");
>>>> +		return;
>>>> +	}
>>>> +
>>>> +	pr_info("Reserving %lldKB of memory at %lldMB for elfcorehdr\n",
>>>> +		elfcorehdr_size >> 10, elfcorehdr_addr >> 20);
>>>> +}
>>>> +#endif /* CONFIG_CRASH_DUMP */
>>>>   /*
>>>>    * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
>>>>    * currently assumes that for memory starting above 4G, 32-bit devices will
>>>> @@ -170,6 +247,13 @@ void __init arm64_memblock_init(void)
>>>>   		memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
>>>>   #endif
>>>>
>>>> +#ifdef CONFIG_KEXEC
>>>> +	reserve_crashkernel(memory_limit);
>>>> +#endif
>>>> +#ifdef CONFIG_CRASH_DUMP
>>>> +	reserve_elfcorehdr();
>>>> +#endif
>>>> +
>>>>   	early_init_fdt_scan_reserved_mem();
>>>>
>>>>   	/* 4GB maximum for 32-bit only capable devices */
>>>> --
>>>> 1.7.9.5
>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>> Please read the FAQ at  http://www.tux.org/lkml/
Baoquan He May 11, 2015, 9:41 a.m. UTC | #7
On 05/11/15 at 05:17pm, AKASHI Takahiro wrote:
> On 05/11/2015 04:54 PM, Baoquan He wrote:
> >In this patch you reserve a separate memory region in 1st kernel to
> >store elfcorehdr. I am wondering why you don't call add_buffer in
> >kexec-tools directly. Like this you can get a region from reserved
> >crashkernel region. Then you don't need reserve_elfcorehdr() to reserve
> >memory for elfcorehdr specifically. Like other ARCHs do only one memory
> >region is reserved in 1st kernel, that's crashkernel region.
> 
> I think that you misunderstand somewhat.
> * Kexec-tools only locates/identifies a small region for elfcore header within crash kernel's
> memory region while 1st kernel is running.
> * the data in elfcore header is filled up by kexec_load system call on 1st kernel.
> * 1st kernel doesn't reserve any region for elfcore header because the kernel
> commandline parameters don't contains "elfcorehdr=" parameter, then elfcorehdr_size=0.
> * Crash dump kernel does reserve the region, as I said, because we don't want to
> corrupt the info in elfcore header accidentally while crash kernel is running.
> 
> Clear?

OK, got it now.

Then I am wondering why "elfcorehdr=" can't be contained in kernel
cmdline as other ARCH does. Maybe I need go over all related threads
then know why it is. Thanks for explanation.

Thanks
Baoquan
AKASHI Takahiro May 12, 2015, 7:32 a.m. UTC | #8
On 05/11/2015 06:41 PM, Baoquan He wrote:
> On 05/11/15 at 05:17pm, AKASHI Takahiro wrote:
>> On 05/11/2015 04:54 PM, Baoquan He wrote:
>>> In this patch you reserve a separate memory region in 1st kernel to
>>> store elfcorehdr. I am wondering why you don't call add_buffer in
>>> kexec-tools directly. Like this you can get a region from reserved
>>> crashkernel region. Then you don't need reserve_elfcorehdr() to reserve
>>> memory for elfcorehdr specifically. Like other ARCHs do only one memory
>>> region is reserved in 1st kernel, that's crashkernel region.
>>
>> I think that you misunderstand somewhat.
>> * Kexec-tools only locates/identifies a small region for elfcore header within crash kernel's
>> memory region while 1st kernel is running.
>> * the data in elfcore header is filled up by kexec_load system call on 1st kernel.
>> * 1st kernel doesn't reserve any region for elfcore header because the kernel
>> commandline parameters don't contains "elfcorehdr=" parameter, then elfcorehdr_size=0.
>> * Crash dump kernel does reserve the region, as I said, because we don't want to
>> corrupt the info in elfcore header accidentally while crash kernel is running.
>>
>> Clear?
>
> OK, got it now.
>
> Then I am wondering why "elfcorehdr=" can't be contained in kernel
> cmdline as other ARCH does. Maybe I need go over all related threads
> then know why it is. Thanks for explanation.

Kexec-tools on 1st kernel appends "elfcorehdr=" to the kernel command line,
actually chosen/bootargs in a device tree, that is passed to *crash dump kernel*.
So when crash dump kernel boots up, it can recognizes that area (and reserves it
for later use of managing /proc/vmcore.)

Thanks,
-Takahiro AKASHI

> Thanks
> Baoquan
>
diff mbox

Patch

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index ac3c2e2..6fcc602 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -36,6 +36,7 @@  arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
+arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c
new file mode 100644
index 0000000..3d86c0a
--- /dev/null
+++ b/arch/arm64/kernel/crash_dump.c
@@ -0,0 +1,71 @@ 
+/*
+ * Routines for doing kexec-based kdump
+ *
+ * Copyright (C) 2014 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <asm/memory.h>
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is in a user address space
+ *
+ * Copies @csize bytes, starting @offset bytes into old-kernel page @pfn, to
+ * @buf (a userspace address if @userbuf is non-zero). Returns @csize, or
+ * -ENOMEM if the page cannot be mapped, or -EFAULT if the user copy fails.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+			 size_t csize, unsigned long offset,
+			 int userbuf)
+{
+	void *vaddr;
+
+	if (!csize)
+		return 0;
+
+	vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
+	if (!vaddr)
+		return -ENOMEM;
+
+	if (userbuf) {
+		if (copy_to_user(buf, vaddr + offset, csize)) {
+			iounmap(vaddr);
+			return -EFAULT;
+		}
+	} else {
+		memcpy(buf, vaddr + offset, csize);
+	}
+
+	iounmap(vaddr);
+
+	return csize;
+}
+
+/**
+ * elfcorehdr_read - read from ELF core header
+ * @buf: buffer where the data is placed
+ * @count: number of bytes to read
+ * @ppos: pointer to the physical address to read from
+ *
+ * Copies @count bytes from the ELF core header, which resides in the crash
+ * dump kernel's memory, and returns @count. *@ppos is not advanced.
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+	memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
+	return count;
+}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 51ef972..7932bd0 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -31,7 +31,6 @@ 
 #include <linux/screen_info.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
-#include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/clk-provider.h>
 #include <linux/cpu.h>
@@ -364,6 +363,12 @@  static void __init request_standard_resources(void)
 		    kernel_data.end <= res->end)
 			request_resource(res, &kernel_data);
 	}
+
+#ifdef CONFIG_KEXEC
+	/* User space tools will detect the region with /proc/iomem */
+	if (crashk_res.end)
+		insert_resource(&iomem_resource, &crashk_res);
+#endif
 }
 
 u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
@@ -393,6 +398,7 @@  void __init setup_arch(char **cmdline_p)
 	local_async_enable();
 
 	efi_init();
+
 	arm64_memblock_init();
 
 	paging_init();
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ae85da6..ea70d41 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -34,6 +34,8 @@ 
 #include <linux/dma-contiguous.h>
 #include <linux/efi.h>
 #include <linux/swiotlb.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
 
 #include <asm/fixmap.h>
 #include <asm/memory.h>
@@ -66,6 +68,81 @@  static int __init early_initrd(char *p)
 early_param("initrd", early_initrd);
 #endif
 
+#ifdef CONFIG_KEXEC
+/*
+ * reserve_crashkernel() - reserves memory for crash kernel
+ *
+ * Parses "crashkernel=" from the boot command line. If the parsed base is
+ * zero, a 1MB-aligned region is allocated from memblock; otherwise the given
+ * region is checked and reserved. The result is recorded in crashk_res.
+ */
+static void __init reserve_crashkernel(phys_addr_t limit)
+{
+	unsigned long long crash_size = 0, crash_base = 0;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, limit,
+				&crash_size, &crash_base);
+	if (ret)
+		return;
+
+	if (crash_base == 0) {
+		crash_base = memblock_alloc(crash_size, 1 << 20);
+		if (crash_base == 0) {
+			pr_warn("crashkernel allocation failed (size:%llx)\n",
+				crash_size);
+			return;
+		}
+	} else {
+		/* Explicit base: must be memory and not already reserved */
+		if (!memblock_is_region_memory(crash_base, crash_size) ||
+			memblock_is_region_reserved(crash_base, crash_size)) {
+			pr_warn("crashkernel= has wrong address or size\n");
+			return;
+		}
+
+		if (memblock_reserve(crash_base, crash_size)) {
+			pr_warn("crashkernel reservation failed - out of memory\n");
+			return;
+		}
+	}
+
+	pr_info("Reserving %lldMB of memory at %lldMB for crashkernel\n",
+		crash_size >> 20, crash_base >> 20);
+
+	crashk_res.start = crash_base;
+	crashk_res.end = crash_base + crash_size - 1;
+}
+#endif /* CONFIG_KEXEC */
+
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * reserve_elfcorehdr() - reserves memory for elf core header
+ *
+ * Reserves the memory area given in the "elfcorehdr=" kernel command line
+ * parameter, which a dump capture kernel uses to identify the memory used by
+ * the primary kernel. Does nothing when elfcorehdr_size is zero.
+ */
+static void __init reserve_elfcorehdr(void)
+{
+	if (!elfcorehdr_size)
+		return;
+
+	if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
+		pr_warn("elfcorehdr reservation failed - memory is in use (0x%llx)\n",
+			elfcorehdr_addr);
+		return;
+	}
+
+	if (memblock_reserve(elfcorehdr_addr, elfcorehdr_size)) {
+		pr_warn("elfcorehdr reservation failed - out of memory\n");
+		return;
+	}
+
+	pr_info("Reserving %lldKB of memory at %lldMB for elfcorehdr\n",
+		elfcorehdr_size >> 10, elfcorehdr_addr >> 20);
+}
+#endif /* CONFIG_CRASH_DUMP */
 /*
  * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
  * currently assumes that for memory starting above 4G, 32-bit devices will
@@ -170,6 +247,13 @@  void __init arm64_memblock_init(void)
 		memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
 #endif
 
+#ifdef CONFIG_KEXEC
+	reserve_crashkernel(memory_limit);
+#endif
+#ifdef CONFIG_CRASH_DUMP
+	reserve_elfcorehdr();
+#endif
+
 	early_init_fdt_scan_reserved_mem();
 
 	/* 4GB maximum for 32-bit only capable devices */