diff mbox series

[RFC,v2,3/3] resource, crash: Make kexec_file_load support pmem

Message ID 20230427101838.12267-4-lizhijian@fujitsu.com (mailing list archive)
State Superseded
Headers show
Series pmem memmap dump support | expand

Commit Message

Zhijian Li (Fujitsu) April 27, 2023, 10:18 a.m. UTC
It does:
1. Add pmem region into PT_LOADs of vmcore
2. Mark pmem region's p_flags as PF_DEV

CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Eric Biederman <ebiederm@xmission.com>
CC: Takashi Iwai <tiwai@suse.de>
CC: Baoquan He <bhe@redhat.com>
CC: Vlastimil Babka <vbabka@suse.cz>
CC: Sean Christopherson <seanjc@google.com>
CC: Jonathan Cameron <Jonathan.Cameron@huawei.com>
CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CC: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
CC: Dan Williams <dan.j.williams@intel.com>
CC: "Rafael J. Wysocki" <rafael@kernel.org>
CC: Ira Weiny <ira.weiny@intel.com>
CC: Raul E Rangel <rrangel@chromium.org>
CC: Colin Foster <colin.foster@in-advantage.com>
CC: Vishal Verma <vishal.l.verma@intel.com>
CC: x86@kernel.org
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
---
 arch/x86/kernel/crash.c |  2 ++
 include/linux/ioport.h  |  3 +++
 kernel/kexec_file.c     | 10 ++++++++++
 kernel/resource.c       | 11 +++++++++++
 4 files changed, 26 insertions(+)

Comments

Greg Kroah-Hartman April 27, 2023, 11:39 a.m. UTC | #1
On Thu, Apr 27, 2023 at 06:18:34PM +0800, Li Zhijian wrote:
> It does:
> 1. Add pmem region into PT_LOADs of vmcore
> 2. Mark pmem region's p_flags as PF_DEV

I'm sorry, but I can not parse this changelog.

Please take a look at the kernel documentation for how to write a good
changelog message so that we can properly review the change you wish to
have accepted.

thanks,

greg k-h
Jane Chu April 27, 2023, 8:41 p.m. UTC | #2
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index cdd92ab43cda..dc9d03083565 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -178,6 +178,7 @@ static struct crash_mem *fill_up_crash_elf_data(void)
>   	if (!nr_ranges)
>   		return NULL;
>   
> +	walk_pmem_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);

So this will overwrite 'nr_ranges' produced by the previous 
walk_system_ram_res() call, sure it's correct?

Regards,
-jane
Zhijian Li (Fujitsu) April 28, 2023, 7:10 a.m. UTC | #3
Jane,


On 28/04/2023 04:41, Jane Chu wrote:
>> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
>> index cdd92ab43cda..dc9d03083565 100644
>> --- a/arch/x86/kernel/crash.c
>> +++ b/arch/x86/kernel/crash.c
>> @@ -178,6 +178,7 @@ static struct crash_mem *fill_up_crash_elf_data(void)
>>       if (!nr_ranges)
>>           return NULL;
>> +    walk_pmem_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
> 
> So this will overwrite 'nr_ranges' produced by the previous walk_system_ram_res() call, sure it's correct?


It depends on how the callback get_nr_ram_ranges_callback() handles 'nr_ranges': it only increments the counter, so this change is safe IMHO.

163 static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
164 {
165         unsigned int *nr_ranges = arg;
166
167         (*nr_ranges)++;
168         return 0;
169 }
170
171 /* Gather all the required information to prepare elf headers for ram regions */
172 static struct crash_mem *fill_up_crash_elf_data(void)
173 {
174         unsigned int nr_ranges = 0;
175         struct crash_mem *cmem;
176
177         walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
178         if (!nr_ranges)
179                 return NULL;
180
181         walk_pmem_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);

In the end, nr_ranges = #ram_res + #pmem_res.

Thanks
Zhijian

> 
> Regards,
> -jane
Zhijian Li (Fujitsu) April 28, 2023, 7:36 a.m. UTC | #4
Greg,

Sorry for this *BAD* changelog. This patch is currently more of a *HACK* on resource.c.
Please allow me to rewrite it.

Only the regions described by the PT_LOADs of /proc/vmcore are dumpable/readable by dumping applications.
Previously, on x86/x86_64, only system RAM resources were injected into the PT_LOADs.

So, in order to make the entire pmem resource dumpable/readable, we need to add the pmem regions
to the PT_LOADs of /proc/vmcore.

First, we introduce a new API, walk_pmem_res(), to walk the pmem regions. Further, we also
mark each pmem region with an extra p_flags bit, PF_DEV, when it is added to the PT_LOADs.
Dumping applications can then tell from this flag whether a region is pmem
and take special action accordingly.

Thanks
Zhijian


On 27/04/2023 19:39, Greg Kroah-Hartman wrote:
> On Thu, Apr 27, 2023 at 06:18:34PM +0800, Li Zhijian wrote:
>> It does:
>> 1. Add pmem region into PT_LOADs of vmcore
>> 2. Mark pmem region's p_flags as PF_DEV
> 
> I'm sorry, but I can not parse this changelog.
> 
> Please take a look at the kernel documentation for how to write a good
> changelog message so that we can properly review the change you wish to
> have accepted.
> 
> thanks,
> 
> greg k-h
diff mbox series

Patch

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index cdd92ab43cda..dc9d03083565 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -178,6 +178,7 @@  static struct crash_mem *fill_up_crash_elf_data(void)
 	if (!nr_ranges)
 		return NULL;
 
+	walk_pmem_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
 	/*
 	 * Exclusion of crash region and/or crashk_low_res may cause
 	 * another range split. So add extra two slots here.
@@ -243,6 +244,7 @@  static int prepare_elf_headers(struct kimage *image, void **addr,
 	ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
 	if (ret)
 		goto out;
+	walk_pmem_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
 
 	/* Exclude unwanted mem ranges */
 	ret = elf_header_exclude_ranges(cmem);
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 25d768d48970..bde88a47cc1a 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -331,6 +331,9 @@  extern int
 walk_system_ram_res(u64 start, u64 end, void *arg,
 		    int (*func)(struct resource *, void *));
 extern int
+walk_pmem_res(u64 start, u64 end, void *arg,
+	      int (*func)(struct resource *, void *));
+extern int
 walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
 		    void *arg, int (*func)(struct resource *, void *));
 
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index f1a0e4e3fb5c..e79ceaee2926 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -29,6 +29,8 @@ 
 #include <linux/vmalloc.h>
 #include "kexec_internal.h"
 
+#define PF_DEV (1 << 4)
+
 #ifdef CONFIG_KEXEC_SIG
 static bool sig_enforce = IS_ENABLED(CONFIG_KEXEC_SIG_FORCE);
 
@@ -1221,6 +1223,12 @@  int crash_exclude_mem_range(struct crash_mem *mem,
 	return 0;
 }
 
+static bool is_pmem_range(u64 start, u64 size)
+{
+	return REGION_INTERSECTS == region_intersects(start, size,
+			IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY);
+}
+
 int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
 			  void **addr, unsigned long *sz)
 {
@@ -1302,6 +1310,8 @@  int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
 
 		phdr->p_type = PT_LOAD;
 		phdr->p_flags = PF_R|PF_W|PF_X;
+		if (is_pmem_range(mstart, mend - mstart))
+			phdr->p_flags |= PF_DEV;
 		phdr->p_offset  = mstart;
 
 		phdr->p_paddr = mstart;
diff --git a/kernel/resource.c b/kernel/resource.c
index b1763b2fd7ef..f3f1ce6fc384 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -431,6 +431,17 @@  int walk_system_ram_res(u64 start, u64 end, void *arg,
 				     func);
 }
 
+/*
+ * This function calls the @func callback against all memory ranges, which
+ * are ranges marked as IORESOURCE_MEM and IORES_DESC_PERSISTENT_MEMORY.
+ */
+int walk_pmem_res(u64 start, u64 end, void *arg,
+			int (*func)(struct resource *, void *))
+{
+	return __walk_iomem_res_desc(start, end, IORESOURCE_MEM,
+				     IORES_DESC_PERSISTENT_MEMORY, arg, func);
+}
+
 /*
  * This function calls the @func callback against all memory ranges, which
  * are ranges marked as IORESOURCE_MEM and IORESOUCE_BUSY.